13 files changed, 5986 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/xtensa/crti.S b/gcc-4.9/libgcc/config/xtensa/crti.S
new file mode 100644
index 000000000..13e5e7ed9
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/crti.S
@@ -0,0 +1,51 @@
+# Start .init and .fini sections.
+# Copyright (C) 2003-2014 Free Software Foundation, Inc.
+# 
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+# 
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes a stack frame for the contents of the .fini and
+# .init sections.  Users may put any desired instructions in those
+# sections.
+
+#include "xtensa-config.h"
+
+	.section .init
+	.globl _init
+	.type _init,@function
+	.align	4
+_init:
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+	entry	sp, 64		# windowed case: allocate a 64-byte frame
+#else
+	addi	sp, sp, -32	# otherwise: allocate a 32-byte frame by hand
+	s32i	a0, sp, 0	# save a0 at sp+0; crtn.S reloads it before ret
+#endif
+
+	.section .fini
+	.globl _fini
+	.type _fini,@function
+	.align	4
+_fini:
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+	entry	sp, 64		# windowed case: allocate a 64-byte frame
+#else
+	addi	sp, sp, -32	# otherwise: allocate a 32-byte frame by hand
+	s32i	a0, sp, 0	# save a0 at sp+0; crtn.S reloads it before ret
+#endif
diff --git a/gcc-4.9/libgcc/config/xtensa/crtn.S b/gcc-4.9/libgcc/config/xtensa/crtn.S
new file mode 100644
index 000000000..fda453b97
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/crtn.S
@@ -0,0 +1,46 @@
+# End of .init and .fini sections.
+# Copyright (C) 2003-2014 Free Software Foundation, Inc.
+# 
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+# 
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+
+# This file just makes sure that the .fini and .init sections do in
+# fact return.  Users may put any desired instructions in those sections.
+# This file is the last thing linked into any executable.
+
+#include "xtensa-config.h"
+
+	.section .init
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+	retw			# windowed return; pairs with the entry in crti.S
+#else
+	l32i	a0, sp, 0	# reload the a0 value that crti.S stored at sp+0
+	addi	sp, sp, 32	# release the 32-byte frame that crti.S allocated
+	ret
+#endif
+
+	.section .fini
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+	retw			# windowed return; pairs with the entry in crti.S
+#else
+	l32i	a0, sp, 0	# reload the a0 value that crti.S stored at sp+0
+	addi	sp, sp, 32	# release the 32-byte frame that crti.S allocated
+	ret
+#endif
diff --git a/gcc-4.9/libgcc/config/xtensa/ieee754-df.S b/gcc-4.9/libgcc/config/xtensa/ieee754-df.S
new file mode 100644
index 000000000..3582338ac
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/ieee754-df.S
@@ -0,0 +1,2388 @@
+/* IEEE-754 double-precision functions for Xtensa
+   Copyright (C) 2006-2014 Free Software Foundation, Inc.
+   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
+
+/*  Warning!  The branch displacements for some Xtensa branch instructions
+    are quite small, and this code has been carefully laid out to keep
+    branch targets in range.  If you change anything, be sure to check that
+    the assembler is not relaxing anything to branch over a jump.  */
+
+#ifdef L_negdf2
+
+	.align	4
+	.global	__negdf2
+	.type	__negdf2, @function
+__negdf2:			/* negate the double held in xh/xl, in place */
+	leaf_entry sp, 16
+	movi	a4, 0x80000000	/* a4 = mask with only bit 31 (the sign bit) set */
+	xor	xh, xh, a4	/* flip the sign bit; xl is left untouched */
+	leaf_return
+
+#endif /* L_negdf2 */
+
+#ifdef L_addsubdf3
+
+	/* Addition */
+__adddf3_aux:
+	
+	/* Handle NaNs and Infinities.  On entry a6 = 0x7ff00000, the
+	   exponent mask loaded at function entry.  (This code is placed
+	   before the function just to stay within branch range.)  */
+
+.Ladd_xnan_or_inf:
+	/* If y is neither Infinity nor NaN, return x.  */
+	bnall	yh, a6, 1f
+	/* If x is a NaN, return it.  Otherwise, return y.  */
+	slli	a7, xh, 12	/* drop sign and exponent, keep high mantissa */
+	or	a7, a7, xl	/* a7 != 0 iff any mantissa bit of x is set */
+	beqz	a7, .Ladd_ynan_or_inf	/* zero mantissa: x is an Infinity */
+1:	leaf_return
+
+.Ladd_ynan_or_inf:
+	/* Return y.  */
+	mov	xh, yh
+	mov	xl, yl
+	leaf_return
+
+.Ladd_opposite_signs:
+	/* Operand signs differ.  Do a subtraction.  */
+	slli	a7, a6, 11	/* a7 = 0x80000000, the sign-bit mask */
+	xor	yh, yh, a7	/* negate y so both operands share a sign */
+	j	.Lsub_same_sign
+
+	.align	4
+	.global	__adddf3
+	.type	__adddf3, @function
+__adddf3:
+	leaf_entry sp, 16
+	movi	a6, 0x7ff00000	/* a6 = exponent-field mask */
+
+	/* Check if the two operands have the same sign.  */
+	xor	a7, xh, yh
+	bltz	a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:	
+	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
+	ball	xh, a6, .Ladd_xnan_or_inf
+	ball	yh, a6, .Ladd_ynan_or_inf
+
+	/* Compare the exponents.  The smaller operand will be shifted
+	   right by the exponent difference and added to the larger
+	   one.  */
+	extui	a7, xh, 20, 12	/* a7 = x sign+exponent (bits 20..31) */
+	extui	a8, yh, 20, 12	/* a8 = y sign+exponent (bits 20..31) */
+	bltu	a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+	/* Check if the smaller (or equal) exponent is zero.  */
+	bnone	yh, a6, .Ladd_yexpzero
+
+	/* Replace yh sign/exponent with 0x001.  */
+	or	yh, yh, a6
+	slli	yh, yh, 11
+	srli	yh, yh, 11
+
+.Ladd_yexpdiff:
+	/* Compute the exponent difference.  Optimize for difference < 32.  */
+	sub	a10, a7, a8
+	bgeui	a10, 32, .Ladd_bigshifty
+	
+	/* Shift yh/yl right by the exponent difference.  Any bits that are
+	   shifted out of yl are saved in a9 for rounding the result.  */
+	ssr	a10
+	movi	a9, 0
+	src	a9, yl, a9
+	src	yl, yh, yl
+	srl	yh, yh
+
+.Ladd_addy:
+	/* Do the 64-bit addition.  */
+	add	xl, xl, yl
+	add	xh, xh, yh
+	bgeu	xl, yl, 1f	/* low-word sum did not wrap: no carry */
+	addi	xh, xh, 1	/* propagate the carry into the high word */
+1:
+	/* Check if the add overflowed into the exponent.  */
+	extui	a10, xh, 20, 12
+	beq	a10, a7, .Ladd_round
+	mov	a8, a7		/* .Ladd_carry expects the exponent in a8 */
+	j	.Ladd_carry
+
+.Ladd_yexpzero:
+	/* y is a subnormal value.  Replace its sign/exponent with zero,
+	   i.e., no implicit "1.0", and increment the apparent exponent
+	   because subnormals behave as if they had the minimum (nonzero)
+	   exponent.  Test for the case when both exponents are zero.  */
+	slli	yh, yh, 12
+	srli	yh, yh, 12
+	bnone	xh, a6, .Ladd_bothexpzero
+	addi	a8, a8, 1
+	j	.Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+	/* Both exponents are zero.  Handle this as a special case.  There
+	   is no need to shift or round, and the normal code for handling
+	   a carry into the exponent field will not work because it
+	   assumes there is an implicit "1.0" that needs to be added.  */
+	add	xl, xl, yl
+	add	xh, xh, yh
+	bgeu	xl, yl, 1f
+	addi	xh, xh, 1
+1:	leaf_return
+
+.Ladd_bigshifty:
+	/* Exponent difference >= 64 -- just return the bigger value.  */
+	bgeui	a10, 64, 1b	/* 1b = leaf_return above; x is unmodified */
+
+	/* Shift yh/yl right by the exponent difference.  Any bits that are
+	   shifted out are saved in a9 for rounding the result.  */
+	ssr	a10
+	sll	a11, yl		/* lost bits shifted out of yl */
+	src	a9, yh, yl
+	srl	yl, yh
+	movi	yh, 0
+	beqz	a11, .Ladd_addy
+	or	a9, a9, a10	/* any positive, nonzero value will work */
+	j	.Ladd_addy
+
+.Ladd_xexpzero:
+	/* Same as "yexpzero" except skip handling the case when both
+	   exponents are zero.  */
+	slli	xh, xh, 12
+	srli	xh, xh, 12
+	addi	a7, a7, 1
+	j	.Ladd_xexpdiff
+
+.Ladd_shiftx:
+	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
+	   because the exponent difference is always nonzero in this version,
+	   the shift sequence can use SLL and skip loading a constant zero.  */
+	bnone	xh, a6, .Ladd_xexpzero
+
+	or	xh, xh, a6
+	slli	xh, xh, 11
+	srli	xh, xh, 11
+
+.Ladd_xexpdiff:
+	sub	a10, a8, a7
+	bgeui	a10, 32, .Ladd_bigshiftx
+	
+	ssr	a10
+	sll	a9, xl
+	src	xl, xh, xl
+	srl	xh, xh
+
+.Ladd_addx:
+	add	xl, xl, yl
+	add	xh, xh, yh
+	bgeu	xl, yl, 1f	/* low-word sum did not wrap: no carry */
+	addi	xh, xh, 1	/* propagate the carry into the high word */
+1:
+	/* Check if the add overflowed into the exponent.  */
+	extui	a10, xh, 20, 12
+	bne	a10, a8, .Ladd_carry
+
+.Ladd_round:
+	/* Round up if the leftover fraction is >= 1/2.  */
+	bgez	a9, 1f
+	addi	xl, xl, 1
+	beqz	xl, .Ladd_roundcarry
+
+	/* Check if the leftover fraction is exactly 1/2.  */
+	slli	a9, a9, 1
+	beqz	a9, .Ladd_exactlyhalf
+1:	leaf_return
+
+.Ladd_bigshiftx:
+	/* Mostly the same thing as "bigshifty"....  */
+	bgeui	a10, 64, .Ladd_returny
+
+	ssr	a10
+	sll	a11, xl
+	src	a9, xh, xl
+	srl	xl, xh
+	movi	xh, 0
+	beqz	a11, .Ladd_addx
+	or	a9, a9, a10
+	j	.Ladd_addx
+
+.Ladd_returny:
+	mov	xh, yh
+	mov	xl, yl
+	leaf_return
+
+.Ladd_carry:	
+	/* The addition has overflowed into the exponent field, so the
+	   value needs to be renormalized.  The mantissa of the result
+	   can be recovered by subtracting the original exponent and
+	   adding 0x100000 (which is the explicit "1.0" for the
+	   mantissa of the non-shifted operand -- the "1.0" for the
+	   shifted operand was already added).  The mantissa can then
+	   be shifted right by one bit.  The explicit "1.0" of the
+	   shifted mantissa then needs to be replaced by the exponent,
+	   incremented by one to account for the normalizing shift.
+	   It is faster to combine these operations: do the shift first
+	   and combine the additions and subtractions.  If x is the
+	   original exponent, the result is:
+	       shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
+	   or:
+	       shifted mantissa + ((x + 1) << 19)
+	   Note that the exponent is incremented here by leaving the
+	   explicit "1.0" of the mantissa in the exponent field.  */
+
+	/* Shift xh/xl right by one bit.  Save the lsb of xl.  */
+	mov	a10, xl
+	ssai	1
+	src	xl, xh, xl
+	srl	xh, xh
+
+	/* See explanation above.  The original exponent is in a8.  */
+	addi	a8, a8, 1
+	slli	a8, a8, 19
+	add	xh, xh, a8
+
+	/* Return an Infinity if the exponent overflowed.  */
+	ball	xh, a6, .Ladd_infinity
+	
+	/* Same thing as the "round" code except the msb of the leftover
+	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
+	bbci.l	a10, 0, 1f
+	addi	xl, xl, 1
+	beqz	xl, .Ladd_roundcarry
+	beqz	a9, .Ladd_exactlyhalf
+1:	leaf_return
+
+.Ladd_infinity:
+	/* Clear the mantissa.  */
+	movi	xl, 0
+	srli	xh, xh, 20
+	slli	xh, xh, 20
+
+	/* The sign bit may have been lost in a carry-out.  Put it back.  */
+	slli	a8, a8, 1
+	or	xh, xh, a8
+	leaf_return
+
+.Ladd_exactlyhalf:
+	/* Round down to the nearest even value.  */
+	srli	xl, xl, 1
+	slli	xl, xl, 1
+	leaf_return
+
+.Ladd_roundcarry:
+	/* xl is always zero when the rounding increment overflows, so
+	   there's no need to round it to an even value.  */
+	addi	xh, xh, 1
+	/* Overflow to the exponent is OK.  */
+	leaf_return
+
+
+	/* Subtraction */
+__subdf3_aux:
+	
+	/* Handle NaNs and Infinities.  On entry a6 = 0x7ff00000, the
+	   exponent mask loaded at function entry.  (This code is placed
+	   before the function just to stay within branch range.)  */
+
+.Lsub_xnan_or_inf:
+	/* If y is neither Infinity nor NaN, return x.  */
+	bnall	yh, a6, 1f
+	/* Both x and y are either NaN or Inf, so the result is NaN.  */
+	movi	a4, 0x80000	/* make it a quiet NaN */
+	or	xh, xh, a4
+1:	leaf_return
+
+.Lsub_ynan_or_inf:
+	/* Negate y and return it.  */
+	slli	a7, a6, 11	/* a7 = 0x80000000, the sign-bit mask */
+	xor	xh, yh, a7
+	mov	xl, yl
+	leaf_return
+
+.Lsub_opposite_signs:
+	/* Operand signs differ.  Do an addition.  */
+	slli	a7, a6, 11	/* a7 = 0x80000000, the sign-bit mask */
+	xor	yh, yh, a7	/* negate y so both operands share a sign */
+	j	.Ladd_same_sign
+
+	.align	4
+	.global	__subdf3
+	.type	__subdf3, @function
+__subdf3:
+	leaf_entry sp, 16
+	movi	a6, 0x7ff00000	/* a6 = exponent-field mask */
+
+	/* Check if the two operands have the same sign.  */
+	xor	a7, xh, yh
+	bltz	a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:	
+	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
+	ball	xh, a6, .Lsub_xnan_or_inf
+	ball	yh, a6, .Lsub_ynan_or_inf
+
+	/* Compare the operands.  In contrast to addition, the entire
+	   value matters here.  */
+	extui	a7, xh, 20, 11	/* a7 = x exponent field (bits 20..30) */
+	extui	a8, yh, 20, 11	/* a8 = y exponent field (bits 20..30) */
+	bltu	xh, yh, .Lsub_xsmaller	/* equal signs: orders by magnitude */
+	beq	xh, yh, .Lsub_compare_low
+
+.Lsub_ysmaller:
+	/* Check if the smaller (or equal) exponent is zero.  */
+	bnone	yh, a6, .Lsub_yexpzero
+
+	/* Replace yh sign/exponent with 0x001.  */
+	or	yh, yh, a6
+	slli	yh, yh, 11
+	srli	yh, yh, 11
+
+.Lsub_yexpdiff:
+	/* Compute the exponent difference.  Optimize for difference < 32.  */
+	sub	a10, a7, a8
+	bgeui	a10, 32, .Lsub_bigshifty
+	
+	/* Shift yh/yl right by the exponent difference.  Any bits that are
+	   shifted out of yl are saved in a9 for rounding the result.  */
+	ssr	a10
+	movi	a9, 0
+	src	a9, yl, a9
+	src	yl, yh, yl
+	srl	yh, yh
+
+.Lsub_suby:
+	/* Do the 64-bit subtraction.  */
+	sub	xh, xh, yh
+	bgeu	xl, yl, 1f	/* xl >= yl: no borrow from the high word */
+	addi	xh, xh, -1	/* borrow from the high word */
+1:	sub	xl, xl, yl
+
+	/* Subtract the leftover bits in a9 from zero and propagate any
+	   borrow from xh/xl.  */
+	neg	a9, a9
+	beqz	a9, 1f
+	addi	a5, xh, -1
+	moveqz	xh, a5, xl
+	addi	xl, xl, -1
+1:
+	/* Check if the subtract underflowed into the exponent.  */
+	extui	a10, xh, 20, 11
+	beq	a10, a7, .Lsub_round
+	j	.Lsub_borrow
+
+.Lsub_compare_low:
+	/* The high words are equal.  Compare the low words.  */
+	bltu	xl, yl, .Lsub_xsmaller
+	bltu	yl, xl, .Lsub_ysmaller
+	/* The operands are equal.  Return 0.0.  */
+	movi	xh, 0
+	movi	xl, 0
+1:	leaf_return
+
+.Lsub_yexpzero:
+	/* y is a subnormal value.  Replace its sign/exponent with zero,
+	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
+	   y's apparent exponent because subnormals behave as if they had
+	   the minimum (nonzero) exponent.  */
+	slli	yh, yh, 12
+	srli	yh, yh, 12
+	bnone	xh, a6, .Lsub_yexpdiff
+	addi	a8, a8, 1
+	j	.Lsub_yexpdiff
+
+.Lsub_bigshifty:
+	/* Exponent difference >= 64 -- just return the bigger value.  */
+	bgeui	a10, 64, 1b	/* 1b = leaf_return above; x is unmodified */
+
+	/* Shift yh/yl right by the exponent difference.  Any bits that are
+	   shifted out are saved in a9 for rounding the result.  */
+	ssr	a10
+	sll	a11, yl		/* lost bits shifted out of yl */
+	src	a9, yh, yl
+	srl	yl, yh
+	movi	yh, 0
+	beqz	a11, .Lsub_suby
+	or	a9, a9, a10	/* any positive, nonzero value will work */
+	j	.Lsub_suby
+
+.Lsub_xsmaller:
+	/* Same thing as the "ysmaller" code, but with x and y swapped and
+	   with y negated.  */
+	bnone	xh, a6, .Lsub_xexpzero
+
+	or	xh, xh, a6
+	slli	xh, xh, 11
+	srli	xh, xh, 11
+
+.Lsub_xexpdiff:
+	sub	a10, a8, a7
+	bgeui	a10, 32, .Lsub_bigshiftx
+	
+	ssr	a10
+	movi	a9, 0
+	src	a9, xl, a9
+	src	xl, xh, xl
+	srl	xh, xh
+
+	/* Negate y.  */
+	slli	a11, a6, 11	/* a11 = 0x80000000, the sign-bit mask */
+	xor	yh, yh, a11
+
+.Lsub_subx:
+	sub	xl, yl, xl
+	sub	xh, yh, xh
+	bgeu	yl, xl, 1f
+	addi	xh, xh, -1
+1:
+	/* Subtract the leftover bits in a9 from zero and propagate any
+	   borrow from xh/xl.  */
+	neg	a9, a9
+	beqz	a9, 1f
+	addi	a5, xh, -1
+	moveqz	xh, a5, xl
+	addi	xl, xl, -1
+1:
+	/* Check if the subtract underflowed into the exponent.  */
+	extui	a10, xh, 20, 11
+	bne	a10, a8, .Lsub_borrow
+
+.Lsub_round:
+	/* Round up if the leftover fraction is >= 1/2.  */
+	bgez	a9, 1f
+	addi	xl, xl, 1
+	beqz	xl, .Lsub_roundcarry
+
+	/* Check if the leftover fraction is exactly 1/2.  */
+	slli	a9, a9, 1
+	beqz	a9, .Lsub_exactlyhalf
+1:	leaf_return
+
+.Lsub_xexpzero:
+	/* Same as "yexpzero".  */
+	slli	xh, xh, 12
+	srli	xh, xh, 12
+	bnone	yh, a6, .Lsub_xexpdiff
+	addi	a7, a7, 1
+	j	.Lsub_xexpdiff
+
+.Lsub_bigshiftx:
+	/* Mostly the same thing as "bigshifty", but with the sign bit of the
+	   shifted value set so that the subsequent subtraction flips the
+	   sign of y.  */
+	bgeui	a10, 64, .Lsub_returny
+
+	ssr	a10
+	sll	a11, xl
+	src	a9, xh, xl
+	srl	xl, xh
+	slli	xh, a6, 11	/* set sign bit of xh */
+	beqz	a11, .Lsub_subx
+	or	a9, a9, a10
+	j	.Lsub_subx
+
+.Lsub_returny:
+	/* Negate and return y.  */
+	slli	a7, a6, 11
+	xor	xh, yh, a7
+	mov	xl, yl
+	leaf_return
+
+.Lsub_borrow:	
+	/* The subtraction has underflowed into the exponent field, so the
+	   value needs to be renormalized.  Shift the mantissa left as
+	   needed to remove any leading zeros and adjust the exponent
+	   accordingly.  If the exponent is not large enough to remove
+	   all the leading zeros, the result will be a subnormal value.  */
+
+	slli	a8, xh, 12
+	beqz	a8, .Lsub_xhzero
+	do_nsau	a6, a8, a7, a11
+	srli	a8, a8, 12
+	bge	a6, a10, .Lsub_subnormal
+	addi	a6, a6, 1
+
+.Lsub_shift_lt32:
+	/* Shift the mantissa (a8/xl/a9) left by a6.  */
+	ssl	a6
+	src	a8, a8, xl
+	src	xl, xl, a9
+	sll	a9, a9
+
+	/* Combine the shifted mantissa with the sign and exponent,
+	   decrementing the exponent by a6.  (The exponent has already
+	   been decremented by one due to the borrow from the subtraction,
+	   but adding the mantissa will increment the exponent by one.)  */
+	srli	xh, xh, 20
+	sub	xh, xh, a6
+	slli	xh, xh, 20
+	add	xh, xh, a8
+	j	.Lsub_round
+
+.Lsub_exactlyhalf:
+	/* Round down to the nearest even value.  */
+	srli	xl, xl, 1
+	slli	xl, xl, 1
+	leaf_return
+
+.Lsub_roundcarry:
+	/* xl is always zero when the rounding increment overflows, so
+	   there's no need to round it to an even value.  */
+	addi	xh, xh, 1
+	/* Overflow to the exponent is OK.  */
+	leaf_return
+
+.Lsub_xhzero:
+	/* When normalizing the result, all the mantissa bits in the high
+	   word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
+	do_nsau	a6, xl, a7, a11
+	addi	a6, a6, 21
+	blt	a10, a6, .Lsub_subnormal
+
+.Lsub_normalize_shift:
+	bltui	a6, 32, .Lsub_shift_lt32
+
+	ssl	a6
+	src	a8, xl, a9
+	sll	xl, a9
+	movi	a9, 0
+
+	srli	xh, xh, 20
+	sub	xh, xh, a6
+	slli	xh, xh, 20
+	add	xh, xh, a8
+	j	.Lsub_round
+
+.Lsub_subnormal:
+	/* The exponent is too small to shift away all the leading zeros.
+	   Set a6 to the current exponent (which has already been
+	   decremented by the borrow) so that the exponent of the result
+	   will be zero.  Do not add 1 to a6 in this case, because: (1)
+	   adding the mantissa will not increment the exponent, so there is
+	   no need to subtract anything extra from the exponent to
+	   compensate, and (2) the effective exponent of a subnormal is 1
+	   not 0 so the shift amount must be 1 smaller than normal. */
+	mov	a6, a10
+	j	.Lsub_normalize_shift
+
+#endif /* L_addsubdf3 */
+
+#ifdef L_muldf3
+
+	/* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__muldf3_aux:
+
+	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+	   (This code is placed before the start of the function just to
+	   keep it in range of the limited branch displacements.)  */
+
+.Lmul_xexpzero:
+	/* Clear the sign bit of x.  */
+	slli	xh, xh, 1
+	srli	xh, xh, 1
+
+	/* If x is zero, return zero.  */
+	or	a10, xh, xl
+	beqz	a10, .Lmul_return_zero
+
+	/* Normalize x.  Adjust the exponent in a8.  */
+	beqz	xh, .Lmul_xh_zero
+	do_nsau	a10, xh, a11, a12
+	addi	a10, a10, -11
+	ssl	a10
+	src	xh, xh, xl
+	sll	xl, xl
+	movi	a8, 1
+	sub	a8, a8, a10	/* a8 = 1 - (left shift applied to x) */
+	j	.Lmul_xnormalized	
+.Lmul_xh_zero:
+	do_nsau	a10, xl, a11, a12
+	addi	a10, a10, -11
+	movi	a8, -31
+	sub	a8, a8, a10
+	ssl	a10
+	bltz	a10, .Lmul_xl_srl
+	sll	xh, xl
+	movi	xl, 0
+	j	.Lmul_xnormalized
+.Lmul_xl_srl:
+	srl	xh, xl
+	sll	xl, xl
+	j	.Lmul_xnormalized
+	
+.Lmul_yexpzero:
+	/* Clear the sign bit of y.  */
+	slli	yh, yh, 1
+	srli	yh, yh, 1
+
+	/* If y is zero, return zero.  */
+	or	a10, yh, yl
+	beqz	a10, .Lmul_return_zero
+
+	/* Normalize y.  Adjust the exponent in a9.  */
+	beqz	yh, .Lmul_yh_zero
+	do_nsau	a10, yh, a11, a12
+	addi	a10, a10, -11
+	ssl	a10
+	src	yh, yh, yl
+	sll	yl, yl
+	movi	a9, 1
+	sub	a9, a9, a10	/* a9 = 1 - (left shift applied to y) */
+	j	.Lmul_ynormalized	
+.Lmul_yh_zero:
+	do_nsau	a10, yl, a11, a12
+	addi	a10, a10, -11
+	movi	a9, -31
+	sub	a9, a9, a10
+	ssl	a10
+	bltz	a10, .Lmul_yl_srl
+	sll	yh, yl
+	movi	yl, 0
+	j	.Lmul_ynormalized
+.Lmul_yl_srl:
+	srl	yh, yl
+	sll	yl, yl
+	j	.Lmul_ynormalized	
+
+.Lmul_return_zero:
+	/* Return zero with the appropriate sign bit.  */
+	srli	xh, a7, 31	/* a7 bit 31 = x sign XOR y sign */
+	slli	xh, xh, 31	/* xh = result sign bit only */
+	movi	xl, 0
+	j	.Lmul_done
+
+.Lmul_xnan_or_inf:
+	/* If y is zero, return NaN.  */
+	bnez	yl, 1f
+	slli	a8, yh, 1
+	bnez	a8, 1f
+	movi	a4, 0x80000	/* make it a quiet NaN */
+	or	xh, xh, a4
+	j	.Lmul_done
+1:
+	/* If y is NaN, return y.  */
+	bnall	yh, a6, .Lmul_returnx
+	slli	a8, yh, 12
+	or	a8, a8, yl
+	beqz	a8, .Lmul_returnx
+
+.Lmul_returny:
+	mov	xh, yh
+	mov	xl, yl
+
+.Lmul_returnx:
+	/* Set the sign bit and return.  */
+	extui	a7, a7, 31, 1	/* a7 = result sign as 0 or 1 */
+	slli	xh, xh, 1	/* drop the old sign bit of xh */
+	ssai	1
+	src	xh, a7, xh	/* shift back, pulling the new sign into bit 31 */
+	j	.Lmul_done
+
+.Lmul_ynan_or_inf:
+	/* If x is zero, return NaN.  */
+	bnez	xl, .Lmul_returny
+	slli	a8, xh, 1
+	bnez	a8, .Lmul_returny
+	movi	a7, 0x80000	/* make it a quiet NaN */
+	or	xh, yh, a7
+	j	.Lmul_done
+
+	.align	4
+	.global	__muldf3
+	.type	__muldf3, @function
+__muldf3:
+#if __XTENSA_CALL0_ABI__
+	leaf_entry sp, 32
+	addi	sp, sp, -32
+	s32i	a12, sp, 16
+	s32i	a13, sp, 20
+	s32i	a14, sp, 24
+	s32i	a15, sp, 28
+#elif XCHAL_NO_MUL
+	/* This is not really a leaf function; allocate enough stack space
+	   to allow CALL12s to a helper function.  */
+	leaf_entry sp, 64
+#else
+	leaf_entry sp, 32
+#endif
+	movi	a6, 0x7ff00000	/* a6 = exponent-field mask */
+
+	/* Get the sign of the result.  */
+	xor	a7, xh, yh
+
+	/* Check for NaN and infinity.  */
+	ball	xh, a6, .Lmul_xnan_or_inf
+	ball	yh, a6, .Lmul_ynan_or_inf
+
+	/* Extract the exponents.  */
+	extui	a8, xh, 20, 11
+	extui	a9, yh, 20, 11
+
+	beqz	a8, .Lmul_xexpzero
+.Lmul_xnormalized:	
+	beqz	a9, .Lmul_yexpzero
+.Lmul_ynormalized:	
+
+	/* Add the exponents.  */
+	add	a8, a8, a9
+
+	/* Replace sign/exponent fields with explicit "1.0".  */
+	movi	a10, 0x1fffff	/* keeps bits 0..20: mantissa plus the "1.0" */
+	or	xh, xh, a6
+	and	xh, xh, a10
+	or	yh, yh, a6
+	and	yh, yh, a10
+
+	/* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
+	   The least-significant word of the result is thrown away except
+	   that if it is nonzero, the lsb of a6 is set to 1.  */
+#if XCHAL_HAVE_MUL32_HIGH
+
+	/* Compute a6 with any carry-outs in a10.  */
+	movi	a10, 0
+	mull	a6, xl, yh
+	mull	a11, xh, yl
+	add	a6, a6, a11
+	bgeu	a6, a11, 1f
+	addi	a10, a10, 1
+1:
+	muluh	a11, xl, yl
+	add	a6, a6, a11
+	bgeu	a6, a11, 1f
+	addi	a10, a10, 1
+1:	
+	/* If the low word of the result is nonzero, set the lsb of a6.  */
+	mull	a11, xl, yl
+	beqz	a11, 1f
+	movi	a9, 1
+	or	a6, a6, a9
+1:
+	/* Compute xl with any carry-outs in a9.  */
+	movi	a9, 0
+	mull	a11, xh, yh
+	add	a10, a10, a11
+	bgeu	a10, a11, 1f
+	addi	a9, a9, 1
+1:	
+	muluh	a11, xh, yl
+	add	a10, a10, a11
+	bgeu	a10, a11, 1f
+	addi	a9, a9, 1
+1:	
+	muluh	xl, xl, yh
+	add	xl, xl, a10
+	bgeu	xl, a10, 1f
+	addi	a9, a9, 1
+1:
+	/* Compute xh.  */
+	muluh	xh, xh, yh
+	add	xh, xh, a9
+
+#else /* ! XCHAL_HAVE_MUL32_HIGH */
+
+	/* Break the inputs into 16-bit chunks and compute 16 32-bit partial
+	   products.  These partial products are:
+
+		0 xll * yll
+
+		1 xll * ylh
+		2 xlh * yll
+
+		3 xll * yhl
+		4 xlh * ylh
+		5 xhl * yll
+
+		6 xll * yhh
+		7 xlh * yhl
+		8 xhl * ylh
+		9 xhh * yll
+
+		10 xlh * yhh
+		11 xhl * yhl
+		12 xhh * ylh
+
+		13 xhl * yhh
+		14 xhh * yhl
+
+		15 xhh * yhh
+
+	   where the input chunks are (hh, hl, lh, ll).  If using the Mul16
+	   or Mul32 multiplier options, these input chunks must be stored in
+	   separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
+	   that the inputs come from either half of the registers, so there
+	   is no need to shift them out ahead of time.  If there is no
+	   multiply hardware, the 16-bit chunks can be extracted when setting
+	   up the arguments to the separate multiply function.  */
+
+	/* Save a7 since it is needed to hold a temporary value.  */
+	s32i	a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+	/* Calling a separate multiply function will clobber a0 and requires
+	   use of a8 as a temporary, so save those values now.  (The function
+	   uses a custom ABI so nothing else needs to be saved.)  */
+	s32i	a0, sp, 0
+	s32i	a8, sp, 8
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define xlh a12
+#define ylh a13
+#define xhh a14
+#define yhh a15
+
+	/* Get the high halves of the inputs into registers.  */
+	srli	xlh, xl, 16
+	srli	ylh, yl, 16
+	srli	xhh, xh, 16
+	srli	yhh, yh, 16
+
+#define xll xl
+#define yll yl
+#define xhl xh
+#define yhl yh
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+	/* Clear the high halves of the inputs.  This does not matter
+	   for MUL16 because the high bits are ignored.  */
+	extui	xl, xl, 0, 16
+	extui	xh, xh, 0, 16
+	extui	yl, yl, 0, 16
+	extui	yh, yh, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	mul16u	dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	mull	dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+   a period in the definition of do_mul below.  These macros are a workaround
+   using underscores instead of periods when doing the concatenation.  */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
+	rsr	dst, ACCLO
+
+#else /* no multiply hardware */
+	
+#define set_arg_l(dst, src) \
+	extui	dst, src, 0, 16
+#define set_arg_h(dst, src) \
+	srli	dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	set_arg_ ## xhalf (a13, xreg); \
+	set_arg_ ## yhalf (a14, yreg); \
+	call0	.Lmul_mulsi3; \
+	mov	dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	set_arg_ ## xhalf (a14, xreg); \
+	set_arg_ ## yhalf (a15, yreg); \
+	call12	.Lmul_mulsi3; \
+	mov	dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+	/* Add pp1 and pp2 into a10 with carry-out in a9.  */
+	do_mul(a10, xl, l, yl, h)	/* pp 1 */
+	do_mul(a11, xl, h, yl, l)	/* pp 2 */
+	movi	a9, 0
+	add	a10, a10, a11
+	bgeu	a10, a11, 1f
+	addi	a9, a9, 1
+1:
+	/* Initialize a6 with a9/a10 shifted into position.  Note that
+	   this value can be safely incremented without any carry-outs.  */
+	ssai	16
+	src	a6, a9, a10
+
+	/* Compute the low word into a10.  */
+	do_mul(a11, xl, l, yl, l)	/* pp 0 */
+	sll	a10, a10
+	add	a10, a10, a11
+	bgeu	a10, a11, 1f
+	addi	a6, a6, 1
+1:
+	/* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
+	   This is good enough to determine the low half of a6, so that any
+	   nonzero bits from the low word of the result can be collapsed
+	   into a6, freeing up a register.  */
+	movi	a9, 0
+	do_mul(a11, xl, l, yh, l)	/* pp 3 */
+	add	a6, a6, a11
+	bgeu	a6, a11, 1f
+	addi	a9, a9, 1
+1:
+	do_mul(a11, xl, h, yl, h)	/* pp 4 */
+	add	a6, a6, a11
+	bgeu	a6, a11, 1f
+	addi	a9, a9, 1
+1:
+	do_mul(a11, xh, l, yl, l)	/* pp 5 */
+	add	a6, a6, a11
+	bgeu	a6, a11, 1f
+	addi	a9, a9, 1
+1:
+	/* Collapse any nonzero bits from the low word into a6.  */
+	beqz	a10, 1f
+	movi	a11, 1
+	or	a6, a6, a11
+1:
+	/* Add pp6-9 into a11 with carry-outs in a10.  */
+	do_mul(a7, xl, l, yh, h)	/* pp 6 */
+	do_mul(a11, xh, h, yl, l)	/* pp 9 */
+	movi	a10, 0
+	add	a11, a11, a7
+	bgeu	a11, a7, 1f
+	addi	a10, a10, 1
+1:	
+	do_mul(a7, xl, h, yh, l)	/* pp 7 */
+	add	a11, a11, a7
+	bgeu	a11, a7, 1f
+	addi	a10, a10, 1
+1:	
+	do_mul(a7, xh, l, yl, h)	/* pp 8 */
+	add	a11, a11, a7
+	bgeu	a11, a7, 1f
+	addi	a10, a10, 1
+1:	
+	/* Shift a10/a11 into position, and add low half of a11 to a6.  */
+	src	a10, a10, a11
+	add	a10, a10, a9
+	sll	a11, a11
+	add	a6, a6, a11
+	bgeu	a6, a11, 1f
+	addi	a10, a10, 1
+1:
+	/* Add pp10-12 into xl with carry-outs in a9.  */
+	movi	a9, 0
+	do_mul(xl, xl, h, yh, h)	/* pp 10 */
+	add	xl, xl, a10
+	bgeu	xl, a10, 1f
+	addi	a9, a9, 1
+1:
+	do_mul(a10, xh, l, yh, l)	/* pp 11 */
+	add	xl, xl, a10
+	bgeu	xl, a10, 1f
+	addi	a9, a9, 1
+1:
+	do_mul(a10, xh, h, yl, h)	/* pp 12 */
+	add	xl, xl, a10
+	bgeu	xl, a10, 1f
+	addi	a9, a9, 1
+1:
+	/* Add pp13-14 into a11 with carry-outs in a10.  */
+	do_mul(a11, xh, l, yh, h)	/* pp 13 */
+	do_mul(a7, xh, h, yh, l)	/* pp 14 */
+	movi	a10, 0
+	add	a11, a11, a7
+	bgeu	a11, a7, 1f
+	addi	a10, a10, 1
+1:
+	/* Shift a10/a11 into position, and add low half of a11 to xl.  */
+	src	a10, a10, a11
+	add	a10, a10, a9
+	sll	a11, a11
+	add	xl, xl, a11
+	bgeu	xl, a11, 1f
+	addi	a10, a10, 1
+1:
+	/* Compute xh.  */
+	do_mul(xh, xh, h, yh, h)	/* pp 15 */
+	add	xh, xh, a10
+
+	/* Restore values saved on the stack during the multiplication.  */
+	l32i	a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+	l32i	a0, sp, 0
+	l32i	a8, sp, 8
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+	/* Shift left by 12 bits, unless there was a carry-out from the
+	   multiply, in which case, shift by 11 bits and increment the
+	   exponent.  Note: It is convenient to use the constant 0x3ff
+	   instead of 0x400 when removing the extra exponent bias (so that
+	   it is easy to construct 0x7fe for the overflow check).  Reverse
+	   the logic here to decrement the exponent sum by one unless there
+	   was a carry-out.  */
+	movi	a4, 11
+	srli	a5, xh, 21 - 12	/* nonzero iff any of bits 9..31 of xh is set */
+	bnez	a5, 1f
+	addi	a4, a4, 1
+	addi	a8, a8, -1
+1:	ssl	a4
+	src	xh, xh, xl
+	src	xl, xl, a6
+	sll	a6, a6
+
+	/* Subtract the extra bias from the exponent sum (plus one to account
+	   for the explicit "1.0" of the mantissa that will be added to the
+	   exponent in the final result).  */
+	movi	a4, 0x3ff
+	sub	a8, a8, a4
+	
+	/* Check for over/underflow.  The value in a8 is one less than the
+	   final exponent, so values in the range 0..7fd are OK here.  */
+	slli	a4, a4, 1	/* 0x7fe */
+	bgeu	a8, a4, .Lmul_overflow
+	
+.Lmul_round:
+	/* Round.  */
+	bgez	a6, .Lmul_rounded
+	addi	xl, xl, 1
+	beqz	xl, .Lmul_roundcarry
+	slli	a6, a6, 1
+	beqz	a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+	/* Add the exponent to the mantissa.  */
+	slli	a8, a8, 20
+	add	xh, xh, a8
+
+.Lmul_addsign:
+	/* Add the sign bit.  */
+	srli	a7, a7, 31
+	slli	a7, a7, 31
+	or	xh, xh, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+	l32i	a12, sp, 16
+	l32i	a13, sp, 20
+	l32i	a14, sp, 24
+	l32i	a15, sp, 28
+	addi	sp, sp, 32
+#endif
+	leaf_return
+
+.Lmul_exactlyhalf:
+	/* Round down to the nearest even value.  */
+	srli	xl, xl, 1
+	slli	xl, xl, 1
+	j	.Lmul_rounded
+
+.Lmul_roundcarry:
+	/* xl is always zero when the rounding increment overflows, so
+	   there's no need to round it to an even value.  */
+	addi	xh, xh, 1
+	/* Overflow is OK -- it will be added to the exponent.  */
+	j	.Lmul_rounded
+
+.Lmul_overflow:
+	bltz	a8, .Lmul_underflow
+	/* Return +/- Infinity.  */
+	addi	a8, a4, 1	/* 0x7ff */
+	slli	xh, a8, 20
+	movi	xl, 0
+	j	.Lmul_addsign
+
+.Lmul_underflow:
+	/* Create a subnormal value, where the exponent field contains zero,
+	   but the effective exponent is 1.  The value of a8 is one less than
+	   the actual exponent, so just negate it to get the shift amount.  */
+	neg	a8, a8
+	mov	a9, a6		/* keep the old leftover bits for the test below */
+	ssr	a8
+	bgeui	a8, 32, .Lmul_bigshift
+	
+	/* Shift xh/xl right.  Any bits that are shifted out of xl are saved
+	   in a6 (combined with the shifted-out bits currently in a6) for
+	   rounding the result.  */
+	sll	a6, xl
+	src	xl, xh, xl
+	srl	xh, xh
+	j	1f
+
+.Lmul_bigshift:
+	bgeui	a8, 64, .Lmul_flush_to_zero
+	sll	a10, xl		/* lost bits shifted out of xl */
+	src	a6, xh, xl
+	srl	xl, xh
+	movi	xh, 0
+	or	a9, a9, a10
+
+	/* Set the exponent to zero.  */
+1:	movi	a8, 0
+
+	/* Pack any nonzero bits shifted out into a6.  */
+	beqz	a9, .Lmul_round
+	movi	a9, 1
+	or	a6, a6, a9
+	j	.Lmul_round
+	
+.Lmul_flush_to_zero:
+	/* Return zero with the appropriate sign bit.  */
+	srli	xh, a7, 31
+	slli	xh, xh, 31
+	movi	xl, 0
+	j	.Lmul_done
+
+#if XCHAL_NO_MUL
+	
+	/* For Xtensa processors with no multiply hardware, this simplified
+	   version of _mulsi3 is used for multiplying 16-bit chunks of
+	   the floating-point mantissas.  When using CALL0, this function
+	   uses a custom ABI: the inputs are passed in a13 and a14, the
+	   result is returned in a12, and a8 and a15 are clobbered.  */
+	.align	4
+.Lmul_mulsi3:
+	leaf_entry sp, 16
+	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+	movi	\dst, 0			/* the product accumulates in dst */
+1:	add	\tmp1, \src2, \dst	/* candidate value: dst + src2 */
+	extui	\tmp2, \src1, 0, 1	/* bit 0 of src1 */
+	movnez	\dst, \tmp1, \tmp2	/* accept the candidate only when that bit is set */
+
+	do_addx2 \tmp1, \src2, \dst, \tmp1	/* presumably dst + 2 * src2; macro is defined elsewhere */
+	extui	\tmp2, \src1, 1, 1	/* bit 1 of src1 */
+	movnez	\dst, \tmp1, \tmp2
+
+	do_addx4 \tmp1, \src2, \dst, \tmp1	/* presumably dst + 4 * src2; macro is defined elsewhere */
+	extui	\tmp2, \src1, 2, 1	/* bit 2 of src1 */
+	movnez	\dst, \tmp1, \tmp2
+
+	do_addx8 \tmp1, \src2, \dst, \tmp1	/* presumably dst + 8 * src2; macro is defined elsewhere */
+	extui	\tmp2, \src1, 3, 1	/* bit 3 of src1 */
+	movnez	\dst, \tmp1, \tmp2
+
+	srli	\src1, \src1, 4		/* four bits of src1 are consumed per pass */
+	slli	\src2, \src2, 4		/* scale src2 by 16 to match */
+	bnez	\src1, 1b		/* loop until no set bits remain in src1 */
+	.endm
+#if __XTENSA_CALL0_ABI__
+	mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+	/* The result will be written into a2, so save that argument in a4.  */
+	mov	a4, a2
+	mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+	leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_muldf3 */
+
+#ifdef L_divdf3
+
+	/* Division */
+__divdf3_aux:
+
+	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+	   (This code is placed before the start of the function just to
+	   keep it in range of the limited branch displacements.)  */
+
+.Ldiv_yexpzero:
+	/* Clear the sign bit of y.  */
+	slli	yh, yh, 1
+	srli	yh, yh, 1
+
+	/* Check for division by zero.  */
+	or	a10, yh, yl
+	beqz	a10, .Ldiv_yzero
+
+	/* Normalize y.  Adjust the exponent in a9.  */
+	beqz	yh, .Ldiv_yh_zero	/* every remaining set bit is in yl */
+	do_nsau	a10, yh, a11, a9	/* presumably a10 = leading-zero count of yh; macro is defined elsewhere */
+	addi	a10, a10, -11		/* left-shift distance is a10 - 11 */
+	ssl	a10
+	src	yh, yh, yl		/* shift the yh:yl pair left by a10 */
+	sll	yl, yl
+	movi	a9, 1
+	sub	a9, a9, a10		/* a9 = 1 - shift distance */
+	j	.Ldiv_ynormalized	
+.Ldiv_yh_zero:
+	do_nsau	a10, yl, a11, a9
+	addi	a10, a10, -11
+	movi	a9, -31
+	sub	a9, a9, a10		/* a9 = -31 - a10 */
+	ssl	a10
+	bltz	a10, .Ldiv_yl_srl	/* negative distance: yl has to be split instead */
+	sll	yh, yl			/* the whole of yl moves up into yh */
+	movi	yl, 0
+	j	.Ldiv_ynormalized
+.Ldiv_yl_srl:
+	srl	yh, yl			/* top bits of yl become yh */
+	sll	yl, yl			/* the remaining bits stay in yl */
+	j	.Ldiv_ynormalized	
+
+.Ldiv_yzero:
+	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
+	slli	xh, xh, 1
+	srli	xh, xh, 1		/* drop the sign bit of x */
+	or	xl, xl, xh		/* xl is now zero only if x was +/- zero */
+	srli	xh, a7, 31
+	slli	xh, xh, 31		/* result sign comes from bit 31 of a7 */
+	or	xh, xh, a6		/* exponent field all ones (a6 = 0x7ff00000) */
+	bnez	xl, 1f
+	movi	a4, 0x80000	/* make it a quiet NaN */
+	or	xh, xh, a4
+1:	movi	xl, 0
+	leaf_return
+
+.Ldiv_xexpzero:
+	/* Clear the sign bit of x.  */
+	slli	xh, xh, 1
+	srli	xh, xh, 1
+
+	/* If x is zero, return zero.  */
+	or	a10, xh, xl
+	beqz	a10, .Ldiv_return_zero
+
+	/* Normalize x.  Adjust the exponent in a8.  */
+	beqz	xh, .Ldiv_xh_zero	/* every remaining set bit is in xl */
+	do_nsau	a10, xh, a11, a8	/* presumably a10 = leading-zero count of xh; macro is defined elsewhere */
+	addi	a10, a10, -11		/* left-shift distance is a10 - 11 */
+	ssl	a10
+	src	xh, xh, xl		/* shift the xh:xl pair left by a10 */
+	sll	xl, xl
+	movi	a8, 1
+	sub	a8, a8, a10		/* a8 = 1 - shift distance */
+	j	.Ldiv_xnormalized	
+.Ldiv_xh_zero:
+	do_nsau	a10, xl, a11, a8
+	addi	a10, a10, -11
+	movi	a8, -31
+	sub	a8, a8, a10		/* a8 = -31 - a10 */
+	ssl	a10
+	bltz	a10, .Ldiv_xl_srl	/* negative distance: xl has to be split instead */
+	sll	xh, xl			/* the whole of xl moves up into xh */
+	movi	xl, 0
+	j	.Ldiv_xnormalized
+.Ldiv_xl_srl:
+	srl	xh, xl			/* top bits of xl become xh */
+	sll	xl, xl			/* the remaining bits stay in xl */
+	j	.Ldiv_xnormalized
+	
+.Ldiv_return_zero:
+	/* Return zero with the appropriate sign bit.  */
+	srli	xh, a7, 31
+	slli	xh, xh, 31
+	movi	xl, 0
+	leaf_return
+
+.Ldiv_xnan_or_inf:
+	/* Set the sign bit of the result.  */
+	srli	a7, yh, 31
+	slli	a7, a7, 31
+	xor	xh, xh, a7
+	/* If y is NaN or Inf, return NaN.  */
+	bnall	yh, a6, 1f		/* y exponent is not all ones: return x as it stands */
+	movi	a4, 0x80000	/* make it a quiet NaN */
+	or	xh, xh, a4
+1:	leaf_return			/* NOTE(review): a NaN in x with finite y is returned without forcing the quiet bit -- confirm intended */
+
+.Ldiv_ynan_or_inf:
+	/* If y is Infinity, return zero.  */
+	slli	a8, yh, 12
+	or	a8, a8, yl		/* a8 == 0 iff all mantissa bits of y are clear */
+	beqz	a8, .Ldiv_return_zero
+	/* y is NaN; return it.  */
+	mov	xh, yh
+	mov	xl, yl
+	leaf_return			/* NOTE(review): y is returned unchanged, so the quiet bit is not forced -- confirm intended */
+
+.Ldiv_highequal1:
+	bltu	xl, yl, 2f		/* high words equal and x is smaller: pre-shift x */
+	j	3f
+
+	.align	4
+	.global	__divdf3
+	.type	__divdf3, @function
+__divdf3:
+	leaf_entry sp, 16
+	movi	a6, 0x7ff00000		/* mask of the 11-bit exponent field in a high word */
+
+	/* Get the sign of the result.  */
+	xor	a7, xh, yh
+
+	/* Check for NaN and infinity.  */
+	ball	xh, a6, .Ldiv_xnan_or_inf
+	ball	yh, a6, .Ldiv_ynan_or_inf
+
+	/* Extract the exponents.  */
+	extui	a8, xh, 20, 11
+	extui	a9, yh, 20, 11
+
+	beqz	a9, .Ldiv_yexpzero	/* y is zero or subnormal */
+.Ldiv_ynormalized:	
+	beqz	a8, .Ldiv_xexpzero	/* x is zero or subnormal */
+.Ldiv_xnormalized:	
+
+	/* Subtract the exponents.  */
+	sub	a8, a8, a9
+
+	/* Replace sign/exponent fields with explicit "1.0".  */
+	movi	a10, 0x1fffff		/* keeps bits 20..0 */
+	or	xh, xh, a6		/* sets bit 20 (among others) */
+	and	xh, xh, a10		/* then discards everything above bit 20 */
+	or	yh, yh, a6
+	and	yh, yh, a10
+
+	/* Set SAR for left shift by one.  */
+	ssai	(32 - 1)
+
+	/* The first digit of the mantissa division must be a one.
+	   Shift x (and adjust the exponent) as needed to make this true.  */
+	bltu	yh, xh, 3f		/* x is already larger than y: no shift */
+	beq	yh, xh, .Ldiv_highequal1	/* high words tie: compare the low words */
+2:	src	xh, xh, xl		/* xh:xl <<= 1 */
+	sll	xl, xl
+	addi	a8, a8, -1		/* compensate in the exponent */
+3:
+	/* Do the first subtraction and shift.  */
+	sub	xh, xh, yh
+	bgeu	xl, yl, 1f
+	addi	xh, xh, -1		/* borrow from the high word */
+1:	sub	xl, xl, yl
+	src	xh, xh, xl
+	sll	xl, xl
+
+	/* Put the quotient into a10/a11.  */
+	movi	a10, 0
+	movi	a11, 1			/* the leading quotient digit is the guaranteed one */
+
+	/* Divide one bit at a time for 52 bits.  */
+	movi	a9, 52
+#if XCHAL_HAVE_LOOPS
+	loop	a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+	/* Shift the quotient << 1.  */
+	src	a10, a10, a11
+	sll	a11, a11
+
+	/* Is this digit a 0 or 1?  */
+	bltu	xh, yh, 3f		/* remainder < divisor: digit is 0 */
+	beq	xh, yh, .Ldiv_highequal2	/* high words tie: low words decide */
+
+	/* Output a 1 and subtract.  */
+2:	addi	a11, a11, 1
+	sub	xh, xh, yh
+	bgeu	xl, yl, 1f
+	addi	xh, xh, -1		/* borrow from the high word */
+1:	sub	xl, xl, yl
+
+	/* Shift the dividend << 1.  */
+3:	src	xh, xh, xl
+	sll	xl, xl
+
+#if !XCHAL_HAVE_LOOPS
+	addi	a9, a9, -1
+	bnez	a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
+	/* Add the exponent bias (less one to account for the explicit "1.0"
+	   of the mantissa that will be added to the exponent in the final
+	   result).  */
+	movi	a9, 0x3fe
+	add	a8, a8, a9
+	
+	/* Check for over/underflow.  The value in a8 is one less than the
+	   final exponent, so values in the range 0..7fd are OK here.  */
+	addmi	a9, a9, 0x400	/* 0x7fe */
+	bgeu	a8, a9, .Ldiv_overflow	/* unsigned compare, so negative a8 is caught here too */
+
+.Ldiv_round:
+	/* Round.  The remainder (<< 1) is in xh/xl.  */
+	bltu	xh, yh, .Ldiv_rounded	/* less than the divisor: leave the quotient alone */
+	beq	xh, yh, .Ldiv_highequal3	/* high words tie: low words decide */
+.Ldiv_roundup:
+	addi	a11, a11, 1
+	beqz	a11, .Ldiv_roundcarry	/* low word wrapped: carry into a10 */
+
+.Ldiv_rounded:
+	mov	xl, a11
+	/* Add the exponent to the mantissa.  */
+	slli	a8, a8, 20
+	add	xh, a10, a8
+
+.Ldiv_addsign:
+	/* Add the sign bit.  */
+	srli	a7, a7, 31
+	slli	a7, a7, 31
+	or	xh, xh, a7
+	leaf_return
+
+.Ldiv_highequal2:
+	bgeu	xl, yl, 2b		/* remainder >= divisor: digit is 1 */
+	j	3b
+
+.Ldiv_highequal3:
+	bltu	xl, yl, .Ldiv_rounded
+	bne	xl, yl, .Ldiv_roundup
+
+	/* Remainder is exactly half the divisor.  Round even.  */
+	addi	a11, a11, 1
+	beqz	a11, .Ldiv_roundcarry
+	srli	a11, a11, 1
+	slli	a11, a11, 1		/* clear bit 0 of the incremented value */
+	j	.Ldiv_rounded
+
+.Ldiv_overflow:
+	bltz	a8, .Ldiv_underflow	/* out of range on the negative side */
+	/* Return +/- Infinity.  */
+	addi	a8, a9, 1	/* 0x7ff */
+	slli	xh, a8, 20
+	movi	xl, 0
+	j	.Ldiv_addsign
+
+.Ldiv_underflow:
+	/* Create a subnormal value, where the exponent field contains zero,
+	   but the effective exponent is 1.  The value of a8 is one less than
+	   the actual exponent, so just negate it to get the shift amount.  */
+	neg	a8, a8
+	ssr	a8
+	bgeui	a8, 32, .Ldiv_bigshift
+	
+	/* Shift a10/a11 right.  Any bits that are shifted out of a11 are
+	   saved in a6 for rounding the result.  */
+	sll	a6, a11
+	src	a11, a10, a11
+	srl	a10, a10
+	j	1f
+
+.Ldiv_bigshift:
+	bgeui	a8, 64, .Ldiv_flush_to_zero
+	sll	a9, a11		/* lost bits shifted out of a11 */
+	src	a6, a10, a11
+	srl	a11, a10
+	movi	a10, 0
+	or	xl, xl, a9		/* fold the lost bits into the remainder */
+
+	/* Set the exponent to zero.  */
+1:	movi	a8, 0
+
+	/* Pack any nonzero remainder (in xh/xl) into a6.  */
+	or	xh, xh, xl
+	beqz	xh, 1f
+	movi	a9, 1
+	or	a6, a6, a9		/* bit 0 of a6 records a nonzero remainder */
+	
+	/* Round a10/a11 based on the bits shifted out into a6.  */
+1:	bgez	a6, .Ldiv_rounded	/* bit 31 of a6 clear: no increment */
+	addi	a11, a11, 1
+	beqz	a11, .Ldiv_roundcarry
+	slli	a6, a6, 1
+	bnez	a6, .Ldiv_rounded	/* bits below bit 31 were set: the increment stands */
+	srli	a11, a11, 1
+	slli	a11, a11, 1		/* no other bits were set: clear bit 0 */
+	j	.Ldiv_rounded
+
+.Ldiv_roundcarry:
+	/* a11 is always zero when the rounding increment overflows, so
+	   there's no need to round it to an even value.  */
+	addi	a10, a10, 1
+	/* Overflow to the exponent field is OK.  */
+	j	.Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+	/* Return zero with the appropriate sign bit.  */
+	srli	xh, a7, 31
+	slli	xh, xh, 31
+	movi	xl, 0
+	leaf_return
+
+#endif /* L_divdf3 */
+
+#ifdef L_cmpdf2
+
+	/* Equal and Not Equal */
+
+	.align	4
+	.global	__eqdf2
+	.global	__nedf2
+	.set	__nedf2, __eqdf2
+	.type	__eqdf2, @function
+__eqdf2:
+	leaf_entry sp, 16
+	bne	xl, yl, 2f		/* low words differ: not equal */
+	bne	xh, yh, 4f		/* only the high words differ: may still be +0 vs -0 */
+
+	/* The values are equal but NaN != NaN.  Check the exponent.  */
+	movi	a6, 0x7ff00000
+	ball	xh, a6, 3f
+
+	/* Equal.  */
+	movi	a2, 0
+	leaf_return
+
+	/* Not equal.  */
+2:	movi	a2, 1
+	leaf_return
+
+	/* Check if the mantissas are nonzero.  */
+3:	slli	a7, xh, 12
+	or	a7, a7, xl
+	j	5f
+
+	/* Check if x and y are zero with different signs.  */
+4:	or	a7, xh, yh
+	slli	a7, a7, 1		/* shift both sign bits out */
+	or	a7, a7, xl	/* xl == yl here */
+
+	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+	   of x when exponent(x) = 0x7ff and x == y.  */
+5:	movi	a2, 0
+	movi	a3, 1
+	movnez	a2, a3, a7	
+	leaf_return
+
+
+	/* Greater Than */
+
+	.align	4
+	.global	__gtdf2
+	.type	__gtdf2, @function
+__gtdf2:
+	leaf_entry sp, 16
+	movi	a6, 0x7ff00000
+	ball	xh, a6, 2f		/* x exponent all ones: x may be a NaN */
+1:	bnall	yh, a6, .Lle_cmp	/* y exponent not all ones: do the ordered compare */
+
+	/* Check if y is a NaN.  */
+	slli	a7, yh, 12
+	or	a7, a7, yl
+	beqz	a7, .Lle_cmp		/* mantissa is zero: y is an infinity, not a NaN */
+	movi	a2, 0			/* a NaN operand yields 0 */
+	leaf_return
+
+	/* Check if x is a NaN.  */
+2:	slli	a7, xh, 12
+	or	a7, a7, xl
+	beqz	a7, 1b			/* x is an infinity: go on to check y */
+	movi	a2, 0			/* a NaN operand yields 0 */
+	leaf_return
+
+
+	/* Less Than or Equal */
+
+	.align	4
+	.global	__ledf2
+	.type	__ledf2, @function
+__ledf2:
+	leaf_entry sp, 16
+	movi	a6, 0x7ff00000
+	ball	xh, a6, 2f		/* x exponent all ones: x may be a NaN */
+1:	bnall	yh, a6, .Lle_cmp	/* y exponent not all ones: do the ordered compare */
+
+	/* Check if y is a NaN.  */
+	slli	a7, yh, 12
+	or	a7, a7, yl
+	beqz	a7, .Lle_cmp		/* mantissa is zero: y is an infinity, not a NaN */
+	movi	a2, 1			/* a NaN operand yields 1 */
+	leaf_return
+
+	/* Check if x is a NaN.  */
+2:	slli	a7, xh, 12
+	or	a7, a7, xl
+	beqz	a7, 1b			/* x is an infinity: go on to check y */
+	movi	a2, 1			/* a NaN operand yields 1 */
+	leaf_return
+
+.Lle_cmp:
+	/* Check if x and y have different signs.  */
+	xor	a7, xh, yh
+	bltz	a7, .Lle_diff_signs
+
+	/* Check if x is negative.  */
+	bltz	xh, .Lle_xneg
+
+	/* Check if x <= y.  */
+	bltu	xh, yh, 4f
+	bne	xh, yh, 5f
+	bltu	yl, xl, 5f
+4:	movi	a2, 0			/* x <= y */
+	leaf_return
+
+.Lle_xneg:
+	/* Check if y <= x.  */
+	bltu	yh, xh, 4b		/* both negative: the larger bit pattern is the smaller value */
+	bne	yh, xh, 5f
+	bgeu	xl, yl, 4b
+5:	movi	a2, 1			/* x > y */
+	leaf_return
+
+.Lle_diff_signs:
+	bltz	xh, 4b			/* x negative, y not: x <= y */
+
+	/* Check if both x and y are zero.  */
+	or	a7, xh, yh
+	slli	a7, a7, 1		/* shift both sign bits out */
+	or	a7, a7, xl
+	or	a7, a7, yl
+	movi	a2, 1
+	movi	a3, 0
+	moveqz	a2, a3, a7		/* both zero: result becomes 0 */
+	leaf_return
+
+
+	/* Greater Than or Equal */
+
+	.align	4
+	.global	__gedf2
+	.type	__gedf2, @function
+__gedf2:
+	leaf_entry sp, 16
+	movi	a6, 0x7ff00000
+	ball	xh, a6, 2f		/* x exponent all ones: x may be a NaN */
+1:	bnall	yh, a6, .Llt_cmp	/* y exponent not all ones: do the ordered compare */
+
+	/* Check if y is a NaN.  */
+	slli	a7, yh, 12
+	or	a7, a7, yl
+	beqz	a7, .Llt_cmp		/* mantissa is zero: y is an infinity, not a NaN */
+	movi	a2, -1			/* a NaN operand yields -1 */
+	leaf_return
+
+	/* Check if x is a NaN.  */
+2:	slli	a7, xh, 12
+	or	a7, a7, xl
+	beqz	a7, 1b			/* x is an infinity: go on to check y */
+	movi	a2, -1			/* a NaN operand yields -1 */
+	leaf_return
+
+
+	/* Less Than */
+
+	.align	4
+	.global	__ltdf2
+	.type	__ltdf2, @function
+__ltdf2:
+	leaf_entry sp, 16
+	movi	a6, 0x7ff00000
+	ball	xh, a6, 2f		/* x exponent all ones: x may be a NaN */
+1:	bnall	yh, a6, .Llt_cmp	/* y exponent not all ones: do the ordered compare */
+
+	/* Check if y is a NaN.  */
+	slli	a7, yh, 12
+	or	a7, a7, yl
+	beqz	a7, .Llt_cmp		/* mantissa is zero: y is an infinity, not a NaN */
+	movi	a2, 0			/* a NaN operand yields 0 */
+	leaf_return
+
+	/* Check if x is a NaN.  */
+2:	slli	a7, xh, 12
+	or	a7, a7, xl
+	beqz	a7, 1b			/* x is an infinity: go on to check y */
+	movi	a2, 0			/* a NaN operand yields 0 */
+	leaf_return
+
+.Llt_cmp:
+	/* Check if x and y have different signs.  */
+	xor	a7, xh, yh
+	bltz	a7, .Llt_diff_signs
+
+	/* Check if x is negative.  */
+	bltz	xh, .Llt_xneg
+
+	/* Check if x < y.  */
+	bltu	xh, yh, 4f
+	bne	xh, yh, 5f
+	bgeu	xl, yl, 5f
+4:	movi	a2, -1			/* x < y */
+	leaf_return
+
+.Llt_xneg:
+	/* Check if y < x.  */
+	bltu	yh, xh, 4b		/* both negative: the larger bit pattern is the smaller value */
+	bne	yh, xh, 5f
+	bltu	yl, xl, 4b
+5:	movi	a2, 0			/* x >= y */
+	leaf_return
+
+.Llt_diff_signs:
+	bgez	xh, 5b			/* x non-negative, y negative: x is not less */
+
+	/* Return -1 unless both x and y are zero (of either sign).  */
+	or	a7, xh, yh
+	slli	a7, a7, 1		/* shift both sign bits out */
+	or	a7, a7, xl
+	or	a7, a7, yl
+	movi	a2, 0
+	movi	a3, -1
+	movnez	a2, a3, a7		/* any nonzero bit: result becomes -1 */
+	leaf_return
+
+
+	/* Unordered */
+
+	.align	4
+	.global	__unorddf2
+	.type	__unorddf2, @function
+__unorddf2:
+	leaf_entry sp, 16
+	movi	a6, 0x7ff00000
+	ball	xh, a6, 3f		/* x exponent all ones: x may be a NaN */
+1:	ball	yh, a6, 4f		/* y exponent all ones: y may be a NaN */
+2:	movi	a2, 0			/* neither operand is a NaN */
+	leaf_return
+
+3:	slli	a7, xh, 12
+	or	a7, a7, xl		/* a7 != 0 iff x has a nonzero mantissa */
+	beqz	a7, 1b			/* x is an infinity: go on to check y */
+	movi	a2, 1			/* x is a NaN */
+	leaf_return
+
+4:	slli	a7, yh, 12
+	or	a7, a7, yl		/* a7 != 0 iff y has a nonzero mantissa */
+	beqz	a7, 2b			/* y is an infinity */
+	movi	a2, 1			/* y is a NaN */
+	leaf_return
+
+#endif /* L_cmpdf2 */
+
+#ifdef L_fixdfsi
+
+	.align	4
+	.global	__fixdfsi
+	.type	__fixdfsi, @function
+__fixdfsi:
+	leaf_entry sp, 16
+
+	/* Check for NaN and Infinity.  */
+	movi	a6, 0x7ff00000
+	ball	xh, a6, .Lfixdfsi_nan_or_inf
+
+	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
+	extui	a4, xh, 20, 11
+	extui	a5, a6, 19, 10	/* 0x3fe */
+	sub	a4, a4, a5
+	bgei	a4, 32, .Lfixdfsi_maxint	/* too large for 32 bits: saturate */
+	blti	a4, 1, .Lfixdfsi_zero		/* too small: result is zero */
+
+	/* Add explicit "1.0" and shift << 11.  */
+	or	a7, xh, a6		/* bit 31 of a7 is still the sign of x */
+	ssai	(32 - 11)
+	src	a5, a7, xl		/* a5 = (a7 << 11) | (xl >> 21) */
+
+	/* Shift back to the right, based on the exponent.  */
+	ssl	a4		/* shift by 32 - a4 */
+	srl	a5, a5
+
+	/* Negate the result if sign != 0.  */
+	neg	a2, a5
+	movgez	a2, a5, a7		/* a7 >= 0: take the un-negated value */
+	leaf_return
+
+.Lfixdfsi_nan_or_inf:
+	/* Handle Infinity and NaN.  */
+	slli	a4, xh, 12
+	or	a4, a4, xl		/* a4 == 0 iff the mantissa is zero (Infinity) */
+	beqz	a4, .Lfixdfsi_maxint
+
+	/* Translate NaN to +maxint.  */
+	movi	xh, 0			/* makes the sign test below see a non-negative value */
+
+.Lfixdfsi_maxint:
+	slli	a4, a6, 11	/* 0x80000000 */
+	addi	a5, a4, -1	/* 0x7fffffff */
+	movgez	a4, a5, xh		/* xh >= 0 selects 0x7fffffff, otherwise 0x80000000 stays */
+	mov	a2, a4
+	leaf_return
+
+.Lfixdfsi_zero:
+	movi	a2, 0
+	leaf_return
+
+#endif /* L_fixdfsi */
+
+#ifdef L_fixdfdi
+
+	.align	4
+	.global	__fixdfdi
+	.type	__fixdfdi, @function
+__fixdfdi:
+	leaf_entry sp, 16
+
+	/* Check for NaN and Infinity.  */
+	movi	a6, 0x7ff00000
+	ball	xh, a6, .Lfixdfdi_nan_or_inf
+
+	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
+	extui	a4, xh, 20, 11
+	extui	a5, a6, 19, 10	/* 0x3fe */
+	sub	a4, a4, a5
+	bgei	a4, 64, .Lfixdfdi_maxint	/* too large for 64 bits: saturate */
+	blti	a4, 1, .Lfixdfdi_zero		/* too small: result is zero */
+
+	/* Add explicit "1.0" and shift << 11.  */
+	or	a7, xh, a6		/* bit 31 of a7 is still the sign of x */
+	ssai	(32 - 11)
+	src	xh, a7, xl
+	sll	xl, xl
+
+	/* Shift back to the right, based on the exponent.  */
+	ssl	a4		/* shift by 64 - a4 */
+	bgei	a4, 32, .Lfixdfdi_smallshift
+	srl	xl, xh			/* a4 < 32: the result fits in the low word */
+	movi	xh, 0
+
+.Lfixdfdi_shifted:	
+	/* Negate the result if sign != 0.  */
+	bgez	a7, 1f
+	neg	xl, xl
+	neg	xh, xh
+	beqz	xl, 1f			/* low word zero: no borrow into the high word */
+	addi	xh, xh, -1
+1:	leaf_return
+
+.Lfixdfdi_smallshift:
+	src	xl, xh, xl		/* a4 >= 32: shift the full xh:xl pair right */
+	srl	xh, xh
+	j	.Lfixdfdi_shifted
+
+.Lfixdfdi_nan_or_inf:
+	/* Handle Infinity and NaN.  */
+	slli	a4, xh, 12
+	or	a4, a4, xl		/* a4 == 0 iff the mantissa is zero (Infinity) */
+	beqz	a4, .Lfixdfdi_maxint
+
+	/* Translate NaN to +maxint.  */
+	movi	xh, 0			/* makes the sign test below see a non-negative value */
+
+.Lfixdfdi_maxint:
+	slli	a7, a6, 11	/* 0x80000000 */
+	bgez	xh, 1f
+	mov	xh, a7			/* negative: high word 0x80000000, low word 0 */
+	movi	xl, 0
+	leaf_return
+
+1:	addi	xh, a7, -1	/* 0x7fffffff */
+	movi	xl, -1
+	leaf_return
+
+.Lfixdfdi_zero:
+	movi	xh, 0
+	movi	xl, 0
+	leaf_return
+
+#endif /* L_fixdfdi */
+
+#ifdef L_fixunsdfsi
+
+	.align	4
+	.global	__fixunsdfsi
+	.type	__fixunsdfsi, @function
+__fixunsdfsi:
+	leaf_entry sp, 16
+
+	/* Check for NaN and Infinity.  */
+	movi	a6, 0x7ff00000
+	ball	xh, a6, .Lfixunsdfsi_nan_or_inf
+
+	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
+	extui	a4, xh, 20, 11
+	extui	a5, a6, 20, 10	/* 0x3ff */
+	sub	a4, a4, a5
+	bgei	a4, 32, .Lfixunsdfsi_maxint	/* too large for 32 bits: saturate */
+	bltz	a4, .Lfixunsdfsi_zero		/* exponent below the bias: result is zero */
+
+	/* Add explicit "1.0" and shift << 11.  */
+	or	a7, xh, a6		/* bit 31 of a7 is still the sign of x */
+	ssai	(32 - 11)
+	src	a5, a7, xl		/* a5 = (a7 << 11) | (xl >> 21) */
+
+	/* Shift back to the right, based on the exponent.  */
+	addi	a4, a4, 1
+	beqi	a4, 32, .Lfixunsdfsi_bigexp	/* a right shift by zero is handled separately */
+	ssl	a4		/* shift by 32 - a4 */
+	srl	a5, a5
+
+	/* Negate the result if sign != 0.  */
+	neg	a2, a5
+	movgez	a2, a5, a7		/* a7 >= 0: take the un-negated value */
+	leaf_return
+
+.Lfixunsdfsi_nan_or_inf:
+	/* Handle Infinity and NaN.  */
+	slli	a4, xh, 12
+	or	a4, a4, xl		/* a4 == 0 iff the mantissa is zero (Infinity) */
+	beqz	a4, .Lfixunsdfsi_maxint
+
+	/* Translate NaN to 0xffffffff.  */
+	movi	a2, -1
+	leaf_return
+
+.Lfixunsdfsi_maxint:
+	slli	a4, a6, 11	/* 0x80000000 */
+	movi	a5, -1		/* 0xffffffff */
+	movgez	a4, a5, xh		/* xh >= 0 selects 0xffffffff, otherwise 0x80000000 stays */
+	mov	a2, a4
+	leaf_return
+
+.Lfixunsdfsi_zero:
+	movi	a2, 0
+	leaf_return
+
+.Lfixunsdfsi_bigexp:
+	/* Handle unsigned maximum exponent case.  */
+	bltz	xh, 1f
+	mov	a2, a5		/* no shift needed */
+	leaf_return
+
+	/* Return 0x80000000 if negative.  */
+1:	slli	a2, a6, 11
+	leaf_return
+
+#endif /* L_fixunsdfsi */
+
+#ifdef L_fixunsdfdi
+
+	.align	4
+	.global	__fixunsdfdi
+	.type	__fixunsdfdi, @function
+__fixunsdfdi:
+	leaf_entry sp, 16
+
+	/* Check for NaN and Infinity.  */
+	movi	a6, 0x7ff00000
+	ball	xh, a6, .Lfixunsdfdi_nan_or_inf
+
+	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
+	extui	a4, xh, 20, 11
+	extui	a5, a6, 20, 10	/* 0x3ff */
+	sub	a4, a4, a5
+	bgei	a4, 64, .Lfixunsdfdi_maxint	/* too large for 64 bits: saturate */
+	bltz	a4, .Lfixunsdfdi_zero		/* exponent below the bias: result is zero */
+
+	/* Add explicit "1.0" and shift << 11.  */
+	or	a7, xh, a6		/* bit 31 of a7 is still the sign of x */
+	ssai	(32 - 11)
+	src	xh, a7, xl
+	sll	xl, xl
+
+	/* Shift back to the right, based on the exponent.  */
+	addi	a4, a4, 1
+	beqi	a4, 64, .Lfixunsdfdi_bigexp	/* a right shift by zero is handled separately */
+	ssl	a4		/* shift by 64 - a4 */
+	bgei	a4, 32, .Lfixunsdfdi_smallshift
+	srl	xl, xh			/* a4 < 32: the result fits in the low word */
+	movi	xh, 0
+
+.Lfixunsdfdi_shifted:
+	/* Negate the result if sign != 0.  */
+	bgez	a7, 1f
+	neg	xl, xl
+	neg	xh, xh
+	beqz	xl, 1f			/* low word zero: no borrow into the high word */
+	addi	xh, xh, -1
+1:	leaf_return
+
+.Lfixunsdfdi_smallshift:
+	src	xl, xh, xl		/* a4 >= 32: shift the full xh:xl pair right */
+	srl	xh, xh
+	j	.Lfixunsdfdi_shifted
+
+.Lfixunsdfdi_nan_or_inf:
+	/* Handle Infinity and NaN.  */
+	slli	a4, xh, 12
+	or	a4, a4, xl		/* a4 == 0 iff the mantissa is zero (Infinity) */
+	beqz	a4, .Lfixunsdfdi_maxint
+
+	/* Translate NaN to 0xffffffff.... */
+1:	movi	xh, -1
+	movi	xl, -1
+	leaf_return
+
+.Lfixunsdfdi_maxint:
+	bgez	xh, 1b			/* non-negative: all-ones result */
+2:	slli	xh, a6, 11	/* 0x80000000 */
+	movi	xl, 0
+	leaf_return
+
+.Lfixunsdfdi_zero:
+	movi	xh, 0
+	movi	xl, 0
+	leaf_return
+
+.Lfixunsdfdi_bigexp:
+	/* Handle unsigned maximum exponent case.  */
+	bltz	a7, 2b			/* negative input: high word 0x80000000, low word 0 */
+	leaf_return		/* no shift needed */
+
+#endif /* L_fixunsdfdi */
+
+#ifdef L_floatsidf
+
+	.align	4
+	.global	__floatunsidf
+	.type	__floatunsidf, @function
+__floatunsidf:
+	leaf_entry sp, 16
+	beqz	a2, .Lfloatsidf_return_zero
+
+	/* Set the sign to zero and jump to the floatsidf code.  */
+	movi	a7, 0
+	j	.Lfloatsidf_normalize
+
+	.align	4
+	.global	__floatsidf
+	.type	__floatsidf, @function
+__floatsidf:
+	leaf_entry sp, 16
+
+	/* Check for zero.  */
+	beqz	a2, .Lfloatsidf_return_zero
+
+	/* Save the sign.  */
+	extui	a7, a2, 31, 1
+
+	/* Get the absolute value.  */
+#if XCHAL_HAVE_ABS
+	abs	a2, a2
+#else
+	neg	a4, a2
+	movltz	a2, a4, a2		/* replace a2 by its negation only when a2 < 0 */
+#endif
+
+.Lfloatsidf_normalize:
+	/* Normalize with the first 1 bit in the msb.  */
+	do_nsau	a4, a2, a5, a6		/* presumably a4 = leading-zero count of a2; macro is defined elsewhere */
+	ssl	a4
+	sll	a5, a2			/* a5 = a2 << a4 */
+
+	/* Shift the mantissa into position.  */
+	srli	xh, a5, 11
+	slli	xl, a5, (32 - 11)
+
+	/* Set the exponent.  */
+	movi	a5, 0x41d	/* 0x3fe + 31 */
+	sub	a5, a5, a4
+	slli	a5, a5, 20
+	add	xh, xh, a5		/* add, not or: bit 20 of xh carries into the exponent */
+
+	/* Add the sign and return. */
+	slli	a7, a7, 31
+	or	xh, xh, a7
+	leaf_return
+
+.Lfloatsidf_return_zero:
+	movi	a3, 0			/* a2 is already zero here, so both result words are zero */
+	leaf_return
+
+#endif /* L_floatsidf */
+
+#ifdef L_floatdidf
+
+	.align	4
+	.global	__floatundidf
+	.type	__floatundidf, @function
+__floatundidf:
+	leaf_entry sp, 16
+
+	/* Check for zero.  */
+	or	a4, xh, xl
+	beqz	a4, 2f			/* zero input is returned unchanged */
+
+	/* Set the sign to zero and jump to the floatdidf code.  */
+	movi	a7, 0
+	j	.Lfloatdidf_normalize
+
+	.align	4
+	.global	__floatdidf
+	.type	__floatdidf, @function
+__floatdidf:
+	leaf_entry sp, 16
+
+	/* Check for zero.  */
+	or	a4, xh, xl
+	beqz	a4, 2f			/* zero input is returned unchanged */
+
+	/* Save the sign.  */
+	extui	a7, xh, 31, 1
+
+	/* Get the absolute value.  */
+	bgez	xh, .Lfloatdidf_normalize
+	neg	xl, xl
+	neg	xh, xh
+	beqz	xl, .Lfloatdidf_normalize	/* low word zero: no borrow into the high word */
+	addi	xh, xh, -1
+
+.Lfloatdidf_normalize:
+	/* Normalize with the first 1 bit in the msb of xh.  */
+	beqz	xh, .Lfloatdidf_bigshift
+	do_nsau	a4, xh, a5, a6		/* presumably a4 = leading-zero count of xh; macro is defined elsewhere */
+	ssl	a4
+	src	xh, xh, xl		/* xh:xl <<= a4 */
+	sll	xl, xl
+
+.Lfloatdidf_shifted:
+	/* Shift the mantissa into position, with rounding bits in a6.  */
+	ssai	11
+	sll	a6, xl			/* a6 = the bits that fall off the bottom */
+	src	xl, xh, xl
+	srl	xh, xh
+
+	/* Set the exponent.  */
+	movi	a5, 0x43d	/* 0x3fe + 63 */
+	sub	a5, a5, a4
+	slli	a5, a5, 20
+	add	xh, xh, a5		/* add, not or: bit 20 of xh carries into the exponent */
+
+	/* Add the sign.  */
+	slli	a7, a7, 31
+	or	xh, xh, a7
+
+	/* Round up if the leftover fraction is >= 1/2.  */
+	bgez	a6, 2f
+	addi	xl, xl, 1
+	beqz	xl, .Lfloatdidf_roundcarry
+
+	/* Check if the leftover fraction is exactly 1/2.  */
+	slli	a6, a6, 1
+	beqz	a6, .Lfloatdidf_exactlyhalf
+2:	leaf_return
+
+.Lfloatdidf_bigshift:
+	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
+	do_nsau	a4, xl, a5, a6
+	ssl	a4
+	sll	xh, xl
+	movi	xl, 0
+	addi	a4, a4, 32		/* count the 32 bits skipped by moving xl into xh */
+	j	.Lfloatdidf_shifted
+
+.Lfloatdidf_exactlyhalf:
+	/* Round down to the nearest even value.  */
+	srli	xl, xl, 1
+	slli	xl, xl, 1
+	leaf_return
+
+.Lfloatdidf_roundcarry:
+	/* xl is always zero when the rounding increment overflows, so
+	   there's no need to round it to an even value.  */
+	addi	xh, xh, 1
+	/* Overflow to the exponent is OK.  */
+	leaf_return
+
+#endif /* L_floatdidf */
+
+#ifdef L_truncdfsf2
+
+	.align	4
+	.global	__truncdfsf2
+	.type	__truncdfsf2, @function
+__truncdfsf2:
+	leaf_entry sp, 16
+
+	/* Adjust the exponent bias.  */
+	movi	a4, (0x3ff - 0x7f) << 20
+	sub	a5, xh, a4
+
+	/* Check for underflow.  */
+	xor	a6, xh, a5
+	bltz	a6, .Ltrunc_underflow	/* the subtraction flipped bit 31 */
+	extui	a6, a5, 20, 11
+	beqz	a6, .Ltrunc_underflow	/* rebiased exponent is zero */
+
+	/* Check for overflow.  */
+	movi	a4, 255
+	bge	a6, a4, .Ltrunc_overflow
+
+	/* Shift a5/xl << 3 into a5/a4.  */
+	ssai	(32 - 3)
+	src	a5, a5, xl
+	sll	a4, xl			/* a4 = the bits left over below the result */
+
+.Ltrunc_addsign:
+	/* Add the sign bit.  */
+	extui	a6, xh, 31, 1
+	slli	a6, a6, 31
+	or	a2, a6, a5
+
+	/* Round up if the leftover fraction is >= 1/2.  */
+	bgez	a4, 1f
+	addi	a2, a2, 1
+	/* Overflow to the exponent is OK.  The answer will be correct.  */
+
+	/* Check if the leftover fraction is exactly 1/2.  */
+	slli	a4, a4, 1
+	beqz	a4, .Ltrunc_exactlyhalf
+1:	leaf_return
+
+.Ltrunc_exactlyhalf:
+	/* Round down to the nearest even value.  */
+	srli	a2, a2, 1
+	slli	a2, a2, 1
+	leaf_return
+
+.Ltrunc_overflow:
+	/* Check if exponent == 0x7ff.  */
+	movi	a4, 0x7ff00000
+	bnall	xh, a4, 1f		/* finite but too large: becomes Infinity */
+
+	/* Check if mantissa is nonzero.  */
+	slli	a5, xh, 12
+	or	a5, a5, xl
+	beqz	a5, 1f			/* input is Infinity */
+
+	/* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
+	srli	a4, a4, 1
+
+1:	slli	a4, a4, 4	/* 0xff000000 or 0xff800000 */
+	/* Add the sign bit.  */
+	extui	a6, xh, 31, 1
+	ssai	1
+	src	a2, a6, a4		/* a2 = (sign << 31) | (a4 >> 1) */
+	leaf_return
+
+.Ltrunc_underflow:
+	/* Find shift count for a subnormal.  Flush to zero if >= 32.  */
+	extui	a6, xh, 20, 11
+	movi	a5, 0x3ff - 0x7f
+	sub	a6, a5, a6
+	addi	a6, a6, 1
+	bgeui	a6, 32, 1f
+
+	/* Replace the exponent with an explicit "1.0".  */
+	slli	a5, a5, 13	/* 0x700000 */
+	or	a5, a5, xh
+	slli	a5, a5, 11
+	srli	a5, a5, 11		/* clears bits 31..21, leaving bit 20 set */
+
+	/* Shift the mantissa left by 3 bits (into a5/a4).  */
+	ssai	(32 - 3)
+	src	a5, a5, xl
+	sll	a4, xl
+
+	/* Shift right by a6.  */
+	ssr	a6
+	sll	a7, a4			/* a7 = bits shifted out of a4 */
+	src	a4, a5, a4
+	srl	a5, a5
+	beqz	a7, .Ltrunc_addsign
+	or	a4, a4, a6	/* any positive, nonzero value will work */
+	j	.Ltrunc_addsign
+
+	/* Return +/- zero.  */
+1:	extui	a2, xh, 31, 1
+	slli	a2, a2, 31
+	leaf_return
+
+#endif /* L_truncdfsf2 */
+
+#ifdef L_extendsfdf2
+
+	.align	4
+	.global	__extendsfdf2
+	.type	__extendsfdf2, @function
+__extendsfdf2:
+	leaf_entry sp, 16
+
+	/* Save the sign bit and then shift it off.  */
+	extui	a5, a2, 31, 1
+	slli	a5, a5, 31		/* a5 = sign bit alone, in bit 31 */
+	slli	a4, a2, 1		/* a4 = input without its sign, shifted up one */
+
+	/* Extract and check the exponent.  */
+	extui	a6, a2, 23, 8
+	beqz	a6, .Lextend_expzero	/* zero or subnormal input */
+	addi	a6, a6, 1
+	beqi	a6, 256, .Lextend_nan_or_inf	/* exponent field was 255 */
+
+	/* Shift >> 3 into a4/xl.  */
+	srli	a4, a4, 4		/* 4, not 3: a4 was already shifted left by one */
+	slli	xl, a2, (32 - 3)
+
+	/* Adjust the exponent bias.  */
+	movi	a6, (0x3ff - 0x7f) << 20
+	add	a4, a4, a6
+
+	/* Add the sign bit.  */
+	or	xh, a4, a5
+	leaf_return
+
+.Lextend_nan_or_inf:
+	movi	a4, 0x7ff00000
+
+	/* Check for NaN.  */
+	slli	a7, a2, 9		/* a7 == 0 iff the 23 mantissa bits are all clear */
+	beqz	a7, 1f
+
+	slli	a6, a6, 11	/* 0x80000 */
+	or	a4, a4, a6
+
+	/* Add the sign and return.  */
+1:	or	xh, a4, a5
+	movi	xl, 0
+	leaf_return
+
+.Lextend_expzero:
+	beqz	a4, 1b			/* +/- zero: a4 is 0, so only the sign is returned */
+
+	/* Normalize it to have 8 zero bits before the first 1 bit.  */
+	do_nsau	a7, a4, a2, a3		/* presumably a7 = leading-zero count of a4; a2/a3 are passed as the macro's other operands */
+	addi	a7, a7, -8
+	ssl	a7
+	sll	a4, a4
+	
+	/* Shift >> 3 into a4/xl.  */
+	slli	xl, a4, (32 - 3)
+	srli	a4, a4, 3
+
+	/* Set the exponent.  */
+	movi	a6, 0x3fe - 0x7f
+	sub	a6, a6, a7
+	slli	a6, a6, 20
+	add	a4, a4, a6		/* add, not or: the leading 1 of a4 carries into the exponent */
+
+	/* Add the sign and return.  */
+	or	xh, a4, a5
+	leaf_return
+
+#endif /* L_extendsfdf2 */
+
+
diff --git a/gcc-4.9/libgcc/config/xtensa/ieee754-sf.S b/gcc-4.9/libgcc/config/xtensa/ieee754-sf.S
new file mode 100644
index 000000000..e96785c3f
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/ieee754-sf.S
@@ -0,0 +1,1757 @@
+/* IEEE-754 single-precision functions for Xtensa
+   Copyright (C) 2006-2014 Free Software Foundation, Inc.
+   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef __XTENSA_EB__	/* presumably the big-endian configuration -- confirm */
+#define xh a2	/* high word of a 64-bit x */
+#define xl a3	/* low word of a 64-bit x */
+#define yh a4	/* presumably the high word of a second 64-bit operand */
+#define yl a5	/* presumably the low word of a second 64-bit operand */
+#else
+#define xh a3	/* high word of a 64-bit x */
+#define xl a2	/* low word of a 64-bit x */
+#define yh a5	/* presumably the high word of a second 64-bit operand */
+#define yl a4	/* presumably the low word of a second 64-bit operand */
+#endif
+
+/*  Warning!  The branch displacements for some Xtensa branch instructions
+    are quite small, and this code has been carefully laid out to keep
+    branch targets in range.  If you change anything, be sure to check that
+    the assembler is not relaxing anything to branch over a jump.  */
+
+#ifdef L_negsf2
+
+	.align	4
+	.global	__negsf2	/* return -x for the single-precision value in a2 */
+	.type	__negsf2, @function
+__negsf2:
+	leaf_entry sp, 16
+	movi	a4, 0x80000000	/* sign-bit mask */
+	xor	a2, a2, a4	/* flip only the sign bit; no special-casing of any input */
+	leaf_return
+
+#endif /* L_negsf2 */
+
+#ifdef L_addsubsf3
+
+	/* Addition: __addsf3 returns x + y (x in a2, y in a3, result in a2).  */
+__addsf3_aux:
+
+	/* Handle NaNs and Infinities.  (This code is placed before the
+	   start of the function just to keep it in range of the limited
+	   branch displacements.)  */
+
+.Ladd_xnan_or_inf:
+	/* If y is neither Infinity nor NaN, return x.  */
+	bnall	a3, a6, 1f
+	/* If x is a NaN, return it.  Otherwise, return y.  */
+	slli	a7, a2, 9	/* a7 = mantissa bits of x; zero means x is an Infinity */
+	beqz	a7, .Ladd_ynan_or_inf
+1:	leaf_return
+
+.Ladd_ynan_or_inf:
+	/* Return y.  */
+	mov	a2, a3
+	leaf_return
+
+.Ladd_opposite_signs:
+	/* Operand signs differ.  Do a subtraction.  */
+	slli	a7, a6, 8	/* a7 = 0x80000000, the sign bit */
+	xor	a3, a3, a7	/* negate y: x + y == x - (-y) */
+	j	.Lsub_same_sign
+
+	.align	4
+	.global	__addsf3
+	.type	__addsf3, @function
+__addsf3:
+	leaf_entry sp, 16
+	movi	a6, 0x7f800000	/* exponent-field mask, kept in a6 throughout */
+
+	/* Check if the two operands have the same sign.  */
+	xor	a7, a2, a3
+	bltz	a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:	
+	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
+	ball	a2, a6, .Ladd_xnan_or_inf
+	ball	a3, a6, .Ladd_ynan_or_inf
+
+	/* Compare the exponents.  The smaller operand will be shifted
+	   right by the exponent difference and added to the larger
+	   one.  */
+	extui	a7, a2, 23, 9	/* a7 = sign and exponent of x */
+	extui	a8, a3, 23, 9	/* a8 = sign and exponent of y */
+	bltu	a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+	/* Check if the smaller (or equal) exponent is zero.  */
+	bnone	a3, a6, .Ladd_yexpzero
+
+	/* Replace y sign/exponent with 0x008.  */
+	or	a3, a3, a6
+	slli	a3, a3, 8
+	srli	a3, a3, 8
+
+.Ladd_yexpdiff:
+	/* Compute the exponent difference.  */
+	sub	a10, a7, a8
+
+	/* Exponent difference >= 32 -- just return the bigger value.  */
+	bgeui	a10, 32, 1f
+	
+	/* Shift y right by the exponent difference.  Any bits that are
+	   shifted out of y are saved in a9 for rounding the result.  */
+	ssr	a10
+	movi	a9, 0
+	src	a9, a3, a9
+	srl	a3, a3
+
+	/* Do the addition.  */
+	add	a2, a2, a3
+
+	/* Check if the add overflowed into the exponent.  */
+	extui	a10, a2, 23, 9
+	beq	a10, a7, .Ladd_round
+	mov	a8, a7	/* .Ladd_carry expects the original exponent in a8 */
+	j	.Ladd_carry
+
+.Ladd_yexpzero:
+	/* y is a subnormal value.  Replace its sign/exponent with zero,
+	   i.e., no implicit "1.0", and increment the apparent exponent
+	   because subnormals behave as if they had the minimum (nonzero)
+	   exponent.  Test for the case when both exponents are zero.  */
+	slli	a3, a3, 9
+	srli	a3, a3, 9
+	bnone	a2, a6, .Ladd_bothexpzero
+	addi	a8, a8, 1
+	j	.Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+	/* Both exponents are zero.  Handle this as a special case.  There
+	   is no need to shift or round, and the normal code for handling
+	   a carry into the exponent field will not work because it
+	   assumes there is an implicit "1.0" that needs to be added.  */
+	add	a2, a2, a3
+1:	leaf_return
+
+.Ladd_xexpzero:
+	/* Same as "yexpzero" except skip handling the case when both
+	   exponents are zero.  */
+	slli	a2, a2, 9
+	srli	a2, a2, 9
+	addi	a7, a7, 1
+	j	.Ladd_xexpdiff
+
+.Ladd_shiftx:
+	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
+	   because the exponent difference is always nonzero in this version,
+	   the shift sequence can use SLL and skip loading a constant zero.  */
+	bnone	a2, a6, .Ladd_xexpzero
+
+	or	a2, a2, a6
+	slli	a2, a2, 8
+	srli	a2, a2, 8
+
+.Ladd_xexpdiff:
+	sub	a10, a8, a7
+	bgeui	a10, 32, .Ladd_returny	/* difference >= 32: x cannot affect the result */
+	
+	ssr	a10
+	sll	a9, a2	/* a9 = bits shifted out of x, kept for rounding */
+	srl	a2, a2
+
+	add	a2, a2, a3
+
+	/* Check if the add overflowed into the exponent.  */
+	extui	a10, a2, 23, 9
+	bne	a10, a8, .Ladd_carry
+
+.Ladd_round:
+	/* Round up if the leftover fraction is >= 1/2.  */
+	bgez	a9, 1f	/* msb of a9 clear: leftover fraction < 1/2 */
+	addi	a2, a2, 1
+
+	/* Check if the leftover fraction is exactly 1/2.  */
+	slli	a9, a9, 1
+	beqz	a9, .Ladd_exactlyhalf
+1:	leaf_return
+
+.Ladd_returny:
+	mov	a2, a3
+	leaf_return
+
+.Ladd_carry:	
+	/* The addition has overflowed into the exponent field, so the
+	   value needs to be renormalized.  The mantissa of the result
+	   can be recovered by subtracting the original exponent and
+	   adding 0x800000 (which is the explicit "1.0" for the
+	   mantissa of the non-shifted operand -- the "1.0" for the
+	   shifted operand was already added).  The mantissa can then
+	   be shifted right by one bit.  The explicit "1.0" of the
+	   shifted mantissa then needs to be replaced by the exponent,
+	   incremented by one to account for the normalizing shift.
+	   It is faster to combine these operations: do the shift first
+	   and combine the additions and subtractions.  If x is the
+	   original exponent, the result is:
+	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
+	   or:
+	       shifted mantissa + ((x + 1) << 22)
+	   Note that the exponent is incremented here by leaving the
+	   explicit "1.0" of the mantissa in the exponent field.  */
+
+	/* Shift x right by one bit.  Save the lsb (in bit 0 of a10).  */
+	mov	a10, a2
+	srli	a2, a2, 1
+
+	/* See explanation above.  The original exponent is in a8.  */
+	addi	a8, a8, 1
+	slli	a8, a8, 22
+	add	a2, a2, a8
+
+	/* Return an Infinity if the exponent overflowed.  */
+	ball	a2, a6, .Ladd_infinity
+	
+	/* Same thing as the "round" code except the msb of the leftover
+	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
+	bbci.l	a10, 0, 1f
+	addi	a2, a2, 1
+	beqz	a9, .Ladd_exactlyhalf
+1:	leaf_return
+
+.Ladd_infinity:
+	/* Clear the mantissa.  */
+	srli	a2, a2, 23
+	slli	a2, a2, 23
+
+	/* The sign bit may have been lost in a carry-out.  Put it back.  */
+	slli	a8, a8, 1
+	or	a2, a2, a8
+	leaf_return
+
+.Ladd_exactlyhalf:
+	/* Round down to the nearest even value (clear bit 0).  */
+	srli	a2, a2, 1
+	slli	a2, a2, 1
+	leaf_return
+
+
+	/* Subtraction: __subsf3 returns x - y (x in a2, y in a3, result in a2).  */
+__subsf3_aux:
+	
+	/* Handle NaNs and Infinities.  (This code is placed before the
+	   start of the function just to keep it in range of the limited
+	   branch displacements.)  */
+
+.Lsub_xnan_or_inf:
+	/* If y is neither Infinity nor NaN, return x.  */
+	bnall	a3, a6, 1f
+	/* Both x and y are either NaN or Inf, so the result is NaN.  */
+	movi	a4, 0x400000	/* make it a quiet NaN */
+	or	a2, a2, a4
+1:	leaf_return
+
+.Lsub_ynan_or_inf:
+	/* Negate y and return it.  */
+	slli	a7, a6, 8	/* a7 = 0x80000000, the sign bit */
+	xor	a2, a3, a7
+	leaf_return
+
+.Lsub_opposite_signs:
+	/* Operand signs differ.  Do an addition.  */
+	slli	a7, a6, 8	/* a7 = 0x80000000, the sign bit */
+	xor	a3, a3, a7	/* negate y: x - y == x + (-y) */
+	j	.Ladd_same_sign
+
+	.align	4
+	.global	__subsf3
+	.type	__subsf3, @function
+__subsf3:
+	leaf_entry sp, 16
+	movi	a6, 0x7f800000	/* exponent-field mask, kept in a6 until .Lsub_borrow */
+
+	/* Check if the two operands have the same sign.  */
+	xor	a7, a2, a3
+	bltz	a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:	
+	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
+	ball	a2, a6, .Lsub_xnan_or_inf
+	ball	a3, a6, .Lsub_ynan_or_inf
+
+	/* Compare the operands.  In contrast to addition, the entire
+	   value matters here.  */
+	extui	a7, a2, 23, 8	/* a7 = exponent of x */
+	extui	a8, a3, 23, 8	/* a8 = exponent of y */
+	bltu	a2, a3, .Lsub_xsmaller
+
+.Lsub_ysmaller:
+	/* Check if the smaller (or equal) exponent is zero.  */
+	bnone	a3, a6, .Lsub_yexpzero
+
+	/* Replace y sign/exponent with 0x008.  */
+	or	a3, a3, a6
+	slli	a3, a3, 8
+	srli	a3, a3, 8
+
+.Lsub_yexpdiff:
+	/* Compute the exponent difference.  */
+	sub	a10, a7, a8
+
+	/* Exponent difference >= 32 -- just return the bigger value.  */
+	bgeui	a10, 32, 1f
+	
+	/* Shift y right by the exponent difference.  Any bits that are
+	   shifted out of y are saved in a9 for rounding the result.  */
+	ssr	a10
+	movi	a9, 0
+	src	a9, a3, a9
+	srl	a3, a3
+
+	sub	a2, a2, a3
+
+	/* Subtract the leftover bits in a9 from zero and propagate any
+	   borrow from a2.  */
+	neg	a9, a9
+	addi	a10, a2, -1
+	movnez	a2, a10, a9	/* borrow one from a2 only when a9 is nonzero */
+
+	/* Check if the subtract underflowed into the exponent.  */
+	extui	a10, a2, 23, 8
+	beq	a10, a7, .Lsub_round
+	j	.Lsub_borrow
+
+.Lsub_yexpzero:
+	/* Return zero if the inputs are equal.  (For the non-subnormal
+	   case, subtracting the "1.0" will cause a borrow from the exponent
+	   and this case can be detected when handling the borrow.)  */
+	beq	a2, a3, .Lsub_return_zero
+
+	/* y is a subnormal value.  Replace its sign/exponent with zero,
+	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
+	   y's apparent exponent because subnormals behave as if they had
+	   the minimum (nonzero) exponent.  */
+	slli	a3, a3, 9
+	srli	a3, a3, 9
+	bnone	a2, a6, .Lsub_yexpdiff
+	addi	a8, a8, 1
+	j	.Lsub_yexpdiff
+
+.Lsub_returny:
+	/* Negate and return y.  */
+	slli	a7, a6, 8	/* a7 = 0x80000000, the sign bit */
+	xor	a2, a3, a7
+1:	leaf_return
+
+.Lsub_xsmaller:
+	/* Same thing as the "ysmaller" code, but with x and y swapped and
+	   with y negated.  */
+	bnone	a2, a6, .Lsub_xexpzero
+
+	or	a2, a2, a6
+	slli	a2, a2, 8
+	srli	a2, a2, 8
+
+.Lsub_xexpdiff:
+	sub	a10, a8, a7
+	bgeui	a10, 32, .Lsub_returny	/* difference >= 32: result is just -y */
+	
+	ssr	a10
+	movi	a9, 0
+	src	a9, a2, a9	/* a9 = bits shifted out of x, kept for rounding */
+	srl	a2, a2
+
+	/* Negate y.  */
+	slli	a11, a6, 8
+	xor	a3, a3, a11
+
+	sub	a2, a3, a2
+
+	neg	a9, a9
+	addi	a10, a2, -1
+	movnez	a2, a10, a9	/* borrow one from a2 only when a9 is nonzero */
+
+	/* Check if the subtract underflowed into the exponent.  */
+	extui	a10, a2, 23, 8
+	bne	a10, a8, .Lsub_borrow
+
+.Lsub_round:
+	/* Round up if the leftover fraction is >= 1/2.  */
+	bgez	a9, 1f	/* msb of a9 clear: leftover fraction < 1/2 */
+	addi	a2, a2, 1
+
+	/* Check if the leftover fraction is exactly 1/2.  */
+	slli	a9, a9, 1
+	beqz	a9, .Lsub_exactlyhalf
+1:	leaf_return
+
+.Lsub_xexpzero:
+	/* Same as "yexpzero", with the roles of x and y swapped.  */
+	beq	a2, a3, .Lsub_return_zero
+	slli	a2, a2, 9
+	srli	a2, a2, 9
+	bnone	a3, a6, .Lsub_xexpdiff
+	addi	a7, a7, 1
+	j	.Lsub_xexpdiff
+
+.Lsub_return_zero:
+	movi	a2, 0	/* always +0, regardless of the operand signs */
+	leaf_return
+
+.Lsub_borrow:	
+	/* The subtraction has underflowed into the exponent field, so the
+	   value needs to be renormalized.  Shift the mantissa left as
+	   needed to remove any leading zeros and adjust the exponent
+	   accordingly.  If the exponent is not large enough to remove
+	   all the leading zeros, the result will be a subnormal value.  */
+
+	slli	a8, a2, 9	/* a8 = mantissa bits, left-justified */
+	beqz	a8, .Lsub_xzero
+	do_nsau	a6, a8, a7, a11	/* presumably a6 = leading-zero count of a8, a7/a11 scratch -- confirm in do_nsau */
+	srli	a8, a8, 9
+	bge	a6, a10, .Lsub_subnormal	/* a10 = exponent field after the borrow */
+	addi	a6, a6, 1
+
+.Lsub_normalize_shift:
+	/* Shift the mantissa (a8/a9) left by a6.  */
+	ssl	a6
+	src	a8, a8, a9
+	sll	a9, a9
+
+	/* Combine the shifted mantissa with the sign and exponent,
+	   decrementing the exponent by a6.  (The exponent has already
+	   been decremented by one due to the borrow from the subtraction,
+	   but adding the mantissa will increment the exponent by one.)  */
+	srli	a2, a2, 23
+	sub	a2, a2, a6
+	slli	a2, a2, 23
+	add	a2, a2, a8
+	j	.Lsub_round
+
+.Lsub_exactlyhalf:
+	/* Round down to the nearest even value (clear bit 0).  */
+	srli	a2, a2, 1
+	slli	a2, a2, 1
+	leaf_return
+
+.Lsub_xzero:
+	/* If there was a borrow from the exponent, and the mantissa and
+	   guard digits are all zero, then the inputs were equal and the
+	   result should be zero.  */
+	beqz	a9, .Lsub_return_zero
+
+	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
+	addi	a11, a10, -24
+	movi	a6, 24
+	movltz	a6, a10, a11	/* a10 < 24: use a10 as the shift amount instead */
+	j	.Lsub_normalize_shift
+
+.Lsub_subnormal:
+	/* The exponent is too small to shift away all the leading zeros.
+	   Set a6 to the current exponent (which has already been
+	   decremented by the borrow) so that the exponent of the result
+	   will be zero.  Do not add 1 to a6 in this case, because: (1)
+	   adding the mantissa will not increment the exponent, so there is
+	   no need to subtract anything extra from the exponent to
+	   compensate, and (2) the effective exponent of a subnormal is 1
+	   not 0 so the shift amount must be 1 smaller than normal. */
+	mov	a6, a10
+	j	.Lsub_normalize_shift
+
+#endif /* L_addsubsf3 */
+
+#ifdef L_mulsf3
+
+	/* Multiplication: __mulsf3 returns x * y (x in a2, y in a3, result in a2).  */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__mulsf3_aux:
+
+	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+	   (This code is placed before the start of the function just to
+	   keep it in range of the limited branch displacements.)  */
+
+.Lmul_xexpzero:
+	/* Clear the sign bit of x.  */
+	slli	a2, a2, 1
+	srli	a2, a2, 1
+
+	/* If x is zero, return zero.  */
+	beqz	a2, .Lmul_return_zero
+
+	/* Normalize x.  Adjust the exponent in a8.  */
+	do_nsau	a10, a2, a11, a12	/* presumably a10 = leading-zero count of a2, a11/a12 scratch -- confirm in do_nsau */
+	addi	a10, a10, -8
+	ssl	a10
+	sll	a2, a2 
+	movi	a8, 1
+	sub	a8, a8, a10	/* a8 = 1 - shift amount */
+	j	.Lmul_xnormalized	
+	
+.Lmul_yexpzero:
+	/* Clear the sign bit of y.  */
+	slli	a3, a3, 1
+	srli	a3, a3, 1
+
+	/* If y is zero, return zero.  */
+	beqz	a3, .Lmul_return_zero
+
+	/* Normalize y.  Adjust the exponent in a9.  */
+	do_nsau	a10, a3, a11, a12	/* presumably a10 = leading-zero count of a3, a11/a12 scratch -- confirm in do_nsau */
+	addi	a10, a10, -8
+	ssl	a10
+	sll	a3, a3
+	movi	a9, 1
+	sub	a9, a9, a10	/* a9 = 1 - shift amount */
+	j	.Lmul_ynormalized	
+
+.Lmul_return_zero:
+	/* Return zero with the appropriate sign bit (bit 31 of a7).  */
+	srli	a2, a7, 31
+	slli	a2, a2, 31
+	j	.Lmul_done
+
+.Lmul_xnan_or_inf:
+	/* If y is zero, return NaN.  */
+	slli	a8, a3, 1
+	bnez	a8, 1f
+	movi	a4, 0x400000	/* make it a quiet NaN */
+	or	a2, a2, a4
+	j	.Lmul_done
+1:
+	/* If y is NaN, return y; otherwise (y finite or Infinity) return x.  */
+	bnall	a3, a6, .Lmul_returnx
+	slli	a8, a3, 9
+	beqz	a8, .Lmul_returnx
+
+.Lmul_returny:
+	mov	a2, a3
+
+.Lmul_returnx:
+	/* Set the sign bit (taken from a7 = x ^ y) and return.  */
+	extui	a7, a7, 31, 1
+	slli	a2, a2, 1
+	ssai	1
+	src	a2, a7, a2
+	j	.Lmul_done
+
+.Lmul_ynan_or_inf:
+	/* If x is zero, return NaN.  */
+	slli	a8, a2, 1
+	bnez	a8, .Lmul_returny
+	movi	a7, 0x400000	/* make it a quiet NaN */
+	or	a2, a3, a7
+	j	.Lmul_done
+
+	.align	4
+	.global	__mulsf3
+	.type	__mulsf3, @function
+__mulsf3:
+#if __XTENSA_CALL0_ABI__
+	leaf_entry sp, 32
+	addi	sp, sp, -32	/* frame: offsets 0/4 hold a0/a8 when XCHAL_NO_MUL, 16-28 hold a12-a15 */
+	s32i	a12, sp, 16
+	s32i	a13, sp, 20
+	s32i	a14, sp, 24
+	s32i	a15, sp, 28
+#elif XCHAL_NO_MUL
+	/* This is not really a leaf function; allocate enough stack space
+	   to allow CALL12s to a helper function.  */
+	leaf_entry sp, 64
+#else
+	leaf_entry sp, 32
+#endif
+	movi	a6, 0x7f800000	/* exponent-field mask */
+
+	/* Get the sign of the result (bit 31 of a7).  */
+	xor	a7, a2, a3
+
+	/* Check for NaN and infinity.  */
+	ball	a2, a6, .Lmul_xnan_or_inf
+	ball	a3, a6, .Lmul_ynan_or_inf
+
+	/* Extract the exponents.  */
+	extui	a8, a2, 23, 8
+	extui	a9, a3, 23, 8
+
+	beqz	a8, .Lmul_xexpzero
+.Lmul_xnormalized:	
+	beqz	a9, .Lmul_yexpzero
+.Lmul_ynormalized:	
+
+	/* Add the exponents.  */
+	add	a8, a8, a9
+
+	/* Replace sign/exponent fields with explicit "1.0".  */
+	movi	a10, 0xffffff	/* keep the low 24 bits: mantissa plus the explicit "1.0" */
+	or	a2, a2, a6
+	and	a2, a2, a10
+	or	a3, a3, a6
+	and	a3, a3, a10
+
+	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */
+
+#if XCHAL_HAVE_MUL32_HIGH
+
+	mull	a6, a2, a3
+	muluh	a2, a2, a3
+
+#else
+
+	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
+	   products.  These partial products are:
+
+		0 xl * yl
+
+		1 xl * yh
+		2 xh * yl
+
+		3 xh * yh
+
+	   If using the Mul16 or Mul32 multiplier options, these input
+	   chunks must be stored in separate registers.  For Mac16, the
+	   UMUL.AA.* opcodes can specify that the inputs come from either
+	   half of the registers, so there is no need to shift them out
+	   ahead of time.  If there is no multiply hardware, the 16-bit
+	   chunks can be extracted when setting up the arguments to the
+	   separate multiply function.  */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+	/* Calling a separate multiply function will clobber a0 and requires
+	   use of a8 as a temporary, so save those values now.  (The function
+	   uses a custom ABI so nothing else needs to be saved.)  */
+	s32i	a0, sp, 0
+	s32i	a8, sp, 4
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+	/* Get the high halves of the inputs into registers.  */
+	srli	a2h, a2, 16
+	srli	a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+	/* Clear the high halves of the inputs.  This does not matter
+	   for MUL16 because the high bits are ignored.  */
+	extui	a2, a2, 0, 16
+	extui	a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	mul16u	dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	mull	dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+   a period in the definition of do_mul below.  These macros are a workaround
+   using underscores instead of periods when doing the concatenation.  */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
+	rsr	dst, ACCLO
+
+#else /* no multiply hardware */
+	
+#define set_arg_l(dst, src) \
+	extui	dst, src, 0, 16
+#define set_arg_h(dst, src) \
+	srli	dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	set_arg_ ## xhalf (a13, xreg); \
+	set_arg_ ## yhalf (a14, yreg); \
+	call0	.Lmul_mulsi3; \
+	mov	dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	set_arg_ ## xhalf (a14, xreg); \
+	set_arg_ ## yhalf (a15, yreg); \
+	call12	.Lmul_mulsi3; \
+	mov	dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
+	do_mul(a6, a2, l, a3, h)	/* pp 1 */
+	do_mul(a11, a2, h, a3, l)	/* pp 2 */
+	movi	a9, 0
+	add	a6, a6, a11
+	bgeu	a6, a11, 1f	/* sum >= addend: the add did not wrap */
+	addi	a9, a9, 1
+1:
+	/* Shift the high half of a9/a6 into position in a9.  Note that
+	   this value can be safely incremented without any carry-outs.  */
+	ssai	16
+	src	a9, a9, a6
+
+	/* Compute the low word into a6.  */
+	do_mul(a11, a2, l, a3, l)	/* pp 0 */
+	sll	a6, a6
+	add	a6, a6, a11
+	bgeu	a6, a11, 1f	/* sum >= addend: the add did not wrap */
+	addi	a9, a9, 1
+1:
+	/* Compute the high word into a2.  */
+	do_mul(a2, a2, h, a3, h)	/* pp 3 */
+	add	a2, a2, a9
+	
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+	/* Restore values saved on the stack during the multiplication.  */
+	l32i	a0, sp, 0
+	l32i	a8, sp, 4
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+	/* Shift left by 9 bits, unless there was a carry-out from the
+	   multiply, in which case, shift by 8 bits and increment the
+	   exponent.  */
+	movi	a4, 9
+	srli	a5, a2, 24 - 9
+	beqz	a5, 1f
+	addi	a4, a4, -1
+	addi	a8, a8, 1
+1:	ssl	a4
+	src	a2, a2, a6
+	sll	a6, a6
+
+	/* Subtract the extra bias from the exponent sum (plus one to account
+	   for the explicit "1.0" of the mantissa that will be added to the
+	   exponent in the final result).  */
+	movi	a4, 0x80
+	sub	a8, a8, a4
+	
+	/* Check for over/underflow.  The value in a8 is one less than the
+	   final exponent, so values in the range 0..fd are OK here.  */
+	movi	a4, 0xfe
+	bgeu	a8, a4, .Lmul_overflow	/* unsigned compare also catches negative a8 */
+	
+.Lmul_round:
+	/* Round.  The leftover fraction is in a6, msb first.  */
+	bgez	a6, .Lmul_rounded
+	addi	a2, a2, 1
+	slli	a6, a6, 1
+	beqz	a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+	/* Add the exponent to the mantissa.  */
+	slli	a8, a8, 23
+	add	a2, a2, a8
+
+.Lmul_addsign:
+	/* Add the sign bit.  */
+	srli	a7, a7, 31
+	slli	a7, a7, 31
+	or	a2, a2, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+	l32i	a12, sp, 16
+	l32i	a13, sp, 20
+	l32i	a14, sp, 24
+	l32i	a15, sp, 28
+	addi	sp, sp, 32
+#endif
+	leaf_return
+
+.Lmul_exactlyhalf:
+	/* Round down to the nearest even value (clear bit 0).  */
+	srli	a2, a2, 1
+	slli	a2, a2, 1
+	j	.Lmul_rounded
+
+.Lmul_overflow:
+	bltz	a8, .Lmul_underflow
+	/* Return +/- Infinity.  */
+	movi	a8, 0xff
+	slli	a2, a8, 23
+	j	.Lmul_addsign
+
+.Lmul_underflow:
+	/* Create a subnormal value, where the exponent field contains zero,
+	   but the effective exponent is 1.  The value of a8 is one less than
+	   the actual exponent, so just negate it to get the shift amount.  */
+	neg	a8, a8
+	mov	a9, a6	/* remember the bits already shifted out */
+	ssr	a8
+	bgeui	a8, 32, .Lmul_flush_to_zero
+	
+	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
+	   in a6 (combined with the shifted-out bits currently in a6) for
+	   rounding the result.  */
+	sll	a6, a2
+	srl	a2, a2
+
+	/* Set the exponent to zero.  */
+	movi	a8, 0
+
+	/* Pack any nonzero bits shifted out into a6.  */
+	beqz	a9, .Lmul_round
+	movi	a9, 1
+	or	a6, a6, a9
+	j	.Lmul_round
+	
+.Lmul_flush_to_zero:
+	/* Return zero with the appropriate sign bit.  */
+	srli	a2, a7, 31
+	slli	a2, a2, 31
+	j	.Lmul_done
+
+#if XCHAL_NO_MUL
+	
+	/* For Xtensa processors with no multiply hardware, this simplified
+	   version of _mulsi3 is used for multiplying 16-bit chunks of
+	   the floating-point mantissas.  When using CALL0, this function
+	   uses a custom ABI: the inputs are passed in a13 and a14, the
+	   result is returned in a12, and a8 and a15 are clobbered.  */
+	.align	4
+.Lmul_mulsi3:
+	leaf_entry sp, 16
+	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+	movi	\dst, 0	/* running product */
+1:	add	\tmp1, \src2, \dst
+	extui	\tmp2, \src1, 0, 1	/* bit 0 of src1 */
+	movnez	\dst, \tmp1, \tmp2	/* accept the new sum only if that bit is set */
+
+	do_addx2 \tmp1, \src2, \dst, \tmp1	/* presumably tmp1 = src2 * 2 + dst -- confirm in do_addx2 */
+	extui	\tmp2, \src1, 1, 1	/* bit 1 of src1 */
+	movnez	\dst, \tmp1, \tmp2
+
+	do_addx4 \tmp1, \src2, \dst, \tmp1	/* presumably tmp1 = src2 * 4 + dst -- confirm in do_addx4 */
+	extui	\tmp2, \src1, 2, 1	/* bit 2 of src1 */
+	movnez	\dst, \tmp1, \tmp2
+
+	do_addx8 \tmp1, \src2, \dst, \tmp1	/* presumably tmp1 = src2 * 8 + dst -- confirm in do_addx8 */
+	extui	\tmp2, \src1, 3, 1	/* bit 3 of src1 */
+	movnez	\dst, \tmp1, \tmp2
+
+	srli	\src1, \src1, 4	/* four bits of src1 are consumed per iteration */
+	slli	\src2, \src2, 4
+	bnez	\src1, 1b	/* loop until no set bits remain in src1 */
+	.endm
+#if __XTENSA_CALL0_ABI__
+	mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+	/* The result will be written into a2, so save that argument in a4.  */
+	mov	a4, a2
+	mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+	leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_mulsf3 */
+
+#ifdef L_divsf3
+
+	/* Division: __divsf3 returns x / y (x in a2, y in a3, result in a2).  */
+__divsf3_aux:
+
+	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+	   (This code is placed before the start of the function just to
+	   keep it in range of the limited branch displacements.)  */
+
+.Ldiv_yexpzero:
+	/* Clear the sign bit of y.  */
+	slli	a3, a3, 1
+	srli	a3, a3, 1
+
+	/* Check for division by zero.  */
+	beqz	a3, .Ldiv_yzero
+
+	/* Normalize y.  Adjust the exponent in a9.  */
+	do_nsau	a10, a3, a4, a5	/* presumably a10 = leading-zero count of a3, a4/a5 scratch -- confirm in do_nsau */
+	addi	a10, a10, -8
+	ssl	a10
+	sll	a3, a3
+	movi	a9, 1
+	sub	a9, a9, a10	/* a9 = 1 - shift amount */
+	j	.Ldiv_ynormalized	
+
+.Ldiv_yzero:
+	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
+	slli	a4, a2, 1
+	srli	a4, a4, 1	/* a4 = x with the sign bit cleared */
+	srli	a2, a7, 31
+	slli	a2, a2, 31
+	or	a2, a2, a6	/* signed Infinity: result sign plus all-ones exponent */
+	bnez	a4, 1f
+	movi	a4, 0x400000	/* make it a quiet NaN */
+	or	a2, a2, a4
+1:	leaf_return
+
+.Ldiv_xexpzero:
+	/* Clear the sign bit of x.  */
+	slli	a2, a2, 1
+	srli	a2, a2, 1
+
+	/* If x is zero, return zero.  */
+	beqz	a2, .Ldiv_return_zero
+
+	/* Normalize x.  Adjust the exponent in a8.  */
+	do_nsau	a10, a2, a4, a5	/* presumably a10 = leading-zero count of a2, a4/a5 scratch -- confirm in do_nsau */
+	addi	a10, a10, -8
+	ssl	a10
+	sll	a2, a2
+	movi	a8, 1
+	sub	a8, a8, a10	/* a8 = 1 - shift amount */
+	j	.Ldiv_xnormalized	
+	
+.Ldiv_return_zero:
+	/* Return zero with the appropriate sign bit (bit 31 of a7).  */
+	srli	a2, a7, 31
+	slli	a2, a2, 31
+	leaf_return
+
+.Ldiv_xnan_or_inf:
+	/* Set the sign bit of the result (x sign XOR y sign).  */
+	srli	a7, a3, 31
+	slli	a7, a7, 31
+	xor	a2, a2, a7
+	/* If y is NaN or Inf, return NaN.  */
+	bnall	a3, a6, 1f
+	movi	a4, 0x400000	/* make it a quiet NaN */
+	or	a2, a2, a4
+1:	leaf_return
+
+.Ldiv_ynan_or_inf:
+	/* If y is Infinity, return zero.  */
+	slli	a8, a3, 9
+	beqz	a8, .Ldiv_return_zero
+	/* y is NaN; return it.  */
+	mov	a2, a3
+	leaf_return
+
+	.align	4
+	.global	__divsf3
+	.type	__divsf3, @function
+__divsf3:
+	leaf_entry sp, 16
+	movi	a6, 0x7f800000	/* exponent-field mask */
+
+	/* Get the sign of the result (bit 31 of a7).  */
+	xor	a7, a2, a3
+
+	/* Check for NaN and infinity.  */
+	ball	a2, a6, .Ldiv_xnan_or_inf
+	ball	a3, a6, .Ldiv_ynan_or_inf
+
+	/* Extract the exponents.  */
+	extui	a8, a2, 23, 8
+	extui	a9, a3, 23, 8
+
+	beqz	a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:	
+	beqz	a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:	
+
+	/* Subtract the exponents.  */
+	sub	a8, a8, a9
+
+	/* Replace sign/exponent fields with explicit "1.0".  */
+	movi	a10, 0xffffff	/* keep the low 24 bits: mantissa plus the explicit "1.0" */
+	or	a2, a2, a6
+	and	a2, a2, a10
+	or	a3, a3, a6
+	and	a3, a3, a10
+
+	/* The first digit of the mantissa division must be a one.
+	   Shift x (and adjust the exponent) as needed to make this true.  */
+	bltu	a3, a2, 1f
+	slli	a2, a2, 1
+	addi	a8, a8, -1
+1:
+	/* Do the first subtraction and shift.  */
+	sub	a2, a2, a3
+	slli	a2, a2, 1
+
+	/* Put the quotient into a10.  */
+	movi	a10, 1
+
+	/* Divide one bit at a time for 23 bits.  */
+	movi	a9, 23
+#if XCHAL_HAVE_LOOPS
+	loop	a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+	/* Shift the quotient << 1.  */
+	slli	a10, a10, 1
+
+	/* Is this digit a 0 or 1?  */
+	bltu	a2, a3, 1f
+
+	/* Output a 1 and subtract.  */
+	addi	a10, a10, 1
+	sub	a2, a2, a3
+
+	/* Shift the dividend << 1.  */
+1:	slli	a2, a2, 1
+
+#if !XCHAL_HAVE_LOOPS
+	addi	a9, a9, -1
+	bnez	a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
+	/* Add the exponent bias (less one to account for the explicit "1.0"
+	   of the mantissa that will be added to the exponent in the final
+	   result).  */
+	addi	a8, a8, 0x7e
+	
+	/* Check for over/underflow.  The value in a8 is one less than the
+	   final exponent, so values in the range 0..fd are OK here.  */
+	movi	a4, 0xfe
+	bgeu	a8, a4, .Ldiv_overflow	/* unsigned compare also catches negative a8 */
+	
+.Ldiv_round:
+	/* Round.  The remainder (<< 1) is in a2.  */
+	bltu	a2, a3, .Ldiv_rounded
+	addi	a10, a10, 1
+	beq	a2, a3, .Ldiv_exactlyhalf
+
+.Ldiv_rounded:
+	/* Add the exponent to the mantissa.  */
+	slli	a8, a8, 23
+	add	a2, a10, a8
+
+.Ldiv_addsign:
+	/* Add the sign bit.  */
+	srli	a7, a7, 31
+	slli	a7, a7, 31
+	or	a2, a2, a7
+	leaf_return
+
+.Ldiv_overflow:
+	bltz	a8, .Ldiv_underflow
+	/* Return +/- Infinity.  */
+	addi	a8, a4, 1	/* a4 is 0xfe here, so this is 0xff */
+	slli	a2, a8, 23
+	j	.Ldiv_addsign
+
+.Ldiv_exactlyhalf:
+	/* Remainder is exactly half the divisor.  Round even.  */
+	srli	a10, a10, 1
+	slli	a10, a10, 1
+	j	.Ldiv_rounded
+
+.Ldiv_underflow:
+	/* Create a subnormal value, where the exponent field contains zero,
+	   but the effective exponent is 1.  The value of a8 is one less than
+	   the actual exponent, so just negate it to get the shift amount.  */
+	neg	a8, a8
+	ssr	a8
+	bgeui	a8, 32, .Ldiv_flush_to_zero
+	
+	/* Shift a10 right.  Any bits that are shifted out of a10 are
+	   saved in a6 for rounding the result.  */
+	sll	a6, a10
+	srl	a10, a10
+
+	/* Set the exponent to zero.  */
+	movi	a8, 0
+
+	/* Pack any nonzero remainder (in a2) into a6.  */
+	beqz	a2, 1f
+	movi	a9, 1
+	or	a6, a6, a9
+	
+	/* Round a10 based on the bits shifted out into a6.  */
+1:	bgez	a6, .Ldiv_rounded
+	addi	a10, a10, 1
+	slli	a6, a6, 1
+	bnez	a6, .Ldiv_rounded
+	srli	a10, a10, 1	/* exactly half: round to even by clearing bit 0 */
+	slli	a10, a10, 1
+	j	.Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+	/* Return zero with the appropriate sign bit.  */
+	srli	a2, a7, 31
+	slli	a2, a2, 31
+	leaf_return
+
+#endif /* L_divsf3 */
+
+#ifdef L_cmpsf2
+
+	/* Equal and Not Equal: return 0 in a2 if x (a2) == y (a3), else 1.  */
+
+	.align	4
+	.global	__eqsf2
+	.global	__nesf2
+	.set	__nesf2, __eqsf2
+	.type	__eqsf2, @function
+__eqsf2:
+	leaf_entry sp, 16
+	bne	a2, a3, 4f
+
+	/* The values are equal but NaN != NaN.  Check the exponent.  */
+	movi	a6, 0x7f800000
+	ball	a2, a6, 3f
+
+	/* Equal.  */
+	movi	a2, 0
+	leaf_return
+
+	/* Not equal.  NOTE(review): no visible branch targets label 2 below.  */
+2:	movi	a2, 1
+	leaf_return
+
+	/* Check if the mantissas are nonzero.  */
+3:	slli	a7, a2, 9
+	j	5f
+
+	/* Check if x and y are zero with different signs.  */
+4:	or	a7, a2, a3
+	slli	a7, a7, 1
+
+	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+	   of x when exponent(x) = 0x7f8 and x == y.  */
+5:	movi	a2, 0
+	movi	a3, 1
+	movnez	a2, a3, a7	
+	leaf_return
+
+
+	/* Greater Than: return 1 in a2 if x (a2) > y (a3), else 0; 0 if either is NaN.  */
+
+	.align	4
+	.global	__gtsf2
+	.type	__gtsf2, @function
+__gtsf2:
+	leaf_entry sp, 16
+	movi	a6, 0x7f800000	/* exponent-field mask */
+	ball	a2, a6, 2f
+1:	bnall	a3, a6, .Lle_cmp	/* ordered case shares the x <= y compare in __lesf2 */
+
+	/* Check if y is a NaN.  */
+	slli	a7, a3, 9
+	beqz	a7, .Lle_cmp
+	movi	a2, 0
+	leaf_return
+
+	/* Check if x is a NaN.  */
+2:	slli	a7, a2, 9
+	beqz	a7, 1b
+	movi	a2, 0
+	leaf_return
+
+
+	/* Less Than or Equal: return 0 in a2 if x (a2) <= y (a3), else 1; 1 if either is NaN.  */
+
+	.align	4
+	.global	__lesf2
+	.type	__lesf2, @function
+__lesf2:
+	leaf_entry sp, 16
+	movi	a6, 0x7f800000	/* exponent-field mask */
+	ball	a2, a6, 2f
+1:	bnall	a3, a6, .Lle_cmp
+
+	/* Check if y is a NaN.  */
+	slli	a7, a3, 9
+	beqz	a7, .Lle_cmp
+	movi	a2, 1
+	leaf_return
+
+	/* Check if x is a NaN.  */
+2:	slli	a7, a2, 9
+	beqz	a7, 1b
+	movi	a2, 1
+	leaf_return
+
+.Lle_cmp:
+	/* Check if x and y have different signs.  */
+	xor	a7, a2, a3
+	bltz	a7, .Lle_diff_signs
+
+	/* Check if x is negative.  */
+	bltz	a2, .Lle_xneg
+
+	/* Check if x <= y (both nonnegative: compare the bit patterns).  */
+	bltu	a3, a2, 5f
+4:	movi	a2, 0
+	leaf_return
+
+.Lle_xneg:
+	/* Check if y <= x (both negative: the bit-pattern order is reversed).  */
+	bgeu	a2, a3, 4b
+5:	movi	a2, 1
+	leaf_return
+
+.Lle_diff_signs:
+	bltz	a2, 4b	/* x negative, y nonnegative: x <= y */
+
+	/* Check if both x and y are zero.  */
+	or	a7, a2, a3
+	slli	a7, a7, 1
+	movi	a2, 1
+	movi	a3, 0
+	moveqz	a2, a3, a7	/* +0 versus -0 counts as x <= y: return 0 */
+	leaf_return
+
+
+	/* Greater Than or Equal: return 0 in a2 if x (a2) >= y (a3), else -1; -1 if either is NaN.  */
+
+	.align	4
+	.global	__gesf2
+	.type	__gesf2, @function
+__gesf2:
+	leaf_entry sp, 16
+	movi	a6, 0x7f800000	/* exponent-field mask */
+	ball	a2, a6, 2f
+1:	bnall	a3, a6, .Llt_cmp	/* ordered case shares the x < y compare in __ltsf2 */
+
+	/* Check if y is a NaN.  */
+	slli	a7, a3, 9
+	beqz	a7, .Llt_cmp
+	movi	a2, -1
+	leaf_return
+
+	/* Check if x is a NaN.  */
+2:	slli	a7, a2, 9
+	beqz	a7, 1b
+	movi	a2, -1
+	leaf_return
+
+
+	/* Less Than: return -1 in a2 if x (a2) < y (a3), else 0; 0 if either is NaN.  */
+
+	.align	4
+	.global	__ltsf2
+	.type	__ltsf2, @function
+__ltsf2:
+	leaf_entry sp, 16
+	movi	a6, 0x7f800000	/* exponent-field mask */
+	ball	a2, a6, 2f
+1:	bnall	a3, a6, .Llt_cmp
+
+	/* Check if y is a NaN.  */
+	slli	a7, a3, 9
+	beqz	a7, .Llt_cmp
+	movi	a2, 0
+	leaf_return
+
+	/* Check if x is a NaN.  */
+2:	slli	a7, a2, 9
+	beqz	a7, 1b
+	movi	a2, 0
+	leaf_return
+
+.Llt_cmp:
+	/* Check if x and y have different signs.  */
+	xor	a7, a2, a3
+	bltz	a7, .Llt_diff_signs
+
+	/* Check if x is negative.  */
+	bltz	a2, .Llt_xneg
+
+	/* Check if x < y (both nonnegative: compare the bit patterns).  */
+	bgeu	a2, a3, 5f
+4:	movi	a2, -1
+	leaf_return
+
+.Llt_xneg:
+	/* Check if y < x (both negative: the bit-pattern order is reversed).  */
+	bltu	a3, a2, 4b
+5:	movi	a2, 0
+	leaf_return
+
+.Llt_diff_signs:
+	bgez	a2, 5b	/* x nonnegative, y negative: x is not less than y */
+
+	/* x is negative and y is not: x < y unless both are zero.  */
+	or	a7, a2, a3
+	slli	a7, a7, 1
+	movi	a2, 0
+	movi	a3, -1
+	movnez	a2, a3, a7	/* -0 versus +0 is not less: keep 0 */
+	leaf_return
+
+
+	/* Unordered: return 1 in a2 if x (a2) or y (a3) is a NaN, else 0.  */
+
+	.align	4
+	.global	__unordsf2
+	.type	__unordsf2, @function
+__unordsf2:
+	leaf_entry sp, 16
+	movi	a6, 0x7f800000	/* exponent-field mask */
+	ball	a2, a6, 3f	/* x has an all-ones exponent: NaN or Infinity */
+1:	ball	a3, a6, 4f	/* y has an all-ones exponent: NaN or Infinity */
+2:	movi	a2, 0
+	leaf_return
+
+3:	slli	a7, a2, 9	/* mantissa of x; zero means Infinity, so go on to check y */
+	beqz	a7, 1b
+	movi	a2, 1
+	leaf_return
+
+4:	slli	a7, a3, 9	/* mantissa of y; zero means Infinity, so the pair is ordered */
+	beqz	a7, 2b
+	movi	a2, 1
+	leaf_return
+
+#endif /* L_cmpsf2 */
+
+#ifdef L_fixsfsi
+
+	.align	4
+	.global	__fixsfsi	/* convert the float in a2 to a signed 32-bit integer in a2, truncating */
+	.type	__fixsfsi, @function
+__fixsfsi:
+	leaf_entry sp, 16
+
+	/* Check for NaN and Infinity.  */
+	movi	a6, 0x7f800000
+	ball	a2, a6, .Lfixsfsi_nan_or_inf
+
+	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
+	extui	a4, a2, 23, 8
+	addi	a4, a4, -0x7e
+	bgei	a4, 32, .Lfixsfsi_maxint	/* too large: saturate */
+	blti	a4, 1, .Lfixsfsi_zero	/* too small: the result is 0 */
+
+	/* Add explicit "1.0" and shift << 8.  */
+	or	a7, a2, a6	/* a7 keeps the sign of x in bit 31 */
+	slli	a5, a7, 8
+
+	/* Shift back to the right, based on the exponent.  */
+	ssl	a4		/* shift by 32 - a4 */
+	srl	a5, a5
+
+	/* Negate the result if sign != 0.  */
+	neg	a2, a5
+	movgez	a2, a5, a7	/* a7 >= 0 means x is nonnegative: keep the positive value */
+	leaf_return
+
+.Lfixsfsi_nan_or_inf:
+	/* Handle Infinity and NaN.  */
+	slli	a4, a2, 9
+	beqz	a4, .Lfixsfsi_maxint	/* Infinity: saturate according to its sign */
+
+	/* Translate NaN to +maxint.  */
+	movi	a2, 0
+
+.Lfixsfsi_maxint:
+	slli	a4, a6, 8	/* 0x80000000 */
+	addi	a5, a4, -1	/* 0x7fffffff */
+	movgez	a4, a5, a2	/* a2 >= 0: 0x7fffffff, otherwise 0x80000000 */
+	mov	a2, a4
+	leaf_return
+
+.Lfixsfsi_zero:
+	movi	a2, 0
+	leaf_return
+
+#endif /* L_fixsfsi */
+
+#ifdef L_fixsfdi
+/* __fixsfdi: truncate the float in a2 to a signed 64-bit integer in xh/xl.  */
+	.align	4
+	.global	__fixsfdi
+	.type	__fixsfdi, @function
+__fixsfdi:
+	leaf_entry sp, 16
+
+	/* Check for NaN and Infinity.  */
+	movi	a6, 0x7f800000
+	ball	a2, a6, .Lfixsfdi_nan_or_inf
+
+	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
+	extui	a4, a2, 23, 8
+	addi	a4, a4, -0x7e
+	bgei	a4, 64, .Lfixsfdi_maxint	/* too large: saturate */
+	blti	a4, 1, .Lfixsfdi_zero	/* magnitude below 1.0 */
+
+	/* Add explicit "1.0" and shift << 8.  */
+	or	a7, a2, a6	/* a7 keeps the sign bit of the input */
+	slli	xh, a7, 8
+
+	/* Shift back to the right, based on the exponent.  */
+	ssl	a4		/* shift by 64 - a4 */
+	bgei	a4, 32, .Lfixsfdi_smallshift
+	srl	xl, xh		/* result fits in the low word */
+	movi	xh, 0
+
+.Lfixsfdi_shifted:	
+	/* Negate the result if sign != 0.  */
+	bgez	a7, 1f
+	neg	xl, xl
+	neg	xh, xh
+	beqz	xl, 1f		/* low word is zero: no borrow */
+	addi	xh, xh, -1	/* propagate the borrow into the high word */
+1:	leaf_return
+
+.Lfixsfdi_smallshift:
+	movi	xl, 0
+	sll	xl, xh		/* low word: bits shifted out of xh */
+	srl	xh, xh		/* high word */
+	j	.Lfixsfdi_shifted
+
+.Lfixsfdi_nan_or_inf:
+	/* Handle Infinity and NaN.  */
+	slli	a4, a2, 9
+	beqz	a4, .Lfixsfdi_maxint	/* Infinity: saturate by sign */
+
+	/* Translate NaN to +maxint.  */
+	movi	a2, 0		/* non-negative, so +maxint is selected */
+
+.Lfixsfdi_maxint:
+	slli	a7, a6, 8	/* 0x80000000 */
+	bgez	a2, 1f
+	mov	xh, a7		/* negative: 0x80000000:00000000 */
+	movi	xl, 0
+	leaf_return
+
+1:	addi	xh, a7, -1	/* 0x7fffffff */
+	movi	xl, -1
+	leaf_return
+
+.Lfixsfdi_zero:
+	movi	xh, 0
+	movi	xl, 0
+	leaf_return
+
+#endif /* L_fixsfdi */
+
+#ifdef L_fixunssfsi
+/* __fixunssfsi: truncate the float in a2 to a 32-bit unsigned int in a2.  */
+	.align	4
+	.global	__fixunssfsi
+	.type	__fixunssfsi, @function
+__fixunssfsi:
+	leaf_entry sp, 16
+
+	/* Check for NaN and Infinity.  */
+	movi	a6, 0x7f800000
+	ball	a2, a6, .Lfixunssfsi_nan_or_inf
+
+	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
+	extui	a4, a2, 23, 8
+	addi	a4, a4, -0x7f
+	bgei	a4, 32, .Lfixunssfsi_maxint	/* too large: saturate */
+	bltz	a4, .Lfixunssfsi_zero	/* magnitude below 1.0 */
+
+	/* Add explicit "1.0" and shift << 8.  */
+	or	a7, a2, a6	/* a7 keeps the sign bit of the input */
+	slli	a5, a7, 8
+
+	/* Shift back to the right, based on the exponent.  */
+	addi	a4, a4, 1	/* a4 = number of integer bits, 1..32 */
+	beqi	a4, 32, .Lfixunssfsi_bigexp
+	ssl	a4		/* shift by 32 - a4 */
+	srl	a5, a5
+
+	/* Negate the result if sign != 0.  */
+	neg	a2, a5
+	movgez	a2, a5, a7	/* sign clear: keep the positive value */
+	leaf_return
+
+.Lfixunssfsi_nan_or_inf:
+	/* Handle Infinity and NaN.  */
+	slli	a4, a2, 9
+	beqz	a4, .Lfixunssfsi_maxint	/* Infinity: saturate by sign */
+
+	/* Translate NaN to 0xffffffff.  */
+	movi	a2, -1
+	leaf_return
+
+.Lfixunssfsi_maxint:
+	slli	a4, a6, 8	/* 0x80000000 */
+	movi	a5, -1		/* 0xffffffff */
+	movgez	a4, a5, a2	/* a2 >= 0: use 0xffffffff */
+	mov	a2, a4
+	leaf_return
+
+.Lfixunssfsi_zero:
+	movi	a2, 0
+	leaf_return
+
+.Lfixunssfsi_bigexp:
+	/* Handle unsigned maximum exponent case.  */
+	bltz	a2, 1f
+	mov	a2, a5		/* no shift needed */
+	leaf_return
+
+	/* Return 0x80000000 if negative.  */
+1:	slli	a2, a6, 8
+	leaf_return
+
+#endif /* L_fixunssfsi */
+
+#ifdef L_fixunssfdi
+/* __fixunssfdi: truncate the float in a2 to a 64-bit unsigned in xh/xl.  */
+	.align	4
+	.global	__fixunssfdi
+	.type	__fixunssfdi, @function
+__fixunssfdi:
+	leaf_entry sp, 16
+
+	/* Check for NaN and Infinity.  */
+	movi	a6, 0x7f800000
+	ball	a2, a6, .Lfixunssfdi_nan_or_inf
+
+	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
+	extui	a4, a2, 23, 8
+	addi	a4, a4, -0x7f
+	bgei	a4, 64, .Lfixunssfdi_maxint	/* too large: saturate */
+	bltz	a4, .Lfixunssfdi_zero	/* magnitude below 1.0 */
+
+	/* Add explicit "1.0" and shift << 8.  */
+	or	a7, a2, a6	/* a7 keeps the sign bit of the input */
+	slli	xh, a7, 8
+
+	/* Shift back to the right, based on the exponent.  */
+	addi	a4, a4, 1	/* a4 = number of integer bits, 1..64 */
+	beqi	a4, 64, .Lfixunssfdi_bigexp
+	ssl	a4		/* shift by 64 - a4 */
+	bgei	a4, 32, .Lfixunssfdi_smallshift
+	srl	xl, xh		/* result fits in the low word */
+	movi	xh, 0
+
+.Lfixunssfdi_shifted:
+	/* Negate the result if sign != 0.  */
+	bgez	a7, 1f
+	neg	xl, xl
+	neg	xh, xh
+	beqz	xl, 1f		/* low word is zero: no borrow */
+	addi	xh, xh, -1	/* propagate the borrow into the high word */
+1:	leaf_return
+
+.Lfixunssfdi_smallshift:
+	movi	xl, 0
+	src	xl, xh, xl	/* low word: bits shifted out of xh */
+	srl	xh, xh		/* high word */
+	j	.Lfixunssfdi_shifted
+
+.Lfixunssfdi_nan_or_inf:
+	/* Handle Infinity and NaN.  */
+	slli	a4, a2, 9
+	beqz	a4, .Lfixunssfdi_maxint	/* Infinity: saturate by sign */
+
+	/* Translate NaN to 0xffffffff.... */
+1:	movi	xh, -1
+	movi	xl, -1
+	leaf_return
+
+.Lfixunssfdi_maxint:
+	bgez	a2, 1b		/* non-negative overflow: all ones */
+2:	slli	xh, a6, 8	/* 0x80000000 */
+	movi	xl, 0
+	leaf_return
+
+.Lfixunssfdi_zero:
+	movi	xh, 0
+	movi	xl, 0
+	leaf_return
+
+.Lfixunssfdi_bigexp:
+	/* Handle unsigned maximum exponent case.  */
+	bltz	a7, 2b		/* negative: return 0x80000000:00000000 */
+	movi	xl, 0
+	leaf_return		/* no shift needed */
+
+#endif /* L_fixunssfdi */
+
+#ifdef L_floatsisf
+/* __floatunsisf, __floatsisf: convert the integer in a2 to a float in a2.  */
+	.align	4
+	.global	__floatunsisf
+	.type	__floatunsisf, @function
+__floatunsisf:
+	leaf_entry sp, 16
+	beqz	a2, .Lfloatsisf_return	/* 0 converts to +0.0 (all zero bits) */
+
+	/* Set the sign to zero and jump to the floatsisf code.  */
+	movi	a7, 0
+	j	.Lfloatsisf_normalize
+
+	.align	4
+	.global	__floatsisf
+	.type	__floatsisf, @function
+__floatsisf:
+	leaf_entry sp, 16
+
+	/* Check for zero.  */
+	beqz	a2, .Lfloatsisf_return
+
+	/* Save the sign.  */
+	extui	a7, a2, 31, 1
+
+	/* Get the absolute value.  */
+#if XCHAL_HAVE_ABS
+	abs	a2, a2
+#else
+	neg	a4, a2
+	movltz	a2, a4, a2	/* negative input: replace with -a2 */
+#endif
+
+.Lfloatsisf_normalize:
+	/* Normalize with the first 1 bit in the msb.  */
+	do_nsau	a4, a2, a5, a6	/* a4 = leading zero count; a5, a6 scratch */
+	ssl	a4
+	sll	a5, a2
+
+	/* Shift the mantissa into position, with rounding bits in a6.  */
+	srli	a2, a5, 8
+	slli	a6, a5, (32 - 8)
+
+	/* Set the exponent.  */
+	movi	a5, 0x9d	/* 0x7e + 31 */
+	sub	a5, a5, a4
+	slli	a5, a5, 23
+	add	a2, a2, a5	/* the leading 1 (bit 23) adds 1 to the exponent */
+
+	/* Add the sign.  */
+	slli	a7, a7, 31
+	or	a2, a2, a7
+
+	/* Round up if the leftover fraction is >= 1/2.  */
+	bgez	a6, .Lfloatsisf_return
+	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */
+
+	/* Check if the leftover fraction is exactly 1/2.  */
+	slli	a6, a6, 1
+	beqz	a6, .Lfloatsisf_exactlyhalf
+
+.Lfloatsisf_return:
+	leaf_return
+
+.Lfloatsisf_exactlyhalf:
+	/* Round down to the nearest even value.  */
+	srli	a2, a2, 1	/* clear bit 0 of the rounded result */
+	slli	a2, a2, 1
+	leaf_return
+
+#endif /* L_floatsisf */
+
+#ifdef L_floatdisf
+/* __floatundisf, __floatdisf: convert the 64-bit integer xh/xl to a float.  */
+	.align	4
+	.global	__floatundisf
+	.type	__floatundisf, @function
+__floatundisf:
+	leaf_entry sp, 16
+
+	/* Check for zero.  */
+	or	a4, xh, xl
+	beqz	a4, 2f		/* both words zero: a2 is already +0.0 */
+
+	/* Set the sign to zero and jump to the floatdisf code.  */
+	movi	a7, 0
+	j	.Lfloatdisf_normalize
+
+	.align	4
+	.global	__floatdisf
+	.type	__floatdisf, @function
+__floatdisf:
+	leaf_entry sp, 16
+
+	/* Check for zero.  */
+	or	a4, xh, xl
+	beqz	a4, 2f
+
+	/* Save the sign.  */
+	extui	a7, xh, 31, 1
+
+	/* Get the absolute value.  */
+	bgez	xh, .Lfloatdisf_normalize
+	neg	xl, xl
+	neg	xh, xh
+	beqz	xl, .Lfloatdisf_normalize	/* low word zero: no borrow */
+	addi	xh, xh, -1	/* propagate the borrow into the high word */
+
+.Lfloatdisf_normalize:
+	/* Normalize with the first 1 bit in the msb of xh.  */
+	beqz	xh, .Lfloatdisf_bigshift
+	do_nsau	a4, xh, a5, a6	/* a4 = leading zero count; a5, a6 scratch */
+	ssl	a4
+	src	xh, xh, xl
+	sll	xl, xl
+
+.Lfloatdisf_shifted:
+	/* Shift the mantissa into position, with rounding bits in a6.  */
+	ssai	8
+	sll	a5, xl		/* a5 = low bits that fall below a6 */
+	src	a6, xh, xl
+	srl	xh, xh
+	beqz	a5, 1f
+	movi	a5, 1		/* keep a sticky bit for the lost low bits */
+	or	a6, a6, a5
+1:
+	/* Set the exponent.  */
+	movi	a5, 0xbd	/* 0x7e + 63 */
+	sub	a5, a5, a4
+	slli	a5, a5, 23
+	add	a2, xh, a5	/* the leading 1 (bit 23) adds 1 to the exponent */
+
+	/* Add the sign.  */
+	slli	a7, a7, 31
+	or	a2, a2, a7
+
+	/* Round up if the leftover fraction is >= 1/2.  */
+	bgez	a6, 2f
+	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */
+
+	/* Check if the leftover fraction is exactly 1/2.  */
+	slli	a6, a6, 1
+	beqz	a6, .Lfloatdisf_exactlyhalf
+2:	leaf_return
+
+.Lfloatdisf_bigshift:
+	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
+	do_nsau	a4, xl, a5, a6
+	ssl	a4
+	sll	xh, xl
+	movi	xl, 0
+	addi	a4, a4, 32	/* count the 32 leading zeros of xh as well */
+	j	.Lfloatdisf_shifted
+
+.Lfloatdisf_exactlyhalf:
+	/* Round down to the nearest even value.  */
+	srli	a2, a2, 1	/* clear bit 0 of the rounded result */
+	slli	a2, a2, 1
+	leaf_return
+
+#endif /* L_floatdisf */
diff --git a/gcc-4.9/libgcc/config/xtensa/lib1funcs.S b/gcc-4.9/libgcc/config/xtensa/lib1funcs.S
new file mode 100644
index 000000000..7c16cc850
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/lib1funcs.S
@@ -0,0 +1,844 @@
+/* Assembly functions for the Xtensa version of libgcc1.
+   Copyright (C) 2001-2014 Free Software Foundation, Inc.
+   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "xtensa-config.h"
+
+/* Define macros for the ABS and ADDX* instructions to handle cases
+   where they are not included in the Xtensa processor configuration.  */
+
+	.macro	do_abs dst, src, tmp
+#if XCHAL_HAVE_ABS
+	abs	\dst, \src
+#else
+	neg	\tmp, \src	/* tmp = -src */
+	movgez	\tmp, \src, \src	/* if src >= 0, tmp = src */
+	mov	\dst, \tmp	/* written last, so dst may be src */
+#endif
+	.endm
+
+	.macro	do_addx2 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+	addx2	\dst, \as, \at
+#else
+	slli	\tmp, \as, 1	/* tmp = as * 2 */
+	add	\dst, \tmp, \at	/* dst = as * 2 + at */
+#endif
+	.endm
+
+	.macro	do_addx4 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+	addx4	\dst, \as, \at
+#else
+	slli	\tmp, \as, 2	/* tmp = as * 4 */
+	add	\dst, \tmp, \at	/* dst = as * 4 + at */
+#endif
+	.endm
+
+	.macro	do_addx8 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+	addx8	\dst, \as, \at
+#else
+	slli	\tmp, \as, 3	/* tmp = as * 8 */
+	add	\dst, \tmp, \at	/* dst = as * 8 + at */
+#endif
+	.endm
+
+/* Define macros for leaf function entry and return, supporting either the
+   standard register windowed ABI or the non-windowed call0 ABI.  These
+   macros do not allocate any extra stack space, so they only work for
+   leaf functions that do not need to spill anything to the stack.  */
+
+	.macro leaf_entry reg, size
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+	entry \reg, \size
+#else
+	/* CALL0 or non-windowed: no entry instruction is emitted.  */
+#endif
+	.endm
+
+	.macro leaf_return
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+	retw			/* pairs with the entry from leaf_entry */
+#else
+	ret			/* CALL0 or non-windowed return */
+#endif
+	.endm
+
+
+#ifdef L_mulsi3
+	.align	4
+	.global	__mulsi3
+	.type	__mulsi3, @function
+__mulsi3:
+	leaf_entry sp, 16
+	/* Return the low 32 bits of a2 * a3 in a2.  */
+#if XCHAL_HAVE_MUL32
+	mull	a2, a2, a3
+
+#elif XCHAL_HAVE_MUL16
+	or	a4, a2, a3
+	srai	a4, a4, 16	/* nonzero if either input exceeds 16 bits */
+	bnez	a4, .LMUL16
+	mul16u	a2, a2, a3	/* both inputs fit: one multiply suffices */
+	leaf_return
+.LMUL16:
+	srai	a4, a2, 16	/* a4 = high half of a2 */
+	srai	a5, a3, 16	/* a5 = high half of a3 */
+	mul16u	a7, a4, a3	/* high (a2) * low (a3) */
+	mul16u	a6, a5, a2	/* high (a3) * low (a2) */
+	mul16u	a4, a2, a3	/* low (a2) * low (a3) */
+	add	a7, a7, a6
+	slli	a7, a7, 16	/* cross terms are weighted by 2^16 */
+	add	a2, a7, a4
+
+#elif XCHAL_HAVE_MAC16
+	mul.aa.hl a2, a3
+	mula.aa.lh a2, a3
+	rsr	a5, ACCLO	/* a5 = sum of the two cross products */
+	umul.aa.ll a2, a3
+	rsr	a4, ACCLO	/* a4 = low * low */
+	slli	a5, a5, 16
+	add	a2, a4, a5
+
+#else /* !MUL32 && !MUL16 && !MAC16 */
+
+	/* Multiply one bit at a time, but unroll the loop 4x to better
+	   exploit the addx instructions and avoid overhead.
+	   Peel the first iteration to save a cycle on init.  */
+
+	/* Avoid negative numbers.  */
+	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
+	do_abs	a3, a3, a6
+	do_abs	a2, a2, a6
+
+	/* Swap so the second argument is smaller.  */
+	sub	a7, a2, a3
+	mov	a4, a3
+	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
+	movltz	a3, a2, a7	/* a3 = min (a2, a3) */
+
+	movi	a2, 0		/* a2 accumulates the product */
+	extui	a6, a3, 0, 1
+	movnez	a2, a4, a6	/* bit 0 set: add a4 */
+
+	do_addx2 a7, a4, a2, a7
+	extui	a6, a3, 1, 1
+	movnez	a2, a7, a6	/* bit 1 set: add a4 * 2 */
+
+	do_addx4 a7, a4, a2, a7
+	extui	a6, a3, 2, 1
+	movnez	a2, a7, a6	/* bit 2 set: add a4 * 4 */
+
+	do_addx8 a7, a4, a2, a7
+	extui	a6, a3, 3, 1
+	movnez	a2, a7, a6	/* bit 3 set: add a4 * 8 */
+
+	bgeui	a3, 16, .Lmult_main_loop	/* more multiplier bits remain */
+	neg	a3, a2
+	movltz	a2, a3, a5	/* negate if the input signs differed */
+	leaf_return
+
+	.align	4
+.Lmult_main_loop:
+	srli	a3, a3, 4	/* consume four multiplier bits */
+	slli	a4, a4, 4	/* scale the multiplicand to match */
+
+	add	a7, a4, a2
+	extui	a6, a3, 0, 1
+	movnez	a2, a7, a6
+
+	do_addx2 a7, a4, a2, a7
+	extui	a6, a3, 1, 1
+	movnez	a2, a7, a6
+
+	do_addx4 a7, a4, a2, a7
+	extui	a6, a3, 2, 1
+	movnez	a2, a7, a6
+
+	do_addx8 a7, a4, a2, a7
+	extui	a6, a3, 3, 1
+	movnez	a2, a7, a6
+
+	bgeui	a3, 16, .Lmult_main_loop
+
+	neg	a3, a2
+	movltz	a2, a3, a5	/* negate if the input signs differed */
+
+#endif /* !MUL32 && !MUL16 && !MAC16 */
+
+	leaf_return
+	.size	__mulsi3, . - __mulsi3
+
+#endif /* L_mulsi3 */
+
+
+#ifdef L_umulsidi3
+/* __umulsidi3: 64-bit unsigned product of a2 and a3, returned in wh/wl.  */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+	.align	4
+	.global	__umulsidi3
+	.type	__umulsidi3, @function
+__umulsidi3:
+#if __XTENSA_CALL0_ABI__
+	leaf_entry sp, 32
+	addi	sp, sp, -32
+	s32i	a12, sp, 16	/* a12-a15 are restored before returning */
+	s32i	a13, sp, 20
+	s32i	a14, sp, 24
+	s32i	a15, sp, 28
+#elif XCHAL_NO_MUL
+	/* This is not really a leaf function; allocate enough stack space
+	   to allow CALL12s to a helper function.  */
+	leaf_entry sp, 48
+#else
+	leaf_entry sp, 16
+#endif
+
+#ifdef __XTENSA_EB__
+#define wh a2
+#define wl a3
+#else
+#define wh a3
+#define wl a2
+#endif /* __XTENSA_EB__ */
+
+	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
+	   See more comments there.  */
+
+#if XCHAL_HAVE_MUL32_HIGH
+	mull	a6, a2, a3	/* low word, kept in a6 until a2/a3 are read */
+	muluh	wh, a2, a3	/* high word */
+	mov	wl, a6
+
+#else /* ! MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+	/* a0 and a8 will be clobbered by calling the multiply function
+	   but a8 is not used here and need not be saved.  */
+	s32i	a0, sp, 0
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+	/* Get the high halves of the inputs into registers.  */
+	srli	a2h, a2, 16
+	srli	a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+	/* Clear the high halves of the inputs.  This does not matter
+	   for MUL16 because the high bits are ignored.  */
+	extui	a2, a2, 0, 16
+	extui	a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	mul16u	dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	mull	dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+   a period in the definition of do_mul below.  These macros are a workaround
+   using underscores instead of periods when doing the concatenation.  */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
+	rsr	dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+	extui	dst, src, 0, 16
+#define set_arg_h(dst, src) \
+	srli	dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	set_arg_ ## xhalf (a13, xreg); \
+	set_arg_ ## yhalf (a14, yreg); \
+	call0	.Lmul_mulsi3; \
+	mov	dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+	set_arg_ ## xhalf (a14, xreg); \
+	set_arg_ ## yhalf (a15, yreg); \
+	call12	.Lmul_mulsi3; \
+	mov	dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
+	do_mul(a6, a2, l, a3, h)	/* pp 1 */
+	do_mul(a11, a2, h, a3, l)	/* pp 2 */
+	movi	a9, 0
+	add	a6, a6, a11
+	bgeu	a6, a11, 1f	/* sum >= addend: the add did not wrap */
+	addi	a9, a9, 1
+1:
+	/* Shift the high half of a9/a6 into position in a9.  Note that
+	   this value can be safely incremented without any carry-outs.  */
+	ssai	16
+	src	a9, a9, a6
+
+	/* Compute the low word into a6.  */
+	do_mul(a11, a2, l, a3, l)	/* pp 0 */
+	sll	a6, a6		/* low half of pp1 + pp2, moved up 16 bits */
+	add	a6, a6, a11
+	bgeu	a6, a11, 1f	/* no carry out of the low word */
+	addi	a9, a9, 1
+1:
+	/* Compute the high word into wh.  */
+	do_mul(wh, a2, h, a3, h)	/* pp 3 */
+	add	wh, wh, a9
+	mov	wl, a6
+
+#endif /* !MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+	/* Restore the original return address.  */
+	l32i	a0, sp, 0
+#endif
+#if __XTENSA_CALL0_ABI__
+	l32i	a12, sp, 16
+	l32i	a13, sp, 20
+	l32i	a14, sp, 24
+	l32i	a15, sp, 28
+	addi	sp, sp, 32
+#endif
+	leaf_return
+
+#if XCHAL_NO_MUL
+
+	/* For Xtensa processors with no multiply hardware, this simplified
+	   version of _mulsi3 is used for multiplying the 16-bit halves of
+	   the two operands.  When using CALL0, this function
+	   uses a custom ABI: the inputs are passed in a13 and a14, the
+	   result is returned in a12, and a8 and a15 are clobbered.  */
+	.align	4
+.Lmul_mulsi3:
+	leaf_entry sp, 16
+	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+	movi	\dst, 0
+1:	add	\tmp1, \src2, \dst
+	extui	\tmp2, \src1, 0, 1
+	movnez	\dst, \tmp1, \tmp2
+
+	do_addx2 \tmp1, \src2, \dst, \tmp1
+	extui	\tmp2, \src1, 1, 1
+	movnez	\dst, \tmp1, \tmp2
+
+	do_addx4 \tmp1, \src2, \dst, \tmp1
+	extui	\tmp2, \src1, 2, 1
+	movnez	\dst, \tmp1, \tmp2
+
+	do_addx8 \tmp1, \src2, \dst, \tmp1
+	extui	\tmp2, \src1, 3, 1
+	movnez	\dst, \tmp1, \tmp2
+
+	srli	\src1, \src1, 4	/* consume four bits of src1 */
+	slli	\src2, \src2, 4	/* scale src2 to match */
+	bnez	\src1, 1b
+	.endm
+#if __XTENSA_CALL0_ABI__
+	mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+	/* The result will be written into a2, so save that argument in a4.  */
+	mov	a4, a2
+	mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+	leaf_return
+#endif /* XCHAL_NO_MUL */
+
+	.size	__umulsidi3, . - __umulsidi3
+
+#endif /* L_umulsidi3 */
+
+
+/* Define a macro for the NSAU (unsigned normalize shift amount)
+   instruction, which computes the number of leading zero bits,
+   to handle cases where it is not included in the Xtensa processor
+   configuration.  */
+
+	.macro	do_nsau cnt, val, tmp, a
+#if XCHAL_HAVE_NSA
+	nsau	\cnt, \val
+#else
+	mov	\a, \val	/* work on a copy; val is left unchanged */
+	movi	\cnt, 0
+	extui	\tmp, \a, 16, 16
+	bnez	\tmp, 0f	/* a bit is set in the upper 16 bits */
+	movi	\cnt, 16
+	slli	\a, \a, 16
+0:
+	extui	\tmp, \a, 24, 8
+	bnez	\tmp, 1f	/* a bit is set in the top byte */
+	addi	\cnt, \cnt, 8
+	slli	\a, \a, 8
+1:
+	movi	\tmp, __nsau_data
+	extui	\a, \a, 24, 8	/* top byte indexes the lookup table */
+	add	\tmp, \tmp, \a
+	l8ui	\tmp, \tmp, 0	/* leading zero count of that byte */
+	add	\cnt, \cnt, \tmp
+#endif /* !XCHAL_HAVE_NSA */
+	.endm
+
+#ifdef L_clz
+	.section .rodata
+	.align	4
+	.global	__nsau_data
+	.type	__nsau_data, @object
+__nsau_data:	/* entry i = number of leading zero bits in byte value i */
+#if !XCHAL_HAVE_NSA
+	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
+	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
+	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif /* !XCHAL_HAVE_NSA */
+	.size	__nsau_data, . - __nsau_data
+	.hidden	__nsau_data
+#endif /* L_clz */
+
+
+#ifdef L_clzsi2
+	.align	4
+	.global	__clzsi2
+	.type	__clzsi2, @function
+__clzsi2:
+	leaf_entry sp, 16
+	do_nsau	a2, a2, a3, a4	/* a2 = leading zero count of a2 */
+	leaf_return
+	.size	__clzsi2, . - __clzsi2
+
+#endif /* L_clzsi2 */
+
+
+#ifdef L_ctzsi2
+	.align	4
+	.global	__ctzsi2
+	.type	__ctzsi2, @function
+__ctzsi2:
+	leaf_entry sp, 16
+	neg	a3, a2
+	and	a3, a3, a2	/* a3 = a2 & -a2: only the lowest set bit */
+	do_nsau	a2, a3, a4, a5	/* leading zeros above that bit */
+	neg	a2, a2
+	addi	a2, a2, 31	/* trailing zero count = 31 - leading zeros */
+	leaf_return
+	.size	__ctzsi2, . - __ctzsi2
+
+#endif /* L_ctzsi2 */
+
+
+#ifdef L_ffssi2
+	.align	4
+	.global	__ffssi2
+	.type	__ffssi2, @function
+__ffssi2:
+	leaf_entry sp, 16
+	neg	a3, a2
+	and	a3, a3, a2	/* a3 = a2 & -a2: only the lowest set bit */
+	do_nsau	a2, a3, a4, a5	/* leading zeros above that bit */
+	neg	a2, a2
+	addi	a2, a2, 32	/* 1-based bit index = 32 - leading zeros */
+	leaf_return
+	.size	__ffssi2, . - __ffssi2
+
+#endif /* L_ffssi2 */
+
+
+#ifdef L_udivsi3
+	.align	4
+	.global	__udivsi3
+	.type	__udivsi3, @function
+__udivsi3:
+	leaf_entry sp, 16	/* returns the unsigned quotient a2 / a3 in a2 */
+#if XCHAL_HAVE_DIV32
+	quou	a2, a2, a3
+#else
+	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */
+
+	mov	a6, a2		/* keep dividend in a6 */
+	do_nsau	a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
+	do_nsau	a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
+	bgeu	a5, a4, .Lspecial	/* quotient can only be 0 or 1 */
+
+	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
+	ssl	a4
+	sll	a3, a3		/* divisor <<= count */
+	movi	a2, 0		/* quotient = 0 */
+
+	/* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+	loopnez	a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+	bltu	a6, a3, .Lzerobit
+	sub	a6, a6, a3
+	addi	a2, a2, 1
+.Lzerobit:
+	slli	a2, a2, 1
+	srli	a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+	addi	a4, a4, -1
+	bnez	a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+	bltu	a6, a3, .Lreturn
+	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */
+.Lreturn:
+	leaf_return
+
+.Lle_one:
+	beqz	a3, .Lerror	/* divisor == 0: force an exception */
+	leaf_return		/* divisor == 1: a2 still holds the dividend */
+
+.Lspecial:
+	/* return dividend >= divisor */
+	bltu	a6, a3, .Lreturn0
+	movi	a2, 1
+	leaf_return
+
+.Lerror:
+	/* Divide by zero: Use an illegal instruction to force an exception.
+	   The subsequent "DIV0" string can be recognized by the exception
+	   handler to identify the real cause of the exception.  */
+	ill
+	.ascii	"DIV0"
+
+.Lreturn0:
+	movi	a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+	leaf_return
+	.size	__udivsi3, . - __udivsi3
+
+#endif /* L_udivsi3 */
+
+
+#ifdef L_divsi3
+	.align	4
+	.global	__divsi3
+	.type	__divsi3, @function
+__divsi3:
+	leaf_entry sp, 16	/* returns the signed quotient a2 / a3 in a2 */
+#if XCHAL_HAVE_DIV32
+	quos	a2, a2, a3
+#else
+	xor	a7, a2, a3	/* sign = dividend ^ divisor */
+	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
+	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
+	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
+	do_nsau	a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
+	do_nsau	a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
+	bgeu	a5, a4, .Lspecial	/* |quotient| can only be 0 or 1 */
+
+	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
+	ssl	a4
+	sll	a3, a3		/* udivisor <<= count */
+	movi	a2, 0		/* quotient = 0 */
+
+	/* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+	loopnez	a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+	bltu	a6, a3, .Lzerobit
+	sub	a6, a6, a3
+	addi	a2, a2, 1
+.Lzerobit:
+	slli	a2, a2, 1
+	srli	a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+	addi	a4, a4, -1
+	bnez	a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+	bltu	a6, a3, .Lreturn
+	addi	a2, a2, 1	/* increment if udividend >= udivisor */
+.Lreturn:
+	neg	a5, a2
+	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */
+	leaf_return
+
+.Lle_one:
+	beqz	a3, .Lerror	/* udivisor == 0: force an exception */
+	neg	a2, a6		/* if udivisor == 1, then return... */
+	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */
+	leaf_return
+
+.Lspecial:
+	bltu	a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
+	movi	a2, 1
+	movi	a4, -1
+	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */
+	leaf_return
+
+.Lerror:
+	/* Divide by zero: Use an illegal instruction to force an exception.
+	   The subsequent "DIV0" string can be recognized by the exception
+	   handler to identify the real cause of the exception.  */
+	ill
+	.ascii	"DIV0"
+
+.Lreturn0:
+	movi	a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+	leaf_return
+	.size	__divsi3, . - __divsi3
+
+#endif /* L_divsi3 */
+
+
+#ifdef L_umodsi3
+	.align	4
+	.global	__umodsi3
+	.type	__umodsi3, @function
+__umodsi3:
+	leaf_entry sp, 16	/* returns the unsigned remainder a2 % a3 in a2 */
+#if XCHAL_HAVE_DIV32
+	remu	a2, a2, a3
+#else
+	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */
+
+	do_nsau	a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
+	do_nsau	a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
+	bgeu	a5, a4, .Lspecial	/* at most one subtraction is needed */
+
+	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
+	ssl	a4
+	sll	a3, a3		/* divisor <<= count */
+
+	/* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+	loopnez	a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+	bltu	a2, a3, .Lzerobit
+	sub	a2, a2, a3
+.Lzerobit:
+	srli	a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+	addi	a4, a4, -1
+	bnez	a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+	bltu	a2, a3, .Lreturn	/* a2 is already the remainder */
+	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */
+.Lreturn:
+	leaf_return
+
+.Lle_one:
+	bnez	a3, .Lreturn0	/* divisor == 1: the remainder is 0 */
+
+	/* Divide by zero: Use an illegal instruction to force an exception.
+	   The subsequent "DIV0" string can be recognized by the exception
+	   handler to identify the real cause of the exception.  */
+	ill
+	.ascii	"DIV0"
+
+.Lreturn0:
+	movi	a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+	leaf_return
+	.size	__umodsi3, . - __umodsi3
+
+#endif /* L_umodsi3 */
+
+
+#ifdef L_modsi3
+	.align	4
+	.global	__modsi3
+	.type	__modsi3, @function
+__modsi3:
+	leaf_entry sp, 16	/* returns the signed remainder a2 % a3 in a2 */
+#if XCHAL_HAVE_DIV32
+	rems	a2, a2, a3
+#else
+	mov	a7, a2		/* save original (signed) dividend */
+	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
+	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
+	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
+	do_nsau	a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
+	do_nsau	a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
+	bgeu	a5, a4, .Lspecial	/* at most one subtraction is needed */
+
+	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
+	ssl	a4
+	sll	a3, a3		/* udivisor <<= count */
+
+	/* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+	loopnez	a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+	bltu	a2, a3, .Lzerobit
+	sub	a2, a2, a3
+.Lzerobit:
+	srli	a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+	addi	a4, a4, -1
+	bnez	a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+	bltu	a2, a3, .Lreturn	/* a2 is already the remainder */
+	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */
+.Lreturn:
+	bgez	a7, .Lpositive	/* the result takes the dividend's sign */
+	neg	a2, a2		/* if (dividend < 0), return -udividend */
+.Lpositive:
+	leaf_return
+
+.Lle_one:
+	bnez	a3, .Lreturn0	/* udivisor == 1: the remainder is 0 */
+
+	/* Divide by zero: Use an illegal instruction to force an exception.
+	   The subsequent "DIV0" string can be recognized by the exception
+	   handler to identify the real cause of the exception.  */
+	ill
+	.ascii	"DIV0"
+
+.Lreturn0:
+	movi	a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+	leaf_return
+	.size	__modsi3, . - __modsi3
+
+#endif /* L_modsi3 */
+
+
+#ifdef __XTENSA_EB__
+#define uh a2
+#define ul a3
+#else
+#define uh a3
+#define ul a2
+#endif /* __XTENSA_EB__ */
+
+
+#ifdef L_ashldi3
+	.align	4
+	.global	__ashldi3
+	.type	__ashldi3, @function
+__ashldi3:
+	leaf_entry sp, 16	/* shift the 64-bit value uh/ul left by a4 bits */
+	ssl	a4
+	bgei	a4, 32, .Llow_only
+	src	uh, uh, ul	/* high word takes the top bits of ul */
+	sll	ul, ul
+	leaf_return
+
+.Llow_only:
+	sll	uh, ul		/* count >= 32: only bits from ul survive */
+	movi	ul, 0
+	leaf_return
+	.size	__ashldi3, . - __ashldi3
+
+#endif /* L_ashldi3 */
+
+
+#ifdef L_ashrdi3
+	.align	4
+	.global	__ashrdi3
+	.type	__ashrdi3, @function
+__ashrdi3:
+	leaf_entry sp, 16	/* arithmetic right shift of uh/ul by a4 bits */
+	ssr	a4
+	bgei	a4, 32, .Lhigh_only
+	src	ul, uh, ul	/* low word takes the low bits of uh */
+	sra	uh, uh
+	leaf_return
+
+.Lhigh_only:
+	sra	ul, uh		/* count >= 32: only bits from uh survive */
+	srai	uh, uh, 31	/* fill the high word with the sign */
+	leaf_return
+	.size	__ashrdi3, . - __ashrdi3
+
+#endif /* L_ashrdi3 */
+
+
+#ifdef L_lshrdi3
+	.align	4
+	.global	__lshrdi3
+	.type	__lshrdi3, @function
+__lshrdi3:
+	leaf_entry sp, 16	/* logical right shift of uh/ul by a4 bits */
+	ssr	a4
+	bgei	a4, 32, .Lhigh_only1
+	src	ul, uh, ul	/* low word takes the low bits of uh */
+	srl	uh, uh
+	leaf_return
+
+.Lhigh_only1:
+	srl	ul, uh		/* count >= 32: only bits from uh survive */
+	movi	uh, 0
+	leaf_return
+	.size	__lshrdi3, . - __lshrdi3
+
+#endif /* L_lshrdi3 */
+
+
+#include "ieee754-df.S"
+#include "ieee754-sf.S"
diff --git a/gcc-4.9/libgcc/config/xtensa/lib2funcs.S b/gcc-4.9/libgcc/config/xtensa/lib2funcs.S
new file mode 100644
index 000000000..3ac8c1da2
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/lib2funcs.S
@@ -0,0 +1,186 @@
+/* Assembly functions for libgcc2.
+   Copyright (C) 2001-2014 Free Software Foundation, Inc.
+   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "xtensa-config.h"
+
+/* __xtensa_libgcc_window_spill: This function flushes out all but the
+   current register window.  This is used to set up the stack so that
+   arbitrary frames can be accessed.  */
+
+	.align	4
+	.global	__xtensa_libgcc_window_spill
+	.type	__xtensa_libgcc_window_spill,@function
+__xtensa_libgcc_window_spill:
+	entry	sp, 32		/* NOTE(review): unconditional; windowed ABI only */
+	movi	a2, 0		/* presumably selects the spill service; confirm */
+	syscall
+	retw
+	.size	__xtensa_libgcc_window_spill, .-__xtensa_libgcc_window_spill
+
+
+/* __xtensa_nonlocal_goto: This code does all the hard work of a
+   nonlocal goto on Xtensa.  It is here in the library to avoid the
+   code size bloat of generating it in-line.  There are two
+   arguments:
+
+	a2 = frame pointer for the procedure containing the label
+	a3 = goto handler address
+
+  This function never returns to its caller but instead goes directly
+  to the address of the specified goto handler.  */
+
+	.align	4
+	.global	__xtensa_nonlocal_goto
+	.type	__xtensa_nonlocal_goto,@function
+__xtensa_nonlocal_goto:
+	entry	sp, 32		/* NOTE(review): unconditional; windowed ABI only */
+
+	/* Flush registers.  */
+	mov	a5, a2		/* a2 is reused for the syscall below */
+	movi	a2, 0
+	syscall
+	mov	a2, a5		/* restore the frame pointer argument */
+
+	/* Because the save area for a0-a3 is stored one frame below
+	   the one identified by a2, the only way to restore those
+	   registers is to unwind the stack.  If alloca() were never
+	   called, we could just unwind until finding the sp value
+	   matching a2.  However, a2 is a frame pointer, not a stack
+	   pointer, and may not be encountered during the unwinding.
+	   The solution is to unwind until going _past_ the value
+	   given by a2.  This involves keeping three stack pointer
+	   values during the unwinding:
+
+		next = sp of frame N-1
+		cur = sp of frame N
+		prev = sp of frame N+1
+
+	   When next > a2, the desired save area is stored relative
+	   to prev.  At this point, cur will be the same as a2
+	   except in the alloca() case.
+
+	   Besides finding the values to be restored to a0-a3, we also
+	   need to find the current window size for the target
+	   function.  This can be extracted from the high bits of the
+	   return address, initially in a0.  As the unwinding
+	   proceeds, the window size is taken from the value of a0
+	   saved _two_ frames below the current frame.  */
+
+	addi	a5, sp, -16	/* a5 = prev - save area */
+	l32i	a6, a5, 4	/* saved a1: the caller's sp */
+	addi	a6, a6, -16	/* a6 = cur - save area */
+	mov	a8, a0		/* a8 = return address (for window size) */
+	j	.Lfirstframe
+
+.Lnextframe:
+	l32i	a8, a5, 0	/* next return address (for window size) */
+	mov	a5, a6		/* advance prev */
+	addi	a6, a7, -16	/* advance cur */
+.Lfirstframe:
+	l32i	a7, a6, 4	/* a7 = next */
+	bgeu	a2, a7, .Lnextframe	/* keep unwinding until next > a2 */
+
+	/* At this point, prev (a5) points to the save area with the saved
+	   values of a0-a3.  Copy those values into the save area at the
+	   current sp so they will be reloaded when the return from this
+	   function underflows.  We don't have to worry about exceptions
+	   while updating the current save area, because the windows have
+	   already been flushed.  */
+
+	addi	a4, sp, -16	/* a4 = save area of this function */
+	l32i	a6, a5, 0
+	l32i	a7, a5, 4
+	s32i	a6, a4, 0
+	s32i	a7, a4, 4
+	l32i	a6, a5, 8
+	l32i	a7, a5, 12
+	s32i	a6, a4, 8
+	s32i	a7, a4, 12
+
+	/* Set return address to goto handler.  Use the window size bits
+	   from the return address two frames below the target.  */
+	extui	a8, a8, 30, 2	/* get window size from return addr. */
+	slli	a3, a3, 2	/* get goto handler addr. << 2 */
+	ssai	2
+	src	a0, a8, a3	/* combine them with a funnel shift */
+
+	retw			/* "returns" into the goto handler */
+	.size	__xtensa_nonlocal_goto, .-__xtensa_nonlocal_goto
+
+
+/* __xtensa_sync_caches: called after a trampoline is written on the stack;
+   a2 = trampoline address.  Only a2-a4 are clobbered, so the body is the
+   same for both ABIs; just the windowed ENTRY/RETW pair is conditional,
+   with a plain RET used for CALL0 or non-windowed configurations.
+   D-cache: write the trampoline back to memory with one DHWB per line it
+   may touch (needed for writeback caches), followed by an ISYNC.
+   I-cache: invalidate with one IHI per line it may touch so the new code
+   is used; the final ISYNC makes sure the modified instructions are loaded
+   into the instruction fetch buffer.  */
+
+/* Use the maximum trampoline size.  Flushing a bit extra is OK.  */
+#define TRAMPOLINE_SIZE 60
+
+	.text
+	.align	4
+	.global	__xtensa_sync_caches
+	.type	__xtensa_sync_caches,@function
+__xtensa_sync_caches:
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+	entry	sp, 32
+#endif
+#if XCHAL_DCACHE_SIZE > 0
+	/* Flush the trampoline from the data cache.  */
+	extui	a4, a2, 0, XCHAL_DCACHE_LINEWIDTH	/* offset within the line */
+	addi	a4, a4, TRAMPOLINE_SIZE
+	addi	a4, a4, (1 << XCHAL_DCACHE_LINEWIDTH) - 1
+	srli	a4, a4, XCHAL_DCACHE_LINEWIDTH	/* a4 = lines to write back */
+	mov	a3, a2		/* keep a2 intact for the i-cache pass */
+.Ldcache_loop:
+	dhwb	a3, 0
+	addi	a3, a3, (1 << XCHAL_DCACHE_LINEWIDTH)
+	addi	a4, a4, -1
+	bnez	a4, .Ldcache_loop
+	isync
+#endif
+#if XCHAL_ICACHE_SIZE > 0
+	/* Invalidate the corresponding lines in the instruction cache.  */
+	extui	a4, a2, 0, XCHAL_ICACHE_LINEWIDTH	/* offset within the line */
+	addi	a4, a4, TRAMPOLINE_SIZE
+	addi	a4, a4, (1 << XCHAL_ICACHE_LINEWIDTH) - 1
+	srli	a4, a4, XCHAL_ICACHE_LINEWIDTH	/* a4 = lines to invalidate */
+.Licache_loop:
+	ihi	a2, 0
+	addi	a2, a2, (1 << XCHAL_ICACHE_LINEWIDTH)
+	addi	a4, a4, -1
+	bnez	a4, .Licache_loop
+#endif
+	isync
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+	retw
+#else
+	ret
+#endif
+	.size	__xtensa_sync_caches, .-__xtensa_sync_caches
diff --git a/gcc-4.9/libgcc/config/xtensa/libgcc-glibc.ver b/gcc-4.9/libgcc/config/xtensa/libgcc-glibc.ver
new file mode 100644
index 000000000..43e7d4fc7
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/libgcc-glibc.ver
@@ -0,0 +1,3 @@
+GCC_4.3.0 {
+  __umulsidi3
+}
diff --git a/gcc-4.9/libgcc/config/xtensa/linux-unwind.h b/gcc-4.9/libgcc/config/xtensa/linux-unwind.h
new file mode 100644
index 000000000..6832d0b48
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/linux-unwind.h
@@ -0,0 +1,97 @@
+/* DWARF2 EH unwinding support for Xtensa.
+   Copyright (C) 2008-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Do code reading to identify a signal frame, and set the frame
+   state data appropriately.  See unwind-dw2-xtensa.c for the structs.
+   Don't use this at all if inhibit_libc is used.  */
+
+#ifndef inhibit_libc
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+/* Encoded bytes for Xtensa instructions:
+	movi a2, __NR_rt_sigreturn
+	syscall
+	entry (first byte only)
+   Some of the bytes are endian-dependent.  */
+
+#define MOVI_BYTE0 0x22 /* movi a2: byte 0, not endian-dependent */
+#define MOVI_BYTE2 225 /* movi a2: byte 2, __NR_rt_sigreturn */
+#define SYSC_BYTE0 0 /* syscall: byte 0, not endian-dependent */
+#define SYSC_BYTE2 0 /* syscall: byte 2, not endian-dependent */
+
+#ifdef __XTENSA_EB__ /* endian-dependent bytes follow */
+#define MOVI_BYTE1 0x0a
+#define SYSC_BYTE1 0x05
+#define ENTRY_BYTE 0x6c
+#else /* !__XTENSA_EB__ */
+#define MOVI_BYTE1 0xa0
+#define SYSC_BYTE1 0x50
+#define ENTRY_BYTE 0x36
+#endif /* __XTENSA_EB__ */
+
+#define MD_FALLBACK_FRAME_STATE_FOR xtensa_fallback_frame_state
+
+/* Fallback used when no FDE covers CONTEXT->ra.  If the code there is the
+   "movi a2, __NR_rt_sigreturn; syscall" sequence, describe the signal frame
+   at CONTEXT->sp in FS; otherwise return _URC_END_OF_STACK.  */
+static _Unwind_Reason_Code
+xtensa_fallback_frame_state (struct _Unwind_Context *context,
+			     _Unwind_FrameState *fs)
+{
+  unsigned char *pc = context->ra;
+  struct sigcontext *sc;
+
+  /* ucontext_t, not "struct ucontext": glibc 2.26 dropped that struct tag.  */
+  struct rt_sigframe {
+    siginfo_t info;
+    ucontext_t uc;
+  } *rt_;
+
+  /* movi a2, __NR_rt_sigreturn; syscall */
+  if (pc[0] != MOVI_BYTE0 || pc[1] != MOVI_BYTE1 || pc[2] != MOVI_BYTE2
+      || pc[3] != SYSC_BYTE0 || pc[4] != SYSC_BYTE1 || pc[5] != SYSC_BYTE2)
+    return _URC_END_OF_STACK;
+
+  rt_ = context->sp;
+  sc = &rt_->uc.uc_mcontext;
+  fs->signal_regs = (_Unwind_Word *) sc->sc_a;
+
+  /* If the signal arrived just before an ENTRY instruction, find the return
+     address and pretend the signal arrived before executing the CALL.  */
+  if (*(unsigned char *) sc->sc_pc == ENTRY_BYTE)
+    {
+      unsigned callinc = (sc->sc_ps >> 16) & 3;
+      fs->signal_ra = ((sc->sc_a[callinc << 2] & XTENSA_RA_FIELD_MASK)
+		       | context->ra_high_bits) - 3;
+    }
+  else
+    fs->signal_ra = sc->sc_pc;
+
+  fs->signal_frame = 1;
+  return _URC_NO_REASON;
+}
+
+#endif /* ifndef inhibit_libc  */
diff --git a/gcc-4.9/libgcc/config/xtensa/t-elf b/gcc-4.9/libgcc/config/xtensa/t-elf
new file mode 100644
index 000000000..59d51210b
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/t-elf
@@ -0,0 +1,5 @@
+# Build CRT files and libgcc with the "longcalls" option
+CRTSTUFF_T_CFLAGS += -mlongcalls
+CRTSTUFF_T_CFLAGS_S += -mlongcalls
+
+HOST_LIBGCC2_CFLAGS += -mlongcalls
diff --git a/gcc-4.9/libgcc/config/xtensa/t-linux b/gcc-4.9/libgcc/config/xtensa/t-linux
new file mode 100644
index 000000000..6f4ae8934
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/t-linux
@@ -0,0 +1 @@
+SHLIB_MAPFILES += $(srcdir)/config/xtensa/libgcc-glibc.ver
diff --git a/gcc-4.9/libgcc/config/xtensa/t-xtensa b/gcc-4.9/libgcc/config/xtensa/t-xtensa
new file mode 100644
index 000000000..27399e67f
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/t-xtensa
@@ -0,0 +1,16 @@
+LIB1ASMSRC = xtensa/lib1funcs.S
+LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
+	_umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \
+	_ashldi3 _ashrdi3 _lshrdi3 \
+	_negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
+	_fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
+	_floatdisf _floatundisf \
+	_negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
+	_fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
+	_floatdidf _floatundidf \
+	_truncdfsf2 _extendsfdf2
+
+LIB2ADD = $(srcdir)/config/xtensa/lib2funcs.S
+
+LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \
+   $(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
diff --git a/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.c b/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.c
new file mode 100644
index 000000000..35f7797d4
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.c
@@ -0,0 +1,543 @@
+/* DWARF2 exception handling and frame unwinding for Xtensa.
+   Copyright (C) 1997-2014 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "libgcc_tm.h"
+#include "dwarf2.h"
+#include "unwind.h"
+#ifdef __USING_SJLJ_EXCEPTIONS__
+# define NO_SIZE_OF_ENCODED_VALUE
+#endif
+#include "unwind-pe.h"
+#include "unwind-dw2-fde.h"
+#include "unwind-dw2-xtensa.h"
+
+#ifndef __USING_SJLJ_EXCEPTIONS__
+
+/* The standard CIE and FDE structures work fine for Xtensa but the
+   variable-size register window save areas are not a good fit for the rest
+   of the standard DWARF unwinding mechanism.  Nor is that mechanism
+   necessary, since the register save areas are always in fixed locations
+   in each stack frame.  This file is a stripped down and customized version
+   of the standard DWARF unwinding code.  It needs to be customized to have
+   builtin logic for finding the save areas and also to track the stack
+   pointer value (besides the CFA) while unwinding since the primary save
+   area is located below the stack pointer.  It is stripped down to reduce
+   code size and ease the maintenance burden of tracking changes in the
+   standard version of the code.  */
+
+#ifndef DWARF_REG_TO_UNWIND_COLUMN /* Identity unless already defined.  */
+#define DWARF_REG_TO_UNWIND_COLUMN(REGNO) (REGNO)
+#endif
+
+#define XTENSA_RA_FIELD_MASK 0x3FFFFFFF /* All but the 2 high bits.  */
+
+/* This is the register and unwind state for a particular frame.  This
+   provides the information necessary to unwind up past a frame and return
+   to its caller.  */
+struct _Unwind_Context
+{
+  /* Track register window save areas of 4 registers each, instead of
+     keeping separate addresses for the individual registers.  */
+  _Unwind_Word *reg[4];	/* Register N is reg[N >> 2][N & 3].  */
+
+  void *cfa;	/* Becomes cfa[-3] at each unwind step.  */
+  void *sp;	/* Becomes the previous cfa at each unwind step.  */
+  void *ra;	/* Return address, as reported by _Unwind_GetIP.  */
+
+  /* Cache the 2 high bits to replace the window size in return addresses.  */
+  _Unwind_Word ra_high_bits;
+
+  void *lsda;	/* Set by uw_frame_state_for; 0 when none was found.  */
+  struct dwarf_eh_bases bases;	/* Filled in by _Unwind_Find_FDE.  */
+  /* Signal frame context.  SIGNAL_FRAME_BIT is the top bit of flags.  */
+#define SIGNAL_FRAME_BIT ((~(_Unwind_Word) 0 >> 1) + 1)
+  _Unwind_Word flags;
+  /* 0 for now, can be increased when further fields are added to
+     struct _Unwind_Context.  */
+  _Unwind_Word version;
+};
+
+
+/* Read unaligned data from the instruction buffer.  The union is packed
+   so that read_pointer may load through it from any byte address.  */
+union unaligned
+{
+  void *p;
+} __attribute__ ((packed));
+
+static void uw_update_context (struct _Unwind_Context *, _Unwind_FrameState *);
+static _Unwind_Reason_Code uw_frame_state_for (struct _Unwind_Context *,
+					       _Unwind_FrameState *);
+
+static inline void * /* P may be unaligned.  */
+read_pointer (const void *p) { return ((const union unaligned *) p)->p; }
+
+static inline _Unwind_Word
+_Unwind_IsSignalFrame (struct _Unwind_Context *context)
+{
+  return (context->flags & SIGNAL_FRAME_BIT) != 0; /* 1 if flagged, else 0.  */
+}
+
+static inline void
+_Unwind_SetSignalFrame (struct _Unwind_Context *context, int val)
+{
+  /* Drop the flag first, then raise it again for a nonzero VAL.  */
+  context->flags &= ~SIGNAL_FRAME_BIT;
+  if (val)
+    context->flags |= SIGNAL_FRAME_BIT;
+}
+
+/* Get the value of register INDEX as saved in CONTEXT.  After column
+   mapping, the register is word (column & 3) of the save area
+   CONTEXT->reg[column >> 2].  */
+
+inline _Unwind_Word
+_Unwind_GetGR (struct _Unwind_Context *context, int index)
+{
+  int column = DWARF_REG_TO_UNWIND_COLUMN (index);
+  _Unwind_Word *area = context->reg[column >> 2];
+
+  return area[column & 3];
+}
+
+/* Get the value of the CFA as saved in CONTEXT.  This is CONTEXT->sp: the
+   cfa member is one frame further out (see uw_update_context_1).  */
+_Unwind_Word
+_Unwind_GetCFA (struct _Unwind_Context *context)
+{
+  return (_Unwind_Ptr) context->sp;
+}
+
+/* Overwrite the saved value for register INDEX in CONTEXT with VAL.  After
+   column mapping, the register is word (column & 3) of the save area
+   CONTEXT->reg[column >> 2].  */
+
+inline void
+_Unwind_SetGR (struct _Unwind_Context *context, int index, _Unwind_Word val)
+{
+  int column = DWARF_REG_TO_UNWIND_COLUMN (index);
+  _Unwind_Word *area = context->reg[column >> 2];
+
+  area[column & 3] = val;
+}
+
+/* Retrieve the return address for CONTEXT (its ra member, as an integer).  */
+
+inline _Unwind_Ptr
+_Unwind_GetIP (struct _Unwind_Context *context)
+{
+  return (_Unwind_Ptr) context->ra;
+}
+
+/* Retrieve the return address for CONTEXT.  *IP_BEFORE_INSN is set to 1
+   when CONTEXT is flagged as a signal frame and to 0 otherwise.  */
+
+inline _Unwind_Ptr
+_Unwind_GetIPInfo (struct _Unwind_Context *context, int *ip_before_insn)
+{
+  *ip_before_insn = (context->flags & SIGNAL_FRAME_BIT) ? 1 : 0;
+  return (_Unwind_Ptr) context->ra;
+}
+
+/* Overwrite the return address for CONTEXT (its ra member) with VAL.  */
+
+inline void
+_Unwind_SetIP (struct _Unwind_Context *context, _Unwind_Ptr val)
+{
+  context->ra = (void *) val;
+}
+
+void *
+_Unwind_GetLanguageSpecificData (struct _Unwind_Context *context)
+{
+  return context->lsda; /* Set by uw_frame_state_for; 0 when none was found.  */
+}
+
+_Unwind_Ptr
+_Unwind_GetRegionStart (struct _Unwind_Context *context)
+{
+  return (_Unwind_Ptr) context->bases.func; /* bases: see uw_frame_state_for.  */
+}
+
+void *
+_Unwind_FindEnclosingFunction (void *pc)
+{
+  struct dwarf_eh_bases bases;
+
+  /* The lookup uses PC - 1; no FDE there means no enclosing function.  */
+  if (_Unwind_Find_FDE (pc - 1, &bases) == NULL)
+    return NULL;
+  return bases.func;
+}
+
+_Unwind_Ptr
+_Unwind_GetDataRelBase (struct _Unwind_Context *context)
+{
+  return (_Unwind_Ptr) context->bases.dbase; /* bases: see uw_frame_state_for.  */
+}
+
+_Unwind_Ptr
+_Unwind_GetTextRelBase (struct _Unwind_Context *context)
+{
+  return (_Unwind_Ptr) context->bases.tbase; /* bases: see uw_frame_state_for.  */
+}
+
+#include "md-unwind-support.h"
+
+/* Decode CIE into FS (return address column, encodings, personality, signal
+   flag).  Return a pointer to the byte after the augmentation data, or NULL
+   if an unknown augmentation character is met without a leading 'z'.  */
+
+static const unsigned char *
+extract_cie_info (const struct dwarf_cie *cie, struct _Unwind_Context *context,
+		  _Unwind_FrameState *fs)
+{
+  const unsigned char *aug = cie->augmentation;
+  const unsigned char *p = aug + strlen ((const char *)aug) + 1;
+  const unsigned char *ret = NULL;	/* End of the 'z' data, once known.  */
+  _uleb128_t utmp;
+  _sleb128_t stmp;
+
+  /* g++ v2 "eh" has pointer immediately following augmentation string,
+     so it must be handled first.  */
+  if (aug[0] == 'e' && aug[1] == 'h')
+    {
+      fs->eh_ptr = read_pointer (p);
+      p += sizeof (void *);
+      aug += 2;
+    }
+
+  /* Immediately following the augmentation are the code and
+     data alignment and return address column.  */
+  p = read_uleb128 (p, &utmp);	/* Code alignment; value not used.  */
+  p = read_sleb128 (p, &stmp);	/* Data alignment; value not used.  */
+  if (cie->version == 1)
+    fs->retaddr_column = *p++;
+  else
+    {
+      p = read_uleb128 (p, &utmp);
+      fs->retaddr_column = (_Unwind_Word)utmp;
+    }
+  fs->lsda_encoding = DW_EH_PE_omit;	/* Until an 'L' says otherwise.  */
+
+  /* If the augmentation starts with 'z', then a uleb128 immediately
+     follows containing the length of the augmentation field following
+     the size.  */
+  if (*aug == 'z')
+    {
+      p = read_uleb128 (p, &utmp);
+      ret = p + utmp;	/* First byte after the augmentation data.  */
+
+      fs->saw_z = 1;
+      ++aug;
+    }
+
+  /* Iterate over recognized augmentation subsequences.  */
+  while (*aug != '\0')
+    {
+      /* "L" indicates a byte showing how the LSDA pointer is encoded.  */
+      if (aug[0] == 'L')
+	{
+	  fs->lsda_encoding = *p++;
+	  aug += 1;
+	}
+
+      /* "R" indicates a byte indicating how FDE addresses are encoded.  */
+      else if (aug[0] == 'R')
+	{
+	  fs->fde_encoding = *p++;
+	  aug += 1;
+	}
+
+      /* "P" indicates a personality routine in the CIE augmentation.  */
+      else if (aug[0] == 'P')
+	{
+	  _Unwind_Ptr personality;
+	  
+	  p = read_encoded_value (context, *p, p + 1, &personality);
+	  fs->personality = (_Unwind_Personality_Fn) personality;
+	  aug += 1;
+	}
+
+      /* "S" indicates a signal frame.  */
+      else if (aug[0] == 'S')
+	{
+	  fs->signal_frame = 1;
+	  aug += 1;
+	}
+
+      /* Otherwise we have an unknown augmentation string.
+	 Bail unless we saw a 'z' prefix.  */
+      else
+	return ret;
+    }
+
+  return ret ? ret : p;	/* Without 'z', P is already past the data.  */
+}
+
+/* Given the _Unwind_Context CONTEXT for a stack frame, look up the FDE for
+   its caller and decode it into FS.  This function also sets the lsda
+   member of CONTEXT, as it is really information about the caller's frame.
+   FS->retaddr_column ends up holding the register window size.  */
+
+static _Unwind_Reason_Code
+uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+  const struct dwarf_fde *fde;
+  const struct dwarf_cie *cie;
+  const unsigned char *aug;
+  int window_size;
+  _Unwind_Word *ra_ptr;
+
+  memset (fs, 0, sizeof (*fs));
+  context->lsda = 0;
+
+  fde = _Unwind_Find_FDE (context->ra + _Unwind_IsSignalFrame (context) - 1,
+			  &context->bases);
+  if (fde == NULL)
+    {
+#ifdef MD_FALLBACK_FRAME_STATE_FOR
+      _Unwind_Reason_Code reason;
+      /* Couldn't find frame unwind info for this function.  Try a
+	 target-specific fallback mechanism.  This will necessarily
+	 not provide a personality routine or LSDA.  */
+      reason = MD_FALLBACK_FRAME_STATE_FOR (context, fs);
+      if (reason != _URC_END_OF_STACK)
+	return reason;
+#endif
+      /* The frame was not recognized and handled by the fallback function,
+	 but it is not really the end of the stack.  Fall through here and
+	 unwind it anyway.  */
+    }
+  else
+    {
+      cie = get_cie (fde);
+      if (extract_cie_info (cie, context, fs) == NULL)
+	/* CIE contained unknown augmentation.  */
+	return _URC_FATAL_PHASE1_ERROR;
+
+      /* Locate augmentation for the fde.  */
+      aug = (const unsigned char *) fde + sizeof (*fde);
+      aug += 2 * size_of_encoded_value (fs->fde_encoding);
+      if (fs->saw_z)
+	{
+	  _uleb128_t i;
+	  aug = read_uleb128 (aug, &i);
+	}
+      if (fs->lsda_encoding != DW_EH_PE_omit)
+	{
+	  _Unwind_Ptr lsda;
+
+	  aug = read_encoded_value (context, fs->lsda_encoding, aug, &lsda);
+	  context->lsda = (void *) lsda;
+	}
+    }
+
+  /* Check for the end of the stack.  This needs to be checked after
+     the MD_FALLBACK_FRAME_STATE_FOR check for signal frames because
+     the contents of context->reg[0] are undefined at a signal frame,
+     and register a0 may appear to be zero.  (The return address in
+     context->ra comes from register a4 or a8).  */
+  ra_ptr = context->reg[0];
+  if (ra_ptr && *ra_ptr == 0)
+    return _URC_END_OF_STACK;
+
+  /* Find the window size from the high bits of the return address.  */
+  if (ra_ptr)
+    window_size = (*ra_ptr >> 30) * 4;
+  else
+    window_size = 8;	/* No a0 save area is known yet.  */
+
+  fs->retaddr_column = window_size;	/* Replaces the column read from the CIE.  */
+
+  return _URC_NO_REASON;
+}
+
+static void
+uw_update_context_1 (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+  struct _Unwind_Context orig_context = *context;	/* CONTEXT changes below.  */
+  _Unwind_Word *sp, *cfa, *next_cfa;
+  int i;
+
+  if (fs->signal_regs)	/* Non-null only for a signal frame.  */
+    {
+      cfa = (_Unwind_Word *) fs->signal_regs[1];	/* Saved a1.  */
+      next_cfa = (_Unwind_Word *) cfa[-3];
+
+      for (i = 0; i < 4; i++)
+	context->reg[i] = fs->signal_regs + (i << 2);
+    }
+  else
+    {
+      int window_size = fs->retaddr_column >> 2;	/* In 4-register areas.  */
+
+      sp = (_Unwind_Word *) orig_context.sp;
+      cfa = (_Unwind_Word *) orig_context.cfa;
+      next_cfa = (_Unwind_Word *) cfa[-3];	/* Word 1 of the area below cfa.  */
+
+      /* Registers a0-a3 are in the save area below sp.  */
+      context->reg[0] = sp - 4;
+
+      /* Find the extra save area below next_cfa.  */
+      for (i = 1; i < window_size; i++)
+	context->reg[i] = next_cfa - 4 * (1 + window_size - i);
+
+      /* Remaining registers rotate from previous save areas.  */
+      for (i = window_size; i < 4; i++)
+	context->reg[i] = orig_context.reg[i - window_size];
+    }
+
+  context->sp = cfa;		/* The old CFA becomes the new sp ...  */
+  context->cfa = next_cfa;	/* ... and the CFA moves one frame out.  */
+
+  _Unwind_SetSignalFrame (context, fs->signal_frame);
+}
+
+/* CONTEXT describes the unwind state for a frame, and FS describes the FDE
+   of its caller.  Update CONTEXT to refer to the caller as well; its lsda
+   member is only updated later, in uw_frame_state_for.  */
+
+static void
+uw_update_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+  _Unwind_Word ra;
+
+  uw_update_context_1 (context, fs);
+  /* A nonzero FS->signal_ra is used as is; otherwise take the return address
+     column and replace its top 2 bits with the cached high bits.  */
+  ra = fs->signal_ra;
+  if (ra == 0)
+    ra = ((_Unwind_GetGR (context, fs->retaddr_column) & XTENSA_RA_FIELD_MASK)
+	  | context->ra_high_bits);
+  context->ra = (void *) ra;
+}
+
+static void
+uw_advance_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+  uw_update_context (context, fs); /* Plain forwarder; nothing else to do.  */
+}
+
+/* Fill in CONTEXT for top-of-stack, where only the return address and the
+   CFA are valid.  A macro, so that the builtins see the expanding frame.  */
+
+#define uw_init_context(CONTEXT)					   \
+  do									   \
+    {									   \
+      __builtin_unwind_init ();						   \
+      uw_init_context_1 (CONTEXT, __builtin_dwarf_cfa (),		   \
+			 __builtin_return_address (0));			   \
+    }									   \
+  while (0)
+
+static void __attribute__((noinline))
+uw_init_context_1 (struct _Unwind_Context *context, void *outer_cfa,
+		   void *outer_ra)
+{
+  void *ra = __builtin_return_address (0);	/* Into our caller.  */
+  void *cfa = __builtin_dwarf_cfa ();		/* CFA of this function.  */
+  _Unwind_FrameState fs;
+
+  memset (context, 0, sizeof (struct _Unwind_Context));
+  context->ra = ra;
+
+  memset (&fs, 0, sizeof (fs));
+  fs.retaddr_column = 8;	/* One step with a window size of 8.  */
+  context->sp = cfa;
+  context->cfa = outer_cfa;
+  context->ra_high_bits =	/* Top 2 bits of a code address.  */
+    ((_Unwind_Word) uw_init_context_1) & ~XTENSA_RA_FIELD_MASK;
+  uw_update_context_1 (context, &fs);
+
+  context->ra = outer_ra;	/* Replaces the value stored above.  */
+}
+
+
+/* Install TARGET into CURRENT and return to TARGET->ra.  This is a macro
+   because __builtin_eh_return must be invoked in the context of our caller;
+   the offset passed to it is always 0 (see uw_install_context_1).  */
+
+#define uw_install_context(CURRENT, TARGET)				 \
+  do									 \
+    {									 \
+      long offset = uw_install_context_1 ((CURRENT), (TARGET));		 \
+      void *handler = __builtin_frob_return_addr ((TARGET)->ra);	 \
+      __builtin_eh_return (offset, handler);				 \
+    }									 \
+  while (0)
+
+static long
+uw_install_context_1 (struct _Unwind_Context *current,
+		      struct _Unwind_Context *target)
+{
+  long area;
+
+  /* Only the a0-a3 and a4-a7 save areas are copied: the eh_return insn
+     assumes a window size of 8, so a8-a15 would never be reloaded.  */
+  for (area = 0; area < 2; ++area)
+    {
+      void *dst = current->reg[area];
+      void *src = target->reg[area];
+
+      if (!src || !dst || src == dst)
+	continue;
+      memcpy (dst, src, 4 * sizeof (_Unwind_Word));
+    }
+  return 0;	/* Used as the offset argument of __builtin_eh_return.  */
+}
+
+static inline _Unwind_Ptr
+uw_identify_context (struct _Unwind_Context *context)
+{
+  return _Unwind_GetCFA (context); /* The CFA serves as the frame's identity.  */
+}
+
+
+#include "unwind.inc"
+
+#if defined (USE_GAS_SYMVER) && defined (SHARED) && defined (USE_LIBUNWIND_EXCEPTIONS)
+alias (_Unwind_Backtrace);
+alias (_Unwind_DeleteException);
+alias (_Unwind_FindEnclosingFunction);
+alias (_Unwind_ForcedUnwind);
+alias (_Unwind_GetDataRelBase);
+alias (_Unwind_GetTextRelBase);
+alias (_Unwind_GetCFA);
+alias (_Unwind_GetGR);
+alias (_Unwind_GetIP);
+alias (_Unwind_GetLanguageSpecificData);
+alias (_Unwind_GetRegionStart);
+alias (_Unwind_RaiseException);
+alias (_Unwind_Resume);
+alias (_Unwind_Resume_or_Rethrow);
+alias (_Unwind_SetGR);
+alias (_Unwind_SetIP);
+#endif /* USE_GAS_SYMVER && SHARED && USE_LIBUNWIND_EXCEPTIONS */
+
+#endif /* !__USING_SJLJ_EXCEPTIONS__ */
diff --git a/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.h b/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.h
new file mode 100644
index 000000000..c962ebb13
--- /dev/null
+++ b/gcc-4.9/libgcc/config/xtensa/unwind-dw2-xtensa.h
@@ -0,0 +1,49 @@
+/* DWARF2 frame unwind data structure for Xtensa.
+   Copyright (C) 1997-2014 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* A target can override (perhaps for backward compatibility) how
+   many dwarf2 columns are unwound.  */
+#ifndef DWARF_FRAME_REGISTERS
+#define DWARF_FRAME_REGISTERS FIRST_PSEUDO_REGISTER
+#endif /* DWARF_FRAME_REGISTERS */
+
+/* Xtensa's variable-size register window save areas can be unwound without
+   any unwind info.  This is a stripped down version of the standard DWARF
+   _Unwind_FrameState.  */
+typedef struct
+{
+  /* The information we care about from the CIE/FDE.  */
+  _Unwind_Personality_Fn personality;	/* From a 'P' augmentation.  */
+  _Unwind_Word retaddr_column;	/* Register holding the return address.  */
+  unsigned char fde_encoding;	/* From an 'R' augmentation.  */
+  unsigned char lsda_encoding;	/* From 'L'; DW_EH_PE_omit if absent.  */
+  unsigned char saw_z;		/* Augmentation string began with 'z'.  */
+  unsigned char signal_frame;	/* Nonzero for a signal frame.  */
+  void *eh_ptr;			/* Pointer following an "eh" string.  */
+
+  /* Saved registers for a signal frame.  */
+  _Unwind_Word *signal_regs;	/* Null when not a signal frame.  */
+  _Unwind_Word signal_ra;	/* If nonzero, used as the return address.  */
+} _Unwind_FrameState;
+