Diffstat (limited to 'gcc-4.9/libgcc/config/avr')
-rw-r--r--  gcc-4.9/libgcc/config/avr/avr-lib.h           105
-rw-r--r--  gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S  1915
-rw-r--r--  gcc-4.9/libgcc/config/avr/lib1funcs.S        3226
-rw-r--r--  gcc-4.9/libgcc/config/avr/lib2-object.mk       23
-rw-r--r--  gcc-4.9/libgcc/config/avr/lib2funcs.c         226
-rw-r--r--  gcc-4.9/libgcc/config/avr/t-avr               277
-rw-r--r--  gcc-4.9/libgcc/config/avr/t-avrlibc            66
-rw-r--r--  gcc-4.9/libgcc/config/avr/t-rtems               2
8 files changed, 5840 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/avr/avr-lib.h b/gcc-4.9/libgcc/config/avr/avr-lib.h
new file mode 100644
index 000000000..37d5fa1a7
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/avr-lib.h
@@ -0,0 +1,105 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef FLOAT
+#define CMPtype QItype
+#define DF SF
+#define DI SI
+typedef int QItype __attribute__ ((mode (QI)));
+#endif
+
+/* fixed-bit.h does not define functions for TA and UTA because
+ that part is wrapped in #if MIN_UNITS_PER_WORD > 4.
+ This would lead to empty functions for TA and UTA.
+ Thus, supply appropriate defines as if HAVE_[U]TA == 1.
+ #define HAVE_[U]TA 1 won't work because avr-modes.def
+ uses ADJUST_BYTESIZE(TA,8) and fixed-bit.h is not generic enough
+ to arrange for such changes of the mode size. */
+
+typedef unsigned _Fract UTAtype __attribute__ ((mode (UTA)));
+
+#if defined (UTA_MODE)
+#define FIXED_SIZE 8 /* in bytes */
+#define INT_C_TYPE UDItype
+#define UINT_C_TYPE UDItype
+#define HINT_C_TYPE USItype
+#define HUINT_C_TYPE USItype
+#define MODE_NAME UTA
+#define MODE_NAME_S uta
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (FROM_UTA)
+#define FROM_TYPE 4 /* ID for fixed-point */
+#define FROM_MODE_NAME UTA
+#define FROM_MODE_NAME_S uta
+#define FROM_INT_C_TYPE UDItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 8 /* in bytes */
+#elif defined (TO_UTA)
+#define TO_TYPE 4 /* ID for fixed-point */
+#define TO_MODE_NAME UTA
+#define TO_MODE_NAME_S uta
+#define TO_INT_C_TYPE UDItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 8 /* in bytes */
+#endif
+
+/* Same for TAmode */
+
+typedef _Fract TAtype __attribute__ ((mode (TA)));
+
+#if defined (TA_MODE)
+#define FIXED_SIZE 8 /* in bytes */
+#define INT_C_TYPE DItype
+#define UINT_C_TYPE UDItype
+#define HINT_C_TYPE SItype
+#define HUINT_C_TYPE USItype
+#define MODE_NAME TA
+#define MODE_NAME_S ta
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (FROM_TA)
+#define FROM_TYPE 4 /* ID for fixed-point */
+#define FROM_MODE_NAME TA
+#define FROM_MODE_NAME_S ta
+#define FROM_INT_C_TYPE DItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 8 /* in bytes */
+#elif defined (TO_TA)
+#define TO_TYPE 4 /* ID for fixed-point */
+#define TO_MODE_NAME TA
+#define TO_MODE_NAME_S ta
+#define TO_INT_C_TYPE DItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 8 /* in bytes */
+#endif
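
For orientation, a hedged host-side illustration of what these macro blocks describe: a TA/UTA value occupies an 8-byte container (FIXED_SIZE 8, carried in DItype/UDItype). The 16.48 split assumed below is the usual AVR layout for the unsigned long long accumulator, but it is an assumption made here for illustration, not something this header states:

    #include <stdint.h>

    typedef uint64_t uta_bits;                   /* raw UTA container, cf. UINT_C_TYPE */

    /* Convert a raw UTA bit pattern to double, assuming 16 integral and
       48 fractional bits.  Illustration only.  */
    double uta_bits_to_double (uta_bits x)
    {
      return (double) x / 281474976710656.0;     /* divide by 2^48 */
    }
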
diff --git a/gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S b/gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S
new file mode 100644
index 000000000..8f3ed9201
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S
@@ -0,0 +1,1915 @@
+/* -*- Mode: Asm -*- */
+;; Copyright (C) 2012-2014 Free Software Foundation, Inc.
+;; Contributed by Sean D'Epagnier (sean@depagnier.com)
+;; Georg-Johann Lay (avr@gjlay.de)
+
+;; This file is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by the
+;; Free Software Foundation; either version 3, or (at your option) any
+;; later version.
+
+;; In addition to the permissions in the GNU General Public License, the
+;; Free Software Foundation gives you unlimited permission to link the
+;; compiled version of this file into combinations with other programs,
+;; and to distribute those combinations without any restriction coming
+;; from the use of this file. (The General Public License restrictions
+;; do apply in other respects; for example, they cover modification of
+;; the file, and distribution when not linked into a combine
+;; executable.)
+
+;; This file is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Fixed point library routines for AVR
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+.section .text.libgcc.fixed, "ax", @progbits
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Conversions to float
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#if defined (L_fractqqsf)
+DEFUN __fractqqsf
+ ;; Move in place for SA -> SF conversion
+ clr r22
+ mov r23, r24
+ ;; Sign-extend
+ lsl r24
+ sbc r24, r24
+ mov r25, r24
+ XJMP __fractsasf
+ENDF __fractqqsf
+#endif /* L_fractqqsf */
+
+#if defined (L_fractuqqsf)
+DEFUN __fractuqqsf
+ ;; Move in place for USA -> SF conversion
+ clr r22
+ mov r23, r24
+ ;; Zero-extend
+ clr r24
+ clr r25
+ XJMP __fractusasf
+ENDF __fractuqqsf
+#endif /* L_fractuqqsf */
+
+#if defined (L_fracthqsf)
+DEFUN __fracthqsf
+ ;; Move in place for SA -> SF conversion
+ wmov 22, 24
+ ;; Sign-extend
+ lsl r25
+ sbc r24, r24
+ mov r25, r24
+ XJMP __fractsasf
+ENDF __fracthqsf
+#endif /* L_fracthqsf */
+
+#if defined (L_fractuhqsf)
+DEFUN __fractuhqsf
+ ;; Move in place for USA -> SF conversion
+ wmov 22, 24
+ ;; Zero-extend
+ clr r24
+ clr r25
+ XJMP __fractusasf
+ENDF __fractuhqsf
+#endif /* L_fractuhqsf */
+
+#if defined (L_fracthasf)
+DEFUN __fracthasf
+ ;; Move in place for SA -> SF conversion
+ clr r22
+ mov r23, r24
+ mov r24, r25
+ ;; Sign-extend
+ lsl r25
+ sbc r25, r25
+ XJMP __fractsasf
+ENDF __fracthasf
+#endif /* L_fracthasf */
+
+#if defined (L_fractuhasf)
+DEFUN __fractuhasf
+ ;; Move in place for USA -> SF conversion
+ clr r22
+ mov r23, r24
+ mov r24, r25
+ ;; Zero-extend
+ clr r25
+ XJMP __fractusasf
+ENDF __fractuhasf
+#endif /* L_fractuhasf */
+
+
+#if defined (L_fractsqsf)
+DEFUN __fractsqsf
+ XCALL __floatsisf
+ ;; Divide non-zero results by 2^31 to move the
+ ;; decimal point into place
+ tst r25
+ breq 0f
+ subi r24, exp_lo (31)
+ sbci r25, exp_hi (31)
+0: ret
+ENDF __fractsqsf
+#endif /* L_fractsqsf */
+
+#if defined (L_fractusqsf)
+DEFUN __fractusqsf
+ XCALL __floatunsisf
+ ;; Divide non-zero results by 2^32 to move the
+ ;; decimal point into place
+ cpse r25, __zero_reg__
+ subi r25, exp_hi (32)
+ ret
+ENDF __fractusqsf
+#endif /* L_fractusqsf */
+
+#if defined (L_fractsasf)
+DEFUN __fractsasf
+ XCALL __floatsisf
+ ;; Divide non-zero results by 2^15 to move the
+ ;; decimal point into place
+ tst r25
+ breq 0f
+ subi r24, exp_lo (15)
+ sbci r25, exp_hi (15)
+0: ret
+ENDF __fractsasf
+#endif /* L_fractsasf */
+
+#if defined (L_fractusasf)
+DEFUN __fractusasf
+ XCALL __floatunsisf
+ ;; Divide non-zero results by 2^16 to move the
+ ;; decimal point into place
+ cpse r25, __zero_reg__
+ subi r25, exp_hi (16)
+ ret
+ENDF __fractusasf
+#endif /* L_fractusasf */
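
The conversions above all reduce to the same idea: convert the raw fixed-point bit pattern to float with __floatsisf/__floatunsisf, then drop the binary point by adjusting the exponent (the subi/sbci with exp_lo/exp_hi). A minimal host-side sketch of that scaling, using an s16.15 accum and an unsigned .32 fract as examples; the layouts and helper names are assumptions for illustration, not part of this file:

    #include <stdint.h>

    /* SA (s16.15) bits to float: int->float conversion, then divide by 2^15.  */
    float sa_bits_to_float (int32_t bits)
    {
      return (float) bits / 32768.0f;
    }

    /* USQ (unsigned .32) bits to float: divide by 2^32 instead.  */
    float usq_bits_to_float (uint32_t bits)
    {
      return (float) bits / 4294967296.0f;
    }
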
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Conversions from float
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#if defined (L_fractsfqq)
+DEFUN __fractsfqq
+ ;; Multiply with 2^{24+7} to get a QQ result in r25
+ subi r24, exp_lo (-31)
+ sbci r25, exp_hi (-31)
+ XCALL __fixsfsi
+ mov r24, r25
+ ret
+ENDF __fractsfqq
+#endif /* L_fractsfqq */
+
+#if defined (L_fractsfuqq)
+DEFUN __fractsfuqq
+ ;; Multiply with 2^{24+8} to get a UQQ result in r25
+ subi r25, exp_hi (-32)
+ XCALL __fixunssfsi
+ mov r24, r25
+ ret
+ENDF __fractsfuqq
+#endif /* L_fractsfuqq */
+
+#if defined (L_fractsfha)
+DEFUN __fractsfha
+ ;; Multiply with 2^{16+7} to get a HA result in r25:r24
+ subi r24, exp_lo (-23)
+ sbci r25, exp_hi (-23)
+ XJMP __fixsfsi
+ENDF __fractsfha
+#endif /* L_fractsfha */
+
+#if defined (L_fractsfuha)
+DEFUN __fractsfuha
+ ;; Multiply with 2^24 to get a UHA result in r25:r24
+ subi r25, exp_hi (-24)
+ XJMP __fixunssfsi
+ENDF __fractsfuha
+#endif /* L_fractsfuha */
+
+#if defined (L_fractsfhq)
+FALIAS __fractsfsq
+
+DEFUN __fractsfhq
+ ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
+ ;; resp. with 2^31 to get a SQ result in r25:r22
+ subi r24, exp_lo (-31)
+ sbci r25, exp_hi (-31)
+ XJMP __fixsfsi
+ENDF __fractsfhq
+#endif /* L_fractsfhq */
+
+#if defined (L_fractsfuhq)
+FALIAS __fractsfusq
+
+DEFUN __fractsfuhq
+ ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
+ ;; resp. with 2^32 to get a USQ result in r25:r22
+ subi r25, exp_hi (-32)
+ XJMP __fixunssfsi
+ENDF __fractsfuhq
+#endif /* L_fractsfuhq */
+
+#if defined (L_fractsfsa)
+DEFUN __fractsfsa
+ ;; Multiply with 2^15 to get a SA result in r25:r22
+ subi r24, exp_lo (-15)
+ sbci r25, exp_hi (-15)
+ XJMP __fixsfsi
+ENDF __fractsfsa
+#endif /* L_fractsfsa */
+
+#if defined (L_fractsfusa)
+DEFUN __fractsfusa
+ ;; Multiply with 2^16 to get a USA result in r25:r22
+ subi r25, exp_hi (-16)
+ XJMP __fixunssfsi
+ENDF __fractsfusa
+#endif /* L_fractsfusa */
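
Going the other way, the stubs above scale the float up by 2^FBIT (by biasing the exponent with exp_lo/exp_hi) and let __fixsfsi/__fixunssfsi truncate. A rough C equivalent, ignoring the out-of-range behaviour of the fix routines; the concrete formats chosen here are only examples:

    #include <stdint.h>

    /* float to SA (s16.15) bits: multiply by 2^15, truncate toward zero.  */
    int32_t float_to_sa_bits (float x)
    {
      return (int32_t) (x * 32768.0f);
    }

    /* float to UQQ (unsigned .8) bits, for x in [0, 1).  */
    uint8_t float_to_uqq_bits (float x)
    {
      return (uint8_t) (uint32_t) (x * 256.0f);
    }
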
+
+
+;; For multiplication the functions here are called directly from
+;; avr-fixed.md instead of using the standard libcall mechanisms.
+;; This can make better code because GCC knows exactly which
+;; of the call-used registers (not all of them) are clobbered. */
+
+/*******************************************************
+ Fractional Multiplication 8 x 8 without MUL
+*******************************************************/
+
+#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
+;;; R23 = R24 * R25
+;;; Clobbers: __tmp_reg__, R22, R24, R25
+;;; Rounding: ???
+DEFUN __mulqq3
+ XCALL __fmuls
+ ;; TR 18037 requires that (-1) * (-1) does not overflow
+ ;; The only input that can produce -1 is (-1)^2.
+ dec r23
+ brvs 0f
+ inc r23
+0: ret
+ENDF __mulqq3
+#endif /* L_mulqq3 && ! HAVE_MUL */
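
What __mulqq3 has to guarantee, in plain C terms: the Q7 (s.7) result is the 16-bit product with 7 fraction bits dropped, and the single overflowing case (-1) * (-1) must saturate per TR 18037 rather than wrap. A hedged reference sketch; it truncates and does not model the routine's exact rounding:

    #include <stdint.h>

    int8_t mulqq (int8_t a, int8_t b)            /* s.7 x s.7 -> s.7 */
    {
      if (a == INT8_MIN && b == INT8_MIN)        /* (-1) * (-1): saturate */
        return INT8_MAX;
      int16_t p = (int16_t) a * b;               /* s.14 product */
      return (int8_t) (p >> 7);                  /* drop 7 fraction bits */
    }
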
+
+/*******************************************************
+ Fractional Multiply .16 x .16 with and without MUL
+*******************************************************/
+
+#if defined (L_mulhq3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
+DEFUN __mulhq3
+ XCALL __mulhisi3
+ ;; Shift result into place
+ lsl r23
+ rol r24
+ rol r25
+ brvs 1f
+ ;; Round
+ sbrc r23, 7
+ adiw r24, 1
+ ret
+1: ;; Overflow. TR 18037 requires (-1)^2 not to overflow
+ ldi r24, lo8 (0x7fff)
+ ldi r25, hi8 (0x7fff)
+ ret
+ENDF __mulhq3
+#endif /* defined (L_mulhq3) */
+
+#if defined (L_muluhq3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) *= (R23:R22)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB < error <= 0.5 LSB
+DEFUN __muluhq3
+ XCALL __umulhisi3
+ ;; Round
+ sbrc r23, 7
+ adiw r24, 1
+ ret
+ENDF __muluhq3
+#endif /* L_muluhq3 */
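
The .16 x .16 routines compute the full 32-bit product, remove the duplicated sign bit, round on the highest dropped bit, and saturate the lone (-1)^2 overflow. Roughly, for the signed case (a sketch of the documented behaviour, not a drop-in replacement):

    #include <stdint.h>

    int16_t mulhq (int16_t a, int16_t b)         /* s.15 x s.15 -> s.15 */
    {
      if (a == INT16_MIN && b == INT16_MIN)      /* (-1) * (-1): saturate */
        return INT16_MAX;
      int32_t p = (int32_t) a * b;               /* s.30 product */
      p += (int32_t) 1 << 14;                    /* round the 15 dropped bits */
      return (int16_t) (p >> 15);
    }
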
+
+
+/*******************************************************
+ Fixed Multiply 8.8 x 8.8 with and without MUL
+*******************************************************/
+
+#if defined (L_mulha3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
+DEFUN __mulha3
+ XCALL __mulhisi3
+ lsl r22
+ rol r23
+ rol r24
+ XJMP __muluha3_round
+ENDF __mulha3
+#endif /* L_mulha3 */
+
+#if defined (L_muluha3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) *= (R23:R22)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB < error <= 0.5 LSB
+DEFUN __muluha3
+ XCALL __umulhisi3
+ XJMP __muluha3_round
+ENDF __muluha3
+#endif /* L_muluha3 */
+
+#if defined (L_muluha3_round)
+DEFUN __muluha3_round
+ ;; Shift result into place
+ mov r25, r24
+ mov r24, r23
+ ;; Round
+ sbrc r22, 7
+ adiw r24, 1
+ ret
+ENDF __muluha3_round
+#endif /* L_muluha3_round */
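
For 8.8 x 8.8 the product of the two 16-bit operands is a 16.16 value; the routines keep the middle two bytes and round on bit 7 of the dropped low byte. A host-side sketch of the unsigned variant; as in the non-saturating routine, integral overflow simply wraps:

    #include <stdint.h>

    uint16_t muluha (uint16_t a, uint16_t b)     /* u8.8 x u8.8 -> u8.8 */
    {
      uint32_t p = (uint32_t) a * b;             /* u16.16 product */
      p += 1u << 7;                              /* round the dropped 8 bits */
      return (uint16_t) (p >> 8);                /* keep the middle 16 bits */
    }
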
+
+
+/*******************************************************
+ Fixed Multiplication 16.16 x 16.16
+*******************************************************/
+
+;; Bits outside the result (below LSB), used in the signed version
+#define GUARD __tmp_reg__
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; Multiplier
+#define A0 16
+#define A1 A0+1
+#define A2 A1+1
+#define A3 A2+1
+
+;; Multiplicand
+#define B0 20
+#define B1 B0+1
+#define B2 B1+1
+#define B3 B2+1
+
+;; Result
+#define C0 24
+#define C1 C0+1
+#define C2 C1+1
+#define C3 C2+1
+
+#if defined (L_mulusa3)
+;;; (C3:C0) = (A3:A0) * (B3:B0)
+DEFUN __mulusa3
+ set
+ ;; Fallthru
+ENDF __mulusa3
+
+;;; Round for last digit iff T = 1
+;;; Return guard bits in GUARD (__tmp_reg__).
+;;; Rounding, T = 0: -1.0 LSB < error <= 0 LSB
+;;; Rounding, T = 1: -0.5 LSB < error <= 0.5 LSB
+DEFUN __mulusa3_round
+ ;; Some of the MUL instructions have LSBs outside the result.
+ ;; Don't ignore these LSBs in order to tame rounding error.
+ ;; Use C2/C3 for these LSBs.
+
+ clr C0
+ clr C1
+ mul A0, B0 $ movw C2, r0
+
+ mul A1, B0 $ add C3, r0 $ adc C0, r1
+ mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1
+
+ ;; Round if T = 1.  Store the guard bits outside the result; the signed
+ ;; version (function below) uses them for rounding and left-shifting.
+ brtc 0f
+ sbrc C3, 7
+ adiw C0, 1
+0: push C3
+
+ ;; The following MULs don't have LSBs outside the result.
+ ;; C2/C3 is the high part.
+
+ mul A0, B2 $ add C0, r0 $ adc C1, r1 $ sbc C2, C2
+ mul A1, B1 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
+ mul A2, B0 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
+ neg C2
+
+ mul A0, B3 $ add C1, r0 $ adc C2, r1 $ sbc C3, C3
+ mul A1, B2 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
+ mul A2, B1 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
+ mul A3, B0 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
+ neg C3
+
+ mul A1, B3 $ add C2, r0 $ adc C3, r1
+ mul A2, B2 $ add C2, r0 $ adc C3, r1
+ mul A3, B1 $ add C2, r0 $ adc C3, r1
+
+ mul A2, B3 $ add C3, r0
+ mul A3, B2 $ add C3, r0
+
+ ;; Guard bits used in the signed version below.
+ pop GUARD
+ clr __zero_reg__
+ ret
+ENDF __mulusa3_round
+#endif /* L_mulusa3 */
+
+#if defined (L_mulsa3)
+;;; (C3:C0) = (A3:A0) * (B3:B0)
+;;; Clobbers: __tmp_reg__, T
+;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
+DEFUN __mulsa3
+ clt
+ XCALL __mulusa3_round
+ ;; A posteriori sign extension of the operands
+ tst B3
+ brpl 1f
+ sub C2, A0
+ sbc C3, A1
+1: sbrs A3, 7
+ rjmp 2f
+ sub C2, B0
+ sbc C3, B1
+2:
+ ;; Shift 1 bit left to adjust for 15 fractional bits
+ lsl GUARD
+ rol C0
+ rol C1
+ rol C2
+ rol C3
+ ;; Round last digit
+ lsl GUARD
+ adc C0, __zero_reg__
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ ret
+ENDF __mulsa3
+#endif /* L_mulsa3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#else /* __AVR_HAVE_MUL__ */
+
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#define B0 22
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+;; __tmp_reg__
+#define CC0 0
+;; __zero_reg__
+#define CC1 1
+#define CC2 16
+#define CC3 17
+
+#define AA0 26
+#define AA1 AA0+1
+#define AA2 30
+#define AA3 AA2+1
+
+#if defined (L_mulsa3)
+;;; (R25:R22) *= (R21:R18)
+;;; Clobbers: ABI, called by optabs
+;;; Rounding: -1 LSB <= error <= 1 LSB
+DEFUN __mulsa3
+ push B0
+ push B1
+ push B3
+ clt
+ XCALL __mulusa3_round
+ pop r30
+ ;; sign-extend B
+ bst r30, 7
+ brtc 1f
+ ;; A1, A0 survived in R27:R26
+ sub C2, AA0
+ sbc C3, AA1
+1:
+ pop AA1 ;; B1
+ pop AA0 ;; B0
+
+ ;; sign-extend A. A3 survived in R31
+ bst AA3, 7
+ brtc 2f
+ sub C2, AA0
+ sbc C3, AA1
+2:
+ ;; Shift 1 bit left to adjust for 15 fractional bits
+ lsl GUARD
+ rol C0
+ rol C1
+ rol C2
+ rol C3
+ ;; Round last digit
+ lsl GUARD
+ adc C0, __zero_reg__
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ ret
+ENDF __mulsa3
+#endif /* L_mulsa3 */
+
+#if defined (L_mulusa3)
+;;; (R25:R22) *= (R21:R18)
+;;; Clobbers: ABI, called by optabs
+;;; Rounding: -1 LSB <= error <= 1 LSB
+DEFUN __mulusa3
+ set
+ ;; Fallthru
+ENDF __mulusa3
+
+;;; A[] survives in 26, 27, 30, 31
+;;; Also used by __mulsa3 with T = 0
+;;; Round if T = 1
+;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
+DEFUN __mulusa3_round
+ push CC2
+ push CC3
+ ; clear result
+ clr __tmp_reg__
+ wmov CC2, CC0
+ ; save multiplicand
+ wmov AA0, A0
+ wmov AA2, A2
+ rjmp 3f
+
+ ;; Loop the integral part
+
+1: ;; CC += A * 2^n; n >= 0
+ add CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
+
+2: ;; A <<= 1
+ lsl A0 $ rol A1 $ rol A2 $ rol A3
+
+3: ;; IBIT(B) >>= 1
+ ;; Carry = n-th bit of B; n >= 0
+ lsr B3
+ ror B2
+ brcs 1b
+ sbci B3, 0
+ brne 2b
+
+ ;; Loop the fractional part
+ ;; B2/B3 is 0 now, use as guard bits for rounding
+ ;; Restore multiplicand
+ wmov A0, AA0
+ wmov A2, AA2
+ rjmp 5f
+
+4: ;; CC += A:Guard * 2^n; n < 0
+ add B3,B2 $ adc CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
+5:
+ ;; A:Guard >>= 1
+ lsr A3 $ ror A2 $ ror A1 $ ror A0 $ ror B2
+
+ ;; FBIT(B) <<= 1
+ ;; Carry = n-th bit of B; n < 0
+ lsl B0
+ rol B1
+ brcs 4b
+ sbci B0, 0
+ brne 5b
+
+ ;; Save guard bits and set carry for rounding
+ push B3
+ lsl B3
+ ;; Move result into place
+ wmov C2, CC2
+ wmov C0, CC0
+ clr __zero_reg__
+ brtc 6f
+ ;; Round iff T = 1
+ adc C0, __zero_reg__
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+6:
+ pop GUARD
+ ;; Epilogue
+ pop CC3
+ pop CC2
+ ret
+ENDF __mulusa3_round
+#endif /* L_mulusa3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef AA0
+#undef AA1
+#undef AA2
+#undef AA3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
+
+#endif /* __AVR_HAVE_MUL__ */
+
+#undef GUARD
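
Both the MUL and the non-MUL paths above implement the same 16.16 x 16.16 semantics; the guard byte kept in __tmp_reg__ only exists to keep the rounding error within the documented bounds. With a 64-bit intermediate, which the AVR of course does not have, the signed accum multiply reduces to the following sketch (illustration of the intended result, not the actual algorithm):

    #include <stdint.h>

    int32_t mulsa (int32_t a, int32_t b)         /* s16.15 x s16.15 -> s16.15 */
    {
      int64_t p = (int64_t) a * b;               /* s32.30 product */
      p += (int64_t) 1 << 14;                    /* round the 15 dropped bits */
      return (int32_t) (p >> 15);                /* integral overflow wraps */
    }
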
+
+/***********************************************************
+ Fixed unsigned saturated Multiplication 8.8 x 8.8
+***********************************************************/
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+#define SS __tmp_reg__
+
+#if defined (L_usmuluha3)
+DEFUN __usmuluha3
+ ;; Widening multiply
+#ifdef __AVR_HAVE_MUL__
+ ;; Adjust interface
+ movw R26, R22
+ movw R18, R24
+#endif /* HAVE MUL */
+ XCALL __umulhisi3
+ tst C3
+ brne .Lmax
+ ;; Round, target is in C1..C2
+ lsl C0
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ brcs .Lmax
+ ;; Move result into place
+ mov C3, C2
+ mov C2, C1
+ ret
+.Lmax:
+ ;; Saturate
+ ldi C2, 0xff
+ ldi C3, 0xff
+ ret
+ENDF __usmuluha3
+#endif /* L_usmuluha3 */
+
+/***********************************************************
+ Fixed signed saturated Multiplication s8.7 x s8.7
+***********************************************************/
+
+#if defined (L_ssmulha3)
+DEFUN __ssmulha3
+ ;; Widening multiply
+#ifdef __AVR_HAVE_MUL__
+ ;; Adjust interface
+ movw R26, R22
+ movw R18, R24
+#endif /* HAVE MUL */
+ XCALL __mulhisi3
+ ;; Adjust decimal point
+ lsl C0
+ rol C1
+ rol C2
+ brvs .LsatC3.3
+ ;; The 9 MSBs must be the same
+ rol C3
+ sbc SS, SS
+ cp C3, SS
+ brne .LsatSS
+ ;; Round
+ lsl C0
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ brvs .Lmax
+ ;; Move result into place
+ mov C3, C2
+ mov C2, C1
+ ret
+.Lmax:
+ ;; Load 0x7fff
+ clr C3
+.LsatC3.3:
+ ;; C3 < 0 --> 0x8000
+ ;; C3 >= 0 --> 0x7fff
+ mov SS, C3
+.LsatSS:
+ ;; Load min / max value:
+ ;; SS = -1 --> 0x8000
+ ;; SS = 0 --> 0x7fff
+ ldi C3, 0x7f
+ ldi C2, 0xff
+ sbrc SS, 7
+ adiw C2, 1
+ ret
+ENDF __ssmulha3
+#endif /* L_ssmulha3 */
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef SS
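
The saturated 8.8 variants add clamping on top of the rounding shown earlier. A hedged sketch of the unsigned case, mirroring the "round, then clamp at the maximal value" behaviour of __usmuluha3:

    #include <stdint.h>

    uint16_t usmuluha (uint16_t a, uint16_t b)   /* u8.8 x u8.8, saturating */
    {
      uint32_t p = (uint32_t) a * b + (1u << 7); /* u16.16 product, rounded */
      p >>= 8;                                   /* back to 8.8 */
      return p > 0xFFFFu ? 0xFFFF : (uint16_t) p;
    }
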
+
+/***********************************************************
+ Fixed unsigned saturated Multiplication 16.16 x 16.16
+***********************************************************/
+
+#define C0 18
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+#define C4 C0+4
+#define C5 C0+5
+#define C6 C0+6
+#define C7 C0+7
+#define SS __tmp_reg__
+
+#if defined (L_usmulusa3)
+;; R22[4] = R22[4] *{usat} R18[4]
+;; Ordinary ABI function
+DEFUN __usmulusa3
+ ;; Widening multiply
+ XCALL __umulsidi3
+ or C7, C6
+ brne .Lmax
+ ;; Round, target is in C2..C5
+ lsl C1
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ adc C4, __zero_reg__
+ adc C5, __zero_reg__
+ brcs .Lmax
+ ;; Move result into place
+ wmov C6, C4
+ wmov C4, C2
+ ret
+.Lmax:
+ ;; Saturate
+ ldi C7, 0xff
+ ldi C6, 0xff
+ wmov C4, C6
+ ret
+ENDF __usmulusa3
+#endif /* L_usmulusa3 */
+
+/***********************************************************
+ Fixed signed saturated Multiplication s16.15 x s16.15
+***********************************************************/
+
+#if defined (L_ssmulsa3)
+;; R22[4] = R22[4] *{ssat} R18[4]
+;; Ordinary ABI function
+DEFUN __ssmulsa3
+ ;; Widening multiply
+ XCALL __mulsidi3
+ ;; Adjust decimal point
+ lsl C1
+ rol C2
+ rol C3
+ rol C4
+ rol C5
+ brvs .LsatC7.7
+ ;; The 17 MSBs must be the same
+ rol C6
+ rol C7
+ sbc SS, SS
+ cp C6, SS
+ cpc C7, SS
+ brne .LsatSS
+ ;; Round
+ lsl C1
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ adc C4, __zero_reg__
+ adc C5, __zero_reg__
+ brvs .Lmax
+ ;; Move result into place
+ wmov C6, C4
+ wmov C4, C2
+ ret
+
+.Lmax:
+ ;; Load 0x7fffffff
+ clr C7
+.LsatC7.7:
+ ;; C7 < 0 --> 0x80000000
+ ;; C7 >= 0 --> 0x7fffffff
+ lsl C7
+ sbc SS, SS
+.LsatSS:
+ ;; Load min / max value:
+ ;; SS = -1 --> 0x80000000
+ ;; SS = 0 --> 0x7fffffff
+ com SS
+ mov C4, SS
+ mov C5, C4
+ wmov C6, C4
+ subi C7, 0x80
+ ret
+ENDF __ssmulsa3
+#endif /* L_ssmulsa3 */
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#undef SS
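
The s16.15 saturated multiply follows the same pattern at double width; the "17 MSBs must be the same" test is simply the check that the wide product still fits in s16.15. A reference sketch that ignores the exact ordering of the rounding and overflow checks:

    #include <stdint.h>

    int32_t ssmulsa (int32_t a, int32_t b)       /* s16.15 x s16.15, saturating */
    {
      int64_t p = ((int64_t) a * b + (1 << 14)) >> 15;   /* rounded s16.15 value */
      if (p > INT32_MAX) return INT32_MAX;
      if (p < INT32_MIN) return INT32_MIN;
      return (int32_t) p;
    }
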
+
+/*******************************************************
+ Fractional Division 8 / 8
+*******************************************************/
+
+#define r_divd r25 /* dividend */
+#define r_quo r24 /* quotient */
+#define r_div r22 /* divisor */
+#define r_sign __tmp_reg__
+
+#if defined (L_divqq3)
+DEFUN __divqq3
+ mov r_sign, r_divd
+ eor r_sign, r_div
+ sbrc r_div, 7
+ neg r_div
+ sbrc r_divd, 7
+ neg r_divd
+ XCALL __divqq_helper
+ lsr r_quo
+ sbrc r_sign, 7 ; negate result if needed
+ neg r_quo
+ ret
+ENDF __divqq3
+#endif /* L_divqq3 */
+
+#if defined (L_udivuqq3)
+DEFUN __udivuqq3
+ cp r_divd, r_div
+ brsh 0f
+ XJMP __divqq_helper
+ ;; Result is out of [0, 1) ==> Return 1 - eps.
+0: ldi r_quo, 0xff
+ ret
+ENDF __udivuqq3
+#endif /* L_udivuqq3 */
+
+
+#if defined (L_divqq_helper)
+DEFUN __divqq_helper
+ clr r_quo ; clear quotient
+ inc __zero_reg__ ; init loop counter, used per shift
+__udivuqq3_loop:
+ lsl r_divd ; shift dividend
+ brcs 0f ; dividend overflow
+ cp r_divd,r_div ; compare dividend & divisor
+ brcc 0f ; dividend >= divisor
+ rol r_quo ; shift quotient (with CARRY)
+ rjmp __udivuqq3_cont
+0:
+ sub r_divd,r_div ; restore dividend
+ lsl r_quo ; shift quotient (without CARRY)
+__udivuqq3_cont:
+ lsl __zero_reg__ ; shift loop-counter bit
+ brne __udivuqq3_loop
+ com r_quo ; complement result
+ ; because C flag was complemented in loop
+ ret
+ENDF __divqq_helper
+#endif /* L_divqq_helper */
+
+#undef r_divd
+#undef r_quo
+#undef r_div
+#undef r_sign
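
The .8 / .8 division is a bit-by-bit shift-and-subtract loop producing one quotient bit per iteration; the final COM merely undoes the inverted carry used inside the loop, and the unsigned wrapper returns 1 - 1ulp whenever the true quotient would reach 1. The intended result, expressed in C (divisor assumed nonzero):

    #include <stdint.h>

    uint8_t udivuqq (uint8_t a, uint8_t b)       /* u.8 / u.8 -> u.8 */
    {
      if (a >= b)                                /* quotient would be >= 1.0 */
        return 0xFF;                             /* return 1 - 1ulp instead */
      return (uint8_t) (((uint16_t) a << 8) / b);
    }
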
+
+
+/*******************************************************
+ Fractional Division 16 / 16
+*******************************************************/
+#define r_divdL 26 /* dividend Low */
+#define r_divdH 27 /* dividend High */
+#define r_quoL 24 /* quotient Low */
+#define r_quoH 25 /* quotient High */
+#define r_divL 22 /* divisor */
+#define r_divH 23 /* divisor */
+#define r_cnt 21
+
+#if defined (L_divhq3)
+DEFUN __divhq3
+ mov r0, r_divdH
+ eor r0, r_divH
+ sbrs r_divH, 7
+ rjmp 1f
+ NEG2 r_divL
+1:
+ sbrs r_divdH, 7
+ rjmp 2f
+ NEG2 r_divdL
+2:
+ cp r_divdL, r_divL
+ cpc r_divdH, r_divH
+ breq __divhq3_minus1 ; if equal return -1
+ XCALL __udivuhq3
+ lsr r_quoH
+ ror r_quoL
+ brpl 9f
+ ;; negate result if needed
+ NEG2 r_quoL
+9:
+ ret
+__divhq3_minus1:
+ ldi r_quoH, 0x80
+ clr r_quoL
+ ret
+ENDF __divhq3
+#endif /* defined (L_divhq3) */
+
+#if defined (L_udivuhq3)
+DEFUN __udivuhq3
+ sub r_quoH,r_quoH ; clear quotient and carry
+ ;; FALLTHRU
+ENDF __udivuhq3
+
+DEFUN __udivuha3_common
+ clr r_quoL ; clear quotient
+ ldi r_cnt,16 ; init loop counter
+__udivuhq3_loop:
+ rol r_divdL ; shift dividend (with CARRY)
+ rol r_divdH
+ brcs __udivuhq3_ep ; dividend overflow
+ cp r_divdL,r_divL ; compare dividend & divisor
+ cpc r_divdH,r_divH
+ brcc __udivuhq3_ep ; dividend >= divisor
+ rol r_quoL ; shift quotient (with CARRY)
+ rjmp __udivuhq3_cont
+__udivuhq3_ep:
+ sub r_divdL,r_divL ; restore dividend
+ sbc r_divdH,r_divH
+ lsl r_quoL ; shift quotient (without CARRY)
+__udivuhq3_cont:
+ rol r_quoH ; shift quotient
+ dec r_cnt ; decrement loop counter
+ brne __udivuhq3_loop
+ com r_quoL ; complement result
+ com r_quoH ; because C flag was complemented in loop
+ ret
+ENDF __udivuha3_common
+#endif /* defined (L_udivuhq3) */
+
+/*******************************************************
+ Fixed Division 8.8 / 8.8
+*******************************************************/
+#if defined (L_divha3)
+DEFUN __divha3
+ mov r0, r_divdH
+ eor r0, r_divH
+ sbrs r_divH, 7
+ rjmp 1f
+ NEG2 r_divL
+1:
+ sbrs r_divdH, 7
+ rjmp 2f
+ NEG2 r_divdL
+2:
+ XCALL __udivuha3
+ lsr r_quoH ; adjust to 7 fractional bits
+ ror r_quoL
+ sbrs r0, 7 ; negate result if needed
+ ret
+ NEG2 r_quoL
+ ret
+ENDF __divha3
+#endif /* defined (L_divha3) */
+
+#if defined (L_udivuha3)
+DEFUN __udivuha3
+ mov r_quoH, r_divdL
+ mov r_divdL, r_divdH
+ clr r_divdH
+ lsl r_quoH ; shift quotient into carry
+ XJMP __udivuha3_common ; same as fractional after rearrange
+ENDF __udivuha3
+#endif /* defined (L_udivuha3) */
+
+#undef r_divdL
+#undef r_divdH
+#undef r_quoL
+#undef r_quoH
+#undef r_divL
+#undef r_divH
+#undef r_cnt
+
+/*******************************************************
+ Fixed Division 16.16 / 16.16
+*******************************************************/
+
+#define r_arg1L 24 /* arg1 gets passed already in place */
+#define r_arg1H 25
+#define r_arg1HL 26
+#define r_arg1HH 27
+#define r_divdL 26 /* dividend Low */
+#define r_divdH 27
+#define r_divdHL 30
+#define r_divdHH 31 /* dividend High */
+#define r_quoL 22 /* quotient Low */
+#define r_quoH 23
+#define r_quoHL 24
+#define r_quoHH 25 /* quotient High */
+#define r_divL 18 /* divisor Low */
+#define r_divH 19
+#define r_divHL 20
+#define r_divHH 21 /* divisor High */
+#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
+
+#if defined (L_divsa3)
+DEFUN __divsa3
+ mov r0, r_arg1HH
+ eor r0, r_divHH
+ sbrs r_divHH, 7
+ rjmp 1f
+ NEG4 r_divL
+1:
+ sbrs r_arg1HH, 7
+ rjmp 2f
+ NEG4 r_arg1L
+2:
+ XCALL __udivusa3
+ lsr r_quoHH ; adjust to 15 fractional bits
+ ror r_quoHL
+ ror r_quoH
+ ror r_quoL
+ sbrs r0, 7 ; negate result if needed
+ ret
+ ;; negate r_quoL
+ XJMP __negsi2
+ENDF __divsa3
+#endif /* defined (L_divsa3) */
+
+#if defined (L_udivusa3)
+DEFUN __udivusa3
+ ldi r_divdHL, 32 ; init loop counter
+ mov r_cnt, r_divdHL
+ clr r_divdHL
+ clr r_divdHH
+ wmov r_quoL, r_divdHL
+ lsl r_quoHL ; shift quotient into carry
+ rol r_quoHH
+__udivusa3_loop:
+ rol r_divdL ; shift dividend (with CARRY)
+ rol r_divdH
+ rol r_divdHL
+ rol r_divdHH
+ brcs __udivusa3_ep ; dividend overflow
+ cp r_divdL,r_divL ; compare dividend & divisor
+ cpc r_divdH,r_divH
+ cpc r_divdHL,r_divHL
+ cpc r_divdHH,r_divHH
+ brcc __udivusa3_ep ; dividend >= divisor
+ rol r_quoL ; shift quotient (with CARRY)
+ rjmp __udivusa3_cont
+__udivusa3_ep:
+ sub r_divdL,r_divL ; restore dividend
+ sbc r_divdH,r_divH
+ sbc r_divdHL,r_divHL
+ sbc r_divdHH,r_divHH
+ lsl r_quoL ; shift quotient (without CARRY)
+__udivusa3_cont:
+ rol r_quoH ; shift quotient
+ rol r_quoHL
+ rol r_quoHH
+ dec r_cnt ; decrement loop counter
+ brne __udivusa3_loop
+ com r_quoL ; complement result
+ com r_quoH ; because C flag was complemented in loop
+ com r_quoHL
+ com r_quoHH
+ ret
+ENDF __udivusa3
+#endif /* defined (L_udivusa3) */
+
+#undef r_arg1L
+#undef r_arg1H
+#undef r_arg1HL
+#undef r_arg1HH
+#undef r_divdL
+#undef r_divdH
+#undef r_divdHL
+#undef r_divdHH
+#undef r_quoL
+#undef r_quoH
+#undef r_quoHL
+#undef r_quoHH
+#undef r_divL
+#undef r_divH
+#undef r_divHL
+#undef r_divHH
+#undef r_cnt
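
The wider divisions scale the dividend up by the number of fraction bits, divide, and fix up the sign afterwards; the extra LSR in __divsa3 adjusts the 16-fraction-bit quotient from __udivusa3 down to the 15 fraction bits of an s16.15 accum. In C, again with a 64-bit intermediate the hardware does not have (divisor assumed nonzero; integral overflow wraps):

    #include <stdint.h>

    int32_t divsa (int32_t a, int32_t b)         /* s16.15 / s16.15 -> s16.15 */
    {
      int64_t q = ((int64_t) a << 15) / b;       /* truncates toward zero */
      return (int32_t) q;
    }
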
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Saturation, 1 Byte
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; First Argument and Return Register
+#define A0 24
+
+#if defined (L_ssabs_1)
+DEFUN __ssabs_1
+ sbrs A0, 7
+ ret
+ neg A0
+ sbrc A0,7
+ dec A0
+ ret
+ENDF __ssabs_1
+#endif /* L_ssabs_1 */
+
+#undef A0
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Saturation, 2 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; First Argument and Return Register
+#define A0 24
+#define A1 A0+1
+
+#if defined (L_ssneg_2)
+DEFUN __ssneg_2
+ NEG2 A0
+ brvc 0f
+ sbiw A0, 1
+0: ret
+ENDF __ssneg_2
+#endif /* L_ssneg_2 */
+
+#if defined (L_ssabs_2)
+DEFUN __ssabs_2
+ sbrs A1, 7
+ ret
+ XJMP __ssneg_2
+ENDF __ssabs_2
+#endif /* L_ssabs_2 */
+
+#undef A0
+#undef A1
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Saturation, 4 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; First Argument and Return Register
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#if defined (L_ssneg_4)
+DEFUN __ssneg_4
+ XCALL __negsi2
+ brvc 0f
+ ldi A3, 0x7f
+ ldi A2, 0xff
+ ldi A1, 0xff
+ ldi A0, 0xff
+0: ret
+ENDF __ssneg_4
+#endif /* L_ssneg_4 */
+
+#if defined (L_ssabs_4)
+DEFUN __ssabs_4
+ sbrs A3, 7
+ ret
+ XJMP __ssneg_4
+ENDF __ssabs_4
+#endif /* L_ssabs_4 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Saturation, 8 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; First Argument and Return Register
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+#if defined (L_clr_8)
+FALIAS __usneguta2
+FALIAS __usneguda2
+FALIAS __usnegudq2
+
+;; Clear Carry and all Bytes
+DEFUN __clr_8
+ ;; Clear Carry and set Z
+ sub A7, A7
+ ;; FALLTHRU
+ENDF __clr_8
+;; Propagate Carry to all Bytes, Carry unaltered
+DEFUN __sbc_8
+ sbc A7, A7
+ sbc A6, A6
+ wmov A4, A6
+ wmov A2, A6
+ wmov A0, A6
+ ret
+ENDF __sbc_8
+#endif /* L_clr_8 */
+
+#if defined (L_ssneg_8)
+FALIAS __ssnegta2
+FALIAS __ssnegda2
+FALIAS __ssnegdq2
+
+DEFUN __ssneg_8
+ XCALL __negdi2
+ brvc 0f
+ ;; A[] = 0x7fffffff
+ sec
+ XCALL __sbc_8
+ ldi A7, 0x7f
+0: ret
+ENDF __ssneg_8
+#endif /* L_ssneg_8 */
+
+#if defined (L_ssabs_8)
+FALIAS __ssabsta2
+FALIAS __ssabsda2
+FALIAS __ssabsdq2
+
+DEFUN __ssabs_8
+ sbrs A7, 7
+ ret
+ XJMP __ssneg_8
+ENDF __ssabs_8
+#endif /* L_ssabs_8 */
+
+;; Second Argument
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+#if defined (L_usadd_8)
+FALIAS __usadduta3
+FALIAS __usadduda3
+FALIAS __usaddudq3
+
+DEFUN __usadd_8
+ XCALL __adddi3
+ brcs 0f
+ ret
+0: ;; A[] = 0xffffffff
+ XJMP __sbc_8
+ENDF __usadd_8
+#endif /* L_usadd_8 */
+
+#if defined (L_ussub_8)
+FALIAS __ussubuta3
+FALIAS __ussubuda3
+FALIAS __ussubudq3
+
+DEFUN __ussub_8
+ XCALL __subdi3
+ brcs 0f
+ ret
+0: ;; A[] = 0
+ XJMP __clr_8
+ENDF __ussub_8
+#endif /* L_ussub_8 */
+
+#if defined (L_ssadd_8)
+FALIAS __ssaddta3
+FALIAS __ssaddda3
+FALIAS __ssadddq3
+
+DEFUN __ssadd_8
+ XCALL __adddi3
+ brvc 0f
+ ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
+ cpi B7, 0x80
+ XCALL __sbc_8
+ subi A7, 0x80
+0: ret
+ENDF __ssadd_8
+#endif /* L_ssadd_8 */
+
+#if defined (L_sssub_8)
+FALIAS __sssubta3
+FALIAS __sssubda3
+FALIAS __sssubdq3
+
+DEFUN __sssub_8
+ XCALL __subdi3
+ brvc 0f
+ ;; A = (B < 0) ? INT64_MAX : INT64_MIN
+ ldi A7, 0x7f
+ cp A7, B7
+ XCALL __sbc_8
+ subi A7, 0x80
+0: ret
+ENDF __sssub_8
+#endif /* L_sssub_8 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef A4
+#undef A5
+#undef A6
+#undef A7
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef B4
+#undef B5
+#undef B6
+#undef B7
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding Helpers
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#ifdef L_mask1
+
+#define AA 24
+#define CC 25
+
+;; R25 = 1 << (R24 & 7)
+;; CC = 1 << (AA & 7)
+;; Clobbers: None
+DEFUN __mask1
+ ;; CC = 2 ^ AA.1
+ ldi CC, 1 << 2
+ sbrs AA, 1
+ ldi CC, 1 << 0
+ ;; CC *= 2 ^ AA.0
+ sbrc AA, 0
+ lsl CC
+ ;; CC *= 2 ^ AA.2
+ sbrc AA, 2
+ swap CC
+ ret
+ENDF __mask1
+
+#undef AA
+#undef CC
+#endif /* L_mask1 */
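
__mask1 builds 1 << (n & 7) without a variable shift: it starts from 1 or 4 depending on bit 1, doubles for bit 0, and uses SWAP (a nibble swap, i.e. times 16) for bit 2. The same trick, written out in C as a sketch:

    #include <stdint.h>

    uint8_t mask1 (uint8_t n)
    {
      uint8_t c = (n & 2) ? 1 << 2 : 1 << 0;     /* 2 ^ n.1 */
      if (n & 1)
        c <<= 1;                                 /* times 2 ^ n.0 */
      if (n & 4)
        c = (uint8_t) (c << 4 | c >> 4);         /* nibble swap: times 2 ^ n.2 */
      return c;
    }
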
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The rounding point. Any bits smaller than
+;; 2^{-RP} will be cleared.
+#define RP R24
+
+#define A0 22
+#define A1 A0 + 1
+
+#define C0 24
+#define C1 C0 + 1
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding, 1 Byte
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#ifdef L_roundqq3
+
+;; R24 = round (R22, R24)
+;; Clobbers: R22, __tmp_reg__
+DEFUN __roundqq3
+ mov __tmp_reg__, C1
+ subi RP, __QQ_FBIT__ - 1
+ neg RP
+ ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
+ XCALL __mask1
+ mov C0, C1
+ ;; Add-Saturate 2^{-RP-1}
+ add A0, C0
+ brvc 0f
+ ldi C0, 0x7f
+ rjmp 9f
+0: ;; Mask out bits beyond RP
+ lsl C0
+ neg C0
+ and C0, A0
+9: mov C1, __tmp_reg__
+ ret
+ENDF __roundqq3
+#endif /* L_roundqq3 */
+
+#ifdef L_rounduqq3
+
+;; R24 = round (R22, R24)
+;; Clobbers: R22, __tmp_reg__
+DEFUN __rounduqq3
+ mov __tmp_reg__, C1
+ subi RP, __UQQ_FBIT__ - 1
+ neg RP
+ ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
+ XCALL __mask1
+ mov C0, C1
+ ;; Add-Saturate 2^{-RP-1}
+ add A0, C0
+ brcc 0f
+ ldi C0, 0xff
+ rjmp 9f
+0: ;; Mask out bits beyond RP
+ lsl C0
+ neg C0
+ and C0, A0
+9: mov C1, __tmp_reg__
+ ret
+ENDF __rounduqq3
+#endif /* L_rounduqq3 */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding, 2 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#ifdef L_addmask_2
+
+;; [ R25:R24 = 1 << (R24 & 15)
+;; R23:R22 += 1 << (R24 & 15) ]
+;; SREG is set according to the addition
+DEFUN __addmask_2
+ ;; R25 = 1 << (R24 & 7)
+ XCALL __mask1
+ cpi RP, 1 << 3
+ sbc C0, C0
+ ;; Swap C0 and C1 if RP.3 was set
+ and C0, C1
+ eor C1, C0
+ ;; Finally, add the power-of-two: A[] += C[]
+ add A0, C0
+ adc A1, C1
+ ret
+ENDF __addmask_2
+#endif /* L_addmask_2 */
+
+#ifdef L_round_s2
+
+;; R25:R24 = round (R23:R22, R24)
+;; Clobbers: R23, R22
+DEFUN __roundhq3
+ subi RP, __HQ_FBIT__ - __HA_FBIT__
+ENDF __roundhq3
+DEFUN __roundha3
+ subi RP, __HA_FBIT__ - 1
+ neg RP
+ ;; [ R25:R24 = 1 << (FBIT-1 - RP)
+ ;; R23:R22 += 1 << (FBIT-1 - RP) ]
+ XCALL __addmask_2
+ XJMP __round_s2_const
+ENDF __roundha3
+
+#endif /* L_round_s2 */
+
+#ifdef L_round_u2
+
+;; R25:R24 = round (R23:R22, R24)
+;; Clobbers: R23, R22
+DEFUN __rounduhq3
+ subi RP, __UHQ_FBIT__ - __UHA_FBIT__
+ENDF __rounduhq3
+DEFUN __rounduha3
+ subi RP, __UHA_FBIT__ - 1
+ neg RP
+ ;; [ R25:R24 = 1 << (FBIT-1 - RP)
+ ;; R23:R22 += 1 << (FBIT-1 - RP) ]
+ XCALL __addmask_2
+ XJMP __round_u2_const
+ENDF __rounduha3
+
+#endif /* L_round_u2 */
+
+
+#ifdef L_round_2_const
+
+;; Helpers for 2 byte wide rounding
+
+DEFUN __round_s2_const
+ brvc 2f
+ ldi C1, 0x7f
+ rjmp 1f
+ ;; FALLTHRU (Barrier)
+ENDF __round_s2_const
+
+DEFUN __round_u2_const
+ brcc 2f
+ ldi C1, 0xff
+1:
+ ldi C0, 0xff
+ rjmp 9f
+2:
+ ;; Saturation is performed now.
+ ;; Currently, we have C[] = 2^{-RP-1}
+ ;; C[] = 2^{-RP}
+ lsl C0
+ rol C1
+ ;;
+ NEG2 C0
+ ;; Clear the bits beyond the rounding point.
+ and C0, A0
+ and C1, A1
+9: ret
+ENDF __round_u2_const
+
+#endif /* L_round_2_const */
+
+#undef A0
+#undef A1
+#undef C0
+#undef C1
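
All of these rounding helpers implement the same scheme: add half of 2^-RP (1 << (FBIT-1 - RP) in raw bits), saturate if that addition overflows, and clear everything below the rounding point. A sketch for the signed 2-byte case, taking HA as s8.7; the saturation detail on overflow is simplified:

    #include <stdint.h>

    int16_t roundha (int16_t a, int rp)          /* round s8.7 at 2^-rp, 0 <= rp <= 7 */
    {
      int16_t step = (int16_t) (1 << (7 - rp));  /* 2^-rp in raw s8.7 bits */
      int32_t r = (int32_t) a + (step >> 1);     /* add half a step */
      if (r > INT16_MAX)
        return INT16_MAX;                        /* saturate on overflow */
      return (int16_t) (r & ~(int32_t) (step - 1));   /* clear bits below 2^-rp */
    }
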
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding, 4 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#define A0 18
+#define A1 A0 + 1
+#define A2 A0 + 2
+#define A3 A0 + 3
+
+#define C0 22
+#define C1 C0 + 1
+#define C2 C0 + 2
+#define C3 C0 + 3
+
+#ifdef L_addmask_4
+
+;; [ R25:R22 = 1 << (R24 & 31)
+;; R21:R18 += 1 << (R24 & 31) ]
+;; SREG is set according to the addition
+DEFUN __addmask_4
+ ;; R25 = 1 << (R24 & 7)
+ XCALL __mask1
+ cpi RP, 1 << 4
+ sbc C0, C0
+ sbc C1, C1
+ ;; Swap C2 with C3 if RP.3 is not set
+ cpi RP, 1 << 3
+ sbc C2, C2
+ and C2, C3
+ eor C3, C2
+ ;; Swap C3:C2 with C1:C0 if RP.4 is not set
+ and C0, C2 $ eor C2, C0
+ and C1, C3 $ eor C3, C1
+ ;; Finally, add the power-of-two: A[] += C[]
+ add A0, C0
+ adc A1, C1
+ adc A2, C2
+ adc A3, C3
+ ret
+ENDF __addmask_4
+#endif /* L_addmask_4 */
+
+#ifdef L_round_s4
+
+;; R25:R22 = round (R21:R18, R24)
+;; Clobbers: R18...R21
+DEFUN __roundsq3
+ subi RP, __SQ_FBIT__ - __SA_FBIT__
+ENDF __roundsq3
+DEFUN __roundsa3
+ subi RP, __SA_FBIT__ - 1
+ neg RP
+ ;; [ R25:R22 = 1 << (FBIT-1 - RP)
+ ;; R21:R18 += 1 << (FBIT-1 - RP) ]
+ XCALL __addmask_4
+ XJMP __round_s4_const
+ENDF __roundsa3
+
+#endif /* L_round_s4 */
+
+#ifdef L_round_u4
+
+;; R25:R22 = round (R21:R18, R24)
+;; Clobbers: R18...R21
+DEFUN __roundusq3
+ subi RP, __USQ_FBIT__ - __USA_FBIT__
+ENDF __roundusq3
+DEFUN __roundusa3
+ subi RP, __USA_FBIT__ - 1
+ neg RP
+ ;; [ R25:R22 = 1 << (FBIT-1 - RP)
+ ;; R21:R18 += 1 << (FBIT-1 - RP) ]
+ XCALL __addmask_4
+ XJMP __round_u4_const
+ENDF __roundusa3
+
+#endif /* L_round_u4 */
+
+
+#ifdef L_round_4_const
+
+;; Helpers for 4 byte wide rounding
+
+DEFUN __round_s4_const
+ brvc 2f
+ ldi C3, 0x7f
+ rjmp 1f
+ ;; FALLTHRU (Barrier)
+ENDF __round_s4_const
+
+DEFUN __round_u4_const
+ brcc 2f
+ ldi C3, 0xff
+1:
+ ldi C2, 0xff
+ ldi C1, 0xff
+ ldi C0, 0xff
+ rjmp 9f
+2:
+ ;; Saturation is performed now.
+ ;; Currently, we have C[] = 2^{-RP-1}
+ ;; C[] = 2^{-RP}
+ lsl C0
+ rol C1
+ rol C2
+ rol C3
+ XCALL __negsi2
+ ;; Clear the bits beyond the rounding point.
+ and C0, A0
+ and C1, A1
+ and C2, A2
+ and C3, A3
+9: ret
+ENDF __round_u4_const
+
+#endif /* L_round_4_const */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#undef RP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding, 8 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#define RP 16
+#define FBITm1 31
+
+#define C0 18
+#define C1 C0 + 1
+#define C2 C0 + 2
+#define C3 C0 + 3
+#define C4 C0 + 4
+#define C5 C0 + 5
+#define C6 C0 + 6
+#define C7 C0 + 7
+
+#define A0 16
+#define A1 17
+#define A2 26
+#define A3 27
+#define A4 28
+#define A5 29
+#define A6 30
+#define A7 31
+
+
+#ifdef L_rounddq3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __rounddq3
+ ldi FBITm1, __DQ_FBIT__ - 1
+ clt
+ XJMP __round_x8
+ENDF __rounddq3
+#endif /* L_rounddq3 */
+
+#ifdef L_roundudq3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __roundudq3
+ ldi FBITm1, __UDQ_FBIT__ - 1
+ set
+ XJMP __round_x8
+ENDF __roundudq3
+#endif /* L_roundudq3 */
+
+#ifdef L_roundda3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __roundda3
+ ldi FBITm1, __DA_FBIT__ - 1
+ clt
+ XJMP __round_x8
+ENDF __roundda3
+#endif /* L_roundda3 */
+
+#ifdef L_rounduda3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __rounduda3
+ ldi FBITm1, __UDA_FBIT__ - 1
+ set
+ XJMP __round_x8
+ENDF __rounduda3
+#endif /* L_rounduda3 */
+
+#ifdef L_roundta3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __roundta3
+ ldi FBITm1, __TA_FBIT__ - 1
+ clt
+ XJMP __round_x8
+ENDF __roundta3
+#endif /* L_roundta3 */
+
+#ifdef L_rounduta3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __rounduta3
+ ldi FBITm1, __UTA_FBIT__ - 1
+ set
+ XJMP __round_x8
+ENDF __rounduta3
+#endif /* L_rounduta3 */
+
+
+#ifdef L_round_x8
+DEFUN __round_x8
+ push r16
+ push r17
+ push r28
+ push r29
+ ;; Compute log2 of addend from rounding point
+ sub RP, FBITm1
+ neg RP
+ ;; Move input to work register A[]
+ push C0
+ mov A1, C1
+ wmov A2, C2
+ wmov A4, C4
+ wmov A6, C6
+ ;; C[] = 1 << (FBIT-1 - RP)
+ XCALL __clr_8
+ inc C0
+ XCALL __ashldi3
+ pop A0
+ ;; A[] += C[]
+ add A0, C0
+ adc A1, C1
+ adc A2, C2
+ adc A3, C3
+ adc A4, C4
+ adc A5, C5
+ adc A6, C6
+ adc A7, C7
+ brts 1f
+ ;; Signed
+ brvc 3f
+ ;; Signed overflow: A[] = 0x7f...
+ brvs 2f
+1: ;; Unsigned
+ brcc 3f
+ ;; Unsigned overflow: A[] = 0xff...
+2: ldi C7, 0xff
+ ldi C6, 0xff
+ wmov C0, C6
+ wmov C2, C6
+ wmov C4, C6
+ bld C7, 7
+ rjmp 9f
+3:
+ ;; C[] = -C[] - C[]
+ push A0
+ ldi r16, 1
+ XCALL __ashldi3
+ pop A0
+ XCALL __negdi2
+ ;; Clear the bits beyond the rounding point.
+ and C0, A0
+ and C1, A1
+ and C2, A2
+ and C3, A3
+ and C4, A4
+ and C5, A5
+ and C6, A6
+ and C7, A7
+9: ;; Epilogue
+ pop r29
+ pop r28
+ pop r17
+ pop r16
+ ret
+ENDF __round_x8
+
+#endif /* L_round_x8 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef A4
+#undef A5
+#undef A6
+#undef A7
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+
+#undef RP
+#undef FBITm1
+
+
+;; Supply implementations / symbols for the bit-banging functions
+;; __builtin_avr_bitsfx and __builtin_avr_fxbits
+#ifdef L_ret
+DEFUN __ret
+ ret
+ENDF __ret
+#endif /* L_ret */
diff --git a/gcc-4.9/libgcc/config/avr/lib1funcs.S b/gcc-4.9/libgcc/config/avr/lib1funcs.S
new file mode 100644
index 000000000..6f1c77edb
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/lib1funcs.S
@@ -0,0 +1,3226 @@
+/* -*- Mode: Asm -*- */
+/* Copyright (C) 1998-2014 Free Software Foundation, Inc.
+ Contributed by Denis Chertykov <chertykov@gmail.com>
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define __zero_reg__ r1
+#define __tmp_reg__ r0
+#define __SREG__ 0x3f
+#if defined (__AVR_HAVE_SPH__)
+#define __SP_H__ 0x3e
+#endif
+#define __SP_L__ 0x3d
+#define __RAMPZ__ 0x3B
+#define __EIND__ 0x3C
+
+/* Most of the functions here are called directly from avr.md
+ patterns, instead of using the standard libcall mechanisms.
+ This can make better code because GCC knows exactly which
+ of the call-used registers (not all of them) are clobbered. */
+
+/* FIXME: At present, there is no SORT directive in the linker
+ script so that we must not assume that different modules
+ in the same input section like .libgcc.text.mul will be
+ located close together. Therefore, we cannot use
+ RCALL/RJMP to call a function like __udivmodhi4 from
+ __divmodhi4 and have to use lengthy XCALL/XJMP even
+ though they are in the same input section and all same
+ input sections together are small enough to reach every
+ location with a RCALL/RJMP instruction. */
+
+ .macro mov_l r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ movw \r_dest, \r_src
+#else
+ mov \r_dest, \r_src
+#endif
+ .endm
+
+ .macro mov_h r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ ; empty
+#else
+ mov \r_dest, \r_src
+#endif
+ .endm
+
+.macro wmov r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ movw \r_dest, \r_src
+#else
+ mov \r_dest, \r_src
+ mov \r_dest+1, \r_src+1
+#endif
+.endm
+
+#if defined (__AVR_HAVE_JMP_CALL__)
+#define XCALL call
+#define XJMP jmp
+#else
+#define XCALL rcall
+#define XJMP rjmp
+#endif
+
+;; Prologue stuff
+
+.macro do_prologue_saves n_pushed n_frame=0
+ ldi r26, lo8(\n_frame)
+ ldi r27, hi8(\n_frame)
+ ldi r30, lo8(gs(.L_prologue_saves.\@))
+ ldi r31, hi8(gs(.L_prologue_saves.\@))
+ XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
+.L_prologue_saves.\@:
+.endm
+
+;; Epilogue stuff
+
+.macro do_epilogue_restores n_pushed n_frame=0
+ in r28, __SP_L__
+#ifdef __AVR_HAVE_SPH__
+ in r29, __SP_H__
+.if \n_frame > 63
+ subi r28, lo8(-\n_frame)
+ sbci r29, hi8(-\n_frame)
+.elseif \n_frame > 0
+ adiw r28, \n_frame
+.endif
+#else
+ clr r29
+.if \n_frame > 0
+ subi r28, lo8(-\n_frame)
+.endif
+#endif /* HAVE SPH */
+ ldi r30, \n_pushed
+ XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
+.endm
+
+;; Support function entry and exit for convenience
+
+.macro DEFUN name
+.global \name
+.func \name
+\name:
+.endm
+
+.macro ENDF name
+.size \name, .-\name
+.endfunc
+.endm
+
+.macro FALIAS name
+.global \name
+.func \name
+\name:
+.size \name, .-\name
+.endfunc
+.endm
+
+;; Skip next instruction, typically a jump target
+#define skip cpse 0,0
+
+;; Negate a 2-byte value held in consecutive registers
+.macro NEG2 reg
+ com \reg+1
+ neg \reg
+ sbci \reg+1, -1
+.endm
+
+;; Negate a 4-byte value held in consecutive registers
+;; Sets the V flag for signed overflow tests if REG >= 16
+.macro NEG4 reg
+ com \reg+3
+ com \reg+2
+ com \reg+1
+.if \reg >= 16
+ neg \reg
+ sbci \reg+1, -1
+ sbci \reg+2, -1
+ sbci \reg+3, -1
+.else
+ com \reg
+ adc \reg, __zero_reg__
+ adc \reg+1, __zero_reg__
+ adc \reg+2, __zero_reg__
+ adc \reg+3, __zero_reg__
+.endif
+.endm
+
+#define exp_lo(N) hlo8 ((N) << 23)
+#define exp_hi(N) hhi8 ((N) << 23)
+
+
+.section .text.libgcc.mul, "ax", @progbits
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
+#if !defined (__AVR_HAVE_MUL__)
+/*******************************************************
+ Multiplication 8 x 8 without MUL
+*******************************************************/
+#if defined (L_mulqi3)
+
+#define r_arg2 r22 /* multiplicand */
+#define r_arg1 r24 /* multiplier */
+#define r_res __tmp_reg__ /* result */
+
+DEFUN __mulqi3
+ clr r_res ; clear result
+__mulqi3_loop:
+ sbrc r_arg1,0
+ add r_res,r_arg2
+ add r_arg2,r_arg2 ; shift multiplicand
+ breq __mulqi3_exit ; while multiplicand != 0
+ lsr r_arg1 ;
+ brne __mulqi3_loop ; exit if multiplier = 0
+__mulqi3_exit:
+ mov r_arg1,r_res ; result to return register
+ ret
+ENDF __mulqi3
+
+#undef r_arg2
+#undef r_arg1
+#undef r_res
+
+#endif /* defined (L_mulqi3) */
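
Without a hardware MUL, __mulqi3 is the classic shift-and-add loop: add the multiplicand whenever the current multiplier bit is set, and stop as soon as either operand has been consumed. The same loop as a C sketch:

    #include <stdint.h>

    uint8_t mulqi (uint8_t a, uint8_t b)         /* 8 x 8 -> low 8 bits */
    {
      uint8_t r = 0;
      while (a != 0 && b != 0)
        {
          if (a & 1)
            r += b;                              /* add multiplicand for this bit */
          b <<= 1;                               /* shift multiplicand */
          a >>= 1;                               /* consume one multiplier bit */
        }
      return r;
    }
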
+
+
+/*******************************************************
+ Widening Multiplication 16 = 8 x 8 without MUL
+ Multiplication 16 x 16 without MUL
+*******************************************************/
+
+#define A0 r22
+#define A1 r23
+#define B0 r24
+#define BB0 r20
+#define B1 r25
+;; Output overlaps input, thus expand result in CC0/1
+#define C0 r24
+#define C1 r25
+#define CC0 __tmp_reg__
+#define CC1 R21
+
+#if defined (L_umulqihi3)
+;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
+;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
+;;; Clobbers: __tmp_reg__, R21..R23
+DEFUN __umulqihi3
+ clr A1
+ clr B1
+ XJMP __mulhi3
+ENDF __umulqihi3
+#endif /* L_umulqihi3 */
+
+#if defined (L_mulqihi3)
+;;; R25:R24 = (signed int) R22 * (signed int) R24
+;;; (C1:C0) = (signed int) A0 * (signed int) B0
+;;; Clobbers: __tmp_reg__, R20..R23
+DEFUN __mulqihi3
+ ;; Sign-extend B0
+ clr B1
+ sbrc B0, 7
+ com B1
+ ;; The multiplication runs twice as fast if A1 is zero, thus:
+ ;; Zero-extend A0
+ clr A1
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; Store B0 * sign of A
+ clr BB0
+ sbrc A0, 7
+ mov BB0, B0
+ call __mulhi3
+#else /* have no CALL */
+ ;; Skip sign-extension of A if A >= 0
+ ;; Same size as with the first alternative but avoids errata skip
+ ;; and is faster if A >= 0
+ sbrs A0, 7
+ rjmp __mulhi3
+ ;; If A < 0 store B
+ mov BB0, B0
+ rcall __mulhi3
+#endif /* HAVE_JMP_CALL */
+ ;; 1-extend A after the multiplication
+ sub C1, BB0
+ ret
+ENDF __mulqihi3
+#endif /* L_mulqihi3 */
+
+#if defined (L_mulhi3)
+;;; R25:R24 = R23:R22 * R25:R24
+;;; (C1:C0) = (A1:A0) * (B1:B0)
+;;; Clobbers: __tmp_reg__, R21..R23
+DEFUN __mulhi3
+
+ ;; Clear result
+ clr CC0
+ clr CC1
+ rjmp 3f
+1:
+ ;; Bit n of A is 1 --> C += B << n
+ add CC0, B0
+ adc CC1, B1
+2:
+ lsl B0
+ rol B1
+3:
+ ;; If B == 0 we are ready
+ sbiw B0, 0
+ breq 9f
+
+ ;; Carry = n-th bit of A
+ lsr A1
+ ror A0
+ ;; If bit n of A is set, then go add B * 2^n to C
+ brcs 1b
+
+ ;; Carry = 0 --> The ROR above acts like CP A0, 0
+ ;; Thus, it is sufficient to CPC the high part to test A against 0
+ cpc A1, __zero_reg__
+ ;; Only proceed if A != 0
+ brne 2b
+9:
+ ;; Move Result into place
+ mov C0, CC0
+ mov C1, CC1
+ ret
+ENDF __mulhi3
+#endif /* L_mulhi3 */
+
+#undef A0
+#undef A1
+#undef B0
+#undef BB0
+#undef B1
+#undef C0
+#undef C1
+#undef CC0
+#undef CC1
+
+
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define CC0 26
+#define CC1 CC0+1
+#define CC2 30
+#define CC3 CC2+1
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 16 without MUL
+*******************************************************/
+
+#if defined (L_umulhisi3)
+DEFUN __umulhisi3
+ wmov B0, 24
+ ;; Zero-extend B
+ clr B2
+ clr B3
+ ;; Zero-extend A
+ wmov A2, B2
+ XJMP __mulsi3
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
+
+#if defined (L_mulhisi3)
+DEFUN __mulhisi3
+ wmov B0, 24
+ ;; Sign-extend B
+ lsl r25
+ sbc B2, B2
+ mov B3, B2
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Sign-extend A
+ clr A2
+ sbrc A1, 7
+ com A2
+ mov A3, A2
+ XJMP __mulsi3
+#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
+ ;; Zero-extend A and __mulsi3 will run at least twice as fast
+ ;; compared to a sign-extended A.
+ clr A2
+ clr A3
+ sbrs A1, 7
+ XJMP __mulsi3
+ ;; If A < 0 then account for the B * 0xffff.... part up front by
+ ;; initializing the high part of the result CC with -B.
+ wmov CC2, A2
+ sub CC2, B0
+ sbc CC3, B1
+ XJMP __mulsi3_helper
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+
+/*******************************************************
+ Multiplication 32 x 32 without MUL
+*******************************************************/
+
+#if defined (L_mulsi3)
+DEFUN __mulsi3
+ ;; Clear result
+ clr CC2
+ clr CC3
+ ;; FALLTHRU
+ENDF __mulsi3
+
+DEFUN __mulsi3_helper
+ clr CC0
+ clr CC1
+ rjmp 3f
+
+1: ;; If bit n of A is set, then add B * 2^n to the result in CC
+ ;; CC += B
+ add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
+
+2: ;; B <<= 1
+ lsl B0 $ rol B1 $ rol B2 $ rol B3
+
+3: ;; A >>= 1: Carry = n-th bit of A
+ lsr A3 $ ror A2 $ ror A1 $ ror A0
+
+ brcs 1b
+ ;; Only continue if A != 0
+ sbci A1, 0
+ brne 2b
+ sbiw A2, 0
+ brne 2b
+
+ ;; All bits of A are consumed: Copy result to return register C
+ wmov C0, CC0
+ wmov C2, CC2
+ ret
+ENDF __mulsi3_helper
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
+
+#endif /* !defined (__AVR_HAVE_MUL__) */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#if defined (__AVR_HAVE_MUL__)
+#define A0 26
+#define B0 18
+#define C0 22
+
+#define A1 A0+1
+
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 16 with MUL
+*******************************************************/
+
+#if defined (L_mulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulhisi3
+ XCALL __umulhisi3
+ ;; Sign-extend B
+ tst B1
+ brpl 1f
+ sub C2, A0
+ sbc C3, A1
+1: ;; Sign-extend A
+ XJMP __usmulhisi3_tail
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+#if defined (L_usmulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __usmulhisi3
+ XCALL __umulhisi3
+ ;; FALLTHRU
+ENDF __usmulhisi3
+
+DEFUN __usmulhisi3_tail
+ ;; Sign-extend A
+ sbrs A1, 7
+ ret
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __usmulhisi3_tail
+#endif /* L_usmulhisi3 */
+
+#if defined (L_umulhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __umulhisi3
+ mul A0, B0
+ movw C0, r0
+ mul A1, B1
+ movw C2, r0
+ mul A0, B1
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; This function is used by many other routines, often multiple times.
+ ;; Therefore, if the flash size is not too limited, avoid the RCALL
+ ;; and invest 6 Bytes to speed things up.
+ add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+#else
+ rcall 1f
+#endif
+ mul A1, B0
+1: add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+ ret
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
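
With MUL available, the widening 16 x 16 multiply is the schoolbook combination of four 8 x 8 products, exactly the four MUL instructions above. Its value, spelled out as a C sketch:

    #include <stdint.h>

    uint32_t umulhisi (uint16_t a, uint16_t b)
    {
      uint8_t a0 = (uint8_t) a, a1 = (uint8_t) (a >> 8);
      uint8_t b0 = (uint8_t) b, b1 = (uint8_t) (b >> 8);
      uint32_t c = (uint32_t) a0 * b0            /* low  partial product */
                 + ((uint32_t) a1 * b1 << 16);   /* high partial product */
      c += (uint32_t) a0 * b1 << 8;              /* middle partial products */
      c += (uint32_t) a1 * b0 << 8;
      return c;
    }
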
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 32 with MUL
+*******************************************************/
+
+#if defined (L_mulshisi3)
+;;; R25:R22 = (signed long) R27:R26 * R21:R18
+;;; (C3:C0) = (signed long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulshisi3
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have a problem skipping a 2-word instruction
+ tst A1
+ brmi __mulohisi3
+#else
+ sbrs A1, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XJMP __muluhisi3
+ ;; FALLTHRU
+ENDF __mulshisi3
+
+;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
+;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulohisi3
+ XCALL __muluhisi3
+ ;; One-extend R27:R26 (A1:A0)
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __mulohisi3
+#endif /* L_mulshisi3 */
+
+#if defined (L_muluhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
+;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __muluhisi3
+ XCALL __umulhisi3
+ mul A0, B3
+ add C3, r0
+ mul A1, B2
+ add C3, r0
+ mul A0, B2
+ add C2, r0
+ adc C3, r1
+ clr __zero_reg__
+ ret
+ENDF __muluhisi3
+#endif /* L_muluhisi3 */
+
+/*******************************************************
+ Multiplication 32 x 32 with MUL
+*******************************************************/
+
+#if defined (L_mulsi3)
+;;; R25:R22 = R25:R22 * R21:R18
+;;; (C3:C0) = C3:C0 * B3:B0
+;;; Clobbers: R26, R27, __tmp_reg__
+DEFUN __mulsi3
+ movw A0, C0
+ push C2
+ push C3
+ XCALL __muluhisi3
+ pop A1
+ pop A0
+ ;; A1:A0 now contains the high word of A
+ mul A0, B0
+ add C2, r0
+ adc C3, r1
+ mul A0, B1
+ add C3, r0
+ mul A1, B0
+ add C3, r0
+ clr __zero_reg__
+ ret
+ENDF __mulsi3
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#endif /* __AVR_HAVE_MUL__ */
+
+/*******************************************************
+ Multiplication 24 x 24 with MUL
+*******************************************************/
+
+#if defined (L_mulpsi3)
+
+;; A[0..2]: In: Multiplicand; Out: Product
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+
+;; B[0..2]: In: Multiplier
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; C[0..2]: Expand Result
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+
+;; R24:R22 *= R20:R18
+;; Clobbers: r21, r25, r26, r27, __tmp_reg__
+
+#define AA0 26
+#define AA2 21
+
+DEFUN __mulpsi3
+ wmov AA0, A0
+ mov AA2, A2
+ XCALL __umulhisi3
+ mul AA2, B0 $ add C2, r0
+ mul AA0, B2 $ add C2, r0
+ clr __zero_reg__
+ ret
+ENDF __mulpsi3
+
+#undef AA2
+#undef AA0
+
+#undef C2
+#undef C1
+#undef C0
+
+#else /* !HAVE_MUL */
+
+;; C[0..2]: Expand Result
+#define C0 0
+#define C1 C0+1
+#define C2 21
+
+;; R24:R22 *= R20:R18
+;; Clobbers: __tmp_reg__, R18, R19, R20, R21
+
+DEFUN __mulpsi3
+
+ ;; C[] = 0
+ clr __tmp_reg__
+ clr C2
+
+0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
+ LSR B2 $ ror B1 $ ror B0
+
+ ;; If the N-th Bit of B[] was set...
+ brcc 1f
+
+ ;; ...then add A[] * 2^N to the Result C[]
+ ADD C0,A0 $ adc C1,A1 $ adc C2,A2
+
+1: ;; Multiply A[] by 2
+ LSL A0 $ rol A1 $ rol A2
+
+ ;; Loop until B[] is 0
+ subi B0,0 $ sbci B1,0 $ sbci B2,0
+ brne 0b
+
+ ;; Copy C[] to the return Register A[]
+ wmov A0, C0
+ mov A2, C2
+
+ clr __zero_reg__
+ ret
+ENDF __mulpsi3
+
+#undef C2
+#undef C1
+#undef C0
+
+#endif /* HAVE_MUL */
+
+#undef B2
+#undef B1
+#undef B0
+
+#undef A2
+#undef A1
+#undef A0
+
+#endif /* L_mulpsi3 */
+
+#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
+
+;; A[0..2]: In: Multiplicand
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+
+;; BB: In: Multiplier
+#define BB 25
+
+;; C[0..2]: Result
+#define C0 18
+#define C1 C0+1
+#define C2 C0+2
+
+;; C[] = A[] * sign_extend (BB)
+DEFUN __mulsqipsi3
+ mul A0, BB
+ movw C0, r0
+ mul A2, BB
+ mov C2, r0
+ mul A1, BB
+ add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ sbrs BB, 7
+ ret
+ ;; One-extend BB
+ sub C1, A0
+ sbc C2, A1
+ ret
+ENDF __mulsqipsi3
+
+#undef C2
+#undef C1
+#undef C0
+
+#undef BB
+
+#undef A2
+#undef A1
+#undef A0
+
+#endif /* L_mulsqipsi3 && HAVE_MUL */
+
+/*******************************************************
+ Multiplication 64 x 64
+*******************************************************/
+
+;; A[] = A[] * B[]
+
+;; A[0..7]: In: Multiplicand
+;; Out: Product
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+;; B[0..7]: In: Multiplier
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; Define C[] for convenience
+;; Notice that parts of C[] overlap A[] and B[], respectively
+#define C0 16
+#define C1 C0+1
+#define C2 20
+#define C3 C2+1
+#define C4 28
+#define C5 C4+1
+#define C6 C4+2
+#define C7 C4+3
+
+#if defined (L_muldi3)
+
+;; A[] *= B[]
+;; R25:R18 *= R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __muldi3
+ push r29
+ push r28
+ push r17
+ push r16
+
+ ;; Counting in Words, we have to perform a 4 * 4 Multiplication
+
+ ;; 3 * 0 + 0 * 3
+ mul A7,B0 $ $ mov C7,r0
+ mul A0,B7 $ $ add C7,r0
+ mul A6,B1 $ $ add C7,r0
+ mul A6,B0 $ mov C6,r0 $ add C7,r1
+ mul B6,A1 $ $ add C7,r0
+ mul B6,A0 $ add C6,r0 $ adc C7,r1
+
+ ;; 1 * 2
+ mul A2,B4 $ add C6,r0 $ adc C7,r1
+ mul A3,B4 $ $ add C7,r0
+ mul A2,B5 $ $ add C7,r0
+
+ push A5
+ push A4
+ push B1
+ push B0
+ push A3
+ push A2
+
+ ;; 0 * 0
+ wmov 26, B0
+ XCALL __umulhisi3
+ wmov C0, 22
+ wmov C2, 24
+
+ ;; 0 * 2
+ wmov 26, B4
+ XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
+
+ wmov 26, B2
+ ;; 0 * 1
+ XCALL __muldi3_6
+
+ pop A0
+ pop A1
+ ;; 1 * 1
+ wmov 26, B2
+ XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
+
+ pop r26
+ pop r27
+ ;; 1 * 0
+ XCALL __muldi3_6
+
+ pop A0
+ pop A1
+ ;; 2 * 0
+ XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
+
+ ;; 2 * 1
+ wmov 26, B2
+ XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
+
+ ;; A[] = C[]
+ wmov A0, C0
+ ;; A2 = C2 already
+ wmov A4, C4
+ wmov A6, C6
+
+ clr __zero_reg__
+ pop r16
+ pop r17
+ pop r28
+ pop r29
+ ret
+ENDF __muldi3
+#endif /* L_muldi3 */
+
+#if defined (L_muldi3_6)
+;; A helper for some 64-bit multiplications with MUL available
+DEFUN __muldi3_6
+__muldi3_6:
+ XCALL __umulhisi3
+ add C2, 22
+ adc C3, 23
+ adc C4, 24
+ adc C5, 25
+ brcc 0f
+ adiw C6, 1
+0: ret
+ENDF __muldi3_6
+#endif /* L_muldi3_6 */
+
+#undef C7
+#undef C6
+#undef C5
+#undef C4
+#undef C3
+#undef C2
+#undef C1
+#undef C0
+
+#else /* !HAVE_MUL */
+
+#if defined (L_muldi3)
+
+#define C0 26
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+#define C4 C0+4
+#define C5 C0+5
+#define C6 0
+#define C7 C6+1
+
+#define Loop 9
+
+;; A[] *= B[]
+;; R25:R18 *= R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __muldi3
+ push r29
+ push r28
+ push Loop
+
+ ldi C0, 64
+ mov Loop, C0
+
+ ;; C[] = 0
+ clr __tmp_reg__
+ wmov C0, 0
+ wmov C2, 0
+ wmov C4, 0
+
+0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
+ ;; where N = 64 - Loop.
+ ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
+ ;; B[] will have its initial Value again.
+ LSR B7 $ ror B6 $ ror B5 $ ror B4
+ ror B3 $ ror B2 $ ror B1 $ ror B0
+
+ ;; If the N-th Bit of B[] was set then...
+ brcc 1f
+ ;; ...finish Rotation...
+ ori B7, 1 << 7
+
+ ;; ...and add A[] * 2^N to the Result C[]
+ ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
+ adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
+
+1: ;; Multiply A[] by 2
+ LSL A0 $ rol A1 $ rol A2 $ rol A3
+ rol A4 $ rol A5 $ rol A6 $ rol A7
+
+ dec Loop
+ brne 0b
+
+ ;; We expanded the Result in C[]
+ ;; Copy Result to the Return Register A[]
+ wmov A0, C0
+ wmov A2, C2
+ wmov A4, C4
+ wmov A6, C6
+
+ clr __zero_reg__
+ pop Loop
+ pop r28
+ pop r29
+ ret
+ENDF __muldi3
+
+#undef Loop
+
+#undef C7
+#undef C6
+#undef C5
+#undef C4
+#undef C3
+#undef C2
+#undef C1
+#undef C0
+
+#endif /* L_muldi3 */
+#endif /* HAVE_MUL */
+
+#undef B7
+#undef B6
+#undef B5
+#undef B4
+#undef B3
+#undef B2
+#undef B1
+#undef B0
+
+#undef A7
+#undef A6
+#undef A5
+#undef A4
+#undef A3
+#undef A2
+#undef A1
+#undef A0
+
+/*******************************************************
+ Widening Multiplication 64 = 32 x 32 with MUL
+*******************************************************/
+
+#if defined (__AVR_HAVE_MUL__)
+#define A0 r22
+#define A1 r23
+#define A2 r24
+#define A3 r25
+
+#define B0 r18
+#define B1 r19
+#define B2 r20
+#define B3 r21
+
+#define C0 18
+#define C1 C0+1
+#define C2 20
+#define C3 C2+1
+#define C4 28
+#define C5 C4+1
+#define C6 C4+2
+#define C7 C4+3
+
+#if defined (L_umulsidi3)
+
+;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
+
+;; R18[8] = R22[4] * R18[4]
+;;
+;; Ordinary ABI Function, but additionally sets
+;; X = R20[2] = B2[2]
+;; Z = R22[2] = A0[2]
+DEFUN __umulsidi3
+ clt
+ ;; FALLTHRU
+ENDF __umulsidi3
+ ;; T = sign (A)
+DEFUN __umulsidi3_helper
+ push 29 $ push 28 ; Y
+ wmov 30, A2
+ ;; Counting in Words, we have to perform 4 Multiplications
+ ;; 0 * 0
+ wmov 26, A0
+ XCALL __umulhisi3
+ push 23 $ push 22 ; C0
+ wmov 28, B0
+ wmov 18, B2
+ wmov C2, 24
+ push 27 $ push 26 ; A0
+ push 19 $ push 18 ; B2
+ ;;
+ ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
+ ;; B2 C2 -- -- -- B0 A2
+ ;; 1 * 1
+ wmov 26, 30 ; A2
+ XCALL __umulhisi3
+ ;; Sign-extend A. T holds the sign of A
+ brtc 0f
+ ;; Subtract B from the high part of the result
+ sub 22, 28
+ sbc 23, 29
+ sbc 24, 18
+ sbc 25, 19
+0: wmov 18, 28 ;; B0
+ wmov C4, 22
+ wmov C6, 24
+ ;;
+ ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
+ ;; B0 C2 -- -- A2 C4 C6
+ ;;
+ ;; 1 * 0
+ XCALL __muldi3_6
+ ;; 0 * 1
+ pop 26 $ pop 27 ;; B2
+ pop 18 $ pop 19 ;; A0
+ XCALL __muldi3_6
+
+ ;; Move result C into place and save A0 in Z
+ wmov 22, C4
+ wmov 24, C6
+ wmov 30, 18 ; A0
+ pop C0 $ pop C1
+
+ ;; Epilogue
+ pop 28 $ pop 29 ;; Y
+ ret
+ENDF __umulsidi3_helper
+#endif /* L_umulsidi3 */
+
+
+#if defined (L_mulsidi3)
+
+;; Signed widening 64 = 32 * 32 Multiplication
+;;
+;; R18[8] = R22[4] * R18[4]
+;; Ordinary ABI Function
+DEFUN __mulsidi3
+ bst A3, 7
+ sbrs B3, 7 ; Enhanced core has no skip bug
+ XJMP __umulsidi3_helper
+
+ ;; B needs sign-extension
+ push A3
+ push A2
+ XCALL __umulsidi3_helper
+ ;; A0 survived in Z
+ sub r22, r30
+ sbc r23, r31
+ pop r26
+ pop r27
+ sbc r24, r26
+ sbc r25, r27
+ ret
+ENDF __mulsidi3
+#endif /* L_mulsidi3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#endif /* HAVE_MUL */
+
+/**********************************************************
+ Widening Multiplication 64 = 32 x 32 without MUL
+**********************************************************/
+
+#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+#define AA0 22
+#define AA1 AA0+1
+#define AA2 AA0+2
+#define AA3 AA0+3
+
+#define BB0 18
+#define BB1 BB0+1
+#define BB2 BB0+2
+#define BB3 BB0+3
+
+#define Mask r30
+
+;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
+;;
+;; R18[8] = R22[4] * R18[4]
+;; Ordinary ABI Function
+DEFUN __mulsidi3
+ set
+ skip
+ ;; FALLTHRU
+ENDF __mulsidi3
+
+DEFUN __umulsidi3
+ clt ; skipped
+ ;; Save 10 Registers: R10..R17, R28, R29
+ do_prologue_saves 10
+ ldi Mask, 0xff
+ bld Mask, 7
+ ;; Move B into place...
+ wmov B0, BB0
+ wmov B2, BB2
+ ;; ...and extend it
+ and BB3, Mask
+ lsl BB3
+ sbc B4, B4
+ mov B5, B4
+ wmov B6, B4
+ ;; Move A into place...
+ wmov A0, AA0
+ wmov A2, AA2
+ ;; ...and extend it
+ and AA3, Mask
+ lsl AA3
+ sbc A4, A4
+ mov A5, A4
+ wmov A6, A4
+ XCALL __muldi3
+ do_epilogue_restores 10
+ENDF __umulsidi3
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef A4
+#undef A5
+#undef A6
+#undef A7
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef B4
+#undef B5
+#undef B6
+#undef B7
+#undef AA0
+#undef AA1
+#undef AA2
+#undef AA3
+#undef BB0
+#undef BB1
+#undef BB2
+#undef BB3
+#undef Mask
+#endif /* L_mulsidi3 && !HAVE_MUL */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+.section .text.libgcc.div, "ax", @progbits
+
+/*******************************************************
+ Division 8 / 8 => (result + remainder)
+*******************************************************/
+#define r_rem r25 /* remainder */
+#define r_arg1 r24 /* dividend, quotient */
+#define r_arg2 r22 /* divisor */
+#define r_cnt r23 /* loop count */
+
+#if defined (L_udivmodqi4)
+DEFUN __udivmodqi4
+ sub r_rem,r_rem ; clear remainder and carry
+ ldi r_cnt,9 ; init loop counter
+ rjmp __udivmodqi4_ep ; jump to entry point
+__udivmodqi4_loop:
+ rol r_rem ; shift dividend into remainder
+ cp r_rem,r_arg2 ; compare remainder & divisor
+ brcs __udivmodqi4_ep ; remainder < divisor
+ sub r_rem,r_arg2 ; restore remainder
+__udivmodqi4_ep:
+ rol r_arg1 ; shift dividend (with CARRY)
+ dec r_cnt ; decrement loop counter
+ brne __udivmodqi4_loop
+ com r_arg1 ; complement result
+ ; because C flag was complemented in loop
+ ret
+ENDF __udivmodqi4
+#endif /* defined (L_udivmodqi4) */
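+
+;; A rough C model of the shift-and-subtract loop above (an illustrative
+;; sketch, not part of the assembled library):
+;;
+;;   uint8_t q = 0, r = 0;
+;;   for (uint8_t i = 0; i < 8; i++)
+;;     {
+;;       r = (r << 1) | (a >> 7);        /* shift dividend into remainder */
+;;       a <<= 1;
+;;       q <<= 1;
+;;       if (r >= b)                     /* divisor fits into remainder   */
+;;         { r -= b; q |= 1; }
+;;     }
+;;   /* q = a / b, r = a % b.  The routine above shifts the quotient bits
+;;      into the dividend register via the (complemented) carry, hence the
+;;      final COM. */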
+
+#if defined (L_divmodqi4)
+DEFUN __divmodqi4
+ bst r_arg1,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg1
+ eor __tmp_reg__,r_arg2; r0.7 is sign of result
+ sbrc r_arg1,7
+ neg r_arg1 ; dividend negative : negate
+ sbrc r_arg2,7
+ neg r_arg2 ; divisor negative : negate
+ XCALL __udivmodqi4 ; do the unsigned div/mod
+ brtc __divmodqi4_1
+ neg r_rem ; correct remainder sign
+__divmodqi4_1:
+ sbrc __tmp_reg__,7
+ neg r_arg1 ; correct result sign
+__divmodqi4_exit:
+ ret
+ENDF __divmodqi4
+#endif /* defined (L_divmodqi4) */
+
+#undef r_rem
+#undef r_arg1
+#undef r_arg2
+#undef r_cnt
+
+
+/*******************************************************
+ Division 16 / 16 => (result + remainder)
+*******************************************************/
+#define r_remL r26 /* remainder Low */
+#define r_remH r27 /* remainder High */
+
+/* return: remainder */
+#define r_arg1L r24 /* dividend Low */
+#define r_arg1H r25 /* dividend High */
+
+/* return: quotient */
+#define r_arg2L r22 /* divisor Low */
+#define r_arg2H r23 /* divisor High */
+
+#define r_cnt r21 /* loop count */
+
+#if defined (L_udivmodhi4)
+DEFUN __udivmodhi4
+ sub r_remL,r_remL
+ sub r_remH,r_remH ; clear remainder and carry
+ ldi r_cnt,17 ; init loop counter
+ rjmp __udivmodhi4_ep ; jump to entry point
+__udivmodhi4_loop:
+ rol r_remL ; shift dividend into remainder
+ rol r_remH
+ cp r_remL,r_arg2L ; compare remainder & divisor
+ cpc r_remH,r_arg2H
+ brcs __udivmodhi4_ep ; remainder < divisor
+ sub r_remL,r_arg2L ; restore remainder
+ sbc r_remH,r_arg2H
+__udivmodhi4_ep:
+ rol r_arg1L ; shift dividend (with CARRY)
+ rol r_arg1H
+ dec r_cnt ; decrement loop counter
+ brne __udivmodhi4_loop
+ com r_arg1L
+ com r_arg1H
+; div/mod results to return registers, as for the div() function
+ mov_l r_arg2L, r_arg1L ; quotient
+ mov_h r_arg2H, r_arg1H
+ mov_l r_arg1L, r_remL ; remainder
+ mov_h r_arg1H, r_remH
+ ret
+ENDF __udivmodhi4
+#endif /* defined (L_udivmodhi4) */
+
+#if defined (L_divmodhi4)
+DEFUN __divmodhi4
+ .global _div
+_div:
+ bst r_arg1H,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg2H
+ brtc 0f
+ com __tmp_reg__ ; r0.7 is sign of result
+ rcall __divmodhi4_neg1 ; dividend negative: negate
+0:
+ sbrc r_arg2H,7
+ rcall __divmodhi4_neg2 ; divisor negative: negate
+ XCALL __udivmodhi4 ; do the unsigned div/mod
+ sbrc __tmp_reg__,7
+ rcall __divmodhi4_neg2 ; correct remainder sign
+ brtc __divmodhi4_exit
+__divmodhi4_neg1:
+ ;; correct dividend/remainder sign
+ com r_arg1H
+ neg r_arg1L
+ sbci r_arg1H,0xff
+ ret
+__divmodhi4_neg2:
+ ;; correct divisor/result sign
+ com r_arg2H
+ neg r_arg2L
+ sbci r_arg2H,0xff
+__divmodhi4_exit:
+ ret
+ENDF __divmodhi4
+#endif /* defined (L_divmodhi4) */
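+
+;; The signed div/mod routines in this file all follow the same pattern;
+;; roughly, in C (an illustrative sketch that ignores the INT_MIN corner case):
+;;
+;;   int neg_a = a < 0, neg_b = b < 0;
+;;   unsigned ua = neg_a ? -a : a, ub = neg_b ? -b : b;
+;;   unsigned q = ua / ub, r = ua % ub;
+;;   if (neg_a != neg_b) q = -q;   /* quotient sign = sign(a) ^ sign(b)   */
+;;   if (neg_a)          r = -r;   /* remainder takes the dividend's sign */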
+
+#undef r_remH
+#undef r_remL
+
+#undef r_arg1H
+#undef r_arg1L
+
+#undef r_arg2H
+#undef r_arg2L
+
+#undef r_cnt
+
+/*******************************************************
+ Division 24 / 24 => (result + remainder)
+*******************************************************/
+
+;; A[0..2]: In: Dividend; Out: Quotient
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+
+;; B[0..2]: In: Divisor; Out: Remainder
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+
+;; C[0..2]: Expand remainder
+#define C0 __zero_reg__
+#define C1 26
+#define C2 25
+
+;; Loop counter
+#define r_cnt 21
+
+#if defined (L_udivmodpsi4)
+;; R24:R22 = R24:R22 udiv R20:R18
+;; R20:R18 = R24:R22 umod R20:R18
+;; Clobbers: R21, R25, R26
+
+DEFUN __udivmodpsi4
+ ; init loop counter
+ ldi r_cnt, 24+1
+ ; Clear remainder and carry. C0 is already 0
+ clr C1
+ sub C2, C2
+ ; jump to entry point
+ rjmp __udivmodpsi4_start
+__udivmodpsi4_loop:
+ ; shift dividend into remainder
+ rol C0
+ rol C1
+ rol C2
+ ; compare remainder & divisor
+ cp C0, B0
+ cpc C1, B1
+ cpc C2, B2
+ brcs __udivmodpsi4_start ; remainder < divisor
+ sub C0, B0 ; restore remainder
+ sbc C1, B1
+ sbc C2, B2
+__udivmodpsi4_start:
+ ; shift dividend (with CARRY)
+ rol A0
+ rol A1
+ rol A2
+ ; decrement loop counter
+ dec r_cnt
+ brne __udivmodpsi4_loop
+ com A0
+ com A1
+ com A2
+ ; div/mod results to return registers
+ ; remainder
+ mov B0, C0
+ mov B1, C1
+ mov B2, C2
+ clr __zero_reg__ ; C0
+ ret
+ENDF __udivmodpsi4
+#endif /* defined (L_udivmodpsi4) */
+
+#if defined (L_divmodpsi4)
+;; R24:R22 = R24:R22 div R20:R18
+;; R20:R18 = R24:R22 mod R20:R18
+;; Clobbers: T, __tmp_reg__, R21, R25, R26
+
+DEFUN __divmodpsi4
+ ; R0.7 will contain the sign of the result:
+ ; R0.7 = A.sign ^ B.sign
+ mov __tmp_reg__, B2
+ ; T-flag = sign of dividend
+ bst A2, 7
+ brtc 0f
+ com __tmp_reg__
+ ; Adjust dividend's sign
+ rcall __divmodpsi4_negA
+0:
+ ; Adjust divisor's sign
+ sbrc B2, 7
+ rcall __divmodpsi4_negB
+
+ ; Do the unsigned div/mod
+ XCALL __udivmodpsi4
+
+ ; Adjust quotient's sign
+ sbrc __tmp_reg__, 7
+ rcall __divmodpsi4_negA
+
+ ; Adjust remainder's sign
+ brtc __divmodpsi4_end
+
+__divmodpsi4_negB:
+ ; Correct divisor/remainder sign
+ com B2
+ com B1
+ neg B0
+ sbci B1, -1
+ sbci B2, -1
+ ret
+
+ ; Correct dividend/quotient sign
+__divmodpsi4_negA:
+ com A2
+ com A1
+ neg A0
+ sbci A1, -1
+ sbci A2, -1
+__divmodpsi4_end:
+ ret
+
+ENDF __divmodpsi4
+#endif /* defined (L_divmodpsi4) */
+
+#undef A0
+#undef A1
+#undef A2
+
+#undef B0
+#undef B1
+#undef B2
+
+#undef C0
+#undef C1
+#undef C2
+
+#undef r_cnt
+
+/*******************************************************
+ Division 32 / 32 => (result + remainder)
+*******************************************************/
+#define r_remHH r31 /* remainder High */
+#define r_remHL r30
+#define r_remH r27
+#define r_remL r26 /* remainder Low */
+
+/* return: remainder */
+#define r_arg1HH r25 /* dividend High */
+#define r_arg1HL r24
+#define r_arg1H r23
+#define r_arg1L r22 /* dividend Low */
+
+/* return: quotient */
+#define r_arg2HH r21 /* divisor High */
+#define r_arg2HL r20
+#define r_arg2H r19
+#define r_arg2L r18 /* divisor Low */
+
+#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
+
+#if defined (L_udivmodsi4)
+DEFUN __udivmodsi4
+ ldi r_remL, 33 ; init loop counter
+ mov r_cnt, r_remL
+ sub r_remL,r_remL
+ sub r_remH,r_remH ; clear remainder and carry
+ mov_l r_remHL, r_remL
+ mov_h r_remHH, r_remH
+ rjmp __udivmodsi4_ep ; jump to entry point
+__udivmodsi4_loop:
+ rol r_remL ; shift dividend into remainder
+ rol r_remH
+ rol r_remHL
+ rol r_remHH
+ cp r_remL,r_arg2L ; compare remainder & divisor
+ cpc r_remH,r_arg2H
+ cpc r_remHL,r_arg2HL
+ cpc r_remHH,r_arg2HH
+ brcs __udivmodsi4_ep ; remainder < divisor
+ sub r_remL,r_arg2L ; restore remainder
+ sbc r_remH,r_arg2H
+ sbc r_remHL,r_arg2HL
+ sbc r_remHH,r_arg2HH
+__udivmodsi4_ep:
+ rol r_arg1L ; shift dividend (with CARRY)
+ rol r_arg1H
+ rol r_arg1HL
+ rol r_arg1HH
+ dec r_cnt ; decrement loop counter
+ brne __udivmodsi4_loop
+ ; __zero_reg__ now restored (r_cnt == 0)
+ com r_arg1L
+ com r_arg1H
+ com r_arg1HL
+ com r_arg1HH
+; div/mod results to return registers, as for the ldiv() function
+ mov_l r_arg2L, r_arg1L ; quotient
+ mov_h r_arg2H, r_arg1H
+ mov_l r_arg2HL, r_arg1HL
+ mov_h r_arg2HH, r_arg1HH
+ mov_l r_arg1L, r_remL ; remainder
+ mov_h r_arg1H, r_remH
+ mov_l r_arg1HL, r_remHL
+ mov_h r_arg1HH, r_remHH
+ ret
+ENDF __udivmodsi4
+#endif /* defined (L_udivmodsi4) */
+
+#if defined (L_divmodsi4)
+DEFUN __divmodsi4
+ mov __tmp_reg__,r_arg2HH
+ bst r_arg1HH,7 ; store sign of dividend
+ brtc 0f
+ com __tmp_reg__ ; r0.7 is sign of result
+ XCALL __negsi2 ; dividend negative: negate
+0:
+ sbrc r_arg2HH,7
+ rcall __divmodsi4_neg2 ; divisor negative: negate
+ XCALL __udivmodsi4 ; do the unsigned div/mod
+ sbrc __tmp_reg__, 7 ; correct quotient sign
+ rcall __divmodsi4_neg2
+ brtc __divmodsi4_exit ; correct remainder sign
+ XJMP __negsi2
+__divmodsi4_neg2:
+ ;; correct divisor/quotient sign
+ com r_arg2HH
+ com r_arg2HL
+ com r_arg2H
+ neg r_arg2L
+ sbci r_arg2H,0xff
+ sbci r_arg2HL,0xff
+ sbci r_arg2HH,0xff
+__divmodsi4_exit:
+ ret
+ENDF __divmodsi4
+#endif /* defined (L_divmodsi4) */
+
+#if defined (L_negsi2)
+;; (set (reg:SI 22)
+;; (neg:SI (reg:SI 22)))
+;; Sets the V flag for signed overflow tests
+DEFUN __negsi2
+ NEG4 22
+ ret
+ENDF __negsi2
+#endif /* L_negsi2 */
+
+#undef r_remHH
+#undef r_remHL
+#undef r_remH
+#undef r_remL
+#undef r_arg1HH
+#undef r_arg1HL
+#undef r_arg1H
+#undef r_arg1L
+#undef r_arg2HH
+#undef r_arg2HL
+#undef r_arg2H
+#undef r_arg2L
+#undef r_cnt
+
+/*******************************************************
+ Division 64 / 64
+ Modulo 64 % 64
+*******************************************************/
+
+;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
+;; at least 16k of Program Memory. For smaller Devices, depend
+;; on MOVW and SP Size. There is a Connexion between SP Size and
+;; Flash Size so that SP Size can be used to test for Flash Size.
+
+#if defined (__AVR_HAVE_JMP_CALL__)
+# define SPEED_DIV 8
+#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
+# define SPEED_DIV 16
+#else
+# define SPEED_DIV 0
+#endif
+
+;; A[0..7]: In: Dividend;
+;; Out: Quotient (T = 0)
+;; Out: Remainder (T = 1)
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+;; B[0..7]: In: Divisor; Out: Clobber
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+;; C[0..7]: Expand remainder; Out: Remainder (unused)
+#define C0 8
+#define C1 C0+1
+#define C2 30
+#define C3 C2+1
+#define C4 28
+#define C5 C4+1
+#define C6 26
+#define C7 C6+1
+
+;; Holds Signs during Division Routine
+#define SS __tmp_reg__
+
+;; Bit-Counter in Division Routine
+#define R_cnt __zero_reg__
+
+;; Scratch Register for Negation
+#define NN r31
+
+#if defined (L_udivdi3)
+
+;; R25:R18 = R24:R18 umod R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __umoddi3
+ set
+ rjmp __udivdi3_umoddi3
+ENDF __umoddi3
+
+;; R25:R18 = R24:R18 udiv R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __udivdi3
+ clt
+ENDF __udivdi3
+
+DEFUN __udivdi3_umoddi3
+ push C0
+ push C1
+ push C4
+ push C5
+ XCALL __udivmod64
+ pop C5
+ pop C4
+ pop C1
+ pop C0
+ ret
+ENDF __udivdi3_umoddi3
+#endif /* L_udivdi3 */
+
+#if defined (L_udivmod64)
+
+;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
+;; No Registers saved/restored; the Callers will take Care.
+;; Preserves B[] and T-flag
+;; T = 0: Compute Quotient in A[]
+;; T = 1: Compute Remainder in A[] and shift SS one Bit left
+
+DEFUN __udivmod64
+
+ ;; Clear Remainder (C6, C7 will follow)
+ clr C0
+ clr C1
+ wmov C2, C0
+ wmov C4, C0
+ ldi C7, 64
+
+#if SPEED_DIV == 0 || SPEED_DIV == 16
+ ;; Initialize Loop-Counter
+ mov R_cnt, C7
+ wmov C6, C0
+#endif /* SPEED_DIV */
+
+#if SPEED_DIV == 8
+
+ push A7
+ clr C6
+
+1: ;; Compare shifted Dividend against Divisor
+ ;; If -- even after Shifting -- it is smaller...
+ CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
+ cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
+ brcc 2f
+
+ ;; ...then we can subtract it. Thus, it is legal to shift left
+ $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
+ mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
+ mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
+ mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
+
+ ;; 8 Bits are done
+ subi C7, 8
+ brne 1b
+
+ ;; Shifted 64 Bits: A7 has traveled to C7
+ pop C7
+ ;; Divisor is greater than Dividend. We have:
+ ;; A[] % B[] = A[]
+ ;; A[] / B[] = 0
+ ;; Thus, we can return immediately
+ rjmp 5f
+
+2: ;; Initialize Bit-Counter with the Number of Bits still to be processed
+ mov R_cnt, C7
+
+ ;; Push of A7 is not needed because C7 is still 0
+ pop C7
+ clr C7
+
+#elif SPEED_DIV == 16
+
+ ;; Compare shifted Dividend against Divisor
+ cp A7, B3
+ cpc C0, B4
+ cpc C1, B5
+ cpc C2, B6
+ cpc C3, B7
+ brcc 2f
+
+ ;; Divisor is greater than shifted Dividend: We can shift the Dividend
+ ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
+ wmov C2,A6 $ wmov C0,A4
+ wmov A6,A2 $ wmov A4,A0
+ wmov A2,C6 $ wmov A0,C4
+
+ ;; Set Bit Counter to 32
+ lsr R_cnt
+2:
+#elif SPEED_DIV
+#error SPEED_DIV = ?
+#endif /* SPEED_DIV */
+
+;; The very Division + Remainder Routine
+
+3: ;; Left-shift Dividend...
+ lsl A0 $ rol A1 $ rol A2 $ rol A3
+ rol A4 $ rol A5 $ rol A6 $ rol A7
+
+ ;; ...into Remainder
+ rol C0 $ rol C1 $ rol C2 $ rol C3
+ rol C4 $ rol C5 $ rol C6 $ rol C7
+
+ ;; Compare Remainder and Divisor
+ CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
+ cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
+
+ brcs 4f
+
+ ;; Divisor fits into Remainder: Subtract it from Remainder...
+ SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
+ sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
+
+ ;; ...and set the corresponding Bit in the upcoming Quotient
+ ;; The Bit will travel to its final Position
+ ori A0, 1
+
+4: ;; This Bit is done
+ dec R_cnt
+ brne 3b
+ ;; __zero_reg__ is 0 again
+
+ ;; T = 0: We are fine with the Quotient in A[]
+ ;; T = 1: Copy Remainder to A[]
+5: brtc 6f
+ wmov A0, C0
+ wmov A2, C2
+ wmov A4, C4
+ wmov A6, C6
+ ;; Move the Sign of the Result to SS.7
+ lsl SS
+
+6: ret
+
+ENDF __udivmod64
+#endif /* L_udivmod64 */
+
+
+#if defined (L_divdi3)
+
+;; R25:R18 = R24:R18 mod R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __moddi3
+ set
+ rjmp __divdi3_moddi3
+ENDF __moddi3
+
+;; R25:R18 = R24:R18 div R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __divdi3
+ clt
+ENDF __divdi3
+
+DEFUN __divdi3_moddi3
+#if SPEED_DIV
+ mov r31, A7
+ or r31, B7
+ brmi 0f
+ ;; Both Signs are 0: the following Complexity is not needed
+ XJMP __udivdi3_umoddi3
+#endif /* SPEED_DIV */
+
+0: ;; The Prologue
+ ;; Save 12 Registers: Y, 17...8
+ ;; No Frame needed
+ do_prologue_saves 12
+
+ ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
+ ;; SS.6 will contain the Sign of the Remainder (A.sign)
+ mov SS, A7
+ asr SS
+ ;; Adjust Dividend's Sign as needed
+#if SPEED_DIV
+ ;; Compiling for Speed we know that at least one Sign must be < 0
+ ;; Thus, if A[] >= 0 then we know B[] < 0
+ brpl 22f
+#else
+ brpl 21f
+#endif /* SPEED_DIV */
+
+ XCALL __negdi2
+
+ ;; Adjust Divisor's Sign and SS.7 as needed
+21: tst B7
+ brpl 3f
+22: ldi NN, 1 << 7
+ eor SS, NN
+
+ ldi NN, -1
+ com B4 $ com B5 $ com B6 $ com B7
+ $ com B1 $ com B2 $ com B3
+ NEG B0
+ $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
+ sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
+
+3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
+ XCALL __udivmod64
+
+ ;; Adjust Result's Sign
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ tst SS
+ brpl 4f
+#else
+ sbrc SS, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XCALL __negdi2
+
+4: ;; Epilogue: Restore 12 Registers and return
+ do_epilogue_restores 12
+
+ENDF __divdi3_moddi3
+
+#endif /* L_divdi3 */
+
+#undef R_cnt
+#undef SS
+#undef NN
+
+.section .text.libgcc, "ax", @progbits
+
+#define TT __tmp_reg__
+
+#if defined (L_adddi3)
+;; (set (reg:DI 18)
+;; (plus:DI (reg:DI 18)
+;; (reg:DI 10)))
+;; Sets the V flag for signed overflow tests
+;; Sets the C flag for unsigned overflow tests
+DEFUN __adddi3
+ ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
+ adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
+ ret
+ENDF __adddi3
+#endif /* L_adddi3 */
+
+#if defined (L_adddi3_s8)
+;; (set (reg:DI 18)
+;; (plus:DI (reg:DI 18)
+;; (sign_extend:SI (reg:QI 26))))
+;; Sets the V flag for signed overflow tests
+;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
+DEFUN __adddi3_s8
+ clr TT
+ sbrc r26, 7
+ com TT
+ ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
+ adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
+ ret
+ENDF __adddi3_s8
+#endif /* L_adddi3_s8 */
+
+#if defined (L_subdi3)
+;; (set (reg:DI 18)
+;; (minus:DI (reg:DI 18)
+;; (reg:DI 10)))
+;; Sets the V flag for signed overflow tests
+;; Sets the C flag for unsigned overflow tests
+DEFUN __subdi3
+ SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
+ sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
+ ret
+ENDF __subdi3
+#endif /* L_subdi3 */
+
+#if defined (L_cmpdi2)
+;; (set (cc0)
+;; (compare (reg:DI 18)
+;; (reg:DI 10)))
+DEFUN __cmpdi2
+ CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
+ cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
+ ret
+ENDF __cmpdi2
+#endif /* L_cmpdi2 */
+
+#if defined (L_cmpdi2_s8)
+;; (set (cc0)
+;; (compare (reg:DI 18)
+;; (sign_extend:SI (reg:QI 26))))
+DEFUN __cmpdi2_s8
+ clr TT
+ sbrc r26, 7
+ com TT
+ CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
+ cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
+ ret
+ENDF __cmpdi2_s8
+#endif /* L_cmpdi2_s8 */
+
+#if defined (L_negdi2)
+;; (set (reg:DI 18)
+;; (neg:DI (reg:DI 18)))
+;; Sets the V flag for signed overflow tests
+DEFUN __negdi2
+
+ com A4 $ com A5 $ com A6 $ com A7
+ $ com A1 $ com A2 $ com A3
+ NEG A0
+ $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
+ sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
+ ret
+
+ENDF __negdi2
+#endif /* L_negdi2 */
+
+#undef TT
+
+#undef C7
+#undef C6
+#undef C5
+#undef C4
+#undef C3
+#undef C2
+#undef C1
+#undef C0
+
+#undef B7
+#undef B6
+#undef B5
+#undef B4
+#undef B3
+#undef B2
+#undef B1
+#undef B0
+
+#undef A7
+#undef A6
+#undef A5
+#undef A4
+#undef A3
+#undef A2
+#undef A1
+#undef A0
+
+
+.section .text.libgcc.prologue, "ax", @progbits
+
+/**********************************
+ * This is a prologue subroutine
+ **********************************/
+#if defined (L_prologue)
+
+;; This function does not clobber T-flag; 64-bit division relies on it
+DEFUN __prologue_saves__
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r28
+ push r29
+#if !defined (__AVR_HAVE_SPH__)
+ in r28,__SP_L__
+ sub r28,r26
+ out __SP_L__,r28
+ clr r29
+#elif defined (__AVR_XMEGA__)
+ in r28,__SP_L__
+ in r29,__SP_H__
+ sub r28,r26
+ sbc r29,r27
+ out __SP_L__,r28
+ out __SP_H__,r29
+#else
+ in r28,__SP_L__
+ in r29,__SP_H__
+ sub r28,r26
+ sbc r29,r27
+ in __tmp_reg__,__SREG__
+ cli
+ out __SP_H__,r29
+ out __SREG__,__tmp_reg__
+ out __SP_L__,r28
+#endif /* #SP = 8/16 */
+
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+ENDF __prologue_saves__
+#endif /* defined (L_prologue) */
+
+/*
+ * This is an epilogue subroutine
+ */
+#if defined (L_epilogue)
+
+DEFUN __epilogue_restores__
+ ldd r2,Y+18
+ ldd r3,Y+17
+ ldd r4,Y+16
+ ldd r5,Y+15
+ ldd r6,Y+14
+ ldd r7,Y+13
+ ldd r8,Y+12
+ ldd r9,Y+11
+ ldd r10,Y+10
+ ldd r11,Y+9
+ ldd r12,Y+8
+ ldd r13,Y+7
+ ldd r14,Y+6
+ ldd r15,Y+5
+ ldd r16,Y+4
+ ldd r17,Y+3
+ ldd r26,Y+2
+#if !defined (__AVR_HAVE_SPH__)
+ ldd r29,Y+1
+ add r28,r30
+ out __SP_L__,r28
+ mov r28, r26
+#elif defined (__AVR_XMEGA__)
+ ldd r27,Y+1
+ add r28,r30
+ adc r29,__zero_reg__
+ out __SP_L__,r28
+ out __SP_H__,r29
+ wmov 28, 26
+#else
+ ldd r27,Y+1
+ add r28,r30
+ adc r29,__zero_reg__
+ in __tmp_reg__,__SREG__
+ cli
+ out __SP_H__,r29
+ out __SREG__,__tmp_reg__
+ out __SP_L__,r28
+ mov_l r28, r26
+ mov_h r29, r27
+#endif /* #SP = 8/16 */
+ ret
+ENDF __epilogue_restores__
+#endif /* defined (L_epilogue) */
+
+#ifdef L_exit
+ .section .fini9,"ax",@progbits
+DEFUN _exit
+ .weak exit
+exit:
+ENDF _exit
+
+ /* Code from .fini8 ... .fini1 sections inserted by ld script. */
+
+ .section .fini0,"ax",@progbits
+ cli
+__stop_program:
+ rjmp __stop_program
+#endif /* defined (L_exit) */
+
+#ifdef L_cleanup
+ .weak _cleanup
+ .func _cleanup
+_cleanup:
+ ret
+.endfunc
+#endif /* defined (L_cleanup) */
+
+
+.section .text.libgcc, "ax", @progbits
+
+#ifdef L_tablejump
+DEFUN __tablejump2__
+ lsl r30
+ rol r31
+ ;; FALLTHRU
+ENDF __tablejump2__
+
+DEFUN __tablejump__
+#if defined (__AVR_HAVE_LPMX__)
+ lpm __tmp_reg__, Z+
+ lpm r31, Z
+ mov r30, __tmp_reg__
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+#else /* !HAVE_LPMX */
+ lpm
+ adiw r30, 1
+ push r0
+ lpm
+ push r0
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ in __tmp_reg__, __EIND__
+ push __tmp_reg__
+#endif
+ ret
+#endif /* !HAVE_LPMX */
+ENDF __tablejump__
+#endif /* defined (L_tablejump) */
+
+#ifdef L_copy_data
+ .section .init4,"ax",@progbits
+DEFUN __do_copy_data
+#if defined(__AVR_HAVE_ELPMX__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start)
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm r0, Z+
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start - 0x10000)
+.L__do_copy_data_carry:
+ inc r16
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm
+ st X+, r0
+ adiw r30, 1
+ brcs .L__do_copy_data_carry
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+#if defined (__AVR_HAVE_LPMX__)
+ lpm r0, Z+
+#else
+ lpm
+ adiw r30, 1
+#endif
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
+#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+#endif /* ELPM && RAMPD */
+ENDF __do_copy_data
+#endif /* L_copy_data */
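+
+;; In rough C terms, __do_copy_data behaves like the sketch below.
+;; (Illustrative only; pgm_read_byte stands in for the LPM/ELPM accesses
+;; done above and is borrowed from avr-libc, not from this file.)
+;;
+;;   extern char __data_start[], __data_end[], __data_load_start[];
+;;   char *dst = __data_start;
+;;   const char *src = __data_load_start;     /* load address in flash */
+;;   while (dst != __data_end)
+;;     *dst++ = pgm_read_byte (src++);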
+
+/* __do_clear_bss is only necessary if there is anything in .bss section. */
+
+#ifdef L_clear_bss
+ .section .init4,"ax",@progbits
+DEFUN __do_clear_bss
+ ldi r17, hi8(__bss_end)
+ ldi r26, lo8(__bss_start)
+ ldi r27, hi8(__bss_start)
+ rjmp .do_clear_bss_start
+.do_clear_bss_loop:
+ st X+, __zero_reg__
+.do_clear_bss_start:
+ cpi r26, lo8(__bss_end)
+ cpc r27, r17
+ brne .do_clear_bss_loop
+ENDF __do_clear_bss
+#endif /* L_clear_bss */
+
+/* __do_global_ctors and __do_global_dtors are only necessary
+ if there are any constructors/destructors. */
+
+#ifdef L_ctors
+ .section .init6,"ax",@progbits
+DEFUN __do_global_ctors
+#if defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__ctors_start)
+ ldi r28, lo8(__ctors_end)
+ ldi r29, hi8(__ctors_end)
+ ldi r16, hh8(__ctors_end)
+ rjmp .L__do_global_ctors_start
+.L__do_global_ctors_loop:
+ sbiw r28, 2
+ sbc r16, __zero_reg__
+ mov_h r31, r29
+ mov_l r30, r28
+ out __RAMPZ__, r16
+ XCALL __tablejump_elpm__
+.L__do_global_ctors_start:
+ cpi r28, lo8(__ctors_start)
+ cpc r29, r17
+ ldi r24, hh8(__ctors_start)
+ cpc r16, r24
+ brne .L__do_global_ctors_loop
+#else
+ ldi r17, hi8(__ctors_start)
+ ldi r28, lo8(__ctors_end)
+ ldi r29, hi8(__ctors_end)
+ rjmp .L__do_global_ctors_start
+.L__do_global_ctors_loop:
+ sbiw r28, 2
+ mov_h r31, r29
+ mov_l r30, r28
+ XCALL __tablejump__
+.L__do_global_ctors_start:
+ cpi r28, lo8(__ctors_start)
+ cpc r29, r17
+ brne .L__do_global_ctors_loop
+#endif /* defined(__AVR_HAVE_ELPM__) */
+ENDF __do_global_ctors
+#endif /* L_ctors */
+
+#ifdef L_dtors
+ .section .fini6,"ax",@progbits
+DEFUN __do_global_dtors
+#if defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__dtors_end)
+ ldi r28, lo8(__dtors_start)
+ ldi r29, hi8(__dtors_start)
+ ldi r16, hh8(__dtors_start)
+ rjmp .L__do_global_dtors_start
+.L__do_global_dtors_loop:
+ sbiw r28, 2
+ sbc r16, __zero_reg__
+ mov_h r31, r29
+ mov_l r30, r28
+ out __RAMPZ__, r16
+ XCALL __tablejump_elpm__
+.L__do_global_dtors_start:
+ cpi r28, lo8(__dtors_end)
+ cpc r29, r17
+ ldi r24, hh8(__dtors_end)
+ cpc r16, r24
+ brne .L__do_global_dtors_loop
+#else
+ ldi r17, hi8(__dtors_end)
+ ldi r28, lo8(__dtors_start)
+ ldi r29, hi8(__dtors_start)
+ rjmp .L__do_global_dtors_start
+.L__do_global_dtors_loop:
+ mov_h r31, r29
+ mov_l r30, r28
+ XCALL __tablejump__
+ adiw r28, 2
+.L__do_global_dtors_start:
+ cpi r28, lo8(__dtors_end)
+ cpc r29, r17
+ brne .L__do_global_dtors_loop
+#endif /* defined(__AVR_HAVE_ELPM__) */
+ENDF __do_global_dtors
+#endif /* L_dtors */
+
+.section .text.libgcc, "ax", @progbits
+
+#ifdef L_tablejump_elpm
+DEFUN __tablejump_elpm__
+#if defined (__AVR_HAVE_ELPMX__)
+ elpm __tmp_reg__, Z+
+ elpm r31, Z
+ mov r30, __tmp_reg__
+#if defined (__AVR_HAVE_RAMPD__)
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+#endif /* RAMPD */
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+#elif defined (__AVR_HAVE_ELPM__)
+ elpm
+ adiw r30, 1
+ push r0
+ elpm
+ push r0
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ in __tmp_reg__, __EIND__
+ push __tmp_reg__
+#endif
+ ret
+#endif
+ENDF __tablejump_elpm__
+#endif /* defined (L_tablejump_elpm) */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Loading n bytes from Flash; n = 3,4
+;; R22... = Flash[Z]
+;; Clobbers: __tmp_reg__
+
+#if (defined (L_load_3) \
+ || defined (L_load_4)) \
+ && !defined (__AVR_HAVE_LPMX__)
+
+;; Destination
+#define D0 22
+#define D1 D0+1
+#define D2 D0+2
+#define D3 D0+3
+
+.macro .load dest, n
+ lpm
+ mov \dest, r0
+.if \dest != D0+\n-1
+ adiw r30, 1
+.else
+ sbiw r30, \n-1
+.endif
+.endm
+
+#if defined (L_load_3)
+DEFUN __load_3
+ push D3
+ XCALL __load_4
+ pop D3
+ ret
+ENDF __load_3
+#endif /* L_load_3 */
+
+#if defined (L_load_4)
+DEFUN __load_4
+ .load D0, 4
+ .load D1, 4
+ .load D2, 4
+ .load D3, 4
+ ret
+ENDF __load_4
+#endif /* L_load_4 */
+
+#endif /* L_load_3 || L_load_4 */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Loading n bytes from Flash or RAM; n = 1,2,3,4
+;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
+;; Clobbers: __tmp_reg__, R21, R30, R31
+
+#if (defined (L_xload_1) \
+ || defined (L_xload_2) \
+ || defined (L_xload_3) \
+ || defined (L_xload_4))
+
+;; Destination
+#define D0 22
+#define D1 D0+1
+#define D2 D0+2
+#define D3 D0+3
+
+;; Register containing bits 16+ of the address
+
+#define HHI8 21
+
+.macro .xload dest, n
+#if defined (__AVR_HAVE_ELPMX__)
+ elpm \dest, Z+
+#elif defined (__AVR_HAVE_ELPM__)
+ elpm
+ mov \dest, r0
+.if \dest != D0+\n-1
+ adiw r30, 1
+ adc HHI8, __zero_reg__
+ out __RAMPZ__, HHI8
+.endif
+#elif defined (__AVR_HAVE_LPMX__)
+ lpm \dest, Z+
+#else
+ lpm
+ mov \dest, r0
+.if \dest != D0+\n-1
+ adiw r30, 1
+.endif
+#endif
+#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
+.if \dest == D0+\n-1
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+.endif
+#endif
+.endm ; .xload
+
+#if defined (L_xload_1)
+DEFUN __xload_1
+#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
+ sbrc HHI8, 7
+ ld D0, Z
+ sbrs HHI8, 7
+ lpm D0, Z
+ ret
+#else
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 1
+ ret
+1: ld D0, Z
+ ret
+#endif /* LPMx && ! ELPM */
+ENDF __xload_1
+#endif /* L_xload_1 */
+
+#if defined (L_xload_2)
+DEFUN __xload_2
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 2
+ .xload D1, 2
+ ret
+1: ld D0, Z+
+ ld D1, Z+
+ ret
+ENDF __xload_2
+#endif /* L_xload_2 */
+
+#if defined (L_xload_3)
+DEFUN __xload_3
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 3
+ .xload D1, 3
+ .xload D2, 3
+ ret
+1: ld D0, Z+
+ ld D1, Z+
+ ld D2, Z+
+ ret
+ENDF __xload_3
+#endif /* L_xload_3 */
+
+#if defined (L_xload_4)
+DEFUN __xload_4
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 4
+ .xload D1, 4
+ .xload D2, 4
+ .xload D3, 4
+ ret
+1: ld D0, Z+
+ ld D1, Z+
+ ld D2, Z+
+ ld D3, Z+
+ ret
+ENDF __xload_4
+#endif /* L_xload_4 */
+
+#endif /* L_xload_{1|2|3|4} */
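+
+;; Conceptually (an illustrative sketch, not code from this file), each
+;; __xload_<n> does:
+;;
+;;   if (R21 & 0x80)                /* bit 7 set: the address names RAM  */
+;;     load n bytes from RAM[Z];
+;;   else                           /* bit 7 clear: address names flash  */
+;;     load n bytes from Flash[R21:Z] via (E)LPM;
+;;
+;; which is how the 24-bit address distinguishes the two address spaces.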
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; memcopy from Address Space __pgmx to RAM
+;; R23:Z = Source Address
+;; X = Destination Address
+;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
+
+#if defined (L_movmemx)
+
+#define HHI8 23
+#define LOOP 24
+
+DEFUN __movmemx_qi
+ ;; #Bytes to copy fit in 8 Bits (1..255)
+ ;; Zero-extend Loop Counter
+ clr LOOP+1
+ ;; FALLTHRU
+ENDF __movmemx_qi
+
+DEFUN __movmemx_hi
+
+;; Read from where?
+ sbrc HHI8, 7
+ rjmp 1f
+
+;; Read from Flash
+
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif
+
+0: ;; Load 1 Byte from Flash...
+
+#if defined (__AVR_HAVE_ELPMX__)
+ elpm r0, Z+
+#elif defined (__AVR_HAVE_ELPM__)
+ elpm
+ adiw r30, 1
+ adc HHI8, __zero_reg__
+ out __RAMPZ__, HHI8
+#elif defined (__AVR_HAVE_LPMX__)
+ lpm r0, Z+
+#else
+ lpm
+ adiw r30, 1
+#endif
+
+ ;; ...and store that Byte to RAM Destination
+ st X+, r0
+ sbiw LOOP, 1
+ brne 0b
+#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+#endif /* ELPM && RAMPD */
+ ret
+
+;; Read from RAM
+
+1: ;; Read 1 Byte from RAM...
+ ld r0, Z+
+ ;; and store that Byte to RAM Destination
+ st X+, r0
+ sbiw LOOP, 1
+ brne 1b
+ ret
+ENDF __movmemx_hi
+
+#undef HHI8
+#undef LOOP
+
+#endif /* L_movmemx */
+
+
+.section .text.libgcc.builtins, "ax", @progbits
+
+/**********************************
+ * Find first set Bit (ffs)
+ **********************************/
+
+#if defined (L_ffssi2)
+;; find first set bit
+;; r25:r24 = ffs32 (r25:r22)
+;; clobbers: r22, r26
+DEFUN __ffssi2
+ clr r26
+ tst r22
+ brne 1f
+ subi r26, -8
+ or r22, r23
+ brne 1f
+ subi r26, -8
+ or r22, r24
+ brne 1f
+ subi r26, -8
+ or r22, r25
+ brne 1f
+ ret
+1: mov r24, r22
+ XJMP __loop_ffsqi2
+ENDF __ffssi2
+#endif /* defined (L_ffssi2) */
+
+#if defined (L_ffshi2)
+;; find first set bit
+;; r25:r24 = ffs16 (r25:r24)
+;; clobbers: r26
+DEFUN __ffshi2
+ clr r26
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have a problem skipping 2-word instructions
+ tst r24
+ breq 2f
+#else
+ cpse r24, __zero_reg__
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+1: XJMP __loop_ffsqi2
+2: ldi r26, 8
+ or r24, r25
+ brne 1b
+ ret
+ENDF __ffshi2
+#endif /* defined (L_ffshi2) */
+
+#if defined (L_loop_ffsqi2)
+;; Helper for ffshi2, ffssi2
+;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
+;; r24 must be != 0
+;; clobbers: r26
+DEFUN __loop_ffsqi2
+ inc r26
+ lsr r24
+ brcc __loop_ffsqi2
+ mov r24, r26
+ clr r25
+ ret
+ENDF __loop_ffsqi2
+#endif /* defined (L_loop_ffsqi2) */
+
+
+/**********************************
+ * Count trailing Zeros (ctz)
+ **********************************/
+
+#if defined (L_ctzsi2)
+;; count trailing zeros
+;; r25:r24 = ctz32 (r25:r22)
+;; clobbers: r26, r22
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
+DEFUN __ctzsi2
+ XCALL __ffssi2
+ dec r24
+ ret
+ENDF __ctzsi2
+#endif /* defined (L_ctzsi2) */
+
+#if defined (L_ctzhi2)
+;; count trailing zeros
+;; r25:r24 = ctz16 (r25:r24)
+;; clobbers: r26
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
+DEFUN __ctzhi2
+ XCALL __ffshi2
+ dec r24
+ ret
+ENDF __ctzhi2
+#endif /* defined (L_ctzhi2) */
+
+
+/**********************************
+ * Count leading Zeros (clz)
+ **********************************/
+
+#if defined (L_clzdi2)
+;; count leading zeros
+;; r25:r24 = clz64 (r25:r18)
+;; clobbers: r22, r23, r26
+DEFUN __clzdi2
+ XCALL __clzsi2
+ sbrs r24, 5
+ ret
+ mov_l r22, r18
+ mov_h r23, r19
+ mov_l r24, r20
+ mov_h r25, r21
+ XCALL __clzsi2
+ subi r24, -32
+ ret
+ENDF __clzdi2
+#endif /* defined (L_clzdi2) */
+
+#if defined (L_clzsi2)
+;; count leading zeros
+;; r25:r24 = clz32 (r25:r22)
+;; clobbers: r26
+DEFUN __clzsi2
+ XCALL __clzhi2
+ sbrs r24, 4
+ ret
+ mov_l r24, r22
+ mov_h r25, r23
+ XCALL __clzhi2
+ subi r24, -16
+ ret
+ENDF __clzsi2
+#endif /* defined (L_clzsi2) */
+
+#if defined (L_clzhi2)
+;; count leading zeros
+;; r25:r24 = clz16 (r25:r24)
+;; clobbers: r26
+DEFUN __clzhi2
+ clr r26
+ tst r25
+ brne 1f
+ subi r26, -8
+ or r25, r24
+ brne 1f
+ ldi r24, 16
+ ret
+1: cpi r25, 16
+ brsh 3f
+ subi r26, -3
+ swap r25
+2: inc r26
+3: lsl r25
+ brcc 2b
+ mov r24, r26
+ clr r25
+ ret
+ENDF __clzhi2
+#endif /* defined (L_clzhi2) */
+
+
+/**********************************
+ * Parity
+ **********************************/
+
+#if defined (L_paritydi2)
+;; r25:r24 = parity64 (r25:r18)
+;; clobbers: __tmp_reg__
+DEFUN __paritydi2
+ eor r24, r18
+ eor r24, r19
+ eor r24, r20
+ eor r24, r21
+ XJMP __paritysi2
+ENDF __paritydi2
+#endif /* defined (L_paritydi2) */
+
+#if defined (L_paritysi2)
+;; r25:r24 = parity32 (r25:r22)
+;; clobbers: __tmp_reg__
+DEFUN __paritysi2
+ eor r24, r22
+ eor r24, r23
+ XJMP __parityhi2
+ENDF __paritysi2
+#endif /* defined (L_paritysi2) */
+
+#if defined (L_parityhi2)
+;; r25:r24 = parity16 (r25:r24)
+;; clobbers: __tmp_reg__
+DEFUN __parityhi2
+ eor r24, r25
+;; FALLTHRU
+ENDF __parityhi2
+
+;; r25:r24 = parity8 (r24)
+;; clobbers: __tmp_reg__
+DEFUN __parityqi2
+ ;; parity is in r24[0..7]
+ mov __tmp_reg__, r24
+ swap __tmp_reg__
+ eor r24, __tmp_reg__
+ ;; parity is in r24[0..3]
+ subi r24, -4
+ andi r24, -5
+ subi r24, -6
+ ;; parity is in r24[0,3]
+ sbrc r24, 3
+ inc r24
+ ;; parity is in r24[0]
+ andi r24, 1
+ clr r25
+ ret
+ENDF __parityqi2
+#endif /* defined (L_parityhi2) */
+
+
+/**********************************
+ * Population Count
+ **********************************/
+
+#if defined (L_popcounthi2)
+;; population count
+;; r25:r24 = popcount16 (r25:r24)
+;; clobbers: __tmp_reg__
+DEFUN __popcounthi2
+ XCALL __popcountqi2
+ push r24
+ mov r24, r25
+ XCALL __popcountqi2
+ clr r25
+ ;; FALLTHRU
+ENDF __popcounthi2
+
+DEFUN __popcounthi2_tail
+ pop __tmp_reg__
+ add r24, __tmp_reg__
+ ret
+ENDF __popcounthi2_tail
+#endif /* defined (L_popcounthi2) */
+
+#if defined (L_popcountsi2)
+;; population count
+;; r25:r24 = popcount32 (r25:r22)
+;; clobbers: __tmp_reg__
+DEFUN __popcountsi2
+ XCALL __popcounthi2
+ push r24
+ mov_l r24, r22
+ mov_h r25, r23
+ XCALL __popcounthi2
+ XJMP __popcounthi2_tail
+ENDF __popcountsi2
+#endif /* defined (L_popcountsi2) */
+
+#if defined (L_popcountdi2)
+;; population count
+;; r25:r24 = popcount64 (r25:r18)
+;; clobbers: r22, r23, __tmp_reg__
+DEFUN __popcountdi2
+ XCALL __popcountsi2
+ push r24
+ mov_l r22, r18
+ mov_h r23, r19
+ mov_l r24, r20
+ mov_h r25, r21
+ XCALL __popcountsi2
+ XJMP __popcounthi2_tail
+ENDF __popcountdi2
+#endif /* defined (L_popcountdi2) */
+
+#if defined (L_popcountqi2)
+;; population count
+;; r24 = popcount8 (r24)
+;; clobbers: __tmp_reg__
+DEFUN __popcountqi2
+ mov __tmp_reg__, r24
+ andi r24, 1
+ lsr __tmp_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __tmp_reg__
+ ret
+ENDF __popcountqi2
+#endif /* defined (L_popcountqi2) */
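+
+;; A rough C model of the byte-wise popcount above (an illustrative sketch):
+;; the LSR/ADC chain adds the bits of r24 one at a time,
+;;
+;;   uint8_t n = x & 1;
+;;   for (uint8_t i = 1; i < 8; i++)
+;;     n += (x >> i) & 1;
+;;   return n;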
+
+
+/**********************************
+ * Swap bytes
+ **********************************/
+
+;; swap two registers with different register number
+.macro bswap a, b
+ eor \a, \b
+ eor \b, \a
+ eor \a, \b
+.endm
+
+#if defined (L_bswapsi2)
+;; swap bytes
+;; r25:r22 = bswap32 (r25:r22)
+DEFUN __bswapsi2
+ bswap r22, r25
+ bswap r23, r24
+ ret
+ENDF __bswapsi2
+#endif /* defined (L_bswapsi2) */
+
+#if defined (L_bswapdi2)
+;; swap bytes
+;; r25:r18 = bswap64 (r25:r18)
+DEFUN __bswapdi2
+ bswap r18, r25
+ bswap r19, r24
+ bswap r20, r23
+ bswap r21, r22
+ ret
+ENDF __bswapdi2
+#endif /* defined (L_bswapdi2) */
+
+
+/**********************************
+ * 64-bit shifts
+ **********************************/
+
+#if defined (L_ashrdi3)
+;; Arithmetic shift right
+;; r25:r18 = ashr64 (r25:r18, r17:r16)
+DEFUN __ashrdi3
+ bst r25, 7
+ bld __zero_reg__, 0
+ ;; FALLTHRU
+ENDF __ashrdi3
+
+;; Logic shift right
+;; r25:r18 = lshr64 (r25:r18, r17:r16)
+DEFUN __lshrdi3
+ lsr __zero_reg__
+ sbc __tmp_reg__, __tmp_reg__
+ push r16
+0: cpi r16, 8
+ brlo 2f
+ subi r16, 8
+ mov r18, r19
+ mov r19, r20
+ mov r20, r21
+ mov r21, r22
+ mov r22, r23
+ mov r23, r24
+ mov r24, r25
+ mov r25, __tmp_reg__
+ rjmp 0b
+1: asr __tmp_reg__
+ ror r25
+ ror r24
+ ror r23
+ ror r22
+ ror r21
+ ror r20
+ ror r19
+ ror r18
+2: dec r16
+ brpl 1b
+ pop r16
+ ret
+ENDF __lshrdi3
+#endif /* defined (L_ashrdi3) */
+
+#if defined (L_ashldi3)
+;; Shift left
+;; r25:r18 = ashl64 (r25:r18, r17:r16)
+DEFUN __ashldi3
+ push r16
+0: cpi r16, 8
+ brlo 2f
+ mov r25, r24
+ mov r24, r23
+ mov r23, r22
+ mov r22, r21
+ mov r21, r20
+ mov r20, r19
+ mov r19, r18
+ clr r18
+ subi r16, 8
+ rjmp 0b
+1: lsl r18
+ rol r19
+ rol r20
+ rol r21
+ rol r22
+ rol r23
+ rol r24
+ rol r25
+2: dec r16
+ brpl 1b
+ pop r16
+ ret
+ENDF __ashldi3
+#endif /* defined (L_ashldi3) */
+
+#if defined (L_rotldi3)
+;; Rotate left
+;; r25:r18 = rotl64 (r25:r18, r17:r16)
+DEFUN __rotldi3
+ push r16
+0: cpi r16, 8
+ brlo 2f
+ subi r16, 8
+ mov __tmp_reg__, r25
+ mov r25, r24
+ mov r24, r23
+ mov r23, r22
+ mov r22, r21
+ mov r21, r20
+ mov r20, r19
+ mov r19, r18
+ mov r18, __tmp_reg__
+ rjmp 0b
+1: lsl r18
+ rol r19
+ rol r20
+ rol r21
+ rol r22
+ rol r23
+ rol r24
+ rol r25
+ adc r18, __zero_reg__
+2: dec r16
+ brpl 1b
+ pop r16
+ ret
+ENDF __rotldi3
+#endif /* defined (L_rotldi3) */
+
+
+.section .text.libgcc.fmul, "ax", @progbits
+
+/***********************************************************/
+;;; Softmul versions of FMUL, FMULS and FMULSU to implement
+;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
+/***********************************************************/
+
+#define A1 24
+#define B1 25
+#define C0 22
+#define C1 23
+#define A0 __tmp_reg__
+
+#ifdef L_fmuls
+;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmuls
+ ;; A0.7 = negate result?
+ mov A0, A1
+ eor A0, B1
+ ;; B1 = |B1|
+ sbrc B1, 7
+ neg B1
+ XJMP __fmulsu_exit
+ENDF __fmuls
+#endif /* L_fmuls */
+
+#ifdef L_fmulsu
+;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmulsu
+ ;; A0.7 = negate result?
+ mov A0, A1
+;; FALLTHRU
+ENDF __fmulsu
+
+;; Helper for __fmuls and __fmulsu
+DEFUN __fmulsu_exit
+ ;; A1 = |A1|
+ sbrc A1, 7
+ neg A1
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have a problem skipping 2-word instructions
+ tst A0
+ brmi 1f
+#else
+ sbrs A0, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XJMP __fmul
+1: XCALL __fmul
+ ;; C = -C iff A0.7 = 1
+ NEG2 C0
+ ret
+ENDF __fmulsu_exit
+#endif /* L_fmulsu */
+
+
+#ifdef L_fmul
+;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmul
+ ; clear result
+ clr C0
+ clr C1
+ clr A0
+1: tst B1
+ ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
+2: brpl 3f
+ ;; C += A
+ add C0, A0
+ adc C1, A1
+3: ;; A >>= 1
+ lsr A1
+ ror A0
+ ;; B <<= 1
+ lsl B1
+ brne 2b
+ ret
+ENDF __fmul
+#endif /* L_fmul */
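+
+;; In C terms the soft FMUL above computes roughly (an illustrative sketch):
+;;
+;;   uint16_t fmul (uint8_t a, uint8_t b)
+;;   {
+;;     /* 1.7 x 1.7 -> 1.15: the plain 8 x 8 product shifted left by one,
+;;        truncated to 16 bits like the hardware FMUL result register.  */
+;;     return (uint16_t) (((unsigned) a * b) << 1);
+;;   }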
+
+#undef A0
+#undef A1
+#undef B1
+#undef C0
+#undef C1
+
+#include "lib1funcs-fixed.S"
diff --git a/gcc-4.9/libgcc/config/avr/lib2-object.mk b/gcc-4.9/libgcc/config/avr/lib2-object.mk
new file mode 100644
index 000000000..6a9e04de0
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/lib2-object.mk
@@ -0,0 +1,23 @@
+# This file is included several times in a row, once for each element of
+# $(iter-items). On each inclusion, we advance $o to the next element.
+# $(iter-labels) and $(iter-flags) are also advanced.
+# This works similarly to $(srcdir)/siditi-object.mk.
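+#
+# For example (values only for illustration), with
+#   iter-items = _roundhr _roundr ...,  iter-labels = hr r ...
+# the first inclusion emits the rule for _roundhr$(objext) with label "hr";
+# the next inclusion then sees iter-items = _roundr ..., and so on.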
+
+o := $(firstword $(iter-items))
+iter-items := $(filter-out $o,$(iter-items))
+
+$o-label := $(firstword $(iter-labels))
+iter-labels := $(wordlist 2,$(words $(iter-labels)),$(iter-labels))
+
+$o-flag := $(firstword $(iter-flags))
+iter-flags := $(wordlist 2,$(words $(iter-flags)),$(iter-flags))
+
+$o$(objext): %$(objext): $(srcdir)/config/avr/lib2funcs.c
+ $(gcc_compile) -DL_$($*-label) -DL_LABEL=$($*-label) $($*-flag) \
+ -c $< $(vis_hide)
+
+ifeq ($(enable_shared),yes)
+$(o)_s$(objext): %_s$(objext): $(srcdir)/config/avr/lib2funcs.c
+ $(gcc_s_compile) -DL_$($*-label) -DL_LABEL=$($*-label) $($*-flag) \
+ -c $<
+endif
diff --git a/gcc-4.9/libgcc/config/avr/lib2funcs.c b/gcc-4.9/libgcc/config/avr/lib2funcs.c
new file mode 100644
index 000000000..774d14ced
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/lib2funcs.c
@@ -0,0 +1,226 @@
+/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+/* This file supplies implementations for some AVR-specific builtin
+ functions so that code like the following works as expected:
+
+ int (*f (void))(_Fract)
+ {
+ return __builtin_avr_countlsr;
+ }
+
+ In this specific case, the generated code is:
+
+ f:
+ ldi r24,lo8(gs(__countlsHI))
+ ldi r25,hi8(gs(__countlsHI))
+ ret
+*/
+
+/* Map fixed-point suffix to the corresponding fixed-point type. */
+
+typedef short _Fract fx_hr_t;
+typedef _Fract fx_r_t;
+typedef long _Fract fx_lr_t;
+typedef long long _Fract fx_llr_t;
+
+typedef unsigned short _Fract fx_uhr_t;
+typedef unsigned _Fract fx_ur_t;
+typedef unsigned long _Fract fx_ulr_t;
+typedef unsigned long long _Fract fx_ullr_t;
+
+typedef short _Accum fx_hk_t;
+typedef _Accum fx_k_t;
+typedef long _Accum fx_lk_t;
+typedef long long _Accum fx_llk_t;
+
+typedef unsigned short _Accum fx_uhk_t;
+typedef unsigned _Accum fx_uk_t;
+typedef unsigned long _Accum fx_ulk_t;
+typedef unsigned long long _Accum fx_ullk_t;
+
+/* Map fixed-point suffix to the corresponding natural integer type. */
+
+typedef char int_hr_t;
+typedef int int_r_t;
+typedef long int_lr_t;
+typedef long long int_llr_t;
+
+typedef unsigned char int_uhr_t;
+typedef unsigned int int_ur_t;
+typedef unsigned long int_ulr_t;
+typedef unsigned long long int_ullr_t;
+
+typedef int int_hk_t;
+typedef long int_k_t;
+typedef long long int_lk_t;
+typedef long long int_llk_t;
+
+typedef unsigned int int_uhk_t;
+typedef unsigned long int_uk_t;
+typedef unsigned long long int_ulk_t;
+typedef unsigned long long int_ullk_t;
+
+/* Map mode to the corresponding integer type. */
+
+typedef char int_qi_t;
+typedef int int_hi_t;
+typedef long int_si_t;
+typedef long long int_di_t;
+
+typedef unsigned char uint_qi_t;
+typedef unsigned int uint_hi_t;
+typedef unsigned long uint_si_t;
+typedef unsigned long long uint_di_t;
+
+
+
+/************************************************************************/
+
+/* Supply implementations / symbols for __builtin_roundFX ASM_NAME. */
+
+#ifdef L_round
+
+#define ROUND1(FX) \
+ ROUND2 (FX)
+
+#define ROUND2(FX) \
+ extern fx_## FX ##_t __round## FX (fx_## FX ##_t x, int rpoint); \
+ \
+ fx_## FX ##_t \
+ __round## FX (fx_## FX ##_t x, int rpoint) \
+ { \
+ return __builtin_avr_round ##FX (x, rpoint); \
+ }
+
+ROUND1(L_LABEL)
+
+#endif /* L_round */
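+
+/* For illustration (not part of the build logic): when this file is compiled
+   with -DL_round and -DL_LABEL=hr, as arranged by lib2-object.mk and t-avr,
+   the macros above expand to roughly
+
+       extern fx_hr_t __roundhr (fx_hr_t x, int rpoint);
+
+       fx_hr_t
+       __roundhr (fx_hr_t x, int rpoint)
+       {
+         return __builtin_avr_roundhr (x, rpoint);
+       }
+*/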
+
+
+
+/*********************************************************************/
+
+/* Implement some count-leading-redundant-sign-bits functions to be used
+ by the countlsFX implementation. */
+
+#ifdef L__clrsbqi
+extern int __clrsbqi2 (char x);
+
+int
+__clrsbqi2 (char x)
+{
+ int ret;
+
+ if (x < 0)
+ x = ~x;
+
+ if (x == 0)
+ return 8 * sizeof (x) -1;
+
+ ret = __builtin_clz (x << 8);
+ return ret - 1;
+}
+#endif /* L__clrsbqi */
+
+
+#ifdef L__clrsbdi
+extern int __clrsbdi2 (long long x);
+
+int
+__clrsbdi2 (long long x)
+{
+ int ret;
+
+ if (x < 0LL)
+ x = ~x;
+
+ if (x == 0LL)
+ return 8 * sizeof (x) -1;
+
+ ret = __builtin_clzll ((unsigned long long) x);
+ return ret - 1;
+}
+#endif /* L__clrsbdi */
+
+
+
+/*********************************************************************/
+
+/* Supply implementations / symbols for __builtin_avr_countlsFX. */
+
+/* Signed */
+
+#ifdef L_countls
+
+#define COUNTLS1(MM) \
+ COUNTLS2 (MM)
+
+#define COUNTLS2(MM) \
+ extern int __countls## MM ##2 (int_## MM ##_t); \
+ extern int __clrsb## MM ##2 (int_## MM ##_t); \
+ \
+ int \
+ __countls## MM ##2 (int_## MM ##_t x) \
+ { \
+ if (x == 0) \
+ return __INT8_MAX__; \
+ \
+ return __clrsb## MM ##2 (x); \
+ }
+
+COUNTLS1(L_LABEL)
+
+#endif /* L_countls */
+
+/* Unsigned */
+
+#ifdef L_countlsu
+
+#define clz_qi2 __builtin_clz /* unused, avoid warning */
+#define clz_hi2 __builtin_clz
+#define clz_si2 __builtin_clzl
+#define clz_di2 __builtin_clzll
+
+#define COUNTLS1(MM) \
+ COUNTLS2 (MM)
+
+#define COUNTLS2(MM) \
+ extern int __countlsu## MM ##2 (uint_## MM ##_t); \
+ \
+ int \
+ __countlsu## MM ##2 (uint_## MM ##_t x) \
+ { \
+ if (x == 0) \
+ return __INT8_MAX__; \
+ \
+ if (sizeof (x) == 1) \
+ return clz_hi2 (x << 8); \
+ else \
+ return clz_## MM ##2 (x); \
+ }
+
+COUNTLS1(L_LABEL)
+
+#endif /* L_countlsu */
diff --git a/gcc-4.9/libgcc/config/avr/t-avr b/gcc-4.9/libgcc/config/avr/t-avr
new file mode 100644
index 000000000..461304706
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/t-avr
@@ -0,0 +1,277 @@
+LIB1ASMSRC = avr/lib1funcs.S
+LIB1ASMFUNCS = \
+ _mulqi3 \
+ _mulhi3 \
+ _mulqihi3 _umulqihi3 \
+ _mulpsi3 _mulsqipsi3 \
+ _mulhisi3 \
+ _umulhisi3 \
+ _usmulhisi3 \
+ _muluhisi3 \
+ _mulshisi3 \
+ _mulsi3 \
+ _udivmodqi4 \
+ _divmodqi4 \
+ _udivmodhi4 \
+ _divmodhi4 \
+ _divmodpsi4 _udivmodpsi4 \
+ _udivmodsi4 \
+ _divmodsi4 \
+ _divdi3 _udivdi3 \
+ _muldi3 _muldi3_6 \
+ _mulsidi3 _umulsidi3 \
+ _udivmod64 \
+ _negsi2 _negdi2 \
+ _prologue \
+ _epilogue \
+ _exit \
+ _cleanup \
+ _tablejump \
+ _tablejump_elpm \
+ _load_3 _load_4 \
+ _xload_1 _xload_2 _xload_3 _xload_4 \
+ _movmemx \
+ _copy_data \
+ _clear_bss \
+ _ctors \
+ _dtors \
+ _ffssi2 \
+ _ffshi2 \
+ _loop_ffsqi2 \
+ _ctzsi2 \
+ _ctzhi2 \
+ _clzdi2 \
+ _clzsi2 \
+ _clzhi2 \
+ _paritydi2 \
+ _paritysi2 \
+ _parityhi2 \
+ _popcounthi2 \
+ _popcountsi2 \
+ _popcountdi2 \
+ _popcountqi2 \
+ _bswapsi2 \
+ _bswapdi2 \
+ _ashldi3 _ashrdi3 _lshrdi3 _rotldi3 \
+ _adddi3 _adddi3_s8 _subdi3 \
+ _cmpdi2 _cmpdi2_s8 \
+ _fmul _fmuls _fmulsu
+
+# Fixed point routines in avr/lib1funcs-fixed.S
+LIB1ASMFUNCS += \
+ _fractqqsf _fractuqqsf \
+ _fracthqsf _fractuhqsf _fracthasf _fractuhasf \
+ _fractsasf _fractusasf _fractsqsf _fractusqsf \
+ \
+ _fractsfqq _fractsfuqq \
+ _fractsfhq _fractsfuhq _fractsfha _fractsfuha \
+ _fractsfsq _fractsfusq _fractsfsa _fractsfusa \
+ _mulqq3 \
+ _mulhq3 _muluhq3 \
+ _mulha3 _muluha3 _muluha3_round \
+ _mulsa3 _mulusa3 \
+ _usmuluha3 _ssmulha3 \
+ _usmulusa3 _ssmulsa3 \
+ _divqq3 _udivuqq3 _divqq_helper \
+ _divhq3 _udivuhq3 \
+ _divha3 _udivuha3 \
+ _divsa3 _udivusa3 \
+ _clr_8 \
+ _ssneg_2 _ssneg_4 _ssneg_8 \
+ _ssabs_1 _ssabs_2 _ssabs_4 _ssabs_8 \
+ _ssadd_8 _sssub_8 \
+ _usadd_8 _ussub_8 \
+ _mask1 _ret \
+ _roundqq3 _rounduqq3 \
+ _round_s2 _round_u2 _round_2_const _addmask_2 \
+ _round_s4 _round_u4 _round_4_const _addmask_4 \
+ _round_x8 \
+ _rounddq3 _roundudq3 \
+ _roundda3 _rounduda3 \
+ _roundta3 _rounduta3 \
+
+
+LIB2FUNCS_EXCLUDE = \
+ _moddi3 _umoddi3 \
+ _clz \
+ _clrsbdi2 \
+
+
+# We do not have the DF type.
+# Most of the C functions in libgcc2 use almost all registers,
+# so use -mcall-prologues for smaller code size.
+HOST_LIBGCC2_CFLAGS += -DDF=SF -Dinhibit_libc -mcall-prologues -Os
+
+# Extra 16-bit integer functions.
+intfuncs16 = _absvXX2 _addvXX3 _subvXX3 _mulvXX3 _negvXX2 _clrsbXX2
+
+hiintfuncs16 = $(subst XX,hi,$(intfuncs16))
+siintfuncs16 = $(subst XX,si,$(intfuncs16))
+
+iter-items := $(hiintfuncs16)
+iter-labels := $(siintfuncs16)
+iter-sizes := $(patsubst %,2,$(siintfuncs16)) $(patsubst %,2,$(hiintfuncs16))
+
+
+include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/siditi-object.mk,$(iter-items))
+libgcc-objects += $(patsubst %,%$(objext),$(hiintfuncs16))
+
+ifeq ($(enable_shared),yes)
+libgcc-s-objects += $(patsubst %,%_s$(objext),$(hiintfuncs16))
+endif
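
avr-gcc has a 16-bit int, i.e. HImode, so the overflow-checking and clrsb
helpers are also needed in HImode variants that libgcc2.c does not provide
by default; judging from iter-labels and iter-sizes, the iteration above
rebuilds the SImode entries with a 2-byte word size to obtain them.  A
hypothetical example of code that is expected to end up in one of these
helpers (not part of the sources):

    /* Compiled with avr-gcc -ftrapv, a plain 16-bit signed multiplication
       is expected to be routed through the overflow-checking helper
       __mulvhi3 that the iteration above adds to libgcc.  */
    int
    checked_scale (int a, int b)
    {
      return a * b;
    }
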
+
+###
+
+conv_XY=$(conv)$(mode1)$(mode2)
+func_X=$(func)$(mode)
+
+# Compile C functions from lib2funcs.c and add them to libgcc.a.
+#
+# Some functions that are not performance-critical are more convenient
+# to implement in C than in assembler.  Most of them serve as the
+# implementation of AVR-specific builtins for the case where the address
+# of a builtin function is taken or where there is no insn that
+# implements the builtin.
+#
+# We don't use LIB2ADD because we want to iterate over the source for
+# different modes, fixed-point suffixes, etc.  See iter-labels and L_LABEL.
+# Each iter-label gets one extra underscore so that we end up with defines
+# like -DL_k instead of overly short ones like -DLk.
+
+# Build roundFX functions from lib2funcs.c
+
+round_suffix := hr r lr uhr ur ulr \
+ hk k uhk uk
+round_funcs := $(foreach func,_round,\
+ $(foreach mode,$(round_suffix),$(func_X)))
+
+iter-items := $(round_funcs)
+iter-labels := $(round_suffix)
+iter-flags := $(patsubst %,-DL_round,$(iter-items))
+
+include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items))
+
+libgcc-objects += $(patsubst %,%$(objext),$(round_funcs))
+
+# Build clrsbXX functions from lib2funcs.c
+
+clrsb_modes := qi di
+clrsb_funcs := $(foreach func,_clrsb,\
+ $(foreach mode,$(clrsb_modes),$(func_X)))
+
+iter-items := $(clrsb_funcs)
+iter-labels := $(clrsb_funcs)
+iter-flags := $(patsubst %,-DL_clrsb,$(iter-items))
+
+include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items))
+
+libgcc-objects += $(patsubst %,%$(objext),$(clrsb_funcs))
+
+# Build signed countlsFX functions from lib2funcs.c
+
+countls_modes := qi hi si di
+countls_funcs := $(foreach func,_countls,\
+ $(foreach mode,$(countls_modes),$(func_X)))
+
+iter-items := $(countls_funcs)
+iter-labels := $(countls_modes)
+iter-flags := $(patsubst %,-DL_countls,$(iter-items))
+
+include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items))
+
+libgcc-objects += $(patsubst %,%$(objext),$(countls_funcs))
+
+# Build unsigned countlsFX functions from lib2funcs.c
+
+countlsu_modes := qi hi si di
+countlsu_funcs := $(foreach func,_countlsu,\
+ $(foreach mode,$(countlsu_modes),$(func_X)))
+
+iter-items := $(countlsu_funcs)
+iter-labels := $(countlsu_modes)
+iter-flags := $(patsubst %,-DL_countlsu,$(iter-items))
+
+include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items))
+
+libgcc-objects += $(patsubst %,%$(objext),$(countlsu_funcs))
+
+
+# Filter out supported conversions from fixed-bit.c
+# Also filter out TQ and UTQ.
+
+# Conversions supported by the compiler
+
+convf_modes = QI UQI QQ UQQ \
+ HI UHI HQ UHQ HA UHA \
+ SI USI SQ USQ SA USA \
+ DI UDI DQ UDQ DA UDA \
+ TI UTI TQ UTQ TA UTA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_fract _fractuns,\
+ $(foreach mode1,$(convf_modes),\
+ $(foreach mode2,$(convf_modes),$(conv_XY))))
+
+# Conversions supported by lib1funcs-fixed.S
+
+conv_to_sf_modes = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA
+conv_from_sf_modes = QQ UQQ HQ UHQ HA UHA SA USA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_fract, \
+ $(foreach mode1,$(conv_to_sf_modes), \
+ $(foreach mode2,SF,$(conv_XY))))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_fract,\
+ $(foreach mode1,SF,\
+ $(foreach mode2,$(conv_from_sf_modes),$(conv_XY))))
+
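The float conversions removed in the last two blocks are not lost: they are
provided, hand-optimized, by lib1funcs-fixed.S (see the _fract* entries in
LIB1ASMFUNCS above), so the generic fixed-bit.c versions would only
duplicate them.  A hypothetical example, assuming the usual avr-gcc mapping
of _Accum to SAmode:

    #include <stdfix.h>

    /* Converting an _Accum to float is expected to call __fractsasf,
       built from lib1funcs-fixed.S rather than from fixed-bit.c.  */
    float
    accum_to_float (_Accum a)
    {
      return (float) a;
    }
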
+# Arithmetic supported by the compiler
+
+allfix_modes = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA DA UDA DQ UDQ TQ UTQ TA UTA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_add _sub,\
+ $(foreach mode,$(allfix_modes),$(func_X)))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_lshr _ashl _ashr _cmp,\
+ $(foreach mode,$(allfix_modes),$(func_X)))
+
+
+usat_modes = UQQ UHQ UHA USQ USA UDQ UDA UTQ UTA
+ssat_modes = QQ HQ HA SQ SA DQ DA TQ TA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_ssadd _sssub _ssneg _ssabs,\
+ $(foreach mode,$(ssat_modes),$(func_X)))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_usadd _ussub _usneg,\
+ $(foreach mode,$(usat_modes),$(func_X)))
+
+
+smul_modes = QQ HQ HA SA
+umul_modes = UQQ UHQ UHA USA
+sdiv_modes = QQ HQ HA SA
+udiv_modes = UQQ UHQ UHA USA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_mul,\
+ $(foreach mode,$(smul_modes) $(umul_modes),$(func_X)))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_div,\
+ $(foreach mode,$(sdiv_modes) $(udiv_modes),$(func_X)))
+
+
+ssmul_modes = HA SA
+usmul_modes = UHA USA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_usmul,\
+ $(foreach mode,$(usmul_modes),$(func_X)))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_ssmul,\
+ $(foreach mode,$(ssmul_modes),$(func_X)))
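
Likewise, the saturating multiplications excluded here are covered by the
_ssmulha3 / _usmuluha3 and _ssmulsa3 / _usmulusa3 entries in LIB1ASMFUNCS.
A hypothetical example, assuming short _Accum maps to HAmode as usual on
avr-gcc:

    #include <stdfix.h>

    /* A saturating 16-bit accum multiplication is expected to expand to a
       call to __ssmulha3 from lib1funcs-fixed.S; the generic fixed-bit.c
       version is excluded above.  */
    _Sat short _Accum
    sat_scale (_Sat short _Accum x, _Sat short _Accum y)
    {
      return x * y;
    }
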
diff --git a/gcc-4.9/libgcc/config/avr/t-avrlibc b/gcc-4.9/libgcc/config/avr/t-avrlibc
new file mode 100644
index 000000000..d2c8b870a
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/t-avrlibc
@@ -0,0 +1,66 @@
+# This file is used unless the compiler is configured with --with-avrlibc=no.
+#
+# AVR-Libc comes with hand-optimized float routines.
+# For historical reasons, these routines live in AVR-Libc rather than in
+# libgcc, but they use the same function names as libgcc.
+# To make sure the AVR-Libc routines are always used, remove the
+# corresponding routines from libgcc.
+#
+# See also PR54461.
+#
+#
+# Arithmetic:
+# __addsf3 __subsf3 __divsf3 __mulsf3 __negsf2
+#
+# Comparison:
+# __cmpsf2 __unordsf2
+# __eqsf2 __lesf2 __ltsf2 __nesf2 __gesf2 __gtsf2
+#
+# Conversion:
+# __fixsfdi __fixunssfdi __floatdisf __floatundisf
+# __fixsfsi __fixunssfsi __floatsisf __floatunsisf
+#
+#
+# These functions are contained in modules:
+#
+# _addsub_sf.o: __addsf3 __subsf3
+# _mul_sf.o: __mulsf3
+# _div_sf.o: __divsf3
+# _negate_sf.o: __negsf2
+#
+# _compare_sf.o: __cmpsf2
+# _unord_sf.o: __unordsf2
+# _eq_sf.o: __eqsf2
+# _ne_sf.o: __nesf2
+# _ge_sf.o: __gesf2
+# _gt_sf.o: __gtsf2
+# _le_sf.o: __lesf2
+# _lt_sf.o: __ltsf2
+#
+# _fixsfdi.o: __fixsfdi
+# _fixunssfdi.o: __fixunssfdi
+# _fixunssfsi.o: __fixunssfsi
+# _floatdisf.o: __floatdisf
+# _floatundisf.o: __floatundisf
+# _sf_to_si.o: __fixsfsi
+# _si_to_sf.o: __floatsisf
+# _usi_to_sf.o: __floatunsisf
+
+
+# SFmode
+LIB2FUNCS_EXCLUDE += \
+ _addsub_sf \
+ _negate_sf \
+ _mul_sf _div_sf \
+ \
+ _compare_sf \
+ _unord_sf \
+ _eq_sf _ne_sf \
+ _gt_sf _ge_sf \
+ _lt_sf _le_sf \
+ \
+ _si_to_sf _sf_to_si \
+ _usi_to_sf _sf_to_usi \
+ _fixunssfsi _fixsfdi \
+ _fixunssfdi \
+ _floatdisf _floatundisf
diff --git a/gcc-4.9/libgcc/config/avr/t-rtems b/gcc-4.9/libgcc/config/avr/t-rtems
new file mode 100644
index 000000000..43b57ee32
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/t-rtems
@@ -0,0 +1,2 @@
+# RTEMS uses _exit from newlib
+LIB1ASMFUNCS := $(filter-out _exit,$(LIB1ASMFUNCS))