Diffstat (limited to 'gcc-4.9/libgcc/config/avr')
-rw-r--r--  gcc-4.9/libgcc/config/avr/avr-lib.h           105
-rw-r--r--  gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S  1915
-rw-r--r--  gcc-4.9/libgcc/config/avr/lib1funcs.S        3226
-rw-r--r--  gcc-4.9/libgcc/config/avr/lib2-object.mk       23
-rw-r--r--  gcc-4.9/libgcc/config/avr/lib2funcs.c         226
-rw-r--r--  gcc-4.9/libgcc/config/avr/t-avr               277
-rw-r--r--  gcc-4.9/libgcc/config/avr/t-avrlibc            66
-rw-r--r--  gcc-4.9/libgcc/config/avr/t-rtems               2
8 files changed, 5840 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/avr/avr-lib.h b/gcc-4.9/libgcc/config/avr/avr-lib.h
new file mode 100644
index 000000000..37d5fa1a7
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/avr-lib.h
@@ -0,0 +1,105 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef FLOAT
+#define CMPtype QItype
+#define DF SF
+#define DI SI
+typedef int QItype __attribute__ ((mode (QI)));
+#endif
+
+/* fixed-bit.h does not define functions for TA and UTA because
+ that part is wrapped in #if MIN_UNITS_PER_WORD > 4.
+ This would lead to empty functions for TA and UTA.
+ Thus, supply appropriate defines as if HAVE_[U]TA == 1.
+ #define HAVE_[U]TA 1 won't work because avr-modes.def
+ uses ADJUST_BYTESIZE(TA,8) and fixed-bit.h is not generic enough
+ to arrange for such changes of the mode size. */
+
+typedef unsigned _Fract UTAtype __attribute__ ((mode (UTA)));
+
+#if defined (UTA_MODE)
+#define FIXED_SIZE 8 /* in bytes */
+#define INT_C_TYPE UDItype
+#define UINT_C_TYPE UDItype
+#define HINT_C_TYPE USItype
+#define HUINT_C_TYPE USItype
+#define MODE_NAME UTA
+#define MODE_NAME_S uta
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (FROM_UTA)
+#define FROM_TYPE 4 /* ID for fixed-point */
+#define FROM_MODE_NAME UTA
+#define FROM_MODE_NAME_S uta
+#define FROM_INT_C_TYPE UDItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 8 /* in bytes */
+#elif defined (TO_UTA)
+#define TO_TYPE 4 /* ID for fixed-point */
+#define TO_MODE_NAME UTA
+#define TO_MODE_NAME_S uta
+#define TO_INT_C_TYPE UDItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 8 /* in bytes */
+#endif
+
+/* Same for TAmode */
+
+typedef _Fract TAtype __attribute__ ((mode (TA)));
+
+#if defined (TA_MODE)
+#define FIXED_SIZE 8 /* in bytes */
+#define INT_C_TYPE DItype
+#define UINT_C_TYPE UDItype
+#define HINT_C_TYPE SItype
+#define HUINT_C_TYPE USItype
+#define MODE_NAME TA
+#define MODE_NAME_S ta
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (FROM_TA)
+#define FROM_TYPE 4 /* ID for fixed-point */
+#define FROM_MODE_NAME TA
+#define FROM_MODE_NAME_S ta
+#define FROM_INT_C_TYPE DItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 8 /* in bytes */
+#elif defined (TO_TA)
+#define TO_TYPE 4 /* ID for fixed-point */
+#define TO_MODE_NAME TA
+#define TO_MODE_NAME_S ta
+#define TO_INT_C_TYPE DItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 8 /* in bytes */
+#endif
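
For orientation, a hedged host-side illustration of what these macro blocks describe: a TA/UTA value occupies an 8-byte container (FIXED_SIZE 8, carried in DItype/UDItype). The 16.48 split assumed below is the usual AVR layout for the unsigned long long accumulator, but it is an assumption made here for illustration, not something this header states:

    #include <stdint.h>

    typedef uint64_t uta_bits;                   /* raw UTA container, cf. UINT_C_TYPE */

    /* Convert a raw UTA bit pattern to double, assuming 16 integral and
       48 fractional bits.  Illustration only.  */
    double uta_bits_to_double (uta_bits x)
    {
      return (double) x / 281474976710656.0;     /* divide by 2^48 */
    }
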
diff --git a/gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S b/gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S
new file mode 100644
index 000000000..8f3ed9201
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S
@@ -0,0 +1,1915 @@
+/* -*- Mode: Asm -*- */
+;; Copyright (C) 2012-2014 Free Software Foundation, Inc.
+;; Contributed by Sean D'Epagnier (sean@depagnier.com)
+;; Georg-Johann Lay (avr@gjlay.de)
+
+;; This file is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by the
+;; Free Software Foundation; either version 3, or (at your option) any
+;; later version.
+
+;; In addition to the permissions in the GNU General Public License, the
+;; Free Software Foundation gives you unlimited permission to link the
+;; compiled version of this file into combinations with other programs,
+;; and to distribute those combinations without any restriction coming
+;; from the use of this file. (The General Public License restrictions
+;; do apply in other respects; for example, they cover modification of
+;; the file, and distribution when not linked into a combine
+;; executable.)
+
+;; This file is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Fixed point library routines for AVR
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+.section .text.libgcc.fixed, "ax", @progbits
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Conversions to float
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#if defined (L_fractqqsf)
+DEFUN __fractqqsf
+ ;; Move in place for SA -> SF conversion
+ clr r22
+ mov r23, r24
+ ;; Sign-extend
+ lsl r24
+ sbc r24, r24
+ mov r25, r24
+ XJMP __fractsasf
+ENDF __fractqqsf
+#endif /* L_fractqqsf */
+
+#if defined (L_fractuqqsf)
+DEFUN __fractuqqsf
+ ;; Move in place for USA -> SF conversion
+ clr r22
+ mov r23, r24
+ ;; Zero-extend
+ clr r24
+ clr r25
+ XJMP __fractusasf
+ENDF __fractuqqsf
+#endif /* L_fractuqqsf */
+
+#if defined (L_fracthqsf)
+DEFUN __fracthqsf
+ ;; Move in place for SA -> SF conversion
+ wmov 22, 24
+ ;; Sign-extend
+ lsl r25
+ sbc r24, r24
+ mov r25, r24
+ XJMP __fractsasf
+ENDF __fracthqsf
+#endif /* L_fracthqsf */
+
+#if defined (L_fractuhqsf)
+DEFUN __fractuhqsf
+ ;; Move in place for USA -> SF conversion
+ wmov 22, 24
+ ;; Zero-extend
+ clr r24
+ clr r25
+ XJMP __fractusasf
+ENDF __fractuhqsf
+#endif /* L_fractuhqsf */
+
+#if defined (L_fracthasf)
+DEFUN __fracthasf
+ ;; Move in place for SA -> SF conversion
+ clr r22
+ mov r23, r24
+ mov r24, r25
+ ;; Sign-extend
+ lsl r25
+ sbc r25, r25
+ XJMP __fractsasf
+ENDF __fracthasf
+#endif /* L_fracthasf */
+
+#if defined (L_fractuhasf)
+DEFUN __fractuhasf
+ ;; Move in place for USA -> SF conversion
+ clr r22
+ mov r23, r24
+ mov r24, r25
+ ;; Zero-extend
+ clr r25
+ XJMP __fractusasf
+ENDF __fractuhasf
+#endif /* L_fractuhasf */
+
+
+#if defined (L_fractsqsf)
+DEFUN __fractsqsf
+ XCALL __floatsisf
+ ;; Divide non-zero results by 2^31 to move the
+ ;; decimal point into place
+ tst r25
+ breq 0f
+ subi r24, exp_lo (31)
+ sbci r25, exp_hi (31)
+0: ret
+ENDF __fractsqsf
+#endif /* L_fractsqsf */
+
+#if defined (L_fractusqsf)
+DEFUN __fractusqsf
+ XCALL __floatunsisf
+ ;; Divide non-zero results by 2^32 to move the
+ ;; decimal point into place
+ cpse r25, __zero_reg__
+ subi r25, exp_hi (32)
+ ret
+ENDF __fractusqsf
+#endif /* L_fractusqsf */
+
+#if defined (L_fractsasf)
+DEFUN __fractsasf
+ XCALL __floatsisf
+ ;; Divide non-zero results by 2^15 to move the
+ ;; decimal point into place
+ tst r25
+ breq 0f
+ subi r24, exp_lo (15)
+ sbci r25, exp_hi (15)
+0: ret
+ENDF __fractsasf
+#endif /* L_fractsasf */
+
+#if defined (L_fractusasf)
+DEFUN __fractusasf
+ XCALL __floatunsisf
+ ;; Divide non-zero results by 2^16 to move the
+ ;; decimal point into place
+ cpse r25, __zero_reg__
+ subi r25, exp_hi (16)
+ ret
+ENDF __fractusasf
+#endif /* L_fractusasf */
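
The conversions above all reduce to the same idea: convert the raw fixed-point bit pattern to float with __floatsisf/__floatunsisf, then drop the binary point by adjusting the exponent (the subi/sbci with exp_lo/exp_hi). A minimal host-side sketch of that scaling, using an s16.15 accum and an unsigned .32 fract as examples; the layouts and helper names are assumptions for illustration, not part of this file:

    #include <stdint.h>

    /* SA (s16.15) bits to float: int->float conversion, then divide by 2^15.  */
    float sa_bits_to_float (int32_t bits)
    {
      return (float) bits / 32768.0f;
    }

    /* USQ (unsigned .32) bits to float: divide by 2^32 instead.  */
    float usq_bits_to_float (uint32_t bits)
    {
      return (float) bits / 4294967296.0f;
    }
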
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Conversions from float
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#if defined (L_fractsfqq)
+DEFUN __fractsfqq
+ ;; Multiply with 2^{24+7} to get a QQ result in r25
+ subi r24, exp_lo (-31)
+ sbci r25, exp_hi (-31)
+ XCALL __fixsfsi
+ mov r24, r25
+ ret
+ENDF __fractsfqq
+#endif /* L_fractsfqq */
+
+#if defined (L_fractsfuqq)
+DEFUN __fractsfuqq
+ ;; Multiply with 2^{24+8} to get a UQQ result in r25
+ subi r25, exp_hi (-32)
+ XCALL __fixunssfsi
+ mov r24, r25
+ ret
+ENDF __fractsfuqq
+#endif /* L_fractsfuqq */
+
+#if defined (L_fractsfha)
+DEFUN __fractsfha
+ ;; Multiply with 2^{16+7} to get a HA result in r25:r24
+ subi r24, exp_lo (-23)
+ sbci r25, exp_hi (-23)
+ XJMP __fixsfsi
+ENDF __fractsfha
+#endif /* L_fractsfha */
+
+#if defined (L_fractsfuha)
+DEFUN __fractsfuha
+ ;; Multiply with 2^24 to get a UHA result in r25:r24
+ subi r25, exp_hi (-24)
+ XJMP __fixunssfsi
+ENDF __fractsfuha
+#endif /* L_fractsfuha */
+
+#if defined (L_fractsfhq)
+FALIAS __fractsfsq
+
+DEFUN __fractsfhq
+ ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
+ ;; resp. with 2^31 to get a SQ result in r25:r22
+ subi r24, exp_lo (-31)
+ sbci r25, exp_hi (-31)
+ XJMP __fixsfsi
+ENDF __fractsfhq
+#endif /* L_fractsfhq */
+
+#if defined (L_fractsfuhq)
+FALIAS __fractsfusq
+
+DEFUN __fractsfuhq
+ ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
+ ;; resp. with 2^32 to get a USQ result in r25:r22
+ subi r25, exp_hi (-32)
+ XJMP __fixunssfsi
+ENDF __fractsfuhq
+#endif /* L_fractsfuhq */
+
+#if defined (L_fractsfsa)
+DEFUN __fractsfsa
+ ;; Multiply with 2^15 to get a SA result in r25:r22
+ subi r24, exp_lo (-15)
+ sbci r25, exp_hi (-15)
+ XJMP __fixsfsi
+ENDF __fractsfsa
+#endif /* L_fractsfsa */
+
+#if defined (L_fractsfusa)
+DEFUN __fractsfusa
+ ;; Multiply with 2^16 to get a USA result in r25:r22
+ subi r25, exp_hi (-16)
+ XJMP __fixunssfsi
+ENDF __fractsfusa
+#endif /* L_fractsfusa */
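
Going the other way, the stubs above scale the float up by 2^FBIT (by biasing the exponent with exp_lo/exp_hi) and let __fixsfsi/__fixunssfsi truncate. A rough C equivalent, ignoring the out-of-range behaviour of the fix routines; the concrete formats chosen here are only examples:

    #include <stdint.h>

    /* float to SA (s16.15) bits: multiply by 2^15, truncate toward zero.  */
    int32_t float_to_sa_bits (float x)
    {
      return (int32_t) (x * 32768.0f);
    }

    /* float to UQQ (unsigned .8) bits, for x in [0, 1).  */
    uint8_t float_to_uqq_bits (float x)
    {
      return (uint8_t) (uint32_t) (x * 256.0f);
    }
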
+
+
+;; For multiplication the functions here are called directly from
+;; avr-fixed.md instead of using the standard libcall mechanisms.
+;; This can make better code because GCC knows exactly which
+;; of the call-used registers (not all of them) are clobbered. */
+
+/*******************************************************
+ Fractional Multiplication 8 x 8 without MUL
+*******************************************************/
+
+#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
+;;; R23 = R24 * R25
+;;; Clobbers: __tmp_reg__, R22, R24, R25
+;;; Rounding: ???
+DEFUN __mulqq3
+ XCALL __fmuls
+ ;; TR 18037 requires that (-1) * (-1) does not overflow
+ ;; The only input that can produce -1 is (-1)^2.
+ dec r23
+ brvs 0f
+ inc r23
+0: ret
+ENDF __mulqq3
+#endif /* L_mulqq3 && ! HAVE_MUL */
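
What __mulqq3 has to guarantee, in plain C terms: the Q7 (s.7) result is the 16-bit product with 7 fraction bits dropped, and the single overflowing case (-1) * (-1) must saturate per TR 18037 rather than wrap. A hedged reference sketch; it truncates and does not model the routine's exact rounding:

    #include <stdint.h>

    int8_t mulqq (int8_t a, int8_t b)            /* s.7 x s.7 -> s.7 */
    {
      if (a == INT8_MIN && b == INT8_MIN)        /* (-1) * (-1): saturate */
        return INT8_MAX;
      int16_t p = (int16_t) a * b;               /* s.14 product */
      return (int8_t) (p >> 7);                  /* drop 7 fraction bits */
    }
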
+
+/*******************************************************
+ Fractional Multiply .16 x .16 with and without MUL
+*******************************************************/
+
+#if defined (L_mulhq3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
+DEFUN __mulhq3
+ XCALL __mulhisi3
+ ;; Shift result into place
+ lsl r23
+ rol r24
+ rol r25
+ brvs 1f
+ ;; Round
+ sbrc r23, 7
+ adiw r24, 1
+ ret
+1: ;; Overflow. TR 18037 requires (-1)^2 not to overflow
+ ldi r24, lo8 (0x7fff)
+ ldi r25, hi8 (0x7fff)
+ ret
+ENDF __mulhq3
+#endif /* defined (L_mulhq3) */
+
+#if defined (L_muluhq3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) *= (R23:R22)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB < error <= 0.5 LSB
+DEFUN __muluhq3
+ XCALL __umulhisi3
+ ;; Round
+ sbrc r23, 7
+ adiw r24, 1
+ ret
+ENDF __muluhq3
+#endif /* L_muluhq3 */
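
The .16 x .16 routines compute the full 32-bit product, remove the duplicated sign bit, round on the highest dropped bit, and saturate the lone (-1)^2 overflow. Roughly, for the signed case (a sketch of the documented behaviour, not a drop-in replacement):

    #include <stdint.h>

    int16_t mulhq (int16_t a, int16_t b)         /* s.15 x s.15 -> s.15 */
    {
      if (a == INT16_MIN && b == INT16_MIN)      /* (-1) * (-1): saturate */
        return INT16_MAX;
      int32_t p = (int32_t) a * b;               /* s.30 product */
      p += (int32_t) 1 << 14;                    /* round the 15 dropped bits */
      return (int16_t) (p >> 15);
    }
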
+
+
+/*******************************************************
+ Fixed Multiply 8.8 x 8.8 with and without MUL
+*******************************************************/
+
+#if defined (L_mulha3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
+DEFUN __mulha3
+ XCALL __mulhisi3
+ lsl r22
+ rol r23
+ rol r24
+ XJMP __muluha3_round
+ENDF __mulha3
+#endif /* L_mulha3 */
+
+#if defined (L_muluha3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) *= (R23:R22)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB < error <= 0.5 LSB
+DEFUN __muluha3
+ XCALL __umulhisi3
+ XJMP __muluha3_round
+ENDF __muluha3
+#endif /* L_muluha3 */
+
+#if defined (L_muluha3_round)
+DEFUN __muluha3_round
+ ;; Shift result into place
+ mov r25, r24
+ mov r24, r23
+ ;; Round
+ sbrc r22, 7
+ adiw r24, 1
+ ret
+ENDF __muluha3_round
+#endif /* L_muluha3_round */
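
For 8.8 x 8.8 the product of the two 16-bit operands is a 16.16 value; the routines keep the middle two bytes and round on bit 7 of the dropped low byte. A host-side sketch of the unsigned variant; as in the non-saturating routine, integral overflow simply wraps:

    #include <stdint.h>

    uint16_t muluha (uint16_t a, uint16_t b)     /* u8.8 x u8.8 -> u8.8 */
    {
      uint32_t p = (uint32_t) a * b;             /* u16.16 product */
      p += 1u << 7;                              /* round the dropped 8 bits */
      return (uint16_t) (p >> 8);                /* keep the middle 16 bits */
    }
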
+
+
+/*******************************************************
+ Fixed Multiplication 16.16 x 16.16
+*******************************************************/
+
+;; Bits outside the result (below LSB), used in the signed version
+#define GUARD __tmp_reg__
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; Multiplier
+#define A0 16
+#define A1 A0+1
+#define A2 A1+1
+#define A3 A2+1
+
+;; Multiplicand
+#define B0 20
+#define B1 B0+1
+#define B2 B1+1
+#define B3 B2+1
+
+;; Result
+#define C0 24
+#define C1 C0+1
+#define C2 C1+1
+#define C3 C2+1
+
+#if defined (L_mulusa3)
+;;; (C3:C0) = (A3:A0) * (B3:B0)
+DEFUN __mulusa3
+ set
+ ;; Fallthru
+ENDF __mulusa3
+
+;;; Round for last digit iff T = 1
+;;; Return guard bits in GUARD (__tmp_reg__).
+;;; Rounding, T = 0: -1.0 LSB < error <= 0 LSB
+;;; Rounding, T = 1: -0.5 LSB < error <= 0.5 LSB
+DEFUN __mulusa3_round
+ ;; Some of the MUL instructions have LSBs outside the result.
+ ;; Don't ignore these LSBs in order to tame rounding error.
+ ;; Use C2/C3 for these LSBs.
+
+ clr C0
+ clr C1
+ mul A0, B0 $ movw C2, r0
+
+ mul A1, B0 $ add C3, r0 $ adc C0, r1
+ mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1
+
+ ;; Round if T = 1.  Store the guard bits outside the result; the signed
+ ;; version (function below) uses them for rounding and left-shifting.
+ brtc 0f
+ sbrc C3, 7
+ adiw C0, 1
+0: push C3
+
+ ;; The following MULs don't have LSBs outside the result.
+ ;; C2/C3 is the high part.
+
+ mul A0, B2 $ add C0, r0 $ adc C1, r1 $ sbc C2, C2
+ mul A1, B1 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
+ mul A2, B0 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
+ neg C2
+
+ mul A0, B3 $ add C1, r0 $ adc C2, r1 $ sbc C3, C3
+ mul A1, B2 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
+ mul A2, B1 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
+ mul A3, B0 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
+ neg C3
+
+ mul A1, B3 $ add C2, r0 $ adc C3, r1
+ mul A2, B2 $ add C2, r0 $ adc C3, r1
+ mul A3, B1 $ add C2, r0 $ adc C3, r1
+
+ mul A2, B3 $ add C3, r0
+ mul A3, B2 $ add C3, r0
+
+ ;; Guard bits used in the signed version below.
+ pop GUARD
+ clr __zero_reg__
+ ret
+ENDF __mulusa3_round
+#endif /* L_mulusa3 */
+
+#if defined (L_mulsa3)
+;;; (C3:C0) = (A3:A0) * (B3:B0)
+;;; Clobbers: __tmp_reg__, T
+;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
+DEFUN __mulsa3
+ clt
+ XCALL __mulusa3_round
+ ;; A posteriori sign extension of the operands
+ tst B3
+ brpl 1f
+ sub C2, A0
+ sbc C3, A1
+1: sbrs A3, 7
+ rjmp 2f
+ sub C2, B0
+ sbc C3, B1
+2:
+ ;; Shift 1 bit left to adjust for 15 fractional bits
+ lsl GUARD
+ rol C0
+ rol C1
+ rol C2
+ rol C3
+ ;; Round last digit
+ lsl GUARD
+ adc C0, __zero_reg__
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ ret
+ENDF __mulsa3
+#endif /* L_mulsa3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#else /* __AVR_HAVE_MUL__ */
+
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#define B0 22
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+;; __tmp_reg__
+#define CC0 0
+;; __zero_reg__
+#define CC1 1
+#define CC2 16
+#define CC3 17
+
+#define AA0 26
+#define AA1 AA0+1
+#define AA2 30
+#define AA3 AA2+1
+
+#if defined (L_mulsa3)
+;;; (R25:R22) *= (R21:R18)
+;;; Clobbers: ABI, called by optabs
+;;; Rounding: -1 LSB <= error <= 1 LSB
+DEFUN __mulsa3
+ push B0
+ push B1
+ push B3
+ clt
+ XCALL __mulusa3_round
+ pop r30
+ ;; sign-extend B
+ bst r30, 7
+ brtc 1f
+ ;; A1, A0 survived in R27:R26
+ sub C2, AA0
+ sbc C3, AA1
+1:
+ pop AA1 ;; B1
+ pop AA0 ;; B0
+
+ ;; sign-extend A. A3 survived in R31
+ bst AA3, 7
+ brtc 2f
+ sub C2, AA0
+ sbc C3, AA1
+2:
+ ;; Shift 1 bit left to adjust for 15 fractional bits
+ lsl GUARD
+ rol C0
+ rol C1
+ rol C2
+ rol C3
+ ;; Round last digit
+ lsl GUARD
+ adc C0, __zero_reg__
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ ret
+ENDF __mulsa3
+#endif /* L_mulsa3 */
+
+#if defined (L_mulusa3)
+;;; (R25:R22) *= (R21:R18)
+;;; Clobbers: ABI, called by optabs
+;;; Rounding: -1 LSB <= error <= 1 LSB
+DEFUN __mulusa3
+ set
+ ;; Fallthru
+ENDF __mulusa3
+
+;;; A[] survives in 26, 27, 30, 31
+;;; Also used by __mulsa3 with T = 0
+;;; Round if T = 1
+;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
+DEFUN __mulusa3_round
+ push CC2
+ push CC3
+ ; clear result
+ clr __tmp_reg__
+ wmov CC2, CC0
+ ; save multiplicand
+ wmov AA0, A0
+ wmov AA2, A2
+ rjmp 3f
+
+ ;; Loop the integral part
+
+1: ;; CC += A * 2^n; n >= 0
+ add CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
+
+2: ;; A <<= 1
+ lsl A0 $ rol A1 $ rol A2 $ rol A3
+
+3: ;; IBIT(B) >>= 1
+ ;; Carry = n-th bit of B; n >= 0
+ lsr B3
+ ror B2
+ brcs 1b
+ sbci B3, 0
+ brne 2b
+
+ ;; Loop the fractional part
+ ;; B2/B3 is 0 now, use as guard bits for rounding
+ ;; Restore multiplicand
+ wmov A0, AA0
+ wmov A2, AA2
+ rjmp 5f
+
+4: ;; CC += A:Guard * 2^n; n < 0
+ add B3,B2 $ adc CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
+5:
+ ;; A:Guard >>= 1
+ lsr A3 $ ror A2 $ ror A1 $ ror A0 $ ror B2
+
+ ;; FBIT(B) <<= 1
+ ;; Carry = n-th bit of B; n < 0
+ lsl B0
+ rol B1
+ brcs 4b
+ sbci B0, 0
+ brne 5b
+
+ ;; Save guard bits and set carry for rounding
+ push B3
+ lsl B3
+ ;; Move result into place
+ wmov C2, CC2
+ wmov C0, CC0
+ clr __zero_reg__
+ brtc 6f
+ ;; Round iff T = 1
+ adc C0, __zero_reg__
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+6:
+ pop GUARD
+ ;; Epilogue
+ pop CC3
+ pop CC2
+ ret
+ENDF __mulusa3_round
+#endif /* L_mulusa3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef AA0
+#undef AA1
+#undef AA2
+#undef AA3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
+
+#endif /* __AVR_HAVE_MUL__ */
+
+#undef GUARD
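
Both the MUL and the non-MUL paths above implement the same 16.16 x 16.16 semantics; the guard byte kept in __tmp_reg__ only exists to keep the rounding error within the documented bounds. With a 64-bit intermediate, which the AVR of course does not have, the signed accum multiply reduces to the following sketch (illustration of the intended result, not the actual algorithm):

    #include <stdint.h>

    int32_t mulsa (int32_t a, int32_t b)         /* s16.15 x s16.15 -> s16.15 */
    {
      int64_t p = (int64_t) a * b;               /* s32.30 product */
      p += (int64_t) 1 << 14;                    /* round the 15 dropped bits */
      return (int32_t) (p >> 15);                /* integral overflow wraps */
    }
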
+
+/***********************************************************
+ Fixed unsigned saturated Multiplication 8.8 x 8.8
+***********************************************************/
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+#define SS __tmp_reg__
+
+#if defined (L_usmuluha3)
+DEFUN __usmuluha3
+ ;; Widening multiply
+#ifdef __AVR_HAVE_MUL__
+ ;; Adjust interface
+ movw R26, R22
+ movw R18, R24
+#endif /* HAVE MUL */
+ XCALL __umulhisi3
+ tst C3
+ brne .Lmax
+ ;; Round, target is in C1..C2
+ lsl C0
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ brcs .Lmax
+ ;; Move result into place
+ mov C3, C2
+ mov C2, C1
+ ret
+.Lmax:
+ ;; Saturate
+ ldi C2, 0xff
+ ldi C3, 0xff
+ ret
+ENDF __usmuluha3
+#endif /* L_usmuluha3 */
+
+/***********************************************************
+ Fixed signed saturated Multiplication s8.7 x s8.7
+***********************************************************/
+
+#if defined (L_ssmulha3)
+DEFUN __ssmulha3
+ ;; Widening multiply
+#ifdef __AVR_HAVE_MUL__
+ ;; Adjust interface
+ movw R26, R22
+ movw R18, R24
+#endif /* HAVE MUL */
+ XCALL __mulhisi3
+ ;; Adjust decimal point
+ lsl C0
+ rol C1
+ rol C2
+ brvs .LsatC3.3
+ ;; The 9 MSBs must be the same
+ rol C3
+ sbc SS, SS
+ cp C3, SS
+ brne .LsatSS
+ ;; Round
+ lsl C0
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ brvs .Lmax
+ ;; Move result into place
+ mov C3, C2
+ mov C2, C1
+ ret
+.Lmax:
+ ;; Load 0x7fff
+ clr C3
+.LsatC3.3:
+ ;; C3 < 0 --> 0x8000
+ ;; C3 >= 0 --> 0x7fff
+ mov SS, C3
+.LsatSS:
+ ;; Load min / max value:
+ ;; SS = -1 --> 0x8000
+ ;; SS = 0 --> 0x7fff
+ ldi C3, 0x7f
+ ldi C2, 0xff
+ sbrc SS, 7
+ adiw C2, 1
+ ret
+ENDF __ssmulha3
+#endif /* L_ssmulha3 */
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef SS
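
The saturated 8.8 variants add clamping on top of the rounding shown earlier. A hedged sketch of the unsigned case, mirroring the "round, then clamp at the maximal value" behaviour of __usmuluha3:

    #include <stdint.h>

    uint16_t usmuluha (uint16_t a, uint16_t b)   /* u8.8 x u8.8, saturating */
    {
      uint32_t p = (uint32_t) a * b + (1u << 7); /* u16.16 product, rounded */
      p >>= 8;                                   /* back to 8.8 */
      return p > 0xFFFFu ? 0xFFFF : (uint16_t) p;
    }
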
+
+/***********************************************************
+ Fixed unsigned saturated Multiplication 16.16 x 16.16
+***********************************************************/
+
+#define C0 18
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+#define C4 C0+4
+#define C5 C0+5
+#define C6 C0+6
+#define C7 C0+7
+#define SS __tmp_reg__
+
+#if defined (L_usmulusa3)
+;; R22[4] = R22[4] *{usat} R18[4]
+;; Ordinary ABI function
+DEFUN __usmulusa3
+ ;; Widening multiply
+ XCALL __umulsidi3
+ or C7, C6
+ brne .Lmax
+ ;; Round, target is in C2..C5
+ lsl C1
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ adc C4, __zero_reg__
+ adc C5, __zero_reg__
+ brcs .Lmax
+ ;; Move result into place
+ wmov C6, C4
+ wmov C4, C2
+ ret
+.Lmax:
+ ;; Saturate
+ ldi C7, 0xff
+ ldi C6, 0xff
+ wmov C4, C6
+ ret
+ENDF __usmulusa3
+#endif /* L_usmulusa3 */
+
+/***********************************************************
+ Fixed signed saturated Multiplication s16.15 x s16.15
+***********************************************************/
+
+#if defined (L_ssmulsa3)
+;; R22[4] = R22[4] *{ssat} R18[4]
+;; Ordinary ABI function
+DEFUN __ssmulsa3
+ ;; Widening multiply
+ XCALL __mulsidi3
+ ;; Adjust decimal point
+ lsl C1
+ rol C2
+ rol C3
+ rol C4
+ rol C5
+ brvs .LsatC7.7
+ ;; The 17 MSBs must be the same
+ rol C6
+ rol C7
+ sbc SS, SS
+ cp C6, SS
+ cpc C7, SS
+ brne .LsatSS
+ ;; Round
+ lsl C1
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ adc C4, __zero_reg__
+ adc C5, __zero_reg__
+ brvs .Lmax
+ ;; Move result into place
+ wmov C6, C4
+ wmov C4, C2
+ ret
+
+.Lmax:
+ ;; Load 0x7fffffff
+ clr C7
+.LsatC7.7:
+ ;; C7 < 0 --> 0x80000000
+ ;; C7 >= 0 --> 0x7fffffff
+ lsl C7
+ sbc SS, SS
+.LsatSS:
+ ;; Load min / max value:
+ ;; SS = -1 --> 0x80000000
+ ;; SS = 0 --> 0x7fffffff
+ com SS
+ mov C4, SS
+ mov C5, C4
+ wmov C6, C4
+ subi C7, 0x80
+ ret
+ENDF __ssmulsa3
+#endif /* L_ssmulsa3 */
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#undef SS
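
The s16.15 saturated multiply follows the same pattern at double width; the "17 MSBs must be the same" test is simply the check that the wide product still fits in s16.15. A reference sketch that ignores the exact ordering of the rounding and overflow checks:

    #include <stdint.h>

    int32_t ssmulsa (int32_t a, int32_t b)       /* s16.15 x s16.15, saturating */
    {
      int64_t p = ((int64_t) a * b + (1 << 14)) >> 15;   /* rounded s16.15 value */
      if (p > INT32_MAX) return INT32_MAX;
      if (p < INT32_MIN) return INT32_MIN;
      return (int32_t) p;
    }
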
+
+/*******************************************************
+ Fractional Division 8 / 8
+*******************************************************/
+
+#define r_divd r25 /* dividend */
+#define r_quo r24 /* quotient */
+#define r_div r22 /* divisor */
+#define r_sign __tmp_reg__
+
+#if defined (L_divqq3)
+DEFUN __divqq3
+ mov r_sign, r_divd
+ eor r_sign, r_div
+ sbrc r_div, 7
+ neg r_div
+ sbrc r_divd, 7
+ neg r_divd
+ XCALL __divqq_helper
+ lsr r_quo
+ sbrc r_sign, 7 ; negate result if needed
+ neg r_quo
+ ret
+ENDF __divqq3
+#endif /* L_divqq3 */
+
+#if defined (L_udivuqq3)
+DEFUN __udivuqq3
+ cp r_divd, r_div
+ brsh 0f
+ XJMP __divqq_helper
+ ;; Result is out of [0, 1) ==> Return 1 - eps.
+0: ldi r_quo, 0xff
+ ret
+ENDF __udivuqq3
+#endif /* L_udivuqq3 */
+
+
+#if defined (L_divqq_helper)
+DEFUN __divqq_helper
+ clr r_quo ; clear quotient
+ inc __zero_reg__ ; init loop counter, used per shift
+__udivuqq3_loop:
+ lsl r_divd ; shift dividend
+ brcs 0f ; dividend overflow
+ cp r_divd,r_div ; compare dividend & divisor
+ brcc 0f ; dividend >= divisor
+ rol r_quo ; shift quotient (with CARRY)
+ rjmp __udivuqq3_cont
+0:
+ sub r_divd,r_div ; restore dividend
+ lsl r_quo ; shift quotient (without CARRY)
+__udivuqq3_cont:
+ lsl __zero_reg__ ; shift loop-counter bit
+ brne __udivuqq3_loop
+ com r_quo ; complement result
+ ; because C flag was complemented in loop
+ ret
+ENDF __divqq_helper
+#endif /* L_divqq_helper */
+
+#undef r_divd
+#undef r_quo
+#undef r_div
+#undef r_sign
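
The .8 / .8 division is a bit-by-bit shift-and-subtract loop producing one quotient bit per iteration; the final COM merely undoes the inverted carry used inside the loop, and the unsigned wrapper returns 1 - 1ulp whenever the true quotient would reach 1. The intended result, expressed in C (divisor assumed nonzero):

    #include <stdint.h>

    uint8_t udivuqq (uint8_t a, uint8_t b)       /* u.8 / u.8 -> u.8 */
    {
      if (a >= b)                                /* quotient would be >= 1.0 */
        return 0xFF;                             /* return 1 - 1ulp instead */
      return (uint8_t) (((uint16_t) a << 8) / b);
    }
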
+
+
+/*******************************************************
+ Fractional Division 16 / 16
+*******************************************************/
+#define r_divdL 26 /* dividend Low */
+#define r_divdH 27 /* dividend High */
+#define r_quoL 24 /* quotient Low */
+#define r_quoH 25 /* quotient High */
+#define r_divL 22 /* divisor */
+#define r_divH 23 /* divisor */
+#define r_cnt 21
+
+#if defined (L_divhq3)
+DEFUN __divhq3
+ mov r0, r_divdH
+ eor r0, r_divH
+ sbrs r_divH, 7
+ rjmp 1f
+ NEG2 r_divL
+1:
+ sbrs r_divdH, 7
+ rjmp 2f
+ NEG2 r_divdL
+2:
+ cp r_divdL, r_divL
+ cpc r_divdH, r_divH
+ breq __divhq3_minus1 ; if equal return -1
+ XCALL __udivuhq3
+ lsr r_quoH
+ ror r_quoL
+ brpl 9f
+ ;; negate result if needed
+ NEG2 r_quoL
+9:
+ ret
+__divhq3_minus1:
+ ldi r_quoH, 0x80
+ clr r_quoL
+ ret
+ENDF __divhq3
+#endif /* defined (L_divhq3) */
+
+#if defined (L_udivuhq3)
+DEFUN __udivuhq3
+ sub r_quoH,r_quoH ; clear quotient and carry
+ ;; FALLTHRU
+ENDF __udivuhq3
+
+DEFUN __udivuha3_common
+ clr r_quoL ; clear quotient
+ ldi r_cnt,16 ; init loop counter
+__udivuhq3_loop:
+ rol r_divdL ; shift dividend (with CARRY)
+ rol r_divdH
+ brcs __udivuhq3_ep ; dividend overflow
+ cp r_divdL,r_divL ; compare dividend & divisor
+ cpc r_divdH,r_divH
+ brcc __udivuhq3_ep ; dividend >= divisor
+ rol r_quoL ; shift quotient (with CARRY)
+ rjmp __udivuhq3_cont
+__udivuhq3_ep:
+ sub r_divdL,r_divL ; restore dividend
+ sbc r_divdH,r_divH
+ lsl r_quoL ; shift quotient (without CARRY)
+__udivuhq3_cont:
+ rol r_quoH ; shift quotient
+ dec r_cnt ; decrement loop counter
+ brne __udivuhq3_loop
+ com r_quoL ; complement result
+ com r_quoH ; because C flag was complemented in loop
+ ret
+ENDF __udivuha3_common
+#endif /* defined (L_udivuhq3) */
+
+/*******************************************************
+ Fixed Division 8.8 / 8.8
+*******************************************************/
+#if defined (L_divha3)
+DEFUN __divha3
+ mov r0, r_divdH
+ eor r0, r_divH
+ sbrs r_divH, 7
+ rjmp 1f
+ NEG2 r_divL
+1:
+ sbrs r_divdH, 7
+ rjmp 2f
+ NEG2 r_divdL
+2:
+ XCALL __udivuha3
+ lsr r_quoH ; adjust to 7 fractional bits
+ ror r_quoL
+ sbrs r0, 7 ; negate result if needed
+ ret
+ NEG2 r_quoL
+ ret
+ENDF __divha3
+#endif /* defined (L_divha3) */
+
+#if defined (L_udivuha3)
+DEFUN __udivuha3
+ mov r_quoH, r_divdL
+ mov r_divdL, r_divdH
+ clr r_divdH
+ lsl r_quoH ; shift quotient into carry
+ XJMP __udivuha3_common ; same as fractional after rearrange
+ENDF __udivuha3
+#endif /* defined (L_udivuha3) */
+
+#undef r_divdL
+#undef r_divdH
+#undef r_quoL
+#undef r_quoH
+#undef r_divL
+#undef r_divH
+#undef r_cnt
+
+/*******************************************************
+ Fixed Division 16.16 / 16.16
+*******************************************************/
+
+#define r_arg1L 24 /* arg1 gets passed already in place */
+#define r_arg1H 25
+#define r_arg1HL 26
+#define r_arg1HH 27
+#define r_divdL 26 /* dividend Low */
+#define r_divdH 27
+#define r_divdHL 30
+#define r_divdHH 31 /* dividend High */
+#define r_quoL 22 /* quotient Low */
+#define r_quoH 23
+#define r_quoHL 24
+#define r_quoHH 25 /* quotient High */
+#define r_divL 18 /* divisor Low */
+#define r_divH 19
+#define r_divHL 20
+#define r_divHH 21 /* divisor High */
+#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
+
+#if defined (L_divsa3)
+DEFUN __divsa3
+ mov r0, r_arg1HH
+ eor r0, r_divHH
+ sbrs r_divHH, 7
+ rjmp 1f
+ NEG4 r_divL
+1:
+ sbrs r_arg1HH, 7
+ rjmp 2f
+ NEG4 r_arg1L
+2:
+ XCALL __udivusa3
+ lsr r_quoHH ; adjust to 15 fractional bits
+ ror r_quoHL
+ ror r_quoH
+ ror r_quoL
+ sbrs r0, 7 ; negate result if needed
+ ret
+ ;; negate r_quoL
+ XJMP __negsi2
+ENDF __divsa3
+#endif /* defined (L_divsa3) */
+
+#if defined (L_udivusa3)
+DEFUN __udivusa3
+ ldi r_divdHL, 32 ; init loop counter
+ mov r_cnt, r_divdHL
+ clr r_divdHL
+ clr r_divdHH
+ wmov r_quoL, r_divdHL
+ lsl r_quoHL ; shift quotient into carry
+ rol r_quoHH
+__udivusa3_loop:
+ rol r_divdL ; shift dividend (with CARRY)
+ rol r_divdH
+ rol r_divdHL
+ rol r_divdHH
+ brcs __udivusa3_ep ; dividend overflow
+ cp r_divdL,r_divL ; compare dividend & divisor
+ cpc r_divdH,r_divH
+ cpc r_divdHL,r_divHL
+ cpc r_divdHH,r_divHH
+ brcc __udivusa3_ep ; dividend >= divisor
+ rol r_quoL ; shift quotient (with CARRY)
+ rjmp __udivusa3_cont
+__udivusa3_ep:
+ sub r_divdL,r_divL ; restore dividend
+ sbc r_divdH,r_divH
+ sbc r_divdHL,r_divHL
+ sbc r_divdHH,r_divHH
+ lsl r_quoL ; shift quotient (without CARRY)
+__udivusa3_cont:
+ rol r_quoH ; shift quotient
+ rol r_quoHL
+ rol r_quoHH
+ dec r_cnt ; decrement loop counter
+ brne __udivusa3_loop
+ com r_quoL ; complement result
+ com r_quoH ; because C flag was complemented in loop
+ com r_quoHL
+ com r_quoHH
+ ret
+ENDF __udivusa3
+#endif /* defined (L_udivusa3) */
+
+#undef r_arg1L
+#undef r_arg1H
+#undef r_arg1HL
+#undef r_arg1HH
+#undef r_divdL
+#undef r_divdH
+#undef r_divdHL
+#undef r_divdHH
+#undef r_quoL
+#undef r_quoH
+#undef r_quoHL
+#undef r_quoHH
+#undef r_divL
+#undef r_divH
+#undef r_divHL
+#undef r_divHH
+#undef r_cnt
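
The wider divisions scale the dividend up by the number of fraction bits, divide, and fix up the sign afterwards; the extra LSR in __divsa3 adjusts the 16-fraction-bit quotient from __udivusa3 down to the 15 fraction bits of an s16.15 accum. In C, again with a 64-bit intermediate the hardware does not have (divisor assumed nonzero; integral overflow wraps):

    #include <stdint.h>

    int32_t divsa (int32_t a, int32_t b)         /* s16.15 / s16.15 -> s16.15 */
    {
      int64_t q = ((int64_t) a << 15) / b;       /* truncates toward zero */
      return (int32_t) q;
    }
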
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Saturation, 1 Byte
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; First Argument and Return Register
+#define A0 24
+
+#if defined (L_ssabs_1)
+DEFUN __ssabs_1
+ sbrs A0, 7
+ ret
+ neg A0
+ sbrc A0,7
+ dec A0
+ ret
+ENDF __ssabs_1
+#endif /* L_ssabs_1 */
+
+#undef A0
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Saturation, 2 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; First Argument and Return Register
+#define A0 24
+#define A1 A0+1
+
+#if defined (L_ssneg_2)
+DEFUN __ssneg_2
+ NEG2 A0
+ brvc 0f
+ sbiw A0, 1
+0: ret
+ENDF __ssneg_2
+#endif /* L_ssneg_2 */
+
+#if defined (L_ssabs_2)
+DEFUN __ssabs_2
+ sbrs A1, 7
+ ret
+ XJMP __ssneg_2
+ENDF __ssabs_2
+#endif /* L_ssabs_2 */
+
+#undef A0
+#undef A1
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Saturation, 4 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; First Argument and Return Register
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#if defined (L_ssneg_4)
+DEFUN __ssneg_4
+ XCALL __negsi2
+ brvc 0f
+ ldi A3, 0x7f
+ ldi A2, 0xff
+ ldi A1, 0xff
+ ldi A0, 0xff
+0: ret
+ENDF __ssneg_4
+#endif /* L_ssneg_4 */
+
+#if defined (L_ssabs_4)
+DEFUN __ssabs_4
+ sbrs A3, 7
+ ret
+ XJMP __ssneg_4
+ENDF __ssabs_4
+#endif /* L_ssabs_4 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Saturation, 8 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; First Argument and Return Register
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+#if defined (L_clr_8)
+FALIAS __usneguta2
+FALIAS __usneguda2
+FALIAS __usnegudq2
+
+;; Clear Carry and all Bytes
+DEFUN __clr_8
+ ;; Clear Carry and set Z
+ sub A7, A7
+ ;; FALLTHRU
+ENDF __clr_8
+;; Propagate Carry to all Bytes, Carry unaltered
+DEFUN __sbc_8
+ sbc A7, A7
+ sbc A6, A6
+ wmov A4, A6
+ wmov A2, A6
+ wmov A0, A6
+ ret
+ENDF __sbc_8
+#endif /* L_clr_8 */
+
+#if defined (L_ssneg_8)
+FALIAS __ssnegta2
+FALIAS __ssnegda2
+FALIAS __ssnegdq2
+
+DEFUN __ssneg_8
+ XCALL __negdi2
+ brvc 0f
+ ;; A[] = 0x7fffffff
+ sec
+ XCALL __sbc_8
+ ldi A7, 0x7f
+0: ret
+ENDF __ssneg_8
+#endif /* L_ssneg_8 */
+
+#if defined (L_ssabs_8)
+FALIAS __ssabsta2
+FALIAS __ssabsda2
+FALIAS __ssabsdq2
+
+DEFUN __ssabs_8
+ sbrs A7, 7
+ ret
+ XJMP __ssneg_8
+ENDF __ssabs_8
+#endif /* L_ssabs_8 */
+
+;; Second Argument
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+#if defined (L_usadd_8)
+FALIAS __usadduta3
+FALIAS __usadduda3
+FALIAS __usaddudq3
+
+DEFUN __usadd_8
+ XCALL __adddi3
+ brcs 0f
+ ret
+0: ;; A[] = 0xffffffff
+ XJMP __sbc_8
+ENDF __usadd_8
+#endif /* L_usadd_8 */
+
+#if defined (L_ussub_8)
+FALIAS __ussubuta3
+FALIAS __ussubuda3
+FALIAS __ussubudq3
+
+DEFUN __ussub_8
+ XCALL __subdi3
+ brcs 0f
+ ret
+0: ;; A[] = 0
+ XJMP __clr_8
+ENDF __ussub_8
+#endif /* L_ussub_8 */
+
+#if defined (L_ssadd_8)
+FALIAS __ssaddta3
+FALIAS __ssaddda3
+FALIAS __ssadddq3
+
+DEFUN __ssadd_8
+ XCALL __adddi3
+ brvc 0f
+ ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
+ cpi B7, 0x80
+ XCALL __sbc_8
+ subi A7, 0x80
+0: ret
+ENDF __ssadd_8
+#endif /* L_ssadd_8 */
+
+#if defined (L_sssub_8)
+FALIAS __sssubta3
+FALIAS __sssubda3
+FALIAS __sssubdq3
+
+DEFUN __sssub_8
+ XCALL __subdi3
+ brvc 0f
+ ;; A = (B < 0) ? INT64_MAX : INT64_MIN
+ ldi A7, 0x7f
+ cp A7, B7
+ XCALL __sbc_8
+ subi A7, 0x80
+0: ret
+ENDF __sssub_8
+#endif /* L_sssub_8 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef A4
+#undef A5
+#undef A6
+#undef A7
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef B4
+#undef B5
+#undef B6
+#undef B7
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding Helpers
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#ifdef L_mask1
+
+#define AA 24
+#define CC 25
+
+;; R25 = 1 << (R24 & 7)
+;; CC = 1 << (AA & 7)
+;; Clobbers: None
+DEFUN __mask1
+ ;; CC = 2 ^ AA.1
+ ldi CC, 1 << 2
+ sbrs AA, 1
+ ldi CC, 1 << 0
+ ;; CC *= 2 ^ AA.0
+ sbrc AA, 0
+ lsl CC
+ ;; CC *= 2 ^ AA.2
+ sbrc AA, 2
+ swap CC
+ ret
+ENDF __mask1
+
+#undef AA
+#undef CC
+#endif /* L_mask1 */
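
__mask1 builds 1 << (n & 7) without a variable shift: it starts from 1 or 4 depending on bit 1, doubles for bit 0, and uses SWAP (a nibble swap, i.e. times 16) for bit 2. The same trick, written out in C as a sketch:

    #include <stdint.h>

    uint8_t mask1 (uint8_t n)
    {
      uint8_t c = (n & 2) ? 1 << 2 : 1 << 0;     /* 2 ^ n.1 */
      if (n & 1)
        c <<= 1;                                 /* times 2 ^ n.0 */
      if (n & 4)
        c = (uint8_t) (c << 4 | c >> 4);         /* nibble swap: times 2 ^ n.2 */
      return c;
    }
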
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The rounding point. Any bits smaller than
+;; 2^{-RP} will be cleared.
+#define RP R24
+
+#define A0 22
+#define A1 A0 + 1
+
+#define C0 24
+#define C1 C0 + 1
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding, 1 Byte
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#ifdef L_roundqq3
+
+;; R24 = round (R22, R24)
+;; Clobbers: R22, __tmp_reg__
+DEFUN __roundqq3
+ mov __tmp_reg__, C1
+ subi RP, __QQ_FBIT__ - 1
+ neg RP
+ ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
+ XCALL __mask1
+ mov C0, C1
+ ;; Add-Saturate 2^{-RP-1}
+ add A0, C0
+ brvc 0f
+ ldi C0, 0x7f
+ rjmp 9f
+0: ;; Mask out bits beyond RP
+ lsl C0
+ neg C0
+ and C0, A0
+9: mov C1, __tmp_reg__
+ ret
+ENDF __roundqq3
+#endif /* L_roundqq3 */
+
+#ifdef L_rounduqq3
+
+;; R24 = round (R22, R24)
+;; Clobbers: R22, __tmp_reg__
+DEFUN __rounduqq3
+ mov __tmp_reg__, C1
+ subi RP, __UQQ_FBIT__ - 1
+ neg RP
+ ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
+ XCALL __mask1
+ mov C0, C1
+ ;; Add-Saturate 2^{-RP-1}
+ add A0, C0
+ brcc 0f
+ ldi C0, 0xff
+ rjmp 9f
+0: ;; Mask out bits beyond RP
+ lsl C0
+ neg C0
+ and C0, A0
+9: mov C1, __tmp_reg__
+ ret
+ENDF __rounduqq3
+#endif /* L_rounduqq3 */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding, 2 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#ifdef L_addmask_2
+
+;; [ R25:R24 = 1 << (R24 & 15)
+;; R23:R22 += 1 << (R24 & 15) ]
+;; SREG is set according to the addition
+DEFUN __addmask_2
+ ;; R25 = 1 << (R24 & 7)
+ XCALL __mask1
+ cpi RP, 1 << 3
+ sbc C0, C0
+ ;; Swap C0 and C1 if RP.3 was set
+ and C0, C1
+ eor C1, C0
+ ;; Finally, add the power-of-two: A[] += C[]
+ add A0, C0
+ adc A1, C1
+ ret
+ENDF __addmask_2
+#endif /* L_addmask_2 */
+
+#ifdef L_round_s2
+
+;; R25:R24 = round (R23:R22, R24)
+;; Clobbers: R23, R22
+DEFUN __roundhq3
+ subi RP, __HQ_FBIT__ - __HA_FBIT__
+ENDF __roundhq3
+DEFUN __roundha3
+ subi RP, __HA_FBIT__ - 1
+ neg RP
+ ;; [ R25:R24 = 1 << (FBIT-1 - RP)
+ ;; R23:R22 += 1 << (FBIT-1 - RP) ]
+ XCALL __addmask_2
+ XJMP __round_s2_const
+ENDF __roundha3
+
+#endif /* L_round_s2 */
+
+#ifdef L_round_u2
+
+;; R25:R24 = round (R23:R22, R24)
+;; Clobbers: R23, R22
+DEFUN __rounduhq3
+ subi RP, __UHQ_FBIT__ - __UHA_FBIT__
+ENDF __rounduhq3
+DEFUN __rounduha3
+ subi RP, __UHA_FBIT__ - 1
+ neg RP
+ ;; [ R25:R24 = 1 << (FBIT-1 - RP)
+ ;; R23:R22 += 1 << (FBIT-1 - RP) ]
+ XCALL __addmask_2
+ XJMP __round_u2_const
+ENDF __rounduha3
+
+#endif /* L_round_u2 */
+
+
+#ifdef L_round_2_const
+
+;; Helpers for 2 byte wide rounding
+
+DEFUN __round_s2_const
+ brvc 2f
+ ldi C1, 0x7f
+ rjmp 1f
+ ;; FALLTHRU (Barrier)
+ENDF __round_s2_const
+
+DEFUN __round_u2_const
+ brcc 2f
+ ldi C1, 0xff
+1:
+ ldi C0, 0xff
+ rjmp 9f
+2:
+ ;; Saturation is performed now.
+ ;; Currently, we have C[] = 2^{-RP-1}
+ ;; C[] = 2^{-RP}
+ lsl C0
+ rol C1
+ ;;
+ NEG2 C0
+ ;; Clear the bits beyond the rounding point.
+ and C0, A0
+ and C1, A1
+9: ret
+ENDF __round_u2_const
+
+#endif /* L_round_2_const */
+
+#undef A0
+#undef A1
+#undef C0
+#undef C1
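
All of these rounding helpers implement the same scheme: add half of 2^-RP (1 << (FBIT-1 - RP) in raw bits), saturate if that addition overflows, and clear everything below the rounding point. A sketch for the signed 2-byte case, taking HA as s8.7; the saturation detail on overflow is simplified:

    #include <stdint.h>

    int16_t roundha (int16_t a, int rp)          /* round s8.7 at 2^-rp, 0 <= rp <= 7 */
    {
      int16_t step = (int16_t) (1 << (7 - rp));  /* 2^-rp in raw s8.7 bits */
      int32_t r = (int32_t) a + (step >> 1);     /* add half a step */
      if (r > INT16_MAX)
        return INT16_MAX;                        /* saturate on overflow */
      return (int16_t) (r & ~(int32_t) (step - 1));   /* clear bits below 2^-rp */
    }
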
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding, 4 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#define A0 18
+#define A1 A0 + 1
+#define A2 A0 + 2
+#define A3 A0 + 3
+
+#define C0 22
+#define C1 C0 + 1
+#define C2 C0 + 2
+#define C3 C0 + 3
+
+#ifdef L_addmask_4
+
+;; [ R25:R22 = 1 << (R24 & 31)
+;; R21:R18 += 1 << (R24 & 31) ]
+;; SREG is set according to the addition
+DEFUN __addmask_4
+ ;; R25 = 1 << (R24 & 7)
+ XCALL __mask1
+ cpi RP, 1 << 4
+ sbc C0, C0
+ sbc C1, C1
+ ;; Swap C2 with C3 if RP.3 is not set
+ cpi RP, 1 << 3
+ sbc C2, C2
+ and C2, C3
+ eor C3, C2
+ ;; Swap C3:C2 with C1:C0 if RP.4 is not set
+ and C0, C2 $ eor C2, C0
+ and C1, C3 $ eor C3, C1
+ ;; Finally, add the power-of-two: A[] += C[]
+ add A0, C0
+ adc A1, C1
+ adc A2, C2
+ adc A3, C3
+ ret
+ENDF __addmask_4
+#endif /* L_addmask_4 */
+
+#ifdef L_round_s4
+
+;; R25:R22 = round (R21:R18, R24)
+;; Clobbers: R18...R21
+DEFUN __roundsq3
+ subi RP, __SQ_FBIT__ - __SA_FBIT__
+ENDF __roundsq3
+DEFUN __roundsa3
+ subi RP, __SA_FBIT__ - 1
+ neg RP
+ ;; [ R25:R22 = 1 << (FBIT-1 - RP)
+ ;; R21:R18 += 1 << (FBIT-1 - RP) ]
+ XCALL __addmask_4
+ XJMP __round_s4_const
+ENDF __roundsa3
+
+#endif /* L_round_s4 */
+
+#ifdef L_round_u4
+
+;; R25:R22 = round (R21:R18, R24)
+;; Clobbers: R18...R21
+DEFUN __roundusq3
+ subi RP, __USQ_FBIT__ - __USA_FBIT__
+ENDF __roundusq3
+DEFUN __roundusa3
+ subi RP, __USA_FBIT__ - 1
+ neg RP
+ ;; [ R25:R22 = 1 << (FBIT-1 - RP)
+ ;; R21:R18 += 1 << (FBIT-1 - RP) ]
+ XCALL __addmask_4
+ XJMP __round_u4_const
+ENDF __roundusa3
+
+#endif /* L_round_u4 */
+
+
+#ifdef L_round_4_const
+
+;; Helpers for 4 byte wide rounding
+
+DEFUN __round_s4_const
+ brvc 2f
+ ldi C3, 0x7f
+ rjmp 1f
+ ;; FALLTHRU (Barrier)
+ENDF __round_s4_const
+
+DEFUN __round_u4_const
+ brcc 2f
+ ldi C3, 0xff
+1:
+ ldi C2, 0xff
+ ldi C1, 0xff
+ ldi C0, 0xff
+ rjmp 9f
+2:
+ ;; Saturation is performed now.
+ ;; Currently, we have C[] = 2^{-RP-1}
+ ;; C[] = 2^{-RP}
+ lsl C0
+ rol C1
+ rol C2
+ rol C3
+ XCALL __negsi2
+ ;; Clear the bits beyond the rounding point.
+ and C0, A0
+ and C1, A1
+ and C2, A2
+ and C3, A3
+9: ret
+ENDF __round_u4_const
+
+#endif /* L_round_4_const */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#undef RP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding, 8 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#define RP 16
+#define FBITm1 31
+
+#define C0 18
+#define C1 C0 + 1
+#define C2 C0 + 2
+#define C3 C0 + 3
+#define C4 C0 + 4
+#define C5 C0 + 5
+#define C6 C0 + 6
+#define C7 C0 + 7
+
+#define A0 16
+#define A1 17
+#define A2 26
+#define A3 27
+#define A4 28
+#define A5 29
+#define A6 30
+#define A7 31
+
+
+#ifdef L_rounddq3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __rounddq3
+ ldi FBITm1, __DQ_FBIT__ - 1
+ clt
+ XJMP __round_x8
+ENDF __rounddq3
+#endif /* L_rounddq3 */
+
+#ifdef L_roundudq3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __roundudq3
+ ldi FBITm1, __UDQ_FBIT__ - 1
+ set
+ XJMP __round_x8
+ENDF __roundudq3
+#endif /* L_roundudq3 */
+
+#ifdef L_roundda3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __roundda3
+ ldi FBITm1, __DA_FBIT__ - 1
+ clt
+ XJMP __round_x8
+ENDF __roundda3
+#endif /* L_roundda3 */
+
+#ifdef L_rounduda3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __rounduda3
+ ldi FBITm1, __UDA_FBIT__ - 1
+ set
+ XJMP __round_x8
+ENDF __rounduda3
+#endif /* L_rounduda3 */
+
+#ifdef L_roundta3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __roundta3
+ ldi FBITm1, __TA_FBIT__ - 1
+ clt
+ XJMP __round_x8
+ENDF __roundta3
+#endif /* L_roundta3 */
+
+#ifdef L_rounduta3
+;; R25:R18 = round (R25:R18, R16)
+;; Clobbers: ABI
+DEFUN __rounduta3
+ ldi FBITm1, __UTA_FBIT__ - 1
+ set
+ XJMP __round_x8
+ENDF __rounduta3
+#endif /* L_rounduta3 */
+
+
+#ifdef L_round_x8
+DEFUN __round_x8
+ push r16
+ push r17
+ push r28
+ push r29
+ ;; Compute log2 of addend from rounding point
+ sub RP, FBITm1
+ neg RP
+ ;; Move input to work register A[]
+ push C0
+ mov A1, C1
+ wmov A2, C2
+ wmov A4, C4
+ wmov A6, C6
+ ;; C[] = 1 << (FBIT-1 - RP)
+ XCALL __clr_8
+ inc C0
+ XCALL __ashldi3
+ pop A0
+ ;; A[] += C[]
+ add A0, C0
+ adc A1, C1
+ adc A2, C2
+ adc A3, C3
+ adc A4, C4
+ adc A5, C5
+ adc A6, C6
+ adc A7, C7
+ brts 1f
+ ;; Signed
+ brvc 3f
+ ;; Signed overflow: A[] = 0x7f...
+ brvs 2f
+1: ;; Unsigned
+ brcc 3f
+ ;; Unsigned overflow: A[] = 0xff...
+2: ldi C7, 0xff
+ ldi C6, 0xff
+ wmov C0, C6
+ wmov C2, C6
+ wmov C4, C6
+ bld C7, 7
+ rjmp 9f
+3:
+ ;; C[] = -C[] - C[]
+ push A0
+ ldi r16, 1
+ XCALL __ashldi3
+ pop A0
+ XCALL __negdi2
+ ;; Clear the bits beyond the rounding point.
+ and C0, A0
+ and C1, A1
+ and C2, A2
+ and C3, A3
+ and C4, A4
+ and C5, A5
+ and C6, A6
+ and C7, A7
+9: ;; Epilogue
+ pop r29
+ pop r28
+ pop r17
+ pop r16
+ ret
+ENDF __round_x8
+
+#endif /* L_round_x8 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef A4
+#undef A5
+#undef A6
+#undef A7
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+
+#undef RP
+#undef FBITm1
+
+
+;; Supply implementations / symbols for the bit-banging functions
+;; __builtin_avr_bitsfx and __builtin_avr_fxbits
+#ifdef L_ret
+DEFUN __ret
+ ret
+ENDF __ret
+#endif /* L_ret */
diff --git a/gcc-4.9/libgcc/config/avr/lib1funcs.S b/gcc-4.9/libgcc/config/avr/lib1funcs.S
new file mode 100644
index 000000000..6f1c77edb
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/lib1funcs.S
@@ -0,0 +1,3226 @@
+/* -*- Mode: Asm -*- */
+/* Copyright (C) 1998-2014 Free Software Foundation, Inc.
+ Contributed by Denis Chertykov <chertykov@gmail.com>
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define __zero_reg__ r1
+#define __tmp_reg__ r0
+#define __SREG__ 0x3f
+#if defined (__AVR_HAVE_SPH__)
+#define __SP_H__ 0x3e
+#endif
+#define __SP_L__ 0x3d
+#define __RAMPZ__ 0x3B
+#define __EIND__ 0x3C
+
+/* Most of the functions here are called directly from avr.md
+ patterns, instead of using the standard libcall mechanisms.
+ This can make better code because GCC knows exactly which
+ of the call-used registers (not all of them) are clobbered. */
+
+/* FIXME: At present, there is no SORT directive in the linker
+ script so that we must not assume that different modules
+ in the same input section like .libgcc.text.mul will be
+ located close together. Therefore, we cannot use
+ RCALL/RJMP to call a function like __udivmodhi4 from
+ __divmodhi4 and have to use lengthy XCALL/XJMP even
+ though they are in the same input section and all same
+ input sections together are small enough to reach every
+ location with a RCALL/RJMP instruction. */
+
+ .macro mov_l r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ movw \r_dest, \r_src
+#else
+ mov \r_dest, \r_src
+#endif
+ .endm
+
+ .macro mov_h r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ ; empty
+#else
+ mov \r_dest, \r_src
+#endif
+ .endm
+
+.macro wmov r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ movw \r_dest, \r_src
+#else
+ mov \r_dest, \r_src
+ mov \r_dest+1, \r_src+1
+#endif
+.endm
+
+#if defined (__AVR_HAVE_JMP_CALL__)
+#define XCALL call
+#define XJMP jmp
+#else
+#define XCALL rcall
+#define XJMP rjmp
+#endif
+
+;; Prologue stuff
+
+.macro do_prologue_saves n_pushed n_frame=0
+ ldi r26, lo8(\n_frame)
+ ldi r27, hi8(\n_frame)
+ ldi r30, lo8(gs(.L_prologue_saves.\@))
+ ldi r31, hi8(gs(.L_prologue_saves.\@))
+ XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
+.L_prologue_saves.\@:
+.endm
+
+;; Epilogue stuff
+
+.macro do_epilogue_restores n_pushed n_frame=0
+ in r28, __SP_L__
+#ifdef __AVR_HAVE_SPH__
+ in r29, __SP_H__
+.if \n_frame > 63
+ subi r28, lo8(-\n_frame)
+ sbci r29, hi8(-\n_frame)
+.elseif \n_frame > 0
+ adiw r28, \n_frame
+.endif
+#else
+ clr r29
+.if \n_frame > 0
+ subi r28, lo8(-\n_frame)
+.endif
+#endif /* HAVE SPH */
+ ldi r30, \n_pushed
+ XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
+.endm
+
+;; Support function entry and exit for convenience
+
+.macro DEFUN name
+.global \name
+.func \name
+\name:
+.endm
+
+.macro ENDF name
+.size \name, .-\name
+.endfunc
+.endm
+
+.macro FALIAS name
+.global \name
+.func \name
+\name:
+.size \name, .-\name
+.endfunc
+.endm
+
+;; Skip next instruction, typically a jump target
+#define skip cpse 0,0
+
+;; Negate a 2-byte value held in consecutive registers
+.macro NEG2 reg
+ com \reg+1
+ neg \reg
+ sbci \reg+1, -1
+.endm
+
+;; Negate a 4-byte value held in consecutive registers
+;; Sets the V flag for signed overflow tests if REG >= 16
+.macro NEG4 reg
+ com \reg+3
+ com \reg+2
+ com \reg+1
+.if \reg >= 16
+ neg \reg
+ sbci \reg+1, -1
+ sbci \reg+2, -1
+ sbci \reg+3, -1
+.else
+ com \reg
+ adc \reg, __zero_reg__
+ adc \reg+1, __zero_reg__
+ adc \reg+2, __zero_reg__
+ adc \reg+3, __zero_reg__
+.endif
+.endm
+
+#define exp_lo(N) hlo8 ((N) << 23)
+#define exp_hi(N) hhi8 ((N) << 23)
+
+
+.section .text.libgcc.mul, "ax", @progbits
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
+#if !defined (__AVR_HAVE_MUL__)
+/*******************************************************
+ Multiplication 8 x 8 without MUL
+*******************************************************/
+#if defined (L_mulqi3)
+
+#define r_arg2 r22 /* multiplicand */
+#define r_arg1 r24 /* multiplier */
+#define r_res __tmp_reg__ /* result */
+
+DEFUN __mulqi3
+ clr r_res ; clear result
+__mulqi3_loop:
+ sbrc r_arg1,0
+ add r_res,r_arg2
+ add r_arg2,r_arg2 ; shift multiplicand
+ breq __mulqi3_exit ; while multiplicand != 0
+ lsr r_arg1 ;
+ brne __mulqi3_loop ; exit if multiplier = 0
+__mulqi3_exit:
+ mov r_arg1,r_res ; result to return register
+ ret
+ENDF __mulqi3
+
+#undef r_arg2
+#undef r_arg1
+#undef r_res
+
+#endif /* defined (L_mulqi3) */
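
Without a hardware MUL, __mulqi3 is the classic shift-and-add loop: add the multiplicand whenever the current multiplier bit is set, and stop as soon as either operand has been consumed. The same loop as a C sketch:

    #include <stdint.h>

    uint8_t mulqi (uint8_t a, uint8_t b)         /* 8 x 8 -> low 8 bits */
    {
      uint8_t r = 0;
      while (a != 0 && b != 0)
        {
          if (a & 1)
            r += b;                              /* add multiplicand for this bit */
          b <<= 1;                               /* shift multiplicand */
          a >>= 1;                               /* consume one multiplier bit */
        }
      return r;
    }
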
+
+
+/*******************************************************
+ Widening Multiplication 16 = 8 x 8 without MUL
+ Multiplication 16 x 16 without MUL
+*******************************************************/
+
+#define A0 r22
+#define A1 r23
+#define B0 r24
+#define BB0 r20
+#define B1 r25
+;; Output overlaps input, thus expand result in CC0/1
+#define C0 r24
+#define C1 r25
+#define CC0 __tmp_reg__
+#define CC1 R21
+
+#if defined (L_umulqihi3)
+;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
+;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
+;;; Clobbers: __tmp_reg__, R21..R23
+DEFUN __umulqihi3
+ clr A1
+ clr B1
+ XJMP __mulhi3
+ENDF __umulqihi3
+#endif /* L_umulqihi3 */
+
+#if defined (L_mulqihi3)
+;;; R25:R24 = (signed int) R22 * (signed int) R24
+;;; (C1:C0) = (signed int) A0 * (signed int) B0
+;;; Clobbers: __tmp_reg__, R20..R23
+DEFUN __mulqihi3
+ ;; Sign-extend B0
+ clr B1
+ sbrc B0, 7
+ com B1
+ ;; The multiplication runs twice as fast if A1 is zero, thus:
+ ;; Zero-extend A0
+ clr A1
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; Store B0 * sign of A
+ clr BB0
+ sbrc A0, 7
+ mov BB0, B0
+ call __mulhi3
+#else /* have no CALL */
+ ;; Skip sign-extension of A if A >= 0
+ ;; Same size as with the first alternative but avoids errata skip
+ ;; and is faster if A >= 0
+ sbrs A0, 7
+ rjmp __mulhi3
+ ;; If A < 0 store B
+ mov BB0, B0
+ rcall __mulhi3
+#endif /* HAVE_JMP_CALL */
+ ;; 1-extend A after the multiplication
+ sub C1, BB0
+ ret
+ENDF __mulqihi3
+#endif /* L_mulqihi3 */
+
+#if defined (L_mulhi3)
+;;; R25:R24 = R23:R22 * R25:R24
+;;; (C1:C0) = (A1:A0) * (B1:B0)
+;;; Clobbers: __tmp_reg__, R21..R23
+DEFUN __mulhi3
+
+ ;; Clear result
+ clr CC0
+ clr CC1
+ rjmp 3f
+1:
+ ;; Bit n of A is 1 --> C += B << n
+ add CC0, B0
+ adc CC1, B1
+2:
+ lsl B0
+ rol B1
+3:
+ ;; If B == 0 we are ready
+ sbiw B0, 0
+ breq 9f
+
+ ;; Carry = n-th bit of A
+ lsr A1
+ ror A0
+ ;; If bit n of A is set, then go add B * 2^n to C
+ brcs 1b
+
+ ;; Carry = 0 --> The ROR above acts like CP A0, 0
+ ;; Thus, it is sufficient to CPC the high part to test A against 0
+ cpc A1, __zero_reg__
+ ;; Only proceed if A != 0
+ brne 2b
+9:
+ ;; Move Result into place
+ mov C0, CC0
+ mov C1, CC1
+ ret
+ENDF __mulhi3
+#endif /* L_mulhi3 */
+
+#undef A0
+#undef A1
+#undef B0
+#undef BB0
+#undef B1
+#undef C0
+#undef C1
+#undef CC0
+#undef CC1
+
+
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define CC0 26
+#define CC1 CC0+1
+#define CC2 30
+#define CC3 CC2+1
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 16 without MUL
+*******************************************************/
+
+#if defined (L_umulhisi3)
+DEFUN __umulhisi3
+ wmov B0, 24
+ ;; Zero-extend B
+ clr B2
+ clr B3
+ ;; Zero-extend A
+ wmov A2, B2
+ XJMP __mulsi3
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
+
+#if defined (L_mulhisi3)
+DEFUN __mulhisi3
+ wmov B0, 24
+ ;; Sign-extend B
+ lsl r25
+ sbc B2, B2
+ mov B3, B2
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Sign-extend A
+ clr A2
+ sbrc A1, 7
+ com A2
+ mov A3, A2
+ XJMP __mulsi3
+#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
+ ;; Zero-extend A and __mulsi3 will run at least twice as fast
+ ;; compared to a sign-extended A.
+ clr A2
+ clr A3
+ sbrs A1, 7
+ XJMP __mulsi3
+ ;; If A < 0 then account for the B * 0xffff.... part up front by
+ ;; initializing the high part of the result CC with -B.
+ wmov CC2, A2
+ sub CC2, B0
+ sbc CC3, B1
+ XJMP __mulsi3_helper
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+
+/*******************************************************
+ Multiplication 32 x 32 without MUL
+*******************************************************/
+
+#if defined (L_mulsi3)
+DEFUN __mulsi3
+ ;; Clear result
+ clr CC2
+ clr CC3
+ ;; FALLTHRU
+ENDF __mulsi3
+
+DEFUN __mulsi3_helper
+ clr CC0
+ clr CC1
+ rjmp 3f
+
+1: ;; If bit n of A is set, then add B * 2^n to the result in CC
+ ;; CC += B
+ add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
+
+2: ;; B <<= 1
+ lsl B0 $ rol B1 $ rol B2 $ rol B3
+
+3: ;; A >>= 1: Carry = n-th bit of A
+ lsr A3 $ ror A2 $ ror A1 $ ror A0
+
+ brcs 1b
+ ;; Only continue if A != 0
+ sbci A1, 0
+ brne 2b
+ sbiw A2, 0
+ brne 2b
+
+ ;; All bits of A are consumed: Copy result to return register C
+ wmov C0, CC0
+ wmov C2, CC2
+ ret
+ENDF __mulsi3_helper
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
+
+#endif /* !defined (__AVR_HAVE_MUL__) */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#if defined (__AVR_HAVE_MUL__)
+#define A0 26
+#define B0 18
+#define C0 22
+
+#define A1 A0+1
+
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 16 with MUL
+*******************************************************/
+
+#if defined (L_mulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulhisi3
+ XCALL __umulhisi3
+ ;; Sign-extend B
+ tst B1
+ brpl 1f
+ sub C2, A0
+ sbc C3, A1
+1: ;; Sign-extend A
+ XJMP __usmulhisi3_tail
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+#if defined (L_usmulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __usmulhisi3
+ XCALL __umulhisi3
+ ;; FALLTHRU
+ENDF __usmulhisi3
+
+DEFUN __usmulhisi3_tail
+ ;; Sign-extend A
+ sbrs A1, 7
+ ret
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __usmulhisi3_tail
+#endif /* L_usmulhisi3 */
+
+#if defined (L_umulhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __umulhisi3
+ mul A0, B0
+ movw C0, r0
+ mul A1, B1
+ movw C2, r0
+ mul A0, B1
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; This function is used by many other routines, often multiple times.
+ ;; Therefore, if the flash size is not too limited, avoid the RCALL
+ ;; and invest 6 Bytes to speed things up.
+ add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+#else
+ rcall 1f
+#endif
+ mul A1, B0
+1: add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+ ret
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
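
With MUL available, the widening 16 x 16 multiply is the schoolbook combination of four 8 x 8 products, exactly the four MUL instructions above. Its value, spelled out as a C sketch:

    #include <stdint.h>

    uint32_t umulhisi (uint16_t a, uint16_t b)
    {
      uint8_t a0 = (uint8_t) a, a1 = (uint8_t) (a >> 8);
      uint8_t b0 = (uint8_t) b, b1 = (uint8_t) (b >> 8);
      uint32_t c = (uint32_t) a0 * b0            /* low  partial product */
                 + ((uint32_t) a1 * b1 << 16);   /* high partial product */
      c += (uint32_t) a0 * b1 << 8;              /* middle partial products */
      c += (uint32_t) a1 * b0 << 8;
      return c;
    }
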
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 32 with MUL
+*******************************************************/
+
+#if defined (L_mulshisi3)
+;;; R25:R22 = (signed long) R27:R26 * R21:R18
+;;; (C3:C0) = (signed long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulshisi3
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have a problem skipping a 2-word instruction
+ tst A1
+ brmi __mulohisi3
+#else
+ sbrs A1, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XJMP __muluhisi3
+ ;; FALLTHRU
+ENDF __mulshisi3
+
+;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
+;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulohisi3
+ XCALL __muluhisi3
+ ;; One-extend R27:R26 (A1:A0)
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __mulohisi3
+#endif /* L_mulshisi3 */
+
+#if defined (L_muluhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
+;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __muluhisi3
+ XCALL __umulhisi3
+ mul A0, B3
+ add C3, r0
+ mul A1, B2
+ add C3, r0
+ mul A0, B2
+ add C2, r0
+ adc C3, r1
+ clr __zero_reg__
+ ret
+ENDF __muluhisi3
+#endif /* L_muluhisi3 */
+
+/*******************************************************
+ Multiplication 32 x 32 with MUL
+*******************************************************/
+
+#if defined (L_mulsi3)
+;;; R25:R22 = R25:R22 * R21:R18
+;;; (C3:C0) = C3:C0 * B3:B0
+;;; Clobbers: R26, R27, __tmp_reg__
+DEFUN __mulsi3
+ movw A0, C0
+ push C2
+ push C3
+ XCALL __muluhisi3
+ pop A1
+ pop A0
+ ;; A1:A0 now contains the high word of A
+ mul A0, B0
+ add C2, r0
+ adc C3, r1
+ mul A0, B1
+ add C3, r0
+ mul A1, B0
+ add C3, r0
+ clr __zero_reg__
+ ret
+ENDF __mulsi3
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#endif /* __AVR_HAVE_MUL__ */
+
+/*******************************************************
+ Multiplication 24 x 24 with MUL
+*******************************************************/
+
+#if defined (L_mulpsi3)
+
+;; A[0..2]: In: Multiplicand; Out: Product
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+
+;; B[0..2]: In: Multiplier
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; C[0..2]: Expand Result
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+
+;; R24:R22 *= R20:R18
+;; Clobbers: r21, r25, r26, r27, __tmp_reg__
+
+#define AA0 26
+#define AA2 21
+
+DEFUN __mulpsi3
+ wmov AA0, A0
+ mov AA2, A2
+ XCALL __umulhisi3
+ mul AA2, B0 $ add C2, r0
+ mul AA0, B2 $ add C2, r0
+ clr __zero_reg__
+ ret
+ENDF __mulpsi3
+
+#undef AA2
+#undef AA0
+
+#undef C2
+#undef C1
+#undef C0
+
+#else /* !HAVE_MUL */
+
+;; C[0..2]: Expand Result
+#define C0 0
+#define C1 C0+1
+#define C2 21
+
+;; R24:R22 *= R20:R18
+;; Clobbers: __tmp_reg__, R18, R19, R20, R21
+
+DEFUN __mulpsi3
+
+ ;; C[] = 0
+ clr __tmp_reg__
+ clr C2
+
+0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
+ LSR B2 $ ror B1 $ ror B0
+
+ ;; If the N-th Bit of B[] was set...
+ brcc 1f
+
+ ;; ...then add A[] * 2^N to the Result C[]
+ ADD C0,A0 $ adc C1,A1 $ adc C2,A2
+
+1: ;; Multiply A[] by 2
+ LSL A0 $ rol A1 $ rol A2
+
+ ;; Loop until B[] is 0
+ subi B0,0 $ sbci B1,0 $ sbci B2,0
+ brne 0b
+
+ ;; Copy C[] to the return Register A[]
+ wmov A0, C0
+ mov A2, C2
+
+ clr __zero_reg__
+ ret
+ENDF __mulpsi3
+
+#undef C2
+#undef C1
+#undef C0
+
+#endif /* HAVE_MUL */
+
+#undef B2
+#undef B1
+#undef B0
+
+#undef A2
+#undef A1
+#undef A0
+
+#endif /* L_mulpsi3 */
+
+#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
+
+;; A[0..2]: In: Multiplicand
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+
+;; BB: In: Multiplier
+#define BB 25
+
+;; C[0..2]: Result
+#define C0 18
+#define C1 C0+1
+#define C2 C0+2
+
+;; C[] = A[] * sign_extend (BB)
+DEFUN __mulsqipsi3
+ mul A0, BB
+ movw C0, r0
+ mul A2, BB
+ mov C2, r0
+ mul A1, BB
+ add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ sbrs BB, 7
+ ret
+ ;; One-extend BB
+ sub C1, A0
+ sbc C2, A1
+ ret
+ENDF __mulsqipsi3
+
+#undef C2
+#undef C1
+#undef C0
+
+#undef BB
+
+#undef A2
+#undef A1
+#undef A0
+
+#endif /* L_mulsqipsi3 && HAVE_MUL */
+
+/*******************************************************
+ Multiplication 64 x 64
+*******************************************************/
+
+;; A[] = A[] * B[]
+
+;; A[0..7]: In: Multiplicand
+;; Out: Product
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+;; B[0..7]: In: Multiplier
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; Define C[] for convenience
+;; Notice that parts of C[] overlap A[] and B[], respectively
+#define C0 16
+#define C1 C0+1
+#define C2 20
+#define C3 C2+1
+#define C4 28
+#define C5 C4+1
+#define C6 C4+2
+#define C7 C4+3
+
+#if defined (L_muldi3)
+
+;; A[] *= B[]
+;; R25:R18 *= R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __muldi3
+ push r29
+ push r28
+ push r17
+ push r16
+
+ ;; Counting in Words, we have to perform a 4 * 4 Multiplication
+
+ ;; 3 * 0 + 0 * 3
+ mul A7,B0 $ $ mov C7,r0
+ mul A0,B7 $ $ add C7,r0
+ mul A6,B1 $ $ add C7,r0
+ mul A6,B0 $ mov C6,r0 $ add C7,r1
+ mul B6,A1 $ $ add C7,r0
+ mul B6,A0 $ add C6,r0 $ adc C7,r1
+
+ ;; 1 * 2
+ mul A2,B4 $ add C6,r0 $ adc C7,r1
+ mul A3,B4 $ $ add C7,r0
+ mul A2,B5 $ $ add C7,r0
+
+ push A5
+ push A4
+ push B1
+ push B0
+ push A3
+ push A2
+
+ ;; 0 * 0
+ wmov 26, B0
+ XCALL __umulhisi3
+ wmov C0, 22
+ wmov C2, 24
+
+ ;; 0 * 2
+ wmov 26, B4
+ XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
+
+ wmov 26, B2
+ ;; 0 * 1
+ XCALL __muldi3_6
+
+ pop A0
+ pop A1
+ ;; 1 * 1
+ wmov 26, B2
+ XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
+
+ pop r26
+ pop r27
+ ;; 1 * 0
+ XCALL __muldi3_6
+
+ pop A0
+ pop A1
+ ;; 2 * 0
+ XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
+
+ ;; 2 * 1
+ wmov 26, B2
+ XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
+
+ ;; A[] = C[]
+ wmov A0, C0
+ ;; A2 = C2 already
+ wmov A4, C4
+ wmov A6, C6
+
+ clr __zero_reg__
+ pop r16
+ pop r17
+ pop r28
+ pop r29
+ ret
+ENDF __muldi3
+#endif /* L_muldi3 */
+
+#if defined (L_muldi3_6)
+;; A helper for some 64-bit multiplications with MUL available
+DEFUN __muldi3_6
+__muldi3_6:
+ XCALL __umulhisi3
+ add C2, 22
+ adc C3, 23
+ adc C4, 24
+ adc C5, 25
+ brcc 0f
+ adiw C6, 1
+0: ret
+ENDF __muldi3_6
+#endif /* L_muldi3_6 */
+
+#undef C7
+#undef C6
+#undef C5
+#undef C4
+#undef C3
+#undef C2
+#undef C1
+#undef C0
+
+#else /* !HAVE_MUL */
+
+#if defined (L_muldi3)
+
+#define C0 26
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+#define C4 C0+4
+#define C5 C0+5
+#define C6 0
+#define C7 C6+1
+
+#define Loop 9
+
+;; A[] *= B[]
+;; R25:R18 *= R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __muldi3
+ push r29
+ push r28
+ push Loop
+
+ ldi C0, 64
+ mov Loop, C0
+
+ ;; C[] = 0
+ clr __tmp_reg__
+ wmov C0, 0
+ wmov C2, 0
+ wmov C4, 0
+
+0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
+ ;; where N = 64 - Loop.
+ ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
+ ;; B[] will have its initial Value again.
+ LSR B7 $ ror B6 $ ror B5 $ ror B4
+ ror B3 $ ror B2 $ ror B1 $ ror B0
+
+ ;; If the N-th Bit of B[] was set then...
+ brcc 1f
+ ;; ...finish Rotation...
+ ori B7, 1 << 7
+
+ ;; ...and add A[] * 2^N to the Result C[]
+ ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
+ adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
+
+1: ;; Multiply A[] by 2
+ LSL A0 $ rol A1 $ rol A2 $ rol A3
+ rol A4 $ rol A5 $ rol A6 $ rol A7
+
+ dec Loop
+ brne 0b
+
+ ;; We expanded the Result in C[]
+ ;; Copy Result to the Return Register A[]
+ wmov A0, C0
+ wmov A2, C2
+ wmov A4, C4
+ wmov A6, C6
+
+ clr __zero_reg__
+ pop Loop
+ pop r28
+ pop r29
+ ret
+ENDF __muldi3
+
+#undef Loop
+
+#undef C7
+#undef C6
+#undef C5
+#undef C4
+#undef C3
+#undef C2
+#undef C1
+#undef C0
+
+#endif /* L_muldi3 */
+#endif /* HAVE_MUL */
+
+#undef B7
+#undef B6
+#undef B5
+#undef B4
+#undef B3
+#undef B2
+#undef B1
+#undef B0
+
+#undef A7
+#undef A6
+#undef A5
+#undef A4
+#undef A3
+#undef A2
+#undef A1
+#undef A0
+
+/*******************************************************
+ Widening Multiplication 64 = 32 x 32 with MUL
+*******************************************************/
+
+#if defined (__AVR_HAVE_MUL__)
+#define A0 r22
+#define A1 r23
+#define A2 r24
+#define A3 r25
+
+#define B0 r18
+#define B1 r19
+#define B2 r20
+#define B3 r21
+
+#define C0 18
+#define C1 C0+1
+#define C2 20
+#define C3 C2+1
+#define C4 28
+#define C5 C4+1
+#define C6 C4+2
+#define C7 C4+3
+
+#if defined (L_umulsidi3)
+
+;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
+
+;; R18[8] = R22[4] * R18[4]
+;;
+;; Ordinary ABI Function, but additionally sets
+;; X = R20[2] = B2[2]
+;; Z = R22[2] = A0[2]
+DEFUN __umulsidi3
+ clt
+ ;; FALLTHRU
+ENDF __umulsidi3
+ ;; T = sign (A)
+DEFUN __umulsidi3_helper
+ push 29 $ push 28 ; Y
+ wmov 30, A2
+ ;; Counting in Words, we have to perform 4 Multiplications
+ ;; 0 * 0
+ wmov 26, A0
+ XCALL __umulhisi3
+ push 23 $ push 22 ; C0
+ wmov 28, B0
+ wmov 18, B2
+ wmov C2, 24
+ push 27 $ push 26 ; A0
+ push 19 $ push 18 ; B2
+ ;;
+ ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
+ ;; B2 C2 -- -- -- B0 A2
+ ;; 1 * 1
+ wmov 26, 30 ; A2
+ XCALL __umulhisi3
+ ;; Sign-extend A. T holds the sign of A
+ brtc 0f
+ ;; Subtract B from the high part of the result
+ sub 22, 28
+ sbc 23, 29
+ sbc 24, 18
+ sbc 25, 19
+0: wmov 18, 28 ;; B0
+ wmov C4, 22
+ wmov C6, 24
+ ;;
+ ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
+ ;; B0 C2 -- -- A2 C4 C6
+ ;;
+ ;; 1 * 0
+ XCALL __muldi3_6
+ ;; 0 * 1
+ pop 26 $ pop 27 ;; B2
+ pop 18 $ pop 19 ;; A0
+ XCALL __muldi3_6
+
+ ;; Move result C into place and save A0 in Z
+ wmov 22, C4
+ wmov 24, C6
+ wmov 30, 18 ; A0
+ pop C0 $ pop C1
+
+ ;; Epilogue
+ pop 28 $ pop 29 ;; Y
+ ret
+ENDF __umulsidi3_helper
+#endif /* L_umulsidi3 */
+
+
+#if defined (L_mulsidi3)
+
+;; Signed widening 64 = 32 * 32 Multiplication
+;;
+;; R18[8] = R22[4] * R18[4]
+;; Ordinary ABI Function
+DEFUN __mulsidi3
+ bst A3, 7
+ sbrs B3, 7 ; Enhanced core has no skip bug
+ XJMP __umulsidi3_helper
+
+ ;; B needs sign-extension
+ push A3
+ push A2
+ XCALL __umulsidi3_helper
+ ;; A0 survived in Z
+ sub r22, r30
+ sbc r23, r31
+ pop r26
+ pop r27
+ sbc r24, r26
+ sbc r25, r27
+ ret
+ENDF __mulsidi3
+#endif /* L_mulsidi3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#endif /* HAVE_MUL */
+
+/**********************************************************
+ Widening Multiplication 64 = 32 x 32 without MUL
+**********************************************************/
+
+#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+#define AA0 22
+#define AA1 AA0+1
+#define AA2 AA0+2
+#define AA3 AA0+3
+
+#define BB0 18
+#define BB1 BB0+1
+#define BB2 BB0+2
+#define BB3 BB0+3
+
+#define Mask r30
+
+;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
+;;
+;; R18[8] = R22[4] * R18[4]
+;; Ordinary ABI Function
+DEFUN __mulsidi3
+ set
+ skip
+ ;; FALLTHRU
+ENDF __mulsidi3
+
+DEFUN __umulsidi3
+ clt ; skipped
+ ;; Save 10 Registers: R10..R17, R28, R29
+ do_prologue_saves 10
+ ldi Mask, 0xff
+ bld Mask, 7
+ ;; Move B into place...
+ wmov B0, BB0
+ wmov B2, BB2
+ ;; ...and extend it
+ and BB3, Mask
+ lsl BB3
+ sbc B4, B4
+ mov B5, B4
+ wmov B6, B4
+ ;; Move A into place...
+ wmov A0, AA0
+ wmov A2, AA2
+ ;; ...and extend it
+ and AA3, Mask
+ lsl AA3
+ sbc A4, A4
+ mov A5, A4
+ wmov A6, A4
+ XCALL __muldi3
+ do_epilogue_restores 10
+ENDF __umulsidi3
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef A4
+#undef A5
+#undef A6
+#undef A7
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef B4
+#undef B5
+#undef B6
+#undef B7
+#undef AA0
+#undef AA1
+#undef AA2
+#undef AA3
+#undef BB0
+#undef BB1
+#undef BB2
+#undef BB3
+#undef Mask
+#endif /* L_mulsidi3 && !HAVE_MUL */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+.section .text.libgcc.div, "ax", @progbits
+
+/*******************************************************
+ Division 8 / 8 => (result + remainder)
+*******************************************************/
+#define r_rem r25 /* remainder */
+#define r_arg1 r24 /* dividend, quotient */
+#define r_arg2 r22 /* divisor */
+#define r_cnt r23 /* loop count */
+
+#if defined (L_udivmodqi4)
+DEFUN __udivmodqi4
+ sub r_rem,r_rem ; clear remainder and carry
+ ldi r_cnt,9 ; init loop counter
+ rjmp __udivmodqi4_ep ; jump to entry point
+__udivmodqi4_loop:
+ rol r_rem ; shift dividend into remainder
+ cp r_rem,r_arg2 ; compare remainder & divisor
+ brcs __udivmodqi4_ep ; remainder < divisor
+ sub r_rem,r_arg2 ; restore remainder
+__udivmodqi4_ep:
+ rol r_arg1 ; shift dividend (with CARRY)
+ dec r_cnt ; decrement loop counter
+ brne __udivmodqi4_loop
+ com r_arg1 ; complement result
+ ; because C flag was complemented in loop
+ ret
+ENDF __udivmodqi4
+#endif /* defined (L_udivmodqi4) */
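+
+;; A rough C model of the shift-and-subtract loop above (an illustrative
+;; sketch, not part of the assembled library):
+;;
+;;   uint8_t q = 0, r = 0;
+;;   for (uint8_t i = 0; i < 8; i++)
+;;     {
+;;       r = (r << 1) | (a >> 7);        /* shift dividend into remainder */
+;;       a <<= 1;
+;;       q <<= 1;
+;;       if (r >= b)                     /* divisor fits into remainder   */
+;;         { r -= b; q |= 1; }
+;;     }
+;;   /* q = a / b, r = a % b.  The routine above shifts the quotient bits
+;;      into the dividend register via the (complemented) carry, hence the
+;;      final COM. */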
+
+#if defined (L_divmodqi4)
+DEFUN __divmodqi4
+ bst r_arg1,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg1
+ eor __tmp_reg__,r_arg2; r0.7 is sign of result
+ sbrc r_arg1,7
+ neg r_arg1 ; dividend negative : negate
+ sbrc r_arg2,7
+ neg r_arg2 ; divisor negative : negate
+ XCALL __udivmodqi4 ; do the unsigned div/mod
+ brtc __divmodqi4_1
+ neg r_rem ; correct remainder sign
+__divmodqi4_1:
+ sbrc __tmp_reg__,7
+ neg r_arg1 ; correct result sign
+__divmodqi4_exit:
+ ret
+ENDF __divmodqi4
+#endif /* defined (L_divmodqi4) */
+
+#undef r_rem
+#undef r_arg1
+#undef r_arg2
+#undef r_cnt
+
+
+/*******************************************************
+ Division 16 / 16 => (result + remainder)
+*******************************************************/
+#define r_remL r26 /* remainder Low */
+#define r_remH r27 /* remainder High */
+
+/* return: remainder */
+#define r_arg1L r24 /* dividend Low */
+#define r_arg1H r25 /* dividend High */
+
+/* return: quotient */
+#define r_arg2L r22 /* divisor Low */
+#define r_arg2H r23 /* divisor High */
+
+#define r_cnt r21 /* loop count */
+
+#if defined (L_udivmodhi4)
+DEFUN __udivmodhi4
+ sub r_remL,r_remL
+ sub r_remH,r_remH ; clear remainder and carry
+ ldi r_cnt,17 ; init loop counter
+ rjmp __udivmodhi4_ep ; jump to entry point
+__udivmodhi4_loop:
+ rol r_remL ; shift dividend into remainder
+ rol r_remH
+ cp r_remL,r_arg2L ; compare remainder & divisor
+ cpc r_remH,r_arg2H
+ brcs __udivmodhi4_ep ; remainder < divisor
+ sub r_remL,r_arg2L ; restore remainder
+ sbc r_remH,r_arg2H
+__udivmodhi4_ep:
+ rol r_arg1L ; shift dividend (with CARRY)
+ rol r_arg1H
+ dec r_cnt ; decrement loop counter
+ brne __udivmodhi4_loop
+ com r_arg1L
+ com r_arg1H
+; div/mod results to return registers, as for the div() function
+ mov_l r_arg2L, r_arg1L ; quotient
+ mov_h r_arg2H, r_arg1H
+ mov_l r_arg1L, r_remL ; remainder
+ mov_h r_arg1H, r_remH
+ ret
+ENDF __udivmodhi4
+#endif /* defined (L_udivmodhi4) */
+
+#if defined (L_divmodhi4)
+DEFUN __divmodhi4
+ .global _div
+_div:
+ bst r_arg1H,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg2H
+ brtc 0f
+ com __tmp_reg__ ; r0.7 is sign of result
+ rcall __divmodhi4_neg1 ; dividend negative: negate
+0:
+ sbrc r_arg2H,7
+ rcall __divmodhi4_neg2 ; divisor negative: negate
+ XCALL __udivmodhi4 ; do the unsigned div/mod
+ sbrc __tmp_reg__,7
+ rcall __divmodhi4_neg2 ; correct remainder sign
+ brtc __divmodhi4_exit
+__divmodhi4_neg1:
+ ;; correct dividend/remainder sign
+ com r_arg1H
+ neg r_arg1L
+ sbci r_arg1H,0xff
+ ret
+__divmodhi4_neg2:
+ ;; correct divisor/result sign
+ com r_arg2H
+ neg r_arg2L
+ sbci r_arg2H,0xff
+__divmodhi4_exit:
+ ret
+ENDF __divmodhi4
+#endif /* defined (L_divmodhi4) */
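+
+;; The signed div/mod routines in this file all follow the same pattern;
+;; roughly, in C (an illustrative sketch that ignores the INT_MIN corner case):
+;;
+;;   int neg_a = a < 0, neg_b = b < 0;
+;;   unsigned ua = neg_a ? -a : a, ub = neg_b ? -b : b;
+;;   unsigned q = ua / ub, r = ua % ub;
+;;   if (neg_a != neg_b) q = -q;   /* quotient sign = sign(a) ^ sign(b)   */
+;;   if (neg_a)          r = -r;   /* remainder takes the dividend's sign */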
+
+#undef r_remH
+#undef r_remL
+
+#undef r_arg1H
+#undef r_arg1L
+
+#undef r_arg2H
+#undef r_arg2L
+
+#undef r_cnt
+
+/*******************************************************
+ Division 24 / 24 => (result + remainder)
+*******************************************************/
+
+;; A[0..2]: In: Dividend; Out: Quotient
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+
+;; B[0..2]: In: Divisor; Out: Remainder
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+
+;; C[0..2]: Expand remainder
+#define C0 __zero_reg__
+#define C1 26
+#define C2 25
+
+;; Loop counter
+#define r_cnt 21
+
+#if defined (L_udivmodpsi4)
+;; R24:R22 = R24:R22 udiv R20:R18
+;; R20:R18 = R24:R22 umod R20:R18
+;; Clobbers: R21, R25, R26
+
+DEFUN __udivmodpsi4
+ ; init loop counter
+ ldi r_cnt, 24+1
+ ; Clear remainder and carry. C0 is already 0
+ clr C1
+ sub C2, C2
+ ; jump to entry point
+ rjmp __udivmodpsi4_start
+__udivmodpsi4_loop:
+ ; shift dividend into remainder
+ rol C0
+ rol C1
+ rol C2
+ ; compare remainder & divisor
+ cp C0, B0
+ cpc C1, B1
+ cpc C2, B2
+ brcs __udivmodpsi4_start ; remainder < divisor
+ sub C0, B0 ; restore remainder
+ sbc C1, B1
+ sbc C2, B2
+__udivmodpsi4_start:
+ ; shift dividend (with CARRY)
+ rol A0
+ rol A1
+ rol A2
+ ; decrement loop counter
+ dec r_cnt
+ brne __udivmodpsi4_loop
+ com A0
+ com A1
+ com A2
+ ; div/mod results to return registers
+ ; remainder
+ mov B0, C0
+ mov B1, C1
+ mov B2, C2
+ clr __zero_reg__ ; C0
+ ret
+ENDF __udivmodpsi4
+#endif /* defined (L_udivmodpsi4) */
+
+#if defined (L_divmodpsi4)
+;; R24:R22 = R24:R22 div R20:R18
+;; R20:R18 = R24:R22 mod R20:R18
+;; Clobbers: T, __tmp_reg__, R21, R25, R26
+
+DEFUN __divmodpsi4
+ ; R0.7 will contain the sign of the result:
+ ; R0.7 = A.sign ^ B.sign
+ mov __tmp_reg__, B2
+ ; T-flag = sign of dividend
+ bst A2, 7
+ brtc 0f
+ com __tmp_reg__
+ ; Adjust dividend's sign
+ rcall __divmodpsi4_negA
+0:
+ ; Adjust divisor's sign
+ sbrc B2, 7
+ rcall __divmodpsi4_negB
+
+ ; Do the unsigned div/mod
+ XCALL __udivmodpsi4
+
+ ; Adjust quotient's sign
+ sbrc __tmp_reg__, 7
+ rcall __divmodpsi4_negA
+
+ ; Adjust remainder's sign
+ brtc __divmodpsi4_end
+
+__divmodpsi4_negB:
+ ; Correct divisor/remainder sign
+ com B2
+ com B1
+ neg B0
+ sbci B1, -1
+ sbci B2, -1
+ ret
+
+ ; Correct dividend/quotient sign
+__divmodpsi4_negA:
+ com A2
+ com A1
+ neg A0
+ sbci A1, -1
+ sbci A2, -1
+__divmodpsi4_end:
+ ret
+
+ENDF __divmodpsi4
+#endif /* defined (L_divmodpsi4) */
+
+#undef A0
+#undef A1
+#undef A2
+
+#undef B0
+#undef B1
+#undef B2
+
+#undef C0
+#undef C1
+#undef C2
+
+#undef r_cnt
+
+/*******************************************************
+ Division 32 / 32 => (result + remainder)
+*******************************************************/
+#define r_remHH r31 /* remainder High */
+#define r_remHL r30
+#define r_remH r27
+#define r_remL r26 /* remainder Low */
+
+/* return: remainder */
+#define r_arg1HH r25 /* dividend High */
+#define r_arg1HL r24
+#define r_arg1H r23
+#define r_arg1L r22 /* dividend Low */
+
+/* return: quotient */
+#define r_arg2HH r21 /* divisor High */
+#define r_arg2HL r20
+#define r_arg2H r19
+#define r_arg2L r18 /* divisor Low */
+
+#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
+
+#if defined (L_udivmodsi4)
+DEFUN __udivmodsi4
+ ldi r_remL, 33 ; init loop counter
+ mov r_cnt, r_remL
+ sub r_remL,r_remL
+ sub r_remH,r_remH ; clear remainder and carry
+ mov_l r_remHL, r_remL
+ mov_h r_remHH, r_remH
+ rjmp __udivmodsi4_ep ; jump to entry point
+__udivmodsi4_loop:
+ rol r_remL ; shift dividend into remainder
+ rol r_remH
+ rol r_remHL
+ rol r_remHH
+ cp r_remL,r_arg2L ; compare remainder & divisor
+ cpc r_remH,r_arg2H
+ cpc r_remHL,r_arg2HL
+ cpc r_remHH,r_arg2HH
+ brcs __udivmodsi4_ep ; remainder < divisor
+ sub r_remL,r_arg2L ; restore remainder
+ sbc r_remH,r_arg2H
+ sbc r_remHL,r_arg2HL
+ sbc r_remHH,r_arg2HH
+__udivmodsi4_ep:
+ rol r_arg1L ; shift dividend (with CARRY)
+ rol r_arg1H
+ rol r_arg1HL
+ rol r_arg1HH
+ dec r_cnt ; decrement loop counter
+ brne __udivmodsi4_loop
+ ; __zero_reg__ now restored (r_cnt == 0)
+ com r_arg1L
+ com r_arg1H
+ com r_arg1HL
+ com r_arg1HH
+; div/mod results to return registers, as for the ldiv() function
+ mov_l r_arg2L, r_arg1L ; quotient
+ mov_h r_arg2H, r_arg1H
+ mov_l r_arg2HL, r_arg1HL
+ mov_h r_arg2HH, r_arg1HH
+ mov_l r_arg1L, r_remL ; remainder
+ mov_h r_arg1H, r_remH
+ mov_l r_arg1HL, r_remHL
+ mov_h r_arg1HH, r_remHH
+ ret
+ENDF __udivmodsi4
+#endif /* defined (L_udivmodsi4) */
+
+#if defined (L_divmodsi4)
+DEFUN __divmodsi4
+ mov __tmp_reg__,r_arg2HH
+ bst r_arg1HH,7 ; store sign of dividend
+ brtc 0f
+ com __tmp_reg__ ; r0.7 is sign of result
+ XCALL __negsi2 ; dividend negative: negate
+0:
+ sbrc r_arg2HH,7
+ rcall __divmodsi4_neg2 ; divisor negative: negate
+ XCALL __udivmodsi4 ; do the unsigned div/mod
+ sbrc __tmp_reg__, 7 ; correct quotient sign
+ rcall __divmodsi4_neg2
+ brtc __divmodsi4_exit ; correct remainder sign
+ XJMP __negsi2
+__divmodsi4_neg2:
+ ;; correct divisor/quotient sign
+ com r_arg2HH
+ com r_arg2HL
+ com r_arg2H
+ neg r_arg2L
+ sbci r_arg2H,0xff
+ sbci r_arg2HL,0xff
+ sbci r_arg2HH,0xff
+__divmodsi4_exit:
+ ret
+ENDF __divmodsi4
+#endif /* defined (L_divmodsi4) */
+
+#if defined (L_negsi2)
+;; (set (reg:SI 22)
+;; (neg:SI (reg:SI 22)))
+;; Sets the V flag for signed overflow tests
+DEFUN __negsi2
+ NEG4 22
+ ret
+ENDF __negsi2
+#endif /* L_negsi2 */
+
+#undef r_remHH
+#undef r_remHL
+#undef r_remH
+#undef r_remL
+#undef r_arg1HH
+#undef r_arg1HL
+#undef r_arg1H
+#undef r_arg1L
+#undef r_arg2HH
+#undef r_arg2HL
+#undef r_arg2H
+#undef r_arg2L
+#undef r_cnt
+
+/*******************************************************
+ Division 64 / 64
+ Modulo 64 % 64
+*******************************************************/
+
+;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
+;; at least 16k of Program Memory. For smaller Devices, depend
+;; on MOVW and SP Size. There is a Connexion between SP Size and
+;; Flash Size so that SP Size can be used to test for Flash Size.
+
+#if defined (__AVR_HAVE_JMP_CALL__)
+# define SPEED_DIV 8
+#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
+# define SPEED_DIV 16
+#else
+# define SPEED_DIV 0
+#endif
+
+;; A[0..7]: In: Dividend;
+;; Out: Quotient (T = 0)
+;; Out: Remainder (T = 1)
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+;; B[0..7]: In: Divisor; Out: Clobber
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+;; C[0..7]: Expand remainder; Out: Remainder (unused)
+#define C0 8
+#define C1 C0+1
+#define C2 30
+#define C3 C2+1
+#define C4 28
+#define C5 C4+1
+#define C6 26
+#define C7 C6+1
+
+;; Holds Signs during Division Routine
+#define SS __tmp_reg__
+
+;; Bit-Counter in Division Routine
+#define R_cnt __zero_reg__
+
+;; Scratch Register for Negation
+#define NN r31
+
+#if defined (L_udivdi3)
+
+;; R25:R18 = R24:R18 umod R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __umoddi3
+ set
+ rjmp __udivdi3_umoddi3
+ENDF __umoddi3
+
+;; R25:R18 = R24:R18 udiv R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __udivdi3
+ clt
+ENDF __udivdi3
+
+DEFUN __udivdi3_umoddi3
+ push C0
+ push C1
+ push C4
+ push C5
+ XCALL __udivmod64
+ pop C5
+ pop C4
+ pop C1
+ pop C0
+ ret
+ENDF __udivdi3_umoddi3
+#endif /* L_udivdi3 */
+
+#if defined (L_udivmod64)
+
+;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
+;; No Registers saved/restored; the Callers will take Care.
+;; Preserves B[] and T-flag
+;; T = 0: Compute Quotient in A[]
+;; T = 1: Compute Remainder in A[] and shift SS one Bit left
+
+DEFUN __udivmod64
+
+ ;; Clear Remainder (C6, C7 will follow)
+ clr C0
+ clr C1
+ wmov C2, C0
+ wmov C4, C0
+ ldi C7, 64
+
+#if SPEED_DIV == 0 || SPEED_DIV == 16
+ ;; Initialize Loop-Counter
+ mov R_cnt, C7
+ wmov C6, C0
+#endif /* SPEED_DIV */
+
+#if SPEED_DIV == 8
+
+ push A7
+ clr C6
+
+1: ;; Compare shifted Dividend against Divisor
+ ;; If -- even after Shifting -- it is smaller...
+ CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
+ cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
+ brcc 2f
+
+ ;; ...then we can subtract it. Thus, it is legal to shift left
+ $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
+ mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
+ mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
+ mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
+
+ ;; 8 Bits are done
+ subi C7, 8
+ brne 1b
+
+ ;; Shifted 64 Bits: A7 has traveled to C7
+ pop C7
+ ;; Divisor is greater than Dividend. We have:
+ ;; A[] % B[] = A[]
+ ;; A[] / B[] = 0
+ ;; Thus, we can return immediately
+ rjmp 5f
+
+2: ;; Initialize Bit-Counter with the Number of Bits still to be processed
+ mov R_cnt, C7
+
+ ;; Push of A7 is not needed because C7 is still 0
+ pop C7
+ clr C7
+
+#elif SPEED_DIV == 16
+
+ ;; Compare shifted Dividend against Divisor
+ cp A7, B3
+ cpc C0, B4
+ cpc C1, B5
+ cpc C2, B6
+ cpc C3, B7
+ brcc 2f
+
+ ;; Divisor is greater than shifted Dividend: We can shift the Dividend
+ ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
+ wmov C2,A6 $ wmov C0,A4
+ wmov A6,A2 $ wmov A4,A0
+ wmov A2,C6 $ wmov A0,C4
+
+ ;; Set Bit Counter to 32
+ lsr R_cnt
+2:
+#elif SPEED_DIV
+#error SPEED_DIV = ?
+#endif /* SPEED_DIV */
+
+;; The very Division + Remainder Routine
+
+3: ;; Left-shift Dividend...
+ lsl A0 $ rol A1 $ rol A2 $ rol A3
+ rol A4 $ rol A5 $ rol A6 $ rol A7
+
+ ;; ...into Remainder
+ rol C0 $ rol C1 $ rol C2 $ rol C3
+ rol C4 $ rol C5 $ rol C6 $ rol C7
+
+ ;; Compare Remainder and Divisor
+ CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
+ cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
+
+ brcs 4f
+
+ ;; Divisor fits into Remainder: Subtract it from Remainder...
+ SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
+ sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
+
+ ;; ...and set the corresponding Bit in the upcoming Quotient
+ ;; The Bit will travel to its final Position
+ ori A0, 1
+
+4: ;; This Bit is done
+ dec R_cnt
+ brne 3b
+ ;; __zero_reg__ is 0 again
+
+ ;; T = 0: We are fine with the Quotient in A[]
+ ;; T = 1: Copy Remainder to A[]
+5: brtc 6f
+ wmov A0, C0
+ wmov A2, C2
+ wmov A4, C4
+ wmov A6, C6
+ ;; Move the Sign of the Result to SS.7
+ lsl SS
+
+6: ret
+
+ENDF __udivmod64
+#endif /* L_udivmod64 */
+
+
+#if defined (L_divdi3)
+
+;; R25:R18 = R24:R18 mod R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __moddi3
+ set
+ rjmp __divdi3_moddi3
+ENDF __moddi3
+
+;; R25:R18 = R24:R18 div R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __divdi3
+ clt
+ENDF __divdi3
+
+DEFUN __divdi3_moddi3
+#if SPEED_DIV
+ mov r31, A7
+ or r31, B7
+ brmi 0f
+ ;; Both Signs are 0: the following Complexity is not needed
+ XJMP __udivdi3_umoddi3
+#endif /* SPEED_DIV */
+
+0: ;; The Prologue
+ ;; Save 12 Registers: Y, 17...8
+ ;; No Frame needed
+ do_prologue_saves 12
+
+ ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
+ ;; SS.6 will contain the Sign of the Remainder (A.sign)
+ mov SS, A7
+ asr SS
+ ;; Adjust Dividend's Sign as needed
+#if SPEED_DIV
+ ;; Compiling for Speed we know that at least one Sign must be < 0
+ ;; Thus, if A[] >= 0 then we know B[] < 0
+ brpl 22f
+#else
+ brpl 21f
+#endif /* SPEED_DIV */
+
+ XCALL __negdi2
+
+ ;; Adjust Divisor's Sign and SS.7 as needed
+21: tst B7
+ brpl 3f
+22: ldi NN, 1 << 7
+ eor SS, NN
+
+ ldi NN, -1
+ com B4 $ com B5 $ com B6 $ com B7
+ $ com B1 $ com B2 $ com B3
+ NEG B0
+ $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
+ sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
+
+3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
+ XCALL __udivmod64
+
+ ;; Adjust Result's Sign
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ tst SS
+ brpl 4f
+#else
+ sbrc SS, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XCALL __negdi2
+
+4: ;; Epilogue: Restore 12 Registers and return
+ do_epilogue_restores 12
+
+ENDF __divdi3_moddi3
+
+#endif /* L_divdi3 */
+
+#undef R_cnt
+#undef SS
+#undef NN
+
+.section .text.libgcc, "ax", @progbits
+
+#define TT __tmp_reg__
+
+#if defined (L_adddi3)
+;; (set (reg:DI 18)
+;; (plus:DI (reg:DI 18)
+;; (reg:DI 10)))
+;; Sets the V flag for signed overflow tests
+;; Sets the C flag for unsigned overflow tests
+DEFUN __adddi3
+ ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
+ adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
+ ret
+ENDF __adddi3
+#endif /* L_adddi3 */
+
+#if defined (L_adddi3_s8)
+;; (set (reg:DI 18)
+;; (plus:DI (reg:DI 18)
+;; (sign_extend:SI (reg:QI 26))))
+;; Sets the V flag for signed overflow tests
+;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
+DEFUN __adddi3_s8
+ clr TT
+ sbrc r26, 7
+ com TT
+ ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
+ adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
+ ret
+ENDF __adddi3_s8
+#endif /* L_adddi3_s8 */
+
+#if defined (L_subdi3)
+;; (set (reg:DI 18)
+;; (minus:DI (reg:DI 18)
+;; (reg:DI 10)))
+;; Sets the V flag for signed overflow tests
+;; Sets the C flag for unsigned overflow tests
+DEFUN __subdi3
+ SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
+ sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
+ ret
+ENDF __subdi3
+#endif /* L_subdi3 */
+
+#if defined (L_cmpdi2)
+;; (set (cc0)
+;; (compare (reg:DI 18)
+;; (reg:DI 10)))
+DEFUN __cmpdi2
+ CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
+ cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
+ ret
+ENDF __cmpdi2
+#endif /* L_cmpdi2 */
+
+#if defined (L_cmpdi2_s8)
+;; (set (cc0)
+;; (compare (reg:DI 18)
+;; (sign_extend:SI (reg:QI 26))))
+DEFUN __cmpdi2_s8
+ clr TT
+ sbrc r26, 7
+ com TT
+ CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
+ cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
+ ret
+ENDF __cmpdi2_s8
+#endif /* L_cmpdi2_s8 */
+
+#if defined (L_negdi2)
+;; (set (reg:DI 18)
+;; (neg:DI (reg:DI 18)))
+;; Sets the V flag for signed overflow tests
+DEFUN __negdi2
+
+ com A4 $ com A5 $ com A6 $ com A7
+ $ com A1 $ com A2 $ com A3
+ NEG A0
+ $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
+ sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
+ ret
+
+ENDF __negdi2
+#endif /* L_negdi2 */
+
+#undef TT
+
+#undef C7
+#undef C6
+#undef C5
+#undef C4
+#undef C3
+#undef C2
+#undef C1
+#undef C0
+
+#undef B7
+#undef B6
+#undef B5
+#undef B4
+#undef B3
+#undef B2
+#undef B1
+#undef B0
+
+#undef A7
+#undef A6
+#undef A5
+#undef A4
+#undef A3
+#undef A2
+#undef A1
+#undef A0
+
+
+.section .text.libgcc.prologue, "ax", @progbits
+
+/**********************************
+ * This is a prologue subroutine
+ **********************************/
+#if defined (L_prologue)
+
+;; This function does not clobber T-flag; 64-bit division relies on it
+DEFUN __prologue_saves__
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r28
+ push r29
+#if !defined (__AVR_HAVE_SPH__)
+ in r28,__SP_L__
+ sub r28,r26
+ out __SP_L__,r28
+ clr r29
+#elif defined (__AVR_XMEGA__)
+ in r28,__SP_L__
+ in r29,__SP_H__
+ sub r28,r26
+ sbc r29,r27
+ out __SP_L__,r28
+ out __SP_H__,r29
+#else
+ in r28,__SP_L__
+ in r29,__SP_H__
+ sub r28,r26
+ sbc r29,r27
+ in __tmp_reg__,__SREG__
+ cli
+ out __SP_H__,r29
+ out __SREG__,__tmp_reg__
+ out __SP_L__,r28
+#endif /* #SP = 8/16 */
+
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+ENDF __prologue_saves__
+#endif /* defined (L_prologue) */
+
+/*
+ * This is an epilogue subroutine
+ */
+#if defined (L_epilogue)
+
+DEFUN __epilogue_restores__
+ ldd r2,Y+18
+ ldd r3,Y+17
+ ldd r4,Y+16
+ ldd r5,Y+15
+ ldd r6,Y+14
+ ldd r7,Y+13
+ ldd r8,Y+12
+ ldd r9,Y+11
+ ldd r10,Y+10
+ ldd r11,Y+9
+ ldd r12,Y+8
+ ldd r13,Y+7
+ ldd r14,Y+6
+ ldd r15,Y+5
+ ldd r16,Y+4
+ ldd r17,Y+3
+ ldd r26,Y+2
+#if !defined (__AVR_HAVE_SPH__)
+ ldd r29,Y+1
+ add r28,r30
+ out __SP_L__,r28
+ mov r28, r26
+#elif defined (__AVR_XMEGA__)
+ ldd r27,Y+1
+ add r28,r30
+ adc r29,__zero_reg__
+ out __SP_L__,r28
+ out __SP_H__,r29
+ wmov 28, 26
+#else
+ ldd r27,Y+1
+ add r28,r30
+ adc r29,__zero_reg__
+ in __tmp_reg__,__SREG__
+ cli
+ out __SP_H__,r29
+ out __SREG__,__tmp_reg__
+ out __SP_L__,r28
+ mov_l r28, r26
+ mov_h r29, r27
+#endif /* #SP = 8/16 */
+ ret
+ENDF __epilogue_restores__
+#endif /* defined (L_epilogue) */
+
+#ifdef L_exit
+ .section .fini9,"ax",@progbits
+DEFUN _exit
+ .weak exit
+exit:
+ENDF _exit
+
+ /* Code from .fini8 ... .fini1 sections inserted by ld script. */
+
+ .section .fini0,"ax",@progbits
+ cli
+__stop_program:
+ rjmp __stop_program
+#endif /* defined (L_exit) */
+
+#ifdef L_cleanup
+ .weak _cleanup
+ .func _cleanup
+_cleanup:
+ ret
+.endfunc
+#endif /* defined (L_cleanup) */
+
+
+.section .text.libgcc, "ax", @progbits
+
+#ifdef L_tablejump
+DEFUN __tablejump2__
+ lsl r30
+ rol r31
+ ;; FALLTHRU
+ENDF __tablejump2__
+
+DEFUN __tablejump__
+#if defined (__AVR_HAVE_LPMX__)
+ lpm __tmp_reg__, Z+
+ lpm r31, Z
+ mov r30, __tmp_reg__
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+#else /* !HAVE_LPMX */
+ lpm
+ adiw r30, 1
+ push r0
+ lpm
+ push r0
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ in __tmp_reg__, __EIND__
+ push __tmp_reg__
+#endif
+ ret
+#endif /* !HAVE_LPMX */
+ENDF __tablejump__
+#endif /* defined (L_tablejump) */
+
+#ifdef L_copy_data
+ .section .init4,"ax",@progbits
+DEFUN __do_copy_data
+#if defined(__AVR_HAVE_ELPMX__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start)
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm r0, Z+
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start - 0x10000)
+.L__do_copy_data_carry:
+ inc r16
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm
+ st X+, r0
+ adiw r30, 1
+ brcs .L__do_copy_data_carry
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+#if defined (__AVR_HAVE_LPMX__)
+ lpm r0, Z+
+#else
+ lpm
+ adiw r30, 1
+#endif
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
+#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+#endif /* ELPM && RAMPD */
+ENDF __do_copy_data
+#endif /* L_copy_data */
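+
+;; In rough C terms, __do_copy_data behaves like the sketch below.
+;; (Illustrative only; pgm_read_byte stands in for the LPM/ELPM accesses
+;; done above and is borrowed from avr-libc, not from this file.)
+;;
+;;   extern char __data_start[], __data_end[], __data_load_start[];
+;;   char *dst = __data_start;
+;;   const char *src = __data_load_start;     /* load address in flash */
+;;   while (dst != __data_end)
+;;     *dst++ = pgm_read_byte (src++);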
+
+/* __do_clear_bss is only necessary if there is anything in .bss section. */
+
+#ifdef L_clear_bss
+ .section .init4,"ax",@progbits
+DEFUN __do_clear_bss
+ ldi r17, hi8(__bss_end)
+ ldi r26, lo8(__bss_start)
+ ldi r27, hi8(__bss_start)
+ rjmp .do_clear_bss_start
+.do_clear_bss_loop:
+ st X+, __zero_reg__
+.do_clear_bss_start:
+ cpi r26, lo8(__bss_end)
+ cpc r27, r17
+ brne .do_clear_bss_loop
+ENDF __do_clear_bss
+#endif /* L_clear_bss */
+
+/* __do_global_ctors and __do_global_dtors are only necessary
+ if there are any constructors/destructors. */
+
+#ifdef L_ctors
+ .section .init6,"ax",@progbits
+DEFUN __do_global_ctors
+#if defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__ctors_start)
+ ldi r28, lo8(__ctors_end)
+ ldi r29, hi8(__ctors_end)
+ ldi r16, hh8(__ctors_end)
+ rjmp .L__do_global_ctors_start
+.L__do_global_ctors_loop:
+ sbiw r28, 2
+ sbc r16, __zero_reg__
+ mov_h r31, r29
+ mov_l r30, r28
+ out __RAMPZ__, r16
+ XCALL __tablejump_elpm__
+.L__do_global_ctors_start:
+ cpi r28, lo8(__ctors_start)
+ cpc r29, r17
+ ldi r24, hh8(__ctors_start)
+ cpc r16, r24
+ brne .L__do_global_ctors_loop
+#else
+ ldi r17, hi8(__ctors_start)
+ ldi r28, lo8(__ctors_end)
+ ldi r29, hi8(__ctors_end)
+ rjmp .L__do_global_ctors_start
+.L__do_global_ctors_loop:
+ sbiw r28, 2
+ mov_h r31, r29
+ mov_l r30, r28
+ XCALL __tablejump__
+.L__do_global_ctors_start:
+ cpi r28, lo8(__ctors_start)
+ cpc r29, r17
+ brne .L__do_global_ctors_loop
+#endif /* defined(__AVR_HAVE_ELPM__) */
+ENDF __do_global_ctors
+#endif /* L_ctors */
+
+#ifdef L_dtors
+ .section .fini6,"ax",@progbits
+DEFUN __do_global_dtors
+#if defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__dtors_end)
+ ldi r28, lo8(__dtors_start)
+ ldi r29, hi8(__dtors_start)
+ ldi r16, hh8(__dtors_start)
+ rjmp .L__do_global_dtors_start
+.L__do_global_dtors_loop:
+ sbiw r28, 2
+ sbc r16, __zero_reg__
+ mov_h r31, r29
+ mov_l r30, r28
+ out __RAMPZ__, r16
+ XCALL __tablejump_elpm__
+.L__do_global_dtors_start:
+ cpi r28, lo8(__dtors_end)
+ cpc r29, r17
+ ldi r24, hh8(__dtors_end)
+ cpc r16, r24
+ brne .L__do_global_dtors_loop
+#else
+ ldi r17, hi8(__dtors_end)
+ ldi r28, lo8(__dtors_start)
+ ldi r29, hi8(__dtors_start)
+ rjmp .L__do_global_dtors_start
+.L__do_global_dtors_loop:
+ mov_h r31, r29
+ mov_l r30, r28
+ XCALL __tablejump__
+ adiw r28, 2
+.L__do_global_dtors_start:
+ cpi r28, lo8(__dtors_end)
+ cpc r29, r17
+ brne .L__do_global_dtors_loop
+#endif /* defined(__AVR_HAVE_ELPM__) */
+ENDF __do_global_dtors
+#endif /* L_dtors */
+
+.section .text.libgcc, "ax", @progbits
+
+#ifdef L_tablejump_elpm
+DEFUN __tablejump_elpm__
+#if defined (__AVR_HAVE_ELPMX__)
+ elpm __tmp_reg__, Z+
+ elpm r31, Z
+ mov r30, __tmp_reg__
+#if defined (__AVR_HAVE_RAMPD__)
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+#endif /* RAMPD */
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+#elif defined (__AVR_HAVE_ELPM__)
+ elpm
+ adiw r30, 1
+ push r0
+ elpm
+ push r0
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ in __tmp_reg__, __EIND__
+ push __tmp_reg__
+#endif
+ ret
+#endif
+ENDF __tablejump_elpm__
+#endif /* defined (L_tablejump_elpm) */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Loading n bytes from Flash; n = 3,4
+;; R22... = Flash[Z]
+;; Clobbers: __tmp_reg__
+
+#if (defined (L_load_3) \
+ || defined (L_load_4)) \
+ && !defined (__AVR_HAVE_LPMX__)
+
+;; Destination
+#define D0 22
+#define D1 D0+1
+#define D2 D0+2
+#define D3 D0+3
+
+.macro .load dest, n
+ lpm
+ mov \dest, r0
+.if \dest != D0+\n-1
+ adiw r30, 1
+.else
+ sbiw r30, \n-1
+.endif
+.endm
+
+#if defined (L_load_3)
+DEFUN __load_3
+ push D3
+ XCALL __load_4
+ pop D3
+ ret
+ENDF __load_3
+#endif /* L_load_3 */
+
+#if defined (L_load_4)
+DEFUN __load_4
+ .load D0, 4
+ .load D1, 4
+ .load D2, 4
+ .load D3, 4
+ ret
+ENDF __load_4
+#endif /* L_load_4 */
+
+#endif /* L_load_3 || L_load_4 */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Loading n bytes from Flash or RAM; n = 1,2,3,4
+;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
+;; Clobbers: __tmp_reg__, R21, R30, R31
+
+#if (defined (L_xload_1) \
+ || defined (L_xload_2) \
+ || defined (L_xload_3) \
+ || defined (L_xload_4))
+
+;; Destination
+#define D0 22
+#define D1 D0+1
+#define D2 D0+2
+#define D3 D0+3
+
+;; Register containing bits 16+ of the address
+
+#define HHI8 21
+
+.macro .xload dest, n
+#if defined (__AVR_HAVE_ELPMX__)
+ elpm \dest, Z+
+#elif defined (__AVR_HAVE_ELPM__)
+ elpm
+ mov \dest, r0
+.if \dest != D0+\n-1
+ adiw r30, 1
+ adc HHI8, __zero_reg__
+ out __RAMPZ__, HHI8
+.endif
+#elif defined (__AVR_HAVE_LPMX__)
+ lpm \dest, Z+
+#else
+ lpm
+ mov \dest, r0
+.if \dest != D0+\n-1
+ adiw r30, 1
+.endif
+#endif
+#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
+.if \dest == D0+\n-1
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+.endif
+#endif
+.endm ; .xload
+
+#if defined (L_xload_1)
+DEFUN __xload_1
+#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
+ sbrc HHI8, 7
+ ld D0, Z
+ sbrs HHI8, 7
+ lpm D0, Z
+ ret
+#else
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 1
+ ret
+1: ld D0, Z
+ ret
+#endif /* LPMx && ! ELPM */
+ENDF __xload_1
+#endif /* L_xload_1 */
+
+#if defined (L_xload_2)
+DEFUN __xload_2
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 2
+ .xload D1, 2
+ ret
+1: ld D0, Z+
+ ld D1, Z+
+ ret
+ENDF __xload_2
+#endif /* L_xload_2 */
+
+#if defined (L_xload_3)
+DEFUN __xload_3
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 3
+ .xload D1, 3
+ .xload D2, 3
+ ret
+1: ld D0, Z+
+ ld D1, Z+
+ ld D2, Z+
+ ret
+ENDF __xload_3
+#endif /* L_xload_3 */
+
+#if defined (L_xload_4)
+DEFUN __xload_4
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 4
+ .xload D1, 4
+ .xload D2, 4
+ .xload D3, 4
+ ret
+1: ld D0, Z+
+ ld D1, Z+
+ ld D2, Z+
+ ld D3, Z+
+ ret
+ENDF __xload_4
+#endif /* L_xload_4 */
+
+#endif /* L_xload_{1|2|3|4} */
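+
+;; Conceptually (an illustrative sketch, not code from this file), each
+;; __xload_<n> does:
+;;
+;;   if (R21 & 0x80)                /* bit 7 set: the address names RAM  */
+;;     load n bytes from RAM[Z];
+;;   else                           /* bit 7 clear: address names flash  */
+;;     load n bytes from Flash[R21:Z] via (E)LPM;
+;;
+;; which is how the 24-bit address distinguishes the two address spaces.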
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; memcopy from Address Space __pgmx to RAM
+;; R23:Z = Source Address
+;; X = Destination Address
+;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
+
+#if defined (L_movmemx)
+
+#define HHI8 23
+#define LOOP 24
+
+DEFUN __movmemx_qi
+ ;; #Bytes to copy fit in 8 Bits (1..255)
+ ;; Zero-extend Loop Counter
+ clr LOOP+1
+ ;; FALLTHRU
+ENDF __movmemx_qi
+
+DEFUN __movmemx_hi
+
+;; Read from where?
+ sbrc HHI8, 7
+ rjmp 1f
+
+;; Read from Flash
+
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif
+
+0: ;; Load 1 Byte from Flash...
+
+#if defined (__AVR_HAVE_ELPMX__)
+ elpm r0, Z+
+#elif defined (__AVR_HAVE_ELPM__)
+ elpm
+ adiw r30, 1
+ adc HHI8, __zero_reg__
+ out __RAMPZ__, HHI8
+#elif defined (__AVR_HAVE_LPMX__)
+ lpm r0, Z+
+#else
+ lpm
+ adiw r30, 1
+#endif
+
+ ;; ...and store that Byte to RAM Destination
+ st X+, r0
+ sbiw LOOP, 1
+ brne 0b
+#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+#endif /* ELPM && RAMPD */
+ ret
+
+;; Read from RAM
+
+1: ;; Read 1 Byte from RAM...
+ ld r0, Z+
+ ;; and store that Byte to RAM Destination
+ st X+, r0
+ sbiw LOOP, 1
+ brne 1b
+ ret
+ENDF __movmemx_hi
+
+#undef HHI8
+#undef LOOP
+
+#endif /* L_movmemx */
+
+
+.section .text.libgcc.builtins, "ax", @progbits
+
+/**********************************
+ * Find first set Bit (ffs)
+ **********************************/
+
+#if defined (L_ffssi2)
+;; find first set bit
+;; r25:r24 = ffs32 (r25:r22)
+;; clobbers: r22, r26
+DEFUN __ffssi2
+ clr r26
+ tst r22
+ brne 1f
+ subi r26, -8
+ or r22, r23
+ brne 1f
+ subi r26, -8
+ or r22, r24
+ brne 1f
+ subi r26, -8
+ or r22, r25
+ brne 1f
+ ret
+1: mov r24, r22
+ XJMP __loop_ffsqi2
+ENDF __ffssi2
+#endif /* defined (L_ffssi2) */
+
+#if defined (L_ffshi2)
+;; find first set bit
+;; r25:r24 = ffs16 (r25:r24)
+;; clobbers: r26
+DEFUN __ffshi2
+ clr r26
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have a problem skipping 2-word instructions
+ tst r24
+ breq 2f
+#else
+ cpse r24, __zero_reg__
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+1: XJMP __loop_ffsqi2
+2: ldi r26, 8
+ or r24, r25
+ brne 1b
+ ret
+ENDF __ffshi2
+#endif /* defined (L_ffshi2) */
+
+#if defined (L_loop_ffsqi2)
+;; Helper for ffshi2, ffssi2
+;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
+;; r24 must be != 0
+;; clobbers: r26
+DEFUN __loop_ffsqi2
+ inc r26
+ lsr r24
+ brcc __loop_ffsqi2
+ mov r24, r26
+ clr r25
+ ret
+ENDF __loop_ffsqi2
+#endif /* defined (L_loop_ffsqi2) */
+
+
+/**********************************
+ * Count trailing Zeros (ctz)
+ **********************************/
+
+#if defined (L_ctzsi2)
+;; count trailing zeros
+;; r25:r24 = ctz32 (r25:r22)
+;; clobbers: r26, r22
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
+DEFUN __ctzsi2
+ XCALL __ffssi2
+ dec r24
+ ret
+ENDF __ctzsi2
+#endif /* defined (L_ctzsi2) */
+
+#if defined (L_ctzhi2)
+;; count trailing zeros
+;; r25:r24 = ctz16 (r25:r24)
+;; clobbers: r26
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
+DEFUN __ctzhi2
+ XCALL __ffshi2
+ dec r24
+ ret
+ENDF __ctzhi2
+#endif /* defined (L_ctzhi2) */
+
+
+/**********************************
+ * Count leading Zeros (clz)
+ **********************************/
+
+#if defined (L_clzdi2)
+;; count leading zeros
+;; r25:r24 = clz64 (r25:r18)
+;; clobbers: r22, r23, r26
+DEFUN __clzdi2
+ XCALL __clzsi2
+ sbrs r24, 5
+ ret
+ mov_l r22, r18
+ mov_h r23, r19
+ mov_l r24, r20
+ mov_h r25, r21
+ XCALL __clzsi2
+ subi r24, -32
+ ret
+ENDF __clzdi2
+#endif /* defined (L_clzdi2) */
+
+#if defined (L_clzsi2)
+;; count leading zeros
+;; r25:r24 = clz32 (r25:r22)
+;; clobbers: r26
+DEFUN __clzsi2
+ XCALL __clzhi2
+ sbrs r24, 4
+ ret
+ mov_l r24, r22
+ mov_h r25, r23
+ XCALL __clzhi2
+ subi r24, -16
+ ret
+ENDF __clzsi2
+#endif /* defined (L_clzsi2) */
+
+#if defined (L_clzhi2)
+;; count leading zeros
+;; r25:r24 = clz16 (r25:r24)
+;; clobbers: r26
+DEFUN __clzhi2
+ clr r26
+ tst r25
+ brne 1f
+ subi r26, -8
+ or r25, r24
+ brne 1f
+ ldi r24, 16
+ ret
+1: cpi r25, 16
+ brsh 3f
+ subi r26, -3
+ swap r25
+2: inc r26
+3: lsl r25
+ brcc 2b
+ mov r24, r26
+ clr r25
+ ret
+ENDF __clzhi2
+#endif /* defined (L_clzhi2) */
+
+
+/**********************************
+ * Parity
+ **********************************/
+
+#if defined (L_paritydi2)
+;; r25:r24 = parity64 (r25:r18)
+;; clobbers: __tmp_reg__
+DEFUN __paritydi2
+ eor r24, r18
+ eor r24, r19
+ eor r24, r20
+ eor r24, r21
+ XJMP __paritysi2
+ENDF __paritydi2
+#endif /* defined (L_paritydi2) */
+
+#if defined (L_paritysi2)
+;; r25:r24 = parity32 (r25:r22)
+;; clobbers: __tmp_reg__
+DEFUN __paritysi2
+ eor r24, r22
+ eor r24, r23
+ XJMP __parityhi2
+ENDF __paritysi2
+#endif /* defined (L_paritysi2) */
+
+#if defined (L_parityhi2)
+;; r25:r24 = parity16 (r25:r24)
+;; clobbers: __tmp_reg__
+DEFUN __parityhi2
+ eor r24, r25
+;; FALLTHRU
+ENDF __parityhi2
+
+;; r25:r24 = parity8 (r24)
+;; clobbers: __tmp_reg__
+DEFUN __parityqi2
+ ;; parity is in r24[0..7]
+ mov __tmp_reg__, r24
+ swap __tmp_reg__
+ eor r24, __tmp_reg__
+ ;; parity is in r24[0..3]
+ subi r24, -4
+ andi r24, -5
+ subi r24, -6
+ ;; parity is in r24[0,3]
+ sbrc r24, 3
+ inc r24
+ ;; parity is in r24[0]
+ andi r24, 1
+ clr r25
+ ret
+ENDF __parityqi2
+#endif /* defined (L_parityhi2) */
+
+
+/**********************************
+ * Population Count
+ **********************************/
+
+#if defined (L_popcounthi2)
+;; population count
+;; r25:r24 = popcount16 (r25:r24)
+;; clobbers: __tmp_reg__
+DEFUN __popcounthi2
+ XCALL __popcountqi2
+ push r24
+ mov r24, r25
+ XCALL __popcountqi2
+ clr r25
+ ;; FALLTHRU
+ENDF __popcounthi2
+
+DEFUN __popcounthi2_tail
+ pop __tmp_reg__
+ add r24, __tmp_reg__
+ ret
+ENDF __popcounthi2_tail
+#endif /* defined (L_popcounthi2) */
+
+#if defined (L_popcountsi2)
+;; population count
+;; r25:r24 = popcount32 (r25:r22)
+;; clobbers: __tmp_reg__
+DEFUN __popcountsi2
+ XCALL __popcounthi2
+ push r24
+ mov_l r24, r22
+ mov_h r25, r23
+ XCALL __popcounthi2
+ XJMP __popcounthi2_tail
+ENDF __popcountsi2
+#endif /* defined (L_popcountsi2) */
+
+#if defined (L_popcountdi2)
+;; population count
+;; r25:r24 = popcount64 (r25:r18)
+;; clobbers: r22, r23, __tmp_reg__
+DEFUN __popcountdi2
+ XCALL __popcountsi2
+ push r24
+ mov_l r22, r18
+ mov_h r23, r19
+ mov_l r24, r20
+ mov_h r25, r21
+ XCALL __popcountsi2
+ XJMP __popcounthi2_tail
+ENDF __popcountdi2
+#endif /* defined (L_popcountdi2) */
+
+#if defined (L_popcountqi2)
+;; population count
+;; r24 = popcount8 (r24)
+;; clobbers: __tmp_reg__
+DEFUN __popcountqi2
+ mov __tmp_reg__, r24
+ andi r24, 1
+ lsr __tmp_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __tmp_reg__
+ ret
+ENDF __popcountqi2
+#endif /* defined (L_popcountqi2) */
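+
+;; A rough C model of the byte-wise popcount above (an illustrative sketch):
+;; the LSR/ADC chain adds the bits of r24 one at a time,
+;;
+;;   uint8_t n = x & 1;
+;;   for (uint8_t i = 1; i < 8; i++)
+;;     n += (x >> i) & 1;
+;;   return n;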
+
+
+/**********************************
+ * Swap bytes
+ **********************************/
+
+;; swap two registers with different register number
+.macro bswap a, b
+ eor \a, \b
+ eor \b, \a
+ eor \a, \b
+.endm
+
+#if defined (L_bswapsi2)
+;; swap bytes
+;; r25:r22 = bswap32 (r25:r22)
+DEFUN __bswapsi2
+ bswap r22, r25
+ bswap r23, r24
+ ret
+ENDF __bswapsi2
+#endif /* defined (L_bswapsi2) */
+
+#if defined (L_bswapdi2)
+;; swap bytes
+;; r25:r18 = bswap64 (r25:r18)
+DEFUN __bswapdi2
+ bswap r18, r25
+ bswap r19, r24
+ bswap r20, r23
+ bswap r21, r22
+ ret
+ENDF __bswapdi2
+#endif /* defined (L_bswapdi2) */
+
+
+/**********************************
+ * 64-bit shifts
+ **********************************/
+
+#if defined (L_ashrdi3)
+;; Arithmetic shift right
+;; r25:r18 = ashr64 (r25:r18, r17:r16)
+DEFUN __ashrdi3
+ bst r25, 7
+ bld __zero_reg__, 0
+ ;; FALLTHRU
+ENDF __ashrdi3
+
+;; Logic shift right
+;; r25:r18 = lshr64 (r25:r18, r17:r16)
+DEFUN __lshrdi3
+ lsr __zero_reg__
+ sbc __tmp_reg__, __tmp_reg__
+ push r16
+0: cpi r16, 8
+ brlo 2f
+ subi r16, 8
+ mov r18, r19
+ mov r19, r20
+ mov r20, r21
+ mov r21, r22
+ mov r22, r23
+ mov r23, r24
+ mov r24, r25
+ mov r25, __tmp_reg__
+ rjmp 0b
+1: asr __tmp_reg__
+ ror r25
+ ror r24
+ ror r23
+ ror r22
+ ror r21
+ ror r20
+ ror r19
+ ror r18
+2: dec r16
+ brpl 1b
+ pop r16
+ ret
+ENDF __lshrdi3
+#endif /* defined (L_ashrdi3) */
+
+#if defined (L_ashldi3)
+;; Shift left
+;; r25:r18 = ashl64 (r25:r18, r17:r16)
+DEFUN __ashldi3
+ push r16
+0: cpi r16, 8
+ brlo 2f
+ mov r25, r24
+ mov r24, r23
+ mov r23, r22
+ mov r22, r21
+ mov r21, r20
+ mov r20, r19
+ mov r19, r18
+ clr r18
+ subi r16, 8
+ rjmp 0b
+1: lsl r18
+ rol r19
+ rol r20
+ rol r21
+ rol r22
+ rol r23
+ rol r24
+ rol r25
+2: dec r16
+ brpl 1b
+ pop r16
+ ret
+ENDF __ashldi3
+#endif /* defined (L_ashldi3) */
+
+#if defined (L_rotldi3)
+;; Rotate left
+;; r25:r18 = rotl64 (r25:r18, r17:r16)
+DEFUN __rotldi3
+ push r16
+0: cpi r16, 8
+ brlo 2f
+ subi r16, 8
+ mov __tmp_reg__, r25
+ mov r25, r24
+ mov r24, r23
+ mov r23, r22
+ mov r22, r21
+ mov r21, r20
+ mov r20, r19
+ mov r19, r18
+ mov r18, __tmp_reg__
+ rjmp 0b
+1: lsl r18
+ rol r19
+ rol r20
+ rol r21
+ rol r22
+ rol r23
+ rol r24
+ rol r25
+ adc r18, __zero_reg__
+2: dec r16
+ brpl 1b
+ pop r16
+ ret
+ENDF __rotldi3
+#endif /* defined (L_rotldi3) */
+
+
+.section .text.libgcc.fmul, "ax", @progbits
+
+/***********************************************************/
+;;; Softmul versions of FMUL, FMULS and FMULSU to implement
+;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
+/***********************************************************/
+
+#define A1 24
+#define B1 25
+#define C0 22
+#define C1 23
+#define A0 __tmp_reg__
+
+#ifdef L_fmuls
+;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmuls
+ ;; A0.7 = negate result?
+ mov A0, A1
+ eor A0, B1
+ ;; B1 = |B1|
+ sbrc B1, 7
+ neg B1
+ XJMP __fmulsu_exit
+ENDF __fmuls
+#endif /* L_fmuls */
+
+#ifdef L_fmulsu
+;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmulsu
+ ;; A0.7 = negate result?
+ mov A0, A1
+;; FALLTHRU
+ENDF __fmulsu
+
+;; Helper for __fmuls and __fmulsu
+DEFUN __fmulsu_exit
+ ;; A1 = |A1|
+ sbrc A1, 7
+ neg A1
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have a problem skipping 2-word instructions
+ tst A0
+ brmi 1f
+#else
+ sbrs A0, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XJMP __fmul
+1: XCALL __fmul
+ ;; C = -C iff A0.7 = 1
+ NEG2 C0
+ ret
+ENDF __fmulsu_exit
+#endif /* L_fmulsu */
+
+
+#ifdef L_fmul
+;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmul
+ ; clear result
+ clr C0
+ clr C1
+ clr A0
+1: tst B1
+ ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
+2: brpl 3f
+ ;; C += A
+ add C0, A0
+ adc C1, A1
+3: ;; A >>= 1
+ lsr A1
+ ror A0
+ ;; B <<= 1
+ lsl B1
+ brne 2b
+ ret
+ENDF __fmul
+#endif /* L_fmul */
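+
+;; In C terms the soft FMUL above computes roughly (an illustrative sketch):
+;;
+;;   uint16_t fmul (uint8_t a, uint8_t b)
+;;   {
+;;     /* 1.7 x 1.7 -> 1.15: the plain 8 x 8 product shifted left by one,
+;;        truncated to 16 bits like the hardware FMUL result register.  */
+;;     return (uint16_t) (((unsigned) a * b) << 1);
+;;   }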
+
+#undef A0
+#undef A1
+#undef B1
+#undef C0
+#undef C1
+
+#include "lib1funcs-fixed.S"
diff --git a/gcc-4.9/libgcc/config/avr/lib2-object.mk b/gcc-4.9/libgcc/config/avr/lib2-object.mk
new file mode 100644
index 000000000..6a9e04de0
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/lib2-object.mk
@@ -0,0 +1,23 @@
+# This file is included several times in a row, once for each element of
+# $(iter-items). On each inclusion, we advance $o to the next element.
+# $(iter-labels) and $(iter-flags) are also advanced.
+# This works similarly to $(srcdir)/siditi-object.mk.
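+#
+# For example (values only for illustration), with
+#   iter-items = _roundhr _roundr ...,  iter-labels = hr r ...
+# the first inclusion emits the rule for _roundhr$(objext) with label "hr";
+# the next inclusion then sees iter-items = _roundr ..., and so on.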
+
+o := $(firstword $(iter-items))
+iter-items := $(filter-out $o,$(iter-items))
+
+$o-label := $(firstword $(iter-labels))
+iter-labels := $(wordlist 2,$(words $(iter-labels)),$(iter-labels))
+
+$o-flag := $(firstword $(iter-flags))
+iter-flags := $(wordlist 2,$(words $(iter-flags)),$(iter-flags))
+
+$o$(objext): %$(objext): $(srcdir)/config/avr/lib2funcs.c
+ $(gcc_compile) -DL_$($*-label) -DL_LABEL=$($*-label) $($*-flag) \
+ -c $< $(vis_hide)
+
+ifeq ($(enable_shared),yes)
+$(o)_s$(objext): %_s$(objext): $(srcdir)/config/avr/lib2funcs.c
+ $(gcc_s_compile) -DL_$($*-label) -DL_LABEL=$($*-label) $($*-flag) \
+ -c $<
+endif
diff --git a/gcc-4.9/libgcc/config/avr/lib2funcs.c b/gcc-4.9/libgcc/config/avr/lib2funcs.c
new file mode 100644
index 000000000..774d14ced
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/lib2funcs.c
@@ -0,0 +1,226 @@
+/* Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+/* This file supplies implementations for some AVR-specific builtin
+ functions so that code like the following works as expected:
+
+ int (*f (void))(_Fract)
+ {
+ return __builtin_avr_countlsr;
+ }
+
+ In this specific case, the generated code is:
+
+ f:
+ ldi r24,lo8(gs(__countlsHI))
+ ldi r25,hi8(gs(__countlsHI))
+ ret
+*/
+
+/* Map fixed-point suffix to the corresponding fixed-point type. */
+
+typedef short _Fract fx_hr_t;
+typedef _Fract fx_r_t;
+typedef long _Fract fx_lr_t;
+typedef long long _Fract fx_llr_t;
+
+typedef unsigned short _Fract fx_uhr_t;
+typedef unsigned _Fract fx_ur_t;
+typedef unsigned long _Fract fx_ulr_t;
+typedef unsigned long long _Fract fx_ullr_t;
+
+typedef short _Accum fx_hk_t;
+typedef _Accum fx_k_t;
+typedef long _Accum fx_lk_t;
+typedef long long _Accum fx_llk_t;
+
+typedef unsigned short _Accum fx_uhk_t;
+typedef unsigned _Accum fx_uk_t;
+typedef unsigned long _Accum fx_ulk_t;
+typedef unsigned long long _Accum fx_ullk_t;
+
+/* Map fixed-point suffix to the corresponding natural integer type. */
+
+typedef char int_hr_t;
+typedef int int_r_t;
+typedef long int_lr_t;
+typedef long long int_llr_t;
+
+typedef unsigned char int_uhr_t;
+typedef unsigned int int_ur_t;
+typedef unsigned long int_ulr_t;
+typedef unsigned long long int_ullr_t;
+
+typedef int int_hk_t;
+typedef long int_k_t;
+typedef long long int_lk_t;
+typedef long long int_llk_t;
+
+typedef unsigned int int_uhk_t;
+typedef unsigned long int_uk_t;
+typedef unsigned long long int_ulk_t;
+typedef unsigned long long int_ullk_t;
+
+/* Map mode to the corresponding integer type. */
+
+typedef char int_qi_t;
+typedef int int_hi_t;
+typedef long int_si_t;
+typedef long long int_di_t;
+
+typedef unsigned char uint_qi_t;
+typedef unsigned int uint_hi_t;
+typedef unsigned long uint_si_t;
+typedef unsigned long long uint_di_t;
+
+
+
+/************************************************************************/
+
+/* Supply implementations / symbols for __builtin_roundFX ASM_NAME. */
+
+#ifdef L_round
+
+#define ROUND1(FX) \
+ ROUND2 (FX)
+
+#define ROUND2(FX) \
+ extern fx_## FX ##_t __round## FX (fx_## FX ##_t x, int rpoint); \
+ \
+ fx_## FX ##_t \
+ __round## FX (fx_## FX ##_t x, int rpoint) \
+ { \
+ return __builtin_avr_round ##FX (x, rpoint); \
+ }
+
+ROUND1(L_LABEL)
+
+#endif /* L_round */
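+
+/* For illustration (not part of the build logic): when this file is compiled
+   with -DL_round and -DL_LABEL=hr, as arranged by lib2-object.mk and t-avr,
+   the macros above expand to roughly
+
+       extern fx_hr_t __roundhr (fx_hr_t x, int rpoint);
+
+       fx_hr_t
+       __roundhr (fx_hr_t x, int rpoint)
+       {
+         return __builtin_avr_roundhr (x, rpoint);
+       }
+*/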
+
+
+
+/*********************************************************************/
+
+/* Implement some count-leading-redundant-sign-bits functions to be used
+ by the countlsFX implementation. */
+
+#ifdef L__clrsbqi
+extern int __clrsbqi2 (char x);
+
+int
+__clrsbqi2 (char x)
+{
+ int ret;
+
+ if (x < 0)
+ x = ~x;
+
+ if (x == 0)
+ return 8 * sizeof (x) -1;
+
+ ret = __builtin_clz (x << 8);
+ return ret - 1;
+}
+#endif /* L__clrsbqi */
+
+
+#ifdef L__clrsbdi
+extern int __clrsbdi2 (long long x);
+
+int
+__clrsbdi2 (long long x)
+{
+ int ret;
+
+ if (x < 0LL)
+ x = ~x;
+
+ if (x == 0LL)
+ return 8 * sizeof (x) -1;
+
+ ret = __builtin_clzll ((unsigned long long) x);
+ return ret - 1;
+}
+#endif /* L__clrsbdi */
+
+
+
+/*********************************************************************/
+
+/* Supply implementations / symbols for __builtin_avr_countlsFX. */
+
+/* Signed */
+
+#ifdef L_countls
+
+#define COUNTLS1(MM) \
+ COUNTLS2 (MM)
+
+#define COUNTLS2(MM) \
+ extern int __countls## MM ##2 (int_## MM ##_t); \
+ extern int __clrsb## MM ##2 (int_## MM ##_t); \
+ \
+ int \
+ __countls## MM ##2 (int_## MM ##_t x) \
+ { \
+ if (x == 0) \
+ return __INT8_MAX__; \
+ \
+ return __clrsb## MM ##2 (x); \
+ }
+
+COUNTLS1(L_LABEL)
+
+#endif /* L_countls */
+
+/* Unsigned */
+
+#ifdef L_countlsu
+
+#define clz_qi2 __builtin_clz /* unused, avoid warning */
+#define clz_hi2 __builtin_clz
+#define clz_si2 __builtin_clzl
+#define clz_di2 __builtin_clzll
+
+#define COUNTLS1(MM) \
+ COUNTLS2 (MM)
+
+#define COUNTLS2(MM) \
+ extern int __countlsu## MM ##2 (uint_## MM ##_t); \
+ \
+ int \
+ __countlsu## MM ##2 (uint_## MM ##_t x) \
+ { \
+ if (x == 0) \
+ return __INT8_MAX__; \
+ \
+ if (sizeof (x) == 1) \
+ return clz_hi2 (x << 8); \
+ else \
+ return clz_## MM ##2 (x); \
+ }
+
+COUNTLS1(L_LABEL)
+
+#endif /* L_countlsu */
diff --git a/gcc-4.9/libgcc/config/avr/t-avr b/gcc-4.9/libgcc/config/avr/t-avr
new file mode 100644
index 000000000..461304706
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/t-avr
@@ -0,0 +1,277 @@
+LIB1ASMSRC = avr/lib1funcs.S
+LIB1ASMFUNCS = \
+ _mulqi3 \
+ _mulhi3 \
+ _mulqihi3 _umulqihi3 \
+ _mulpsi3 _mulsqipsi3 \
+ _mulhisi3 \
+ _umulhisi3 \
+ _usmulhisi3 \
+ _muluhisi3 \
+ _mulshisi3 \
+ _mulsi3 \
+ _udivmodqi4 \
+ _divmodqi4 \
+ _udivmodhi4 \
+ _divmodhi4 \
+ _divmodpsi4 _udivmodpsi4 \
+ _udivmodsi4 \
+ _divmodsi4 \
+ _divdi3 _udivdi3 \
+ _muldi3 _muldi3_6 \
+ _mulsidi3 _umulsidi3 \
+ _udivmod64 \
+ _negsi2 _negdi2 \
+ _prologue \
+ _epilogue \
+ _exit \
+ _cleanup \
+ _tablejump \
+ _tablejump_elpm \
+ _load_3 _load_4 \
+ _xload_1 _xload_2 _xload_3 _xload_4 \
+ _movmemx \
+ _copy_data \
+ _clear_bss \
+ _ctors \
+ _dtors \
+ _ffssi2 \
+ _ffshi2 \
+ _loop_ffsqi2 \
+ _ctzsi2 \
+ _ctzhi2 \
+ _clzdi2 \
+ _clzsi2 \
+ _clzhi2 \
+ _paritydi2 \
+ _paritysi2 \
+ _parityhi2 \
+ _popcounthi2 \
+ _popcountsi2 \
+ _popcountdi2 \
+ _popcountqi2 \
+ _bswapsi2 \
+ _bswapdi2 \
+ _ashldi3 _ashrdi3 _lshrdi3 _rotldi3 \
+ _adddi3 _adddi3_s8 _subdi3 \
+ _cmpdi2 _cmpdi2_s8 \
+ _fmul _fmuls _fmulsu
+
+# Fixed point routines in avr/lib1funcs-fixed.S
+LIB1ASMFUNCS += \
+ _fractqqsf _fractuqqsf \
+ _fracthqsf _fractuhqsf _fracthasf _fractuhasf \
+ _fractsasf _fractusasf _fractsqsf _fractusqsf \
+ \
+ _fractsfqq _fractsfuqq \
+ _fractsfhq _fractsfuhq _fractsfha _fractsfuha \
+ _fractsfsq _fractsfusq _fractsfsa _fractsfusa \
+ _mulqq3 \
+ _mulhq3 _muluhq3 \
+ _mulha3 _muluha3 _muluha3_round \
+ _mulsa3 _mulusa3 \
+ _usmuluha3 _ssmulha3 \
+ _usmulusa3 _ssmulsa3 \
+ _divqq3 _udivuqq3 _divqq_helper \
+ _divhq3 _udivuhq3 \
+ _divha3 _udivuha3 \
+ _divsa3 _udivusa3 \
+ _clr_8 \
+ _ssneg_2 _ssneg_4 _ssneg_8 \
+ _ssabs_1 _ssabs_2 _ssabs_4 _ssabs_8 \
+ _ssadd_8 _sssub_8 \
+ _usadd_8 _ussub_8 \
+ _mask1 _ret \
+ _roundqq3 _rounduqq3 \
+ _round_s2 _round_u2 _round_2_const _addmask_2 \
+ _round_s4 _round_u4 _round_4_const _addmask_4 \
+ _round_x8 \
+ _rounddq3 _roundudq3 \
+ _roundda3 _rounduda3 \
+ _roundta3 _rounduta3 \
+
+
+LIB2FUNCS_EXCLUDE = \
+ _moddi3 _umoddi3 \
+ _clz \
+ _clrsbdi2 \
+
+
+# We do not have the DF type.
+# Most of the C functions in libgcc2 use almost all registers,
+# so use -mcall-prologues for smaller code size.
+HOST_LIBGCC2_CFLAGS += -DDF=SF -Dinhibit_libc -mcall-prologues -Os
+
+# Extra 16-bit integer functions.
+intfuncs16 = _absvXX2 _addvXX3 _subvXX3 _mulvXX3 _negvXX2 _clrsbXX2
+
+hiintfuncs16 = $(subst XX,hi,$(intfuncs16))
+siintfuncs16 = $(subst XX,si,$(intfuncs16))
+
+iter-items := $(hiintfuncs16)
+iter-labels := $(siintfuncs16)
+iter-sizes := $(patsubst %,2,$(siintfuncs16)) $(patsubst %,2,$(hiintfuncs16))
+
+
+include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/siditi-object.mk,$(iter-items))
+libgcc-objects += $(patsubst %,%$(objext),$(hiintfuncs16))
+
+ifeq ($(enable_shared),yes)
+libgcc-s-objects += $(patsubst %,%_s$(objext),$(hiintfuncs16))
+endif
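
avr-gcc has a 16-bit int, i.e. HImode, so the overflow-checking and clrsb
helpers are also needed in HImode variants that libgcc2.c does not provide
by default; judging from iter-labels and iter-sizes, the iteration above
rebuilds the SImode entries with a 2-byte word size to obtain them.  A
hypothetical example of code that is expected to end up in one of these
helpers (not part of the sources):

    /* Compiled with avr-gcc -ftrapv, a plain 16-bit signed multiplication
       is expected to be routed through the overflow-checking helper
       __mulvhi3 that the iteration above adds to libgcc.  */
    int
    checked_scale (int a, int b)
    {
      return a * b;
    }
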
+
+###
+
+conv_XY=$(conv)$(mode1)$(mode2)
+func_X=$(func)$(mode)
+
+# Compile C functions from lib2funcs.c and add them to libgcc.a.
+#
+# Some functions that are not performance-critical are more convenient
+# to implement in C than in assembler.  Most of them serve as the
+# implementation of AVR-specific builtins for the case where the address
+# of a builtin function is taken or where there is no insn that
+# implements the builtin.
+#
+# We don't use LIB2ADD because we want to iterate over the source for
+# different modes, fixed-point suffixes, etc.  See iter-labels and L_LABEL.
+# Each iter-label gets one extra underscore so that we end up with defines
+# like -DL_k instead of overly short ones like -DLk.
+
+# Build roundFX functions from lib2funcs.c
+
+round_suffix := hr r lr uhr ur ulr \
+ hk k uhk uk
+round_funcs := $(foreach func,_round,\
+ $(foreach mode,$(round_suffix),$(func_X)))
+
+iter-items := $(round_funcs)
+iter-labels := $(round_suffix)
+iter-flags := $(patsubst %,-DL_round,$(iter-items))
+
+include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items))
+
+libgcc-objects += $(patsubst %,%$(objext),$(round_funcs))
+
+# Build clrsbXX functions from lib2funcs.c
+
+clrsb_modes := qi di
+clrsb_funcs := $(foreach func,_clrsb,\
+ $(foreach mode,$(clrsb_modes),$(func_X)))
+
+iter-items := $(clrsb_funcs)
+iter-labels := $(clrsb_funcs)
+iter-flags := $(patsubst %,-DL_clrsb,$(iter-items))
+
+include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items))
+
+libgcc-objects += $(patsubst %,%$(objext),$(clrsb_funcs))
+
+# Build signed countlsFX functions from lib2funcs.c
+
+countls_modes := qi hi si di
+countls_funcs := $(foreach func,_countls,\
+ $(foreach mode,$(countls_modes),$(func_X)))
+
+iter-items := $(countls_funcs)
+iter-labels := $(countls_modes)
+iter-flags := $(patsubst %,-DL_countls,$(iter-items))
+
+include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items))
+
+libgcc-objects += $(patsubst %,%$(objext),$(countls_funcs))
+
+# Build unsigned countlsFX functions from lib2funcs.c
+
+countlsu_modes := qi hi si di
+countlsu_funcs := $(foreach func,_countlsu,\
+ $(foreach mode,$(countlsu_modes),$(func_X)))
+
+iter-items := $(countlsu_funcs)
+iter-labels := $(countlsu_modes)
+iter-flags := $(patsubst %,-DL_countlsu,$(iter-items))
+
+include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items))
+
+libgcc-objects += $(patsubst %,%$(objext),$(countlsu_funcs))
+
+
+# Filter out supported conversions from fixed-bit.c
+# Also filter out TQ and UTQ.
+
+# Conversions supported by the compiler
+
+convf_modes = QI UQI QQ UQQ \
+ HI UHI HQ UHQ HA UHA \
+ SI USI SQ USQ SA USA \
+ DI UDI DQ UDQ DA UDA \
+ TI UTI TQ UTQ TA UTA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_fract _fractuns,\
+ $(foreach mode1,$(convf_modes),\
+ $(foreach mode2,$(convf_modes),$(conv_XY))))
+
+# Conversions supported by lib1funcs-fixed.S
+
+conv_to_sf_modes = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA
+conv_from_sf_modes = QQ UQQ HQ UHQ HA UHA SA USA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_fract, \
+ $(foreach mode1,$(conv_to_sf_modes), \
+ $(foreach mode2,SF,$(conv_XY))))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_fract,\
+ $(foreach mode1,SF,\
+ $(foreach mode2,$(conv_from_sf_modes),$(conv_XY))))
+
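The float conversions removed in the last two blocks are not lost: they are
provided, hand-optimized, by lib1funcs-fixed.S (see the _fract* entries in
LIB1ASMFUNCS above), so the generic fixed-bit.c versions would only
duplicate them.  A hypothetical example, assuming the usual avr-gcc mapping
of _Accum to SAmode:

    #include <stdfix.h>

    /* Converting an _Accum to float is expected to call __fractsasf,
       built from lib1funcs-fixed.S rather than from fixed-bit.c.  */
    float
    accum_to_float (_Accum a)
    {
      return (float) a;
    }
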
+# Arithmetic supported by the compiler
+
+allfix_modes = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA DA UDA DQ UDQ TQ UTQ TA UTA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_add _sub,\
+ $(foreach mode,$(allfix_modes),$(func_X)))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_lshr _ashl _ashr _cmp,\
+ $(foreach mode,$(allfix_modes),$(func_X)))
+
+
+usat_modes = UQQ UHQ UHA USQ USA UDQ UDA UTQ UTA
+ssat_modes = QQ HQ HA SQ SA DQ DA TQ TA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_ssadd _sssub _ssneg _ssabs,\
+ $(foreach mode,$(ssat_modes),$(func_X)))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_usadd _ussub _usneg,\
+ $(foreach mode,$(usat_modes),$(func_X)))
+
+
+smul_modes = QQ HQ HA SA
+umul_modes = UQQ UHQ UHA USA
+sdiv_modes = QQ HQ HA SA
+udiv_modes = UQQ UHQ UHA USA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_mul,\
+ $(foreach mode,$(smul_modes) $(umul_modes),$(func_X)))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_div,\
+ $(foreach mode,$(sdiv_modes) $(udiv_modes),$(func_X)))
+
+
+ssmul_modes = HA SA
+usmul_modes = UHA USA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_usmul,\
+ $(foreach mode,$(usmul_modes),$(func_X)))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach func,_ssmul,\
+ $(foreach mode,$(ssmul_modes),$(func_X)))
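
Likewise, the saturating multiplications excluded here are covered by the
_ssmulha3 / _usmuluha3 and _ssmulsa3 / _usmulusa3 entries in LIB1ASMFUNCS.
A hypothetical example, assuming short _Accum maps to HAmode as usual on
avr-gcc:

    #include <stdfix.h>

    /* A saturating 16-bit accum multiplication is expected to expand to a
       call to __ssmulha3 from lib1funcs-fixed.S; the generic fixed-bit.c
       version is excluded above.  */
    _Sat short _Accum
    sat_scale (_Sat short _Accum x, _Sat short _Accum y)
    {
      return x * y;
    }
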
diff --git a/gcc-4.9/libgcc/config/avr/t-avrlibc b/gcc-4.9/libgcc/config/avr/t-avrlibc
new file mode 100644
index 000000000..d2c8b870a
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/t-avrlibc
@@ -0,0 +1,66 @@
+# This file is used unless the compiler is configured with --with-avrlibc=no.
+#
+# AVR-Libc comes with hand-optimized float routines.
+# For historical reasons, these routines live in AVR-Libc rather than in
+# libgcc, but they use the same function names as libgcc.
+# To make sure the AVR-Libc routines are always used, remove the
+# corresponding routines from libgcc.
+#
+# See also PR54461.
+#
+#
+# Arithmetic:
+# __addsf3 __subsf3 __divsf3 __mulsf3 __negsf2
+#
+# Comparison:
+# __cmpsf2 __unordsf2
+# __eqsf2 __lesf2 __ltsf2 __nesf2 __gesf2 __gtsf2
+#
+# Conversion:
+# __fixsfdi __fixunssfdi __floatdisf __floatundisf
+# __fixsfsi __fixunssfsi __floatsisf __floatunsisf
+#
+#
+# These functions are contained in modules:
+#
+# _addsub_sf.o: __addsf3 __subsf3
+# _mul_sf.o: __mulsf3
+# _div_sf.o: __divsf3
+# _negate_sf.o: __negsf2
+#
+# _compare_sf.o: __cmpsf2
+# _unord_sf.o: __unordsf2
+# _eq_sf.o: __eqsf2
+# _ne_sf.o: __nesf2
+# _ge_sf.o: __gesf2
+# _gt_sf.o: __gtsf2
+# _le_sf.o: __lesf2
+# _lt_sf.o: __ltsf2
+#
+# _fixsfdi.o: __fixsfdi
+# _fixunssfdi.o: __fixunssfdi
+# _fixunssfsi.o: __fixunssfsi
+# _floatdisf.o: __floatdisf
+# _floatundisf.o: __floatundisf
+# _sf_to_si.o: __fixsfsi
+# _si_to_sf.o: __floatsisf
+# _usi_to_sf.o: __floatunsisf
+
+
+# SFmode
+LIB2FUNCS_EXCLUDE += \
+ _addsub_sf \
+ _negate_sf \
+ _mul_sf _div_sf \
+ \
+ _compare_sf \
+ _unord_sf \
+ _eq_sf _ne_sf \
+ _gt_sf _ge_sf \
+ _lt_sf _le_sf \
+ \
+ _si_to_sf _sf_to_si \
+ _usi_to_sf _sf_to_usi \
+ _fixunssfsi _fixsfdi \
+ _fixunssfdi \
+ _floatdisf _floatundisf
diff --git a/gcc-4.9/libgcc/config/avr/t-rtems b/gcc-4.9/libgcc/config/avr/t-rtems
new file mode 100644
index 000000000..43b57ee32
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/t-rtems
@@ -0,0 +1,2 @@
+# RTEMS uses _exit from newlib
+LIB1ASMFUNCS := $(filter-out _exit,$(LIB1ASMFUNCS))