1 files changed, 0 insertions, 1910 deletions
diff --git a/gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S b/gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S
deleted file mode 100644
index d80389ce2..000000000
--- a/gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S
+++ /dev/null
@@ -1,1910 +0,0 @@
-/*  -*- Mode: Asm -*-  */
-;;    Copyright (C) 2012-2013 Free Software Foundation, Inc.
-;;    Contributed by Sean D'Epagnier  (sean@depagnier.com)
-;;                   Georg-Johann Lay (avr@gjlay.de)
-
-;; This file is free software; you can redistribute it and/or modify it
-;; under the terms of the GNU General Public License as published by the
-;; Free Software Foundation; either version 3, or (at your option) any
-;; later version.
-
-;; In addition to the permissions in the GNU General Public License, the
-;; Free Software Foundation gives you unlimited permission to link the
-;; compiled version of this file into combinations with other programs,
-;; and to distribute those combinations without any restriction coming
-;; from the use of this file.  (The General Public License restrictions
-;; do apply in other respects; for example, they cover modification of
-;; the file, and distribution when not linked into a combine
-;; executable.)
-
-;; This file is distributed in the hope that it will be useful, but
-;; WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-;; General Public License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with this program; see the file COPYING.  If not, write to
-;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Fixed point library routines for AVR
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.section .text.libgcc.fixed, "ax", @progbits
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Conversions to float
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#if defined (L_fractqqsf)
-;;; __fractqqsf: convert the 1-byte signed fractional value in r24 to float.
-;;; The byte is placed in r23 with r22 cleared and r25:r24 filled with
-;;; copies of the sign bit; the conversion itself is done by __fractsasf.
-DEFUN __fractqqsf
-    ;; Move in place for SA -> SF conversion
-    clr     r22
-    mov     r23, r24
-    ;; Sign-extend
-    lsl     r24             ; Carry = sign bit of the input
-    sbc     r24, r24        ; r24 = 0x00 if Carry clear, 0xff if Carry set
-    mov     r25, r24
-    XJMP    __fractsasf     ; tail call
-ENDF __fractqqsf
-#endif  /* L_fractqqsf */
-
-#if defined (L_fractuqqsf)
-;;; __fractuqqsf: convert the 1-byte unsigned fractional value in r24 to float.
-;;; The byte is placed in r23 with r22, r24 and r25 cleared; the conversion
-;;; itself is done by __fractusasf.
-DEFUN __fractuqqsf
-    ;; Move in place for USA -> SF conversion
-    clr     r22
-    mov     r23, r24
-    ;; Zero-extend
-    clr     r24
-    clr     r25
-    XJMP    __fractusasf    ; tail call
-ENDF __fractuqqsf
-#endif  /* L_fractuqqsf */
-
-#if defined (L_fracthqsf)
-;;; __fracthqsf: convert the 2-byte signed fractional value in r25:r24 to
-;;; float.  The value becomes the low half r23:r22 of a 4-byte operand whose
-;;; high half r25:r24 holds the sign; the conversion is done by __fractsasf.
-;;; NOTE(review): wmov is a macro defined outside this chunk, presumably a
-;;; register-pair move -- confirm.
-DEFUN __fracthqsf
-    ;; Move in place for SA -> SF conversion
-    wmov    22, 24
-    ;; Sign-extend
-    lsl     r25             ; Carry = sign bit of the input
-    sbc     r24, r24        ; r24 = 0x00 if Carry clear, 0xff if Carry set
-    mov     r25, r24
-    XJMP    __fractsasf     ; tail call
-ENDF __fracthqsf
-#endif  /* L_fracthqsf */
-
-#if defined (L_fractuhqsf)
-;;; __fractuhqsf: convert the 2-byte unsigned fractional value in r25:r24 to
-;;; float.  The value becomes the low half r23:r22 of a 4-byte operand whose
-;;; high half r25:r24 is zero; the conversion is done by __fractusasf.
-DEFUN __fractuhqsf
-    ;; Move in place for USA -> SF conversion
-    wmov    22, 24
-    ;; Zero-extend
-    clr     r24
-    clr     r25
-    XJMP    __fractusasf    ; tail call
-ENDF __fractuhqsf
-#endif  /* L_fractuhqsf */
-
-#if defined (L_fracthasf)
-;;; __fracthasf: convert the 2-byte signed fixed-point value in r25:r24 to
-;;; float.  The two input bytes are moved up to r24:r23, r22 is cleared and
-;;; r25 receives the sign; the conversion is done by __fractsasf.
-DEFUN __fracthasf
-    ;; Move in place for SA -> SF conversion
-    clr     r22
-    mov     r23, r24
-    mov     r24, r25
-    ;; Sign-extend
-    lsl     r25             ; Carry = sign bit of the input
-    sbc     r25, r25        ; r25 = 0x00 if Carry clear, 0xff if Carry set
-    XJMP    __fractsasf     ; tail call
-ENDF __fracthasf
-#endif  /* L_fracthasf */
-
-#if defined (L_fractuhasf)
-;;; __fractuhasf: convert the 2-byte unsigned fixed-point value in r25:r24
-;;; to float.  The two input bytes are moved up to r24:r23, r22 and r25 are
-;;; cleared; the conversion is done by __fractusasf.
-DEFUN __fractuhasf
-    ;; Move in place for USA -> SF conversion
-    clr     r22
-    mov     r23, r24
-    mov     r24, r25
-    ;; Zero-extend
-    clr     r25
-    XJMP    __fractusasf    ; tail call
-ENDF __fractuhasf
-#endif  /* L_fractuhasf */
-
-
-#if defined (L_fractsqsf)
-;;; __fractsqsf: signed 4-byte fractional -> float.
-;;; Calls __floatsisf on the unchanged argument registers and then lowers
-;;; the exponent of the result, which is read from r25:r24.
-;;; NOTE(review): exp_lo / exp_hi are defined outside this chunk; presumably
-;;; they yield the low / high byte of the exponent offset -- confirm.
-DEFUN __fractsqsf
-    XCALL   __floatsisf
-    ;; Divide non-zero results by 2^31 to move the
-    ;; decimal point into place
-    tst     r25
-    breq    0f              ; r25 == 0: leave the result untouched
-    subi    r24, exp_lo (31)
-    sbci    r25, exp_hi (31)
-0:  ret
-ENDF __fractsqsf
-#endif  /* L_fractsqsf */
-
-#if defined (L_fractusqsf)
-;;; __fractusqsf: unsigned 4-byte fractional -> float.
-;;; Calls __floatunsisf on the unchanged argument registers and then lowers
-;;; the exponent of the result.  Only r25 is adjusted here.
-DEFUN __fractusqsf
-    XCALL   __floatunsisf
-    ;; Divide non-zero results by 2^32 to move the
-    ;; decimal point into place
-    cpse    r25, __zero_reg__   ; skip the adjustment when r25 == 0
-    subi    r25, exp_hi (32)
-    ret
-ENDF __fractusqsf
-#endif  /* L_fractusqsf */
-
-#if defined (L_fractsasf)
-;;; __fractsasf: signed 4-byte fixed-point (15 fractional bits) -> float.
-;;; Calls __floatsisf on the unchanged argument registers and then lowers
-;;; the exponent of the result, which is read from r25:r24.
-;;; Also the common tail of the narrower signed conversions in this file.
-DEFUN __fractsasf
-    XCALL   __floatsisf
-    ;; Divide non-zero results by 2^15 to move the
-    ;; decimal point into place
-    tst     r25
-    breq    0f              ; r25 == 0: leave the result untouched
-    subi    r24, exp_lo (15)
-    sbci    r25, exp_hi (15)
-0:  ret
-ENDF __fractsasf
-#endif  /* L_fractsasf */
-
-#if defined (L_fractusasf)
-;;; __fractusasf: unsigned 4-byte fixed-point (16 fractional bits) -> float.
-;;; Calls __floatunsisf on the unchanged argument registers and then lowers
-;;; the exponent of the result.  Only r25 is adjusted here.
-;;; Also the common tail of the narrower unsigned conversions in this file.
-DEFUN __fractusasf
-    XCALL   __floatunsisf
-    ;; Divide non-zero results by 2^16 to move the
-    ;; decimal point into place
-    cpse    r25, __zero_reg__   ; skip the adjustment when r25 == 0
-    subi    r25, exp_hi (16)
-    ret
-ENDF __fractusasf
-#endif  /* L_fractusasf */
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Conversions from float
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#if defined (L_fractsfqq)
-;;; __fractsfqq: float -> 1-byte signed fractional, returned in r24.
-;;; The exponent bytes r25:r24 of the float argument are adjusted first,
-;;; then __fixsfsi converts to an integer whose top byte is the result.
-DEFUN __fractsfqq
-    ;; Multiply with 2^{24+7} to get a QQ result in r25
-    subi    r24, exp_lo (-31)
-    sbci    r25, exp_hi (-31)
-    XCALL   __fixsfsi
-    mov     r24, r25        ; move the result byte into the return register
-    ret
-ENDF __fractsfqq
-#endif  /* L_fractsfqq */
-
-#if defined (L_fractsfuqq)
-;;; __fractsfuqq: float -> 1-byte unsigned fractional, returned in r24.
-;;; The exponent byte r25 of the float argument is adjusted first, then
-;;; __fixunssfsi converts to an integer whose top byte is the result.
-DEFUN __fractsfuqq
-    ;; Multiply with 2^{24+8} to get a UQQ result in r25
-    subi    r25, exp_hi (-32)
-    XCALL   __fixunssfsi
-    mov     r24, r25        ; move the result byte into the return register
-    ret
-ENDF __fractsfuqq
-#endif  /* L_fractsfuqq */
-
-#if defined (L_fractsfha)
-;;; __fractsfha: float -> 2-byte signed fixed-point.
-;;; Adjusts the exponent bytes r25:r24 of the argument and tail-calls
-;;; __fixsfsi, so the return value is whatever __fixsfsi leaves behind.
-DEFUN __fractsfha
-    ;; Multiply with 2^{16+7} to get a HA result in r25:r24
-    subi    r24, exp_lo (-23)
-    sbci    r25, exp_hi (-23)
-    XJMP    __fixsfsi
-ENDF __fractsfha
-#endif  /* L_fractsfha */
-
-#if defined (L_fractsfuha)
-;;; __fractsfuha: float -> 2-byte unsigned fixed-point.
-;;; Adjusts the exponent byte r25 of the argument and tail-calls
-;;; __fixunssfsi.
-DEFUN __fractsfuha
-    ;; Multiply with 2^24 to get a UHA result in r25:r24
-    subi    r25, exp_hi (-24)
-    XJMP    __fixunssfsi
-ENDF __fractsfuha
-#endif  /* L_fractsfuha */
-
-#if defined (L_fractsfhq)
-;;; __fractsfhq / __fractsfsq: float -> signed fractional (2 or 4 bytes).
-;;; Both names share one body; the caller picks the result width by
-;;; reading either r25:r24 or r25:r22.
-;;; NOTE(review): FALIAS is a macro defined outside this chunk; presumably
-;;; it makes __fractsfsq an alias of the function that follows -- confirm.
-FALIAS __fractsfsq
-
-DEFUN __fractsfhq
-    ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
-    ;; resp. with 2^31 to get a SQ result in r25:r22
-    subi    r24, exp_lo (-31)
-    sbci    r25, exp_hi (-31)
-    XJMP    __fixsfsi
-ENDF __fractsfhq
-#endif  /* L_fractsfhq */
-
-#if defined (L_fractsfuhq)
-;;; __fractsfuhq / __fractsfusq: float -> unsigned fractional (2 or 4 bytes).
-;;; Both names share one body; the caller picks the result width by
-;;; reading either r25:r24 or r25:r22.
-FALIAS __fractsfusq
-
-DEFUN __fractsfuhq
-    ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
-    ;; resp. with 2^32 to get a USQ result in r25:r22
-    subi    r25, exp_hi (-32)
-    XJMP    __fixunssfsi
-ENDF __fractsfuhq
-#endif  /* L_fractsfuhq */
-
-#if defined (L_fractsfsa)
-;;; __fractsfsa: float -> 4-byte signed fixed-point (15 fractional bits).
-;;; Adjusts the exponent bytes r25:r24 of the argument and tail-calls
-;;; __fixsfsi.
-DEFUN __fractsfsa
-    ;; Multiply with 2^15 to get a SA result in r25:r22
-    subi    r24, exp_lo (-15)
-    sbci    r25, exp_hi (-15)
-    XJMP    __fixsfsi
-ENDF __fractsfsa
-#endif  /* L_fractsfsa */
-
-#if defined (L_fractsfusa)
-;;; __fractsfusa: float -> 4-byte unsigned fixed-point (16 fractional bits).
-;;; Adjusts the exponent byte r25 of the argument and tail-calls
-;;; __fixunssfsi.
-DEFUN __fractsfusa
-    ;; Multiply with 2^16 to get a USA result in r25:r22
-    subi    r25, exp_hi (-16)
-    XJMP    __fixunssfsi
-ENDF __fractsfusa
-#endif  /* L_fractsfusa */
-
-
-;; For multiplication the functions here are called directly from
-;; avr-fixed.md instead of using the standard libcall mechanisms.
-;; This can make better code because GCC knows exactly which
-;; of the call-used registers (not all of them) are clobbered.
-
-/*******************************************************
-    Fractional  Multiplication  8 x 8  without MUL
-*******************************************************/
-
-#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
-;;; Signed fractional multiply 8 x 8 for devices without MUL.
-;;; R23 = R24 * R25
-;;; Clobbers: __tmp_reg__, R22, R24, R25
-;;; Rounding: ???
-;;; The product itself is computed by __fmuls; this wrapper only fixes up
-;;; the single overflowing case.
-DEFUN __mulqq3
-    XCALL   __fmuls
-    ;; TR 18037 requires that  (-1) * (-1)  does not overflow
-    ;; The only input that can produce  -1  is  (-1)^2.
-    dec     r23             ; 0x80 -> 0x7f sets V, every other value clears it
-    brvs    0f              ; overflow case: keep the saturated 0x7f
-    inc     r23             ; otherwise undo the decrement
-0:  ret
-ENDF  __mulqq3
-#endif /* L_mulqq3 && ! HAVE_MUL */
-
-/*******************************************************
-    Fractional Multiply  .16 x .16  with and without MUL
-*******************************************************/
-
-#if defined (L_mulhq3)
-;;; Signed fractional multiply .15 x .15, result in R25:R24.
-;;; Same code with and without MUL, but the interfaces differ:
-;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
-;;;         Clobbers: ABI, called by optabs
-;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
-;;;         Clobbers: __tmp_reg__, R22, R23
-;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
-;;; The 32-bit product is obtained from __mulhisi3; the code below reads
-;;; it from R25..R23, shifts it left by one bit and rounds.
-DEFUN   __mulhq3
-    XCALL   __mulhisi3
-    ;; Shift result into place
-    lsl     r23
-    rol     r24
-    rol     r25
-    brvs    1f              ; V set by the last rol: result does not fit
-    ;; Round
-    sbrc    r23, 7          ; highest bit below the result is the round bit
-    adiw    r24, 1
-    ret
-1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
-    ldi     r24, lo8 (0x7fff)
-    ldi     r25, hi8 (0x7fff)
-    ret
-ENDF __mulhq3
-#endif  /* defined (L_mulhq3) */
-
-#if defined (L_muluhq3)
-;;; Unsigned fractional multiply .16 x .16, result in R25:R24.
-;;; Same code with and without MUL, but the interfaces differ:
-;;; no MUL: (R25:R24) *= (R23:R22)
-;;;         Clobbers: ABI, called by optabs
-;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
-;;;         Clobbers: __tmp_reg__, R22, R23
-;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
-;;; The 32-bit product is obtained from __umulhisi3; its upper half in
-;;; R25:R24 is already the result, R23 bit 7 is the round bit.
-DEFUN   __muluhq3
-    XCALL   __umulhisi3
-    ;; Round
-    sbrc    r23, 7
-    adiw    r24, 1
-    ret
-ENDF __muluhq3
-#endif  /* L_muluhq3 */
-
-
-/*******************************************************
-    Fixed  Multiply  8.8 x 8.8  with and without MUL
-*******************************************************/
-
-#if defined (L_mulha3)
-;;; Signed fixed-point multiply s8.7 x s8.7, result in R25:R24.
-;;; Same code with and without MUL, but the interfaces differ:
-;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
-;;;         Clobbers: ABI, called by optabs
-;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
-;;;         Clobbers: __tmp_reg__, R22, R23
-;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
-;;; The product from __mulhisi3 is shifted left by one bit in R24..R22 so
-;;; that the shared tail __muluha3_round can pick the result from R24:R23.
-DEFUN   __mulha3
-    XCALL   __mulhisi3
-    lsl     r22
-    rol     r23
-    rol     r24
-    XJMP    __muluha3_round
-ENDF __mulha3
-#endif  /* L_mulha3 */
-
-#if defined (L_muluha3)
-;;; Unsigned fixed-point multiply 8.8 x 8.8, result in R25:R24.
-;;; Same code with and without MUL, but the interfaces differ:
-;;; no MUL: (R25:R24) *= (R23:R22)
-;;;         Clobbers: ABI, called by optabs
-;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
-;;;         Clobbers: __tmp_reg__, R22, R23
-;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
-;;; Widening multiply by __umulhisi3, then the shared tail
-;;; __muluha3_round extracts and rounds the middle two bytes.
-DEFUN   __muluha3
-    XCALL   __umulhisi3
-    XJMP    __muluha3_round
-ENDF __muluha3
-#endif  /* L_muluha3 */
-
-#if defined (L_muluha3_round)
-;;; Common tail of __mulha3 and __muluha3.
-;;; In:  product bytes in R24:R23, byte below the result in R22
-;;; Out: R25:R24 = R24:R23, plus 1 if bit 7 of R22 is set
-DEFUN   __muluha3_round
-    ;; Shift result into place
-    mov     r25, r24
-    mov     r24, r23
-    ;; Round
-    sbrc    r22, 7
-    adiw    r24, 1
-    ret
-ENDF __muluha3_round
-#endif  /* L_muluha3_round */
-
-
-/*******************************************************
-    Fixed  Multiplication  16.16 x 16.16
-*******************************************************/
-
-;; Bits outside the result (below LSB), used in the signed version
-#define GUARD __tmp_reg__
-
-#if defined (__AVR_HAVE_MUL__)
-
-;; Multiplier
-#define A0  16
-#define A1  A0+1
-#define A2  A1+1
-#define A3  A2+1
-
-;; Multiplicand
-#define B0  20
-#define B1  B0+1
-#define B2  B1+1
-#define B3  B2+1
-
-;; Result
-#define C0  24
-#define C1  C0+1
-#define C2  C1+1
-#define C3  C2+1
-
-#if defined (L_mulusa3)
-;;; Unsigned fixed-point multiply 16.16 x 16.16 for devices with MUL.
-;;; (C3:C0) = (A3:A0) * (B3:B0)
-;;; __mulusa3 only sets T (request rounding) and falls through into
-;;; __mulusa3_round.
-DEFUN __mulusa3
-    set
-    ;; Fallthru
-ENDF  __mulusa3
-
-;;; Round for last digit iff T = 1
-;;; Return guard bits in GUARD (__tmp_reg__).
-;;; Rounding, T = 0:  -1.0 LSB  <  error  <=  0   LSB
-;;; Rounding, T = 1:  -0.5 LSB  <  error  <=  0.5 LSB
-;;; A[] and B[] are only read.  __zero_reg__ (r1) is overwritten by the MUL
-;;; instructions and cleared again before returning.
-DEFUN __mulusa3_round
-    ;; Some of the MUL instructions have LSBs outside the result.
-    ;; Don't ignore these LSBs in order to tame rounding error.
-    ;; Use C2/C3 for these LSBs.
-
-    clr C0
-    clr C1
-    mul A0, B0  $  movw C2, r0
-
-    mul A1, B0  $  add  C3, r0  $  adc C0, r1
-    mul A0, B1  $  add  C3, r0  $  adc C0, r1  $  rol C1
-
-    ;; Round if T = 1.  Store guarding bits outside the result for rounding
-    ;; and left-shift by the signed version (function below).
-    brtc 0f
-    sbrc C3, 7
-    adiw C0, 1
-0:  push C3                 ; guard byte, popped into GUARD at the end
-
-    ;; The following MULs don't have LSBs outside the result.
-    ;; C2/C3 is the high part.
-    ;; In each group the sbc / sbci chain accumulates minus the number of
-    ;; carries out of the lower bytes; the trailing neg makes it positive.
-
-    mul  A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
-    mul  A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
-    mul  A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
-    neg  C2
-
-    mul  A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
-    mul  A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
-    mul  A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
-    mul  A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
-    neg  C3
-
-    ;; Carries out of C3 are dropped from here on.
-    mul  A1, B3  $  add C2, r0  $  adc C3, r1
-    mul  A2, B2  $  add C2, r0  $  adc C3, r1
-    mul  A3, B1  $  add C2, r0  $  adc C3, r1
-
-    ;; Only the low product byte r0 still lands inside the result.
-    mul  A2, B3  $  add C3, r0
-    mul  A3, B2  $  add C3, r0
-
-    ;; Guard bits used in the signed version below.
-    pop  GUARD
-    clr  __zero_reg__
-    ret
-ENDF __mulusa3_round
-#endif /* L_mulusa3 */
-
-#if defined (L_mulsa3)
-;;; Signed fixed-point multiply s16.15 x s16.15 for devices with MUL.
-;;; (C3:C0) = (A3:A0) * (B3:B0)
-;;; Clobbers: __tmp_reg__, T
-;;; Rounding:  -0.5 LSB  <=  error  <=  0.5 LSB
-;;; Strategy: multiply as unsigned without rounding (T = 0), correct the
-;;; high half for negative operands, then shift left by one bit and round
-;;; using the guard byte returned in GUARD.
-DEFUN __mulsa3
-    clt
-    XCALL   __mulusa3_round
-    ;; A posteriori sign extension of the operands
-    tst     B3
-    brpl 1f
-    sub     C2, A0          ; B < 0: subtract the low half of A
-    sbc     C3, A1          ; from the high half of the product
-1:  sbrs    A3, 7
-    rjmp 2f
-    sub     C2, B0          ; A < 0: subtract the low half of B
-    sbc     C3, B1          ; from the high half of the product
-2:
-    ;;  Shift 1 bit left to adjust for 15 fractional bits
-    lsl     GUARD
-    rol     C0
-    rol     C1
-    rol     C2
-    rol     C3
-    ;; Round last digit
-    lsl     GUARD           ; Carry = next guard bit
-    adc     C0, __zero_reg__
-    adc     C1, __zero_reg__
-    adc     C2, __zero_reg__
-    adc     C3, __zero_reg__
-    ret
-ENDF __mulsa3
-#endif /* L_mulsa3 */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-#undef B0
-#undef B1
-#undef B2
-#undef B3
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#else /* __AVR_HAVE_MUL__ */
-
-#define A0 18
-#define A1 A0+1
-#define A2 A0+2
-#define A3 A0+3
-
-#define B0 22
-#define B1 B0+1
-#define B2 B0+2
-#define B3 B0+3
-
-#define C0  22
-#define C1  C0+1
-#define C2  C0+2
-#define C3  C0+3
-
-;; __tmp_reg__
-#define CC0  0
-;; __zero_reg__
-#define CC1  1
-#define CC2  16
-#define CC3  17
-
-#define AA0  26
-#define AA1  AA0+1
-#define AA2  30
-#define AA3  AA2+1
-
-#if defined (L_mulsa3)
-;;; Signed fixed-point multiply s16.15 x s16.15 for devices without MUL.
-;;; (R25:R22)  *=  (R21:R18)
-;;; Clobbers: ABI, called by optabs
-;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
-;;; Strategy: multiply as unsigned without rounding (T = 0), correct the
-;;; high half for negative operands, then shift left by one bit and round
-;;; using the guard byte returned in GUARD.  B0, B1 and B3 are saved on the
-;;; stack because the result C[] occupies the same registers as B[].
-DEFUN   __mulsa3
-    push    B0
-    push    B1
-    push    B3
-    clt
-    XCALL   __mulusa3_round
-    pop     r30             ; r30 = original B3
-    ;; sign-extend B
-    bst     r30, 7
-    brtc 1f
-    ;; A1, A0 survived in  R27:R26
-    sub     C2, AA0
-    sbc     C3, AA1
-1:
-    pop     AA1  ;; B1
-    pop     AA0  ;; B0
-
-    ;; sign-extend A.  A3 survived in  R31
-    bst     AA3, 7
-    brtc 2f
-    sub     C2, AA0         ; AA1:AA0 now hold the original B1:B0
-    sbc     C3, AA1
-2:
-    ;;  Shift 1 bit left to adjust for 15 fractional bits
-    lsl     GUARD
-    rol     C0
-    rol     C1
-    rol     C2
-    rol     C3
-    ;; Round last digit
-    lsl     GUARD           ; Carry = next guard bit
-    adc     C0, __zero_reg__
-    adc     C1, __zero_reg__
-    adc     C2, __zero_reg__
-    adc     C3, __zero_reg__
-    ret
-ENDF __mulsa3
-#endif  /* L_mulsa3 */
-
-#if defined (L_mulusa3)
-;;; Unsigned fixed-point multiply 16.16 x 16.16 for devices without MUL.
-;;; (R25:R22)  *=  (R21:R18)
-;;; Clobbers: ABI, called by optabs
-;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
-;;; __mulusa3 only sets T (request rounding) and falls through into
-;;; __mulusa3_round.
-DEFUN __mulusa3
-    set
-    ;; Fallthru
-ENDF  __mulusa3
-
-;;; A[] survives in 26, 27, 30, 31
-;;; Also used by __mulsa3 with T = 0
-;;; Round if T = 1
-;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
-;;; Shift-and-add multiply.  The accumulator CC[] lives in r0, r1, r16 and
-;;; r17; r16 / r17 are saved and restored, __zero_reg__ is cleared again
-;;; before it is used as zero.
-DEFUN __mulusa3_round
-    push    CC2
-    push    CC3
-    ; clear result
-    clr     __tmp_reg__
-    wmov    CC2, CC0        ; copies r1:r0, relies on __zero_reg__ being 0
-    ; save multiplicand
-    wmov    AA0, A0
-    wmov    AA2, A2
-    rjmp 3f
-
-    ;; Loop the integral part
-
-1:  ;; CC += A * 2^n;  n >= 0
-    add  CC0,A0  $  adc CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3
-
-2:  ;; A <<= 1
-    lsl  A0      $  rol A1      $  rol  A2      $  rol  A3
-
-3:  ;; IBIT(B) >>= 1
-    ;; Carry = n-th bit of B;  n >= 0
-    lsr     B3
-    ror     B2
-    brcs 1b
-    ;; Carry is clear here, so sbci leaves B3 unchanged; its Z flag stays
-    ;; set only if Z from ror B2 was set and B3 is zero, i.e. B3:B2 == 0.
-    sbci    B3, 0
-    brne 2b
-
-    ;; Loop the fractional part
-    ;; B2/B3 is 0 now, use as guard bits for rounding
-    ;; Restore multiplicand
-    wmov    A0, AA0
-    wmov    A2, AA2
-    rjmp 5f
-
-4:  ;; CC += A:Guard * 2^n;  n < 0
-    add  B3,B2 $  adc  CC0,A0  $  adc  CC1,A1  $  adc  CC2,A2  $  adc  CC3,A3
-5:
-    ;; A:Guard >>= 1
-    lsr  A3   $  ror  A2  $  ror  A1  $  ror   A0  $   ror  B2
-
-    ;; FBIT(B) <<= 1
-    ;; Carry = n-th bit of B;  n < 0
-    lsl     B0
-    rol     B1
-    brcs 4b
-    ;; Same sticky-Z idiom as above: Z is set only if B1:B0 == 0.
-    sbci    B0, 0
-    brne 5b
-
-    ;; Save guard bits and set carry for rounding
-    push    B3
-    lsl     B3
-    ;; Move result into place
-    ;; (wmov and clr leave Carry untouched for the adc chain below)
-    wmov    C2, CC2
-    wmov    C0, CC0
-    clr     __zero_reg__
-    brtc 6f
-    ;; Round iff T = 1
-    adc     C0, __zero_reg__
-    adc     C1, __zero_reg__
-    adc     C2, __zero_reg__
-    adc     C3, __zero_reg__
-6:
-    pop     GUARD
-    ;; Epilogue
-    pop     CC3
-    pop     CC2
-    ret
-ENDF __mulusa3_round
-#endif  /* L_mulusa3 */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-#undef B0
-#undef B1
-#undef B2
-#undef B3
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-#undef AA0
-#undef AA1
-#undef AA2
-#undef AA3
-#undef CC0
-#undef CC1
-#undef CC2
-#undef CC3
-
-#endif /* __AVR_HAVE_MUL__ */
-
-#undef GUARD
-
-/***********************************************************
-    Fixed  unsigned saturated Multiplication  8.8 x 8.8
-***********************************************************/
-
-#define C0  22
-#define C1  C0+1
-#define C2  C0+2
-#define C3  C0+3
-#define SS __tmp_reg__
-
-#if defined (L_usmuluha3)
-;;; Unsigned saturating fixed-point multiply 8.8 x 8.8.
-;;; Operands arrive in R23:R22 and R25:R24; the result is returned in
-;;; C3:C2 (R25:R24).  Results that do not fit saturate to 0xffff.
-DEFUN __usmuluha3
-    ;; Widening multiply
-#ifdef __AVR_HAVE_MUL__
-    ;; Adjust interface
-    movw    R26, R22
-    movw    R18, R24
-#endif /* HAVE MUL */
-    XCALL   __umulhisi3
-    tst     C3
-    brne .Lmax              ; integral part exceeds 8 bits
-    ;; Round, target is in C1..C2
-    lsl     C0              ; Carry = highest bit below the result
-    adc     C1, __zero_reg__
-    adc     C2, __zero_reg__
-    brcs .Lmax              ; rounding carried out of the result
-    ;; Move result into place
-    mov     C3, C2
-    mov     C2, C1
-    ret
-.Lmax:
-    ;; Saturate
-    ldi     C2, 0xff
-    ldi     C3, 0xff
-    ret
-ENDF  __usmuluha3
-#endif /* L_usmuluha3 */
-
-/***********************************************************
-    Fixed signed saturated Multiplication  s8.7 x s8.7
-***********************************************************/
-
-#if defined (L_ssmulha3)
-;;; Signed saturating fixed-point multiply s8.7 x s8.7.
-;;; Operands arrive in R23:R22 and R25:R24; the result is returned in
-;;; C3:C2 (R25:R24).  Results that do not fit saturate to 0x7fff or 0x8000.
-DEFUN __ssmulha3
-    ;; Widening multiply
-#ifdef __AVR_HAVE_MUL__
-    ;; Adjust interface
-    movw    R26, R22
-    movw    R18, R24
-#endif /* HAVE MUL */
-    XCALL   __mulhisi3
-    ;; Adjust decimal point
-    lsl     C0
-    rol     C1
-    rol     C2
-    brvs .LsatC3.3          ; saturate according to the sign held in C3
-    ;; The 9 MSBs must be the same
-    rol     C3
-    sbc     SS, SS          ; SS = 0x00 or 0xff from the bit shifted out
-    cp      C3, SS
-    brne .LsatSS
-    ;; Round
-    lsl     C0              ; Carry = highest bit below the result
-    adc     C1, __zero_reg__
-    adc     C2, __zero_reg__
-    brvs .Lmax              ; rounding overflowed towards positive
-    ;; Move result into place
-    mov    C3, C2
-    mov    C2, C1
-    ret
-.Lmax:
-    ;; Load 0x7fff
-    clr     C3
-.LsatC3.3:
-    ;; C3 <  0 -->  0x8000
-    ;; C3 >= 0 -->  0x7fff
-    mov     SS, C3
-.LsatSS:
-    ;; Load min / max value:
-    ;; SS = -1  -->  0x8000
-    ;; SS =  0  -->  0x7fff
-    ldi     C3, 0x7f
-    ldi     C2, 0xff
-    sbrc    SS, 7
-    adiw    C2, 1           ; 0x7fff + 1 = 0x8000
-    ret
-ENDF  __ssmulha3
-#endif /* L_ssmulha3 */
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-#undef SS
-
-/***********************************************************
-    Fixed  unsigned saturated Multiplication  16.16 x 16.16
-***********************************************************/
-
-#define C0  18
-#define C1  C0+1
-#define C2  C0+2
-#define C3  C0+3
-#define C4  C0+4
-#define C5  C0+5
-#define C6  C0+6
-#define C7  C0+7
-#define SS __tmp_reg__
-
-#if defined (L_usmulusa3)
-;; Unsigned saturating fixed-point multiply 16.16 x 16.16.
-;; R22[4] = R22[4] *{usat} R18[4]
-;; Ordinary ABI function
-;; The 64-bit product of __umulsidi3 is read from C7..C0 (R25..R18); the
-;; result is its middle four bytes C5..C2, moved to C7..C4 (R25..R22).
-DEFUN __usmulusa3
-    ;; Widening multiply
-    XCALL   __umulsidi3
-    or      C7, C6
-    brne .Lmax              ; integral part exceeds 16 bits
-    ;; Round, target is in C2..C5
-    lsl     C1              ; Carry = highest bit below the result
-    adc     C2, __zero_reg__
-    adc     C3, __zero_reg__
-    adc     C4, __zero_reg__
-    adc     C5, __zero_reg__
-    brcs .Lmax              ; rounding carried out of the result
-    ;; Move result into place
-    wmov    C6, C4
-    wmov    C4, C2
-    ret
-.Lmax:
-    ;; Saturate
-    ldi     C7, 0xff
-    ldi     C6, 0xff
-    wmov    C4, C6
-    ret
-ENDF  __usmulusa3
-#endif /* L_usmulusa3 */
-
-/***********************************************************
-    Fixed signed saturated Multiplication  s16.15 x s16.15
-***********************************************************/
-
-#if defined (L_ssmulsa3)
-;; Signed saturating fixed-point multiply s16.15 x s16.15.
-;; R22[4] = R22[4] *{ssat} R18[4]
-;; Ordinary ABI function
-;; The 64-bit product of __mulsidi3 is read from C7..C0 (R25..R18); after
-;; the one-bit left shift the result is C5..C2, moved to C7..C4 (R25..R22).
-DEFUN __ssmulsa3
-    ;; Widening multiply
-    XCALL   __mulsidi3
-    ;; Adjust decimal point
-    lsl     C1
-    rol     C2
-    rol     C3
-    rol     C4
-    rol     C5
-    brvs .LsatC7.7          ; saturate according to the sign held in C7
-    ;; The 17 MSBs must be the same
-    rol     C6
-    rol     C7
-    sbc     SS, SS          ; SS = 0x00 or 0xff from the bit shifted out
-    cp      C6, SS
-    cpc     C7, SS
-    brne .LsatSS
-    ;; Round
-    lsl     C1              ; Carry = highest bit below the result
-    adc     C2, __zero_reg__
-    adc     C3, __zero_reg__
-    adc     C4, __zero_reg__
-    adc     C5, __zero_reg__
-    brvs .Lmax              ; rounding overflowed towards positive
-    ;; Move result into place
-    wmov    C6, C4
-    wmov    C4, C2
-    ret
-
-.Lmax:
-    ;; Load 0x7fffffff
-    clr     C7
-.LsatC7.7:
-    ;; C7 <  0 -->  0x80000000
-    ;; C7 >= 0 -->  0x7fffffff
-    lsl     C7
-    sbc     SS, SS
-.LsatSS:
-    ;; Load min / max value:
-    ;; SS = -1  -->  0x80000000
-    ;; SS =  0  -->  0x7fffffff
-    com     SS              ; 0xff for the maximum, 0x00 for the minimum
-    mov     C4, SS
-    mov     C5, C4
-    wmov    C6, C4
-    subi    C7, 0x80        ; 0xff -> 0x7f, 0x00 -> 0x80
-    ret
-ENDF  __ssmulsa3
-#endif /* L_ssmulsa3 */
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-#undef C4
-#undef C5
-#undef C6
-#undef C7
-#undef SS
-
-/*******************************************************
-      Fractional Division 8 / 8
-*******************************************************/
-
-#define r_divd  r25     /* dividend */
-#define r_quo   r24     /* quotient */
-#define r_div   r22     /* divisor */
-#define r_sign  __tmp_reg__
-
-#if defined (L_divqq3)
-;;; Signed fractional division 8 / 8.
-;;; In:  dividend in r25 (r_divd), divisor in r22 (r_div)
-;;; Out: quotient in r24 (r_quo)
-;;; The sign of the result is kept in bit 7 of r_sign (__tmp_reg__) while
-;;; the magnitudes are divided by __divqq_helper, which leaves it intact.
-DEFUN   __divqq3
-    mov     r_sign, r_divd
-    eor     r_sign, r_div   ; bit 7 = 1 iff the operand signs differ
-    sbrc    r_div, 7
-    neg     r_div
-    sbrc    r_divd, 7
-    neg     r_divd
-    XCALL   __divqq_helper
-    lsr     r_quo           ; make room for the sign bit
-    sbrc    r_sign, 7   ; negate result if needed
-    neg     r_quo
-    ret
-ENDF __divqq3
-#endif  /* L_divqq3 */
-
-#if defined (L_udivuqq3)
-;;; Unsigned fractional division 8 / 8.
-;;; In:  dividend in r25 (r_divd), divisor in r22 (r_div)
-;;; Out: quotient in r24 (r_quo); 0xff if dividend >= divisor
-DEFUN   __udivuqq3
-    cp      r_divd, r_div
-    brsh    0f
-    XJMP __divqq_helper
-    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
-0:  ldi     r_quo, 0xff
-    ret
-ENDF __udivuqq3
-#endif  /* L_udivuqq3 */
-
-
-#if defined (L_divqq_helper)
-;;; Core of the 8 / 8 fractional division.
-;;; In:  dividend in r25 (r_divd), divisor in r22 (r_div)
-;;; Out: quotient in r24 (r_quo); r_divd is overwritten
-;;; __zero_reg__ serves as loop counter: a single 1 bit is shifted left
-;;; once per iteration, so the loop runs 8 times and leaves the register
-;;; at zero again.
-DEFUN   __divqq_helper
-    clr     r_quo           ; clear quotient
-    inc     __zero_reg__    ; init loop counter, used per shift
-__udivuqq3_loop:
-    lsl     r_divd          ; shift dividend
-    brcs    0f              ; dividend overflow
-    cp      r_divd,r_div    ; compare dividend & divisor
-    brcc    0f              ; dividend >= divisor
-    rol     r_quo           ; shift quotient (with CARRY)
-    rjmp    __udivuqq3_cont
-0:
-    sub     r_divd,r_div    ; restore dividend
-    lsl     r_quo           ; shift quotient (without CARRY)
-__udivuqq3_cont:
-    lsl     __zero_reg__    ; shift loop-counter bit
-    brne    __udivuqq3_loop
-    com     r_quo           ; complement result
-                            ; because C flag was complemented in loop
-    ret
-ENDF __divqq_helper
-#endif  /* L_divqq_helper */
-
-#undef  r_divd
-#undef  r_quo
-#undef  r_div
-#undef  r_sign
-
-
-/*******************************************************
-    Fractional Division 16 / 16
-*******************************************************/
-#define r_divdL 26     /* dividend Low */
-#define r_divdH 27     /* dividend High */
-#define r_quoL  24     /* quotient Low */
-#define r_quoH  25     /* quotient High */
-#define r_divL  22     /* divisor */
-#define r_divH  23     /* divisor */
-#define r_cnt   21
-
-#if defined (L_divhq3)
-;;; Signed fractional division 16 / 16.
-;;; In:  dividend in r27:r26 (r_divdH:r_divdL),
-;;;      divisor  in r23:r22 (r_divH:r_divL)
-;;; Out: quotient in r25:r24 (r_quoH:r_quoL)
-;;; The sign of the result is kept in bit 7 of r0 while the magnitudes are
-;;; divided by __udivuhq3, which uses only r21..r27 and hence leaves r0
-;;; intact.  If both magnitudes are equal, 0x8000 (-1) is returned.
-DEFUN   __divhq3
-    mov     r0, r_divdH
-    eor     r0, r_divH      ; bit 7 = 1 iff the operand signs differ
-    sbrs    r_divH, 7
-    rjmp    1f
-    NEG2    r_divL
-1:
-    sbrs    r_divdH, 7
-    rjmp    2f
-    NEG2    r_divdL
-2:
-    cp      r_divdL, r_divL
-    cpc     r_divdH, r_divH
-    breq    __divhq3_minus1  ; if equal return -1
-    XCALL   __udivuhq3
-    lsr     r_quoH          ; make room for the sign bit
-    ror     r_quoL
-    ;; Negate the result if needed.  The decision has to come from the
-    ;; sign saved in r0: the N flag left by the ror above only reflects
-    ;; bit 7 of the shifted low byte, not the sign of the quotient.
-    sbrs    r0, 7
-    ret
-    NEG2    r_quoL
-    ret
-__divhq3_minus1:
-    ldi     r_quoH, 0x80
-    clr     r_quoL
-    ret
-ENDF __divhq3
-#endif  /* defined (L_divhq3) */
-
-#if defined (L_udivuhq3)
-;;; Unsigned fractional division 16 / 16.
-;;; In:  dividend in r27:r26 (r_divdH:r_divdL),
-;;;      divisor  in r23:r22 (r_divH:r_divL)
-;;; Out: quotient in r25:r24 (r_quoH:r_quoL)
-;;; Clobbers r21 (r_cnt) and the dividend registers.
-;;; __udivuhq3 clears r_quoH and Carry, then falls through.
-DEFUN   __udivuhq3
-    sub     r_quoH,r_quoH   ; clear quotient and carry
-    ;; FALLTHRU
-ENDF __udivuhq3
-
-;;; Shared 16-step division loop.  On entry Carry and r_quoH supply the
-;;; bits that are shifted into the dividend from below: all zero when
-;;; entered from __udivuhq3, the low dividend byte when entered from
-;;; __udivuha3.  Neither clr nor ldi changes Carry.
-DEFUN   __udivuha3_common
-    clr     r_quoL          ; clear quotient
-    ldi     r_cnt,16        ; init loop counter
-__udivuhq3_loop:
-    rol     r_divdL         ; shift dividend (with CARRY)
-    rol     r_divdH
-    brcs    __udivuhq3_ep   ; dividend overflow
-    cp      r_divdL,r_divL  ; compare dividend & divisor
-    cpc     r_divdH,r_divH
-    brcc    __udivuhq3_ep   ; dividend >= divisor
-    rol     r_quoL          ; shift quotient (with CARRY)
-    rjmp    __udivuhq3_cont
-__udivuhq3_ep:
-    sub     r_divdL,r_divL  ; restore dividend
-    sbc     r_divdH,r_divH
-    lsl     r_quoL          ; shift quotient (without CARRY)
-__udivuhq3_cont:
-    rol     r_quoH          ; shift quotient
-    dec     r_cnt           ; decrement loop counter
-    brne    __udivuhq3_loop
-    com     r_quoL          ; complement result
-    com     r_quoH          ; because C flag was complemented in loop
-    ret
-ENDF __udivuha3_common
-#endif  /* defined (L_udivuhq3) */
-
-/*******************************************************
-    Fixed Division 8.8 / 8.8
-*******************************************************/
-#if defined (L_divha3)
-;;; Signed fixed-point division s8.7 / s8.7.
-;;; In:  dividend in r27:r26 (r_divdH:r_divdL),
-;;;      divisor  in r23:r22 (r_divH:r_divL)
-;;; Out: quotient in r25:r24 (r_quoH:r_quoL)
-;;; The sign of the result is kept in bit 7 of r0 while the magnitudes are
-;;; divided by __udivuha3.
-DEFUN   __divha3
-    mov     r0, r_divdH
-    eor     r0, r_divH      ; bit 7 = 1 iff the operand signs differ
-    sbrs    r_divH, 7
-    rjmp    1f
-    NEG2    r_divL
-1:
-    sbrs    r_divdH, 7
-    rjmp    2f
-    NEG2    r_divdL
-2:
-    XCALL   __udivuha3
-    lsr     r_quoH  ; adjust to 7 fractional bits
-    ror     r_quoL
-    sbrs    r0, 7   ; negate result if needed
-    ret
-    NEG2    r_quoL
-    ret
-ENDF __divha3
-#endif  /* defined (L_divha3) */
-
-#if defined (L_udivuha3)
-;;; Unsigned fixed-point division 8.8 / 8.8.
-;;; In:  dividend in r27:r26, divisor in r23:r22
-;;; Out: quotient in r25:r24
-;;; The dividend is moved down by one byte: its high byte becomes the low
-;;; dividend byte and its low byte is parked in r_quoH, from where the
-;;; common loop shifts it into the dividend bit by bit.
-DEFUN   __udivuha3
-    mov     r_quoH, r_divdL
-    mov     r_divdL, r_divdH
-    clr     r_divdH
-    lsl     r_quoH     ; shift quotient into carry
-    XJMP    __udivuha3_common ; same as fractional after rearrange
-ENDF __udivuha3
-#endif  /* defined (L_udivuha3) */
-
-#undef  r_divdL
-#undef  r_divdH
-#undef  r_quoL
-#undef  r_quoH
-#undef  r_divL
-#undef  r_divH
-#undef  r_cnt
-
-/*******************************************************
-    Fixed Division 16.16 / 16.16
-*******************************************************/
-
-#define r_arg1L  24    /* arg1 gets passed already in place */
-#define r_arg1H  25
-#define r_arg1HL 26
-#define r_arg1HH 27
-#define r_divdL  26    /* dividend Low */
-#define r_divdH  27
-#define r_divdHL 30
-#define r_divdHH 31    /* dividend High */
-#define r_quoL   22    /* quotient Low */
-#define r_quoH   23
-#define r_quoHL  24
-#define r_quoHH  25    /* quotient High */
-#define r_divL   18    /* divisor Low */
-#define r_divH   19
-#define r_divHL  20
-#define r_divHH  21    /* divisor High */
-#define r_cnt  __zero_reg__  /* loop count (0 after the loop!) */
-
-#if defined (L_divsa3)
-;;; Signed fixed-point division s16.15 / s16.15.
-;;; In:  dividend in r27..r24 (r_arg1HH..r_arg1L),
-;;;      divisor  in r21..r18 (r_divHH..r_divL)
-;;; Out: quotient in r25..r22 (r_quoHH..r_quoL)
-;;; The sign of the result is kept in bit 7 of r0 while the magnitudes are
-;;; divided by __udivusa3.
-;;; NOTE(review): NEG4 and __negsi2 are defined outside this chunk;
-;;; presumably both negate a 4-byte value in place -- confirm.
-DEFUN   __divsa3
-    mov     r0, r_arg1HH
-    eor     r0, r_divHH     ; bit 7 = 1 iff the operand signs differ
-    sbrs    r_divHH, 7
-    rjmp    1f
-    NEG4    r_divL
-1:
-    sbrs    r_arg1HH, 7
-    rjmp    2f
-    NEG4    r_arg1L
-2:
-    XCALL   __udivusa3
-    lsr     r_quoHH ; adjust to 15 fractional bits
-    ror     r_quoHL
-    ror     r_quoH
-    ror     r_quoL
-    sbrs    r0, 7   ; negate result if needed
-    ret
-    ;; negate r_quoL
-    XJMP    __negsi2
-ENDF __divsa3
-#endif  /* defined (L_divsa3) */
-
-#if defined (L_udivusa3)
-;;; Unsigned fixed-point division 16.16 / 16.16.
-;;; In:  dividend in r27..r24, divisor in r21..r18
-;;; Out: quotient in r25..r22
-;;; Clobbers r26, r27, r30, r31.  __zero_reg__ is used as loop counter and
-;;; is zero again when the loop ends.
-;;; The upper dividend half (r27:r26) starts as the low half of the working
-;;; dividend r31:r30:r27:r26; the lower half (r25:r24) sits in the upper
-;;; quotient bytes and is shifted into the working dividend bit by bit.
-DEFUN   __udivusa3
-    ldi     r_divdHL, 32    ; init loop counter
-    mov     r_cnt, r_divdHL
-    clr     r_divdHL
-    clr     r_divdHH
-    wmov    r_quoL, r_divdHL
-    lsl     r_quoHL         ; shift quotient into carry
-    rol     r_quoHH
-__udivusa3_loop:
-    rol     r_divdL         ; shift dividend (with CARRY)
-    rol     r_divdH
-    rol     r_divdHL
-    rol     r_divdHH
-    brcs    __udivusa3_ep   ; dividend overflow
-    cp      r_divdL,r_divL  ; compare dividend & divisor
-    cpc     r_divdH,r_divH
-    cpc     r_divdHL,r_divHL
-    cpc     r_divdHH,r_divHH
-    brcc    __udivusa3_ep   ; dividend >= divisor
-    rol     r_quoL          ; shift quotient (with CARRY)
-    rjmp    __udivusa3_cont
-__udivusa3_ep:
-    sub     r_divdL,r_divL  ; restore dividend
-    sbc     r_divdH,r_divH
-    sbc     r_divdHL,r_divHL
-    sbc     r_divdHH,r_divHH
-    lsl     r_quoL          ; shift quotient (without CARRY)
-__udivusa3_cont:
-    rol     r_quoH          ; shift quotient
-    rol     r_quoHL
-    rol     r_quoHH         ; Carry = next dividend bit; dec keeps Carry
-    dec     r_cnt           ; decrement loop counter
-    brne    __udivusa3_loop
-    com     r_quoL          ; complement result
-    com     r_quoH          ; because C flag was complemented in loop
-    com     r_quoHL
-    com     r_quoHH
-    ret
-ENDF __udivusa3
-#endif  /* defined (L_udivusa3) */
-
-#undef  r_arg1L
-#undef  r_arg1H
-#undef  r_arg1HL
-#undef  r_arg1HH
-#undef  r_divdL
-#undef  r_divdH
-#undef  r_divdHL
-#undef  r_divdHH
-#undef  r_quoL
-#undef  r_quoH
-#undef  r_quoHL
-#undef  r_quoHH
-#undef  r_divL
-#undef  r_divH
-#undef  r_divHL
-#undef  r_divHH
-#undef  r_cnt
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Saturation, 1 Byte
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; First Argument and Return Register
-#define A0  24
-
-#if defined (L_ssabs_1)
-;;; Saturating absolute value of the signed byte in A0 (r24), in place.
-;;; 0x80 yields 0x7f.
-DEFUN __ssabs_1
-    sbrs    A0, 7
-    ret                     ; non-negative: nothing to do
-    neg     A0
-    sbrc    A0,7            ; still negative only for the input 0x80
-    dec     A0              ; 0x80 -> 0x7f
-    ret
-ENDF __ssabs_1
-#endif /* L_ssabs_1 */
-
-#undef A0
-
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Saturation, 2 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; First Argument and Return Register
-#define A0  24
-#define A1  A0+1
-
-#if defined (L_ssneg_2)
-;;; Saturating negation of the signed 2-byte value in A1:A0 (r25:r24),
-;;; in place.
-;;; NOTE(review): NEG2 is a macro defined outside this chunk; the code
-;;; relies on it leaving V set exactly when the negation overflows, in
-;;; which case the value is 0x8000 and sbiw turns it into 0x7fff -- confirm.
-DEFUN __ssneg_2
-    NEG2    A0
-    brvc 0f
-    sbiw    A0, 1
-0:  ret
-ENDF __ssneg_2
-#endif /* L_ssneg_2 */
-
-#if defined (L_ssabs_2)
-;;; Saturating absolute value of the signed 2-byte value in A1:A0
-;;; (r25:r24), in place.  Negative inputs are handed to __ssneg_2.
-DEFUN __ssabs_2
-    sbrs    A1, 7
-    ret                     ; non-negative: nothing to do
-    XJMP    __ssneg_2
-ENDF __ssabs_2
-#endif /* L_ssabs_2 */
-
-#undef A0
-#undef A1
-
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Saturation, 4 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; First Argument and Return Register
-#define A0  22
-#define A1  A0+1
-#define A2  A0+2
-#define A3  A0+3
-
-#if defined (L_ssneg_4)
-;;; Saturating negation of the signed 4-byte value in A3..A0 (r25..r22),
-;;; in place.
-;;; NOTE(review): relies on __negsi2 (defined outside this chunk) leaving
-;;; V set exactly when the negation overflows -- confirm.
-DEFUN __ssneg_4
-    XCALL   __negsi2
-    brvc 0f
-    ;; Overflow: saturate to 0x7fffffff
-    ldi     A3, 0x7f
-    ldi     A2, 0xff
-    ldi     A1, 0xff
-    ldi     A0, 0xff
-0:  ret
-ENDF __ssneg_4
-#endif /* L_ssneg_4 */
-
-#if defined (L_ssabs_4)
-;;; Saturating absolute value of the signed 4-byte value in A3..A0
-;;; (r25..r22), in place.  Negative inputs are handed to __ssneg_4.
-DEFUN __ssabs_4
-    sbrs    A3, 7
-    ret                     ; non-negative: nothing to do
-    XJMP    __ssneg_4
-ENDF __ssabs_4
-#endif /* L_ssabs_4 */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Saturation, 8 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; First Argument and Return Register
-#define A0  18
-#define A1  A0+1
-#define A2  A0+2
-#define A3  A0+3
-#define A4  A0+4
-#define A5  A0+5
-#define A6  A0+6
-#define A7  A0+7
-
-#if defined (L_clr_8)
-;; The unsigned saturating negations of the 8-byte modes are aliases of
-;; __clr_8, i.e. they return zero.
-FALIAS __usneguta2
-FALIAS __usneguda2
-FALIAS __usnegudq2
-
-;; Clear Carry and all Bytes
-;; Out: A7..A0 (r25..r18) = 0, Carry = 0
-DEFUN __clr_8
-    ;; Clear Carry and set Z
-    sub     A7, A7
-    ;; FALLTHRU
-ENDF  __clr_8
-;; Propagate Carry to all Bytes, Carry unaltered
-;; Out: every byte of A7..A0 is 0xff if Carry was set on entry, else 0x00
-DEFUN __sbc_8
-    sbc     A7, A7
-    sbc     A6, A6
-    wmov    A4, A6
-    wmov    A2, A6
-    wmov    A0, A6
-    ret
-ENDF __sbc_8
-#endif /* L_clr_8 */
-
-#if defined (L_ssneg_8)
-;; Saturating negation of the signed 8-byte value in A7..A0 (r25..r18),
-;; in place.  Shared by the three aliases below.
-;; NOTE(review): relies on __negdi2 (defined outside this chunk) leaving
-;; V set exactly when the negation overflows -- confirm.
-FALIAS __ssnegta2
-FALIAS __ssnegda2
-FALIAS __ssnegdq2
-
-DEFUN __ssneg_8
-    XCALL   __negdi2
-    brvc 0f
-    ;; A[] = 0x7fffffffffffffff
-    sec
-    XCALL   __sbc_8         ; Carry set: all bytes become 0xff
-    ldi     A7, 0x7f
-0:  ret
-ENDF __ssneg_8
-#endif /* L_ssneg_8 */
-
-#if defined (L_ssabs_8)
-;; FALIAS is defined outside this section.  Presumably it makes the
-;; names below aliases of __ssabs_8 -- TODO confirm.
-FALIAS __ssabsta2
-FALIAS __ssabsda2
-FALIAS __ssabsdq2
-
-;; Saturating absolute value of A[] = R25:R18.
-;; A non-negative input is returned unchanged, a negative input
-;; is handed over to __ssneg_8.
-DEFUN __ssabs_8
-    ;; Bit 7 of the high byte is the sign.  Return if it is clear.
-    sbrs    A7, 7
-    ret
-    ;; Negative: continue in __ssneg_8 which returns to our caller
-    ;; (XJMP is assumed to be a plain jump)
-    XJMP    __ssneg_8
-ENDF __ssabs_8
-#endif /* L_ssabs_8 */
-
-;; Second Argument
-#define B0  10
-#define B1  B0+1
-#define B2  B0+2
-#define B3  B0+3
-#define B4  B0+4
-#define B5  B0+5
-#define B6  B0+6
-#define B7  B0+7
-
-#if defined (L_usadd_8)
-FALIAS __usadduta3
-FALIAS __usadduda3
-FALIAS __usaddudq3
-
-;; A[] += B[] with unsigned saturation, A[] = R25:R18, B[] = R17:R10.
-;; The sum and the Carry come from __adddi3, which is defined
-;; outside this section.
-DEFUN __usadd_8
-    XCALL   __adddi3
-    ;; No carry out of the top byte: the sum is the result
-    brcc 1f
-    ;; Carry is still set here, hence __sbc_8 yields A[] = 0xff...ff
-    XJMP    __sbc_8
-1:  ret
-ENDF __usadd_8
-#endif /* L_usadd_8 */
-
-#if defined (L_ussub_8)
-FALIAS __ussubuta3
-FALIAS __ussubuda3
-FALIAS __ussubudq3
-
-;; A[] -= B[] with unsigned saturation, A[] = R25:R18, B[] = R17:R10.
-;; The difference and the Carry (borrow) come from __subdi3, which
-;; is defined outside this section.
-DEFUN __ussub_8
-    XCALL   __subdi3
-    ;; No borrow: the difference is the result
-    brcc 1f
-    ;; Borrow: the result saturates to A[] = 0
-    XJMP    __clr_8
-1:  ret
-ENDF __ussub_8
-#endif /* L_ussub_8 */
-
-#if defined (L_ssadd_8)
-;; FALIAS is defined outside this section.  Presumably it makes the
-;; names below aliases of __ssadd_8 -- TODO confirm.
-FALIAS __ssaddta3
-FALIAS __ssaddda3
-FALIAS __ssadddq3
-
-;; A[] += B[] with signed saturation, A[] = R25:R18, B[] = R17:R10.
-;; __adddi3 is defined outside this section.  It is assumed to set V
-;; on signed overflow and to leave B[] intact -- TODO confirm.
-DEFUN __ssadd_8
-    XCALL   __adddi3
-    brvc 0f
-    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
-    ;; Carry = (B7 < 0x80), i.e. Carry is set iff B is non-negative
-    cpi     B7, 0x80
-    ;; A[] = Carry ? 0xff...ff : 0
-    XCALL   __sbc_8
-    ;; Flip the sign bit: 0xff -> 0x7f resp. 0x00 -> 0x80
-    subi    A7, 0x80
-0:  ret
-ENDF __ssadd_8
-#endif /* L_ssadd_8 */
-
-#if defined (L_sssub_8)
-;; FALIAS is defined outside this section.  Presumably it makes the
-;; names below aliases of __sssub_8 -- TODO confirm.
-FALIAS __sssubta3
-FALIAS __sssubda3
-FALIAS __sssubdq3
-
-;; A[] -= B[] with signed saturation, A[] = R25:R18, B[] = R17:R10.
-;; __subdi3 is defined outside this section.  It is assumed to set V
-;; on signed overflow and to leave B[] intact -- TODO confirm.
-DEFUN __sssub_8
-    XCALL   __subdi3
-    brvc 0f
-    ;; A = (B < 0) ? INT64_MAX : INT64_MIN
-    ;; A7 is dead after the overflow and serves as a scratch for 0x7f
-    ldi     A7, 0x7f
-    ;; Carry = (0x7f < B7), i.e. Carry is set iff B is negative
-    cp      A7, B7
-    ;; A[] = Carry ? 0xff...ff : 0
-    XCALL   __sbc_8
-    ;; Flip the sign bit: 0xff -> 0x7f resp. 0x00 -> 0x80
-    subi    A7, 0x80
-0:  ret
-ENDF __sssub_8
-#endif /* L_sssub_8 */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-#undef A4
-#undef A5
-#undef A6
-#undef A7
-#undef B0
-#undef B1
-#undef B2
-#undef B3
-#undef B4
-#undef B5
-#undef B6
-#undef B7
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Rounding Helpers
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#ifdef L_mask1
-
-#define AA 24
-#define CC 25
-
-;; R25 = 1 << (R24 & 7)
-;; CC  = 1 << (AA  & 7)
-;; Clobbers: None
-;; AA is only read.  The bit is assembled from the three low bits
-;; of AA without a loop.  Note that lsl may alter SREG.
-DEFUN __mask1
-    ;; CC = 2 ^ AA.1
-    ;; i.e. CC = 4 if AA.1 is set, else CC = 1
-    ldi     CC, 1 << 2
-    sbrs    AA, 1
-    ldi     CC, 1 << 0
-    ;; CC *= 2 ^ AA.0
-    sbrc    AA, 0
-    lsl     CC
-    ;; CC *= 2 ^ AA.2
-    ;; Swapping the nibbles multiplies the single bit by 16
-    sbrc    AA, 2
-    swap    CC
-    ret
-ENDF __mask1
-
-#undef AA
-#undef CC
-#endif /* L_mask1 */
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; The rounding point. Any bits smaller than
-;; 2^{-RP} will be cleared.
-;; RP lives in R24, which is the same register as C0 defined below.
-#define RP R24
-
-;; Value to be rounded, starting at R22
-#define A0 22
-#define A1 A0 + 1
-
-;; Result, starting at R24.  While rounding is in progress, C[] holds
-;; the power-of-two addend resp. the mask derived from it.
-#define C0 24
-#define C1 C0 + 1
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Rounding, 1 Byte
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#ifdef L_roundqq3
-
-;; R24 = round (R22, R24)
-;; Clobbers: R22, __tmp_reg__
-;; Signed 1-byte rounding: add half of the rounding unit with signed
-;; saturation, then clear all bits below the rounding point.
-DEFUN  __roundqq3
-    ;; Preserve R25, it is used to receive the result of __mask1
-    mov     __tmp_reg__, C1
-    ;; RP = FBIT-1 - RP
-    subi    RP, __QQ_FBIT__ - 1
-    neg     RP
-    ;; R25 = 1 << RP  (Total offset is FBIT-1 - RP)
-    XCALL   __mask1
-    ;; C0 = addend.  This overwrites RP which is the same register.
-    mov     C0, C1
-    ;; Add-Saturate 2^{-RP-1}
-    add     A0, C0
-    brvc 0f
-    ldi     A0, 0x7f
-0:  ;; Mask out bits beyond RP
-    ;; C0 = -(2 * addend) has all bits from the rounding point
-    ;; upwards set
-    lsl     C0
-    neg     C0
-    and     C0, A0
-    ;; Restore R25
-    mov     C1, __tmp_reg__
-    ret
-ENDF  __roundqq3
-#endif /* L_roundqq3 */
-
-#ifdef L_rounduqq3
-
-;; R24 = round (R22, R24)
-;; Clobbers: R22, __tmp_reg__
-;; Unsigned 1-byte rounding: add half of the rounding unit with
-;; unsigned saturation, then clear all bits below the rounding point.
-DEFUN  __rounduqq3
-    ;; Preserve R25, it is used to receive the result of __mask1
-    mov     __tmp_reg__, C1
-    ;; RP = FBIT-1 - RP
-    subi    RP, __UQQ_FBIT__ - 1
-    neg     RP
-    ;; R25 = 1 << RP  (Total offset is FBIT-1 - RP)
-    XCALL   __mask1
-    ;; C0 = addend.  This overwrites RP which is the same register.
-    mov     C0, C1
-    ;; Add-Saturate 2^{-RP-1}
-    add     A0, C0
-    brcc 0f
-    ldi     A0, 0xff
-0:  ;; Mask out bits beyond RP
-    ;; C0 = -(2 * addend) has all bits from the rounding point
-    ;; upwards set
-    lsl     C0
-    neg     C0
-    and     C0, A0
-    ;; Restore R25
-    mov     C1, __tmp_reg__
-    ret
-ENDF  __rounduqq3
-#endif /* L_rounduqq3 */
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Rounding, 2 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#ifdef L_addmask_2
-
-;; [ R25:R24 =  1 << (R24 & 15)
-;;   R23:R22 += 1 << (R24 & 15) ]
-;; SREG is set according to the addition
-;; Note: The range test below compares the whole byte against 8,
-;; hence the formula above holds for R24 in 0...15.
-DEFUN __addmask_2
-    ;; R25 = 1 << (R24 & 7)
-    XCALL   __mask1
-    ;; C0 = 0xff if RP < 8, else 0.
-    ;; C0 is the same register as RP, thus RP is lost from here on.
-    cpi     RP, 1 << 3
-    sbc     C0, C0
-    ;; Move the bit from C1 down to C0 if RP.3 was NOT set (RP < 8).
-    ;; Otherwise, C0 becomes 0 and the bit stays in C1.
-    and     C0, C1
-    eor     C1, C0
-    ;; Finally, add the power-of-two:  A[] += C[]
-    add     A0, C0
-    adc     A1, C1
-    ret
-ENDF  __addmask_2
-#endif /* L_addmask_2 */
-
-#ifdef L_round_s2
-
-;; R25:R24 = round (R23:R22, R24)
-;; Clobbers: R23, R22
-;; Signed 2-byte rounding.  __roundhq3 only rebases RP and then runs
-;; into __roundha3 (ENDF is assumed to emit no code), so that in
-;; total FBIT-1 - RP is computed with the FBIT of the respective type.
-DEFUN  __roundhq3
-    subi    RP, __HQ_FBIT__ - __HA_FBIT__
-    ;; FALLTHRU
-ENDF   __roundhq3
-DEFUN  __roundha3
-    ;; RP = FBIT-1 - RP
-    subi    RP, __HA_FBIT__ - 1
-    neg     RP
-    ;; [ R25:R24  = 1 << (FBIT-1 - RP)
-    ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
-    XCALL   __addmask_2
-    ;; Saturate and mask.  The helper tests V as left by the addition.
-    XJMP    __round_s2_const
-ENDF  __roundha3
-
-#endif /* L_round_s2 */
-
-#ifdef L_round_u2
-
-;; R25:R24 = round (R23:R22, R24)
-;; Clobbers: R23, R22
-;; Unsigned 2-byte rounding.  __rounduhq3 only rebases RP and then
-;; runs into __rounduha3 (ENDF is assumed to emit no code), so that in
-;; total FBIT-1 - RP is computed with the FBIT of the respective type.
-DEFUN  __rounduhq3
-    subi    RP, __UHQ_FBIT__ - __UHA_FBIT__
-    ;; FALLTHRU
-ENDF   __rounduhq3
-DEFUN  __rounduha3
-    ;; RP = FBIT-1 - RP
-    subi    RP, __UHA_FBIT__ - 1
-    neg     RP
-    ;; [ R25:R24  = 1 << (FBIT-1 - RP)
-    ;;   R23:R22 += 1 << (FBIT-1 - RP) ]
-    XCALL   __addmask_2
-    ;; Saturate and mask.  The helper tests Carry as left by the addition.
-    XJMP    __round_u2_const 
-ENDF  __rounduha3
-
-#endif /* L_round_u2 */
-
-
-#ifdef L_round_2_const
-
-;; Helpers for 2 byte wide rounding
-;; Entered by a jump right after __addmask_2 with
-;;   A[] = R23:R22 = value plus addend
-;;   C[] = R25:R24 = addend
-;; and SREG from that addition.  Returns C[] = saturated A[] with
-;; the bits below the rounding point cleared.
-
-DEFUN  __round_s2_const
-    ;; No signed overflow: skip the saturation
-    brvc 2f
-    ;; A[] = 0x7fff, the low byte is set at label 1
-    ldi     A1, 0x7f
-    rjmp 1f
-    ;; FALLTHRU (Barrier)
-ENDF  __round_s2_const
-
-DEFUN __round_u2_const
-    ;; No carry out: skip the saturation
-    brcc 2f
-    ;; A[] = 0xffff
-    ldi     A1, 0xff
-1:
-    ldi     A0, 0xff
-2:
-    ;; Saturation is performed now.
-    ;; Currently, we have C[] = 2^{-RP-1}
-    ;; C[] = 2^{-RP}
-    lsl     C0
-    rol     C1
-    ;; C[] = -C[]: all bits from the rounding point upwards are set.
-    ;; NEG2 is a macro defined outside this section.
-    NEG2    C0
-    ;; Clear the bits beyond the rounding point.
-    and     C0, A0
-    and     C1, A1
-    ret
-ENDF  __round_u2_const
-
-#endif /* L_round_2_const */
-
-#undef A0
-#undef A1
-#undef C0
-#undef C1
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Rounding, 4 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#define A0 18
-#define A1 A0 + 1
-#define A2 A0 + 2
-#define A3 A0 + 3
-
-#define C0 22
-#define C1 C0 + 1
-#define C2 C0 + 2
-#define C3 C0 + 3
-
-#ifdef L_addmask_4
-
-;; [ R25:R22 =  1 << (R24 & 31)
-;;   R21:R18 += 1 << (R24 & 31) ]
-;; SREG is set according to the addition
-;; RP (R24) is the same register as C2, hence RP is consumed by
-;; this function.
-DEFUN __addmask_4
-    ;; R25 = 1 << (R24 & 7)
-    XCALL   __mask1
-    ;; C1:C0 = 0xffff if RP < 16 (RP.4 is not set), else 0
-    cpi     RP, 1 << 4
-    sbc     C0, C0
-    sbc     C1, C1
-    ;; Drop RP.4 so that the comparison below tests RP.3 alone.
-    ;; Without this, RP = 16...23 compares as not below 8, C2 and C3
-    ;; are not swapped, and the bit ends up in C3 instead of C2.
-    andi    RP, (1 << 4) - 1
-    ;; Swap C2 with C3 if RP.3 is not set
-    cpi     RP, 1 << 3
-    sbc     C2, C2
-    and     C2, C3
-    eor     C3, C2
-    ;; Swap C3:C2 with C1:C0 if RP.4 is not set
-    and     C0, C2  $  eor     C2, C0
-    and     C1, C3  $  eor     C3, C1
-    ;; Finally, add the power-of-two:  A[] += C[]
-    add     A0, C0
-    adc     A1, C1
-    adc     A2, C2
-    adc     A3, C3
-    ret
-ENDF  __addmask_4
-#endif /* L_addmask_4 */
-
-#ifdef L_round_s4
-
-;; R25:R22 = round (R21:R18, R24)
-;; Clobbers: R18...R21
-;; Signed 4-byte rounding.  __roundsq3 only rebases RP and then runs
-;; into __roundsa3 (ENDF is assumed to emit no code), so that in
-;; total FBIT-1 - RP is computed with the FBIT of the respective type.
-DEFUN  __roundsq3
-    subi    RP, __SQ_FBIT__ - __SA_FBIT__
-    ;; FALLTHRU
-ENDF   __roundsq3
-DEFUN  __roundsa3
-    ;; RP = FBIT-1 - RP
-    subi    RP, __SA_FBIT__ - 1
-    neg     RP
-    ;; [ R25:R22  = 1 << (FBIT-1 - RP)
-    ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
-    XCALL   __addmask_4
-    ;; Saturate and mask.  The helper tests V as left by the addition.
-    XJMP    __round_s4_const
-ENDF  __roundsa3
-
-#endif /* L_round_s4 */
-
-#ifdef L_round_u4
-
-;; R25:R22 = round (R21:R18, R24)
-;; Clobbers: R18...R21
-;; Unsigned 4-byte rounding.  __roundusq3 only rebases RP and then
-;; runs into __roundusa3 (ENDF is assumed to emit no code), so that in
-;; total FBIT-1 - RP is computed with the FBIT of the respective type.
-DEFUN  __roundusq3
-    subi    RP, __USQ_FBIT__ - __USA_FBIT__
-    ;; FALLTHRU
-ENDF   __roundusq3
-DEFUN  __roundusa3
-    ;; RP = FBIT-1 - RP
-    subi    RP, __USA_FBIT__ - 1
-    neg     RP
-    ;; [ R25:R22  = 1 << (FBIT-1 - RP)
-    ;;   R21:R18 += 1 << (FBIT-1 - RP) ]
-    XCALL   __addmask_4
-    ;; Saturate and mask.  The helper tests Carry as left by the addition.
-    XJMP    __round_u4_const 
-ENDF  __roundusa3
-
-#endif /* L_round_u4 */
-
-
-#ifdef L_round_4_const
-
-;; Helpers for 4 byte wide rounding
-;; Entered by a jump right after __addmask_4 with
-;;   A[] = R21:R18 = value plus addend
-;;   C[] = R25:R22 = addend
-;; and SREG from that addition.  Returns C[] = saturated A[] with
-;; the bits below the rounding point cleared.
-
-DEFUN  __round_s4_const
-    ;; No signed overflow: skip the saturation
-    brvc 2f
-    ;; A[] = 0x7fffffff, the low bytes are set at label 1
-    ldi     A3, 0x7f
-    rjmp 1f
-    ;; FALLTHRU (Barrier)
-ENDF  __round_s4_const
-
-DEFUN __round_u4_const
-    ;; No carry out: skip the saturation
-    brcc 2f
-    ;; A[] = 0xffffffff
-    ldi     A3, 0xff
-1:
-    ldi     A2, 0xff
-    ldi     A1, 0xff
-    ldi     A0, 0xff
-2:
-    ;; Saturation is performed now.
-    ;; Currently, we have C[] = 2^{-RP-1}
-    ;; C[] = 2^{-RP}
-    lsl     C0
-    rol     C1
-    rol     C2
-    rol     C3
-    ;; C[] = -C[]: all bits from the rounding point upwards are set.
-    ;; __negsi2 is defined outside this section and is used here to
-    ;; negate R25:R22.
-    XCALL   __negsi2
-    ;; Clear the bits beyond the rounding point.
-    and     C0, A0
-    and     C1, A1
-    and     C2, A2
-    and     C3, A3
-    ret
-ENDF  __round_u4_const
-
-#endif /* L_round_4_const */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#undef RP
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Rounding, 8 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Rounding point as passed by the caller.  Same register as A0.
-#define RP     16
-;; FBIT - 1 of the type, loaded by the entry points below.
-;; Same register as A7.
-#define FBITm1 31
-
-;; Argument and result: R25:R18
-#define C0 18
-#define C1 C0 + 1
-#define C2 C0 + 2
-#define C3 C0 + 3
-#define C4 C0 + 4
-#define C5 C0 + 5
-#define C6 C0 + 6
-#define C7 C0 + 7
-
-;; Work registers for the value being rounded.
-;; Not a contiguous range: R17:R16 followed by R31:R26.
-#define A0 16
-#define A1 17
-#define A2 26
-#define A3 27
-#define A4 28
-#define A5 29
-#define A6 30
-#define A7 31
-
-
-#ifdef L_rounddq3
-;; Round the 8-byte value in R25:R18 at the rounding point in R16.
-;; The result is returned in R25:R18.
-;; Clobbers: ABI
-DEFUN  __rounddq3
-    ;; T = 0: __round_x8 saturates signed
-    clt
-    ;; Pass FBIT - 1 of this type to __round_x8
-    ldi     FBITm1, __DQ_FBIT__ - 1
-    XJMP    __round_x8
-ENDF  __rounddq3
-#endif /* L_rounddq3 */
-
-#ifdef L_roundudq3
-;; Round the 8-byte value in R25:R18 at the rounding point in R16.
-;; The result is returned in R25:R18.
-;; Clobbers: ABI
-DEFUN  __roundudq3
-    ;; T = 1: __round_x8 saturates unsigned
-    set
-    ;; Pass FBIT - 1 of this type to __round_x8
-    ldi     FBITm1, __UDQ_FBIT__ - 1
-    XJMP    __round_x8
-ENDF  __roundudq3
-#endif /* L_roundudq3 */
-
-#ifdef L_roundda3
-;; Round the 8-byte value in R25:R18 at the rounding point in R16.
-;; The result is returned in R25:R18.
-;; Clobbers: ABI
-DEFUN  __roundda3
-    ;; T = 0: __round_x8 saturates signed
-    clt
-    ;; Pass FBIT - 1 of this type to __round_x8
-    ldi     FBITm1, __DA_FBIT__ - 1
-    XJMP    __round_x8
-ENDF  __roundda3
-#endif /* L_roundda3 */
-
-#ifdef L_rounduda3
-;; Round the 8-byte value in R25:R18 at the rounding point in R16.
-;; The result is returned in R25:R18.
-;; Clobbers: ABI
-DEFUN  __rounduda3
-    ;; T = 1: __round_x8 saturates unsigned
-    set
-    ;; Pass FBIT - 1 of this type to __round_x8
-    ldi     FBITm1, __UDA_FBIT__ - 1
-    XJMP    __round_x8
-ENDF  __rounduda3
-#endif /* L_rounduda3 */
-
-#ifdef L_roundta3
-;; Round the 8-byte value in R25:R18 at the rounding point in R16.
-;; The result is returned in R25:R18.
-;; Clobbers: ABI
-DEFUN  __roundta3
-    ;; T = 0: __round_x8 saturates signed
-    clt
-    ;; Pass FBIT - 1 of this type to __round_x8
-    ldi     FBITm1, __TA_FBIT__ - 1
-    XJMP    __round_x8
-ENDF  __roundta3
-#endif /* L_roundta3 */
-
-#ifdef L_rounduta3
-;; Round the 8-byte value in R25:R18 at the rounding point in R16.
-;; The result is returned in R25:R18.
-;; Clobbers: ABI
-DEFUN  __rounduta3
-    ;; T = 1: __round_x8 saturates unsigned
-    set
-    ;; Pass FBIT - 1 of this type to __round_x8
-    ldi     FBITm1, __UTA_FBIT__ - 1
-    XJMP    __round_x8
-ENDF  __rounduta3
-#endif /* L_rounduta3 */
-
-
-#ifdef L_round_x8
-;; Common worker of the 8-byte rounding entry points.
-;; In:  C[] = R25:R18   value to be rounded
-;;      RP (R16)        rounding point
-;;      FBITm1 (R31)    FBIT - 1 of the type
-;;      T flag          set: saturate unsigned, clear: saturate signed
-;; Out: C[] = R25:R18   rounded value
-;; __ashldi3 and __negdi2 are defined outside this section.  They are
-;; used here on R25:R18 with the shift count in R16 and are assumed to
-;; preserve the T flag and R17, R26...R31 -- TODO confirm.
-DEFUN __round_x8
-    ;; Save the work registers A0, A1, A4 and A5
-    push r16
-    push r17
-    push r28
-    push r29
-    ;; Compute log2 of addend from rounding point
-    ;; RP = FBITm1 - RP
-    sub     RP, FBITm1
-    neg     RP
-    ;; Move input to work register A[]
-    ;; A0 is the same register as RP which is still needed as shift
-    ;; count, hence the low byte is parked on the stack.
-    push    C0
-    mov     A1, C1
-    wmov    A2, C2
-    wmov    A4, C4
-    ;; This overwrites FBITm1 (same register as A7), no more needed
-    wmov    A6, C6
-    ;; C[] = 1 << (FBIT-1 - RP)
-    XCALL   __clr_8
-    inc     C0
-    XCALL   __ashldi3
-    ;; The shift count is consumed, fetch the low byte of the input
-    pop     A0
-    ;; A[] += C[]
-    add     A0, C0
-    adc     A1, C1
-    adc     A2, C2
-    adc     A3, C3
-    adc     A4, C4
-    adc     A5, C5
-    adc     A6, C6
-    adc     A7, C7
-    ;; Select the overflow test according to T
-    brts    1f
-    ;; Signed
-    brvc    3f
-    ;; Signed overflow: A[] = 0x7f...
-    ;; V is known to be set here, i.e. this branch is always taken
-    brvs    2f
-1:  ;; Unsigned
-    brcc    3f
-    ;; Unsigned overflow: A[] = 0xff...
-2:  ldi     A7, 0xff
-    ldi     A6, 0xff
-    wmov    A0, A6
-    wmov    A2, A6
-    wmov    A4, A6
-    ;; Top bit = T: 0x7f... for signed, 0xff... for unsigned
-    bld     A7, 7
-3:
-    ;;  C[] = -C[] - C[]
-    ;; i.e. C[] = -(2 * addend), which has all bits from the rounding
-    ;; point upwards set.  R16 serves as shift count again, thus A0
-    ;; is parked on the stack around the shift.
-    push    A0
-    ldi     r16, 1
-    XCALL   __ashldi3
-    pop     A0
-    XCALL   __negdi2
-    ;; Clear the bits beyond the rounding point.
-    and     C0, A0
-    and     C1, A1
-    and     C2, A2
-    and     C3, A3
-    and     C4, A4
-    and     C5, A5
-    and     C6, A6
-    and     C7, A7
-    ;; Epilogue
-    pop r29
-    pop r28
-    pop r17
-    pop r16
-    ret
-ENDF  __round_x8
-
-#endif /* L_round_x8 */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-#undef A4
-#undef A5
-#undef A6
-#undef A7
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-#undef C4
-#undef C5
-#undef C6
-#undef C7
-
-#undef RP
-#undef FBITm1
-
-
-;; Supply implementations / symbols for the bit-banging functions
-;; __builtin_avr_bitsfx and __builtin_avr_fxbits
-;; The function does nothing but return, i.e. all registers are
-;; handed back to the caller as they came in.
-#ifdef L_ret
-DEFUN __ret
-    ret
-ENDF  __ret
-#endif /* L_ret */