aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S')
-rw-r--r--gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S1910
1 files changed, 0 insertions, 1910 deletions
diff --git a/gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S b/gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S
deleted file mode 100644
index d80389ce2..000000000
--- a/gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S
+++ /dev/null
@@ -1,1910 +0,0 @@
-/* -*- Mode: Asm -*- */
-;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
-;; Contributed by Sean D'Epagnier (sean@depagnier.com)
-;; Georg-Johann Lay (avr@gjlay.de)
-
-;; This file is free software; you can redistribute it and/or modify it
-;; under the terms of the GNU General Public License as published by the
-;; Free Software Foundation; either version 3, or (at your option) any
-;; later version.
-
-;; In addition to the permissions in the GNU General Public License, the
-;; Free Software Foundation gives you unlimited permission to link the
-;; compiled version of this file into combinations with other programs,
-;; and to distribute those combinations without any restriction coming
-;; from the use of this file. (The General Public License restrictions
-;; do apply in other respects; for example, they cover modification of
-;; the file, and distribution when not linked into a combine
-;; executable.)
-
-;; This file is distributed in the hope that it will be useful, but
-;; WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;; General Public License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with this program; see the file COPYING. If not, write to
-;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Fixed point library routines for AVR
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.section .text.libgcc.fixed, "ax", @progbits
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Conversions to float
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#if defined (L_fractqqsf)
-DEFUN __fractqqsf
- ;; Move in place for SA -> SF conversion
- clr r22
- mov r23, r24
- ;; Sign-extend
- lsl r24
- sbc r24, r24
- mov r25, r24
- XJMP __fractsasf
-ENDF __fractqqsf
-#endif /* L_fractqqsf */
-
-#if defined (L_fractuqqsf)
-DEFUN __fractuqqsf
- ;; Move in place for USA -> SF conversion
- clr r22
- mov r23, r24
- ;; Zero-extend
- clr r24
- clr r25
- XJMP __fractusasf
-ENDF __fractuqqsf
-#endif /* L_fractuqqsf */
-
-#if defined (L_fracthqsf)
-DEFUN __fracthqsf
- ;; Move in place for SA -> SF conversion
- wmov 22, 24
- ;; Sign-extend
- lsl r25
- sbc r24, r24
- mov r25, r24
- XJMP __fractsasf
-ENDF __fracthqsf
-#endif /* L_fracthqsf */
-
-#if defined (L_fractuhqsf)
-DEFUN __fractuhqsf
- ;; Move in place for USA -> SF conversion
- wmov 22, 24
- ;; Zero-extend
- clr r24
- clr r25
- XJMP __fractusasf
-ENDF __fractuhqsf
-#endif /* L_fractuhqsf */
-
-#if defined (L_fracthasf)
-DEFUN __fracthasf
- ;; Move in place for SA -> SF conversion
- clr r22
- mov r23, r24
- mov r24, r25
- ;; Sign-extend
- lsl r25
- sbc r25, r25
- XJMP __fractsasf
-ENDF __fracthasf
-#endif /* L_fracthasf */
-
-#if defined (L_fractuhasf)
-DEFUN __fractuhasf
- ;; Move in place for USA -> SF conversion
- clr r22
- mov r23, r24
- mov r24, r25
- ;; Zero-extend
- clr r25
- XJMP __fractusasf
-ENDF __fractuhasf
-#endif /* L_fractuhasf */
-
-
-#if defined (L_fractsqsf)
-DEFUN __fractsqsf
- XCALL __floatsisf
- ;; Divide non-zero results by 2^31 to move the
- ;; decimal point into place
- tst r25
- breq 0f
- subi r24, exp_lo (31)
- sbci r25, exp_hi (31)
-0: ret
-ENDF __fractsqsf
-#endif /* L_fractsqsf */
-
-#if defined (L_fractusqsf)
-DEFUN __fractusqsf
- XCALL __floatunsisf
- ;; Divide non-zero results by 2^32 to move the
- ;; decimal point into place
- cpse r25, __zero_reg__
- subi r25, exp_hi (32)
- ret
-ENDF __fractusqsf
-#endif /* L_fractusqsf */
-
-#if defined (L_fractsasf)
-DEFUN __fractsasf
- XCALL __floatsisf
- ;; Divide non-zero results by 2^15 to move the
- ;; decimal point into place
- tst r25
- breq 0f
- subi r24, exp_lo (15)
- sbci r25, exp_hi (15)
-0: ret
-ENDF __fractsasf
-#endif /* L_fractsasf */
-
-#if defined (L_fractusasf)
-DEFUN __fractusasf
- XCALL __floatunsisf
- ;; Divide non-zero results by 2^16 to move the
- ;; decimal point into place
- cpse r25, __zero_reg__
- subi r25, exp_hi (16)
- ret
-ENDF __fractusasf
-#endif /* L_fractusasf */
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Conversions from float
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#if defined (L_fractsfqq)
-DEFUN __fractsfqq
- ;; Multiply with 2^{24+7} to get a QQ result in r25
- subi r24, exp_lo (-31)
- sbci r25, exp_hi (-31)
- XCALL __fixsfsi
- mov r24, r25
- ret
-ENDF __fractsfqq
-#endif /* L_fractsfqq */
-
-#if defined (L_fractsfuqq)
-DEFUN __fractsfuqq
- ;; Multiply with 2^{24+8} to get a UQQ result in r25
- subi r25, exp_hi (-32)
- XCALL __fixunssfsi
- mov r24, r25
- ret
-ENDF __fractsfuqq
-#endif /* L_fractsfuqq */
-
-#if defined (L_fractsfha)
-DEFUN __fractsfha
- ;; Multiply with 2^{16+7} to get a HA result in r25:r24
- subi r24, exp_lo (-23)
- sbci r25, exp_hi (-23)
- XJMP __fixsfsi
-ENDF __fractsfha
-#endif /* L_fractsfha */
-
-#if defined (L_fractsfuha)
-DEFUN __fractsfuha
- ;; Multiply with 2^24 to get a UHA result in r25:r24
- subi r25, exp_hi (-24)
- XJMP __fixunssfsi
-ENDF __fractsfuha
-#endif /* L_fractsfuha */
-
-#if defined (L_fractsfhq)
-FALIAS __fractsfsq
-
-DEFUN __fractsfhq
- ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
- ;; resp. with 2^31 to get a SQ result in r25:r22
- subi r24, exp_lo (-31)
- sbci r25, exp_hi (-31)
- XJMP __fixsfsi
-ENDF __fractsfhq
-#endif /* L_fractsfhq */
-
-#if defined (L_fractsfuhq)
-FALIAS __fractsfusq
-
-DEFUN __fractsfuhq
- ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
- ;; resp. with 2^32 to get a USQ result in r25:r22
- subi r25, exp_hi (-32)
- XJMP __fixunssfsi
-ENDF __fractsfuhq
-#endif /* L_fractsfuhq */
-
-#if defined (L_fractsfsa)
-DEFUN __fractsfsa
- ;; Multiply with 2^15 to get a SA result in r25:r22
- subi r24, exp_lo (-15)
- sbci r25, exp_hi (-15)
- XJMP __fixsfsi
-ENDF __fractsfsa
-#endif /* L_fractsfsa */
-
-#if defined (L_fractsfusa)
-DEFUN __fractsfusa
- ;; Multiply with 2^16 to get a USA result in r25:r22
- subi r25, exp_hi (-16)
- XJMP __fixunssfsi
-ENDF __fractsfusa
-#endif /* L_fractsfusa */
-
-
-;; For multiplication the functions here are called directly from
-;; avr-fixed.md instead of using the standard libcall mechanisms.
-;; This can make better code because GCC knows exactly which
-;; of the call-used registers (not all of them) are clobbered. */
-
-/*******************************************************
- Fractional Multiplication 8 x 8 without MUL
-*******************************************************/
-
-#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
-;;; R23 = R24 * R25
-;;; Clobbers: __tmp_reg__, R22, R24, R25
-;;; Rounding: ???
-DEFUN __mulqq3
- XCALL __fmuls
- ;; TR 18037 requires that (-1) * (-1) does not overflow
- ;; The only input that can produce -1 is (-1)^2.
- dec r23
- brvs 0f
- inc r23
-0: ret
-ENDF __mulqq3
-#endif /* L_mulqq3 && ! HAVE_MUL */
-
-/*******************************************************
- Fractional Multiply .16 x .16 with and without MUL
-*******************************************************/
-
-#if defined (L_mulhq3)
-;;; Same code with and without MUL, but the interfaces differ:
-;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
-;;; Clobbers: ABI, called by optabs
-;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
-;;; Clobbers: __tmp_reg__, R22, R23
-;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
-DEFUN __mulhq3
- XCALL __mulhisi3
- ;; Shift result into place
- lsl r23
- rol r24
- rol r25
- brvs 1f
- ;; Round
- sbrc r23, 7
- adiw r24, 1
- ret
-1: ;; Overflow. TR 18037 requires (-1)^2 not to overflow
- ldi r24, lo8 (0x7fff)
- ldi r25, hi8 (0x7fff)
- ret
-ENDF __mulhq3
-#endif /* defined (L_mulhq3) */
-
-#if defined (L_muluhq3)
-;;; Same code with and without MUL, but the interfaces differ:
-;;; no MUL: (R25:R24) *= (R23:R22)
-;;; Clobbers: ABI, called by optabs
-;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
-;;; Clobbers: __tmp_reg__, R22, R23
-;;; Rounding: -0.5 LSB < error <= 0.5 LSB
-DEFUN __muluhq3
- XCALL __umulhisi3
- ;; Round
- sbrc r23, 7
- adiw r24, 1
- ret
-ENDF __muluhq3
-#endif /* L_muluhq3 */
-
-
-/*******************************************************
- Fixed Multiply 8.8 x 8.8 with and without MUL
-*******************************************************/
-
-#if defined (L_mulha3)
-;;; Same code with and without MUL, but the interfaces differ:
-;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
-;;; Clobbers: ABI, called by optabs
-;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
-;;; Clobbers: __tmp_reg__, R22, R23
-;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
-DEFUN __mulha3
- XCALL __mulhisi3
- lsl r22
- rol r23
- rol r24
- XJMP __muluha3_round
-ENDF __mulha3
-#endif /* L_mulha3 */
-
-#if defined (L_muluha3)
-;;; Same code with and without MUL, but the interfaces differ:
-;;; no MUL: (R25:R24) *= (R23:R22)
-;;; Clobbers: ABI, called by optabs
-;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
-;;; Clobbers: __tmp_reg__, R22, R23
-;;; Rounding: -0.5 LSB < error <= 0.5 LSB
-DEFUN __muluha3
- XCALL __umulhisi3
- XJMP __muluha3_round
-ENDF __muluha3
-#endif /* L_muluha3 */
-
-#if defined (L_muluha3_round)
-DEFUN __muluha3_round
- ;; Shift result into place
- mov r25, r24
- mov r24, r23
- ;; Round
- sbrc r22, 7
- adiw r24, 1
- ret
-ENDF __muluha3_round
-#endif /* L_muluha3_round */
-
-
-/*******************************************************
- Fixed Multiplication 16.16 x 16.16
-*******************************************************/
-
-;; Bits outside the result (below LSB), used in the signed version
-#define GUARD __tmp_reg__
-
-#if defined (__AVR_HAVE_MUL__)
-
-;; Multiplier
-#define A0 16
-#define A1 A0+1
-#define A2 A1+1
-#define A3 A2+1
-
-;; Multiplicand
-#define B0 20
-#define B1 B0+1
-#define B2 B1+1
-#define B3 B2+1
-
-;; Result
-#define C0 24
-#define C1 C0+1
-#define C2 C1+1
-#define C3 C2+1
-
-#if defined (L_mulusa3)
-;;; (C3:C0) = (A3:A0) * (B3:B0)
-DEFUN __mulusa3
- set
- ;; Fallthru
-ENDF __mulusa3
-
-;;; Round for last digit iff T = 1
-;;; Return guard bits in GUARD (__tmp_reg__).
-;;; Rounding, T = 0: -1.0 LSB < error <= 0 LSB
-;;; Rounding, T = 1: -0.5 LSB < error <= 0.5 LSB
-DEFUN __mulusa3_round
- ;; Some of the MUL instructions have LSBs outside the result.
- ;; Don't ignore these LSBs in order to tame rounding error.
- ;; Use C2/C3 for these LSBs.
-
- clr C0
- clr C1
- mul A0, B0 $ movw C2, r0
-
- mul A1, B0 $ add C3, r0 $ adc C0, r1
- mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1
-
- ;; Round if T = 1. Store guarding bits outside the result for rounding
- ;; and left-shift by the signed version (function below).
- brtc 0f
- sbrc C3, 7
- adiw C0, 1
-0: push C3
-
- ;; The following MULs don't have LSBs outside the result.
- ;; C2/C3 is the high part.
-
- mul A0, B2 $ add C0, r0 $ adc C1, r1 $ sbc C2, C2
- mul A1, B1 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
- mul A2, B0 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
- neg C2
-
- mul A0, B3 $ add C1, r0 $ adc C2, r1 $ sbc C3, C3
- mul A1, B2 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
- mul A2, B1 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
- mul A3, B0 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
- neg C3
-
- mul A1, B3 $ add C2, r0 $ adc C3, r1
- mul A2, B2 $ add C2, r0 $ adc C3, r1
- mul A3, B1 $ add C2, r0 $ adc C3, r1
-
- mul A2, B3 $ add C3, r0
- mul A3, B2 $ add C3, r0
-
- ;; Guard bits used in the signed version below.
- pop GUARD
- clr __zero_reg__
- ret
-ENDF __mulusa3_round
-#endif /* L_mulusa3 */
-
-#if defined (L_mulsa3)
-;;; (C3:C0) = (A3:A0) * (B3:B0)
-;;; Clobbers: __tmp_reg__, T
-;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
-DEFUN __mulsa3
- clt
- XCALL __mulusa3_round
- ;; A posteriori sign extension of the operands
- tst B3
- brpl 1f
- sub C2, A0
- sbc C3, A1
-1: sbrs A3, 7
- rjmp 2f
- sub C2, B0
- sbc C3, B1
-2:
- ;; Shift 1 bit left to adjust for 15 fractional bits
- lsl GUARD
- rol C0
- rol C1
- rol C2
- rol C3
- ;; Round last digit
- lsl GUARD
- adc C0, __zero_reg__
- adc C1, __zero_reg__
- adc C2, __zero_reg__
- adc C3, __zero_reg__
- ret
-ENDF __mulsa3
-#endif /* L_mulsa3 */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-#undef B0
-#undef B1
-#undef B2
-#undef B3
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#else /* __AVR_HAVE_MUL__ */
-
-#define A0 18
-#define A1 A0+1
-#define A2 A0+2
-#define A3 A0+3
-
-#define B0 22
-#define B1 B0+1
-#define B2 B0+2
-#define B3 B0+3
-
-#define C0 22
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
-;; __tmp_reg__
-#define CC0 0
-;; __zero_reg__
-#define CC1 1
-#define CC2 16
-#define CC3 17
-
-#define AA0 26
-#define AA1 AA0+1
-#define AA2 30
-#define AA3 AA2+1
-
-#if defined (L_mulsa3)
-;;; (R25:R22) *= (R21:R18)
-;;; Clobbers: ABI, called by optabs
-;;; Rounding: -1 LSB <= error <= 1 LSB
-DEFUN __mulsa3
- push B0
- push B1
- push B3
- clt
- XCALL __mulusa3_round
- pop r30
- ;; sign-extend B
- bst r30, 7
- brtc 1f
- ;; A1, A0 survived in R27:R26
- sub C2, AA0
- sbc C3, AA1
-1:
- pop AA1 ;; B1
- pop AA0 ;; B0
-
- ;; sign-extend A. A3 survived in R31
- bst AA3, 7
- brtc 2f
- sub C2, AA0
- sbc C3, AA1
-2:
- ;; Shift 1 bit left to adjust for 15 fractional bits
- lsl GUARD
- rol C0
- rol C1
- rol C2
- rol C3
- ;; Round last digit
- lsl GUARD
- adc C0, __zero_reg__
- adc C1, __zero_reg__
- adc C2, __zero_reg__
- adc C3, __zero_reg__
- ret
-ENDF __mulsa3
-#endif /* L_mulsa3 */
-
-#if defined (L_mulusa3)
-;;; (R25:R22) *= (R21:R18)
-;;; Clobbers: ABI, called by optabs
-;;; Rounding: -1 LSB <= error <= 1 LSB
-DEFUN __mulusa3
- set
- ;; Fallthru
-ENDF __mulusa3
-
-;;; A[] survives in 26, 27, 30, 31
-;;; Also used by __mulsa3 with T = 0
-;;; Round if T = 1
-;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
-DEFUN __mulusa3_round
- push CC2
- push CC3
- ; clear result
- clr __tmp_reg__
- wmov CC2, CC0
- ; save multiplicand
- wmov AA0, A0
- wmov AA2, A2
- rjmp 3f
-
- ;; Loop the integral part
-
-1: ;; CC += A * 2^n; n >= 0
- add CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
-
-2: ;; A <<= 1
- lsl A0 $ rol A1 $ rol A2 $ rol A3
-
-3: ;; IBIT(B) >>= 1
- ;; Carry = n-th bit of B; n >= 0
- lsr B3
- ror B2
- brcs 1b
- sbci B3, 0
- brne 2b
-
- ;; Loop the fractional part
- ;; B2/B3 is 0 now, use as guard bits for rounding
- ;; Restore multiplicand
- wmov A0, AA0
- wmov A2, AA2
- rjmp 5f
-
-4: ;; CC += A:Guard * 2^n; n < 0
- add B3,B2 $ adc CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
-5:
- ;; A:Guard >>= 1
- lsr A3 $ ror A2 $ ror A1 $ ror A0 $ ror B2
-
- ;; FBIT(B) <<= 1
- ;; Carry = n-th bit of B; n < 0
- lsl B0
- rol B1
- brcs 4b
- sbci B0, 0
- brne 5b
-
- ;; Save guard bits and set carry for rounding
- push B3
- lsl B3
- ;; Move result into place
- wmov C2, CC2
- wmov C0, CC0
- clr __zero_reg__
- brtc 6f
- ;; Round iff T = 1
- adc C0, __zero_reg__
- adc C1, __zero_reg__
- adc C2, __zero_reg__
- adc C3, __zero_reg__
-6:
- pop GUARD
- ;; Epilogue
- pop CC3
- pop CC2
- ret
-ENDF __mulusa3_round
-#endif /* L_mulusa3 */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-#undef B0
-#undef B1
-#undef B2
-#undef B3
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-#undef AA0
-#undef AA1
-#undef AA2
-#undef AA3
-#undef CC0
-#undef CC1
-#undef CC2
-#undef CC3
-
-#endif /* __AVR_HAVE_MUL__ */
-
-#undef GUARD
-
-/***********************************************************
- Fixed unsigned saturated Multiplication 8.8 x 8.8
-***********************************************************/
-
-#define C0 22
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-#define SS __tmp_reg__
-
-#if defined (L_usmuluha3)
-DEFUN __usmuluha3
- ;; Widening multiply
-#ifdef __AVR_HAVE_MUL__
- ;; Adjust interface
- movw R26, R22
- movw R18, R24
-#endif /* HAVE MUL */
- XCALL __umulhisi3
- tst C3
- brne .Lmax
- ;; Round, target is in C1..C2
- lsl C0
- adc C1, __zero_reg__
- adc C2, __zero_reg__
- brcs .Lmax
- ;; Move result into place
- mov C3, C2
- mov C2, C1
- ret
-.Lmax:
- ;; Saturate
- ldi C2, 0xff
- ldi C3, 0xff
- ret
-ENDF __usmuluha3
-#endif /* L_usmuluha3 */
-
-/***********************************************************
- Fixed signed saturated Multiplication s8.7 x s8.7
-***********************************************************/
-
-#if defined (L_ssmulha3)
-DEFUN __ssmulha3
- ;; Widening multiply
-#ifdef __AVR_HAVE_MUL__
- ;; Adjust interface
- movw R26, R22
- movw R18, R24
-#endif /* HAVE MUL */
- XCALL __mulhisi3
- ;; Adjust decimal point
- lsl C0
- rol C1
- rol C2
- brvs .LsatC3.3
- ;; The 9 MSBs must be the same
- rol C3
- sbc SS, SS
- cp C3, SS
- brne .LsatSS
- ;; Round
- lsl C0
- adc C1, __zero_reg__
- adc C2, __zero_reg__
- brvs .Lmax
- ;; Move result into place
- mov C3, C2
- mov C2, C1
- ret
-.Lmax:
- ;; Load 0x7fff
- clr C3
-.LsatC3.3:
- ;; C3 < 0 --> 0x8000
- ;; C3 >= 0 --> 0x7fff
- mov SS, C3
-.LsatSS:
- ;; Load min / max value:
- ;; SS = -1 --> 0x8000
- ;; SS = 0 --> 0x7fff
- ldi C3, 0x7f
- ldi C2, 0xff
- sbrc SS, 7
- adiw C2, 1
- ret
-ENDF __ssmulha3
-#endif /* L_ssmulha3 */
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-#undef SS
-
-/***********************************************************
- Fixed unsigned saturated Multiplication 16.16 x 16.16
-***********************************************************/
-
-#define C0 18
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-#define C4 C0+4
-#define C5 C0+5
-#define C6 C0+6
-#define C7 C0+7
-#define SS __tmp_reg__
-
-#if defined (L_usmulusa3)
-;; R22[4] = R22[4] *{ssat} R18[4]
-;; Ordinary ABI function
-DEFUN __usmulusa3
- ;; Widening multiply
- XCALL __umulsidi3
- or C7, C6
- brne .Lmax
- ;; Round, target is in C2..C5
- lsl C1
- adc C2, __zero_reg__
- adc C3, __zero_reg__
- adc C4, __zero_reg__
- adc C5, __zero_reg__
- brcs .Lmax
- ;; Move result into place
- wmov C6, C4
- wmov C4, C2
- ret
-.Lmax:
- ;; Saturate
- ldi C7, 0xff
- ldi C6, 0xff
- wmov C4, C6
- ret
-ENDF __usmulusa3
-#endif /* L_usmulusa3 */
-
-/***********************************************************
- Fixed signed saturated Multiplication s16.15 x s16.15
-***********************************************************/
-
-#if defined (L_ssmulsa3)
-;; R22[4] = R22[4] *{ssat} R18[4]
-;; Ordinary ABI function
-DEFUN __ssmulsa3
- ;; Widening multiply
- XCALL __mulsidi3
- ;; Adjust decimal point
- lsl C1
- rol C2
- rol C3
- rol C4
- rol C5
- brvs .LsatC7.7
- ;; The 17 MSBs must be the same
- rol C6
- rol C7
- sbc SS, SS
- cp C6, SS
- cpc C7, SS
- brne .LsatSS
- ;; Round
- lsl C1
- adc C2, __zero_reg__
- adc C3, __zero_reg__
- adc C4, __zero_reg__
- adc C5, __zero_reg__
- brvs .Lmax
- ;; Move result into place
- wmov C6, C4
- wmov C4, C2
- ret
-
-.Lmax:
- ;; Load 0x7fffffff
- clr C7
-.LsatC7.7:
- ;; C7 < 0 --> 0x80000000
- ;; C7 >= 0 --> 0x7fffffff
- lsl C7
- sbc SS, SS
-.LsatSS:
- ;; Load min / max value:
- ;; SS = -1 --> 0x80000000
- ;; SS = 0 --> 0x7fffffff
- com SS
- mov C4, SS
- mov C5, C4
- wmov C6, C4
- subi C7, 0x80
- ret
-ENDF __ssmulsa3
-#endif /* L_ssmulsa3 */
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-#undef C4
-#undef C5
-#undef C6
-#undef C7
-#undef SS
-
-/*******************************************************
- Fractional Division 8 / 8
-*******************************************************/
-
-#define r_divd r25 /* dividend */
-#define r_quo r24 /* quotient */
-#define r_div r22 /* divisor */
-#define r_sign __tmp_reg__
-
-#if defined (L_divqq3)
-DEFUN __divqq3
- mov r_sign, r_divd
- eor r_sign, r_div
- sbrc r_div, 7
- neg r_div
- sbrc r_divd, 7
- neg r_divd
- XCALL __divqq_helper
- lsr r_quo
- sbrc r_sign, 7 ; negate result if needed
- neg r_quo
- ret
-ENDF __divqq3
-#endif /* L_divqq3 */
-
-#if defined (L_udivuqq3)
-DEFUN __udivuqq3
- cp r_divd, r_div
- brsh 0f
- XJMP __divqq_helper
- ;; Result is out of [0, 1) ==> Return 1 - eps.
-0: ldi r_quo, 0xff
- ret
-ENDF __udivuqq3
-#endif /* L_udivuqq3 */
-
-
-#if defined (L_divqq_helper)
-DEFUN __divqq_helper
- clr r_quo ; clear quotient
- inc __zero_reg__ ; init loop counter, used per shift
-__udivuqq3_loop:
- lsl r_divd ; shift dividend
- brcs 0f ; dividend overflow
- cp r_divd,r_div ; compare dividend & divisor
- brcc 0f ; dividend >= divisor
- rol r_quo ; shift quotient (with CARRY)
- rjmp __udivuqq3_cont
-0:
- sub r_divd,r_div ; restore dividend
- lsl r_quo ; shift quotient (without CARRY)
-__udivuqq3_cont:
- lsl __zero_reg__ ; shift loop-counter bit
- brne __udivuqq3_loop
- com r_quo ; complement result
- ; because C flag was complemented in loop
- ret
-ENDF __divqq_helper
-#endif /* L_divqq_helper */
-
-#undef r_divd
-#undef r_quo
-#undef r_div
-#undef r_sign
-
-
-/*******************************************************
- Fractional Division 16 / 16
-*******************************************************/
-#define r_divdL 26 /* dividend Low */
-#define r_divdH 27 /* dividend Hig */
-#define r_quoL 24 /* quotient Low */
-#define r_quoH 25 /* quotient High */
-#define r_divL 22 /* divisor */
-#define r_divH 23 /* divisor */
-#define r_cnt 21
-
-#if defined (L_divhq3)
-DEFUN __divhq3
- mov r0, r_divdH
- eor r0, r_divH
- sbrs r_divH, 7
- rjmp 1f
- NEG2 r_divL
-1:
- sbrs r_divdH, 7
- rjmp 2f
- NEG2 r_divdL
-2:
- cp r_divdL, r_divL
- cpc r_divdH, r_divH
- breq __divhq3_minus1 ; if equal return -1
- XCALL __udivuhq3
- lsr r_quoH
- ror r_quoL
- brpl 9f
- ;; negate result if needed
- NEG2 r_quoL
-9:
- ret
-__divhq3_minus1:
- ldi r_quoH, 0x80
- clr r_quoL
- ret
-ENDF __divhq3
-#endif /* defined (L_divhq3) */
-
-#if defined (L_udivuhq3)
-DEFUN __udivuhq3
- sub r_quoH,r_quoH ; clear quotient and carry
- ;; FALLTHRU
-ENDF __udivuhq3
-
-DEFUN __udivuha3_common
- clr r_quoL ; clear quotient
- ldi r_cnt,16 ; init loop counter
-__udivuhq3_loop:
- rol r_divdL ; shift dividend (with CARRY)
- rol r_divdH
- brcs __udivuhq3_ep ; dividend overflow
- cp r_divdL,r_divL ; compare dividend & divisor
- cpc r_divdH,r_divH
- brcc __udivuhq3_ep ; dividend >= divisor
- rol r_quoL ; shift quotient (with CARRY)
- rjmp __udivuhq3_cont
-__udivuhq3_ep:
- sub r_divdL,r_divL ; restore dividend
- sbc r_divdH,r_divH
- lsl r_quoL ; shift quotient (without CARRY)
-__udivuhq3_cont:
- rol r_quoH ; shift quotient
- dec r_cnt ; decrement loop counter
- brne __udivuhq3_loop
- com r_quoL ; complement result
- com r_quoH ; because C flag was complemented in loop
- ret
-ENDF __udivuha3_common
-#endif /* defined (L_udivuhq3) */
-
-/*******************************************************
- Fixed Division 8.8 / 8.8
-*******************************************************/
-#if defined (L_divha3)
-DEFUN __divha3
- mov r0, r_divdH
- eor r0, r_divH
- sbrs r_divH, 7
- rjmp 1f
- NEG2 r_divL
-1:
- sbrs r_divdH, 7
- rjmp 2f
- NEG2 r_divdL
-2:
- XCALL __udivuha3
- lsr r_quoH ; adjust to 7 fractional bits
- ror r_quoL
- sbrs r0, 7 ; negate result if needed
- ret
- NEG2 r_quoL
- ret
-ENDF __divha3
-#endif /* defined (L_divha3) */
-
-#if defined (L_udivuha3)
-DEFUN __udivuha3
- mov r_quoH, r_divdL
- mov r_divdL, r_divdH
- clr r_divdH
- lsl r_quoH ; shift quotient into carry
- XJMP __udivuha3_common ; same as fractional after rearrange
-ENDF __udivuha3
-#endif /* defined (L_udivuha3) */
-
-#undef r_divdL
-#undef r_divdH
-#undef r_quoL
-#undef r_quoH
-#undef r_divL
-#undef r_divH
-#undef r_cnt
-
-/*******************************************************
- Fixed Division 16.16 / 16.16
-*******************************************************/
-
-#define r_arg1L 24 /* arg1 gets passed already in place */
-#define r_arg1H 25
-#define r_arg1HL 26
-#define r_arg1HH 27
-#define r_divdL 26 /* dividend Low */
-#define r_divdH 27
-#define r_divdHL 30
-#define r_divdHH 31 /* dividend High */
-#define r_quoL 22 /* quotient Low */
-#define r_quoH 23
-#define r_quoHL 24
-#define r_quoHH 25 /* quotient High */
-#define r_divL 18 /* divisor Low */
-#define r_divH 19
-#define r_divHL 20
-#define r_divHH 21 /* divisor High */
-#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
-
-#if defined (L_divsa3)
-DEFUN __divsa3
- mov r0, r_arg1HH
- eor r0, r_divHH
- sbrs r_divHH, 7
- rjmp 1f
- NEG4 r_divL
-1:
- sbrs r_arg1HH, 7
- rjmp 2f
- NEG4 r_arg1L
-2:
- XCALL __udivusa3
- lsr r_quoHH ; adjust to 15 fractional bits
- ror r_quoHL
- ror r_quoH
- ror r_quoL
- sbrs r0, 7 ; negate result if needed
- ret
- ;; negate r_quoL
- XJMP __negsi2
-ENDF __divsa3
-#endif /* defined (L_divsa3) */
-
-#if defined (L_udivusa3)
-DEFUN __udivusa3
- ldi r_divdHL, 32 ; init loop counter
- mov r_cnt, r_divdHL
- clr r_divdHL
- clr r_divdHH
- wmov r_quoL, r_divdHL
- lsl r_quoHL ; shift quotient into carry
- rol r_quoHH
-__udivusa3_loop:
- rol r_divdL ; shift dividend (with CARRY)
- rol r_divdH
- rol r_divdHL
- rol r_divdHH
- brcs __udivusa3_ep ; dividend overflow
- cp r_divdL,r_divL ; compare dividend & divisor
- cpc r_divdH,r_divH
- cpc r_divdHL,r_divHL
- cpc r_divdHH,r_divHH
- brcc __udivusa3_ep ; dividend >= divisor
- rol r_quoL ; shift quotient (with CARRY)
- rjmp __udivusa3_cont
-__udivusa3_ep:
- sub r_divdL,r_divL ; restore dividend
- sbc r_divdH,r_divH
- sbc r_divdHL,r_divHL
- sbc r_divdHH,r_divHH
- lsl r_quoL ; shift quotient (without CARRY)
-__udivusa3_cont:
- rol r_quoH ; shift quotient
- rol r_quoHL
- rol r_quoHH
- dec r_cnt ; decrement loop counter
- brne __udivusa3_loop
- com r_quoL ; complement result
- com r_quoH ; because C flag was complemented in loop
- com r_quoHL
- com r_quoHH
- ret
-ENDF __udivusa3
-#endif /* defined (L_udivusa3) */
-
-#undef r_arg1L
-#undef r_arg1H
-#undef r_arg1HL
-#undef r_arg1HH
-#undef r_divdL
-#undef r_divdH
-#undef r_divdHL
-#undef r_divdHH
-#undef r_quoL
-#undef r_quoH
-#undef r_quoHL
-#undef r_quoHH
-#undef r_divL
-#undef r_divH
-#undef r_divHL
-#undef r_divHH
-#undef r_cnt
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Saturation, 1 Byte
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; First Argument and Return Register
-#define A0 24
-
-#if defined (L_ssabs_1)
-DEFUN __ssabs_1
- sbrs A0, 7
- ret
- neg A0
- sbrc A0,7
- dec A0
- ret
-ENDF __ssabs_1
-#endif /* L_ssabs_1 */
-
-#undef A0
-
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Saturation, 2 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; First Argument and Return Register
-#define A0 24
-#define A1 A0+1
-
-#if defined (L_ssneg_2)
-DEFUN __ssneg_2
- NEG2 A0
- brvc 0f
- sbiw A0, 1
-0: ret
-ENDF __ssneg_2
-#endif /* L_ssneg_2 */
-
-#if defined (L_ssabs_2)
-DEFUN __ssabs_2
- sbrs A1, 7
- ret
- XJMP __ssneg_2
-ENDF __ssabs_2
-#endif /* L_ssabs_2 */
-
-#undef A0
-#undef A1
-
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Saturation, 4 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; First Argument and Return Register
-#define A0 22
-#define A1 A0+1
-#define A2 A0+2
-#define A3 A0+3
-
-#if defined (L_ssneg_4)
-DEFUN __ssneg_4
- XCALL __negsi2
- brvc 0f
- ldi A3, 0x7f
- ldi A2, 0xff
- ldi A1, 0xff
- ldi A0, 0xff
-0: ret
-ENDF __ssneg_4
-#endif /* L_ssneg_4 */
-
-#if defined (L_ssabs_4)
-DEFUN __ssabs_4
- sbrs A3, 7
- ret
- XJMP __ssneg_4
-ENDF __ssabs_4
-#endif /* L_ssabs_4 */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Saturation, 8 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; First Argument and Return Register
-#define A0 18
-#define A1 A0+1
-#define A2 A0+2
-#define A3 A0+3
-#define A4 A0+4
-#define A5 A0+5
-#define A6 A0+6
-#define A7 A0+7
-
-#if defined (L_clr_8)
-FALIAS __usneguta2
-FALIAS __usneguda2
-FALIAS __usnegudq2
-
-;; Clear Carry and all Bytes
-DEFUN __clr_8
- ;; Clear Carry and set Z
- sub A7, A7
- ;; FALLTHRU
-ENDF __clr_8
-;; Propagate Carry to all Bytes, Carry unaltered
-DEFUN __sbc_8
- sbc A7, A7
- sbc A6, A6
- wmov A4, A6
- wmov A2, A6
- wmov A0, A6
- ret
-ENDF __sbc_8
-#endif /* L_clr_8 */
-
-#if defined (L_ssneg_8)
-FALIAS __ssnegta2
-FALIAS __ssnegda2
-FALIAS __ssnegdq2
-
-DEFUN __ssneg_8
- XCALL __negdi2
- brvc 0f
- ;; A[] = 0x7fffffff
- sec
- XCALL __sbc_8
- ldi A7, 0x7f
-0: ret
-ENDF __ssneg_8
-#endif /* L_ssneg_8 */
-
-#if defined (L_ssabs_8)
-FALIAS __ssabsta2
-FALIAS __ssabsda2
-FALIAS __ssabsdq2
-
-DEFUN __ssabs_8
- sbrs A7, 7
- ret
- XJMP __ssneg_8
-ENDF __ssabs_8
-#endif /* L_ssabs_8 */
-
-;; Second Argument
-#define B0 10
-#define B1 B0+1
-#define B2 B0+2
-#define B3 B0+3
-#define B4 B0+4
-#define B5 B0+5
-#define B6 B0+6
-#define B7 B0+7
-
-#if defined (L_usadd_8)
-FALIAS __usadduta3
-FALIAS __usadduda3
-FALIAS __usaddudq3
-
-DEFUN __usadd_8
- XCALL __adddi3
- brcs 0f
- ret
-0: ;; A[] = 0xffffffff
- XJMP __sbc_8
-ENDF __usadd_8
-#endif /* L_usadd_8 */
-
-#if defined (L_ussub_8)
-FALIAS __ussubuta3
-FALIAS __ussubuda3
-FALIAS __ussubudq3
-
-DEFUN __ussub_8
- XCALL __subdi3
- brcs 0f
- ret
-0: ;; A[] = 0
- XJMP __clr_8
-ENDF __ussub_8
-#endif /* L_ussub_8 */
-
-#if defined (L_ssadd_8)
-FALIAS __ssaddta3
-FALIAS __ssaddda3
-FALIAS __ssadddq3
-
-DEFUN __ssadd_8
- XCALL __adddi3
- brvc 0f
- ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
- cpi B7, 0x80
- XCALL __sbc_8
- subi A7, 0x80
-0: ret
-ENDF __ssadd_8
-#endif /* L_ssadd_8 */
-
-#if defined (L_sssub_8)
-FALIAS __sssubta3
-FALIAS __sssubda3
-FALIAS __sssubdq3
-
-DEFUN __sssub_8
- XCALL __subdi3
- brvc 0f
- ;; A = (B < 0) ? INT64_MAX : INT64_MIN
- ldi A7, 0x7f
- cp A7, B7
- XCALL __sbc_8
- subi A7, 0x80
-0: ret
-ENDF __sssub_8
-#endif /* L_sssub_8 */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-#undef A4
-#undef A5
-#undef A6
-#undef A7
-#undef B0
-#undef B1
-#undef B2
-#undef B3
-#undef B4
-#undef B5
-#undef B6
-#undef B7
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Rounding Helpers
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#ifdef L_mask1
-
-#define AA 24
-#define CC 25
-
-;; R25 = 1 << (R24 & 7)
-;; CC = 1 << (AA & 7)
-;; Clobbers: None
-DEFUN __mask1
- ;; CC = 2 ^ AA.1
- ldi CC, 1 << 2
- sbrs AA, 1
- ldi CC, 1 << 0
- ;; CC *= 2 ^ AA.0
- sbrc AA, 0
- lsl CC
- ;; CC *= 2 ^ AA.2
- sbrc AA, 2
- swap CC
- ret
-ENDF __mask1
-
-#undef AA
-#undef CC
-#endif /* L_mask1 */
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; The rounding point. Any bits smaller than
-;; 2^{-RP} will be cleared.
-#define RP R24
-
-#define A0 22
-#define A1 A0 + 1
-
-#define C0 24
-#define C1 C0 + 1
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Rounding, 1 Byte
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#ifdef L_roundqq3
-
-;; R24 = round (R22, R24)
-;; Clobbers: R22, __tmp_reg__
-DEFUN __roundqq3
- mov __tmp_reg__, C1
- subi RP, __QQ_FBIT__ - 1
- neg RP
- ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
- XCALL __mask1
- mov C0, C1
- ;; Add-Saturate 2^{-RP-1}
- add A0, C0
- brvc 0f
- ldi A0, 0x7f
-0: ;; Mask out bits beyond RP
- lsl C0
- neg C0
- and C0, A0
- mov C1, __tmp_reg__
- ret
-ENDF __roundqq3
-#endif /* L_roundqq3 */
-
-#ifdef L_rounduqq3
-
-;; R24 = round (R22, R24)
-;; Clobbers: R22, __tmp_reg__
-DEFUN __rounduqq3
- mov __tmp_reg__, C1
- subi RP, __UQQ_FBIT__ - 1
- neg RP
- ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
- XCALL __mask1
- mov C0, C1
- ;; Add-Saturate 2^{-RP-1}
- add A0, C0
- brcc 0f
- ldi A0, 0xff
-0: ;; Mask out bits beyond RP
- lsl C0
- neg C0
- and C0, A0
- mov C1, __tmp_reg__
- ret
-ENDF __rounduqq3
-#endif /* L_rounduqq3 */
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Rounding, 2 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#ifdef L_addmask_2
-
-;; [ R25:R24 = 1 << (R24 & 15)
-;; R23:R22 += 1 << (R24 & 15) ]
-;; SREG is set according to the addition
-DEFUN __addmask_2
- ;; R25 = 1 << (R24 & 7)
- XCALL __mask1
- cpi RP, 1 << 3
- sbc C0, C0
- ;; Swap C0 and C1 if RP.3 was set
- and C0, C1
- eor C1, C0
- ;; Finally, add the power-of-two: A[] += C[]
- add A0, C0
- adc A1, C1
- ret
-ENDF __addmask_2
-#endif /* L_addmask_2 */
-
-#ifdef L_round_s2
-
-;; Round a signed 2-byte fixed-point value.
-;; R25:R24 = round (R23:R22, R24)
-;; Clobbers: R23, R22
-;; __roundhq3 (HQ) only rescales the rounding point by the FBIT difference
-;; and falls through into __roundha3 (HA).
-DEFUN __roundhq3
-    subi RP, __HQ_FBIT__ - __HA_FBIT__
-ENDF __roundhq3
-;; FALLTHRU from __roundhq3
-DEFUN __roundha3
-    subi RP, __HA_FBIT__ - 1
-    neg RP
-    ;; RP = HA_FBIT-1 - RP = bit position of the rounding offset
-    ;; [ R25:R24 = 1 << (FBIT-1 - RP)
-    ;; R23:R22 += 1 << (FBIT-1 - RP) ]
-    XCALL __addmask_2
-    ;; Saturate on signed overflow, then mask bits below the rounding point
-    XJMP __round_s2_const
-ENDF __roundha3
-
-#endif /* L_round_s2 */
-
-#ifdef L_round_u2
-
-;; Round an unsigned 2-byte fixed-point value.
-;; R25:R24 = round (R23:R22, R24)
-;; Clobbers: R23, R22
-;; __rounduhq3 (UHQ) only rescales the rounding point by the FBIT difference
-;; and falls through into __rounduha3 (UHA).
-DEFUN __rounduhq3
-    subi RP, __UHQ_FBIT__ - __UHA_FBIT__
-ENDF __rounduhq3
-;; FALLTHRU from __rounduhq3
-DEFUN __rounduha3
-    subi RP, __UHA_FBIT__ - 1
-    neg RP
-    ;; RP = UHA_FBIT-1 - RP = bit position of the rounding offset
-    ;; [ R25:R24 = 1 << (FBIT-1 - RP)
-    ;; R23:R22 += 1 << (FBIT-1 - RP) ]
-    XCALL __addmask_2
-    ;; Saturate on unsigned overflow, then mask bits below the rounding point
-    XJMP __round_u2_const
-ENDF __rounduha3
-
-#endif /* L_round_u2 */
-
-
-#ifdef L_round_2_const
-
-;; Helpers for 2 byte wide rounding
-;; On entry SREG still holds the flags of the addition done in __addmask_2,
-;; A[] = the sum, and C[] = 2^{-RP-1} (the rounding offset).
-
-;; Signed variant: on 2-complement overflow (V set) saturate A[] to 0x7fff.
-;; Shares labels 1/2 with __round_u2_const below.
-DEFUN __round_s2_const
-    brvc 2f
-    ldi A1, 0x7f
-    rjmp 1f
-    ;; FALLTHRU (Barrier)
-ENDF __round_s2_const
-
-;; Unsigned variant: on carry-out (C set) saturate A[] to 0xffff.
-DEFUN __round_u2_const
-    brcc 2f
-    ldi A1, 0xff
-1:
-    ldi A0, 0xff
-2:
-    ;; Saturation is performed now.
-    ;; Currently, we have C[] = 2^{-RP-1}
-    ;; C[] = 2^{-RP}
-    lsl C0
-    rol C1
-    ;; C[] = -C[]: all-ones mask from the rounding point upwards
-    ;; (NEG2 = 16-bit negate macro defined elsewhere in this file)
-    NEG2 C0
-    ;; Clear the bits beyond the rounding point.
-    and C0, A0
-    and C1, A1
-    ret
-ENDF __round_u2_const
-
-#endif /* L_round_2_const */
-
-#undef A0
-#undef A1
-#undef C0
-#undef C1
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Rounding, 4 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#define A0 18
-#define A1 A0 + 1
-#define A2 A0 + 2
-#define A3 A0 + 3
-
-#define C0 22
-#define C1 C0 + 1
-#define C2 C0 + 2
-#define C3 C0 + 3
-
-#ifdef L_addmask_4
-
-;; [ R25:R22 = 1 << (R24 & 31)
-;;   R21:R18 += 1 << (R24 & 31) ]
-;; SREG is set according to the addition
-;; Worker for the 4-byte rounding routines: __mask1 builds 1 << (RP & 7)
-;; in C3 (R25); the byte index RP >> 3 (bits RP.3/RP.4) then routes that
-;; mask into the correct byte: 0-7 -> C0, 8-15 -> C1, 16-23 -> C2,
-;; 24-31 -> C3.
-DEFUN __addmask_4
-    ;; R25 = 1 << (R24 & 7)
-    XCALL __mask1
-    ;; C1:C0 = 0xffff iff RP.4 is not set (RP < 16), else 0.
-    cpi RP, 1 << 4
-    sbc C0, C0
-    sbc C1, C1
-    ;; C2 = 0xff iff RP.3 is not set, else 0.
-    ;; Bug fix: the original "cpi RP, 1 << 3 / sbc C2, C2" tested RP < 8,
-    ;; which differs from "RP.3 clear" for RP in [16, 23]: there the mask
-    ;; stayed in C3 and the result came out as 1 << (RP + 8).  Test bit 3
-    ;; directly instead; this is correct for all RP in 0..31.
-    clr C2
-    sbrs RP, 3
-    com C2
-    ;; Swap C2 with C3 if RP.3 is not set
-    and C2, C3
-    eor C3, C2
-    ;; Swap C3:C2 with C1:C0 if RP.4 is not set
-    and C0, C2 $ eor C2, C0
-    and C1, C3 $ eor C3, C1
-    ;; Finally, add the power-of-two: A[] += C[]
-    add A0, C0
-    adc A1, C1
-    adc A2, C2
-    adc A3, C3
-    ret
-ENDF __addmask_4
-#endif /* L_addmask_4 */
-
-#ifdef L_round_s4
-
-;; Round a signed 4-byte fixed-point value.
-;; R25:R22 = round (R21:R18, R24)
-;; Clobbers: R18...R21
-;; __roundsq3 (SQ) only rescales the rounding point by the FBIT difference
-;; and falls through into __roundsa3 (SA).
-DEFUN __roundsq3
-    subi RP, __SQ_FBIT__ - __SA_FBIT__
-ENDF __roundsq3
-;; FALLTHRU from __roundsq3
-DEFUN __roundsa3
-    subi RP, __SA_FBIT__ - 1
-    neg RP
-    ;; RP = SA_FBIT-1 - RP = bit position of the rounding offset
-    ;; [ R25:R22 = 1 << (FBIT-1 - RP)
-    ;; R21:R18 += 1 << (FBIT-1 - RP) ]
-    XCALL __addmask_4
-    ;; Saturate on signed overflow, then mask bits below the rounding point
-    XJMP __round_s4_const
-ENDF __roundsa3
-
-#endif /* L_round_s4 */
-
-#ifdef L_round_u4
-
-;; Round an unsigned 4-byte fixed-point value.
-;; R25:R22 = round (R21:R18, R24)
-;; Clobbers: R18...R21
-;; __roundusq3 (USQ) only rescales the rounding point by the FBIT difference
-;; and falls through into __roundusa3 (USA).
-DEFUN __roundusq3
-    subi RP, __USQ_FBIT__ - __USA_FBIT__
-ENDF __roundusq3
-;; FALLTHRU from __roundusq3
-DEFUN __roundusa3
-    subi RP, __USA_FBIT__ - 1
-    neg RP
-    ;; RP = USA_FBIT-1 - RP = bit position of the rounding offset
-    ;; [ R25:R22 = 1 << (FBIT-1 - RP)
-    ;; R21:R18 += 1 << (FBIT-1 - RP) ]
-    XCALL __addmask_4
-    ;; Saturate on unsigned overflow, then mask bits below the rounding point
-    XJMP __round_u4_const
-ENDF __roundusa3
-
-#endif /* L_round_u4 */
-
-
-#ifdef L_round_4_const
-
-;; Helpers for 4 byte wide rounding
-;; On entry SREG still holds the flags of the addition done in __addmask_4,
-;; A[] = the sum, and C[] = 2^{-RP-1} (the rounding offset).
-
-;; Signed variant: on 2-complement overflow (V set) saturate A[] to
-;; 0x7fffffff.  Shares labels 1/2 with __round_u4_const below.
-DEFUN __round_s4_const
-    brvc 2f
-    ldi A3, 0x7f
-    rjmp 1f
-    ;; FALLTHRU (Barrier)
-ENDF __round_s4_const
-
-;; Unsigned variant: on carry-out (C set) saturate A[] to 0xffffffff.
-DEFUN __round_u4_const
-    brcc 2f
-    ldi A3, 0xff
-1:
-    ldi A2, 0xff
-    ldi A1, 0xff
-    ldi A0, 0xff
-2:
-    ;; Saturation is performed now.
-    ;; Currently, we have C[] = 2^{-RP-1}
-    ;; C[] = 2^{-RP}
-    lsl C0
-    rol C1
-    rol C2
-    rol C3
-    ;; C[] = -C[]: 32-bit negate via libgcc, giving the all-ones mask
-    ;; from the rounding point upwards.
-    XCALL __negsi2
-    ;; Clear the bits beyond the rounding point.
-    and C0, A0
-    and C1, A1
-    and C2, A2
-    and C3, A3
-    ret
-ENDF __round_u4_const
-
-#endif /* L_round_4_const */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#undef RP
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Rounding, 8 Bytes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-#define RP 16
-#define FBITm1 31
-
-#define C0 18
-#define C1 C0 + 1
-#define C2 C0 + 2
-#define C3 C0 + 3
-#define C4 C0 + 4
-#define C5 C0 + 5
-#define C6 C0 + 6
-#define C7 C0 + 7
-
-#define A0 16
-#define A1 17
-#define A2 26
-#define A3 27
-#define A4 28
-#define A5 29
-#define A6 30
-#define A7 31
-
-
-#ifdef L_rounddq3
-;; Round a signed 8-byte DQ value.
-;; R25:R18 = round (R25:R18, R16)
-;; Clobbers: ABI
-DEFUN __rounddq3
-    ;; FBITm1 (R31) = DQ_FBIT - 1; T = 0 selects signed saturation
-    ;; in __round_x8.
-    ldi FBITm1, __DQ_FBIT__ - 1
-    clt
-    XJMP __round_x8
-ENDF __rounddq3
-#endif /* L_rounddq3 */
-
-#ifdef L_roundudq3
-;; Round an unsigned 8-byte UDQ value.
-;; R25:R18 = round (R25:R18, R16)
-;; Clobbers: ABI
-DEFUN __roundudq3
-    ;; FBITm1 (R31) = UDQ_FBIT - 1; T = 1 selects unsigned saturation
-    ;; in __round_x8.
-    ldi FBITm1, __UDQ_FBIT__ - 1
-    set
-    XJMP __round_x8
-ENDF __roundudq3
-#endif /* L_roundudq3 */
-
-#ifdef L_roundda3
-;; Round a signed 8-byte DA value.
-;; R25:R18 = round (R25:R18, R16)
-;; Clobbers: ABI
-DEFUN __roundda3
-    ;; FBITm1 (R31) = DA_FBIT - 1; T = 0 selects signed saturation
-    ;; in __round_x8.
-    ldi FBITm1, __DA_FBIT__ - 1
-    clt
-    XJMP __round_x8
-ENDF __roundda3
-#endif /* L_roundda3 */
-
-#ifdef L_rounduda3
-;; Round an unsigned 8-byte UDA value.
-;; R25:R18 = round (R25:R18, R16)
-;; Clobbers: ABI
-DEFUN __rounduda3
-    ;; FBITm1 (R31) = UDA_FBIT - 1; T = 1 selects unsigned saturation
-    ;; in __round_x8.
-    ldi FBITm1, __UDA_FBIT__ - 1
-    set
-    XJMP __round_x8
-ENDF __rounduda3
-#endif /* L_rounduda3 */
-
-#ifdef L_roundta3
-;; Round a signed 8-byte TA value.
-;; R25:R18 = round (R25:R18, R16)
-;; Clobbers: ABI
-DEFUN __roundta3
-    ;; FBITm1 (R31) = TA_FBIT - 1; T = 0 selects signed saturation
-    ;; in __round_x8.
-    ldi FBITm1, __TA_FBIT__ - 1
-    clt
-    XJMP __round_x8
-ENDF __roundta3
-#endif /* L_roundta3 */
-
-#ifdef L_rounduta3
-;; Round an unsigned 8-byte UTA value.
-;; R25:R18 = round (R25:R18, R16)
-;; Clobbers: ABI
-DEFUN __rounduta3
-    ;; FBITm1 (R31) = UTA_FBIT - 1; T = 1 selects unsigned saturation
-    ;; in __round_x8.
-    ldi FBITm1, __UTA_FBIT__ - 1
-    set
-    XJMP __round_x8
-ENDF __rounduta3
-#endif /* L_rounduta3 */
-
-
-#ifdef L_round_x8
-;; Common worker for the 8-byte rounding functions above.
-;; In:  C[] = R25:R18 = 64-bit fixed-point value
-;;      RP = R16 = rounding point
-;;      FBITm1 = R31 = FBIT-1 of the type (loaded by the caller)
-;;      T flag: 0 = signed saturation, 1 = unsigned saturation
-;; Out: R25:R18 = rounded (and possibly saturated) value
-;; Clobbers: ABI (calls __clr_8, __ashldi3, __negdi2)
-DEFUN __round_x8
-    ;; r16, r17, r28, r29 are call-saved in the avr-gcc ABI but are used
-    ;; as work registers A[] below.
-    push r16
-    push r17
-    push r28
-    push r29
-    ;; Compute log2 of addend from rounding point
-    sub RP, FBITm1
-    neg RP
-    ;; RP = FBIT-1 - RP
-    ;; Move input to work register A[]
-    ;; A0 is r16 = RP, which is still live as the shift count for
-    ;; __ashldi3 below, so the low input byte is parked on the stack.
-    push C0
-    mov A1, C1
-    wmov A2, C2
-    wmov A4, C4
-    wmov A6, C6
-    ;; C[] = 1 << (FBIT-1 - RP)
-    ;; (__clr_8 zeroes C7...C0 -- helper defined elsewhere in this file)
-    XCALL __clr_8
-    inc C0
-    XCALL __ashldi3
-    ;; r16 is free now: fetch the parked low input byte into A0.
-    pop A0
-    ;; A[] += C[]: add the rounding offset 2^{-RP-1}
-    add A0, C0
-    adc A1, C1
-    adc A2, C2
-    adc A3, C3
-    adc A4, C4
-    adc A5, C5
-    adc A6, C6
-    adc A7, C7
-    brts 1f
-    ;; Signed
-    brvc 3f
-    ;; Signed overflow: A[] = 0x7f...
-    ;; (V is known set here, so brvs acts as an unconditional jump over
-    ;; the unsigned entry point.)
-    brvs 2f
-1: ;; Unsigned
-    brcc 3f
-    ;; Unsigned overflow: A[] = 0xff...
-2: ldi A7, 0xff
-    ldi A6, 0xff
-    wmov A0, A6
-    wmov A2, A6
-    wmov A4, A6
-    ;; Copy T into bit 7 of A7: 0xff... for unsigned, 0x7f... for signed.
-    bld A7, 7
-3:
-    ;; C[] = -C[] - C[] = -(2^{-RP}): the all-ones mask from the rounding
-    ;; point upwards.  r16 (= A0) is reused as the shift count again, so
-    ;; save it around the shift.
-    push A0
-    ldi r16, 1
-    XCALL __ashldi3
-    pop A0
-    XCALL __negdi2
-    ;; Clear the bits beyond the rounding point.
-    and C0, A0
-    and C1, A1
-    and C2, A2
-    and C3, A3
-    and C4, A4
-    and C5, A5
-    and C6, A6
-    and C7, A7
-    ;; Epilogue: restore the call-saved registers
-    pop r29
-    pop r28
-    pop r17
-    pop r16
-    ret
-ENDF __round_x8
-
-#endif /* L_round_x8 */
-
-#undef A0
-#undef A1
-#undef A2
-#undef A3
-#undef A4
-#undef A5
-#undef A6
-#undef A7
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-#undef C4
-#undef C5
-#undef C6
-#undef C7
-
-#undef RP
-#undef FBITm1
-
-
-;; Supply implementations / symbols for the bit-banging functions
-;; __builtin_avr_bitsfx and __builtin_avr_fxbits
-#ifdef L_ret
-;; A bare "ret": shared no-op implementation for the bit-banging built-ins
-;; __builtin_avr_bitsfx / __builtin_avr_fxbits (see comment above), which
-;; reinterpret bits and therefore need no code of their own.
-DEFUN __ret
-    ret
-ENDF __ret
-#endif /* L_ret */