diff options
Diffstat (limited to 'gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S')
-rw-r--r-- | gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S | 1910 |
1 files changed, 0 insertions, 1910 deletions
diff --git a/gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S b/gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S deleted file mode 100644 index d80389ce2..000000000 --- a/gcc-4.8.1/libgcc/config/avr/lib1funcs-fixed.S +++ /dev/null @@ -1,1910 +0,0 @@ -/* -*- Mode: Asm -*- */ -;; Copyright (C) 2012-2013 Free Software Foundation, Inc. -;; Contributed by Sean D'Epagnier (sean@depagnier.com) -;; Georg-Johann Lay (avr@gjlay.de) - -;; This file is free software; you can redistribute it and/or modify it -;; under the terms of the GNU General Public License as published by the -;; Free Software Foundation; either version 3, or (at your option) any -;; later version. - -;; In addition to the permissions in the GNU General Public License, the -;; Free Software Foundation gives you unlimited permission to link the -;; compiled version of this file into combinations with other programs, -;; and to distribute those combinations without any restriction coming -;; from the use of this file. (The General Public License restrictions -;; do apply in other respects; for example, they cover modification of -;; the file, and distribution when not linked into a combine -;; executable.) - -;; This file is distributed in the hope that it will be useful, but -;; WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;; General Public License for more details. - -;; You should have received a copy of the GNU General Public License -;; along with this program; see the file COPYING. If not, write to -;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. 

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fixed point library routines for AVR
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

.section .text.libgcc.fixed, "ax", @progbits

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions to float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The narrow conversions below only widen their argument into the
;; 4-byte SA / USA register layout (R25:R22) and then tail-call the
;; SA / USA conversion, which performs the int -> float step and the
;; exponent adjustment.

#if defined (L_fractqqsf)
;; QQ in R24 --> SF
DEFUN __fractqqsf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Sign-extend
    lsl     r24
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fractqqsf
#endif /* L_fractqqsf */

#if defined (L_fractuqqsf)
;; UQQ in R24 --> SF
DEFUN __fractuqqsf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuqqsf
#endif /* L_fractuqqsf */

#if defined (L_fracthqsf)
;; HQ in R25:R24 --> SF
DEFUN __fracthqsf
    ;; Move in place for SA -> SF conversion
    wmov    22, 24
    ;; Sign-extend
    lsl     r25
    sbc     r24, r24
    mov     r25, r24
    XJMP    __fractsasf
ENDF __fracthqsf
#endif /* L_fracthqsf */

#if defined (L_fractuhqsf)
;; UHQ in R25:R24 --> SF
DEFUN __fractuhqsf
    ;; Move in place for USA -> SF conversion
    wmov    22, 24
    ;; Zero-extend
    clr     r24
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhqsf
#endif /* L_fractuhqsf */

#if defined (L_fracthasf)
;; HA in R25:R24 --> SF
DEFUN __fracthasf
    ;; Move in place for SA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Sign-extend
    lsl     r25
    sbc     r25, r25
    XJMP    __fractsasf
ENDF __fracthasf
#endif /* L_fracthasf */

#if defined (L_fractuhasf)
;; UHA in R25:R24 --> SF
DEFUN __fractuhasf
    ;; Move in place for USA -> SF conversion
    clr     r22
    mov     r23, r24
    mov     r24, r25
    ;; Zero-extend
    clr     r25
    XJMP    __fractusasf
ENDF __fractuhasf
#endif /* L_fractuhasf */


#if defined (L_fractsqsf)
;; SQ in R25:R22 --> SF.  Converts as a 32-bit integer, then lowers
;; the exponent field held in R25:R24.
DEFUN __fractsqsf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^31 to move the
    ;; decimal point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (31)
    sbci    r25, exp_hi (31)
0:  ret
ENDF __fractsqsf
#endif /* L_fractsqsf */

#if defined (L_fractusqsf)
;; USQ in R25:R22 --> SF
DEFUN __fractusqsf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^32 to move the
    ;; decimal point into place
    ;; (CPSE skips the SUBI when R25 is zero)
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (32)
    ret
ENDF __fractusqsf
#endif /* L_fractusqsf */

#if defined (L_fractsasf)
;; SA in R25:R22 --> SF
DEFUN __fractsasf
    XCALL   __floatsisf
    ;; Divide non-zero results by 2^15 to move the
    ;; decimal point into place
    tst     r25
    breq    0f
    subi    r24, exp_lo (15)
    sbci    r25, exp_hi (15)
0:  ret
ENDF __fractsasf
#endif /* L_fractsasf */

#if defined (L_fractusasf)
;; USA in R25:R22 --> SF
DEFUN __fractusasf
    XCALL   __floatunsisf
    ;; Divide non-zero results by 2^16 to move the
    ;; decimal point into place
    ;; (CPSE skips the SUBI when R25 is zero)
    cpse    r25, __zero_reg__
    subi    r25, exp_hi (16)
    ret
ENDF __fractusasf
#endif /* L_fractusasf */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Conversions from float
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Each routine scales the float argument by a power of two through
;; a subtraction on the exponent bytes R25:R24 and then converts
;; with __fixsfsi (signed) or __fixunssfsi (unsigned).

#if defined (L_fractsfqq)
DEFUN __fractsfqq
    ;; Multiply with 2^{24+7} to get a QQ result in r25
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XCALL   __fixsfsi
    mov     r24, r25
    ret
ENDF __fractsfqq
#endif /* L_fractsfqq */

#if defined (L_fractsfuqq)
DEFUN __fractsfuqq
    ;; Multiply with 2^{24+8} to get a UQQ result in r25
    subi    r25, exp_hi (-32)
    XCALL   __fixunssfsi
    mov     r24, r25
    ret
ENDF __fractsfuqq
#endif /* L_fractsfuqq */

#if defined (L_fractsfha)
DEFUN __fractsfha
    ;; Multiply with 2^{16+7} to get a HA result in r25:r24
    subi    r24, exp_lo (-23)
    sbci    r25, exp_hi (-23)
    XJMP    __fixsfsi
ENDF __fractsfha
#endif /* L_fractsfha */

#if defined (L_fractsfuha)
DEFUN __fractsfuha
    ;; Multiply with 2^24 to get a UHA result in r25:r24
    subi    r25, exp_hi (-24)
    XJMP    __fixunssfsi
ENDF __fractsfuha
#endif /* L_fractsfuha */

#if defined (L_fractsfhq)
FALIAS __fractsfsq

DEFUN __fractsfhq
    ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
    ;; resp. with 2^31 to get a SQ result in r25:r22
    subi    r24, exp_lo (-31)
    sbci    r25, exp_hi (-31)
    XJMP    __fixsfsi
ENDF __fractsfhq
#endif /* L_fractsfhq */

#if defined (L_fractsfuhq)
FALIAS __fractsfusq

DEFUN __fractsfuhq
    ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
    ;; resp. with 2^32 to get a USQ result in r25:r22
    subi    r25, exp_hi (-32)
    XJMP    __fixunssfsi
ENDF __fractsfuhq
#endif /* L_fractsfuhq */

#if defined (L_fractsfsa)
DEFUN __fractsfsa
    ;; Multiply with 2^15 to get a SA result in r25:r22
    subi    r24, exp_lo (-15)
    sbci    r25, exp_hi (-15)
    XJMP    __fixsfsi
ENDF __fractsfsa
#endif /* L_fractsfsa */

#if defined (L_fractsfusa)
DEFUN __fractsfusa
    ;; Multiply with 2^16 to get a USA result in r25:r22
    subi    r25, exp_hi (-16)
    XJMP    __fixunssfsi
ENDF __fractsfusa
#endif /* L_fractsfusa */


;; For multiplication the functions here are called directly from
;; avr-fixed.md instead of using the standard libcall mechanisms.
;; This can make better code because GCC knows exactly which
;; of the call-used registers (not all of them) are clobbered.

/*******************************************************
    Fractional Multiplication 8 x 8 without MUL
*******************************************************/

#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
;;; R23 = R24 * R25
;;; Clobbers: __tmp_reg__, R22, R24, R25
;;; Rounding: ???
DEFUN __mulqq3
    XCALL   __fmuls
    ;; TR 18037 requires that (-1) * (-1) does not overflow
    ;; The only input that can produce -1 is (-1)^2.
    ;; DEC sets V only for 0x80 --> 0x7f; in that case keep 0x7f,
    ;; otherwise undo the decrement.
    dec     r23
    brvs    0f
    inc     r23
0:  ret
ENDF __mulqq3
#endif /* L_mulqq3 && ! HAVE_MUL */

/*******************************************************
    Fractional Multiply .16 x .16 with and without MUL
*******************************************************/

#if defined (L_mulhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R23:R22) * (R25:R24)
;;; Clobbers: ABI, called by optabs
;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
;;; Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulhq3
    XCALL   __mulhisi3
    ;; Shift result into place
    lsl     r23
    rol     r24
    rol     r25
    brvs    1f
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
1:  ;; Overflow. TR 18037 requires (-1)^2 not to overflow
    ldi     r24, lo8 (0x7fff)
    ldi     r25, hi8 (0x7fff)
    ret
ENDF __mulhq3
#endif /* defined (L_mulhq3) */

#if defined (L_muluhq3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;; Clobbers: ABI, called by optabs
;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
;;; Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB < error <= 0.5 LSB
DEFUN __muluhq3
    XCALL   __umulhisi3
    ;; Round
    sbrc    r23, 7
    adiw    r24, 1
    ret
ENDF __muluhq3
#endif /* L_muluhq3 */


/*******************************************************
    Fixed Multiply 8.8 x 8.8 with and without MUL
*******************************************************/

#if defined (L_mulha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) = (R23:R22) * (R25:R24)
;;; Clobbers: ABI, called by optabs
;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
;;; Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulha3
    XCALL   __mulhisi3
    ;; One extra left shift compared to the unsigned variant,
    ;; then share its move-and-round tail.
    lsl     r22
    rol     r23
    rol     r24
    XJMP    __muluha3_round
ENDF __mulha3
#endif /* L_mulha3 */

#if defined (L_muluha3)
;;; Same code with and without MUL, but the interfaces differ:
;;; no MUL: (R25:R24) *= (R23:R22)
;;; Clobbers: ABI, called by optabs
;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
;;; Clobbers: __tmp_reg__, R22, R23
;;; Rounding: -0.5 LSB < error <= 0.5 LSB
DEFUN __muluha3
    XCALL   __umulhisi3
    XJMP    __muluha3_round
ENDF __muluha3
#endif /* L_muluha3 */

#if defined (L_muluha3_round)
;;; Common tail of __mulha3 and __muluha3:
;;; take bytes R24:R23 of the product as result, round on R22.7.
DEFUN __muluha3_round
    ;; Shift result into place
    mov     r25, r24
    mov     r24, r23
    ;; Round
    sbrc    r22, 7
    adiw    r24, 1
    ret
ENDF __muluha3_round
#endif /* L_muluha3_round */


/*******************************************************
    Fixed Multiplication 16.16 x 16.16
*******************************************************/

;; Bits outside the result (below LSB), used in the signed version
#define GUARD __tmp_reg__

#if defined (__AVR_HAVE_MUL__)

;; Multiplier
#define A0 16
#define A1 A0+1
#define A2 A1+1
#define A3 A2+1

;; Multiplicand
#define B0 20
#define B1 B0+1
#define B2 B1+1
#define B3 B2+1

;; Result
#define C0 24
#define C1 C0+1
#define C2 C1+1
#define C3 C2+1

#if defined (L_mulusa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Sets T = 1 (request rounding) and falls through into
;;; __mulusa3_round.
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF __mulusa3

;;; Round for last digit iff T = 1
;;; Return guard bits in GUARD (__tmp_reg__).
;;; Rounding, T = 0: -1.0 LSB < error <= 0 LSB
;;; Rounding, T = 1: -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3_round
    ;; Some of the MUL instructions have LSBs outside the result.
    ;; Don't ignore these LSBs in order to tame rounding error.
    ;; Use C2/C3 for these LSBs.

    clr     C0
    clr     C1
    mul A0, B0 $ movw C2, r0

    mul A1, B0 $ add C3, r0 $ adc C0, r1
    mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1

    ;; Round if T = 1. Store guarding bits outside the result for rounding
    ;; and left-shift by the signed version (function below).
    brtc    0f
    sbrc    C3, 7
    adiw    C0, 1
0:  push    C3

    ;; The following MULs don't have LSBs outside the result.
    ;; C2/C3 is the high part.
    ;; The SBC / SBCI / NEG sequences below count the carries out of
    ;; each column as a negative number and negate it afterwards.

    mul A0, B2 $ add C0, r0 $ adc C1, r1 $ sbc C2, C2
    mul A1, B1 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
    mul A2, B0 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
    neg     C2

    mul A0, B3 $ add C1, r0 $ adc C2, r1 $ sbc C3, C3
    mul A1, B2 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
    mul A2, B1 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
    mul A3, B0 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
    neg     C3

    mul A1, B3 $ add C2, r0 $ adc C3, r1
    mul A2, B2 $ add C2, r0 $ adc C3, r1
    mul A3, B1 $ add C2, r0 $ adc C3, r1

    mul A2, B3 $ add C3, r0
    mul A3, B2 $ add C3, r0

    ;; Guard bits used in the signed version below.
    pop     GUARD
    ;; MUL wrote its product to R1:R0, so __zero_reg__ must be
    ;; cleared again before returning.
    clr     __zero_reg__
    ret
ENDF __mulusa3_round
#endif /* L_mulusa3 */

#if defined (L_mulsa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
;;; Clobbers: __tmp_reg__, T
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulsa3
    ;; T = 0: unsigned multiply without rounding; rounding is done
    ;; here after the 1-bit shift.
    clt
    XCALL   __mulusa3_round
    ;; A posteriori sign extension of the operands
    tst     B3
    brpl    1f
    sub     C2, A0
    sbc     C3, A1
1:  sbrs    A3, 7
    rjmp    2f
    sub     C2, B0
    sbc     C3, B1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3
    ;; Round last digit
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
#endif /* L_mulsa3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3

#else /* __AVR_HAVE_MUL__ */

#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#define B0 22
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

;; __tmp_reg__
#define CC0 0
;; __zero_reg__
#define CC1 1
#define CC2 16
#define CC3 17

#define AA0 26
#define AA1 AA0+1
#define AA2 30
#define AA3 AA2+1

#if defined (L_mulsa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding: -1 LSB <= error <= 1 LSB
DEFUN __mulsa3
    ;; Keep the bytes of B that are needed for the sign correction;
    ;; B shares its registers with the result C.
    push    B0
    push    B1
    push    B3
    clt
    XCALL   __mulusa3_round
    pop     r30
    ;; sign-extend B
    bst     r30, 7
    brtc    1f
    ;; A1, A0 survived in R27:R26
    sub     C2, AA0
    sbc     C3, AA1
1:
    pop     AA1 ;; B1
    pop     AA0 ;; B0

    ;; sign-extend A. A3 survived in R31
    bst     AA3, 7
    brtc    2f
    sub     C2, AA0
    sbc     C3, AA1
2:
    ;; Shift 1 bit left to adjust for 15 fractional bits
    lsl     GUARD
    rol     C0
    rol     C1
    rol     C2
    rol     C3
    ;; Round last digit
    lsl     GUARD
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __mulsa3
#endif /* L_mulsa3 */

#if defined (L_mulusa3)
;;; (R25:R22) *= (R21:R18)
;;; Clobbers: ABI, called by optabs
;;; Rounding: -1 LSB <= error <= 1 LSB
DEFUN __mulusa3
    set
    ;; Fallthru
ENDF __mulusa3

;;; A[] survives in 26, 27, 30, 31
;;; Also used by __mulsa3 with T = 0
;;; Round if T = 1
;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
;;; The accumulator CC[] lives in R0, R1, R16, R17, i.e. it uses
;;; __tmp_reg__ and __zero_reg__ as ordinary data registers until
;;; the result has been moved to C[].
DEFUN __mulusa3_round
    push    CC2
    push    CC3
    ; clear result
    clr     __tmp_reg__
    wmov    CC2, CC0
    ; save multiplicand
    wmov    AA0, A0
    wmov    AA2, A2
    rjmp    3f

    ;; Loop the integral part

1:  ;; CC += A * 2^n; n >= 0
    add CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3

2:  ;; A <<= 1
    lsl A0 $ rol A1 $ rol A2 $ rol A3

3:  ;; IBIT(B) >>= 1
    ;; Carry = n-th bit of B; n >= 0
    lsr     B3
    ror     B2
    brcs    1b
    ;; Carry is clear here; SBCI leaves Z set only if B3:B2 == 0
    sbci    B3, 0
    brne    2b

    ;; Loop the fractional part
    ;; B2/B3 is 0 now, use as guard bits for rounding
    ;; Restore multiplicand
    wmov    A0, AA0
    wmov    A2, AA2
    rjmp    5f

4:  ;; CC += A:Guard * 2^n; n < 0
    add B3,B2 $ adc CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
5:
    ;; A:Guard >>= 1
    lsr A3 $ ror A2 $ ror A1 $ ror A0 $ ror B2

    ;; FBIT(B) <<= 1
    ;; Carry = n-th bit of B; n < 0
    lsl     B0
    rol     B1
    brcs    4b
    ;; Carry is clear here; SBCI leaves Z set only if B1:B0 == 0
    sbci    B0, 0
    brne    5b

    ;; Save guard bits and set carry for rounding
    push    B3
    lsl     B3
    ;; Move result into place
    wmov    C2, CC2
    wmov    C0, CC0
    clr     __zero_reg__
    brtc    6f
    ;; Round iff T = 1
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
6:
    pop     GUARD
    ;; Epilogue
    pop     CC3
    pop     CC2
    ret
ENDF __mulusa3_round
#endif /* L_mulusa3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif /* __AVR_HAVE_MUL__ */

#undef GUARD

/***********************************************************
    Fixed unsigned saturated Multiplication 8.8 x 8.8
***********************************************************/

#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
#define SS __tmp_reg__

#if defined (L_usmuluha3)
;; Saturates to 0xffff if the 32-bit product does not fit into the
;; middle two bytes C2:C1 after rounding.
DEFUN __usmuluha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; Adjust interface
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __umulhisi3
    tst     C3
    brne    .Lmax
    ;; Round, target is in C1..C2
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brcs    .Lmax
    ;; Move result into place
    mov     C3, C2
    mov     C2, C1
    ret
.Lmax:
    ;; Saturate
    ldi     C2, 0xff
    ldi     C3, 0xff
    ret
ENDF __usmuluha3
#endif /* L_usmuluha3 */

/***********************************************************
    Fixed signed saturated Multiplication s8.7 x s8.7
***********************************************************/

#if defined (L_ssmulha3)
;; Saturates to 0x7fff resp. 0x8000, selected by the sign kept in SS.
DEFUN __ssmulha3
    ;; Widening multiply
#ifdef __AVR_HAVE_MUL__
    ;; Adjust interface
    movw    R26, R22
    movw    R18, R24
#endif /* HAVE MUL */
    XCALL   __mulhisi3
    ;; Adjust decimal point
    lsl     C0
    rol     C1
    rol     C2
    brvs    .LsatC3.3
    ;; The 9 MSBs must be the same
    rol     C3
    sbc     SS, SS
    cp      C3, SS
    brne    .LsatSS
    ;; Round
    lsl     C0
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    brvs    .Lmax
    ;; Move result into place
    mov     C3, C2
    mov     C2, C1
    ret
.Lmax:
    ;; Load 0x7fff
    clr     C3
.LsatC3.3:
    ;; C3 < 0 --> 0x8000
    ;; C3 >= 0 --> 0x7fff
    mov     SS, C3
.LsatSS:
    ;; Load min / max value:
    ;; SS = -1 --> 0x8000
    ;; SS = 0 --> 0x7fff
    ldi     C3, 0x7f
    ldi     C2, 0xff
    sbrc    SS, 7
    adiw    C2, 1
    ret
ENDF __ssmulha3
#endif /* L_ssmulha3 */

#undef C0
#undef C1
#undef C2
#undef C3
#undef SS

/***********************************************************
    Fixed unsigned saturated Multiplication 16.16 x 16.16
***********************************************************/

#define C0 18
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
#define C4 C0+4
#define C5 C0+5
#define C6 C0+6
#define C7 C0+7
#define SS __tmp_reg__

#if defined (L_usmulusa3)
;; R22[4] = R22[4] *{usat} R18[4]
;; Ordinary ABI function
DEFUN __usmulusa3
    ;; Widening multiply
    XCALL   __umulsidi3
    ;; Any bit in the two top bytes means overflow
    or      C7, C6
    brne    .Lmax
    ;; Round, target is in C2..C5
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brcs    .Lmax
    ;; Move result into place
    wmov    C6, C4
    wmov    C4, C2
    ret
.Lmax:
    ;; Saturate
    ldi     C7, 0xff
    ldi     C6, 0xff
    wmov    C4, C6
    ret
ENDF __usmulusa3
#endif /* L_usmulusa3 */

/***********************************************************
    Fixed signed saturated Multiplication s16.15 x s16.15
***********************************************************/

#if defined (L_ssmulsa3)
;; R22[4] = R22[4] *{ssat} R18[4]
;; Ordinary ABI function
DEFUN __ssmulsa3
    ;; Widening multiply
    XCALL   __mulsidi3
    ;; Adjust decimal point
    lsl     C1
    rol     C2
    rol     C3
    rol     C4
    rol     C5
    brvs    .LsatC7.7
    ;; The 17 MSBs must be the same
    rol     C6
    rol     C7
    sbc     SS, SS
    cp      C6, SS
    cpc     C7, SS
    brne    .LsatSS
    ;; Round
    lsl     C1
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
    adc     C4, __zero_reg__
    adc     C5, __zero_reg__
    brvs    .Lmax
    ;; Move result into place
    wmov    C6, C4
    wmov    C4, C2
    ret

.Lmax:
    ;; Load 0x7fffffff
    clr     C7
.LsatC7.7:
    ;; C7 < 0 --> 0x80000000
    ;; C7 >= 0 --> 0x7fffffff
    lsl     C7
    sbc     SS, SS
.LsatSS:
    ;; Load min / max value:
    ;; SS = -1 --> 0x80000000
    ;; SS = 0 --> 0x7fffffff
    com     SS
    mov     C4, SS
    mov     C5, C4
    wmov    C6, C4
    subi    C7, 0x80
    ret
ENDF __ssmulsa3
#endif /* L_ssmulsa3 */

#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#undef SS

/*******************************************************
    Fractional Division 8 / 8
*******************************************************/

#define r_divd r25 /* dividend */
#define r_quo r24 /* quotient */
#define r_div r22 /* divisor */
#define r_sign __tmp_reg__

#if defined (L_divqq3)
;; Signed division r_quo = r_divd / r_div.
;; r_sign.7 holds the XOR of the operand signs; negative operands
;; are negated before the unsigned helper is called.
DEFUN __divqq3
    mov     r_sign, r_divd
    eor     r_sign, r_div
    sbrc    r_div, 7
    neg     r_div
    sbrc    r_divd, 7
    neg     r_divd
    XCALL   __divqq_helper
    lsr     r_quo
    sbrc    r_sign, 7 ; negate result if needed
    neg     r_quo
    ret
ENDF __divqq3
#endif /* L_divqq3 */

#if defined (L_udivuqq3)
;; Unsigned division r_quo = r_divd / r_div, saturating to 0xff
;; when r_divd >= r_div.
DEFUN __udivuqq3
    cp      r_divd, r_div
    brsh    0f
    XJMP    __divqq_helper
    ;; Result is out of [0, 1) ==> Return 1 - eps.
0:  ldi     r_quo, 0xff
    ret
ENDF __udivuqq3
#endif /* L_udivuqq3 */


#if defined (L_divqq_helper)
;; Shift-and-subtract loop shared by __divqq3 and __udivuqq3.
;; __zero_reg__ serves as loop counter: a single 1 bit is shifted
;; left until it drops out, which leaves the register at 0 again.
DEFUN __divqq_helper
    clr     r_quo ; clear quotient
    inc     __zero_reg__ ; init loop counter, used per shift
__udivuqq3_loop:
    lsl     r_divd ; shift dividend
    brcs    0f ; dividend overflow
    cp      r_divd,r_div ; compare dividend & divisor
    brcc    0f ; dividend >= divisor
    rol     r_quo ; shift quotient (with CARRY)
    rjmp    __udivuqq3_cont
0:
    sub     r_divd,r_div ; restore dividend
    lsl     r_quo ; shift quotient (without CARRY)
__udivuqq3_cont:
    lsl     __zero_reg__ ; shift loop-counter bit
    brne    __udivuqq3_loop
    com     r_quo ; complement result
                  ; because C flag was complemented in loop
    ret
ENDF __divqq_helper
#endif /* L_divqq_helper */

#undef r_divd
#undef r_quo
#undef r_div
#undef r_sign


/*******************************************************
    Fractional Division 16 / 16
*******************************************************/
#define r_divdL 26 /* dividend Low */
#define r_divdH 27 /* dividend High */
#define r_quoL 24 /* quotient Low */
#define r_quoH 25 /* quotient High */
#define r_divL 22 /* divisor */
#define r_divH 23 /* divisor */
#define r_cnt 21

#if defined (L_divhq3)
;; Signed fractional division:
;;   (r_quoH:r_quoL) = (r_divdH:r_divdL) / (r_divH:r_divL)
;; r0.7 holds the XOR of the operand signs.  Negative operands are
;; negated, the unsigned quotient is computed by __udivuhq3 and then
;; shifted right by one bit to get 15 fractional bits.
;; If both magnitudes are equal, 0x8000 (-1) is returned.
DEFUN __divhq3
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    cp      r_divdL, r_divL
    cpc     r_divdH, r_divH
    breq    __divhq3_minus1 ; if equal return -1
    XCALL   __udivuhq3
    lsr     r_quoH
    ror     r_quoL
    ;; Decide on the sign saved in r0.  The N flag left by ROR only
    ;; mirrors bit 7 of r_quoL, i.e. a quotient bit, so it must not
    ;; be used to select the negation.  Same scheme as in __divha3.
    sbrs    r0, 7 ; negate result if needed
    ret
    NEG2    r_quoL
    ret
__divhq3_minus1:
    ldi     r_quoH, 0x80
    clr     r_quoL
    ret
ENDF __divhq3
#endif /* defined (L_divhq3) */

#if defined (L_udivuhq3)
;; Unsigned fractional division.  Clears r_quoH and the carry and
;; falls through into the common loop.
DEFUN __udivuhq3
    sub     r_quoH,r_quoH ; clear quotient and carry
    ;; FALLTHRU
ENDF __udivuhq3

;; Common 16-step shift-and-subtract loop.
;; On entry the carry and r_quoH supply the bits that are shifted
;; into the dividend; r_cnt (R21) is used as loop counter.
DEFUN __udivuha3_common
    clr     r_quoL ; clear quotient
    ldi     r_cnt,16 ; init loop counter
__udivuhq3_loop:
    rol     r_divdL ; shift dividend (with CARRY)
    rol     r_divdH
    brcs    __udivuhq3_ep ; dividend overflow
    cp      r_divdL,r_divL ; compare dividend & divisor
    cpc     r_divdH,r_divH
    brcc    __udivuhq3_ep ; dividend >= divisor
    rol     r_quoL ; shift quotient (with CARRY)
    rjmp    __udivuhq3_cont
__udivuhq3_ep:
    sub     r_divdL,r_divL ; restore dividend
    sbc     r_divdH,r_divH
    lsl     r_quoL ; shift quotient (without CARRY)
__udivuhq3_cont:
    rol     r_quoH ; shift quotient
    dec     r_cnt ; decrement loop counter
    brne    __udivuhq3_loop
    com     r_quoL ; complement result
    com     r_quoH ; because C flag was complemented in loop
    ret
ENDF __udivuha3_common
#endif /* defined (L_udivuhq3) */

/*******************************************************
    Fixed Division 8.8 / 8.8
*******************************************************/
#if defined (L_divha3)
;; Signed 8.8 division.  r0.7 holds the XOR of the operand signs;
;; the magnitudes are divided by __udivuha3.
DEFUN __divha3
    mov     r0, r_divdH
    eor     r0, r_divH
    sbrs    r_divH, 7
    rjmp    1f
    NEG2    r_divL
1:
    sbrs    r_divdH, 7
    rjmp    2f
    NEG2    r_divdL
2:
    XCALL   __udivuha3
    lsr     r_quoH ; adjust to 7 fractional bits
    ror     r_quoL
    sbrs    r0, 7 ; negate result if needed
    ret
    NEG2    r_quoL
    ret
ENDF __divha3
#endif /* defined (L_divha3) */

#if defined (L_udivuha3)
;; Unsigned 8.8 division.  Rearranges the dividend so that the
;; fractional loop __udivuha3_common can be reused.
DEFUN __udivuha3
    mov     r_quoH, r_divdL
    mov     r_divdL, r_divdH
    clr     r_divdH
    lsl     r_quoH ; shift quotient into carry
    XJMP    __udivuha3_common ; same as fractional after rearrange
ENDF __udivuha3
#endif /* defined (L_udivuha3) */

#undef r_divdL
#undef r_divdH
#undef r_quoL
#undef r_quoH
#undef r_divL
#undef r_divH
#undef r_cnt

/*******************************************************
    Fixed Division 16.16 / 16.16
*******************************************************/

#define r_arg1L 24 /* arg1 gets passed already in place */
#define r_arg1H 25
#define r_arg1HL 26
#define r_arg1HH 27
#define r_divdL 26 /* dividend Low */
#define r_divdH 27
#define r_divdHL 30
#define r_divdHH 31 /* dividend High */
#define r_quoL 22 /* quotient Low */
#define r_quoH 23
#define r_quoHL 24
#define r_quoHH 25 /* quotient High */
#define r_divL 18 /* divisor Low */
#define r_divH 19
#define r_divHL 20
#define r_divHH 21 /* divisor High */
#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */

#if defined (L_divsa3)
;; Signed 16.16 division.  r0.7 holds the XOR of the operand signs;
;; the magnitudes are divided by __udivusa3 and the quotient is
;; negated through __negsi2 if required.
DEFUN __divsa3
    mov     r0, r_arg1HH
    eor     r0, r_divHH
    sbrs    r_divHH, 7
    rjmp    1f
    NEG4    r_divL
1:
    sbrs    r_arg1HH, 7
    rjmp    2f
    NEG4    r_arg1L
2:
    XCALL   __udivusa3
    lsr     r_quoHH ; adjust to 15 fractional bits
    ror     r_quoHL
    ror     r_quoH
    ror     r_quoL
    sbrs    r0, 7 ; negate result if needed
    ret
    ;; negate r_quoL
    XJMP    __negsi2
ENDF __divsa3
#endif /* defined (L_divsa3) */

#if defined (L_udivusa3)
;; Unsigned 16.16 division with a 32-step shift-and-subtract loop.
;; The loop counter lives in __zero_reg__ and is 0 again when the
;; loop terminates.
DEFUN __udivusa3
    ldi     r_divdHL, 32 ; init loop counter
    mov     r_cnt, r_divdHL
    clr     r_divdHL
    clr     r_divdHH
    wmov    r_quoL, r_divdHL
    lsl     r_quoHL ; shift quotient into carry
    rol     r_quoHH
__udivusa3_loop:
    rol     r_divdL ; shift dividend (with CARRY)
    rol     r_divdH
    rol     r_divdHL
    rol     r_divdHH
    brcs    __udivusa3_ep ; dividend overflow
    cp      r_divdL,r_divL ; compare dividend & divisor
    cpc     r_divdH,r_divH
    cpc     r_divdHL,r_divHL
    cpc     r_divdHH,r_divHH
    brcc    __udivusa3_ep ; dividend >= divisor
    rol     r_quoL ; shift quotient (with CARRY)
    rjmp    __udivusa3_cont
__udivusa3_ep:
    sub     r_divdL,r_divL ; restore dividend
    sbc     r_divdH,r_divH
    sbc     r_divdHL,r_divHL
    sbc     r_divdHH,r_divHH
    lsl     r_quoL ; shift quotient (without CARRY)
__udivusa3_cont:
    rol     r_quoH ; shift quotient
    rol     r_quoHL
    rol     r_quoHH
    dec     r_cnt ; decrement loop counter
    brne    __udivusa3_loop
    com     r_quoL ; complement result
    com     r_quoH ; because C flag was complemented in loop
    com     r_quoHL
    com     r_quoHH
    ret
ENDF __udivusa3
#endif /* defined (L_udivusa3) */

#undef r_arg1L
#undef r_arg1H
#undef r_arg1HL
#undef r_arg1HH
#undef r_divdL
#undef r_divdH
#undef r_divdHL
#undef r_divdHH
#undef r_quoL
#undef r_quoH
#undef r_quoHL
#undef r_quoHH
#undef r_divL
#undef r_divH
#undef r_divHL
#undef r_divHH
#undef r_cnt


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 1 Byte
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0 24

#if defined (L_ssabs_1)
;; Saturating absolute value: 0x80 yields 0x7f.
DEFUN __ssabs_1
    sbrs    A0, 7
    ret
    neg     A0
    ;; Still negative only for 0x80: step down to 0x7f
    sbrc    A0,7
    dec     A0
    ret
ENDF __ssabs_1
#endif /* L_ssabs_1 */

#undef A0



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0 24
#define A1 A0+1

#if defined (L_ssneg_2)
;; Saturating negation: on signed overflow subtract 1 from the result.
DEFUN __ssneg_2
    NEG2    A0
    brvc    0f
    sbiw    A0, 1
0:  ret
ENDF __ssneg_2
#endif /* L_ssneg_2 */

#if defined (L_ssabs_2)
;; Saturating absolute value: negate through __ssneg_2 if negative.
DEFUN __ssabs_2
    sbrs    A1, 7
    ret
    XJMP    __ssneg_2
ENDF __ssabs_2
#endif /* L_ssabs_2 */

#undef A0
#undef A1



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0 22
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#if defined (L_ssneg_4)
;; Saturating negation: load 0x7fffffff if __negsi2 reports overflow.
DEFUN __ssneg_4
    XCALL   __negsi2
    brvc    0f
    ldi     A3, 0x7f
    ldi     A2, 0xff
    ldi     A1, 0xff
    ldi     A0, 0xff
0:  ret
ENDF __ssneg_4
#endif /* L_ssneg_4 */

#if defined (L_ssabs_4)
;; Saturating absolute value: negate through __ssneg_4 if negative.
DEFUN __ssabs_4
    sbrs    A3, 7
    ret
    XJMP    __ssneg_4
ENDF __ssabs_4
#endif /* L_ssabs_4 */

#undef A0
#undef A1
#undef A2
#undef A3



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Saturation, 8 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; First Argument and Return Register
#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6
#define A7 A0+7

#if defined (L_clr_8)
FALIAS __usneguta2
FALIAS __usneguda2
FALIAS __usnegudq2

;; Clear Carry and all Bytes
DEFUN __clr_8
    ;; Clear Carry and set Z
    sub     A7, A7
    ;; FALLTHRU
ENDF __clr_8
;; Propagate Carry to all Bytes, Carry unaltered
;; (Carry set: all bytes 0xff; Carry clear: all bytes 0x00)
DEFUN __sbc_8
    sbc     A7, A7
    sbc     A6, A6
    wmov    A4, A6
    wmov    A2, A6
    wmov    A0, A6
    ret
ENDF __sbc_8
#endif /* L_clr_8 */

#if defined (L_ssneg_8)
FALIAS __ssnegta2
FALIAS __ssnegda2
FALIAS __ssnegdq2

DEFUN __ssneg_8
    XCALL   __negdi2
    brvc    0f
    ;; A[] = 0x7fffffffffffffff
    sec
    XCALL   __sbc_8
    ldi     A7, 0x7f
0:  ret
ENDF __ssneg_8
#endif /* L_ssneg_8 */

#if defined (L_ssabs_8)
FALIAS __ssabsta2
FALIAS __ssabsda2
FALIAS __ssabsdq2

DEFUN __ssabs_8
    sbrs    A7, 7
    ret
    XJMP    __ssneg_8
ENDF __ssabs_8
#endif /* L_ssabs_8 */

;; Second Argument
#define B0 10
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6
#define B7 B0+7

#if defined (L_usadd_8)
FALIAS __usadduta3
FALIAS __usadduda3
FALIAS __usaddudq3

DEFUN __usadd_8
    XCALL   __adddi3
    brcs    0f
    ret
0:  ;; A[] = 0xffffffffffffffff
    ;; (Carry is set here, so __sbc_8 fills all bytes with 0xff)
    XJMP    __sbc_8
ENDF __usadd_8
#endif /* L_usadd_8 */

#if defined (L_ussub_8)
FALIAS __ussubuta3
FALIAS __ussubuda3
FALIAS __ussubudq3

DEFUN __ussub_8
    XCALL   __subdi3
    brcs    0f
    ret
0:  ;; A[] = 0
    XJMP    __clr_8
ENDF __ussub_8
#endif /* L_ussub_8 */

#if defined (L_ssadd_8)
FALIAS __ssaddta3
FALIAS __ssaddda3
FALIAS __ssadddq3

DEFUN __ssadd_8
    XCALL   __adddi3
    brvc    0f
    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
    ;; Carry = (B7 < 0x80); __sbc_8 spreads it over A[], and the
    ;; final SUBI flips the top bit of A7.
    cpi     B7, 0x80
    XCALL   __sbc_8
    subi    A7, 0x80
0:  ret
ENDF __ssadd_8
#endif /* L_ssadd_8 */

#if defined (L_sssub_8)
FALIAS __sssubta3
FALIAS __sssubda3
FALIAS __sssubdq3

DEFUN __sssub_8
    XCALL   __subdi3
    brvc    0f
    ;; A = (B < 0) ? INT64_MAX : INT64_MIN
    ;; Carry = (0x7f < B7), i.e. set for negative B
    ldi     A7, 0x7f
    cp      A7, B7
    XCALL   __sbc_8
    subi    A7, 0x80
0:  ret
ENDF __sssub_8
#endif /* L_sssub_8 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7
#undef B0
#undef B1
#undef B2
#undef B3
#undef B4
#undef B5
#undef B6
#undef B7


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding Helpers
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#ifdef L_mask1

#define AA 24
#define CC 25

;; R25 = 1 << (R24 & 7)
;; CC = 1 << (AA & 7)
;; Clobbers: None
DEFUN __mask1
    ;; CC = 1 << (2 * AA.1)
    ldi     CC, 1 << 2
    sbrs    AA, 1
    ldi     CC, 1 << 0
    ;; CC <<= AA.0
    sbrc    AA, 0
    lsl     CC
    ;; CC <<= 4 * AA.2 (CC is below 16 here, so a nibble swap
    ;; is a shift by 4)
    sbrc    AA, 2
    swap    CC
    ret
ENDF __mask1

#undef AA
#undef CC
#endif /* L_mask1 */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The rounding point. Any bits smaller than
;; 2^{-RP} will be cleared.
#define RP R24

#define A0 22
#define A1 A0 + 1

#define C0 24
#define C1 C0 + 1

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 1 Byte
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#ifdef L_roundqq3

;; R24 = round (R22, R24)
;; Clobbers: R22, __tmp_reg__
;; R25 is saved in __tmp_reg__ around the call to __mask1 and
;; restored before returning.
DEFUN __roundqq3
    mov     __tmp_reg__, C1
    subi    RP, __QQ_FBIT__ - 1
    neg     RP
    ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
    XCALL   __mask1
    mov     C0, C1
    ;; Add-Saturate 2^{-RP-1}
    add     A0, C0
    brvc    0f
    ldi     A0, 0x7f
0:  ;; Mask out bits beyond RP
    ;; C0 = -(2 * C0) has all bits from the rounding point upwards set
    lsl     C0
    neg     C0
    and     C0, A0
    mov     C1, __tmp_reg__
    ret
ENDF __roundqq3
#endif /* L_roundqq3 */

#ifdef L_rounduqq3

;; R24 = round (R22, R24)
;; Clobbers: R22, __tmp_reg__
;; Unsigned variant: saturates to 0xff on carry.
DEFUN __rounduqq3
    mov     __tmp_reg__, C1
    subi    RP, __UQQ_FBIT__ - 1
    neg     RP
    ;; R25 = 1 << RP (Total offset is FBIT-1 - RP)
    XCALL   __mask1
    mov     C0, C1
    ;; Add-Saturate 2^{-RP-1}
    add     A0, C0
    brcc    0f
    ldi     A0, 0xff
0:  ;; Mask out bits beyond RP
    lsl     C0
    neg     C0
    and     C0, A0
    mov     C1, __tmp_reg__
    ret
ENDF __rounduqq3
#endif /* L_rounduqq3 */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 2 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#ifdef L_addmask_2

;; [ R25:R24 = 1 << (R24 & 15)
;; R23:R22 += 1 << (R24 & 15) ]
;; SREG is set according to the addition
DEFUN __addmask_2
    ;; R25 = 1 << (R24 & 7)
    XCALL   __mask1
    ;; C0 = 0xff if RP < 8, else 0x00
    cpi     RP, 1 << 3
    sbc     C0, C0
    ;; Swap C0 and C1 if RP.3 was set
    and     C0, C1
    eor     C1, C0
    ;; Finally, add the power-of-two: A[] += C[]
    add     A0, C0
    adc     A1, C1
    ret
ENDF __addmask_2
#endif /* L_addmask_2 */

#ifdef L_round_s2

;; R25:R24 = round (R23:R22, R24)
;; Clobbers: R23, R22
;; __roundhq3 only adjusts RP for the different number of
;; fractional bits and falls through into __roundha3.
DEFUN __roundhq3
    subi    RP, __HQ_FBIT__ - __HA_FBIT__
ENDF __roundhq3
DEFUN __roundha3
    subi    RP, __HA_FBIT__ - 1
    neg     RP
    ;; [ R25:R24 = 1 << (FBIT-1 - RP)
    ;; R23:R22 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_2
    XJMP    __round_s2_const
ENDF __roundha3

#endif /* L_round_s2 */

#ifdef L_round_u2

;; R25:R24 = round (R23:R22, R24)
;; Clobbers: R23, R22
;; __rounduhq3 only adjusts RP and falls through into __rounduha3.
DEFUN __rounduhq3
    subi    RP, __UHQ_FBIT__ - __UHA_FBIT__
ENDF __rounduhq3
DEFUN __rounduha3
    subi    RP, __UHA_FBIT__ - 1
    neg     RP
    ;; [ R25:R24 = 1 << (FBIT-1 - RP)
    ;; R23:R22 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_2
    XJMP    __round_u2_const
ENDF __rounduha3

#endif /* L_round_u2 */


#ifdef L_round_2_const

;; Helpers for 2 byte wide rounding
;; Both entry points expect SREG as left by the addition in
;; __addmask_2: the signed one saturates on V, the unsigned one on C.

DEFUN __round_s2_const
    brvc    2f
    ldi     A1, 0x7f
    rjmp    1f
    ;; FALLTHRU (Barrier)
ENDF __round_s2_const

DEFUN __round_u2_const
    brcc    2f
    ldi     A1, 0xff
1:
    ldi     A0, 0xff
2:
    ;; Saturation is performed now.
    ;; Currently, we have C[] = 2^{-RP-1}
    ;; C[] = 2^{-RP}
    lsl     C0
    rol     C1
    ;; C[] = -C[]: all bits from the rounding point upwards are set
    NEG2    C0
    ;; Clear the bits beyond the rounding point.
    and     C0, A0
    and     C1, A1
    ret
ENDF __round_u2_const

#endif /* L_round_2_const */

#undef A0
#undef A1
#undef C0
#undef C1

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 4 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#define A0 18
#define A1 A0 + 1
#define A2 A0 + 2
#define A3 A0 + 3

#define C0 22
#define C1 C0 + 1
#define C2 C0 + 2
#define C3 C0 + 3

#ifdef L_addmask_4

;; [ R25:R22 = 1 << (R24 & 31)
;; R21:R18 += 1 << (R24 & 31) ]
;; SREG is set according to the addition
DEFUN __addmask_4
    ;; R25 = 1 << (R24 & 7)
    XCALL   __mask1
    ;; C1:C0 = 0xffff if RP < 16, else 0x0000
    cpi     RP, 1 << 4
    sbc     C0, C0
    sbc     C1, C1
    ;; Swap C2 with C3 if RP.3 is not set
    ;; NOTE(review): CPI compares the whole of RP (which is C2 = R24)
    ;; against 8, it does not test bit 3 alone.  For RP in 16...23 the
    ;; carry is clear although RP.3 is 0, so the mask appears to stay
    ;; in C3 instead of moving to C2.  TODO confirm whether callers
    ;; can pass RP in that range.
    cpi     RP, 1 << 3
    sbc     C2, C2
    and     C2, C3
    eor     C3, C2
    ;; Swap C3:C2 with C1:C0 if RP.4 is not set
    and C0, C2 $ eor C2, C0
    and C1, C3 $ eor C3, C1
    ;; Finally, add the power-of-two: A[] += C[]
    add     A0, C0
    adc     A1, C1
    adc     A2, C2
    adc     A3, C3
    ret
ENDF __addmask_4
#endif /* L_addmask_4 */

#ifdef L_round_s4

;; R25:R22 = round (R21:R18, R24)
;; Clobbers: R18...R21
;; __roundsq3 only adjusts RP and falls through into __roundsa3.
DEFUN __roundsq3
    subi    RP, __SQ_FBIT__ - __SA_FBIT__
ENDF __roundsq3
DEFUN __roundsa3
    subi    RP, __SA_FBIT__ - 1
    neg     RP
    ;; [ R25:R22 = 1 << (FBIT-1 - RP)
    ;; R21:R18 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_4
    XJMP    __round_s4_const
ENDF __roundsa3

#endif /* L_round_s4 */

#ifdef L_round_u4

;; R25:R22 = round (R21:R18, R24)
;; Clobbers: R18...R21
;; __roundusq3 only adjusts RP and falls through into __roundusa3.
DEFUN __roundusq3
    subi    RP, __USQ_FBIT__ - __USA_FBIT__
ENDF __roundusq3
DEFUN __roundusa3
    subi    RP, __USA_FBIT__ - 1
    neg     RP
    ;; [ R25:R22 = 1 << (FBIT-1 - RP)
    ;; R21:R18 += 1 << (FBIT-1 - RP) ]
    XCALL   __addmask_4
    XJMP    __round_u4_const
ENDF __roundusa3

#endif /* L_round_u4 */


#ifdef L_round_4_const

;; Helpers for 4 byte wide rounding
;; Both entry points expect SREG as left by the addition in
;; __addmask_4: the signed one saturates on V, the unsigned one on C.

DEFUN __round_s4_const
    brvc    2f
    ldi     A3, 0x7f
    rjmp    1f
    ;; FALLTHRU (Barrier)
ENDF __round_s4_const

DEFUN __round_u4_const
    brcc    2f
    ldi     A3, 0xff
1:
    ldi     A2, 0xff
    ldi     A1, 0xff
    ldi     A0, 0xff
2:
    ;; Saturation is performed now.
    ;; Currently, we have C[] = 2^{-RP-1}
    ;; C[] = 2^{-RP}
    lsl     C0
    rol     C1
    rol     C2
    rol     C3
    ;; C[] = -C[]; C[] occupies R25:R22
    XCALL   __negsi2
    ;; Clear the bits beyond the rounding point.
    and     C0, A0
    and     C1, A1
    and     C2, A2
    and     C3, A3
    ret
ENDF __round_u4_const

#endif /* L_round_4_const */

#undef A0
#undef A1
#undef A2
#undef A3
#undef C0
#undef C1
#undef C2
#undef C3

#undef RP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Rounding, 8 Bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#define RP 16
#define FBITm1 31

#define C0 18
#define C1 C0 + 1
#define C2 C0 + 2
#define C3 C0 + 3
#define C4 C0 + 4
#define C5 C0 + 5
#define C6 C0 + 6
#define C7 C0 + 7

#define A0 16
#define A1 17
#define A2 26
#define A3 27
#define A4 28
#define A5 29
#define A6 30
#define A7 31


#ifdef L_rounddq3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; Loads FBIT-1 into R31, clears T and jumps to __round_x8.
DEFUN __rounddq3
    ldi     FBITm1, __DQ_FBIT__ - 1
    clt
    XJMP    __round_x8
ENDF __rounddq3
#endif /* L_rounddq3 */

#ifdef L_roundudq3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; Loads FBIT-1 into R31, sets T and jumps to __round_x8.
DEFUN __roundudq3
    ldi     FBITm1, __UDQ_FBIT__ - 1
    set
    XJMP    __round_x8
ENDF __roundudq3
#endif /* L_roundudq3 */

#ifdef L_roundda3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; Signed entry: loads FBIT-1 and clears T, then tail-jumps to the
;; common 8-byte worker.
DEFUN  __roundda3
    ldi     FBITm1, __DA_FBIT__ - 1
    clt
    XJMP    __round_x8
ENDF  __roundda3
#endif /* L_roundda3 */

#ifdef L_rounduda3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; Unsigned entry: loads FBIT-1 and sets T, then tail-jumps to the
;; common 8-byte worker.
DEFUN  __rounduda3
    ldi     FBITm1, __UDA_FBIT__ - 1
    set
    XJMP    __round_x8
ENDF  __rounduda3
#endif /* L_rounduda3 */

#ifdef L_roundta3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; Signed entry: loads FBIT-1 and clears T, then tail-jumps to the
;; common 8-byte worker.
DEFUN  __roundta3
    ldi     FBITm1, __TA_FBIT__ - 1
    clt
    XJMP    __round_x8
ENDF  __roundta3
#endif /* L_roundta3 */

#ifdef L_rounduta3
;; R25:R18 = round (R25:R18, R16)
;; Clobbers: ABI
;; Unsigned entry: loads FBIT-1 and sets T, then tail-jumps to the
;; common 8-byte worker.
DEFUN  __rounduta3
    ldi     FBITm1, __UTA_FBIT__ - 1
    set
    XJMP    __round_x8
ENDF  __rounduta3
#endif /* L_rounduta3 */


#ifdef L_round_x8
;; Common worker for all 8-byte rounding entry points.
;;
;; In:   C[] (R25:R18) = value to round
;;       RP (R16)      = rounding point
;;       FBITm1 (R31)  = FBIT - 1 of the mode, loaded by the entry stub
;;       T flag        = 0 for signed, 1 for unsigned saturation
;; Out:  C[] (R25:R18) = rounded, saturated value
;; R16, R17, R28 and R29 are saved on the stack and restored;
;; the remaining A[] registers (R26, R27, R30, R31) are overwritten.
DEFUN __round_x8
    ;; Prologue
    push    r16
    push    r17
    push    r28
    push    r29
    ;; Compute log2 of addend from rounding point: RP = FBIT-1 - RP
    sub     RP, FBITm1
    neg     RP
    ;; Move input to work register A[].
    ;; A0 is the register that holds RP (the shift count used below),
    ;; hence C0 is parked on the stack until the shift is done.
    push    C0
    mov     A1, C1
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6
    ;; C[] = 1 << (FBIT-1 - RP)
    ;; (__clr_8 and __ashldi3 are defined elsewhere; presumably they
    ;; clear R25:R18 and shift R25:R18 left by R16, respectively.)
    XCALL   __clr_8
    inc     C0
    XCALL   __ashldi3
    pop     A0
    ;; A[] += C[]
    add     A0, C0
    adc     A1, C1
    adc     A2, C2
    adc     A3, C3
    adc     A4, C4
    adc     A5, C5
    adc     A6, C6
    adc     A7, C7
    ;; Pick the overflow test according to signedness (T flag)
    brts    1f
    ;; Signed
    brvc    3f
    ;; Signed overflow: A[] = 0x7f...
    ;; (V is known to be set here, so this branch is always taken.)
    brvs    2f
1:  ;; Unsigned
    brcc    3f
    ;; Unsigned overflow: A[] = 0xff...
2:  ldi     A7, 0xff
    ldi     A6, 0xff
    wmov    A0, A6
    wmov    A2, A6
    wmov    A4, A6
    ;; Copy T into the MSB: 0x7f... for signed, 0xff... for unsigned
    bld     A7, 7
3:
    ;; C[] = -C[] - C[]
    ;; i.e. shift C[] left by one and negate it.  r16 doubles as A0 and
    ;; as the shift count, so A0 is preserved around the call.
    push    A0
    ldi     r16, 1
    XCALL   __ashldi3
    pop     A0
    XCALL   __negdi2
    ;; Clear the bits beyond the rounding point.
    and     C0, A0
    and     C1, A1
    and     C2, A2
    and     C3, A3
    and     C4, A4
    and     C5, A5
    and     C6, A6
    and     C7, A7
    ;; Epilogue
    pop     r29
    pop     r28
    pop     r17
    pop     r16
    ret
ENDF __round_x8

#endif /* L_round_x8 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7

#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7

#undef RP
#undef FBITm1


;; Supply implementations / symbols for the bit-banging functions
;; __builtin_avr_bitsfx and __builtin_avr_fxbits
;; The routine itself does nothing but return.
#ifdef L_ret
DEFUN  __ret
    ret
ENDF  __ret
#endif /* L_ret */