diff options
author | Ben Cheng <bccheng@google.com> | 2014-03-25 22:37:19 -0700 |
---|---|---|
committer | Ben Cheng <bccheng@google.com> | 2014-03-25 22:37:19 -0700 |
commit | 1bc5aee63eb72b341f506ad058502cd0361f0d10 (patch) | |
tree | c607e8252f3405424ff15bc2d00aa38dadbb2518 /gcc-4.9/libgcc/config/avr | |
parent | 283a0bf58fcf333c58a2a92c3ebbc41fb9eb1fdb (diff) | |
download | toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.gz toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.bz2 toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.zip |
Initial checkin of GCC 4.9.0 from trunk (r208799).
Change-Id: I48a3c08bb98542aa215912a75f03c0890e497dba
Diffstat (limited to 'gcc-4.9/libgcc/config/avr')
-rw-r--r-- | gcc-4.9/libgcc/config/avr/avr-lib.h | 105 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S | 1915 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/avr/lib1funcs.S | 3226 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/avr/lib2-object.mk | 23 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/avr/lib2funcs.c | 226 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/avr/t-avr | 277 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/avr/t-avrlibc | 66 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/avr/t-rtems | 2 |
8 files changed, 5840 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/avr/avr-lib.h b/gcc-4.9/libgcc/config/avr/avr-lib.h new file mode 100644 index 000000000..37d5fa1a7 --- /dev/null +++ b/gcc-4.9/libgcc/config/avr/avr-lib.h @@ -0,0 +1,105 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifdef FLOAT +#define CMPtype QItype +#define DF SF +#define DI SI +typedef int QItype __attribute__ ((mode (QI))); +#endif + +/* fixed-bit.h does not define functions for TA and UTA because + that part is wrapped in #if MIN_UNITS_PER_WORD > 4. + This would lead to empty functions for TA and UTA. + Thus, supply appropriate defines as if HAVE_[U]TA == 1. + #define HAVE_[U]TA 1 won't work because avr-modes.def + uses ADJUST_BYTESIZE(TA,8) and fixed-bit.h is not generic enough + to arrange for such changes of the mode size. 
*/ + +typedef unsigned _Fract UTAtype __attribute__ ((mode (UTA))); + +#if defined (UTA_MODE) +#define FIXED_SIZE 8 /* in bytes */ +#define INT_C_TYPE UDItype +#define UINT_C_TYPE UDItype +#define HINT_C_TYPE USItype +#define HUINT_C_TYPE USItype +#define MODE_NAME UTA +#define MODE_NAME_S uta +#define MODE_UNSIGNED 1 +#endif + +#if defined (FROM_UTA) +#define FROM_TYPE 4 /* ID for fixed-point */ +#define FROM_MODE_NAME UTA +#define FROM_MODE_NAME_S uta +#define FROM_INT_C_TYPE UDItype +#define FROM_SINT_C_TYPE DItype +#define FROM_UINT_C_TYPE UDItype +#define FROM_MODE_UNSIGNED 1 +#define FROM_FIXED_SIZE 8 /* in bytes */ +#elif defined (TO_UTA) +#define TO_TYPE 4 /* ID for fixed-point */ +#define TO_MODE_NAME UTA +#define TO_MODE_NAME_S uta +#define TO_INT_C_TYPE UDItype +#define TO_SINT_C_TYPE DItype +#define TO_UINT_C_TYPE UDItype +#define TO_MODE_UNSIGNED 1 +#define TO_FIXED_SIZE 8 /* in bytes */ +#endif + +/* Same for TAmode */ + +typedef _Fract TAtype __attribute__ ((mode (TA))); + +#if defined (TA_MODE) +#define FIXED_SIZE 8 /* in bytes */ +#define INT_C_TYPE DItype +#define UINT_C_TYPE UDItype +#define HINT_C_TYPE SItype +#define HUINT_C_TYPE USItype +#define MODE_NAME TA +#define MODE_NAME_S ta +#define MODE_UNSIGNED 0 +#endif + +#if defined (FROM_TA) +#define FROM_TYPE 4 /* ID for fixed-point */ +#define FROM_MODE_NAME TA +#define FROM_MODE_NAME_S ta +#define FROM_INT_C_TYPE DItype +#define FROM_SINT_C_TYPE DItype +#define FROM_UINT_C_TYPE UDItype +#define FROM_MODE_UNSIGNED 0 +#define FROM_FIXED_SIZE 8 /* in bytes */ +#elif defined (TO_TA) +#define TO_TYPE 4 /* ID for fixed-point */ +#define TO_MODE_NAME TA +#define TO_MODE_NAME_S ta +#define TO_INT_C_TYPE DItype +#define TO_SINT_C_TYPE DItype +#define TO_UINT_C_TYPE UDItype +#define TO_MODE_UNSIGNED 0 +#define TO_FIXED_SIZE 8 /* in bytes */ +#endif diff --git a/gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S b/gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S new file mode 100644 index 000000000..8f3ed9201 --- 
/dev/null +++ b/gcc-4.9/libgcc/config/avr/lib1funcs-fixed.S @@ -0,0 +1,1915 @@ +/* -*- Mode: Asm -*- */ +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Contributed by Sean D'Epagnier (sean@depagnier.com) +;; Georg-Johann Lay (avr@gjlay.de) + +;; This file is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by the +;; Free Software Foundation; either version 3, or (at your option) any +;; later version. + +;; In addition to the permissions in the GNU General Public License, the +;; Free Software Foundation gives you unlimited permission to link the +;; compiled version of this file into combinations with other programs, +;; and to distribute those combinations without any restriction coming +;; from the use of this file. (The General Public License restrictions +;; do apply in other respects; for example, they cover modification of +;; the file, and distribution when not linked into a combine +;; executable.) + +;; This file is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. 
+ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Fixed point library routines for AVR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +.section .text.libgcc.fixed, "ax", @progbits + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Conversions to float +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +#if defined (L_fractqqsf) +DEFUN __fractqqsf + ;; Move in place for SA -> SF conversion + clr r22 + mov r23, r24 + ;; Sign-extend + lsl r24 + sbc r24, r24 + mov r25, r24 + XJMP __fractsasf +ENDF __fractqqsf +#endif /* L_fractqqsf */ + +#if defined (L_fractuqqsf) +DEFUN __fractuqqsf + ;; Move in place for USA -> SF conversion + clr r22 + mov r23, r24 + ;; Zero-extend + clr r24 + clr r25 + XJMP __fractusasf +ENDF __fractuqqsf +#endif /* L_fractuqqsf */ + +#if defined (L_fracthqsf) +DEFUN __fracthqsf + ;; Move in place for SA -> SF conversion + wmov 22, 24 + ;; Sign-extend + lsl r25 + sbc r24, r24 + mov r25, r24 + XJMP __fractsasf +ENDF __fracthqsf +#endif /* L_fracthqsf */ + +#if defined (L_fractuhqsf) +DEFUN __fractuhqsf + ;; Move in place for USA -> SF conversion + wmov 22, 24 + ;; Zero-extend + clr r24 + clr r25 + XJMP __fractusasf +ENDF __fractuhqsf +#endif /* L_fractuhqsf */ + +#if defined (L_fracthasf) +DEFUN __fracthasf + ;; Move in place for SA -> SF conversion + clr r22 + mov r23, r24 + mov r24, r25 + ;; Sign-extend + lsl r25 + sbc r25, r25 + XJMP __fractsasf +ENDF __fracthasf +#endif /* L_fracthasf */ + +#if defined (L_fractuhasf) +DEFUN __fractuhasf + ;; Move in place for USA -> SF conversion + clr r22 + mov r23, r24 + mov r24, r25 + ;; Zero-extend + clr r25 + XJMP __fractusasf +ENDF __fractuhasf +#endif /* L_fractuhasf */ + + +#if defined (L_fractsqsf) +DEFUN __fractsqsf + XCALL __floatsisf + ;; Divide non-zero results by 2^31 to move the + ;; decimal point into place + tst r25 + breq 0f + subi r24, exp_lo (31) + sbci r25, exp_hi (31) +0: ret +ENDF __fractsqsf +#endif /* L_fractsqsf */ + +#if defined (L_fractusqsf) +DEFUN __fractusqsf + XCALL 
__floatunsisf + ;; Divide non-zero results by 2^32 to move the + ;; decimal point into place + cpse r25, __zero_reg__ + subi r25, exp_hi (32) + ret +ENDF __fractusqsf +#endif /* L_fractusqsf */ + +#if defined (L_fractsasf) +DEFUN __fractsasf + XCALL __floatsisf + ;; Divide non-zero results by 2^15 to move the + ;; decimal point into place + tst r25 + breq 0f + subi r24, exp_lo (15) + sbci r25, exp_hi (15) +0: ret +ENDF __fractsasf +#endif /* L_fractsasf */ + +#if defined (L_fractusasf) +DEFUN __fractusasf + XCALL __floatunsisf + ;; Divide non-zero results by 2^16 to move the + ;; decimal point into place + cpse r25, __zero_reg__ + subi r25, exp_hi (16) + ret +ENDF __fractusasf +#endif /* L_fractusasf */ + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Conversions from float +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +#if defined (L_fractsfqq) +DEFUN __fractsfqq + ;; Multiply with 2^{24+7} to get a QQ result in r25 + subi r24, exp_lo (-31) + sbci r25, exp_hi (-31) + XCALL __fixsfsi + mov r24, r25 + ret +ENDF __fractsfqq +#endif /* L_fractsfqq */ + +#if defined (L_fractsfuqq) +DEFUN __fractsfuqq + ;; Multiply with 2^{24+8} to get a UQQ result in r25 + subi r25, exp_hi (-32) + XCALL __fixunssfsi + mov r24, r25 + ret +ENDF __fractsfuqq +#endif /* L_fractsfuqq */ + +#if defined (L_fractsfha) +DEFUN __fractsfha + ;; Multiply with 2^{16+7} to get a HA result in r25:r24 + subi r24, exp_lo (-23) + sbci r25, exp_hi (-23) + XJMP __fixsfsi +ENDF __fractsfha +#endif /* L_fractsfha */ + +#if defined (L_fractsfuha) +DEFUN __fractsfuha + ;; Multiply with 2^24 to get a UHA result in r25:r24 + subi r25, exp_hi (-24) + XJMP __fixunssfsi +ENDF __fractsfuha +#endif /* L_fractsfuha */ + +#if defined (L_fractsfhq) +FALIAS __fractsfsq + +DEFUN __fractsfhq + ;; Multiply with 2^{16+15} to get a HQ result in r25:r24 + ;; resp. 
with 2^31 to get a SQ result in r25:r22 + subi r24, exp_lo (-31) + sbci r25, exp_hi (-31) + XJMP __fixsfsi +ENDF __fractsfhq +#endif /* L_fractsfhq */ + +#if defined (L_fractsfuhq) +FALIAS __fractsfusq + +DEFUN __fractsfuhq + ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24 + ;; resp. with 2^32 to get a USQ result in r25:r22 + subi r25, exp_hi (-32) + XJMP __fixunssfsi +ENDF __fractsfuhq +#endif /* L_fractsfuhq */ + +#if defined (L_fractsfsa) +DEFUN __fractsfsa + ;; Multiply with 2^15 to get a SA result in r25:r22 + subi r24, exp_lo (-15) + sbci r25, exp_hi (-15) + XJMP __fixsfsi +ENDF __fractsfsa +#endif /* L_fractsfsa */ + +#if defined (L_fractsfusa) +DEFUN __fractsfusa + ;; Multiply with 2^16 to get a USA result in r25:r22 + subi r25, exp_hi (-16) + XJMP __fixunssfsi +ENDF __fractsfusa +#endif /* L_fractsfusa */ + + +;; For multiplication the functions here are called directly from +;; avr-fixed.md instead of using the standard libcall mechanisms. +;; This can make better code because GCC knows exactly which +;; of the call-used registers (not all of them) are clobbered. */ + +/******************************************************* + Fractional Multiplication 8 x 8 without MUL +*******************************************************/ + +#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__) +;;; R23 = R24 * R25 +;;; Clobbers: __tmp_reg__, R22, R24, R25 +;;; Rounding: ??? +DEFUN __mulqq3 + XCALL __fmuls + ;; TR 18037 requires that (-1) * (-1) does not overflow + ;; The only input that can produce -1 is (-1)^2. + dec r23 + brvs 0f + inc r23 +0: ret +ENDF __mulqq3 +#endif /* L_mulqq3 && ! 
HAVE_MUL */ + +/******************************************************* + Fractional Multiply .16 x .16 with and without MUL +*******************************************************/ + +#if defined (L_mulhq3) +;;; Same code with and without MUL, but the interfaces differ: +;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25) +;;; Clobbers: ABI, called by optabs +;;; MUL: (R25:R24) = (R19:R18) * (R27:R26) +;;; Clobbers: __tmp_reg__, R22, R23 +;;; Rounding: -0.5 LSB <= error <= 0.5 LSB +DEFUN __mulhq3 + XCALL __mulhisi3 + ;; Shift result into place + lsl r23 + rol r24 + rol r25 + brvs 1f + ;; Round + sbrc r23, 7 + adiw r24, 1 + ret +1: ;; Overflow. TR 18037 requires (-1)^2 not to overflow + ldi r24, lo8 (0x7fff) + ldi r25, hi8 (0x7fff) + ret +ENDF __mulhq3 +#endif /* defined (L_mulhq3) */ + +#if defined (L_muluhq3) +;;; Same code with and without MUL, but the interfaces differ: +;;; no MUL: (R25:R24) *= (R23:R22) +;;; Clobbers: ABI, called by optabs +;;; MUL: (R25:R24) = (R19:R18) * (R27:R26) +;;; Clobbers: __tmp_reg__, R22, R23 +;;; Rounding: -0.5 LSB < error <= 0.5 LSB +DEFUN __muluhq3 + XCALL __umulhisi3 + ;; Round + sbrc r23, 7 + adiw r24, 1 + ret +ENDF __muluhq3 +#endif /* L_muluhq3 */ + + +/******************************************************* + Fixed Multiply 8.8 x 8.8 with and without MUL +*******************************************************/ + +#if defined (L_mulha3) +;;; Same code with and without MUL, but the interfaces differ: +;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25) +;;; Clobbers: ABI, called by optabs +;;; MUL: (R25:R24) = (R19:R18) * (R27:R26) +;;; Clobbers: __tmp_reg__, R22, R23 +;;; Rounding: -0.5 LSB <= error <= 0.5 LSB +DEFUN __mulha3 + XCALL __mulhisi3 + lsl r22 + rol r23 + rol r24 + XJMP __muluha3_round +ENDF __mulha3 +#endif /* L_mulha3 */ + +#if defined (L_muluha3) +;;; Same code with and without MUL, but the interfaces differ: +;;; no MUL: (R25:R24) *= (R23:R22) +;;; Clobbers: ABI, called by optabs +;;; MUL: (R25:R24) = (R19:R18) * 
(R27:R26) +;;; Clobbers: __tmp_reg__, R22, R23 +;;; Rounding: -0.5 LSB < error <= 0.5 LSB +DEFUN __muluha3 + XCALL __umulhisi3 + XJMP __muluha3_round +ENDF __muluha3 +#endif /* L_muluha3 */ + +#if defined (L_muluha3_round) +DEFUN __muluha3_round + ;; Shift result into place + mov r25, r24 + mov r24, r23 + ;; Round + sbrc r22, 7 + adiw r24, 1 + ret +ENDF __muluha3_round +#endif /* L_muluha3_round */ + + +/******************************************************* + Fixed Multiplication 16.16 x 16.16 +*******************************************************/ + +;; Bits outside the result (below LSB), used in the signed version +#define GUARD __tmp_reg__ + +#if defined (__AVR_HAVE_MUL__) + +;; Multiplier +#define A0 16 +#define A1 A0+1 +#define A2 A1+1 +#define A3 A2+1 + +;; Multiplicand +#define B0 20 +#define B1 B0+1 +#define B2 B1+1 +#define B3 B2+1 + +;; Result +#define C0 24 +#define C1 C0+1 +#define C2 C1+1 +#define C3 C2+1 + +#if defined (L_mulusa3) +;;; (C3:C0) = (A3:A0) * (B3:B0) +DEFUN __mulusa3 + set + ;; Fallthru +ENDF __mulusa3 + +;;; Round for last digit iff T = 1 +;;; Return guard bits in GUARD (__tmp_reg__). +;;; Rounding, T = 0: -1.0 LSB < error <= 0 LSB +;;; Rounding, T = 1: -0.5 LSB < error <= 0.5 LSB +DEFUN __mulusa3_round + ;; Some of the MUL instructions have LSBs outside the result. + ;; Don't ignore these LSBs in order to tame rounding error. + ;; Use C2/C3 for these LSBs. + + clr C0 + clr C1 + mul A0, B0 $ movw C2, r0 + + mul A1, B0 $ add C3, r0 $ adc C0, r1 + mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1 + + ;; Round if T = 1. Store guarding bits outside the result for rounding + ;; and left-shift by the signed version (function below). + brtc 0f + sbrc C3, 7 + adiw C0, 1 +0: push C3 + + ;; The following MULs don't have LSBs outside the result. + ;; C2/C3 is the high part. 
+ + mul A0, B2 $ add C0, r0 $ adc C1, r1 $ sbc C2, C2 + mul A1, B1 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0 + mul A2, B0 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0 + neg C2 + + mul A0, B3 $ add C1, r0 $ adc C2, r1 $ sbc C3, C3 + mul A1, B2 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0 + mul A2, B1 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0 + mul A3, B0 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0 + neg C3 + + mul A1, B3 $ add C2, r0 $ adc C3, r1 + mul A2, B2 $ add C2, r0 $ adc C3, r1 + mul A3, B1 $ add C2, r0 $ adc C3, r1 + + mul A2, B3 $ add C3, r0 + mul A3, B2 $ add C3, r0 + + ;; Guard bits used in the signed version below. + pop GUARD + clr __zero_reg__ + ret +ENDF __mulusa3_round +#endif /* L_mulusa3 */ + +#if defined (L_mulsa3) +;;; (C3:C0) = (A3:A0) * (B3:B0) +;;; Clobbers: __tmp_reg__, T +;;; Rounding: -0.5 LSB <= error <= 0.5 LSB +DEFUN __mulsa3 + clt + XCALL __mulusa3_round + ;; A posteriori sign extension of the operands + tst B3 + brpl 1f + sub C2, A0 + sbc C3, A1 +1: sbrs A3, 7 + rjmp 2f + sub C2, B0 + sbc C3, B1 +2: + ;; Shift 1 bit left to adjust for 15 fractional bits + lsl GUARD + rol C0 + rol C1 + rol C2 + rol C3 + ;; Round last digit + lsl GUARD + adc C0, __zero_reg__ + adc C1, __zero_reg__ + adc C2, __zero_reg__ + adc C3, __zero_reg__ + ret +ENDF __mulsa3 +#endif /* L_mulsa3 */ + +#undef A0 +#undef A1 +#undef A2 +#undef A3 +#undef B0 +#undef B1 +#undef B2 +#undef B3 +#undef C0 +#undef C1 +#undef C2 +#undef C3 + +#else /* __AVR_HAVE_MUL__ */ + +#define A0 18 +#define A1 A0+1 +#define A2 A0+2 +#define A3 A0+3 + +#define B0 22 +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 + +#define C0 22 +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 + +;; __tmp_reg__ +#define CC0 0 +;; __zero_reg__ +#define CC1 1 +#define CC2 16 +#define CC3 17 + +#define AA0 26 +#define AA1 AA0+1 +#define AA2 30 +#define AA3 AA2+1 + +#if defined (L_mulsa3) +;;; (R25:R22) *= (R21:R18) +;;; Clobbers: ABI, called by optabs +;;; Rounding: -1 LSB <= error <= 1 LSB +DEFUN __mulsa3 + push B0 + push 
B1 + push B3 + clt + XCALL __mulusa3_round + pop r30 + ;; sign-extend B + bst r30, 7 + brtc 1f + ;; A1, A0 survived in R27:R26 + sub C2, AA0 + sbc C3, AA1 +1: + pop AA1 ;; B1 + pop AA0 ;; B0 + + ;; sign-extend A. A3 survived in R31 + bst AA3, 7 + brtc 2f + sub C2, AA0 + sbc C3, AA1 +2: + ;; Shift 1 bit left to adjust for 15 fractional bits + lsl GUARD + rol C0 + rol C1 + rol C2 + rol C3 + ;; Round last digit + lsl GUARD + adc C0, __zero_reg__ + adc C1, __zero_reg__ + adc C2, __zero_reg__ + adc C3, __zero_reg__ + ret +ENDF __mulsa3 +#endif /* L_mulsa3 */ + +#if defined (L_mulusa3) +;;; (R25:R22) *= (R21:R18) +;;; Clobbers: ABI, called by optabs +;;; Rounding: -1 LSB <= error <= 1 LSB +DEFUN __mulusa3 + set + ;; Fallthru +ENDF __mulusa3 + +;;; A[] survives in 26, 27, 30, 31 +;;; Also used by __mulsa3 with T = 0 +;;; Round if T = 1 +;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version. +DEFUN __mulusa3_round + push CC2 + push CC3 + ; clear result + clr __tmp_reg__ + wmov CC2, CC0 + ; save multiplicand + wmov AA0, A0 + wmov AA2, A2 + rjmp 3f + + ;; Loop the integral part + +1: ;; CC += A * 2^n; n >= 0 + add CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3 + +2: ;; A <<= 1 + lsl A0 $ rol A1 $ rol A2 $ rol A3 + +3: ;; IBIT(B) >>= 1 + ;; Carry = n-th bit of B; n >= 0 + lsr B3 + ror B2 + brcs 1b + sbci B3, 0 + brne 2b + + ;; Loop the fractional part + ;; B2/B3 is 0 now, use as guard bits for rounding + ;; Restore multiplicand + wmov A0, AA0 + wmov A2, AA2 + rjmp 5f + +4: ;; CC += A:Guard * 2^n; n < 0 + add B3,B2 $ adc CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3 +5: + ;; A:Guard >>= 1 + lsr A3 $ ror A2 $ ror A1 $ ror A0 $ ror B2 + + ;; FBIT(B) <<= 1 + ;; Carry = n-th bit of B; n < 0 + lsl B0 + rol B1 + brcs 4b + sbci B0, 0 + brne 5b + + ;; Save guard bits and set carry for rounding + push B3 + lsl B3 + ;; Move result into place + wmov C2, CC2 + wmov C0, CC0 + clr __zero_reg__ + brtc 6f + ;; Round iff T = 1 + adc C0, __zero_reg__ + adc C1, __zero_reg__ + adc 
C2, __zero_reg__ + adc C3, __zero_reg__ +6: + pop GUARD + ;; Epilogue + pop CC3 + pop CC2 + ret +ENDF __mulusa3_round +#endif /* L_mulusa3 */ + +#undef A0 +#undef A1 +#undef A2 +#undef A3 +#undef B0 +#undef B1 +#undef B2 +#undef B3 +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef AA0 +#undef AA1 +#undef AA2 +#undef AA3 +#undef CC0 +#undef CC1 +#undef CC2 +#undef CC3 + +#endif /* __AVR_HAVE_MUL__ */ + +#undef GUARD + +/*********************************************************** + Fixed unsigned saturated Multiplication 8.8 x 8.8 +***********************************************************/ + +#define C0 22 +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 +#define SS __tmp_reg__ + +#if defined (L_usmuluha3) +DEFUN __usmuluha3 + ;; Widening multiply +#ifdef __AVR_HAVE_MUL__ + ;; Adjust interface + movw R26, R22 + movw R18, R24 +#endif /* HAVE MUL */ + XCALL __umulhisi3 + tst C3 + brne .Lmax + ;; Round, target is in C1..C2 + lsl C0 + adc C1, __zero_reg__ + adc C2, __zero_reg__ + brcs .Lmax + ;; Move result into place + mov C3, C2 + mov C2, C1 + ret +.Lmax: + ;; Saturate + ldi C2, 0xff + ldi C3, 0xff + ret +ENDF __usmuluha3 +#endif /* L_usmuluha3 */ + +/*********************************************************** + Fixed signed saturated Multiplication s8.7 x s8.7 +***********************************************************/ + +#if defined (L_ssmulha3) +DEFUN __ssmulha3 + ;; Widening multiply +#ifdef __AVR_HAVE_MUL__ + ;; Adjust interface + movw R26, R22 + movw R18, R24 +#endif /* HAVE MUL */ + XCALL __mulhisi3 + ;; Adjust decimal point + lsl C0 + rol C1 + rol C2 + brvs .LsatC3.3 + ;; The 9 MSBs must be the same + rol C3 + sbc SS, SS + cp C3, SS + brne .LsatSS + ;; Round + lsl C0 + adc C1, __zero_reg__ + adc C2, __zero_reg__ + brvs .Lmax + ;; Move result into place + mov C3, C2 + mov C2, C1 + ret +.Lmax: + ;; Load 0x7fff + clr C3 +.LsatC3.3: + ;; C3 < 0 --> 0x8000 + ;; C3 >= 0 --> 0x7fff + mov SS, C3 +.LsatSS: + ;; Load min / max value: + ;; SS = -1 --> 0x8000 + ;; 
SS = 0 --> 0x7fff + ldi C3, 0x7f + ldi C2, 0xff + sbrc SS, 7 + adiw C2, 1 + ret +ENDF __ssmulha3 +#endif /* L_ssmulha3 */ + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef SS + +/*********************************************************** + Fixed unsigned saturated Multiplication 16.16 x 16.16 +***********************************************************/ + +#define C0 18 +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 +#define C4 C0+4 +#define C5 C0+5 +#define C6 C0+6 +#define C7 C0+7 +#define SS __tmp_reg__ + +#if defined (L_usmulusa3) +;; R22[4] = R22[4] *{ssat} R18[4] +;; Ordinary ABI function +DEFUN __usmulusa3 + ;; Widening multiply + XCALL __umulsidi3 + or C7, C6 + brne .Lmax + ;; Round, target is in C2..C5 + lsl C1 + adc C2, __zero_reg__ + adc C3, __zero_reg__ + adc C4, __zero_reg__ + adc C5, __zero_reg__ + brcs .Lmax + ;; Move result into place + wmov C6, C4 + wmov C4, C2 + ret +.Lmax: + ;; Saturate + ldi C7, 0xff + ldi C6, 0xff + wmov C4, C6 + ret +ENDF __usmulusa3 +#endif /* L_usmulusa3 */ + +/*********************************************************** + Fixed signed saturated Multiplication s16.15 x s16.15 +***********************************************************/ + +#if defined (L_ssmulsa3) +;; R22[4] = R22[4] *{ssat} R18[4] +;; Ordinary ABI function +DEFUN __ssmulsa3 + ;; Widening multiply + XCALL __mulsidi3 + ;; Adjust decimal point + lsl C1 + rol C2 + rol C3 + rol C4 + rol C5 + brvs .LsatC7.7 + ;; The 17 MSBs must be the same + rol C6 + rol C7 + sbc SS, SS + cp C6, SS + cpc C7, SS + brne .LsatSS + ;; Round + lsl C1 + adc C2, __zero_reg__ + adc C3, __zero_reg__ + adc C4, __zero_reg__ + adc C5, __zero_reg__ + brvs .Lmax + ;; Move result into place + wmov C6, C4 + wmov C4, C2 + ret + +.Lmax: + ;; Load 0x7fffffff + clr C7 +.LsatC7.7: + ;; C7 < 0 --> 0x80000000 + ;; C7 >= 0 --> 0x7fffffff + lsl C7 + sbc SS, SS +.LsatSS: + ;; Load min / max value: + ;; SS = -1 --> 0x80000000 + ;; SS = 0 --> 0x7fffffff + com SS + mov C4, SS + mov C5, C4 + wmov C6, 
C4 + subi C7, 0x80 + ret +ENDF __ssmulsa3 +#endif /* L_ssmulsa3 */ + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +#undef SS + +/******************************************************* + Fractional Division 8 / 8 +*******************************************************/ + +#define r_divd r25 /* dividend */ +#define r_quo r24 /* quotient */ +#define r_div r22 /* divisor */ +#define r_sign __tmp_reg__ + +#if defined (L_divqq3) +DEFUN __divqq3 + mov r_sign, r_divd + eor r_sign, r_div + sbrc r_div, 7 + neg r_div + sbrc r_divd, 7 + neg r_divd + XCALL __divqq_helper + lsr r_quo + sbrc r_sign, 7 ; negate result if needed + neg r_quo + ret +ENDF __divqq3 +#endif /* L_divqq3 */ + +#if defined (L_udivuqq3) +DEFUN __udivuqq3 + cp r_divd, r_div + brsh 0f + XJMP __divqq_helper + ;; Result is out of [0, 1) ==> Return 1 - eps. +0: ldi r_quo, 0xff + ret +ENDF __udivuqq3 +#endif /* L_udivuqq3 */ + + +#if defined (L_divqq_helper) +DEFUN __divqq_helper + clr r_quo ; clear quotient + inc __zero_reg__ ; init loop counter, used per shift +__udivuqq3_loop: + lsl r_divd ; shift dividend + brcs 0f ; dividend overflow + cp r_divd,r_div ; compare dividend & divisor + brcc 0f ; dividend >= divisor + rol r_quo ; shift quotient (with CARRY) + rjmp __udivuqq3_cont +0: + sub r_divd,r_div ; restore dividend + lsl r_quo ; shift quotient (without CARRY) +__udivuqq3_cont: + lsl __zero_reg__ ; shift loop-counter bit + brne __udivuqq3_loop + com r_quo ; complement result + ; because C flag was complemented in loop + ret +ENDF __divqq_helper +#endif /* L_divqq_helper */ + +#undef r_divd +#undef r_quo +#undef r_div +#undef r_sign + + +/******************************************************* + Fractional Division 16 / 16 +*******************************************************/ +#define r_divdL 26 /* dividend Low */ +#define r_divdH 27 /* dividend Hig */ +#define r_quoL 24 /* quotient Low */ +#define r_quoH 25 /* quotient High */ +#define r_divL 22 /* divisor */ 
+#define r_divH 23 /* divisor */ +#define r_cnt 21 + +#if defined (L_divhq3) +DEFUN __divhq3 + mov r0, r_divdH + eor r0, r_divH + sbrs r_divH, 7 + rjmp 1f + NEG2 r_divL +1: + sbrs r_divdH, 7 + rjmp 2f + NEG2 r_divdL +2: + cp r_divdL, r_divL + cpc r_divdH, r_divH + breq __divhq3_minus1 ; if equal return -1 + XCALL __udivuhq3 + lsr r_quoH + ror r_quoL + brpl 9f + ;; negate result if needed + NEG2 r_quoL +9: + ret +__divhq3_minus1: + ldi r_quoH, 0x80 + clr r_quoL + ret +ENDF __divhq3 +#endif /* defined (L_divhq3) */ + +#if defined (L_udivuhq3) +DEFUN __udivuhq3 + sub r_quoH,r_quoH ; clear quotient and carry + ;; FALLTHRU +ENDF __udivuhq3 + +DEFUN __udivuha3_common + clr r_quoL ; clear quotient + ldi r_cnt,16 ; init loop counter +__udivuhq3_loop: + rol r_divdL ; shift dividend (with CARRY) + rol r_divdH + brcs __udivuhq3_ep ; dividend overflow + cp r_divdL,r_divL ; compare dividend & divisor + cpc r_divdH,r_divH + brcc __udivuhq3_ep ; dividend >= divisor + rol r_quoL ; shift quotient (with CARRY) + rjmp __udivuhq3_cont +__udivuhq3_ep: + sub r_divdL,r_divL ; restore dividend + sbc r_divdH,r_divH + lsl r_quoL ; shift quotient (without CARRY) +__udivuhq3_cont: + rol r_quoH ; shift quotient + dec r_cnt ; decrement loop counter + brne __udivuhq3_loop + com r_quoL ; complement result + com r_quoH ; because C flag was complemented in loop + ret +ENDF __udivuha3_common +#endif /* defined (L_udivuhq3) */ + +/******************************************************* + Fixed Division 8.8 / 8.8 +*******************************************************/ +#if defined (L_divha3) +DEFUN __divha3 + mov r0, r_divdH + eor r0, r_divH + sbrs r_divH, 7 + rjmp 1f + NEG2 r_divL +1: + sbrs r_divdH, 7 + rjmp 2f + NEG2 r_divdL +2: + XCALL __udivuha3 + lsr r_quoH ; adjust to 7 fractional bits + ror r_quoL + sbrs r0, 7 ; negate result if needed + ret + NEG2 r_quoL + ret +ENDF __divha3 +#endif /* defined (L_divha3) */ + +#if defined (L_udivuha3) +DEFUN __udivuha3 + mov r_quoH, r_divdL + mov r_divdL, 
r_divdH + clr r_divdH + lsl r_quoH ; shift quotient into carry + XJMP __udivuha3_common ; same as fractional after rearrange +ENDF __udivuha3 +#endif /* defined (L_udivuha3) */ + +#undef r_divdL +#undef r_divdH +#undef r_quoL +#undef r_quoH +#undef r_divL +#undef r_divH +#undef r_cnt + +/******************************************************* + Fixed Division 16.16 / 16.16 +*******************************************************/ + +#define r_arg1L 24 /* arg1 gets passed already in place */ +#define r_arg1H 25 +#define r_arg1HL 26 +#define r_arg1HH 27 +#define r_divdL 26 /* dividend Low */ +#define r_divdH 27 +#define r_divdHL 30 +#define r_divdHH 31 /* dividend High */ +#define r_quoL 22 /* quotient Low */ +#define r_quoH 23 +#define r_quoHL 24 +#define r_quoHH 25 /* quotient High */ +#define r_divL 18 /* divisor Low */ +#define r_divH 19 +#define r_divHL 20 +#define r_divHH 21 /* divisor High */ +#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */ + +#if defined (L_divsa3) +DEFUN __divsa3 + mov r0, r_arg1HH + eor r0, r_divHH + sbrs r_divHH, 7 + rjmp 1f + NEG4 r_divL +1: + sbrs r_arg1HH, 7 + rjmp 2f + NEG4 r_arg1L +2: + XCALL __udivusa3 + lsr r_quoHH ; adjust to 15 fractional bits + ror r_quoHL + ror r_quoH + ror r_quoL + sbrs r0, 7 ; negate result if needed + ret + ;; negate r_quoL + XJMP __negsi2 +ENDF __divsa3 +#endif /* defined (L_divsa3) */ + +#if defined (L_udivusa3) +DEFUN __udivusa3 + ldi r_divdHL, 32 ; init loop counter + mov r_cnt, r_divdHL + clr r_divdHL + clr r_divdHH + wmov r_quoL, r_divdHL + lsl r_quoHL ; shift quotient into carry + rol r_quoHH +__udivusa3_loop: + rol r_divdL ; shift dividend (with CARRY) + rol r_divdH + rol r_divdHL + rol r_divdHH + brcs __udivusa3_ep ; dividend overflow + cp r_divdL,r_divL ; compare dividend & divisor + cpc r_divdH,r_divH + cpc r_divdHL,r_divHL + cpc r_divdHH,r_divHH + brcc __udivusa3_ep ; dividend >= divisor + rol r_quoL ; shift quotient (with CARRY) + rjmp __udivusa3_cont +__udivusa3_ep: + sub 
r_divdL,r_divL ; restore dividend + sbc r_divdH,r_divH + sbc r_divdHL,r_divHL + sbc r_divdHH,r_divHH + lsl r_quoL ; shift quotient (without CARRY) +__udivusa3_cont: + rol r_quoH ; shift quotient + rol r_quoHL + rol r_quoHH + dec r_cnt ; decrement loop counter + brne __udivusa3_loop + com r_quoL ; complement result + com r_quoH ; because C flag was complemented in loop + com r_quoHL + com r_quoHH + ret +ENDF __udivusa3 +#endif /* defined (L_udivusa3) */ + +#undef r_arg1L +#undef r_arg1H +#undef r_arg1HL +#undef r_arg1HH +#undef r_divdL +#undef r_divdH +#undef r_divdHL +#undef r_divdHH +#undef r_quoL +#undef r_quoH +#undef r_quoHL +#undef r_quoHH +#undef r_divL +#undef r_divH +#undef r_divHL +#undef r_divHH +#undef r_cnt + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Saturation, 1 Byte +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; First Argument and Return Register +#define A0 24 + +#if defined (L_ssabs_1) +DEFUN __ssabs_1 + sbrs A0, 7 + ret + neg A0 + sbrc A0,7 + dec A0 + ret +ENDF __ssabs_1 +#endif /* L_ssabs_1 */ + +#undef A0 + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Saturation, 2 Bytes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; First Argument and Return Register +#define A0 24 +#define A1 A0+1 + +#if defined (L_ssneg_2) +DEFUN __ssneg_2 + NEG2 A0 + brvc 0f + sbiw A0, 1 +0: ret +ENDF __ssneg_2 +#endif /* L_ssneg_2 */ + +#if defined (L_ssabs_2) +DEFUN __ssabs_2 + sbrs A1, 7 + ret + XJMP __ssneg_2 +ENDF __ssabs_2 +#endif /* L_ssabs_2 */ + +#undef A0 +#undef A1 + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Saturation, 4 Bytes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; First Argument and Return Register +#define A0 22 +#define A1 A0+1 +#define A2 A0+2 +#define A3 A0+3 + +#if defined (L_ssneg_4) +DEFUN __ssneg_4 + XCALL __negsi2 + brvc 0f + ldi A3, 0x7f + ldi A2, 0xff + ldi A1, 0xff + ldi A0, 0xff +0: ret +ENDF __ssneg_4 +#endif /* L_ssneg_4 */ + +#if defined (L_ssabs_4) +DEFUN __ssabs_4 + sbrs A3, 7 + 
ret + XJMP __ssneg_4 +ENDF __ssabs_4 +#endif /* L_ssabs_4 */ + +#undef A0 +#undef A1 +#undef A2 +#undef A3 + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Saturation, 8 Bytes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; First Argument and Return Register +#define A0 18 +#define A1 A0+1 +#define A2 A0+2 +#define A3 A0+3 +#define A4 A0+4 +#define A5 A0+5 +#define A6 A0+6 +#define A7 A0+7 + +#if defined (L_clr_8) +FALIAS __usneguta2 +FALIAS __usneguda2 +FALIAS __usnegudq2 + +;; Clear Carry and all Bytes +DEFUN __clr_8 + ;; Clear Carry and set Z + sub A7, A7 + ;; FALLTHRU +ENDF __clr_8 +;; Propagate Carry to all Bytes, Carry unaltered +DEFUN __sbc_8 + sbc A7, A7 + sbc A6, A6 + wmov A4, A6 + wmov A2, A6 + wmov A0, A6 + ret +ENDF __sbc_8 +#endif /* L_clr_8 */ + +#if defined (L_ssneg_8) +FALIAS __ssnegta2 +FALIAS __ssnegda2 +FALIAS __ssnegdq2 + +DEFUN __ssneg_8 + XCALL __negdi2 + brvc 0f + ;; A[] = 0x7fffffff + sec + XCALL __sbc_8 + ldi A7, 0x7f +0: ret +ENDF __ssneg_8 +#endif /* L_ssneg_8 */ + +#if defined (L_ssabs_8) +FALIAS __ssabsta2 +FALIAS __ssabsda2 +FALIAS __ssabsdq2 + +DEFUN __ssabs_8 + sbrs A7, 7 + ret + XJMP __ssneg_8 +ENDF __ssabs_8 +#endif /* L_ssabs_8 */ + +;; Second Argument +#define B0 10 +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 +#define B4 B0+4 +#define B5 B0+5 +#define B6 B0+6 +#define B7 B0+7 + +#if defined (L_usadd_8) +FALIAS __usadduta3 +FALIAS __usadduda3 +FALIAS __usaddudq3 + +DEFUN __usadd_8 + XCALL __adddi3 + brcs 0f + ret +0: ;; A[] = 0xffffffff + XJMP __sbc_8 +ENDF __usadd_8 +#endif /* L_usadd_8 */ + +#if defined (L_ussub_8) +FALIAS __ussubuta3 +FALIAS __ussubuda3 +FALIAS __ussubudq3 + +DEFUN __ussub_8 + XCALL __subdi3 + brcs 0f + ret +0: ;; A[] = 0 + XJMP __clr_8 +ENDF __ussub_8 +#endif /* L_ussub_8 */ + +#if defined (L_ssadd_8) +FALIAS __ssaddta3 +FALIAS __ssaddda3 +FALIAS __ssadddq3 + +DEFUN __ssadd_8 + XCALL __adddi3 + brvc 0f + ;; A = (B >= 0) ? 
INT64_MAX : INT64_MIN + cpi B7, 0x80 + XCALL __sbc_8 + subi A7, 0x80 +0: ret +ENDF __ssadd_8 +#endif /* L_ssadd_8 */ + +#if defined (L_sssub_8) +FALIAS __sssubta3 +FALIAS __sssubda3 +FALIAS __sssubdq3 + +DEFUN __sssub_8 + XCALL __subdi3 + brvc 0f + ;; A = (B < 0) ? INT64_MAX : INT64_MIN + ldi A7, 0x7f + cp A7, B7 + XCALL __sbc_8 + subi A7, 0x80 +0: ret +ENDF __sssub_8 +#endif /* L_sssub_8 */ + +#undef A0 +#undef A1 +#undef A2 +#undef A3 +#undef A4 +#undef A5 +#undef A6 +#undef A7 +#undef B0 +#undef B1 +#undef B2 +#undef B3 +#undef B4 +#undef B5 +#undef B6 +#undef B7 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Rounding Helpers +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +#ifdef L_mask1 + +#define AA 24 +#define CC 25 + +;; R25 = 1 << (R24 & 7) +;; CC = 1 << (AA & 7) +;; Clobbers: None +DEFUN __mask1 + ;; CC = 2 ^ AA.1 + ldi CC, 1 << 2 + sbrs AA, 1 + ldi CC, 1 << 0 + ;; CC *= 2 ^ AA.0 + sbrc AA, 0 + lsl CC + ;; CC *= 2 ^ AA.2 + sbrc AA, 2 + swap CC + ret +ENDF __mask1 + +#undef AA +#undef CC +#endif /* L_mask1 */ + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The rounding point. Any bits smaller than +;; 2^{-RP} will be cleared. 
+#define RP  R24
+
+#define A0  22
+#define A1  A0 + 1
+
+#define C0  24
+#define C1  C0 + 1
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding, 1 Byte
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#ifdef L_roundqq3
+
+;; R24 = round (R22, R24)
+;; Round the signed fixed-point value in A0 (= R22) at rounding point
+;; RP (= R24):  add 2^{-RP-1}, saturating on signed overflow, then
+;; clear all bits below 2^{-RP}.
+;; Clobbers: R22, __tmp_reg__
+DEFUN  __roundqq3
+    ;; Save R25:  __mask1 returns its result in C1 (= R25), but this
+    ;; function must leave R25 unchanged.
+    mov     __tmp_reg__, C1
+    subi    RP, __QQ_FBIT__ - 1
+    neg     RP
+    ;; R25 = 1 << RP   (Total offset is FBIT-1 - RP)
+    XCALL   __mask1
+    mov     C0, C1
+    ;; Add-Saturate 2^{-RP-1}
+    add     A0, C0
+    brvc 0f
+    ;; Signed overflow:  return INT8_MAX-style saturation value
+    ldi     C0, 0x7f
+    rjmp 9f
+0:  ;; Mask out bits beyond RP:
+    ;; C0 = 2^{-RP} after the shift; -C0 then has exactly the bits
+    ;; >= 2^{-RP} set, so the AND truncates below the rounding point.
+    lsl     C0
+    neg     C0
+    and     C0, A0
+9:  ;; Restore R25; the result is in C0 (= R24).
+    mov     C1, __tmp_reg__
+    ret
+ENDF  __roundqq3
+#endif /* L_roundqq3 */
+
+#ifdef L_rounduqq3
+
+;; R24 = round (R22, R24)
+;; Unsigned variant of __roundqq3:  saturation is triggered by Carry
+;; instead of Overflow, and saturates to 0xff.
+;; Clobbers: R22, __tmp_reg__
+DEFUN  __rounduqq3
+    ;; Save R25:  __mask1 returns its result in C1 (= R25), but this
+    ;; function must leave R25 unchanged.
+    mov     __tmp_reg__, C1
+    subi    RP, __UQQ_FBIT__ - 1
+    neg     RP
+    ;; R25 = 1 << RP   (Total offset is FBIT-1 - RP)
+    XCALL   __mask1
+    mov     C0, C1
+    ;; Add-Saturate 2^{-RP-1}
+    add     A0, C0
+    brcc 0f
+    ;; Unsigned overflow:  return UINT8_MAX
+    ldi     C0, 0xff
+    rjmp 9f
+0:  ;; Mask out bits beyond RP:
+    ;; C0 = 2^{-RP} after the shift; -C0 then has exactly the bits
+    ;; >= 2^{-RP} set, so the AND truncates below the rounding point.
+    lsl     C0
+    neg     C0
+    and     C0, A0
+9:  ;; Restore R25; the result is in C0 (= R24).
+    mov     C1, __tmp_reg__
+    ret
+ENDF  __rounduqq3
+#endif /* L_rounduqq3 */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Rounding, 2 Bytes
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#ifdef L_addmask_2
+
+;; [ R25:R24  =  1 << (R24 & 15)
+;;   R23:R22 +=  1 << (R24 & 15) ]
+;; SREG is set according to the addition
+DEFUN __addmask_2
+    ;; R25 = 1 << (R24 & 7)
+    XCALL   __mask1
+    ;; C0 = 0xff if RP < 8 (borrow from the compare), else 0
+    cpi     RP, 1 << 3
+    sbc     C0, C0
+    ;; If RP.3 is clear, move the mask byte from C1 down to C0 and
+    ;; clear C1; if RP.3 is set, leave it in the high byte, so that
+    ;; C1:C0 = 1 << (RP & 15).
+    and     C0, C1
+    eor     C1, C0
+    ;; Finally, add the power-of-two:  A[] += C[]
+    add     A0, C0
+    adc     A1, C1
+    ret
+ENDF __addmask_2
+#endif /* L_addmask_2 */
+
+#ifdef L_round_s2
+
+;; R25:R24 = round (R23:R22, R24)
+;; Clobbers: R23, R22
+DEFUN  __roundhq3
+    ;; Adjust the rounding point from HQ to HA fractional bits and
+    ;; fall through to __roundha3.
+    subi    RP, __HQ_FBIT__ - __HA_FBIT__
+ENDF   __roundhq3
+DEFUN  __roundha3
+    subi    RP, __HA_FBIT__ - 1
+    neg     RP
+    ;; [ R25:R24  =  1 << (FBIT-1 - RP)
+    ;;   R23:R22 +=  1 << (FBIT-1 - RP) ]
+    XCALL   __addmask_2
+    ;; Saturate or truncate according to the flags the addition set.
+    XJMP    __round_s2_const
+ENDF  __roundha3
+
+#endif /* L_round_s2 */
+#ifdef L_round_u2 + +;; R25:R24 = round (R23:R22, R24) +;; Clobbers: R23, R22 +DEFUN __rounduhq3 + subi RP, __UHQ_FBIT__ - __UHA_FBIT__ +ENDF __rounduhq3 +DEFUN __rounduha3 + subi RP, __UHA_FBIT__ - 1 + neg RP + ;; [ R25:R24 = 1 << (FBIT-1 - RP) + ;; R23:R22 += 1 << (FBIT-1 - RP) ] + XCALL __addmask_2 + XJMP __round_u2_const +ENDF __rounduha3 + +#endif /* L_round_u2 */ + + +#ifdef L_round_2_const + +;; Helpers for 2 byte wide rounding + +DEFUN __round_s2_const + brvc 2f + ldi C1, 0x7f + rjmp 1f + ;; FALLTHRU (Barrier) +ENDF __round_s2_const + +DEFUN __round_u2_const + brcc 2f + ldi C1, 0xff +1: + ldi C0, 0xff + rjmp 9f +2: + ;; Saturation is performed now. + ;; Currently, we have C[] = 2^{-RP-1} + ;; C[] = 2^{-RP} + lsl C0 + rol C1 + ;; + NEG2 C0 + ;; Clear the bits beyond the rounding point. + and C0, A0 + and C1, A1 +9: ret +ENDF __round_u2_const + +#endif /* L_round_2_const */ + +#undef A0 +#undef A1 +#undef C0 +#undef C1 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Rounding, 4 Bytes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +#define A0 18 +#define A1 A0 + 1 +#define A2 A0 + 2 +#define A3 A0 + 3 + +#define C0 22 +#define C1 C0 + 1 +#define C2 C0 + 2 +#define C3 C0 + 3 + +#ifdef L_addmask_4 + +;; [ R25:R22 = 1 << (R24 & 31) +;; R21:R18 += 1 << (R24 & 31) ] +;; SREG is set according to the addition +DEFUN __addmask_4 + ;; R25 = 1 << (R24 & 7) + XCALL __mask1 + cpi RP, 1 << 4 + sbc C0, C0 + sbc C1, C1 + ;; Swap C2 with C3 if RP.3 is not set + cpi RP, 1 << 3 + sbc C2, C2 + and C2, C3 + eor C3, C2 + ;; Swap C3:C2 with C1:C0 if RP.4 is not set + and C0, C2 $ eor C2, C0 + and C1, C3 $ eor C3, C1 + ;; Finally, add the power-of-two: A[] += C[] + add A0, C0 + adc A1, C1 + adc A2, C2 + adc A3, C3 + ret +ENDF __addmask_4 +#endif /* L_addmask_4 */ + +#ifdef L_round_s4 + +;; R25:R22 = round (R21:R18, R24) +;; Clobbers: R18...R21 +DEFUN __roundsq3 + subi RP, __SQ_FBIT__ - __SA_FBIT__ +ENDF __roundsq3 +DEFUN __roundsa3 + subi RP, __SA_FBIT__ - 1 + neg RP + ;; 
[ R25:R22 = 1 << (FBIT-1 - RP) + ;; R21:R18 += 1 << (FBIT-1 - RP) ] + XCALL __addmask_4 + XJMP __round_s4_const +ENDF __roundsa3 + +#endif /* L_round_s4 */ + +#ifdef L_round_u4 + +;; R25:R22 = round (R21:R18, R24) +;; Clobbers: R18...R21 +DEFUN __roundusq3 + subi RP, __USQ_FBIT__ - __USA_FBIT__ +ENDF __roundusq3 +DEFUN __roundusa3 + subi RP, __USA_FBIT__ - 1 + neg RP + ;; [ R25:R22 = 1 << (FBIT-1 - RP) + ;; R21:R18 += 1 << (FBIT-1 - RP) ] + XCALL __addmask_4 + XJMP __round_u4_const +ENDF __roundusa3 + +#endif /* L_round_u4 */ + + +#ifdef L_round_4_const + +;; Helpers for 4 byte wide rounding + +DEFUN __round_s4_const + brvc 2f + ldi C3, 0x7f + rjmp 1f + ;; FALLTHRU (Barrier) +ENDF __round_s4_const + +DEFUN __round_u4_const + brcc 2f + ldi C3, 0xff +1: + ldi C2, 0xff + ldi C1, 0xff + ldi C0, 0xff + rjmp 9f +2: + ;; Saturation is performed now. + ;; Currently, we have C[] = 2^{-RP-1} + ;; C[] = 2^{-RP} + lsl C0 + rol C1 + rol C2 + rol C3 + XCALL __negsi2 + ;; Clear the bits beyond the rounding point. 
+ and C0, A0 + and C1, A1 + and C2, A2 + and C3, A3 +9: ret +ENDF __round_u4_const + +#endif /* L_round_4_const */ + +#undef A0 +#undef A1 +#undef A2 +#undef A3 +#undef C0 +#undef C1 +#undef C2 +#undef C3 + +#undef RP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Rounding, 8 Bytes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +#define RP 16 +#define FBITm1 31 + +#define C0 18 +#define C1 C0 + 1 +#define C2 C0 + 2 +#define C3 C0 + 3 +#define C4 C0 + 4 +#define C5 C0 + 5 +#define C6 C0 + 6 +#define C7 C0 + 7 + +#define A0 16 +#define A1 17 +#define A2 26 +#define A3 27 +#define A4 28 +#define A5 29 +#define A6 30 +#define A7 31 + + +#ifdef L_rounddq3 +;; R25:R18 = round (R25:R18, R16) +;; Clobbers: ABI +DEFUN __rounddq3 + ldi FBITm1, __DQ_FBIT__ - 1 + clt + XJMP __round_x8 +ENDF __rounddq3 +#endif /* L_rounddq3 */ + +#ifdef L_roundudq3 +;; R25:R18 = round (R25:R18, R16) +;; Clobbers: ABI +DEFUN __roundudq3 + ldi FBITm1, __UDQ_FBIT__ - 1 + set + XJMP __round_x8 +ENDF __roundudq3 +#endif /* L_roundudq3 */ + +#ifdef L_roundda3 +;; R25:R18 = round (R25:R18, R16) +;; Clobbers: ABI +DEFUN __roundda3 + ldi FBITm1, __DA_FBIT__ - 1 + clt + XJMP __round_x8 +ENDF __roundda3 +#endif /* L_roundda3 */ + +#ifdef L_rounduda3 +;; R25:R18 = round (R25:R18, R16) +;; Clobbers: ABI +DEFUN __rounduda3 + ldi FBITm1, __UDA_FBIT__ - 1 + set + XJMP __round_x8 +ENDF __rounduda3 +#endif /* L_rounduda3 */ + +#ifdef L_roundta3 +;; R25:R18 = round (R25:R18, R16) +;; Clobbers: ABI +DEFUN __roundta3 + ldi FBITm1, __TA_FBIT__ - 1 + clt + XJMP __round_x8 +ENDF __roundta3 +#endif /* L_roundta3 */ + +#ifdef L_rounduta3 +;; R25:R18 = round (R25:R18, R16) +;; Clobbers: ABI +DEFUN __rounduta3 + ldi FBITm1, __UTA_FBIT__ - 1 + set + XJMP __round_x8 +ENDF __rounduta3 +#endif /* L_rounduta3 */ + + +#ifdef L_round_x8 +DEFUN __round_x8 + push r16 + push r17 + push r28 + push r29 + ;; Compute log2 of addend from rounding point + sub RP, FBITm1 + neg RP + ;; Move input to work register A[] + push C0 + 
mov A1, C1 + wmov A2, C2 + wmov A4, C4 + wmov A6, C6 + ;; C[] = 1 << (FBIT-1 - RP) + XCALL __clr_8 + inc C0 + XCALL __ashldi3 + pop A0 + ;; A[] += C[] + add A0, C0 + adc A1, C1 + adc A2, C2 + adc A3, C3 + adc A4, C4 + adc A5, C5 + adc A6, C6 + adc A7, C7 + brts 1f + ;; Signed + brvc 3f + ;; Signed overflow: A[] = 0x7f... + brvs 2f +1: ;; Unsigned + brcc 3f + ;; Unsigned overflow: A[] = 0xff... +2: ldi C7, 0xff + ldi C6, 0xff + wmov C0, C6 + wmov C2, C6 + wmov C4, C6 + bld C7, 7 + rjmp 9f +3: + ;; C[] = -C[] - C[] + push A0 + ldi r16, 1 + XCALL __ashldi3 + pop A0 + XCALL __negdi2 + ;; Clear the bits beyond the rounding point. + and C0, A0 + and C1, A1 + and C2, A2 + and C3, A3 + and C4, A4 + and C5, A5 + and C6, A6 + and C7, A7 +9: ;; Epilogue + pop r29 + pop r28 + pop r17 + pop r16 + ret +ENDF __round_x8 + +#endif /* L_round_x8 */ + +#undef A0 +#undef A1 +#undef A2 +#undef A3 +#undef A4 +#undef A5 +#undef A6 +#undef A7 + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 + +#undef RP +#undef FBITm1 + + +;; Supply implementations / symbols for the bit-banging functions +;; __builtin_avr_bitsfx and __builtin_avr_fxbits +#ifdef L_ret +DEFUN __ret + ret +ENDF __ret +#endif /* L_ret */ diff --git a/gcc-4.9/libgcc/config/avr/lib1funcs.S b/gcc-4.9/libgcc/config/avr/lib1funcs.S new file mode 100644 index 000000000..6f1c77edb --- /dev/null +++ b/gcc-4.9/libgcc/config/avr/lib1funcs.S @@ -0,0 +1,3226 @@ +/* -*- Mode: Asm -*- */ +/* Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Denis Chertykov <chertykov@gmail.com> + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. 
+ +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#define __zero_reg__ r1 +#define __tmp_reg__ r0 +#define __SREG__ 0x3f +#if defined (__AVR_HAVE_SPH__) +#define __SP_H__ 0x3e +#endif +#define __SP_L__ 0x3d +#define __RAMPZ__ 0x3B +#define __EIND__ 0x3C + +/* Most of the functions here are called directly from avr.md + patterns, instead of using the standard libcall mechanisms. + This can make better code because GCC knows exactly which + of the call-used registers (not all of them) are clobbered. */ + +/* FIXME: At present, there is no SORT directive in the linker + script so that we must not assume that different modules + in the same input section like .libgcc.text.mul will be + located close together. Therefore, we cannot use + RCALL/RJMP to call a function like __udivmodhi4 from + __divmodhi4 and have to use lengthy XCALL/XJMP even + though they are in the same input section and all same + input sections together are small enough to reach every + location with a RCALL/RJMP instruction. 
*/ + + .macro mov_l r_dest, r_src +#if defined (__AVR_HAVE_MOVW__) + movw \r_dest, \r_src +#else + mov \r_dest, \r_src +#endif + .endm + + .macro mov_h r_dest, r_src +#if defined (__AVR_HAVE_MOVW__) + ; empty +#else + mov \r_dest, \r_src +#endif + .endm + +.macro wmov r_dest, r_src +#if defined (__AVR_HAVE_MOVW__) + movw \r_dest, \r_src +#else + mov \r_dest, \r_src + mov \r_dest+1, \r_src+1 +#endif +.endm + +#if defined (__AVR_HAVE_JMP_CALL__) +#define XCALL call +#define XJMP jmp +#else +#define XCALL rcall +#define XJMP rjmp +#endif + +;; Prologue stuff + +.macro do_prologue_saves n_pushed n_frame=0 + ldi r26, lo8(\n_frame) + ldi r27, hi8(\n_frame) + ldi r30, lo8(gs(.L_prologue_saves.\@)) + ldi r31, hi8(gs(.L_prologue_saves.\@)) + XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2) +.L_prologue_saves.\@: +.endm + +;; Epilogue stuff + +.macro do_epilogue_restores n_pushed n_frame=0 + in r28, __SP_L__ +#ifdef __AVR_HAVE_SPH__ + in r29, __SP_H__ +.if \n_frame > 63 + subi r28, lo8(-\n_frame) + sbci r29, hi8(-\n_frame) +.elseif \n_frame > 0 + adiw r28, \n_frame +.endif +#else + clr r29 +.if \n_frame > 0 + subi r28, lo8(-\n_frame) +.endif +#endif /* HAVE SPH */ + ldi r30, \n_pushed + XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2) +.endm + +;; Support function entry and exit for convenience + +.macro DEFUN name +.global \name +.func \name +\name: +.endm + +.macro ENDF name +.size \name, .-\name +.endfunc +.endm + +.macro FALIAS name +.global \name +.func \name +\name: +.size \name, .-\name +.endfunc +.endm + +;; Skip next instruction, typically a jump target +#define skip cpse 0,0 + +;; Negate a 2-byte value held in consecutive registers +.macro NEG2 reg + com \reg+1 + neg \reg + sbci \reg+1, -1 +.endm + +;; Negate a 4-byte value held in consecutive registers +;; Sets the V flag for signed overflow tests if REG >= 16 +.macro NEG4 reg + com \reg+3 + com \reg+2 + com \reg+1 +.if \reg >= 16 + neg \reg + sbci \reg+1, -1 + sbci \reg+2, -1 + sbci \reg+3, -1 +.else + com 
\reg + adc \reg, __zero_reg__ + adc \reg+1, __zero_reg__ + adc \reg+2, __zero_reg__ + adc \reg+3, __zero_reg__ +.endif +.endm + +#define exp_lo(N) hlo8 ((N) << 23) +#define exp_hi(N) hhi8 ((N) << 23) + + +.section .text.libgcc.mul, "ax", @progbits + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */ +#if !defined (__AVR_HAVE_MUL__) +/******************************************************* + Multiplication 8 x 8 without MUL +*******************************************************/ +#if defined (L_mulqi3) + +#define r_arg2 r22 /* multiplicand */ +#define r_arg1 r24 /* multiplier */ +#define r_res __tmp_reg__ /* result */ + +DEFUN __mulqi3 + clr r_res ; clear result +__mulqi3_loop: + sbrc r_arg1,0 + add r_res,r_arg2 + add r_arg2,r_arg2 ; shift multiplicand + breq __mulqi3_exit ; while multiplicand != 0 + lsr r_arg1 ; + brne __mulqi3_loop ; exit if multiplier = 0 +__mulqi3_exit: + mov r_arg1,r_res ; result to return register + ret +ENDF __mulqi3 + +#undef r_arg2 +#undef r_arg1 +#undef r_res + +#endif /* defined (L_mulqi3) */ + + +/******************************************************* + Widening Multiplication 16 = 8 x 8 without MUL + Multiplication 16 x 16 without MUL +*******************************************************/ + +#define A0 r22 +#define A1 r23 +#define B0 r24 +#define BB0 r20 +#define B1 r25 +;; Output overlaps input, thus expand result in CC0/1 +#define C0 r24 +#define C1 r25 +#define CC0 __tmp_reg__ +#define CC1 R21 + +#if defined (L_umulqihi3) +;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24 +;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0 +;;; Clobbers: __tmp_reg__, R21..R23 +DEFUN __umulqihi3 + clr A1 + clr B1 + XJMP __mulhi3 +ENDF __umulqihi3 +#endif /* L_umulqihi3 */ + +#if defined (L_mulqihi3) +;;; R25:R24 = (signed int) R22 * (signed int) R24 +;;; (C1:C0) = (signed int) A0 * (signed int) B0 +;;; Clobbers: __tmp_reg__, R20..R23 +DEFUN __mulqihi3 + ;; Sign-extend B0 
+ clr B1 + sbrc B0, 7 + com B1 + ;; The multiplication runs twice as fast if A1 is zero, thus: + ;; Zero-extend A0 + clr A1 +#ifdef __AVR_HAVE_JMP_CALL__ + ;; Store B0 * sign of A + clr BB0 + sbrc A0, 7 + mov BB0, B0 + call __mulhi3 +#else /* have no CALL */ + ;; Skip sign-extension of A if A >= 0 + ;; Same size as with the first alternative but avoids errata skip + ;; and is faster if A >= 0 + sbrs A0, 7 + rjmp __mulhi3 + ;; If A < 0 store B + mov BB0, B0 + rcall __mulhi3 +#endif /* HAVE_JMP_CALL */ + ;; 1-extend A after the multiplication + sub C1, BB0 + ret +ENDF __mulqihi3 +#endif /* L_mulqihi3 */ + +#if defined (L_mulhi3) +;;; R25:R24 = R23:R22 * R25:R24 +;;; (C1:C0) = (A1:A0) * (B1:B0) +;;; Clobbers: __tmp_reg__, R21..R23 +DEFUN __mulhi3 + + ;; Clear result + clr CC0 + clr CC1 + rjmp 3f +1: + ;; Bit n of A is 1 --> C += B << n + add CC0, B0 + adc CC1, B1 +2: + lsl B0 + rol B1 +3: + ;; If B == 0 we are ready + sbiw B0, 0 + breq 9f + + ;; Carry = n-th bit of A + lsr A1 + ror A0 + ;; If bit n of A is set, then go add B * 2^n to C + brcs 1b + + ;; Carry = 0 --> The ROR above acts like CP A0, 0 + ;; Thus, it is sufficient to CPC the high part to test A against 0 + cpc A1, __zero_reg__ + ;; Only proceed if A != 0 + brne 2b +9: + ;; Move Result into place + mov C0, CC0 + mov C1, CC1 + ret +ENDF __mulhi3 +#endif /* L_mulhi3 */ + +#undef A0 +#undef A1 +#undef B0 +#undef BB0 +#undef B1 +#undef C0 +#undef C1 +#undef CC0 +#undef CC1 + + +#define A0 22 +#define A1 A0+1 +#define A2 A0+2 +#define A3 A0+3 + +#define B0 18 +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 + +#define CC0 26 +#define CC1 CC0+1 +#define CC2 30 +#define CC3 CC2+1 + +#define C0 22 +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 + +/******************************************************* + Widening Multiplication 32 = 16 x 16 without MUL +*******************************************************/ + +#if defined (L_umulhisi3) +DEFUN __umulhisi3 + wmov B0, 24 + ;; Zero-extend B + clr B2 + clr B3 + 
;; Zero-extend A + wmov A2, B2 + XJMP __mulsi3 +ENDF __umulhisi3 +#endif /* L_umulhisi3 */ + +#if defined (L_mulhisi3) +DEFUN __mulhisi3 + wmov B0, 24 + ;; Sign-extend B + lsl r25 + sbc B2, B2 + mov B3, B2 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + ;; Sign-extend A + clr A2 + sbrc A1, 7 + com A2 + mov A3, A2 + XJMP __mulsi3 +#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */ + ;; Zero-extend A and __mulsi3 will run at least twice as fast + ;; compared to a sign-extended A. + clr A2 + clr A3 + sbrs A1, 7 + XJMP __mulsi3 + ;; If A < 0 then perform the B * 0xffff.... before the + ;; very multiplication by initializing the high part of the + ;; result CC with -B. + wmov CC2, A2 + sub CC2, B0 + sbc CC3, B1 + XJMP __mulsi3_helper +#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */ +ENDF __mulhisi3 +#endif /* L_mulhisi3 */ + + +/******************************************************* + Multiplication 32 x 32 without MUL +*******************************************************/ + +#if defined (L_mulsi3) +DEFUN __mulsi3 + ;; Clear result + clr CC2 + clr CC3 + ;; FALLTHRU +ENDF __mulsi3 + +DEFUN __mulsi3_helper + clr CC0 + clr CC1 + rjmp 3f + +1: ;; If bit n of A is set, then add B * 2^n to the result in CC + ;; CC += B + add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3 + +2: ;; B <<= 1 + lsl B0 $ rol B1 $ rol B2 $ rol B3 + +3: ;; A >>= 1: Carry = n-th bit of A + lsr A3 $ ror A2 $ ror A1 $ ror A0 + + brcs 1b + ;; Only continue if A != 0 + sbci A1, 0 + brne 2b + sbiw A2, 0 + brne 2b + + ;; All bits of A are consumed: Copy result to return register C + wmov C0, CC0 + wmov C2, CC2 + ret +ENDF __mulsi3_helper +#endif /* L_mulsi3 */ + +#undef A0 +#undef A1 +#undef A2 +#undef A3 +#undef B0 +#undef B1 +#undef B2 +#undef B3 +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef CC0 +#undef CC1 +#undef CC2 +#undef CC3 + +#endif /* !defined (__AVR_HAVE_MUL__) */ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +#if defined (__AVR_HAVE_MUL__) 
+#define A0 26 +#define B0 18 +#define C0 22 + +#define A1 A0+1 + +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 + +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 + +/******************************************************* + Widening Multiplication 32 = 16 x 16 with MUL +*******************************************************/ + +#if defined (L_mulhisi3) +;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18 +;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulhisi3 + XCALL __umulhisi3 + ;; Sign-extend B + tst B1 + brpl 1f + sub C2, A0 + sbc C3, A1 +1: ;; Sign-extend A + XJMP __usmulhisi3_tail +ENDF __mulhisi3 +#endif /* L_mulhisi3 */ + +#if defined (L_usmulhisi3) +;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18 +;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __usmulhisi3 + XCALL __umulhisi3 + ;; FALLTHRU +ENDF __usmulhisi3 + +DEFUN __usmulhisi3_tail + ;; Sign-extend A + sbrs A1, 7 + ret + sub C2, B0 + sbc C3, B1 + ret +ENDF __usmulhisi3_tail +#endif /* L_usmulhisi3 */ + +#if defined (L_umulhisi3) +;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18 +;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __umulhisi3 + mul A0, B0 + movw C0, r0 + mul A1, B1 + movw C2, r0 + mul A0, B1 +#ifdef __AVR_HAVE_JMP_CALL__ + ;; This function is used by many other routines, often multiple times. + ;; Therefore, if the flash size is not too limited, avoid the RCALL + ;; and inverst 6 Bytes to speed things up. 
+ add C1, r0 + adc C2, r1 + clr __zero_reg__ + adc C3, __zero_reg__ +#else + rcall 1f +#endif + mul A1, B0 +1: add C1, r0 + adc C2, r1 + clr __zero_reg__ + adc C3, __zero_reg__ + ret +ENDF __umulhisi3 +#endif /* L_umulhisi3 */ + +/******************************************************* + Widening Multiplication 32 = 16 x 32 with MUL +*******************************************************/ + +#if defined (L_mulshisi3) +;;; R25:R22 = (signed long) R27:R26 * R21:R18 +;;; (C3:C0) = (signed long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulshisi3 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst A1 + brmi __mulohisi3 +#else + sbrs A1, 7 +#endif /* __AVR_HAVE_JMP_CALL__ */ + XJMP __muluhisi3 + ;; FALLTHRU +ENDF __mulshisi3 + +;;; R25:R22 = (one-extended long) R27:R26 * R21:R18 +;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __mulohisi3 + XCALL __muluhisi3 + ;; One-extend R27:R26 (A1:A0) + sub C2, B0 + sbc C3, B1 + ret +ENDF __mulohisi3 +#endif /* L_mulshisi3 */ + +#if defined (L_muluhisi3) +;;; R25:R22 = (unsigned long) R27:R26 * R21:R18 +;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0 +;;; Clobbers: __tmp_reg__ +DEFUN __muluhisi3 + XCALL __umulhisi3 + mul A0, B3 + add C3, r0 + mul A1, B2 + add C3, r0 + mul A0, B2 + add C2, r0 + adc C3, r1 + clr __zero_reg__ + ret +ENDF __muluhisi3 +#endif /* L_muluhisi3 */ + +/******************************************************* + Multiplication 32 x 32 with MUL +*******************************************************/ + +#if defined (L_mulsi3) +;;; R25:R22 = R25:R22 * R21:R18 +;;; (C3:C0) = C3:C0 * B3:B0 +;;; Clobbers: R26, R27, __tmp_reg__ +DEFUN __mulsi3 + movw A0, C0 + push C2 + push C3 + XCALL __muluhisi3 + pop A1 + pop A0 + ;; A1:A0 now contains the high word of A + mul A0, B0 + add C2, r0 + adc C3, r1 + mul A0, B1 + add C3, r0 + mul A1, B0 + add C3, r0 + clr __zero_reg__ + ret +ENDF __mulsi3 +#endif /* L_mulsi3 */ + +#undef A0 +#undef A1 
+ +#undef B0 +#undef B1 +#undef B2 +#undef B3 + +#undef C0 +#undef C1 +#undef C2 +#undef C3 + +#endif /* __AVR_HAVE_MUL__ */ + +/******************************************************* + Multiplication 24 x 24 with MUL +*******************************************************/ + +#if defined (L_mulpsi3) + +;; A[0..2]: In: Multiplicand; Out: Product +#define A0 22 +#define A1 A0+1 +#define A2 A0+2 + +;; B[0..2]: In: Multiplier +#define B0 18 +#define B1 B0+1 +#define B2 B0+2 + +#if defined (__AVR_HAVE_MUL__) + +;; C[0..2]: Expand Result +#define C0 22 +#define C1 C0+1 +#define C2 C0+2 + +;; R24:R22 *= R20:R18 +;; Clobbers: r21, r25, r26, r27, __tmp_reg__ + +#define AA0 26 +#define AA2 21 + +DEFUN __mulpsi3 + wmov AA0, A0 + mov AA2, A2 + XCALL __umulhisi3 + mul AA2, B0 $ add C2, r0 + mul AA0, B2 $ add C2, r0 + clr __zero_reg__ + ret +ENDF __mulpsi3 + +#undef AA2 +#undef AA0 + +#undef C2 +#undef C1 +#undef C0 + +#else /* !HAVE_MUL */ + +;; C[0..2]: Expand Result +#define C0 0 +#define C1 C0+1 +#define C2 21 + +;; R24:R22 *= R20:R18 +;; Clobbers: __tmp_reg__, R18, R19, R20, R21 + +DEFUN __mulpsi3 + + ;; C[] = 0 + clr __tmp_reg__ + clr C2 + +0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop + LSR B2 $ ror B1 $ ror B0 + + ;; If the N-th Bit of B[] was set... 
+ brcc 1f + + ;; ...then add A[] * 2^N to the Result C[] + ADD C0,A0 $ adc C1,A1 $ adc C2,A2 + +1: ;; Multiply A[] by 2 + LSL A0 $ rol A1 $ rol A2 + + ;; Loop until B[] is 0 + subi B0,0 $ sbci B1,0 $ sbci B2,0 + brne 0b + + ;; Copy C[] to the return Register A[] + wmov A0, C0 + mov A2, C2 + + clr __zero_reg__ + ret +ENDF __mulpsi3 + +#undef C2 +#undef C1 +#undef C0 + +#endif /* HAVE_MUL */ + +#undef B2 +#undef B1 +#undef B0 + +#undef A2 +#undef A1 +#undef A0 + +#endif /* L_mulpsi3 */ + +#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__) + +;; A[0..2]: In: Multiplicand +#define A0 22 +#define A1 A0+1 +#define A2 A0+2 + +;; BB: In: Multiplier +#define BB 25 + +;; C[0..2]: Result +#define C0 18 +#define C1 C0+1 +#define C2 C0+2 + +;; C[] = A[] * sign_extend (BB) +DEFUN __mulsqipsi3 + mul A0, BB + movw C0, r0 + mul A2, BB + mov C2, r0 + mul A1, BB + add C1, r0 + adc C2, r1 + clr __zero_reg__ + sbrs BB, 7 + ret + ;; One-extend BB + sub C1, A0 + sbc C2, A1 + ret +ENDF __mulsqipsi3 + +#undef C2 +#undef C1 +#undef C0 + +#undef BB + +#undef A2 +#undef A1 +#undef A0 + +#endif /* L_mulsqipsi3 && HAVE_MUL */ + +/******************************************************* + Multiplication 64 x 64 +*******************************************************/ + +;; A[] = A[] * B[] + +;; A[0..7]: In: Multiplicand +;; Out: Product +#define A0 18 +#define A1 A0+1 +#define A2 A0+2 +#define A3 A0+3 +#define A4 A0+4 +#define A5 A0+5 +#define A6 A0+6 +#define A7 A0+7 + +;; B[0..7]: In: Multiplier +#define B0 10 +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 +#define B4 B0+4 +#define B5 B0+5 +#define B6 B0+6 +#define B7 B0+7 + +#if defined (__AVR_HAVE_MUL__) + +;; Define C[] for convenience +;; Notice that parts of C[] overlap A[] respective B[] +#define C0 16 +#define C1 C0+1 +#define C2 20 +#define C3 C2+1 +#define C4 28 +#define C5 C4+1 +#define C6 C4+2 +#define C7 C4+3 + +#if defined (L_muldi3) + +;; A[] *= B[] +;; R25:R18 *= R17:R10 +;; Ordinary ABI-Function + +DEFUN __muldi3 + 
push r29 + push r28 + push r17 + push r16 + + ;; Counting in Words, we have to perform a 4 * 4 Multiplication + + ;; 3 * 0 + 0 * 3 + mul A7,B0 $ $ mov C7,r0 + mul A0,B7 $ $ add C7,r0 + mul A6,B1 $ $ add C7,r0 + mul A6,B0 $ mov C6,r0 $ add C7,r1 + mul B6,A1 $ $ add C7,r0 + mul B6,A0 $ add C6,r0 $ adc C7,r1 + + ;; 1 * 2 + mul A2,B4 $ add C6,r0 $ adc C7,r1 + mul A3,B4 $ $ add C7,r0 + mul A2,B5 $ $ add C7,r0 + + push A5 + push A4 + push B1 + push B0 + push A3 + push A2 + + ;; 0 * 0 + wmov 26, B0 + XCALL __umulhisi3 + wmov C0, 22 + wmov C2, 24 + + ;; 0 * 2 + wmov 26, B4 + XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25 + + wmov 26, B2 + ;; 0 * 1 + XCALL __muldi3_6 + + pop A0 + pop A1 + ;; 1 * 1 + wmov 26, B2 + XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25 + + pop r26 + pop r27 + ;; 1 * 0 + XCALL __muldi3_6 + + pop A0 + pop A1 + ;; 2 * 0 + XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25 + + ;; 2 * 1 + wmov 26, B2 + XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23 + + ;; A[] = C[] + wmov A0, C0 + ;; A2 = C2 already + wmov A4, C4 + wmov A6, C6 + + clr __zero_reg__ + pop r16 + pop r17 + pop r28 + pop r29 + ret +ENDF __muldi3 +#endif /* L_muldi3 */ + +#if defined (L_muldi3_6) +;; A helper for some 64-bit multiplications with MUL available +DEFUN __muldi3_6 +__muldi3_6: + XCALL __umulhisi3 + add C2, 22 + adc C3, 23 + adc C4, 24 + adc C5, 25 + brcc 0f + adiw C6, 1 +0: ret +ENDF __muldi3_6 +#endif /* L_muldi3_6 */ + +#undef C7 +#undef C6 +#undef C5 +#undef C4 +#undef C3 +#undef C2 +#undef C1 +#undef C0 + +#else /* !HAVE_MUL */ + +#if defined (L_muldi3) + +#define C0 26 +#define C1 C0+1 +#define C2 C0+2 +#define C3 C0+3 +#define C4 C0+4 +#define C5 C0+5 +#define C6 0 +#define C7 C6+1 + +#define Loop 9 + +;; A[] *= B[] +;; R25:R18 *= R17:R10 +;; Ordinary ABI-Function + +DEFUN __muldi3 + push r29 + push r28 + push Loop + + ldi C0, 64 + mov Loop, C0 + + ;; C[] = 0 + clr __tmp_reg__ + wmov C0, 0 + wmov C2, 0 + wmov C4, 0 + +0: ;; Rotate B[] 
right by 1 and set Carry to the N-th Bit of B[] + ;; where N = 64 - Loop. + ;; Notice that B[] = B[] >>> 64 so after this Routine has finished, + ;; B[] will have its initial Value again. + LSR B7 $ ror B6 $ ror B5 $ ror B4 + ror B3 $ ror B2 $ ror B1 $ ror B0 + + ;; If the N-th Bit of B[] was set then... + brcc 1f + ;; ...finish Rotation... + ori B7, 1 << 7 + + ;; ...and add A[] * 2^N to the Result C[] + ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3 + adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7 + +1: ;; Multiply A[] by 2 + LSL A0 $ rol A1 $ rol A2 $ rol A3 + rol A4 $ rol A5 $ rol A6 $ rol A7 + + dec Loop + brne 0b + + ;; We expanded the Result in C[] + ;; Copy Result to the Return Register A[] + wmov A0, C0 + wmov A2, C2 + wmov A4, C4 + wmov A6, C6 + + clr __zero_reg__ + pop Loop + pop r28 + pop r29 + ret +ENDF __muldi3 + +#undef Loop + +#undef C7 +#undef C6 +#undef C5 +#undef C4 +#undef C3 +#undef C2 +#undef C1 +#undef C0 + +#endif /* L_muldi3 */ +#endif /* HAVE_MUL */ + +#undef B7 +#undef B6 +#undef B5 +#undef B4 +#undef B3 +#undef B2 +#undef B1 +#undef B0 + +#undef A7 +#undef A6 +#undef A5 +#undef A4 +#undef A3 +#undef A2 +#undef A1 +#undef A0 + +/******************************************************* + Widening Multiplication 64 = 32 x 32 with MUL +*******************************************************/ + +#if defined (__AVR_HAVE_MUL__) +#define A0 r22 +#define A1 r23 +#define A2 r24 +#define A3 r25 + +#define B0 r18 +#define B1 r19 +#define B2 r20 +#define B3 r21 + +#define C0 18 +#define C1 C0+1 +#define C2 20 +#define C3 C2+1 +#define C4 28 +#define C5 C4+1 +#define C6 C4+2 +#define C7 C4+3 + +#if defined (L_umulsidi3) + +;; Unsigned widening 64 = 32 * 32 Multiplication with MUL + +;; R18[8] = R22[4] * R18[4] +;; +;; Ordinary ABI Function, but additionally sets +;; X = R20[2] = B2[2] +;; Z = R22[2] = A0[2] +DEFUN __umulsidi3 + clt + ;; FALLTHRU +ENDF __umulsidi3 + ;; T = sign (A) +DEFUN __umulsidi3_helper + push 29 $ push 28 ; Y + wmov 30, A2 + ;; 
Counting in Words, we have to perform 4 Multiplications + ;; 0 * 0 + wmov 26, A0 + XCALL __umulhisi3 + push 23 $ push 22 ; C0 + wmov 28, B0 + wmov 18, B2 + wmov C2, 24 + push 27 $ push 26 ; A0 + push 19 $ push 18 ; B2 + ;; + ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y + ;; B2 C2 -- -- -- B0 A2 + ;; 1 * 1 + wmov 26, 30 ; A2 + XCALL __umulhisi3 + ;; Sign-extend A. T holds the sign of A + brtc 0f + ;; Subtract B from the high part of the result + sub 22, 28 + sbc 23, 29 + sbc 24, 18 + sbc 25, 19 +0: wmov 18, 28 ;; B0 + wmov C4, 22 + wmov C6, 24 + ;; + ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y + ;; B0 C2 -- -- A2 C4 C6 + ;; + ;; 1 * 0 + XCALL __muldi3_6 + ;; 0 * 1 + pop 26 $ pop 27 ;; B2 + pop 18 $ pop 19 ;; A0 + XCALL __muldi3_6 + + ;; Move result C into place and save A0 in Z + wmov 22, C4 + wmov 24, C6 + wmov 30, 18 ; A0 + pop C0 $ pop C1 + + ;; Epilogue + pop 28 $ pop 29 ;; Y + ret +ENDF __umulsidi3_helper +#endif /* L_umulsidi3 */ + + +#if defined (L_mulsidi3) + +;; Signed widening 64 = 32 * 32 Multiplication +;; +;; R18[8] = R22[4] * R18[4] +;; Ordinary ABI Function +DEFUN __mulsidi3 + bst A3, 7 + sbrs B3, 7 ; Enhanced core has no skip bug + XJMP __umulsidi3_helper + + ;; B needs sign-extension + push A3 + push A2 + XCALL __umulsidi3_helper + ;; A0 survived in Z + sub r22, r30 + sbc r23, r31 + pop r26 + pop r27 + sbc r24, r26 + sbc r25, r27 + ret +ENDF __mulsidi3 +#endif /* L_mulsidi3 */ + +#undef A0 +#undef A1 +#undef A2 +#undef A3 +#undef B0 +#undef B1 +#undef B2 +#undef B3 +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +#endif /* HAVE_MUL */ + +/********************************************************** + Widening Multiplication 64 = 32 x 32 without MUL +**********************************************************/ + +#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__) +#define A0 18 +#define A1 A0+1 +#define A2 A0+2 +#define A3 A0+3 +#define A4 A0+4 +#define A5 A0+5 +#define A6 A0+6 +#define A7 A0+7 
+ +#define B0 10 +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 +#define B4 B0+4 +#define B5 B0+5 +#define B6 B0+6 +#define B7 B0+7 + +#define AA0 22 +#define AA1 AA0+1 +#define AA2 AA0+2 +#define AA3 AA0+3 + +#define BB0 18 +#define BB1 BB0+1 +#define BB2 BB0+2 +#define BB3 BB0+3 + +#define Mask r30 + +;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL +;; +;; R18[8] = R22[4] * R18[4] +;; Ordinary ABI Function +DEFUN __mulsidi3 + set + skip + ;; FALLTHRU +ENDF __mulsidi3 + +DEFUN __umulsidi3 + clt ; skipped + ;; Save 10 Registers: R10..R17, R28, R29 + do_prologue_saves 10 + ldi Mask, 0xff + bld Mask, 7 + ;; Move B into place... + wmov B0, BB0 + wmov B2, BB2 + ;; ...and extend it + and BB3, Mask + lsl BB3 + sbc B4, B4 + mov B5, B4 + wmov B6, B4 + ;; Move A into place... + wmov A0, AA0 + wmov A2, AA2 + ;; ...and extend it + and AA3, Mask + lsl AA3 + sbc A4, A4 + mov A5, A4 + wmov A6, A4 + XCALL __muldi3 + do_epilogue_restores 10 +ENDF __umulsidi3 + +#undef A0 +#undef A1 +#undef A2 +#undef A3 +#undef A4 +#undef A5 +#undef A6 +#undef A7 +#undef B0 +#undef B1 +#undef B2 +#undef B3 +#undef B4 +#undef B5 +#undef B6 +#undef B7 +#undef AA0 +#undef AA1 +#undef AA2 +#undef AA3 +#undef BB0 +#undef BB1 +#undef BB2 +#undef BB3 +#undef Mask +#endif /* L_mulsidi3 && !HAVE_MUL */ + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +.section .text.libgcc.div, "ax", @progbits + +/******************************************************* + Division 8 / 8 => (result + remainder) +*******************************************************/ +#define r_rem r25 /* remainder */ +#define r_arg1 r24 /* dividend, quotient */ +#define r_arg2 r22 /* divisor */ +#define r_cnt r23 /* loop count */ + +#if defined (L_udivmodqi4) +DEFUN __udivmodqi4 + sub r_rem,r_rem ; clear remainder and carry + ldi r_cnt,9 ; init loop counter + rjmp __udivmodqi4_ep ; jump to entry point +__udivmodqi4_loop: + rol r_rem ; shift dividend into remainder + cp r_rem,r_arg2 ; compare remainder 
& divisor + brcs __udivmodqi4_ep ; remainder <= divisor + sub r_rem,r_arg2 ; restore remainder +__udivmodqi4_ep: + rol r_arg1 ; shift dividend (with CARRY) + dec r_cnt ; decrement loop counter + brne __udivmodqi4_loop + com r_arg1 ; complement result + ; because C flag was complemented in loop + ret +ENDF __udivmodqi4 +#endif /* defined (L_udivmodqi4) */ + +#if defined (L_divmodqi4) +DEFUN __divmodqi4 + bst r_arg1,7 ; store sign of dividend + mov __tmp_reg__,r_arg1 + eor __tmp_reg__,r_arg2; r0.7 is sign of result + sbrc r_arg1,7 + neg r_arg1 ; dividend negative : negate + sbrc r_arg2,7 + neg r_arg2 ; divisor negative : negate + XCALL __udivmodqi4 ; do the unsigned div/mod + brtc __divmodqi4_1 + neg r_rem ; correct remainder sign +__divmodqi4_1: + sbrc __tmp_reg__,7 + neg r_arg1 ; correct result sign +__divmodqi4_exit: + ret +ENDF __divmodqi4 +#endif /* defined (L_divmodqi4) */ + +#undef r_rem +#undef r_arg1 +#undef r_arg2 +#undef r_cnt + + +/******************************************************* + Division 16 / 16 => (result + remainder) +*******************************************************/ +#define r_remL r26 /* remainder Low */ +#define r_remH r27 /* remainder High */ + +/* return: remainder */ +#define r_arg1L r24 /* dividend Low */ +#define r_arg1H r25 /* dividend High */ + +/* return: quotient */ +#define r_arg2L r22 /* divisor Low */ +#define r_arg2H r23 /* divisor High */ + +#define r_cnt r21 /* loop count */ + +#if defined (L_udivmodhi4) +DEFUN __udivmodhi4 + sub r_remL,r_remL + sub r_remH,r_remH ; clear remainder and carry + ldi r_cnt,17 ; init loop counter + rjmp __udivmodhi4_ep ; jump to entry point +__udivmodhi4_loop: + rol r_remL ; shift dividend into remainder + rol r_remH + cp r_remL,r_arg2L ; compare remainder & divisor + cpc r_remH,r_arg2H + brcs __udivmodhi4_ep ; remainder < divisor + sub r_remL,r_arg2L ; restore remainder + sbc r_remH,r_arg2H +__udivmodhi4_ep: + rol r_arg1L ; shift dividend (with CARRY) + rol r_arg1H + dec r_cnt ; decrement 
loop counter + brne __udivmodhi4_loop + com r_arg1L + com r_arg1H +; div/mod results to return registers, as for the div() function + mov_l r_arg2L, r_arg1L ; quotient + mov_h r_arg2H, r_arg1H + mov_l r_arg1L, r_remL ; remainder + mov_h r_arg1H, r_remH + ret +ENDF __udivmodhi4 +#endif /* defined (L_udivmodhi4) */ + +#if defined (L_divmodhi4) +DEFUN __divmodhi4 + .global _div +_div: + bst r_arg1H,7 ; store sign of dividend + mov __tmp_reg__,r_arg2H + brtc 0f + com __tmp_reg__ ; r0.7 is sign of result + rcall __divmodhi4_neg1 ; dividend negative: negate +0: + sbrc r_arg2H,7 + rcall __divmodhi4_neg2 ; divisor negative: negate + XCALL __udivmodhi4 ; do the unsigned div/mod + sbrc __tmp_reg__,7 + rcall __divmodhi4_neg2 ; correct remainder sign + brtc __divmodhi4_exit +__divmodhi4_neg1: + ;; correct dividend/remainder sign + com r_arg1H + neg r_arg1L + sbci r_arg1H,0xff + ret +__divmodhi4_neg2: + ;; correct divisor/result sign + com r_arg2H + neg r_arg2L + sbci r_arg2H,0xff +__divmodhi4_exit: + ret +ENDF __divmodhi4 +#endif /* defined (L_divmodhi4) */ + +#undef r_remH +#undef r_remL + +#undef r_arg1H +#undef r_arg1L + +#undef r_arg2H +#undef r_arg2L + +#undef r_cnt + +/******************************************************* + Division 24 / 24 => (result + remainder) +*******************************************************/ + +;; A[0..2]: In: Dividend; Out: Quotient +#define A0 22 +#define A1 A0+1 +#define A2 A0+2 + +;; B[0..2]: In: Divisor; Out: Remainder +#define B0 18 +#define B1 B0+1 +#define B2 B0+2 + +;; C[0..2]: Expand remainder +#define C0 __zero_reg__ +#define C1 26 +#define C2 25 + +;; Loop counter +#define r_cnt 21 + +#if defined (L_udivmodpsi4) +;; R24:R22 = R24:R22 udiv R20:R18 +;; R20:R18 = R24:R22 umod R20:R18 +;; Clobbers: R21, R25, R26 + +DEFUN __udivmodpsi4 + ; init loop counter + ldi r_cnt, 24+1 + ; Clear remainder and carry. 
C0 is already 0 + clr C1 + sub C2, C2 + ; jump to entry point + rjmp __udivmodpsi4_start +__udivmodpsi4_loop: + ; shift dividend into remainder + rol C0 + rol C1 + rol C2 + ; compare remainder & divisor + cp C0, B0 + cpc C1, B1 + cpc C2, B2 + brcs __udivmodpsi4_start ; remainder <= divisor + sub C0, B0 ; restore remainder + sbc C1, B1 + sbc C2, B2 +__udivmodpsi4_start: + ; shift dividend (with CARRY) + rol A0 + rol A1 + rol A2 + ; decrement loop counter + dec r_cnt + brne __udivmodpsi4_loop + com A0 + com A1 + com A2 + ; div/mod results to return registers + ; remainder + mov B0, C0 + mov B1, C1 + mov B2, C2 + clr __zero_reg__ ; C0 + ret +ENDF __udivmodpsi4 +#endif /* defined (L_udivmodpsi4) */ + +#if defined (L_divmodpsi4) +;; R24:R22 = R24:R22 div R20:R18 +;; R20:R18 = R24:R22 mod R20:R18 +;; Clobbers: T, __tmp_reg__, R21, R25, R26 + +DEFUN __divmodpsi4 + ; R0.7 will contain the sign of the result: + ; R0.7 = A.sign ^ B.sign + mov __tmp_reg__, B2 + ; T-flag = sign of dividend + bst A2, 7 + brtc 0f + com __tmp_reg__ + ; Adjust dividend's sign + rcall __divmodpsi4_negA +0: + ; Adjust divisor's sign + sbrc B2, 7 + rcall __divmodpsi4_negB + + ; Do the unsigned div/mod + XCALL __udivmodpsi4 + + ; Adjust quotient's sign + sbrc __tmp_reg__, 7 + rcall __divmodpsi4_negA + + ; Adjust remainder's sign + brtc __divmodpsi4_end + +__divmodpsi4_negB: + ; Correct divisor/remainder sign + com B2 + com B1 + neg B0 + sbci B1, -1 + sbci B2, -1 + ret + + ; Correct dividend/quotient sign +__divmodpsi4_negA: + com A2 + com A1 + neg A0 + sbci A1, -1 + sbci A2, -1 +__divmodpsi4_end: + ret + +ENDF __divmodpsi4 +#endif /* defined (L_divmodpsi4) */ + +#undef A0 +#undef A1 +#undef A2 + +#undef B0 +#undef B1 +#undef B2 + +#undef C0 +#undef C1 +#undef C2 + +#undef r_cnt + +/******************************************************* + Division 32 / 32 => (result + remainder) +*******************************************************/ +#define r_remHH r31 /* remainder High */ +#define r_remHL r30 
+#define r_remH r27 +#define r_remL r26 /* remainder Low */ + +/* return: remainder */ +#define r_arg1HH r25 /* dividend High */ +#define r_arg1HL r24 +#define r_arg1H r23 +#define r_arg1L r22 /* dividend Low */ + +/* return: quotient */ +#define r_arg2HH r21 /* divisor High */ +#define r_arg2HL r20 +#define r_arg2H r19 +#define r_arg2L r18 /* divisor Low */ + +#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */ + +#if defined (L_udivmodsi4) +DEFUN __udivmodsi4 + ldi r_remL, 33 ; init loop counter + mov r_cnt, r_remL + sub r_remL,r_remL + sub r_remH,r_remH ; clear remainder and carry + mov_l r_remHL, r_remL + mov_h r_remHH, r_remH + rjmp __udivmodsi4_ep ; jump to entry point +__udivmodsi4_loop: + rol r_remL ; shift dividend into remainder + rol r_remH + rol r_remHL + rol r_remHH + cp r_remL,r_arg2L ; compare remainder & divisor + cpc r_remH,r_arg2H + cpc r_remHL,r_arg2HL + cpc r_remHH,r_arg2HH + brcs __udivmodsi4_ep ; remainder <= divisor + sub r_remL,r_arg2L ; restore remainder + sbc r_remH,r_arg2H + sbc r_remHL,r_arg2HL + sbc r_remHH,r_arg2HH +__udivmodsi4_ep: + rol r_arg1L ; shift dividend (with CARRY) + rol r_arg1H + rol r_arg1HL + rol r_arg1HH + dec r_cnt ; decrement loop counter + brne __udivmodsi4_loop + ; __zero_reg__ now restored (r_cnt == 0) + com r_arg1L + com r_arg1H + com r_arg1HL + com r_arg1HH +; div/mod results to return registers, as for the ldiv() function + mov_l r_arg2L, r_arg1L ; quotient + mov_h r_arg2H, r_arg1H + mov_l r_arg2HL, r_arg1HL + mov_h r_arg2HH, r_arg1HH + mov_l r_arg1L, r_remL ; remainder + mov_h r_arg1H, r_remH + mov_l r_arg1HL, r_remHL + mov_h r_arg1HH, r_remHH + ret +ENDF __udivmodsi4 +#endif /* defined (L_udivmodsi4) */ + +#if defined (L_divmodsi4) +DEFUN __divmodsi4 + mov __tmp_reg__,r_arg2HH + bst r_arg1HH,7 ; store sign of dividend + brtc 0f + com __tmp_reg__ ; r0.7 is sign of result + XCALL __negsi2 ; dividend negative: negate +0: + sbrc r_arg2HH,7 + rcall __divmodsi4_neg2 ; divisor negative: negate + XCALL 
__udivmodsi4 ; do the unsigned div/mod + sbrc __tmp_reg__, 7 ; correct quotient sign + rcall __divmodsi4_neg2 + brtc __divmodsi4_exit ; correct remainder sign + XJMP __negsi2 +__divmodsi4_neg2: + ;; correct divisor/quotient sign + com r_arg2HH + com r_arg2HL + com r_arg2H + neg r_arg2L + sbci r_arg2H,0xff + sbci r_arg2HL,0xff + sbci r_arg2HH,0xff +__divmodsi4_exit: + ret +ENDF __divmodsi4 +#endif /* defined (L_divmodsi4) */ + +#if defined (L_negsi2) +;; (set (reg:SI 22) +;; (neg:SI (reg:SI 22))) +;; Sets the V flag for signed overflow tests +DEFUN __negsi2 + NEG4 22 + ret +ENDF __negsi2 +#endif /* L_negsi2 */ + +#undef r_remHH +#undef r_remHL +#undef r_remH +#undef r_remL +#undef r_arg1HH +#undef r_arg1HL +#undef r_arg1H +#undef r_arg1L +#undef r_arg2HH +#undef r_arg2HL +#undef r_arg2H +#undef r_arg2L +#undef r_cnt + +/******************************************************* + Division 64 / 64 + Modulo 64 % 64 +*******************************************************/ + +;; Use Speed-optimized Version on "big" Devices, i.e. Devices with +;; at least 16k of Program Memory. For smaller Devices, depend +;; on MOVW and SP Size. There is a Connexion between SP Size and +;; Flash Size so that SP Size can be used to test for Flash Size. 
+ +#if defined (__AVR_HAVE_JMP_CALL__) +# define SPEED_DIV 8 +#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__) +# define SPEED_DIV 16 +#else +# define SPEED_DIV 0 +#endif + +;; A[0..7]: In: Dividend; +;; Out: Quotient (T = 0) +;; Out: Remainder (T = 1) +#define A0 18 +#define A1 A0+1 +#define A2 A0+2 +#define A3 A0+3 +#define A4 A0+4 +#define A5 A0+5 +#define A6 A0+6 +#define A7 A0+7 + +;; B[0..7]: In: Divisor; Out: Clobber +#define B0 10 +#define B1 B0+1 +#define B2 B0+2 +#define B3 B0+3 +#define B4 B0+4 +#define B5 B0+5 +#define B6 B0+6 +#define B7 B0+7 + +;; C[0..7]: Expand remainder; Out: Remainder (unused) +#define C0 8 +#define C1 C0+1 +#define C2 30 +#define C3 C2+1 +#define C4 28 +#define C5 C4+1 +#define C6 26 +#define C7 C6+1 + +;; Holds Signs during Division Routine +#define SS __tmp_reg__ + +;; Bit-Counter in Division Routine +#define R_cnt __zero_reg__ + +;; Scratch Register for Negation +#define NN r31 + +#if defined (L_udivdi3) + +;; R25:R18 = R24:R18 umod R17:R10 +;; Ordinary ABI-Function + +DEFUN __umoddi3 + set + rjmp __udivdi3_umoddi3 +ENDF __umoddi3 + +;; R25:R18 = R24:R18 udiv R17:R10 +;; Ordinary ABI-Function + +DEFUN __udivdi3 + clt +ENDF __udivdi3 + +DEFUN __udivdi3_umoddi3 + push C0 + push C1 + push C4 + push C5 + XCALL __udivmod64 + pop C5 + pop C4 + pop C1 + pop C0 + ret +ENDF __udivdi3_umoddi3 +#endif /* L_udivdi3 */ + +#if defined (L_udivmod64) + +;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation +;; No Registers saved/restored; the Callers will take Care. 
+;; Preserves B[] and T-flag +;; T = 0: Compute Quotient in A[] +;; T = 1: Compute Remainder in A[] and shift SS one Bit left + +DEFUN __udivmod64 + + ;; Clear Remainder (C6, C7 will follow) + clr C0 + clr C1 + wmov C2, C0 + wmov C4, C0 + ldi C7, 64 + +#if SPEED_DIV == 0 || SPEED_DIV == 16 + ;; Initialize Loop-Counter + mov R_cnt, C7 + wmov C6, C0 +#endif /* SPEED_DIV */ + +#if SPEED_DIV == 8 + + push A7 + clr C6 + +1: ;; Compare shifted Devidend against Divisor + ;; If -- even after Shifting -- it is smaller... + CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3 + cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7 + brcc 2f + + ;; ...then we can subtract it. Thus, it is legal to shift left + $ mov C6,C5 $ mov C5,C4 $ mov C4,C3 + mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7 + mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3 + mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0 + + ;; 8 Bits are done + subi C7, 8 + brne 1b + + ;; Shifted 64 Bits: A7 has traveled to C7 + pop C7 + ;; Divisor is greater than Dividend. We have: + ;; A[] % B[] = A[] + ;; A[] / B[] = 0 + ;; Thus, we can return immediately + rjmp 5f + +2: ;; Initialze Bit-Counter with Number of Bits still to be performed + mov R_cnt, C7 + + ;; Push of A7 is not needed because C7 is still 0 + pop C7 + clr C7 + +#elif SPEED_DIV == 16 + + ;; Compare shifted Dividend against Divisor + cp A7, B3 + cpc C0, B4 + cpc C1, B5 + cpc C2, B6 + cpc C3, B7 + brcc 2f + + ;; Divisor is greater than shifted Dividen: We can shift the Dividend + ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk + wmov C2,A6 $ wmov C0,A4 + wmov A6,A2 $ wmov A4,A0 + wmov A2,C6 $ wmov A0,C4 + + ;; Set Bit Counter to 32 + lsr R_cnt +2: +#elif SPEED_DIV +#error SPEED_DIV = ? +#endif /* SPEED_DIV */ + +;; The very Division + Remainder Routine + +3: ;; Left-shift Dividend... 
+ lsl A0 $ rol A1 $ rol A2 $ rol A3 + rol A4 $ rol A5 $ rol A6 $ rol A7 + + ;; ...into Remainder + rol C0 $ rol C1 $ rol C2 $ rol C3 + rol C4 $ rol C5 $ rol C6 $ rol C7 + + ;; Compare Remainder and Divisor + CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3 + cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7 + + brcs 4f + + ;; Divisor fits into Remainder: Subtract it from Remainder... + SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3 + sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7 + + ;; ...and set according Bit in the upcoming Quotient + ;; The Bit will travel to its final Position + ori A0, 1 + +4: ;; This Bit is done + dec R_cnt + brne 3b + ;; __zero_reg__ is 0 again + + ;; T = 0: We are fine with the Quotient in A[] + ;; T = 1: Copy Remainder to A[] +5: brtc 6f + wmov A0, C0 + wmov A2, C2 + wmov A4, C4 + wmov A6, C6 + ;; Move the Sign of the Result to SS.7 + lsl SS + +6: ret + +ENDF __udivmod64 +#endif /* L_udivmod64 */ + + +#if defined (L_divdi3) + +;; R25:R18 = R24:R18 mod R17:R10 +;; Ordinary ABI-Function + +DEFUN __moddi3 + set + rjmp __divdi3_moddi3 +ENDF __moddi3 + +;; R25:R18 = R24:R18 div R17:R10 +;; Ordinary ABI-Function + +DEFUN __divdi3 + clt +ENDF __divdi3 + +DEFUN __divdi3_moddi3 +#if SPEED_DIV + mov r31, A7 + or r31, B7 + brmi 0f + ;; Both Signs are 0: the following Complexitiy is not needed + XJMP __udivdi3_umoddi3 +#endif /* SPEED_DIV */ + +0: ;; The Prologue + ;; Save 12 Registers: Y, 17...8 + ;; No Frame needed + do_prologue_saves 12 + + ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign) + ;; SS.6 will contain the Sign of the Remainder (A.sign) + mov SS, A7 + asr SS + ;; Adjust Dividend's Sign as needed +#if SPEED_DIV + ;; Compiling for Speed we know that at least one Sign must be < 0 + ;; Thus, if A[] >= 0 then we know B[] < 0 + brpl 22f +#else + brpl 21f +#endif /* SPEED_DIV */ + + XCALL __negdi2 + + ;; Adjust Divisor's Sign and SS.7 as needed +21: tst B7 + brpl 3f +22: ldi NN, 1 << 7 + eor SS, NN + + ldi NN, -1 + com B4 $ com B5 $ com B6 
$ com B7 + $ com B1 $ com B2 $ com B3 + NEG B0 + $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN + sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN + +3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag) + XCALL __udivmod64 + + ;; Adjust Result's Sign +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + tst SS + brpl 4f +#else + sbrc SS, 7 +#endif /* __AVR_HAVE_JMP_CALL__ */ + XCALL __negdi2 + +4: ;; Epilogue: Restore 12 Registers and return + do_epilogue_restores 12 + +ENDF __divdi3_moddi3 + +#endif /* L_divdi3 */ + +#undef R_cnt +#undef SS +#undef NN + +.section .text.libgcc, "ax", @progbits + +#define TT __tmp_reg__ + +#if defined (L_adddi3) +;; (set (reg:DI 18) +;; (plus:DI (reg:DI 18) +;; (reg:DI 10))) +;; Sets the V flag for signed overflow tests +;; Sets the C flag for unsigned overflow tests +DEFUN __adddi3 + ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3 + adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7 + ret +ENDF __adddi3 +#endif /* L_adddi3 */ + +#if defined (L_adddi3_s8) +;; (set (reg:DI 18) +;; (plus:DI (reg:DI 18) +;; (sign_extend:SI (reg:QI 26)))) +;; Sets the V flag for signed overflow tests +;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128 +DEFUN __adddi3_s8 + clr TT + sbrc r26, 7 + com TT + ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT + adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT + ret +ENDF __adddi3_s8 +#endif /* L_adddi3_s8 */ + +#if defined (L_subdi3) +;; (set (reg:DI 18) +;; (minus:DI (reg:DI 18) +;; (reg:DI 10))) +;; Sets the V flag for signed overflow tests +;; Sets the C flag for unsigned overflow tests +DEFUN __subdi3 + SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3 + sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7 + ret +ENDF __subdi3 +#endif /* L_subdi3 */ + +#if defined (L_cmpdi2) +;; (set (cc0) +;; (compare (reg:DI 18) +;; (reg:DI 10))) +DEFUN __cmpdi2 + CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3 + cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7 + ret +ENDF __cmpdi2 +#endif /* L_cmpdi2 */ + +#if defined (L_cmpdi2_s8) +;; (set 
(cc0) +;; (compare (reg:DI 18) +;; (sign_extend:SI (reg:QI 26)))) +DEFUN __cmpdi2_s8 + clr TT + sbrc r26, 7 + com TT + CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT + cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT + ret +ENDF __cmpdi2_s8 +#endif /* L_cmpdi2_s8 */ + +#if defined (L_negdi2) +;; (set (reg:DI 18) +;; (neg:DI (reg:DI 18))) +;; Sets the V flag for signed overflow tests +DEFUN __negdi2 + + com A4 $ com A5 $ com A6 $ com A7 + $ com A1 $ com A2 $ com A3 + NEG A0 + $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1 + sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1 + ret + +ENDF __negdi2 +#endif /* L_negdi2 */ + +#undef TT + +#undef C7 +#undef C6 +#undef C5 +#undef C4 +#undef C3 +#undef C2 +#undef C1 +#undef C0 + +#undef B7 +#undef B6 +#undef B5 +#undef B4 +#undef B3 +#undef B2 +#undef B1 +#undef B0 + +#undef A7 +#undef A6 +#undef A5 +#undef A4 +#undef A3 +#undef A2 +#undef A1 +#undef A0 + + +.section .text.libgcc.prologue, "ax", @progbits + +/********************************** + * This is a prologue subroutine + **********************************/ +#if defined (L_prologue) + +;; This function does not clobber T-flag; 64-bit division relies on it +DEFUN __prologue_saves__ + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 +#if !defined (__AVR_HAVE_SPH__) + in r28,__SP_L__ + sub r28,r26 + out __SP_L__,r28 + clr r29 +#elif defined (__AVR_XMEGA__) + in r28,__SP_L__ + in r29,__SP_H__ + sub r28,r26 + sbc r29,r27 + out __SP_L__,r28 + out __SP_H__,r29 +#else + in r28,__SP_L__ + in r29,__SP_H__ + sub r28,r26 + sbc r29,r27 + in __tmp_reg__,__SREG__ + cli + out __SP_H__,r29 + out __SREG__,__tmp_reg__ + out __SP_L__,r28 +#endif /* #SP = 8/16 */ + +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +ENDF __prologue_saves__ +#endif /* defined (L_prologue) */ + +/* + * This is an epilogue subroutine + */ +#if defined 
(L_epilogue) + +DEFUN __epilogue_restores__ + ldd r2,Y+18 + ldd r3,Y+17 + ldd r4,Y+16 + ldd r5,Y+15 + ldd r6,Y+14 + ldd r7,Y+13 + ldd r8,Y+12 + ldd r9,Y+11 + ldd r10,Y+10 + ldd r11,Y+9 + ldd r12,Y+8 + ldd r13,Y+7 + ldd r14,Y+6 + ldd r15,Y+5 + ldd r16,Y+4 + ldd r17,Y+3 + ldd r26,Y+2 +#if !defined (__AVR_HAVE_SPH__) + ldd r29,Y+1 + add r28,r30 + out __SP_L__,r28 + mov r28, r26 +#elif defined (__AVR_XMEGA__) + ldd r27,Y+1 + add r28,r30 + adc r29,__zero_reg__ + out __SP_L__,r28 + out __SP_H__,r29 + wmov 28, 26 +#else + ldd r27,Y+1 + add r28,r30 + adc r29,__zero_reg__ + in __tmp_reg__,__SREG__ + cli + out __SP_H__,r29 + out __SREG__,__tmp_reg__ + out __SP_L__,r28 + mov_l r28, r26 + mov_h r29, r27 +#endif /* #SP = 8/16 */ + ret +ENDF __epilogue_restores__ +#endif /* defined (L_epilogue) */ + +#ifdef L_exit + .section .fini9,"ax",@progbits +DEFUN _exit + .weak exit +exit: +ENDF _exit + + /* Code from .fini8 ... .fini1 sections inserted by ld script. */ + + .section .fini0,"ax",@progbits + cli +__stop_program: + rjmp __stop_program +#endif /* defined (L_exit) */ + +#ifdef L_cleanup + .weak _cleanup + .func _cleanup +_cleanup: + ret +.endfunc +#endif /* defined (L_cleanup) */ + + +.section .text.libgcc, "ax", @progbits + +#ifdef L_tablejump +DEFUN __tablejump2__ + lsl r30 + rol r31 + ;; FALLTHRU +ENDF __tablejump2__ + +DEFUN __tablejump__ +#if defined (__AVR_HAVE_LPMX__) + lpm __tmp_reg__, Z+ + lpm r31, Z + mov r30, __tmp_reg__ +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +#else /* !HAVE_LPMX */ + lpm + adiw r30, 1 + push r0 + lpm + push r0 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + in __tmp_reg__, __EIND__ + push __tmp_reg__ +#endif + ret +#endif /* !HAVE_LPMX */ +ENDF __tablejump__ +#endif /* defined (L_tablejump) */ + +#ifdef L_copy_data + .section .init4,"ax",@progbits +DEFUN __do_copy_data +#if defined(__AVR_HAVE_ELPMX__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + 
ldi r31, hi8(__data_load_start) + ldi r16, hh8(__data_load_start) + out __RAMPZ__, r16 + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: + elpm r0, Z+ + st X+, r0 +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + ldi r16, hh8(__data_load_start - 0x10000) +.L__do_copy_data_carry: + inc r16 + out __RAMPZ__, r16 + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: + elpm + st X+, r0 + adiw r30, 1 + brcs .L__do_copy_data_carry +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: +#if defined (__AVR_HAVE_LPMX__) + lpm r0, Z+ +#else + lpm + adiw r30, 1 +#endif + st X+, r0 +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ +#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__) + ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM + out __RAMPZ__, __zero_reg__ +#endif /* ELPM && RAMPD */ +ENDF __do_copy_data +#endif /* L_copy_data */ + +/* __do_clear_bss is only necessary if there is anything in .bss section. 
*/ + +#ifdef L_clear_bss + .section .init4,"ax",@progbits +DEFUN __do_clear_bss + ldi r17, hi8(__bss_end) + ldi r26, lo8(__bss_start) + ldi r27, hi8(__bss_start) + rjmp .do_clear_bss_start +.do_clear_bss_loop: + st X+, __zero_reg__ +.do_clear_bss_start: + cpi r26, lo8(__bss_end) + cpc r27, r17 + brne .do_clear_bss_loop +ENDF __do_clear_bss +#endif /* L_clear_bss */ + +/* __do_global_ctors and __do_global_dtors are only necessary + if there are any constructors/destructors. */ + +#ifdef L_ctors + .section .init6,"ax",@progbits +DEFUN __do_global_ctors +#if defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__ctors_start) + ldi r28, lo8(__ctors_end) + ldi r29, hi8(__ctors_end) + ldi r16, hh8(__ctors_end) + rjmp .L__do_global_ctors_start +.L__do_global_ctors_loop: + sbiw r28, 2 + sbc r16, __zero_reg__ + mov_h r31, r29 + mov_l r30, r28 + out __RAMPZ__, r16 + XCALL __tablejump_elpm__ +.L__do_global_ctors_start: + cpi r28, lo8(__ctors_start) + cpc r29, r17 + ldi r24, hh8(__ctors_start) + cpc r16, r24 + brne .L__do_global_ctors_loop +#else + ldi r17, hi8(__ctors_start) + ldi r28, lo8(__ctors_end) + ldi r29, hi8(__ctors_end) + rjmp .L__do_global_ctors_start +.L__do_global_ctors_loop: + sbiw r28, 2 + mov_h r31, r29 + mov_l r30, r28 + XCALL __tablejump__ +.L__do_global_ctors_start: + cpi r28, lo8(__ctors_start) + cpc r29, r17 + brne .L__do_global_ctors_loop +#endif /* defined(__AVR_HAVE_ELPM__) */ +ENDF __do_global_ctors +#endif /* L_ctors */ + +#ifdef L_dtors + .section .fini6,"ax",@progbits +DEFUN __do_global_dtors +#if defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__dtors_end) + ldi r28, lo8(__dtors_start) + ldi r29, hi8(__dtors_start) + ldi r16, hh8(__dtors_start) + rjmp .L__do_global_dtors_start +.L__do_global_dtors_loop: + sbiw r28, 2 + sbc r16, __zero_reg__ + mov_h r31, r29 + mov_l r30, r28 + out __RAMPZ__, r16 + XCALL __tablejump_elpm__ +.L__do_global_dtors_start: + cpi r28, lo8(__dtors_end) + cpc r29, r17 + ldi r24, hh8(__dtors_end) + cpc r16, r24 + brne .L__do_global_dtors_loop 
+#else + ldi r17, hi8(__dtors_end) + ldi r28, lo8(__dtors_start) + ldi r29, hi8(__dtors_start) + rjmp .L__do_global_dtors_start +.L__do_global_dtors_loop: + mov_h r31, r29 + mov_l r30, r28 + XCALL __tablejump__ + adiw r28, 2 +.L__do_global_dtors_start: + cpi r28, lo8(__dtors_end) + cpc r29, r17 + brne .L__do_global_dtors_loop +#endif /* defined(__AVR_HAVE_ELPM__) */ +ENDF __do_global_dtors +#endif /* L_dtors */ + +.section .text.libgcc, "ax", @progbits + +#ifdef L_tablejump_elpm +DEFUN __tablejump_elpm__ +#if defined (__AVR_HAVE_ELPMX__) + elpm __tmp_reg__, Z+ + elpm r31, Z + mov r30, __tmp_reg__ +#if defined (__AVR_HAVE_RAMPD__) + ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM + out __RAMPZ__, __zero_reg__ +#endif /* RAMPD */ +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +#elif defined (__AVR_HAVE_ELPM__) + elpm + adiw r30, 1 + push r0 + elpm + push r0 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + in __tmp_reg__, __EIND__ + push __tmp_reg__ +#endif + ret +#endif +ENDF __tablejump_elpm__ +#endif /* defined (L_tablejump_elpm) */ + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Loading n bytes from Flash; n = 3,4 +;; R22... = Flash[Z] +;; Clobbers: __tmp_reg__ + +#if (defined (L_load_3) \ + || defined (L_load_4)) \ + && !defined (__AVR_HAVE_LPMX__) + +;; Destination +#define D0 22 +#define D1 D0+1 +#define D2 D0+2 +#define D3 D0+3 + +.macro .load dest, n + lpm + mov \dest, r0 +.if \dest != D0+\n-1 + adiw r30, 1 +.else + sbiw r30, \n-1 +.endif +.endm + +#if defined (L_load_3) +DEFUN __load_3 + push D3 + XCALL __load_4 + pop D3 + ret +ENDF __load_3 +#endif /* L_load_3 */ + +#if defined (L_load_4) +DEFUN __load_4 + .load D0, 4 + .load D1, 4 + .load D2, 4 + .load D3, 4 + ret +ENDF __load_4 +#endif /* L_load_4 */ + +#endif /* L_load_3 || L_load_3 */ + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Loading n bytes from Flash or RAM; n = 1,2,3,4 +;; R22... 
= Flash[R21:Z] or RAM[Z] depending on R21.7 +;; Clobbers: __tmp_reg__, R21, R30, R31 + +#if (defined (L_xload_1) \ + || defined (L_xload_2) \ + || defined (L_xload_3) \ + || defined (L_xload_4)) + +;; Destination +#define D0 22 +#define D1 D0+1 +#define D2 D0+2 +#define D3 D0+3 + +;; Register containing bits 16+ of the address + +#define HHI8 21 + +.macro .xload dest, n +#if defined (__AVR_HAVE_ELPMX__) + elpm \dest, Z+ +#elif defined (__AVR_HAVE_ELPM__) + elpm + mov \dest, r0 +.if \dest != D0+\n-1 + adiw r30, 1 + adc HHI8, __zero_reg__ + out __RAMPZ__, HHI8 +.endif +#elif defined (__AVR_HAVE_LPMX__) + lpm \dest, Z+ +#else + lpm + mov \dest, r0 +.if \dest != D0+\n-1 + adiw r30, 1 +.endif +#endif +#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__) +.if \dest == D0+\n-1 + ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM + out __RAMPZ__, __zero_reg__ +.endif +#endif +.endm ; .xload + +#if defined (L_xload_1) +DEFUN __xload_1 +#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__) + sbrc HHI8, 7 + ld D0, Z + sbrs HHI8, 7 + lpm D0, Z + ret +#else + sbrc HHI8, 7 + rjmp 1f +#if defined (__AVR_HAVE_ELPM__) + out __RAMPZ__, HHI8 +#endif /* __AVR_HAVE_ELPM__ */ + .xload D0, 1 + ret +1: ld D0, Z + ret +#endif /* LPMx && ! 
ELPM */ +ENDF __xload_1 +#endif /* L_xload_1 */ + +#if defined (L_xload_2) +DEFUN __xload_2 + sbrc HHI8, 7 + rjmp 1f +#if defined (__AVR_HAVE_ELPM__) + out __RAMPZ__, HHI8 +#endif /* __AVR_HAVE_ELPM__ */ + .xload D0, 2 + .xload D1, 2 + ret +1: ld D0, Z+ + ld D1, Z+ + ret +ENDF __xload_2 +#endif /* L_xload_2 */ + +#if defined (L_xload_3) +DEFUN __xload_3 + sbrc HHI8, 7 + rjmp 1f +#if defined (__AVR_HAVE_ELPM__) + out __RAMPZ__, HHI8 +#endif /* __AVR_HAVE_ELPM__ */ + .xload D0, 3 + .xload D1, 3 + .xload D2, 3 + ret +1: ld D0, Z+ + ld D1, Z+ + ld D2, Z+ + ret +ENDF __xload_3 +#endif /* L_xload_3 */ + +#if defined (L_xload_4) +DEFUN __xload_4 + sbrc HHI8, 7 + rjmp 1f +#if defined (__AVR_HAVE_ELPM__) + out __RAMPZ__, HHI8 +#endif /* __AVR_HAVE_ELPM__ */ + .xload D0, 4 + .xload D1, 4 + .xload D2, 4 + .xload D3, 4 + ret +1: ld D0, Z+ + ld D1, Z+ + ld D2, Z+ + ld D3, Z+ + ret +ENDF __xload_4 +#endif /* L_xload_4 */ + +#endif /* L_xload_{1|2|3|4} */ + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; memcopy from Address Space __pgmx to RAM +;; R23:Z = Source Address +;; X = Destination Address +;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z + +#if defined (L_movmemx) + +#define HHI8 23 +#define LOOP 24 + +DEFUN __movmemx_qi + ;; #Bytes to copy fity in 8 Bits (1..255) + ;; Zero-extend Loop Counter + clr LOOP+1 + ;; FALLTHRU +ENDF __movmemx_qi + +DEFUN __movmemx_hi + +;; Read from where? + sbrc HHI8, 7 + rjmp 1f + +;; Read from Flash + +#if defined (__AVR_HAVE_ELPM__) + out __RAMPZ__, HHI8 +#endif + +0: ;; Load 1 Byte from Flash... 
+ +#if defined (__AVR_HAVE_ELPMX__) + elpm r0, Z+ +#elif defined (__AVR_HAVE_ELPM__) + elpm + adiw r30, 1 + adc HHI8, __zero_reg__ + out __RAMPZ__, HHI8 +#elif defined (__AVR_HAVE_LPMX__) + lpm r0, Z+ +#else + lpm + adiw r30, 1 +#endif + + ;; ...and store that Byte to RAM Destination + st X+, r0 + sbiw LOOP, 1 + brne 0b +#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__) + ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM + out __RAMPZ__, __zero_reg__ +#endif /* ELPM && RAMPD */ + ret + +;; Read from RAM + +1: ;; Read 1 Byte from RAM... + ld r0, Z+ + ;; and store that Byte to RAM Destination + st X+, r0 + sbiw LOOP, 1 + brne 1b + ret +ENDF __movmemx_hi + +#undef HHI8 +#undef LOOP + +#endif /* L_movmemx */ + + +.section .text.libgcc.builtins, "ax", @progbits + +/********************************** + * Find first set Bit (ffs) + **********************************/ + +#if defined (L_ffssi2) +;; find first set bit +;; r25:r24 = ffs32 (r25:r22) +;; clobbers: r22, r26 +DEFUN __ffssi2 + clr r26 + tst r22 + brne 1f + subi r26, -8 + or r22, r23 + brne 1f + subi r26, -8 + or r22, r24 + brne 1f + subi r26, -8 + or r22, r25 + brne 1f + ret +1: mov r24, r22 + XJMP __loop_ffsqi2 +ENDF __ffssi2 +#endif /* defined (L_ffssi2) */ + +#if defined (L_ffshi2) +;; find first set bit +;; r25:r24 = ffs16 (r25:r24) +;; clobbers: r26 +DEFUN __ffshi2 + clr r26 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst r24 + breq 2f +#else + cpse r24, __zero_reg__ +#endif /* __AVR_HAVE_JMP_CALL__ */ +1: XJMP __loop_ffsqi2 +2: ldi r26, 8 + or r24, r25 + brne 1b + ret +ENDF __ffshi2 +#endif /* defined (L_ffshi2) */ + +#if defined (L_loop_ffsqi2) +;; Helper for ffshi2, ffssi2 +;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) +;; r24 must be != 0 +;; clobbers: r26 +DEFUN __loop_ffsqi2 + inc r26 + lsr r24 + brcc __loop_ffsqi2 + mov r24, r26 + clr r25 + ret +ENDF __loop_ffsqi2 +#endif /* defined (L_loop_ffsqi2) */ + + 
+/********************************** + * Count trailing Zeros (ctz) + **********************************/ + +#if defined (L_ctzsi2) +;; count trailing zeros +;; r25:r24 = ctz32 (r25:r22) +;; clobbers: r26, r22 +;; ctz(0) = 255 +;; Note that ctz(0) in undefined for GCC +DEFUN __ctzsi2 + XCALL __ffssi2 + dec r24 + ret +ENDF __ctzsi2 +#endif /* defined (L_ctzsi2) */ + +#if defined (L_ctzhi2) +;; count trailing zeros +;; r25:r24 = ctz16 (r25:r24) +;; clobbers: r26 +;; ctz(0) = 255 +;; Note that ctz(0) in undefined for GCC +DEFUN __ctzhi2 + XCALL __ffshi2 + dec r24 + ret +ENDF __ctzhi2 +#endif /* defined (L_ctzhi2) */ + + +/********************************** + * Count leading Zeros (clz) + **********************************/ + +#if defined (L_clzdi2) +;; count leading zeros +;; r25:r24 = clz64 (r25:r18) +;; clobbers: r22, r23, r26 +DEFUN __clzdi2 + XCALL __clzsi2 + sbrs r24, 5 + ret + mov_l r22, r18 + mov_h r23, r19 + mov_l r24, r20 + mov_h r25, r21 + XCALL __clzsi2 + subi r24, -32 + ret +ENDF __clzdi2 +#endif /* defined (L_clzdi2) */ + +#if defined (L_clzsi2) +;; count leading zeros +;; r25:r24 = clz32 (r25:r22) +;; clobbers: r26 +DEFUN __clzsi2 + XCALL __clzhi2 + sbrs r24, 4 + ret + mov_l r24, r22 + mov_h r25, r23 + XCALL __clzhi2 + subi r24, -16 + ret +ENDF __clzsi2 +#endif /* defined (L_clzsi2) */ + +#if defined (L_clzhi2) +;; count leading zeros +;; r25:r24 = clz16 (r25:r24) +;; clobbers: r26 +DEFUN __clzhi2 + clr r26 + tst r25 + brne 1f + subi r26, -8 + or r25, r24 + brne 1f + ldi r24, 16 + ret +1: cpi r25, 16 + brsh 3f + subi r26, -3 + swap r25 +2: inc r26 +3: lsl r25 + brcc 2b + mov r24, r26 + clr r25 + ret +ENDF __clzhi2 +#endif /* defined (L_clzhi2) */ + + +/********************************** + * Parity + **********************************/ + +#if defined (L_paritydi2) +;; r25:r24 = parity64 (r25:r18) +;; clobbers: __tmp_reg__ +DEFUN __paritydi2 + eor r24, r18 + eor r24, r19 + eor r24, r20 + eor r24, r21 + XJMP __paritysi2 +ENDF __paritydi2 +#endif /* defined 
(L_paritydi2) */ + +#if defined (L_paritysi2) +;; r25:r24 = parity32 (r25:r22) +;; clobbers: __tmp_reg__ +DEFUN __paritysi2 + eor r24, r22 + eor r24, r23 + XJMP __parityhi2 +ENDF __paritysi2 +#endif /* defined (L_paritysi2) */ + +#if defined (L_parityhi2) +;; r25:r24 = parity16 (r25:r24) +;; clobbers: __tmp_reg__ +DEFUN __parityhi2 + eor r24, r25 +;; FALLTHRU +ENDF __parityhi2 + +;; r25:r24 = parity8 (r24) +;; clobbers: __tmp_reg__ +DEFUN __parityqi2 + ;; parity is in r24[0..7] + mov __tmp_reg__, r24 + swap __tmp_reg__ + eor r24, __tmp_reg__ + ;; parity is in r24[0..3] + subi r24, -4 + andi r24, -5 + subi r24, -6 + ;; parity is in r24[0,3] + sbrc r24, 3 + inc r24 + ;; parity is in r24[0] + andi r24, 1 + clr r25 + ret +ENDF __parityqi2 +#endif /* defined (L_parityhi2) */ + + +/********************************** + * Population Count + **********************************/ + +#if defined (L_popcounthi2) +;; population count +;; r25:r24 = popcount16 (r25:r24) +;; clobbers: __tmp_reg__ +DEFUN __popcounthi2 + XCALL __popcountqi2 + push r24 + mov r24, r25 + XCALL __popcountqi2 + clr r25 + ;; FALLTHRU +ENDF __popcounthi2 + +DEFUN __popcounthi2_tail + pop __tmp_reg__ + add r24, __tmp_reg__ + ret +ENDF __popcounthi2_tail +#endif /* defined (L_popcounthi2) */ + +#if defined (L_popcountsi2) +;; population count +;; r25:r24 = popcount32 (r25:r22) +;; clobbers: __tmp_reg__ +DEFUN __popcountsi2 + XCALL __popcounthi2 + push r24 + mov_l r24, r22 + mov_h r25, r23 + XCALL __popcounthi2 + XJMP __popcounthi2_tail +ENDF __popcountsi2 +#endif /* defined (L_popcountsi2) */ + +#if defined (L_popcountdi2) +;; population count +;; r25:r24 = popcount64 (r25:r18) +;; clobbers: r22, r23, __tmp_reg__ +DEFUN __popcountdi2 + XCALL __popcountsi2 + push r24 + mov_l r22, r18 + mov_h r23, r19 + mov_l r24, r20 + mov_h r25, r21 + XCALL __popcountsi2 + XJMP __popcounthi2_tail +ENDF __popcountdi2 +#endif /* defined (L_popcountdi2) */ + +#if defined (L_popcountqi2) +;; population count +;; r24 = popcount8 
(r24) +;; clobbers: __tmp_reg__ +DEFUN __popcountqi2 + mov __tmp_reg__, r24 + andi r24, 1 + lsr __tmp_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __zero_reg__ + lsr __tmp_reg__ + adc r24, __tmp_reg__ + ret +ENDF __popcountqi2 +#endif /* defined (L_popcountqi2) */ + + +/********************************** + * Swap bytes + **********************************/ + +;; swap two registers with different register number +.macro bswap a, b + eor \a, \b + eor \b, \a + eor \a, \b +.endm + +#if defined (L_bswapsi2) +;; swap bytes +;; r25:r22 = bswap32 (r25:r22) +DEFUN __bswapsi2 + bswap r22, r25 + bswap r23, r24 + ret +ENDF __bswapsi2 +#endif /* defined (L_bswapsi2) */ + +#if defined (L_bswapdi2) +;; swap bytes +;; r25:r18 = bswap64 (r25:r18) +DEFUN __bswapdi2 + bswap r18, r25 + bswap r19, r24 + bswap r20, r23 + bswap r21, r22 + ret +ENDF __bswapdi2 +#endif /* defined (L_bswapdi2) */ + + +/********************************** + * 64-bit shifts + **********************************/ + +#if defined (L_ashrdi3) +;; Arithmetic shift right +;; r25:r18 = ashr64 (r25:r18, r17:r16) +DEFUN __ashrdi3 + bst r25, 7 + bld __zero_reg__, 0 + ;; FALLTHRU +ENDF __ashrdi3 + +;; Logic shift right +;; r25:r18 = lshr64 (r25:r18, r17:r16) +DEFUN __lshrdi3 + lsr __zero_reg__ + sbc __tmp_reg__, __tmp_reg__ + push r16 +0: cpi r16, 8 + brlo 2f + subi r16, 8 + mov r18, r19 + mov r19, r20 + mov r20, r21 + mov r21, r22 + mov r22, r23 + mov r23, r24 + mov r24, r25 + mov r25, __tmp_reg__ + rjmp 0b +1: asr __tmp_reg__ + ror r25 + ror r24 + ror r23 + ror r22 + ror r21 + ror r20 + ror r19 + ror r18 +2: dec r16 + brpl 1b + pop r16 + ret +ENDF __lshrdi3 +#endif /* defined (L_ashrdi3) */ + +#if defined (L_ashldi3) +;; Shift left +;; r25:r18 = ashl64 (r25:r18, r17:r16) +DEFUN __ashldi3 + push r16 +0: cpi r16, 8 + brlo 2f + mov r25, r24 + mov r24, r23 + mov r23, 
r22 + mov r22, r21 + mov r21, r20 + mov r20, r19 + mov r19, r18 + clr r18 + subi r16, 8 + rjmp 0b +1: lsl r18 + rol r19 + rol r20 + rol r21 + rol r22 + rol r23 + rol r24 + rol r25 +2: dec r16 + brpl 1b + pop r16 + ret +ENDF __ashldi3 +#endif /* defined (L_ashldi3) */ + +#if defined (L_rotldi3) +;; Shift left +;; r25:r18 = rotl64 (r25:r18, r17:r16) +DEFUN __rotldi3 + push r16 +0: cpi r16, 8 + brlo 2f + subi r16, 8 + mov __tmp_reg__, r25 + mov r25, r24 + mov r24, r23 + mov r23, r22 + mov r22, r21 + mov r21, r20 + mov r20, r19 + mov r19, r18 + mov r18, __tmp_reg__ + rjmp 0b +1: lsl r18 + rol r19 + rol r20 + rol r21 + rol r22 + rol r23 + rol r24 + rol r25 + adc r18, __zero_reg__ +2: dec r16 + brpl 1b + pop r16 + ret +ENDF __rotldi3 +#endif /* defined (L_rotldi3) */ + + +.section .text.libgcc.fmul, "ax", @progbits + +/***********************************************************/ +;;; Softmul versions of FMUL, FMULS and FMULSU to implement +;;; __builtin_avr_fmul* if !AVR_HAVE_MUL +/***********************************************************/ + +#define A1 24 +#define B1 25 +#define C0 22 +#define C1 23 +#define A0 __tmp_reg__ + +#ifdef L_fmuls +;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction +;;; Clobbers: r24, r25, __tmp_reg__ +DEFUN __fmuls + ;; A0.7 = negate result? + mov A0, A1 + eor A0, B1 + ;; B1 = |B1| + sbrc B1, 7 + neg B1 + XJMP __fmulsu_exit +ENDF __fmuls +#endif /* L_fmuls */ + +#ifdef L_fmulsu +;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction +;;; Clobbers: r24, r25, __tmp_reg__ +DEFUN __fmulsu + ;; A0.7 = negate result? 
+ mov A0, A1 +;; FALLTHRU +ENDF __fmulsu + +;; Helper for __fmuls and __fmulsu +DEFUN __fmulsu_exit + ;; A1 = |A1| + sbrc A1, 7 + neg A1 +#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ + ;; Some cores have problem skipping 2-word instruction + tst A0 + brmi 1f +#else + sbrs A0, 7 +#endif /* __AVR_HAVE_JMP_CALL__ */ + XJMP __fmul +1: XCALL __fmul + ;; C = -C iff A0.7 = 1 + NEG2 C0 + ret +ENDF __fmulsu_exit +#endif /* L_fmulsu */ + + +#ifdef L_fmul +;;; r22:r23 = fmul (r24, r25) like in FMUL instruction +;;; Clobbers: r24, r25, __tmp_reg__ +DEFUN __fmul + ; clear result + clr C0 + clr C1 + clr A0 +1: tst B1 + ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C. +2: brpl 3f + ;; C += A + add C0, A0 + adc C1, A1 +3: ;; A >>= 1 + lsr A1 + ror A0 + ;; B <<= 1 + lsl B1 + brne 2b + ret +ENDF __fmul +#endif /* L_fmul */ + +#undef A0 +#undef A1 +#undef B1 +#undef C0 +#undef C1 + +#include "lib1funcs-fixed.S" diff --git a/gcc-4.9/libgcc/config/avr/lib2-object.mk b/gcc-4.9/libgcc/config/avr/lib2-object.mk new file mode 100644 index 000000000..6a9e04de0 --- /dev/null +++ b/gcc-4.9/libgcc/config/avr/lib2-object.mk @@ -0,0 +1,23 @@ +# This file is included several times in a row, once for each element of +# $(iter-items). On each inclusion, we advance $o to the next element. +# $(iter-labels) and $(iter-flags) are also advanced. +# This works similar to $(srcdir)/siditi-object.mk. 
+ +o := $(firstword $(iter-items)) +iter-items := $(filter-out $o,$(iter-items)) + +$o-label := $(firstword $(iter-labels)) +iter-labels := $(wordlist 2,$(words $(iter-labels)),$(iter-labels)) + +$o-flag := $(firstword $(iter-flags)) +iter-flags := $(wordlist 2,$(words $(iter-flags)),$(iter-flags)) + +$o$(objext): %$(objext): $(srcdir)/config/avr/lib2funcs.c + $(gcc_compile) -DL_$($*-label) -DL_LABEL=$($*-label) $($*-flag) \ + -c $< $(vis_hide) + +ifeq ($(enable_shared),yes) +$(o)_s$(objext): %_s$(objext): $(srcdir)/config/avr/lib2funcs.c + $(gcc_s_compile) -DL_$($*-label) -DL_LABEL=$($*-label) $($*-flag) \ + -c $< +endif diff --git a/gcc-4.9/libgcc/config/avr/lib2funcs.c b/gcc-4.9/libgcc/config/avr/lib2funcs.c new file mode 100644 index 000000000..774d14ced --- /dev/null +++ b/gcc-4.9/libgcc/config/avr/lib2funcs.c @@ -0,0 +1,226 @@ +/* Copyright (C) 2013-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + + +/* This file supplies implementations for some AVR-specific builtin + functions so that code like the following works as expected: + + int (*f (void))(_Fract) + { + return __builtin_avr_countlsr; + } + + In this specific case, the generated code is: + + f: + ldi r24,lo8(gs(__countlsHI)) + ldi r25,hi8(gs(__countlsHI)) + ret +*/ + +/* Map fixed-point suffix to the corresponding fixed-point type. */ + +typedef short _Fract fx_hr_t; +typedef _Fract fx_r_t; +typedef long _Fract fx_lr_t; +typedef long long _Fract fx_llr_t; + +typedef unsigned short _Fract fx_uhr_t; +typedef unsigned _Fract fx_ur_t; +typedef unsigned long _Fract fx_ulr_t; +typedef unsigned long long _Fract fx_ullr_t; + +typedef short _Accum fx_hk_t; +typedef _Accum fx_k_t; +typedef long _Accum fx_lk_t; +typedef long long _Accum fx_llk_t; + +typedef unsigned short _Accum fx_uhk_t; +typedef unsigned _Accum fx_uk_t; +typedef unsigned long _Accum fx_ulk_t; +typedef unsigned long long _Accum fx_ullk_t; + +/* Map fixed-point suffix to the corresponding natural integer type. */ + +typedef char int_hr_t; +typedef int int_r_t; +typedef long int_lr_t; +typedef long long int_llr_t; + +typedef unsigned char int_uhr_t; +typedef unsigned int int_ur_t; +typedef unsigned long int_ulr_t; +typedef unsigned long long int_ullr_t; + +typedef int int_hk_t; +typedef long int_k_t; +typedef long long int_lk_t; +typedef long long int_llk_t; + +typedef unsigned int int_uhk_t; +typedef unsigned long int_uk_t; +typedef unsigned long long int_ulk_t; +typedef unsigned long long int_ullk_t; + +/* Map mode to the corresponding integer type. 
*/ + +typedef char int_qi_t; +typedef int int_hi_t; +typedef long int_si_t; +typedef long long int_di_t; + +typedef unsigned char uint_qi_t; +typedef unsigned int uint_hi_t; +typedef unsigned long uint_si_t; +typedef unsigned long long uint_di_t; + + + +/************************************************************************/ + +/* Supply implementations / symbols for __builtin_roundFX ASM_NAME. */ + +#ifdef L_round + +#define ROUND1(FX) \ + ROUND2 (FX) + +#define ROUND2(FX) \ + extern fx_## FX ##_t __round## FX (fx_## FX ##_t x, int rpoint); \ + \ + fx_## FX ##_t \ + __round## FX (fx_## FX ##_t x, int rpoint) \ + { \ + return __builtin_avr_round ##FX (x, rpoint); \ + } + +ROUND1(L_LABEL) + +#endif /* L_round */ + + + +/*********************************************************************/ + +/* Implement some count-leading-redundant-sign-bits to be used with + coundlsFX implementation. */ + +#ifdef L__clrsbqi +extern int __clrsbqi2 (char x); + +int +__clrsbqi2 (char x) +{ + int ret; + + if (x < 0) + x = ~x; + + if (x == 0) + return 8 * sizeof (x) -1; + + ret = __builtin_clz (x << 8); + return ret - 1; +} +#endif /* L__clrsbqi */ + + +#ifdef L__clrsbdi +extern int __clrsbdi2 (long long x); + +int +__clrsbdi2 (long long x) +{ + int ret; + + if (x < 0LL) + x = ~x; + + if (x == 0LL) + return 8 * sizeof (x) -1; + + ret = __builtin_clzll ((unsigned long long) x); + return ret - 1; +} +#endif /* L__clrsbdi */ + + + +/*********************************************************************/ + +/* Supply implementations / symbols for __builtin_avr_countlsFX. 
*/ + +/* Signed */ + +#ifdef L_countls + +#define COUNTLS1(MM) \ + COUNTLS2 (MM) + +#define COUNTLS2(MM) \ + extern int __countls## MM ##2 (int_## MM ##_t); \ + extern int __clrsb## MM ##2 (int_## MM ##_t); \ + \ + int \ + __countls## MM ##2 (int_## MM ##_t x) \ + { \ + if (x == 0) \ + return __INT8_MAX__; \ + \ + return __clrsb## MM ##2 (x); \ + } + +COUNTLS1(L_LABEL) + +#endif /* L_countls */ + +/* Unsigned */ + +#ifdef L_countlsu + +#define clz_qi2 __builtin_clz /* unused, avoid warning */ +#define clz_hi2 __builtin_clz +#define clz_si2 __builtin_clzl +#define clz_di2 __builtin_clzll + +#define COUNTLS1(MM) \ + COUNTLS2 (MM) + +#define COUNTLS2(MM) \ + extern int __countlsu## MM ##2 (uint_## MM ##_t); \ + \ + int \ + __countlsu## MM ##2 (uint_## MM ##_t x) \ + { \ + if (x == 0) \ + return __INT8_MAX__; \ + \ + if (sizeof (x) == 1) \ + return clz_hi2 (x << 8); \ + else \ + return clz_## MM ##2 (x); \ + } + +COUNTLS1(L_LABEL) + +#endif /* L_countlsu */ diff --git a/gcc-4.9/libgcc/config/avr/t-avr b/gcc-4.9/libgcc/config/avr/t-avr new file mode 100644 index 000000000..461304706 --- /dev/null +++ b/gcc-4.9/libgcc/config/avr/t-avr @@ -0,0 +1,277 @@ +LIB1ASMSRC = avr/lib1funcs.S +LIB1ASMFUNCS = \ + _mulqi3 \ + _mulhi3 \ + _mulqihi3 _umulqihi3 \ + _mulpsi3 _mulsqipsi3 \ + _mulhisi3 \ + _umulhisi3 \ + _usmulhisi3 \ + _muluhisi3 \ + _mulshisi3 \ + _mulsi3 \ + _udivmodqi4 \ + _divmodqi4 \ + _udivmodhi4 \ + _divmodhi4 \ + _divmodpsi4 _udivmodpsi4 \ + _udivmodsi4 \ + _divmodsi4 \ + _divdi3 _udivdi3 \ + _muldi3 _muldi3_6 \ + _mulsidi3 _umulsidi3 \ + _udivmod64 \ + _negsi2 _negdi2 \ + _prologue \ + _epilogue \ + _exit \ + _cleanup \ + _tablejump \ + _tablejump_elpm \ + _load_3 _load_4 \ + _xload_1 _xload_2 _xload_3 _xload_4 \ + _movmemx \ + _copy_data \ + _clear_bss \ + _ctors \ + _dtors \ + _ffssi2 \ + _ffshi2 \ + _loop_ffsqi2 \ + _ctzsi2 \ + _ctzhi2 \ + _clzdi2 \ + _clzsi2 \ + _clzhi2 \ + _paritydi2 \ + _paritysi2 \ + _parityhi2 \ + _popcounthi2 \ + _popcountsi2 \ + 
_popcountdi2 \ + _popcountqi2 \ + _bswapsi2 \ + _bswapdi2 \ + _ashldi3 _ashrdi3 _lshrdi3 _rotldi3 \ + _adddi3 _adddi3_s8 _subdi3 \ + _cmpdi2 _cmpdi2_s8 \ + _fmul _fmuls _fmulsu + +# Fixed point routines in avr/lib1funcs-fixed.S +LIB1ASMFUNCS += \ + _fractqqsf _fractuqqsf \ + _fracthqsf _fractuhqsf _fracthasf _fractuhasf \ + _fractsasf _fractusasf _fractsqsf _fractusqsf \ + \ + _fractsfqq _fractsfuqq \ + _fractsfhq _fractsfuhq _fractsfha _fractsfuha \ + _fractsfsq _fractsfusq _fractsfsa _fractsfusa \ + _mulqq3 \ + _mulhq3 _muluhq3 \ + _mulha3 _muluha3 _muluha3_round \ + _mulsa3 _mulusa3 \ + _usmuluha3 _ssmulha3 \ + _usmulusa3 _ssmulsa3 \ + _divqq3 _udivuqq3 _divqq_helper \ + _divhq3 _udivuhq3 \ + _divha3 _udivuha3 \ + _divsa3 _udivusa3 \ + _clr_8 \ + _ssneg_2 _ssneg_4 _ssneg_8 \ + _ssabs_1 _ssabs_2 _ssabs_4 _ssabs_8 \ + _ssadd_8 _sssub_8 \ + _usadd_8 _ussub_8 \ + _mask1 _ret \ + _roundqq3 _rounduqq3 \ + _round_s2 _round_u2 _round_2_const _addmask_2 \ + _round_s4 _round_u4 _round_4_const _addmask_4 \ + _round_x8 \ + _rounddq3 _roundudq3 \ + _roundda3 _rounduda3 \ + _roundta3 _rounduta3 \ + + +LIB2FUNCS_EXCLUDE = \ + _moddi3 _umoddi3 \ + _clz \ + _clrsbdi2 \ + + +# We do not have the DF type. +# Most of the C functions in libgcc2 use almost all registers, +# so use -mcall-prologues for smaller code size. +HOST_LIBGCC2_CFLAGS += -DDF=SF -Dinhibit_libc -mcall-prologues -Os + +# Extra 16-bit integer functions. 
+intfuncs16 = _absvXX2 _addvXX3 _subvXX3 _mulvXX3 _negvXX2 _clrsbXX2 + +hiintfuncs16 = $(subst XX,hi,$(intfuncs16)) +siintfuncs16 = $(subst XX,si,$(intfuncs16)) + +iter-items := $(hiintfuncs16) +iter-labels := $(siintfuncs16) +iter-sizes := $(patsubst %,2,$(siintfuncs16)) $(patsubst %,2,$(hiintfuncs16)) + + +include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/siditi-object.mk,$(iter-items)) +libgcc-objects += $(patsubst %,%$(objext),$(hiintfuncs16)) + +ifeq ($(enable_shared),yes) +libgcc-s-objects += $(patsubst %,%_s$(objext),$(hiintfuncs16)) +endif + +### + +conv_XY=$(conv)$(mode1)$(mode2) +func_X=$(func)$(mode) + +# Compile C functions from lib2funcs.c and add them to libgcc.a. +# +# Some functions which are not performance.critical are more convenient +# to implement in C than in assembler. Most of them serve as implementation +# for AVR-specific builtins in the case where the address of a builtin +# function is taken or if there is no insn that implements the builtin. +# +# We don't use LIB2ADD because we want to iterate over the source for +# different modes, fixed-point suffixes, etc. See iter-labels and L_LABEL. +# iter-label will get one more underscore in order to avoid too short +# labels like -DLk and we use -DL_k instead. 
+ +# Build roundFX functions from lib2funcs.c + +round_suffix := hr r lr uhr ur ulr \ + hk k uhk uk +round_funcs := $(foreach func,_round,\ + $(foreach mode,$(round_suffix),$(func_X))) + +iter-items := $(round_funcs) +iter-labels := $(round_suffix) +iter-flags := $(patsubst %,-DL_round,$(iter-items)) + +include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items)) + +libgcc-objects += $(patsubst %,%$(objext),$(round_funcs)) + +# Build clrsbXX functions from lib2funcs.c + +clrsb_modes := qi di +clrsb_funcs := $(foreach func,_clrsb,\ + $(foreach mode,$(clrsb_modes),$(func_X))) + +iter-items := $(clrsb_funcs) +iter-labels := $(clrsb_funcs) +iter-flags := $(patsubst %,-DL_clrsb,$(iter-items)) + +include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items)) + +libgcc-objects += $(patsubst %,%$(objext),$(clrsb_funcs)) + +# Build signed countlsFX functions from lib2funcs.c + +countls_modes := qi hi si di +countls_funcs := $(foreach func,_countls,\ + $(foreach mode,$(countls_modes),$(func_X))) + +iter-items := $(countls_funcs) +iter-labels := $(countls_modes) +iter-flags := $(patsubst %,-DL_countls,$(iter-items)) + +include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items)) + +libgcc-objects += $(patsubst %,%$(objext),$(countls_funcs)) + +# Build unsigned countlsFX functions from lib2funcs.c + +countlsu_modes := qi hi si di +countlsu_funcs := $(foreach func,_countlsu,\ + $(foreach mode,$(countlsu_modes),$(func_X))) + +iter-items := $(countlsu_funcs) +iter-labels := $(countlsu_modes) +iter-flags := $(patsubst %,-DL_countlsu,$(iter-items)) + +include $(srcdir)/empty.mk $(patsubst %,$(srcdir)/config/avr/lib2-object.mk,$(iter-items)) + +libgcc-objects += $(patsubst %,%$(objext),$(countlsu_funcs)) + + +# Filter out supported conversions from fixed-bit.c +# Also filter out TQ and UTQ. 
+ +# Conversions supported by the compiler + +convf_modes = QI UQI QQ UQQ \ + HI UHI HQ UHQ HA UHA \ + SI USI SQ USQ SA USA \ + DI UDI DQ UDQ DA UDA \ + TI UTI TQ UTQ TA UTA + +LIB2FUNCS_EXCLUDE += \ + $(foreach conv,_fract _fractuns,\ + $(foreach mode1,$(convf_modes),\ + $(foreach mode2,$(convf_modes),$(conv_XY)))) + +# Conversions supported by lib1funcs-fixed.S + +conv_to_sf_modes = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA +conv_from_sf_modes = QQ UQQ HQ UHQ HA UHA SA USA + +LIB2FUNCS_EXCLUDE += \ + $(foreach conv,_fract, \ + $(foreach mode1,$(conv_to_sf_modes), \ + $(foreach mode2,SF,$(conv_XY)))) + +LIB2FUNCS_EXCLUDE += \ + $(foreach conv,_fract,\ + $(foreach mode1,SF,\ + $(foreach mode2,$(conv_from_sf_modes),$(conv_XY)))) + +# Arithmetik supported by the compiler + +allfix_modes = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA DA UDA DQ UDQ TQ UTQ TA UTA + +LIB2FUNCS_EXCLUDE += \ + $(foreach func,_add _sub,\ + $(foreach mode,$(allfix_modes),$(func_X))) + +LIB2FUNCS_EXCLUDE += \ + $(foreach func,_lshr _ashl _ashr _cmp,\ + $(foreach mode,$(allfix_modes),$(func_X))) + + +usat_modes = UQQ UHQ UHA USQ USA UDQ UDA UTQ UTA +ssat_modes = QQ HQ HA SQ SA DQ DA TQ TA + +LIB2FUNCS_EXCLUDE += \ + $(foreach func,_ssadd _sssub _ssneg _ssabs,\ + $(foreach mode,$(ssat_modes),$(func_X))) + +LIB2FUNCS_EXCLUDE += \ + $(foreach func,_usadd _ussub _usneg,\ + $(foreach mode,$(usat_modes),$(func_X))) + + +smul_modes = QQ HQ HA SA +umul_modes = UQQ UHQ UHA USA +sdiv_modes = QQ HQ HA SA +udiv_modes = UQQ UHQ UHA USA + +LIB2FUNCS_EXCLUDE += \ + $(foreach func,_mul,\ + $(foreach mode,$(smul_modes) $(umul_modes),$(func_X))) + +LIB2FUNCS_EXCLUDE += \ + $(foreach func,_div,\ + $(foreach mode,$(sdiv_modes) $(udiv_modes),$(func_X))) + + +ssmul_modes = HA SA +usmul_modes = UHA USA + +LIB2FUNCS_EXCLUDE += \ + $(foreach func,_usmul,\ + $(foreach mode,$(usmul_modes),$(func_X))) + +LIB2FUNCS_EXCLUDE += \ + $(foreach func,_ssmul,\ + $(foreach mode,$(ssmul_modes),$(func_X))) diff --git 
a/gcc-4.9/libgcc/config/avr/t-avrlibc b/gcc-4.9/libgcc/config/avr/t-avrlibc new file mode 100644 index 000000000..d2c8b870a --- /dev/null +++ b/gcc-4.9/libgcc/config/avr/t-avrlibc @@ -0,0 +1,66 @@ +# This file is used if not configured --with-avrlibc=no +# +# AVR-Libc comes with hand-optimized float routines. +# For historical reasons, these routines live in AVR-Libc +# and not in libgcc and use the same function names like libgcc. +# To get the best support, i.e. always use the routines from +# AVR-Libc, we remove these routines from libgcc. +# +# See also PR54461. +# +# +# Arithmetic: +# __addsf3 __subsf3 __divsf3 __mulsf3 __negsf2 +# +# Comparison: +# __cmpsf2 __unordsf2 +# __eqsf2 __lesf2 __ltsf2 __nesf2 __gesf2 __gtsf2 +# +# Conversion: +# __fixsfdi __fixunssfdi __floatdisf __floatundisf +# __fixsfsi __fixunssfsi __floatsisf __floatunsisf +# +# +# These functions are contained in modules: +# +# _addsub_sf.o: __addsf3 __subsf3 +# _mul_sf.o: __mulsf3 +# _div_sf.o: __divsf3 +# _negate_sf.o: __negsf2 +# +# _compare_sf.o: __cmpsf2 +# _unord_sf.o: __unordsf2 +# _eq_sf.o: __eqsf2 +# _ne_sf.o: __nesf2 +# _ge_sf.o: __gesf2 +# _gt_sf.o: __gtsf2 +# _le_sf.o: __lesf2 +# _lt_sf.o: __ltsf2 +# +# _fixsfdi.o: __fixsfdi +# _fixunssfdi.o: __fixunssfdi +# _fixunssfsi.o: __fixunssfsi +# _floatdisf.o: __floatdisf +# _floatundisf.o: __floatundisf +# _sf_to_si.o: __fixsfsi +# _si_to_sf.o: __floatsisf +# _usi_to_sf.o: __floatunsisf + + +# SFmode +LIB2FUNCS_EXCLUDE += \ + _addsub_sf \ + _negate_sf \ + _mul_sf _div_sf \ + \ + _compare_sf \ + _unord_sf \ + _eq_sf _ne_sf \ + _gt_sf _ge_sf \ + _lt_sf _le_sf \ + \ + _si_to_sf _sf_to_si \ + _usi_to_sf _sf_to_usi \ + _fixunssfsi _fixsfdi \ + _fixunssfdi \ + _floatdisf _floatundisf diff --git a/gcc-4.9/libgcc/config/avr/t-rtems b/gcc-4.9/libgcc/config/avr/t-rtems new file mode 100644 index 000000000..43b57ee32 --- /dev/null +++ b/gcc-4.9/libgcc/config/avr/t-rtems @@ -0,0 +1,2 @@ +# RTEMS uses _exit from newlib +LIB1ASMFUNCS := $(filter-out 
_exit,$(LIB1ASMFUNCS)) |