Diffstat (limited to 'gcc-4.9/libgcc/config/avr/lib1funcs.S')
 gcc-4.9/libgcc/config/avr/lib1funcs.S | 3226
 1 file changed, 3226 insertions(+), 0 deletions(-)
diff --git a/gcc-4.9/libgcc/config/avr/lib1funcs.S b/gcc-4.9/libgcc/config/avr/lib1funcs.S
new file mode 100644
index 000000000..6f1c77edb
--- /dev/null
+++ b/gcc-4.9/libgcc/config/avr/lib1funcs.S
@@ -0,0 +1,3226 @@
+/* -*- Mode: Asm -*- */
+/* Copyright (C) 1998-2014 Free Software Foundation, Inc.
+ Contributed by Denis Chertykov <chertykov@gmail.com>
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define __zero_reg__ r1
+#define __tmp_reg__ r0
+#define __SREG__ 0x3f
+#if defined (__AVR_HAVE_SPH__)
+#define __SP_H__ 0x3e
+#endif
+#define __SP_L__ 0x3d
+#define __RAMPZ__ 0x3B
+#define __EIND__ 0x3C
+
+/* Most of the functions here are called directly from avr.md
+ patterns, instead of using the standard libcall mechanisms.
+ This can make better code because GCC knows exactly which
+ of the call-used registers (not all of them) are clobbered. */
+
+/* FIXME: At present, there is no SORT directive in the linker
+ script so that we must not assume that different modules
+ in the same input section like .libgcc.text.mul will be
+ located close together. Therefore, we cannot use
+ RCALL/RJMP to call a function like __udivmodhi4 from
+ __divmodhi4 and have to use lengthy XCALL/XJMP even
+ though they are in the same input section and all same
+ input sections together are small enough to reach every
+ location with a RCALL/RJMP instruction. */
+
+ .macro mov_l r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ movw \r_dest, \r_src
+#else
+ mov \r_dest, \r_src
+#endif
+ .endm
+
+ .macro mov_h r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ ; empty
+#else
+ mov \r_dest, \r_src
+#endif
+ .endm
+
+.macro wmov r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ movw \r_dest, \r_src
+#else
+ mov \r_dest, \r_src
+ mov \r_dest+1, \r_src+1
+#endif
+.endm
+
+#if defined (__AVR_HAVE_JMP_CALL__)
+#define XCALL call
+#define XJMP jmp
+#else
+#define XCALL rcall
+#define XJMP rjmp
+#endif
+
+;; Prologue stuff
+
+.macro do_prologue_saves n_pushed n_frame=0
+ ldi r26, lo8(\n_frame)
+ ldi r27, hi8(\n_frame)
+ ldi r30, lo8(gs(.L_prologue_saves.\@))
+ ldi r31, hi8(gs(.L_prologue_saves.\@))
+ XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
+.L_prologue_saves.\@:
+.endm
+
+;; Epilogue stuff
+
+.macro do_epilogue_restores n_pushed n_frame=0
+ in r28, __SP_L__
+#ifdef __AVR_HAVE_SPH__
+ in r29, __SP_H__
+.if \n_frame > 63
+ subi r28, lo8(-\n_frame)
+ sbci r29, hi8(-\n_frame)
+.elseif \n_frame > 0
+ adiw r28, \n_frame
+.endif
+#else
+ clr r29
+.if \n_frame > 0
+ subi r28, lo8(-\n_frame)
+.endif
+#endif /* HAVE SPH */
+ ldi r30, \n_pushed
+ XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
+.endm
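+
+;; Typical use (see __umulsidi3 and __divdi3_moddi3 further down): a routine
+;; that saves N call-saved registers and needs no frame brackets its body as
+;;
+;;     do_prologue_saves    N
+;;     ...
+;;     do_epilogue_restores N
+;;
+;; with the same N in both places so that the computed entry offsets into
+;; __prologue_saves__ and __epilogue_restores__ match.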
+
+;; Support function entry and exit for convenience
+
+.macro DEFUN name
+.global \name
+.func \name
+\name:
+.endm
+
+.macro ENDF name
+.size \name, .-\name
+.endfunc
+.endm
+
+.macro FALIAS name
+.global \name
+.func \name
+\name:
+.size \name, .-\name
+.endfunc
+.endm
+
+;; Skip next instruction, typically a jump target
+#define skip cpse 0,0
+
+;; Negate a 2-byte value held in consecutive registers
+.macro NEG2 reg
+ com \reg+1
+ neg \reg
+ sbci \reg+1, -1
+.endm
+
+;; Negate a 4-byte value held in consecutive registers
+;; Sets the V flag for signed overflow tests if REG >= 16
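+;; Implements -x = ~x + 1: each byte is complemented and the +1 is rippled
+;; up through the carry chain (SBCI -1 adds 1 minus borrow; the ADC variant
+;; relies on COM leaving the carry set).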
+.macro NEG4 reg
+ com \reg+3
+ com \reg+2
+ com \reg+1
+.if \reg >= 16
+ neg \reg
+ sbci \reg+1, -1
+ sbci \reg+2, -1
+ sbci \reg+3, -1
+.else
+ com \reg
+ adc \reg, __zero_reg__
+ adc \reg+1, __zero_reg__
+ adc \reg+2, __zero_reg__
+ adc \reg+3, __zero_reg__
+.endif
+.endm
+
+#define exp_lo(N) hlo8 ((N) << 23)
+#define exp_hi(N) hhi8 ((N) << 23)
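+
+;; These pick out the 3rd resp. 4th byte of an IEEE-754 single whose biased
+;; exponent is N and whose sign and mantissa bits are zero; for example,
+;; exp_hi (127) = 0x3f and exp_lo (127) = 0x80, the upper two bytes of 1.0f.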
+
+
+.section .text.libgcc.mul, "ax", @progbits
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
+#if !defined (__AVR_HAVE_MUL__)
+/*******************************************************
+ Multiplication 8 x 8 without MUL
+*******************************************************/
+#if defined (L_mulqi3)
+
+#define r_arg2 r22 /* multiplicand */
+#define r_arg1 r24 /* multiplier */
+#define r_res __tmp_reg__ /* result */
+
+DEFUN __mulqi3
+ clr r_res ; clear result
+__mulqi3_loop:
+ sbrc r_arg1,0
+ add r_res,r_arg2
+ add r_arg2,r_arg2 ; shift multiplicand
+ breq __mulqi3_exit ; while multiplicand != 0
+ lsr r_arg1 ;
+ brne __mulqi3_loop ; exit if multiplier = 0
+__mulqi3_exit:
+ mov r_arg1,r_res ; result to return register
+ ret
+ENDF __mulqi3
+
+#undef r_arg2
+#undef r_arg1
+#undef r_res
+
+#endif /* defined (L_mulqi3) */
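+
+/* For reference, a minimal C sketch (not part of libgcc) of the
+   shift-and-add scheme used by __mulqi3 above; the 16-, 24-, 32- and 64-bit
+   multiplications without MUL below follow the same pattern with wider
+   registers:
+
+       unsigned char
+       mulqi3 (unsigned char a, unsigned char b)
+       {
+           unsigned char res = 0;
+           while (a != 0 && b != 0)
+           {
+               if (a & 1)        // bit n of the multiplier set?
+                   res += b;     // then add multiplicand * 2^n
+               b <<= 1;          // keep multiplicand * 2^n up to date
+               a >>= 1;          // consume one multiplier bit
+           }
+           return res;
+       }
+*/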
+
+
+/*******************************************************
+ Widening Multiplication 16 = 8 x 8 without MUL
+ Multiplication 16 x 16 without MUL
+*******************************************************/
+
+#define A0 r22
+#define A1 r23
+#define B0 r24
+#define BB0 r20
+#define B1 r25
+;; Output overlaps input, thus expand result in CC0/1
+#define C0 r24
+#define C1 r25
+#define CC0 __tmp_reg__
+#define CC1 R21
+
+#if defined (L_umulqihi3)
+;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
+;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
+;;; Clobbers: __tmp_reg__, R21..R23
+DEFUN __umulqihi3
+ clr A1
+ clr B1
+ XJMP __mulhi3
+ENDF __umulqihi3
+#endif /* L_umulqihi3 */
+
+#if defined (L_mulqihi3)
+;;; R25:R24 = (signed int) R22 * (signed int) R24
+;;; (C1:C0) = (signed int) A0 * (signed int) B0
+;;; Clobbers: __tmp_reg__, R20..R23
+DEFUN __mulqihi3
+ ;; Sign-extend B0
+ clr B1
+ sbrc B0, 7
+ com B1
+ ;; The multiplication runs twice as fast if A1 is zero, thus:
+ ;; Zero-extend A0
+ clr A1
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; Store B0 * sign of A
+ clr BB0
+ sbrc A0, 7
+ mov BB0, B0
+ call __mulhi3
+#else /* have no CALL */
+ ;; Skip sign-extension of A if A >= 0
+ ;; Same size as with the first alternative but avoids errata skip
+ ;; and is faster if A >= 0
+ sbrs A0, 7
+ rjmp __mulhi3
+ ;; If A < 0 store B
+ mov BB0, B0
+ rcall __mulhi3
+#endif /* HAVE_JMP_CALL */
+ ;; 1-extend A after the multiplication
+ sub C1, BB0
+ ret
+ENDF __mulqihi3
+#endif /* L_mulqihi3 */
+
+#if defined (L_mulhi3)
+;;; R25:R24 = R23:R22 * R25:R24
+;;; (C1:C0) = (A1:A0) * (B1:B0)
+;;; Clobbers: __tmp_reg__, R21..R23
+DEFUN __mulhi3
+
+ ;; Clear result
+ clr CC0
+ clr CC1
+ rjmp 3f
+1:
+ ;; Bit n of A is 1 --> C += B << n
+ add CC0, B0
+ adc CC1, B1
+2:
+ lsl B0
+ rol B1
+3:
+ ;; If B == 0 we are ready
+ sbiw B0, 0
+ breq 9f
+
+ ;; Carry = n-th bit of A
+ lsr A1
+ ror A0
+ ;; If bit n of A is set, then go add B * 2^n to C
+ brcs 1b
+
+ ;; Carry = 0 --> The ROR above acts like CP A0, 0
+ ;; Thus, it is sufficient to CPC the high part to test A against 0
+ cpc A1, __zero_reg__
+ ;; Only proceed if A != 0
+ brne 2b
+9:
+ ;; Move Result into place
+ mov C0, CC0
+ mov C1, CC1
+ ret
+ENDF __mulhi3
+#endif /* L_mulhi3 */
+
+#undef A0
+#undef A1
+#undef B0
+#undef BB0
+#undef B1
+#undef C0
+#undef C1
+#undef CC0
+#undef CC1
+
+
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define CC0 26
+#define CC1 CC0+1
+#define CC2 30
+#define CC3 CC2+1
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 16 without MUL
+*******************************************************/
+
+#if defined (L_umulhisi3)
+DEFUN __umulhisi3
+ wmov B0, 24
+ ;; Zero-extend B
+ clr B2
+ clr B3
+ ;; Zero-extend A
+ wmov A2, B2
+ XJMP __mulsi3
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
+
+#if defined (L_mulhisi3)
+DEFUN __mulhisi3
+ wmov B0, 24
+ ;; Sign-extend B
+ lsl r25
+ sbc B2, B2
+ mov B3, B2
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Sign-extend A
+ clr A2
+ sbrc A1, 7
+ com A2
+ mov A3, A2
+ XJMP __mulsi3
+#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
+ ;; Zero-extend A and __mulsi3 will run at least twice as fast
+ ;; compared to a sign-extended A.
+ clr A2
+ clr A3
+ sbrs A1, 7
+ XJMP __mulsi3
+ ;; If A < 0 then perform the B * 0xffff.... part before the
+ ;; actual multiplication by initializing the high part of the
+ ;; result CC with -B.
+ wmov CC2, A2
+ sub CC2, B0
+ sbc CC3, B1
+ XJMP __mulsi3_helper
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+
+/*******************************************************
+ Multiplication 32 x 32 without MUL
+*******************************************************/
+
+#if defined (L_mulsi3)
+DEFUN __mulsi3
+ ;; Clear result
+ clr CC2
+ clr CC3
+ ;; FALLTHRU
+ENDF __mulsi3
+
+DEFUN __mulsi3_helper
+ clr CC0
+ clr CC1
+ rjmp 3f
+
+1: ;; If bit n of A is set, then add B * 2^n to the result in CC
+ ;; CC += B
+ add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
+
+2: ;; B <<= 1
+ lsl B0 $ rol B1 $ rol B2 $ rol B3
+
+3: ;; A >>= 1: Carry = n-th bit of A
+ lsr A3 $ ror A2 $ ror A1 $ ror A0
+
+ brcs 1b
+ ;; Only continue if A != 0
+ sbci A1, 0
+ brne 2b
+ sbiw A2, 0
+ brne 2b
+
+ ;; All bits of A are consumed: Copy result to return register C
+ wmov C0, CC0
+ wmov C2, CC2
+ ret
+ENDF __mulsi3_helper
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
+
+#endif /* !defined (__AVR_HAVE_MUL__) */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#if defined (__AVR_HAVE_MUL__)
+#define A0 26
+#define B0 18
+#define C0 22
+
+#define A1 A0+1
+
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 16 with MUL
+*******************************************************/
+
+#if defined (L_mulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulhisi3
+ XCALL __umulhisi3
+ ;; Sign-extend B
+ tst B1
+ brpl 1f
+ sub C2, A0
+ sbc C3, A1
+1: ;; Sign-extend A
+ XJMP __usmulhisi3_tail
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+#if defined (L_usmulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __usmulhisi3
+ XCALL __umulhisi3
+ ;; FALLTHRU
+ENDF __usmulhisi3
+
+DEFUN __usmulhisi3_tail
+ ;; Sign-extend A
+ sbrs A1, 7
+ ret
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __usmulhisi3_tail
+#endif /* L_usmulhisi3 */
+
+#if defined (L_umulhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __umulhisi3
+ mul A0, B0
+ movw C0, r0
+ mul A1, B1
+ movw C2, r0
+ mul A0, B1
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; This function is used by many other routines, often multiple times.
+ ;; Therefore, if the flash size is not too limited, avoid the RCALL
+ ;; and invest 6 Bytes to speed things up.
+ add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+#else
+ rcall 1f
+#endif
+ mul A1, B0
+1: add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+ ret
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
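+
+;; The routine above computes the four 8 x 8 partial products of
+;;     A * B  =  A1*B1 * 2^16  +  (A1*B0 + A0*B1) * 2^8  +  A0*B0
+;; with MUL and accumulates them in C3:C0; the full product of two 16-bit
+;; values always fits in 32 bits, so no carry can be lost at the top.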
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 32 with MUL
+*******************************************************/
+
+#if defined (L_mulshisi3)
+;;; R25:R22 = (signed long) R27:R26 * R21:R18
+;;; (C3:C0) = (signed long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulshisi3
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have a problem skipping a 2-word instruction
+ tst A1
+ brmi __mulohisi3
+#else
+ sbrs A1, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XJMP __muluhisi3
+ ;; FALLTHRU
+ENDF __mulshisi3
+
+;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
+;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulohisi3
+ XCALL __muluhisi3
+ ;; One-extend R27:R26 (A1:A0)
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __mulohisi3
+#endif /* L_mulshisi3 */
+
+#if defined (L_muluhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
+;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __muluhisi3
+ XCALL __umulhisi3
+ mul A0, B3
+ add C3, r0
+ mul A1, B2
+ add C3, r0
+ mul A0, B2
+ add C2, r0
+ adc C3, r1
+ clr __zero_reg__
+ ret
+ENDF __muluhisi3
+#endif /* L_muluhisi3 */
+
+/*******************************************************
+ Multiplication 32 x 32 with MUL
+*******************************************************/
+
+#if defined (L_mulsi3)
+;;; R25:R22 = R25:R22 * R21:R18
+;;; (C3:C0) = C3:C0 * B3:B0
+;;; Clobbers: R26, R27, __tmp_reg__
+DEFUN __mulsi3
+ movw A0, C0
+ push C2
+ push C3
+ XCALL __muluhisi3
+ pop A1
+ pop A0
+ ;; A1:A0 now contains the high word of A
+ mul A0, B0
+ add C2, r0
+ adc C3, r1
+ mul A0, B1
+ add C3, r0
+ mul A1, B0
+ add C3, r0
+ clr __zero_reg__
+ ret
+ENDF __mulsi3
+#endif /* L_mulsi3 */
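+
+;; The truncated 32-bit product computed above is
+;;     C  =  (AL*BL  +  (AL*BH + AH*BL) * 2^16)  mod 2^32
+;; with AH:AL and BH:BL denoting the 16-bit halves of the operands; the
+;; word product AH*BH only affects bits 32 and up and is not computed.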
+
+#undef A0
+#undef A1
+
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#endif /* __AVR_HAVE_MUL__ */
+
+/*******************************************************
+ Multiplication 24 x 24 with MUL
+*******************************************************/
+
+#if defined (L_mulpsi3)
+
+;; A[0..2]: In: Multiplicand; Out: Product
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+
+;; B[0..2]: In: Multiplier
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; C[0..2]: Expand Result
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+
+;; R24:R22 *= R20:R18
+;; Clobbers: r21, r25, r26, r27, __tmp_reg__
+
+#define AA0 26
+#define AA2 21
+
+DEFUN __mulpsi3
+ wmov AA0, A0
+ mov AA2, A2
+ XCALL __umulhisi3
+ mul AA2, B0 $ add C2, r0
+ mul AA0, B2 $ add C2, r0
+ clr __zero_reg__
+ ret
+ENDF __mulpsi3
+
+#undef AA2
+#undef AA0
+
+#undef C2
+#undef C1
+#undef C0
+
+#else /* !HAVE_MUL */
+
+;; C[0..2]: Expand Result
+#define C0 0
+#define C1 C0+1
+#define C2 21
+
+;; R24:R22 *= R20:R18
+;; Clobbers: __tmp_reg__, R18, R19, R20, R21
+
+DEFUN __mulpsi3
+
+ ;; C[] = 0
+ clr __tmp_reg__
+ clr C2
+
+0: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop
+ LSR B2 $ ror B1 $ ror B0
+
+ ;; If the N-th Bit of B[] was set...
+ brcc 1f
+
+ ;; ...then add A[] * 2^N to the Result C[]
+ ADD C0,A0 $ adc C1,A1 $ adc C2,A2
+
+1: ;; Multiply A[] by 2
+ LSL A0 $ rol A1 $ rol A2
+
+ ;; Loop until B[] is 0
+ subi B0,0 $ sbci B1,0 $ sbci B2,0
+ brne 0b
+
+ ;; Copy C[] to the return Register A[]
+ wmov A0, C0
+ mov A2, C2
+
+ clr __zero_reg__
+ ret
+ENDF __mulpsi3
+
+#undef C2
+#undef C1
+#undef C0
+
+#endif /* HAVE_MUL */
+
+#undef B2
+#undef B1
+#undef B0
+
+#undef A2
+#undef A1
+#undef A0
+
+#endif /* L_mulpsi3 */
+
+#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
+
+;; A[0..2]: In: Multiplicand
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+
+;; BB: In: Multiplier
+#define BB 25
+
+;; C[0..2]: Result
+#define C0 18
+#define C1 C0+1
+#define C2 C0+2
+
+;; C[] = A[] * sign_extend (BB)
+DEFUN __mulsqipsi3
+ mul A0, BB
+ movw C0, r0
+ mul A2, BB
+ mov C2, r0
+ mul A1, BB
+ add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ sbrs BB, 7
+ ret
+ ;; One-extend BB
+ sub C1, A0
+ sbc C2, A1
+ ret
+ENDF __mulsqipsi3
+
+#undef C2
+#undef C1
+#undef C0
+
+#undef BB
+
+#undef A2
+#undef A1
+#undef A0
+
+#endif /* L_mulsqipsi3 && HAVE_MUL */
+
+/*******************************************************
+ Multiplication 64 x 64
+*******************************************************/
+
+;; A[] = A[] * B[]
+
+;; A[0..7]: In: Multiplicand
+;; Out: Product
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+;; B[0..7]: In: Multiplier
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; Define C[] for convenience
+;; Notice that parts of C[] overlap A[] and B[], respectively
+#define C0 16
+#define C1 C0+1
+#define C2 20
+#define C3 C2+1
+#define C4 28
+#define C5 C4+1
+#define C6 C4+2
+#define C7 C4+3
+
+#if defined (L_muldi3)
+
+;; A[] *= B[]
+;; R25:R18 *= R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __muldi3
+ push r29
+ push r28
+ push r17
+ push r16
+
+ ;; Counting in Words, we have to perform a 4 * 4 Multiplication
+
+ ;; 3 * 0 + 0 * 3
+ mul A7,B0 $ $ mov C7,r0
+ mul A0,B7 $ $ add C7,r0
+ mul A6,B1 $ $ add C7,r0
+ mul A6,B0 $ mov C6,r0 $ add C7,r1
+ mul B6,A1 $ $ add C7,r0
+ mul B6,A0 $ add C6,r0 $ adc C7,r1
+
+ ;; 1 * 2
+ mul A2,B4 $ add C6,r0 $ adc C7,r1
+ mul A3,B4 $ $ add C7,r0
+ mul A2,B5 $ $ add C7,r0
+
+ push A5
+ push A4
+ push B1
+ push B0
+ push A3
+ push A2
+
+ ;; 0 * 0
+ wmov 26, B0
+ XCALL __umulhisi3
+ wmov C0, 22
+ wmov C2, 24
+
+ ;; 0 * 2
+ wmov 26, B4
+ XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25
+
+ wmov 26, B2
+ ;; 0 * 1
+ XCALL __muldi3_6
+
+ pop A0
+ pop A1
+ ;; 1 * 1
+ wmov 26, B2
+ XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
+
+ pop r26
+ pop r27
+ ;; 1 * 0
+ XCALL __muldi3_6
+
+ pop A0
+ pop A1
+ ;; 2 * 0
+ XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25
+
+ ;; 2 * 1
+ wmov 26, B2
+ XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23
+
+ ;; A[] = C[]
+ wmov A0, C0
+ ;; A2 = C2 already
+ wmov A4, C4
+ wmov A6, C6
+
+ clr __zero_reg__
+ pop r16
+ pop r17
+ pop r28
+ pop r29
+ ret
+ENDF __muldi3
+#endif /* L_muldi3 */
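+
+;; Writing A[] and B[] as four 16-bit words each, the routine above
+;; accumulates
+;;     A * B  =  sum over i+j <= 3 of  Ai*Bj * 2^(16*(i+j))   (mod 2^64);
+;; the "i * j" comments mark which word product is being added, and the
+;; products with i+j = 3 only contribute their low 16 bits.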
+
+#if defined (L_muldi3_6)
+;; A helper for some 64-bit multiplications with MUL available
+DEFUN __muldi3_6
+__muldi3_6:
+ XCALL __umulhisi3
+ add C2, 22
+ adc C3, 23
+ adc C4, 24
+ adc C5, 25
+ brcc 0f
+ adiw C6, 1
+0: ret
+ENDF __muldi3_6
+#endif /* L_muldi3_6 */
+
+#undef C7
+#undef C6
+#undef C5
+#undef C4
+#undef C3
+#undef C2
+#undef C1
+#undef C0
+
+#else /* !HAVE_MUL */
+
+#if defined (L_muldi3)
+
+#define C0 26
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+#define C4 C0+4
+#define C5 C0+5
+#define C6 0
+#define C7 C6+1
+
+#define Loop 9
+
+;; A[] *= B[]
+;; R25:R18 *= R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __muldi3
+ push r29
+ push r28
+ push Loop
+
+ ldi C0, 64
+ mov Loop, C0
+
+ ;; C[] = 0
+ clr __tmp_reg__
+ wmov C0, 0
+ wmov C2, 0
+ wmov C4, 0
+
+0: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
+ ;; where N = 64 - Loop.
+ ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
+ ;; B[] will have its initial Value again.
+ LSR B7 $ ror B6 $ ror B5 $ ror B4
+ ror B3 $ ror B2 $ ror B1 $ ror B0
+
+ ;; If the N-th Bit of B[] was set then...
+ brcc 1f
+ ;; ...finish Rotation...
+ ori B7, 1 << 7
+
+ ;; ...and add A[] * 2^N to the Result C[]
+ ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
+ adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7
+
+1: ;; Multiply A[] by 2
+ LSL A0 $ rol A1 $ rol A2 $ rol A3
+ rol A4 $ rol A5 $ rol A6 $ rol A7
+
+ dec Loop
+ brne 0b
+
+ ;; We expanded the Result in C[]
+ ;; Copy Result to the Return Register A[]
+ wmov A0, C0
+ wmov A2, C2
+ wmov A4, C4
+ wmov A6, C6
+
+ clr __zero_reg__
+ pop Loop
+ pop r28
+ pop r29
+ ret
+ENDF __muldi3
+
+#undef Loop
+
+#undef C7
+#undef C6
+#undef C5
+#undef C4
+#undef C3
+#undef C2
+#undef C1
+#undef C0
+
+#endif /* L_muldi3 */
+#endif /* HAVE_MUL */
+
+#undef B7
+#undef B6
+#undef B5
+#undef B4
+#undef B3
+#undef B2
+#undef B1
+#undef B0
+
+#undef A7
+#undef A6
+#undef A5
+#undef A4
+#undef A3
+#undef A2
+#undef A1
+#undef A0
+
+/*******************************************************
+ Widening Multiplication 64 = 32 x 32 with MUL
+*******************************************************/
+
+#if defined (__AVR_HAVE_MUL__)
+#define A0 r22
+#define A1 r23
+#define A2 r24
+#define A3 r25
+
+#define B0 r18
+#define B1 r19
+#define B2 r20
+#define B3 r21
+
+#define C0 18
+#define C1 C0+1
+#define C2 20
+#define C3 C2+1
+#define C4 28
+#define C5 C4+1
+#define C6 C4+2
+#define C7 C4+3
+
+#if defined (L_umulsidi3)
+
+;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
+
+;; R18[8] = R22[4] * R18[4]
+;;
+;; Ordinary ABI Function, but additionally sets
+;; X = R20[2] = B2[2]
+;; Z = R22[2] = A0[2]
+DEFUN __umulsidi3
+ clt
+ ;; FALLTHRU
+ENDF __umulsidi3
+ ;; T = sign (A)
+DEFUN __umulsidi3_helper
+ push 29 $ push 28 ; Y
+ wmov 30, A2
+ ;; Counting in Words, we have to perform 4 Multiplications
+ ;; 0 * 0
+ wmov 26, A0
+ XCALL __umulhisi3
+ push 23 $ push 22 ; C0
+ wmov 28, B0
+ wmov 18, B2
+ wmov C2, 24
+ push 27 $ push 26 ; A0
+ push 19 $ push 18 ; B2
+ ;;
+ ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
+ ;; B2 C2 -- -- -- B0 A2
+ ;; 1 * 1
+ wmov 26, 30 ; A2
+ XCALL __umulhisi3
+ ;; Sign-extend A. T holds the sign of A
+ brtc 0f
+ ;; Subtract B from the high part of the result
+ sub 22, 28
+ sbc 23, 29
+ sbc 24, 18
+ sbc 25, 19
+0: wmov 18, 28 ;; B0
+ wmov C4, 22
+ wmov C6, 24
+ ;;
+ ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y
+ ;; B0 C2 -- -- A2 C4 C6
+ ;;
+ ;; 1 * 0
+ XCALL __muldi3_6
+ ;; 0 * 1
+ pop 26 $ pop 27 ;; B2
+ pop 18 $ pop 19 ;; A0
+ XCALL __muldi3_6
+
+ ;; Move result C into place and save A0 in Z
+ wmov 22, C4
+ wmov 24, C6
+ wmov 30, 18 ; A0
+ pop C0 $ pop C1
+
+ ;; Epilogue
+ pop 28 $ pop 29 ;; Y
+ ret
+ENDF __umulsidi3_helper
+#endif /* L_umulsidi3 */
+
+
+#if defined (L_mulsidi3)
+
+;; Signed widening 64 = 32 * 32 Multiplication
+;;
+;; R18[8] = R22[4] * R18[4]
+;; Ordinary ABI Function
+DEFUN __mulsidi3
+ bst A3, 7
+ sbrs B3, 7 ; Enhanced core has no skip bug
+ XJMP __umulsidi3_helper
+
+ ;; B needs sign-extension
+ push A3
+ push A2
+ XCALL __umulsidi3_helper
+ ;; A0 survived in Z
+ sub r22, r30
+ sbc r23, r31
+ pop r26
+ pop r27
+ sbc r24, r26
+ sbc r25, r27
+ ret
+ENDF __mulsidi3
+#endif /* L_mulsidi3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+#endif /* HAVE_MUL */
+
+/**********************************************************
+ Widening Multiplication 64 = 32 x 32 without MUL
+**********************************************************/
+
+#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+#define AA0 22
+#define AA1 AA0+1
+#define AA2 AA0+2
+#define AA3 AA0+3
+
+#define BB0 18
+#define BB1 BB0+1
+#define BB2 BB0+2
+#define BB3 BB0+3
+
+#define Mask r30
+
+;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
+;;
+;; R18[8] = R22[4] * R18[4]
+;; Ordinary ABI Function
+DEFUN __mulsidi3
+ set
+ skip
+ ;; FALLTHRU
+ENDF __mulsidi3
+
+DEFUN __umulsidi3
+ clt ; skipped
+ ;; Save 10 Registers: R10..R17, R28, R29
+ do_prologue_saves 10
+ ldi Mask, 0xff
+ bld Mask, 7
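+ ;; Mask = 0x7f if T = 0 (zero-extend) resp. 0xff if T = 1 (sign-extend),
+ ;; so the "AND with Mask, then LSL" below shifts out 0 resp. the sign bit
+ ;; and SBC then replicates it into the high bytes.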
+ ;; Move B into place...
+ wmov B0, BB0
+ wmov B2, BB2
+ ;; ...and extend it
+ and BB3, Mask
+ lsl BB3
+ sbc B4, B4
+ mov B5, B4
+ wmov B6, B4
+ ;; Move A into place...
+ wmov A0, AA0
+ wmov A2, AA2
+ ;; ...and extend it
+ and AA3, Mask
+ lsl AA3
+ sbc A4, A4
+ mov A5, A4
+ wmov A6, A4
+ XCALL __muldi3
+ do_epilogue_restores 10
+ENDF __umulsidi3
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef A4
+#undef A5
+#undef A6
+#undef A7
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef B4
+#undef B5
+#undef B6
+#undef B7
+#undef AA0
+#undef AA1
+#undef AA2
+#undef AA3
+#undef BB0
+#undef BB1
+#undef BB2
+#undef BB3
+#undef Mask
+#endif /* L_mulsidi3 && !HAVE_MUL */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+.section .text.libgcc.div, "ax", @progbits
+
+/*******************************************************
+ Division 8 / 8 => (result + remainder)
+*******************************************************/
+#define r_rem r25 /* remainder */
+#define r_arg1 r24 /* dividend, quotient */
+#define r_arg2 r22 /* divisor */
+#define r_cnt r23 /* loop count */
+
+#if defined (L_udivmodqi4)
+DEFUN __udivmodqi4
+ sub r_rem,r_rem ; clear remainder and carry
+ ldi r_cnt,9 ; init loop counter
+ rjmp __udivmodqi4_ep ; jump to entry point
+__udivmodqi4_loop:
+ rol r_rem ; shift dividend into remainder
+ cp r_rem,r_arg2 ; compare remainder & divisor
+ brcs __udivmodqi4_ep ; remainder < divisor
+ sub r_rem,r_arg2 ; restore remainder
+__udivmodqi4_ep:
+ rol r_arg1 ; shift dividend (with CARRY)
+ dec r_cnt ; decrement loop counter
+ brne __udivmodqi4_loop
+ com r_arg1 ; complement result
+ ; because C flag was complemented in loop
+ ret
+ENDF __udivmodqi4
+#endif /* defined (L_udivmodqi4) */
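+
+/* A plain C sketch (not part of libgcc) of the shift-and-subtract division
+   performed above and by the wider __udivmod*4 routines below; the asm
+   keeps the quotient bits complemented while looping and fixes them up
+   with COM at the end:
+
+       unsigned char
+       udivmodqi4 (unsigned char num, unsigned char den, unsigned char *rem)
+       {
+           unsigned char quo = 0, r = 0;
+           for (int i = 7; i >= 0; i--)
+           {
+               r = (r << 1) | ((num >> i) & 1);  // shift dividend into remainder
+               if (r >= den)                     // divisor fits?
+               {
+                   r -= den;                     // subtract it...
+                   quo |= 1u << i;               // ...and set the quotient bit
+               }
+           }
+           *rem = r;
+           return quo;
+       }
+*/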
+
+#if defined (L_divmodqi4)
+DEFUN __divmodqi4
+ bst r_arg1,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg1
+ eor __tmp_reg__,r_arg2; r0.7 is sign of result
+ sbrc r_arg1,7
+ neg r_arg1 ; dividend negative : negate
+ sbrc r_arg2,7
+ neg r_arg2 ; divisor negative : negate
+ XCALL __udivmodqi4 ; do the unsigned div/mod
+ brtc __divmodqi4_1
+ neg r_rem ; correct remainder sign
+__divmodqi4_1:
+ sbrc __tmp_reg__,7
+ neg r_arg1 ; correct result sign
+__divmodqi4_exit:
+ ret
+ENDF __divmodqi4
+#endif /* defined (L_divmodqi4) */
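+
+;; Sign handling used by all signed __divmod*4 wrappers in this file: make
+;; both operands positive, run the unsigned routine, then give the quotient
+;; the sign A.sign ^ B.sign and the remainder the sign of the dividend A,
+;; i.e. truncating division as required by C99: e.g. -7 / 2 = -3 with
+;; remainder -1.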
+
+#undef r_rem
+#undef r_arg1
+#undef r_arg2
+#undef r_cnt
+
+
+/*******************************************************
+ Division 16 / 16 => (result + remainder)
+*******************************************************/
+#define r_remL r26 /* remainder Low */
+#define r_remH r27 /* remainder High */
+
+/* return: remainder */
+#define r_arg1L r24 /* dividend Low */
+#define r_arg1H r25 /* dividend High */
+
+/* return: quotient */
+#define r_arg2L r22 /* divisor Low */
+#define r_arg2H r23 /* divisor High */
+
+#define r_cnt r21 /* loop count */
+
+#if defined (L_udivmodhi4)
+DEFUN __udivmodhi4
+ sub r_remL,r_remL
+ sub r_remH,r_remH ; clear remainder and carry
+ ldi r_cnt,17 ; init loop counter
+ rjmp __udivmodhi4_ep ; jump to entry point
+__udivmodhi4_loop:
+ rol r_remL ; shift dividend into remainder
+ rol r_remH
+ cp r_remL,r_arg2L ; compare remainder & divisor
+ cpc r_remH,r_arg2H
+ brcs __udivmodhi4_ep ; remainder < divisor
+ sub r_remL,r_arg2L ; restore remainder
+ sbc r_remH,r_arg2H
+__udivmodhi4_ep:
+ rol r_arg1L ; shift dividend (with CARRY)
+ rol r_arg1H
+ dec r_cnt ; decrement loop counter
+ brne __udivmodhi4_loop
+ com r_arg1L
+ com r_arg1H
+; div/mod results to return registers, as for the div() function
+ mov_l r_arg2L, r_arg1L ; quotient
+ mov_h r_arg2H, r_arg1H
+ mov_l r_arg1L, r_remL ; remainder
+ mov_h r_arg1H, r_remH
+ ret
+ENDF __udivmodhi4
+#endif /* defined (L_udivmodhi4) */
+
+#if defined (L_divmodhi4)
+DEFUN __divmodhi4
+ .global _div
+_div:
+ bst r_arg1H,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg2H
+ brtc 0f
+ com __tmp_reg__ ; r0.7 is sign of result
+ rcall __divmodhi4_neg1 ; dividend negative: negate
+0:
+ sbrc r_arg2H,7
+ rcall __divmodhi4_neg2 ; divisor negative: negate
+ XCALL __udivmodhi4 ; do the unsigned div/mod
+ sbrc __tmp_reg__,7
+ rcall __divmodhi4_neg2 ; correct remainder sign
+ brtc __divmodhi4_exit
+__divmodhi4_neg1:
+ ;; correct dividend/remainder sign
+ com r_arg1H
+ neg r_arg1L
+ sbci r_arg1H,0xff
+ ret
+__divmodhi4_neg2:
+ ;; correct divisor/result sign
+ com r_arg2H
+ neg r_arg2L
+ sbci r_arg2H,0xff
+__divmodhi4_exit:
+ ret
+ENDF __divmodhi4
+#endif /* defined (L_divmodhi4) */
+
+#undef r_remH
+#undef r_remL
+
+#undef r_arg1H
+#undef r_arg1L
+
+#undef r_arg2H
+#undef r_arg2L
+
+#undef r_cnt
+
+/*******************************************************
+ Division 24 / 24 => (result + remainder)
+*******************************************************/
+
+;; A[0..2]: In: Dividend; Out: Quotient
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+
+;; B[0..2]: In: Divisor; Out: Remainder
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+
+;; C[0..2]: Expand remainder
+#define C0 __zero_reg__
+#define C1 26
+#define C2 25
+
+;; Loop counter
+#define r_cnt 21
+
+#if defined (L_udivmodpsi4)
+;; R24:R22 = R24:R22 udiv R20:R18
+;; R20:R18 = R24:R22 umod R20:R18
+;; Clobbers: R21, R25, R26
+
+DEFUN __udivmodpsi4
+ ; init loop counter
+ ldi r_cnt, 24+1
+ ; Clear remainder and carry. C0 is already 0
+ clr C1
+ sub C2, C2
+ ; jump to entry point
+ rjmp __udivmodpsi4_start
+__udivmodpsi4_loop:
+ ; shift dividend into remainder
+ rol C0
+ rol C1
+ rol C2
+ ; compare remainder & divisor
+ cp C0, B0
+ cpc C1, B1
+ cpc C2, B2
+ brcs __udivmodpsi4_start ; remainder < divisor
+ sub C0, B0 ; restore remainder
+ sbc C1, B1
+ sbc C2, B2
+__udivmodpsi4_start:
+ ; shift dividend (with CARRY)
+ rol A0
+ rol A1
+ rol A2
+ ; decrement loop counter
+ dec r_cnt
+ brne __udivmodpsi4_loop
+ com A0
+ com A1
+ com A2
+ ; div/mod results to return registers
+ ; remainder
+ mov B0, C0
+ mov B1, C1
+ mov B2, C2
+ clr __zero_reg__ ; C0
+ ret
+ENDF __udivmodpsi4
+#endif /* defined (L_udivmodpsi4) */
+
+#if defined (L_divmodpsi4)
+;; R24:R22 = R24:R22 div R20:R18
+;; R20:R18 = R24:R22 mod R20:R18
+;; Clobbers: T, __tmp_reg__, R21, R25, R26
+
+DEFUN __divmodpsi4
+ ; R0.7 will contain the sign of the result:
+ ; R0.7 = A.sign ^ B.sign
+ mov __tmp_reg__, B2
+ ; T-flag = sign of dividend
+ bst A2, 7
+ brtc 0f
+ com __tmp_reg__
+ ; Adjust dividend's sign
+ rcall __divmodpsi4_negA
+0:
+ ; Adjust divisor's sign
+ sbrc B2, 7
+ rcall __divmodpsi4_negB
+
+ ; Do the unsigned div/mod
+ XCALL __udivmodpsi4
+
+ ; Adjust quotient's sign
+ sbrc __tmp_reg__, 7
+ rcall __divmodpsi4_negA
+
+ ; Adjust remainder's sign
+ brtc __divmodpsi4_end
+
+__divmodpsi4_negB:
+ ; Correct divisor/remainder sign
+ com B2
+ com B1
+ neg B0
+ sbci B1, -1
+ sbci B2, -1
+ ret
+
+ ; Correct dividend/quotient sign
+__divmodpsi4_negA:
+ com A2
+ com A1
+ neg A0
+ sbci A1, -1
+ sbci A2, -1
+__divmodpsi4_end:
+ ret
+
+ENDF __divmodpsi4
+#endif /* defined (L_divmodpsi4) */
+
+#undef A0
+#undef A1
+#undef A2
+
+#undef B0
+#undef B1
+#undef B2
+
+#undef C0
+#undef C1
+#undef C2
+
+#undef r_cnt
+
+/*******************************************************
+ Division 32 / 32 => (result + remainder)
+*******************************************************/
+#define r_remHH r31 /* remainder High */
+#define r_remHL r30
+#define r_remH r27
+#define r_remL r26 /* remainder Low */
+
+/* return: remainder */
+#define r_arg1HH r25 /* dividend High */
+#define r_arg1HL r24
+#define r_arg1H r23
+#define r_arg1L r22 /* dividend Low */
+
+/* return: quotient */
+#define r_arg2HH r21 /* divisor High */
+#define r_arg2HL r20
+#define r_arg2H r19
+#define r_arg2L r18 /* divisor Low */
+
+#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
+
+#if defined (L_udivmodsi4)
+DEFUN __udivmodsi4
+ ldi r_remL, 33 ; init loop counter
+ mov r_cnt, r_remL
+ sub r_remL,r_remL
+ sub r_remH,r_remH ; clear remainder and carry
+ mov_l r_remHL, r_remL
+ mov_h r_remHH, r_remH
+ rjmp __udivmodsi4_ep ; jump to entry point
+__udivmodsi4_loop:
+ rol r_remL ; shift dividend into remainder
+ rol r_remH
+ rol r_remHL
+ rol r_remHH
+ cp r_remL,r_arg2L ; compare remainder & divisor
+ cpc r_remH,r_arg2H
+ cpc r_remHL,r_arg2HL
+ cpc r_remHH,r_arg2HH
+ brcs __udivmodsi4_ep ; remainder < divisor
+ sub r_remL,r_arg2L ; restore remainder
+ sbc r_remH,r_arg2H
+ sbc r_remHL,r_arg2HL
+ sbc r_remHH,r_arg2HH
+__udivmodsi4_ep:
+ rol r_arg1L ; shift dividend (with CARRY)
+ rol r_arg1H
+ rol r_arg1HL
+ rol r_arg1HH
+ dec r_cnt ; decrement loop counter
+ brne __udivmodsi4_loop
+ ; __zero_reg__ now restored (r_cnt == 0)
+ com r_arg1L
+ com r_arg1H
+ com r_arg1HL
+ com r_arg1HH
+; div/mod results to return registers, as for the ldiv() function
+ mov_l r_arg2L, r_arg1L ; quotient
+ mov_h r_arg2H, r_arg1H
+ mov_l r_arg2HL, r_arg1HL
+ mov_h r_arg2HH, r_arg1HH
+ mov_l r_arg1L, r_remL ; remainder
+ mov_h r_arg1H, r_remH
+ mov_l r_arg1HL, r_remHL
+ mov_h r_arg1HH, r_remHH
+ ret
+ENDF __udivmodsi4
+#endif /* defined (L_udivmodsi4) */
+
+#if defined (L_divmodsi4)
+DEFUN __divmodsi4
+ mov __tmp_reg__,r_arg2HH
+ bst r_arg1HH,7 ; store sign of dividend
+ brtc 0f
+ com __tmp_reg__ ; r0.7 is sign of result
+ XCALL __negsi2 ; dividend negative: negate
+0:
+ sbrc r_arg2HH,7
+ rcall __divmodsi4_neg2 ; divisor negative: negate
+ XCALL __udivmodsi4 ; do the unsigned div/mod
+ sbrc __tmp_reg__, 7 ; correct quotient sign
+ rcall __divmodsi4_neg2
+ brtc __divmodsi4_exit ; correct remainder sign
+ XJMP __negsi2
+__divmodsi4_neg2:
+ ;; correct divisor/quotient sign
+ com r_arg2HH
+ com r_arg2HL
+ com r_arg2H
+ neg r_arg2L
+ sbci r_arg2H,0xff
+ sbci r_arg2HL,0xff
+ sbci r_arg2HH,0xff
+__divmodsi4_exit:
+ ret
+ENDF __divmodsi4
+#endif /* defined (L_divmodsi4) */
+
+#if defined (L_negsi2)
+;; (set (reg:SI 22)
+;; (neg:SI (reg:SI 22)))
+;; Sets the V flag for signed overflow tests
+DEFUN __negsi2
+ NEG4 22
+ ret
+ENDF __negsi2
+#endif /* L_negsi2 */
+
+#undef r_remHH
+#undef r_remHL
+#undef r_remH
+#undef r_remL
+#undef r_arg1HH
+#undef r_arg1HL
+#undef r_arg1H
+#undef r_arg1L
+#undef r_arg2HH
+#undef r_arg2HL
+#undef r_arg2H
+#undef r_arg2L
+#undef r_cnt
+
+/*******************************************************
+ Division 64 / 64
+ Modulo 64 % 64
+*******************************************************/
+
+;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
+;; at least 16k of Program Memory. For smaller Devices, depend
+;; on MOVW and SP Size. There is a Connection between SP Size and
+;; Flash Size so that SP Size can be used to test for Flash Size.
+
+#if defined (__AVR_HAVE_JMP_CALL__)
+# define SPEED_DIV 8
+#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
+# define SPEED_DIV 16
+#else
+# define SPEED_DIV 0
+#endif
+
+;; A[0..7]: In: Dividend;
+;; Out: Quotient (T = 0)
+;; Out: Remainder (T = 1)
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+#define A4 A0+4
+#define A5 A0+5
+#define A6 A0+6
+#define A7 A0+7
+
+;; B[0..7]: In: Divisor; Out: Clobber
+#define B0 10
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+#define B4 B0+4
+#define B5 B0+5
+#define B6 B0+6
+#define B7 B0+7
+
+;; C[0..7]: Expand remainder; Out: Remainder (unused)
+#define C0 8
+#define C1 C0+1
+#define C2 30
+#define C3 C2+1
+#define C4 28
+#define C5 C4+1
+#define C6 26
+#define C7 C6+1
+
+;; Holds Signs during Division Routine
+#define SS __tmp_reg__
+
+;; Bit-Counter in Division Routine
+#define R_cnt __zero_reg__
+
+;; Scratch Register for Negation
+#define NN r31
+
+#if defined (L_udivdi3)
+
+;; R25:R18 = R24:R18 umod R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __umoddi3
+ set
+ rjmp __udivdi3_umoddi3
+ENDF __umoddi3
+
+;; R25:R18 = R24:R18 udiv R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __udivdi3
+ clt
+ENDF __udivdi3
+
+DEFUN __udivdi3_umoddi3
+ push C0
+ push C1
+ push C4
+ push C5
+ XCALL __udivmod64
+ pop C5
+ pop C4
+ pop C1
+ pop C0
+ ret
+ENDF __udivdi3_umoddi3
+#endif /* L_udivdi3 */
+
+#if defined (L_udivmod64)
+
+;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
+;; No Registers saved/restored; the Callers will take Care.
+;; Preserves B[] and T-flag
+;; T = 0: Compute Quotient in A[]
+;; T = 1: Compute Remainder in A[] and shift SS one Bit left
+
+DEFUN __udivmod64
+
+ ;; Clear Remainder (C6, C7 will follow)
+ clr C0
+ clr C1
+ wmov C2, C0
+ wmov C4, C0
+ ldi C7, 64
+
+#if SPEED_DIV == 0 || SPEED_DIV == 16
+ ;; Initialize Loop-Counter
+ mov R_cnt, C7
+ wmov C6, C0
+#endif /* SPEED_DIV */
+
+#if SPEED_DIV == 8
+
+ push A7
+ clr C6
+
+1: ;; Compare shifted Dividend against Divisor
+ ;; If -- even after Shifting -- it is smaller...
+ CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
+ cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
+ brcc 2f
+
+ ;; ...then we can subtract it. Thus, it is legal to shift left
+ $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
+ mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
+ mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
+ mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0
+
+ ;; 8 Bits are done
+ subi C7, 8
+ brne 1b
+
+ ;; Shifted 64 Bits: A7 has traveled to C7
+ pop C7
+ ;; Divisor is greater than Dividend. We have:
+ ;; A[] % B[] = A[]
+ ;; A[] / B[] = 0
+ ;; Thus, we can return immediately
+ rjmp 5f
+
+2: ;; Initialize Bit-Counter with Number of Bits still to be performed
+ mov R_cnt, C7
+
+ ;; Push of A7 is not needed because C7 is still 0
+ pop C7
+ clr C7
+
+#elif SPEED_DIV == 16
+
+ ;; Compare shifted Dividend against Divisor
+ cp A7, B3
+ cpc C0, B4
+ cpc C1, B5
+ cpc C2, B6
+ cpc C3, B7
+ brcc 2f
+
+ ;; Divisor is greater than shifted Dividend: We can shift the Dividend
+ ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
+ wmov C2,A6 $ wmov C0,A4
+ wmov A6,A2 $ wmov A4,A0
+ wmov A2,C6 $ wmov A0,C4
+
+ ;; Set Bit Counter to 32
+ lsr R_cnt
+2:
+#elif SPEED_DIV
+#error SPEED_DIV = ?
+#endif /* SPEED_DIV */
+
+;; The actual Division + Remainder Routine
+
+3: ;; Left-shift Dividend...
+ lsl A0 $ rol A1 $ rol A2 $ rol A3
+ rol A4 $ rol A5 $ rol A6 $ rol A7
+
+ ;; ...into Remainder
+ rol C0 $ rol C1 $ rol C2 $ rol C3
+ rol C4 $ rol C5 $ rol C6 $ rol C7
+
+ ;; Compare Remainder and Divisor
+ CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
+ cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7
+
+ brcs 4f
+
+ ;; Divisor fits into Remainder: Subtract it from Remainder...
+ SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
+ sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7
+
+ ;; ...and set the corresponding Bit in the upcoming Quotient
+ ;; The Bit will travel to its final Position
+ ori A0, 1
+
+4: ;; This Bit is done
+ dec R_cnt
+ brne 3b
+ ;; __zero_reg__ is 0 again
+
+ ;; T = 0: We are fine with the Quotient in A[]
+ ;; T = 1: Copy Remainder to A[]
+5: brtc 6f
+ wmov A0, C0
+ wmov A2, C2
+ wmov A4, C4
+ wmov A6, C6
+ ;; Move the Sign of the Result to SS.7
+ lsl SS
+
+6: ret
+
+ENDF __udivmod64
+#endif /* L_udivmod64 */
+
+
+#if defined (L_divdi3)
+
+;; R25:R18 = R24:R18 mod R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __moddi3
+ set
+ rjmp __divdi3_moddi3
+ENDF __moddi3
+
+;; R25:R18 = R24:R18 div R17:R10
+;; Ordinary ABI-Function
+
+DEFUN __divdi3
+ clt
+ENDF __divdi3
+
+DEFUN __divdi3_moddi3
+#if SPEED_DIV
+ mov r31, A7
+ or r31, B7
+ brmi 0f
+ ;; Both Signs are 0: the following Complexity is not needed
+ XJMP __udivdi3_umoddi3
+#endif /* SPEED_DIV */
+
+0: ;; The Prologue
+ ;; Save 12 Registers: Y, 17...8
+ ;; No Frame needed
+ do_prologue_saves 12
+
+ ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign)
+ ;; SS.6 will contain the Sign of the Remainder (A.sign)
+ mov SS, A7
+ asr SS
+ ;; Adjust Dividend's Sign as needed
+#if SPEED_DIV
+ ;; Compiling for Speed we know that at least one Sign must be < 0
+ ;; Thus, if A[] >= 0 then we know B[] < 0
+ brpl 22f
+#else
+ brpl 21f
+#endif /* SPEED_DIV */
+
+ XCALL __negdi2
+
+ ;; Adjust Divisor's Sign and SS.7 as needed
+21: tst B7
+ brpl 3f
+22: ldi NN, 1 << 7
+ eor SS, NN
+
+ ldi NN, -1
+ com B4 $ com B5 $ com B6 $ com B7
+ $ com B1 $ com B2 $ com B3
+ NEG B0
+ $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
+ sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN
+
+3: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
+ XCALL __udivmod64
+
+ ;; Adjust Result's Sign
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ tst SS
+ brpl 4f
+#else
+ sbrc SS, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XCALL __negdi2
+
+4: ;; Epilogue: Restore 12 Registers and return
+ do_epilogue_restores 12
+
+ENDF __divdi3_moddi3
+
+#endif /* L_divdi3 */
+
+#undef R_cnt
+#undef SS
+#undef NN
+
+.section .text.libgcc, "ax", @progbits
+
+#define TT __tmp_reg__
+
+#if defined (L_adddi3)
+;; (set (reg:DI 18)
+;; (plus:DI (reg:DI 18)
+;; (reg:DI 10)))
+;; Sets the V flag for signed overflow tests
+;; Sets the C flag for unsigned overflow tests
+DEFUN __adddi3
+ ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
+ adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
+ ret
+ENDF __adddi3
+#endif /* L_adddi3 */
+
+#if defined (L_adddi3_s8)
+;; (set (reg:DI 18)
+;; (plus:DI (reg:DI 18)
+;; (sign_extend:SI (reg:QI 26))))
+;; Sets the V flag for signed overflow tests
+;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
+DEFUN __adddi3_s8
+ clr TT
+ sbrc r26, 7
+ com TT
+ ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
+ adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT
+ ret
+ENDF __adddi3_s8
+#endif /* L_adddi3_s8 */
+
+#if defined (L_subdi3)
+;; (set (reg:DI 18)
+;; (minus:DI (reg:DI 18)
+;; (reg:DI 10)))
+;; Sets the V flag for signed overflow tests
+;; Sets the C flag for unsigned overflow tests
+DEFUN __subdi3
+ SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
+ sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
+ ret
+ENDF __subdi3
+#endif /* L_subdi3 */
+
+#if defined (L_cmpdi2)
+;; (set (cc0)
+;; (compare (reg:DI 18)
+;; (reg:DI 10)))
+DEFUN __cmpdi2
+ CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
+ cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
+ ret
+ENDF __cmpdi2
+#endif /* L_cmpdi2 */
+
+#if defined (L_cmpdi2_s8)
+;; (set (cc0)
+;; (compare (reg:DI 18)
+;; (sign_extend:SI (reg:QI 26))))
+DEFUN __cmpdi2_s8
+ clr TT
+ sbrc r26, 7
+ com TT
+ CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
+ cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
+ ret
+ENDF __cmpdi2_s8
+#endif /* L_cmpdi2_s8 */
+
+#if defined (L_negdi2)
+;; (set (reg:DI 18)
+;; (neg:DI (reg:DI 18)))
+;; Sets the V flag for signed overflow tests
+DEFUN __negdi2
+
+ com A4 $ com A5 $ com A6 $ com A7
+ $ com A1 $ com A2 $ com A3
+ NEG A0
+ $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
+ sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
+ ret
+
+ENDF __negdi2
+#endif /* L_negdi2 */
+
+#undef TT
+
+#undef C7
+#undef C6
+#undef C5
+#undef C4
+#undef C3
+#undef C2
+#undef C1
+#undef C0
+
+#undef B7
+#undef B6
+#undef B5
+#undef B4
+#undef B3
+#undef B2
+#undef B1
+#undef B0
+
+#undef A7
+#undef A6
+#undef A5
+#undef A4
+#undef A3
+#undef A2
+#undef A1
+#undef A0
+
+
+.section .text.libgcc.prologue, "ax", @progbits
+
+/**********************************
+ * This is a prologue subroutine
+ **********************************/
+#if defined (L_prologue)
+
+;; This function does not clobber T-flag; 64-bit division relies on it
+DEFUN __prologue_saves__
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r28
+ push r29
+#if !defined (__AVR_HAVE_SPH__)
+ in r28,__SP_L__
+ sub r28,r26
+ out __SP_L__,r28
+ clr r29
+#elif defined (__AVR_XMEGA__)
+ in r28,__SP_L__
+ in r29,__SP_H__
+ sub r28,r26
+ sbc r29,r27
+ out __SP_L__,r28
+ out __SP_H__,r29
+#else
+ in r28,__SP_L__
+ in r29,__SP_H__
+ sub r28,r26
+ sbc r29,r27
+ in __tmp_reg__,__SREG__
+ cli
+ out __SP_H__,r29
+ out __SREG__,__tmp_reg__
+ out __SP_L__,r28
+#endif /* #SP = 8/16 */
+
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+ENDF __prologue_saves__
+#endif /* defined (L_prologue) */
+
+/*
+ * This is an epilogue subroutine
+ */
+#if defined (L_epilogue)
+
+DEFUN __epilogue_restores__
+ ldd r2,Y+18
+ ldd r3,Y+17
+ ldd r4,Y+16
+ ldd r5,Y+15
+ ldd r6,Y+14
+ ldd r7,Y+13
+ ldd r8,Y+12
+ ldd r9,Y+11
+ ldd r10,Y+10
+ ldd r11,Y+9
+ ldd r12,Y+8
+ ldd r13,Y+7
+ ldd r14,Y+6
+ ldd r15,Y+5
+ ldd r16,Y+4
+ ldd r17,Y+3
+ ldd r26,Y+2
+#if !defined (__AVR_HAVE_SPH__)
+ ldd r29,Y+1
+ add r28,r30
+ out __SP_L__,r28
+ mov r28, r26
+#elif defined (__AVR_XMEGA__)
+ ldd r27,Y+1
+ add r28,r30
+ adc r29,__zero_reg__
+ out __SP_L__,r28
+ out __SP_H__,r29
+ wmov 28, 26
+#else
+ ldd r27,Y+1
+ add r28,r30
+ adc r29,__zero_reg__
+ in __tmp_reg__,__SREG__
+ cli
+ out __SP_H__,r29
+ out __SREG__,__tmp_reg__
+ out __SP_L__,r28
+ mov_l r28, r26
+ mov_h r29, r27
+#endif /* #SP = 8/16 */
+ ret
+ENDF __epilogue_restores__
+#endif /* defined (L_epilogue) */
+
+#ifdef L_exit
+ .section .fini9,"ax",@progbits
+DEFUN _exit
+ .weak exit
+exit:
+ENDF _exit
+
+ /* Code from .fini8 ... .fini1 sections inserted by ld script. */
+
+ .section .fini0,"ax",@progbits
+ cli
+__stop_program:
+ rjmp __stop_program
+#endif /* defined (L_exit) */
+
+#ifdef L_cleanup
+ .weak _cleanup
+ .func _cleanup
+_cleanup:
+ ret
+.endfunc
+#endif /* defined (L_cleanup) */
+
+
+.section .text.libgcc, "ax", @progbits
+
+#ifdef L_tablejump
+DEFUN __tablejump2__
+ lsl r30
+ rol r31
+ ;; FALLTHRU
+ENDF __tablejump2__
+
+DEFUN __tablejump__
+#if defined (__AVR_HAVE_LPMX__)
+ lpm __tmp_reg__, Z+
+ lpm r31, Z
+ mov r30, __tmp_reg__
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+#else /* !HAVE_LPMX */
+ lpm
+ adiw r30, 1
+ push r0
+ lpm
+ push r0
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ in __tmp_reg__, __EIND__
+ push __tmp_reg__
+#endif
+ ret
+#endif /* !HAVE_LPMX */
+ENDF __tablejump__
+#endif /* defined (L_tablejump) */
+
+#ifdef L_copy_data
+ .section .init4,"ax",@progbits
+DEFUN __do_copy_data
+#if defined(__AVR_HAVE_ELPMX__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start)
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm r0, Z+
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start - 0x10000)
+.L__do_copy_data_carry:
+ inc r16
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm
+ st X+, r0
+ adiw r30, 1
+ brcs .L__do_copy_data_carry
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+#if defined (__AVR_HAVE_LPMX__)
+ lpm r0, Z+
+#else
+ lpm
+ adiw r30, 1
+#endif
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
+#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+#endif /* ELPM && RAMPD */
+ENDF __do_copy_data
+#endif /* L_copy_data */
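+
+;; All three variants above implement, roughly (a sketch in terms of the
+;; linker symbols referenced above):
+;;
+;;     for (dst = &__data_start, src = &__data_load_start;
+;;          dst != &__data_end; dst++, src++)
+;;         *dst = <one byte read from flash at src>;
+;;
+;; they differ only in which flash read instruction (LPM/LPMX/ELPM/ELPMX)
+;; the device provides and in whether RAMPZ must be maintained.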
+
+/* __do_clear_bss is only necessary if there is anything in .bss section. */
+
+#ifdef L_clear_bss
+ .section .init4,"ax",@progbits
+DEFUN __do_clear_bss
+ ldi r17, hi8(__bss_end)
+ ldi r26, lo8(__bss_start)
+ ldi r27, hi8(__bss_start)
+ rjmp .do_clear_bss_start
+.do_clear_bss_loop:
+ st X+, __zero_reg__
+.do_clear_bss_start:
+ cpi r26, lo8(__bss_end)
+ cpc r27, r17
+ brne .do_clear_bss_loop
+ENDF __do_clear_bss
+#endif /* L_clear_bss */
+
+/* __do_global_ctors and __do_global_dtors are only necessary
+ if there are any constructors/destructors. */
+
+#ifdef L_ctors
+ .section .init6,"ax",@progbits
+DEFUN __do_global_ctors
+#if defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__ctors_start)
+ ldi r28, lo8(__ctors_end)
+ ldi r29, hi8(__ctors_end)
+ ldi r16, hh8(__ctors_end)
+ rjmp .L__do_global_ctors_start
+.L__do_global_ctors_loop:
+ sbiw r28, 2
+ sbc r16, __zero_reg__
+ mov_h r31, r29
+ mov_l r30, r28
+ out __RAMPZ__, r16
+ XCALL __tablejump_elpm__
+.L__do_global_ctors_start:
+ cpi r28, lo8(__ctors_start)
+ cpc r29, r17
+ ldi r24, hh8(__ctors_start)
+ cpc r16, r24
+ brne .L__do_global_ctors_loop
+#else
+ ldi r17, hi8(__ctors_start)
+ ldi r28, lo8(__ctors_end)
+ ldi r29, hi8(__ctors_end)
+ rjmp .L__do_global_ctors_start
+.L__do_global_ctors_loop:
+ sbiw r28, 2
+ mov_h r31, r29
+ mov_l r30, r28
+ XCALL __tablejump__
+.L__do_global_ctors_start:
+ cpi r28, lo8(__ctors_start)
+ cpc r29, r17
+ brne .L__do_global_ctors_loop
+#endif /* defined(__AVR_HAVE_ELPM__) */
+ENDF __do_global_ctors
+#endif /* L_ctors */
+
+#ifdef L_dtors
+ .section .fini6,"ax",@progbits
+DEFUN __do_global_dtors
+#if defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__dtors_end)
+ ldi r28, lo8(__dtors_start)
+ ldi r29, hi8(__dtors_start)
+ ldi r16, hh8(__dtors_start)
+ rjmp .L__do_global_dtors_start
+.L__do_global_dtors_loop:
+ sbiw r28, 2
+ sbc r16, __zero_reg__
+ mov_h r31, r29
+ mov_l r30, r28
+ out __RAMPZ__, r16
+ XCALL __tablejump_elpm__
+.L__do_global_dtors_start:
+ cpi r28, lo8(__dtors_end)
+ cpc r29, r17
+ ldi r24, hh8(__dtors_end)
+ cpc r16, r24
+ brne .L__do_global_dtors_loop
+#else
+ ldi r17, hi8(__dtors_end)
+ ldi r28, lo8(__dtors_start)
+ ldi r29, hi8(__dtors_start)
+ rjmp .L__do_global_dtors_start
+.L__do_global_dtors_loop:
+ mov_h r31, r29
+ mov_l r30, r28
+ XCALL __tablejump__
+ adiw r28, 2
+.L__do_global_dtors_start:
+ cpi r28, lo8(__dtors_end)
+ cpc r29, r17
+ brne .L__do_global_dtors_loop
+#endif /* defined(__AVR_HAVE_ELPM__) */
+ENDF __do_global_dtors
+#endif /* L_dtors */
+
+.section .text.libgcc, "ax", @progbits
+
+#ifdef L_tablejump_elpm
+DEFUN __tablejump_elpm__
+#if defined (__AVR_HAVE_ELPMX__)
+ elpm __tmp_reg__, Z+
+ elpm r31, Z
+ mov r30, __tmp_reg__
+#if defined (__AVR_HAVE_RAMPD__)
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+#endif /* RAMPD */
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+#elif defined (__AVR_HAVE_ELPM__)
+ elpm
+ adiw r30, 1
+ push r0
+ elpm
+ push r0
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ in __tmp_reg__, __EIND__
+ push __tmp_reg__
+#endif
+ ret
+#endif
+ENDF __tablejump_elpm__
+#endif /* defined (L_tablejump_elpm) */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Loading n bytes from Flash; n = 3,4
+;; R22... = Flash[Z]
+;; Clobbers: __tmp_reg__
+
+#if (defined (L_load_3) \
+ || defined (L_load_4)) \
+ && !defined (__AVR_HAVE_LPMX__)
+
+;; Destination
+#define D0 22
+#define D1 D0+1
+#define D2 D0+2
+#define D3 D0+3
+
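+;; .load dest, n: read one byte from flash at Z into DEST.  Z is advanced
+;; after each of the first N-1 bytes and wound back to its initial value
+;; after the last one, so __load_4 below leaves Z unchanged.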
+.macro .load dest, n
+ lpm
+ mov \dest, r0
+.if \dest != D0+\n-1
+ adiw r30, 1
+.else
+ sbiw r30, \n-1
+.endif
+.endm
+
+#if defined (L_load_3)
+DEFUN __load_3
+ push D3
+ XCALL __load_4
+ pop D3
+ ret
+ENDF __load_3
+#endif /* L_load_3 */
+
+#if defined (L_load_4)
+DEFUN __load_4
+ .load D0, 4
+ .load D1, 4
+ .load D2, 4
+ .load D3, 4
+ ret
+ENDF __load_4
+#endif /* L_load_4 */
+
+#endif /* L_load_3 || L_load_4 */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Loading n bytes from Flash or RAM; n = 1,2,3,4
+;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
+;; Clobbers: __tmp_reg__, R21, R30, R31
+
+#if (defined (L_xload_1) \
+ || defined (L_xload_2) \
+ || defined (L_xload_3) \
+ || defined (L_xload_4))
+
+;; Destination
+#define D0 22
+#define D1 D0+1
+#define D2 D0+2
+#define D3 D0+3
+
+;; Register containing bits 16+ of the address
+
+#define HHI8 21
+
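+;; .xload dest, n: read one byte from flash at RAMPZ:Z (resp. Z) into DEST,
+;; advancing the address between the N bytes and, where ELPM is used,
+;; keeping RAMPZ in sync with HHI8; after the last byte RAMPZ is reset to 0
+;; on devices where it also affects RAM accesses (RAMPD).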
+.macro .xload dest, n
+#if defined (__AVR_HAVE_ELPMX__)
+ elpm \dest, Z+
+#elif defined (__AVR_HAVE_ELPM__)
+ elpm
+ mov \dest, r0
+.if \dest != D0+\n-1
+ adiw r30, 1
+ adc HHI8, __zero_reg__
+ out __RAMPZ__, HHI8
+.endif
+#elif defined (__AVR_HAVE_LPMX__)
+ lpm \dest, Z+
+#else
+ lpm
+ mov \dest, r0
+.if \dest != D0+\n-1
+ adiw r30, 1
+.endif
+#endif
+#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
+.if \dest == D0+\n-1
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+.endif
+#endif
+.endm ; .xload
+
+#if defined (L_xload_1)
+DEFUN __xload_1
+#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
+ sbrc HHI8, 7
+ ld D0, Z
+ sbrs HHI8, 7
+ lpm D0, Z
+ ret
+#else
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 1
+ ret
+1: ld D0, Z
+ ret
+#endif /* LPMx && ! ELPM */
+ENDF __xload_1
+#endif /* L_xload_1 */
+
+#if defined (L_xload_2)
+DEFUN __xload_2
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 2
+ .xload D1, 2
+ ret
+1: ld D0, Z+
+ ld D1, Z+
+ ret
+ENDF __xload_2
+#endif /* L_xload_2 */
+
+#if defined (L_xload_3)
+DEFUN __xload_3
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 3
+ .xload D1, 3
+ .xload D2, 3
+ ret
+1: ld D0, Z+
+ ld D1, Z+
+ ld D2, Z+
+ ret
+ENDF __xload_3
+#endif /* L_xload_3 */
+
+#if defined (L_xload_4)
+DEFUN __xload_4
+ sbrc HHI8, 7
+ rjmp 1f
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif /* __AVR_HAVE_ELPM__ */
+ .xload D0, 4
+ .xload D1, 4
+ .xload D2, 4
+ .xload D3, 4
+ ret
+1: ld D0, Z+
+ ld D1, Z+
+ ld D2, Z+
+ ld D3, Z+
+ ret
+ENDF __xload_4
+#endif /* L_xload_4 */
+
+#endif /* L_xload_{1|2|3|4} */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; memcopy from Address Space __pgmx to RAM
+;; R23:Z = Source Address
+;; X = Destination Address
+;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
+
+#if defined (L_movmemx)
+
+#define HHI8 23
+#define LOOP 24
+
+DEFUN __movmemx_qi
+ ;; #Bytes to copy fit in 8 Bits (1..255)
+ ;; Zero-extend Loop Counter
+ clr LOOP+1
+ ;; FALLTHRU
+ENDF __movmemx_qi
+
+DEFUN __movmemx_hi
+
+;; Read from where?
+ sbrc HHI8, 7
+ rjmp 1f
+
+;; Read from Flash
+
+#if defined (__AVR_HAVE_ELPM__)
+ out __RAMPZ__, HHI8
+#endif
+
+0: ;; Load 1 Byte from Flash...
+
+#if defined (__AVR_HAVE_ELPMX__)
+ elpm r0, Z+
+#elif defined (__AVR_HAVE_ELPM__)
+ elpm
+ adiw r30, 1
+ adc HHI8, __zero_reg__
+ out __RAMPZ__, HHI8
+#elif defined (__AVR_HAVE_LPMX__)
+ lpm r0, Z+
+#else
+ lpm
+ adiw r30, 1
+#endif
+
+ ;; ...and store that Byte to RAM Destination
+ st X+, r0
+ sbiw LOOP, 1
+ brne 0b
+#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
+ ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
+ out __RAMPZ__, __zero_reg__
+#endif /* ELPM && RAMPD */
+ ret
+
+;; Read from RAM
+
+1: ;; Read 1 Byte from RAM...
+ ld r0, Z+
+ ;; and store that Byte to RAM Destination
+ st X+, r0
+ sbiw LOOP, 1
+ brne 1b
+ ret
+ENDF __movmemx_hi
+
+#undef HHI8
+#undef LOOP
+
+#endif /* L_movmemx */
+
+
+.section .text.libgcc.builtins, "ax", @progbits
+
+/**********************************
+ * Find first set Bit (ffs)
+ **********************************/
+
+#if defined (L_ffssi2)
+;; find first set bit
+;; r25:r24 = ffs32 (r25:r22)
+;; clobbers: r22, r26
+DEFUN __ffssi2
+ clr r26
+ tst r22
+ brne 1f
+ subi r26, -8
+ or r22, r23
+ brne 1f
+ subi r26, -8
+ or r22, r24
+ brne 1f
+ subi r26, -8
+ or r22, r25
+ brne 1f
+ ret
+1: mov r24, r22
+ XJMP __loop_ffsqi2
+ENDF __ffssi2
+#endif /* defined (L_ffssi2) */
+
+#if defined (L_ffshi2)
+;; find first set bit
+;; r25:r24 = ffs16 (r25:r24)
+;; clobbers: r26
+DEFUN __ffshi2
+ clr r26
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have a problem skipping a 2-word instruction
+ tst r24
+ breq 2f
+#else
+ cpse r24, __zero_reg__
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+1: XJMP __loop_ffsqi2
+2: ldi r26, 8
+ or r24, r25
+ brne 1b
+ ret
+ENDF __ffshi2
+#endif /* defined (L_ffshi2) */
+
+#if defined (L_loop_ffsqi2)
+;; Helper for ffshi2, ffssi2
+;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
+;; r24 must be != 0
+;; clobbers: r26
+DEFUN __loop_ffsqi2
+ inc r26
+ lsr r24
+ brcc __loop_ffsqi2
+ mov r24, r26
+ clr r25
+ ret
+ENDF __loop_ffsqi2
+#endif /* defined (L_loop_ffsqi2) */
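+
+;; Reminder on the convention implemented above: the result is the 1-based
+;; index of the least significant 1-bit, e.g. ffs32 (0x40) = 7, and the
+;; early RET paths return 0 for a zero argument.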
+
+
+/**********************************
+ * Count trailing Zeros (ctz)
+ **********************************/
+
+#if defined (L_ctzsi2)
+;; count trailing zeros
+;; r25:r24 = ctz32 (r25:r22)
+;; clobbers: r26, r22
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
+DEFUN __ctzsi2
+ XCALL __ffssi2
+ dec r24
+ ret
+ENDF __ctzsi2
+#endif /* defined (L_ctzsi2) */
+
+#if defined (L_ctzhi2)
+;; count trailing zeros
+;; r25:r24 = ctz16 (r25:r24)
+;; clobbers: r26
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
+DEFUN __ctzhi2
+ XCALL __ffshi2
+ dec r24
+ ret
+ENDF __ctzhi2
+#endif /* defined (L_ctzhi2) */
+
+
+/**********************************
+ * Count leading Zeros (clz)
+ **********************************/
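+;; For reference (a sketch): clz (x) counts the leading 0-bits of x.
+;; The wider variants reduce to __clzhi2 on the high half first:
+;;   clz32 (x) == (x >> 16) ? clz16 (x >> 16) : 16 + clz16 ((uint16_t) x)
+;;   clz64 (x) == (x >> 32) ? clz32 (x >> 32) : 32 + clz32 ((uint32_t) x)
+;; The SBRS tests below check whether the first call returned exactly
+;; 32 resp. 16, i.e. whether the high half was all zero.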
+
+#if defined (L_clzdi2)
+;; count leading zeros
+;; r25:r24 = clz64 (r25:r18)
+;; clobbers: r22, r23, r26
+DEFUN __clzdi2
+ XCALL __clzsi2
+ sbrs r24, 5
+ ret
+ mov_l r22, r18
+ mov_h r23, r19
+ mov_l r24, r20
+ mov_h r25, r21
+ XCALL __clzsi2
+ subi r24, -32
+ ret
+ENDF __clzdi2
+#endif /* defined (L_clzdi2) */
+
+#if defined (L_clzsi2)
+;; count leading zeros
+;; r25:r24 = clz32 (r25:r22)
+;; clobbers: r26
+DEFUN __clzsi2
+ XCALL __clzhi2
+ sbrs r24, 4
+ ret
+ mov_l r24, r22
+ mov_h r25, r23
+ XCALL __clzhi2
+ subi r24, -16
+ ret
+ENDF __clzsi2
+#endif /* defined (L_clzsi2) */
+
+#if defined (L_clzhi2)
+;; count leading zeros
+;; r25:r24 = clz16 (r25:r24)
+;; clobbers: r26
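+;;
+;; Strategy (descriptive only): skip a zero high byte first (adding 8 to
+;; the bias in r26); if the remaining byte is < 16 its high nibble is
+;; zero, so add 3, SWAP the nibbles and fall through the INC at label 2
+;; (3 + 1 = 4 skipped bits); finally shift left until the leading 1-bit
+;; drops into the carry, counting one more per extra shift.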
+DEFUN __clzhi2
+ clr r26
+ tst r25
+ brne 1f
+ subi r26, -8
+ or r25, r24
+ brne 1f
+ ldi r24, 16
+ ret
+1: cpi r25, 16
+ brsh 3f
+ subi r26, -3
+ swap r25
+2: inc r26
+3: lsl r25
+ brcc 2b
+ mov r24, r26
+ clr r25
+ ret
+ENDF __clzhi2
+#endif /* defined (L_clzhi2) */
+
+
+/**********************************
+ * Parity
+ **********************************/
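+;; For reference (a sketch): parity (x) is 1 iff the number of 1-bits in
+;; x is odd, i.e. the XOR of all bits of x.  The wider variants XOR
+;; their halves together and defer to the next narrower routine:
+;;   parity16 (x) == parity8  ((x >> 8)  ^ (x & 0xff))
+;;   parity32 (x) == parity16 ((x >> 16) ^ (x & 0xffff))
+;;   parity64 (x) == parity32 ((x >> 32) ^ (x & 0xffffffff))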
+
+#if defined (L_paritydi2)
+;; r25:r24 = parity64 (r25:r18)
+;; clobbers: __tmp_reg__
+DEFUN __paritydi2
+ eor r24, r18
+ eor r24, r19
+ eor r24, r20
+ eor r24, r21
+ XJMP __paritysi2
+ENDF __paritydi2
+#endif /* defined (L_paritydi2) */
+
+#if defined (L_paritysi2)
+;; r25:r24 = parity32 (r25:r22)
+;; clobbers: __tmp_reg__
+DEFUN __paritysi2
+ eor r24, r22
+ eor r24, r23
+ XJMP __parityhi2
+ENDF __paritysi2
+#endif /* defined (L_paritysi2) */
+
+#if defined (L_parityhi2)
+;; r25:r24 = parity16 (r25:r24)
+;; clobbers: __tmp_reg__
+DEFUN __parityhi2
+ eor r24, r25
+;; FALLTHRU
+ENDF __parityhi2
+
+;; r25:r24 = parity8 (r24)
+;; clobbers: __tmp_reg__
+DEFUN __parityqi2
+ ;; parity is in r24[0..7]
+ mov __tmp_reg__, r24
+ swap __tmp_reg__
+ eor r24, __tmp_reg__
+ ;; parity is in r24[0..3]
+ subi r24, -4
+ andi r24, -5
+ subi r24, -6
+ ;; parity is in r24[0,3]
+ sbrc r24, 3
+ inc r24
+ ;; parity is in r24[0]
+ andi r24, 1
+ clr r25
+ ret
+ENDF __parityqi2
+#endif /* defined (L_parityhi2) */
+
+
+/**********************************
+ * Population Count
+ **********************************/
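+;; For reference (a sketch): popcount (x) is the number of 1-bits in x.
+;; The wider variants sum the counts of their halves:
+;;   popcount16 (x) == popcount8  (x & 0xff)   + popcount8  (x >> 8)
+;;   popcount32 (x) == popcount16 (x & 0xffff) + popcount16 (x >> 16)
+;;   popcount64 (x) == popcount32 (low 32 bits of x) + popcount32 (x >> 32)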
+
+#if defined (L_popcounthi2)
+;; population count
+;; r25:r24 = popcount16 (r25:r24)
+;; clobbers: __tmp_reg__
+DEFUN __popcounthi2
+ XCALL __popcountqi2
+ push r24
+ mov r24, r25
+ XCALL __popcountqi2
+ clr r25
+ ;; FALLTHRU
+ENDF __popcounthi2
+
+DEFUN __popcounthi2_tail
+ pop __tmp_reg__
+ add r24, __tmp_reg__
+ ret
+ENDF __popcounthi2_tail
+#endif /* defined (L_popcounthi2) */
+
+#if defined (L_popcountsi2)
+;; population count
+;; r25:r24 = popcount32 (r25:r22)
+;; clobbers: __tmp_reg__
+DEFUN __popcountsi2
+ XCALL __popcounthi2
+ push r24
+ mov_l r24, r22
+ mov_h r25, r23
+ XCALL __popcounthi2
+ XJMP __popcounthi2_tail
+ENDF __popcountsi2
+#endif /* defined (L_popcountsi2) */
+
+#if defined (L_popcountdi2)
+;; population count
+;; r25:r24 = popcount64 (r25:r18)
+;; clobbers: r22, r23, __tmp_reg__
+DEFUN __popcountdi2
+ XCALL __popcountsi2
+ push r24
+ mov_l r22, r18
+ mov_h r23, r19
+ mov_l r24, r20
+ mov_h r25, r21
+ XCALL __popcountsi2
+ XJMP __popcounthi2_tail
+ENDF __popcountdi2
+#endif /* defined (L_popcountdi2) */
+
+#if defined (L_popcountqi2)
+;; population count
+;; r24 = popcount8 (r24)
+;; clobbers: __tmp_reg__
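+;;
+;; Technique (descriptive only): bit 0 of the input seeds the count;
+;; the remaining bits are shifted out of __tmp_reg__ one at a time and
+;; accumulated through the carry (ADC).  The final ADC adds the last
+;; carry plus __tmp_reg__ itself, which by then holds just the former
+;; bit 7, saving one LSR.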
+DEFUN __popcountqi2
+ mov __tmp_reg__, r24
+ andi r24, 1
+ lsr __tmp_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __zero_reg__
+ lsr __tmp_reg__
+ adc r24, __tmp_reg__
+ ret
+ENDF __popcountqi2
+#endif /* defined (L_popcountqi2) */
+
+
+/**********************************
+ * Swap bytes
+ **********************************/
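+;; For reference (a sketch): bswap32/bswap64 reverse the byte order of
+;; their argument, e.g. bswap32 (0x11223344) == 0x44332211.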
+
+;; swap two registers with different register numbers (XOR swap;
+;; using the same register for both operands would clear it)
+.macro bswap a, b
+ eor \a, \b
+ eor \b, \a
+ eor \a, \b
+.endm
+
+#if defined (L_bswapsi2)
+;; swap bytes
+;; r25:r22 = bswap32 (r25:r22)
+DEFUN __bswapsi2
+ bswap r22, r25
+ bswap r23, r24
+ ret
+ENDF __bswapsi2
+#endif /* defined (L_bswapsi2) */
+
+#if defined (L_bswapdi2)
+;; swap bytes
+;; r25:r18 = bswap64 (r25:r18)
+DEFUN __bswapdi2
+ bswap r18, r25
+ bswap r19, r24
+ bswap r20, r23
+ bswap r21, r22
+ ret
+ENDF __bswapdi2
+#endif /* defined (L_bswapdi2) */
+
+
+/**********************************
+ * 64-bit shifts
+ **********************************/
+
+#if defined (L_ashrdi3)
+;; Arithmetic shift right
+;; r25:r18 = ashr64 (r25:r18, r17:r16)
+DEFUN __ashrdi3
+ bst r25, 7
+ bld __zero_reg__, 0
+ ;; FALLTHRU
+ENDF __ashrdi3
+
+;; Logical shift right
+;; r25:r18 = lshr64 (r25:r18, r17:r16)
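+;;
+;; Entry trick (descriptive only): __ashrdi3 parks the sign bit in bit 0
+;; of __zero_reg__.  The LSR below moves it into the carry (restoring
+;; __zero_reg__ to 0), and SBC __tmp_reg__,__tmp_reg__ turns that carry
+;; into the fill byte 0x00 or 0xFF.  The byte loop rotates this fill
+;; byte in from the top, and ASR __tmp_reg__ regenerates the fill bit
+;; for every single-bit step.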
+DEFUN __lshrdi3
+ lsr __zero_reg__
+ sbc __tmp_reg__, __tmp_reg__
+ push r16
+0: cpi r16, 8
+ brlo 2f
+ subi r16, 8
+ mov r18, r19
+ mov r19, r20
+ mov r20, r21
+ mov r21, r22
+ mov r22, r23
+ mov r23, r24
+ mov r24, r25
+ mov r25, __tmp_reg__
+ rjmp 0b
+1: asr __tmp_reg__
+ ror r25
+ ror r24
+ ror r23
+ ror r22
+ ror r21
+ ror r20
+ ror r19
+ ror r18
+2: dec r16
+ brpl 1b
+ pop r16
+ ret
+ENDF __lshrdi3
+#endif /* defined (L_ashrdi3) */
+
+#if defined (L_ashldi3)
+;; Shift left
+;; r25:r18 = ashl64 (r25:r18, r17:r16)
+DEFUN __ashldi3
+ push r16
+0: cpi r16, 8
+ brlo 2f
+ mov r25, r24
+ mov r24, r23
+ mov r23, r22
+ mov r22, r21
+ mov r21, r20
+ mov r20, r19
+ mov r19, r18
+ clr r18
+ subi r16, 8
+ rjmp 0b
+1: lsl r18
+ rol r19
+ rol r20
+ rol r21
+ rol r22
+ rol r23
+ rol r24
+ rol r25
+2: dec r16
+ brpl 1b
+ pop r16
+ ret
+ENDF __ashldi3
+#endif /* defined (L_ashldi3) */
+
+#if defined (L_rotldi3)
+;; Rotate left
+;; r25:r18 = rotl64 (r25:r18, r17:r16)
+DEFUN __rotldi3
+ push r16
+0: cpi r16, 8
+ brlo 2f
+ subi r16, 8
+ mov __tmp_reg__, r25
+ mov r25, r24
+ mov r24, r23
+ mov r23, r22
+ mov r22, r21
+ mov r21, r20
+ mov r20, r19
+ mov r19, r18
+ mov r18, __tmp_reg__
+ rjmp 0b
+1: lsl r18
+ rol r19
+ rol r20
+ rol r21
+ rol r22
+ rol r23
+ rol r24
+ rol r25
+ adc r18, __zero_reg__
+2: dec r16
+ brpl 1b
+ pop r16
+ ret
+ENDF __rotldi3
+#endif /* defined (L_rotldi3) */
+
+
+.section .text.libgcc.fmul, "ax", @progbits
+
+/***********************************************************/
+;;; Softmul versions of FMUL, FMULS and FMULSU to implement
+;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
+/***********************************************************/
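+;; For reference (a sketch): like the hardware instructions, these
+;; return the 16-bit product shifted left by one, i.e. a 1.15 result
+;; from two 1.7 fixed-point inputs:
+;;   fmul   (a, b) -> low 16 bits of ((unsigned) a * (unsigned) b) << 1
+;;   fmuls  (a, b) -> low 16 bits of ((signed)   a * (signed)   b) << 1
+;;   fmulsu (a, b) -> low 16 bits of ((signed)   a * (unsigned) b) << 1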
+
+#define A1 24
+#define B1 25
+#define C0 22
+#define C1 23
+#define A0 __tmp_reg__
+
+#ifdef L_fmuls
+;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmuls
+ ;; A0.7 = negate result?
+ mov A0, A1
+ eor A0, B1
+ ;; B1 = |B1|
+ sbrc B1, 7
+ neg B1
+ XJMP __fmulsu_exit
+ENDF __fmuls
+#endif /* L_fmuls */
+
+#ifdef L_fmulsu
+;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmulsu
+ ;; A0.7 = negate result?
+ mov A0, A1
+;; FALLTHRU
+ENDF __fmulsu
+
+;; Helper for __fmuls and __fmulsu
+DEFUN __fmulsu_exit
+ ;; A1 = |A1|
+ sbrc A1, 7
+ neg A1
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Some cores have a problem skipping a 2-word instruction
+ tst A0
+ brmi 1f
+#else
+ sbrs A0, 7
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ XJMP __fmul
+1: XCALL __fmul
+ ;; C = -C iff A0.7 = 1
+ NEG2 C0
+ ret
+ENDF __fmulsu_exit
+#endif /* L_fmulsu */
+
+
+#ifdef L_fmul
+;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
+;;; Clobbers: r24, r25, __tmp_reg__
+DEFUN __fmul
+ ; clear result
+ clr C0
+ clr C1
+ clr A0
+1: tst B1
+ ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
+2: brpl 3f
+ ;; C += A
+ add C0, A0
+ adc C1, A1
+3: ;; A >>= 1
+ lsr A1
+ ror A0
+ ;; B <<= 1
+ lsl B1
+ brne 2b
+ ret
+ENDF __fmul
+#endif /* L_fmul */
+
+#undef A0
+#undef A1
+#undef B1
+#undef C0
+#undef C1
+
+#include "lib1funcs-fixed.S"