1 files changed, 837 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/h8300/lib1funcs.S b/gcc-4.9/libgcc/config/h8300/lib1funcs.S
new file mode 100644
index 000000000..4bfa7bee1
--- /dev/null
+++ b/gcc-4.9/libgcc/config/h8300/lib1funcs.S
@@ -0,0 +1,837 @@
+;; libgcc routines for the Renesas H8/300 CPU.
+;; Contributed by Steve Chamberlain <sac@cygnus.com>
+;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
+
+/* Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Assembler register definitions.  */
+
+#define A0 r0
+#define A0L r0l
+#define A0H r0h
+
+#define A1 r1
+#define A1L r1l
+#define A1H r1h
+
+#define A2 r2
+#define A2L r2l
+#define A2H r2h
+
+#define A3 r3
+#define A3L r3l
+#define A3H r3h
+
+#define S0 r4
+#define S0L r4l
+#define S0H r4h
+
+#define S1 r5
+#define S1L r5l
+#define S1H r5h
+
+#define S2 r6
+#define S2L r6l
+#define S2H r6h
+
+#ifdef __H8300__
+#define PUSHP	push
+#define POPP	pop
+
+#define A0P	r0
+#define A1P	r1
+#define A2P	r2
+#define A3P	r3
+#define S0P	r4
+#define S1P	r5
+#define S2P	r6
+#endif
+
+#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
+#define PUSHP	push.l
+#define POPP	pop.l
+
+#define A0P	er0
+#define A1P	er1
+#define A2P	er2
+#define A3P	er3
+#define S0P	er4
+#define S1P	er5
+#define S2P	er6
+
+#define A0E	e0
+#define A1E	e1
+#define A2E	e2
+#define A3E	e3
+#endif
+
+#ifdef __H8300H__
+#ifdef __NORMAL_MODE__
+	.h8300hn
+#else
+	.h8300h
+#endif
+#endif
+
+#ifdef __H8300S__
+#ifdef __NORMAL_MODE__
+	.h8300sn
+#else
+	.h8300s
+#endif
+#endif
+#ifdef __H8300SX__
+#ifdef __NORMAL_MODE__
+	.h8300sxn
+#else
+	.h8300sx
+#endif
+#endif
+
+#ifdef L_cmpsi2
+#ifdef __H8300__
+	.section .text
+	.align 2
+	.global ___cmpsi2
+___cmpsi2:
+	cmp.w	A0,A2
+	bne	.L2
+	cmp.w	A1,A3
+	bne	.L4
+	mov.w	#1,A0
+	rts
+.L2:
+	bgt	.L5
+.L3:
+	mov.w	#2,A0
+	rts
+.L4:
+	bls	.L3
+.L5:
+	sub.w	A0,A0
+	rts
+	.end
+#endif
+#endif /* L_cmpsi2 */
+
+#ifdef L_ucmpsi2
+#ifdef __H8300__
+	.section .text
+	.align 2
+	.global ___ucmpsi2
+___ucmpsi2:
+	cmp.w	A0,A2
+	bne	.L2
+	cmp.w	A1,A3
+	bne	.L4
+	mov.w	#1,A0
+	rts
+.L2:
+	bhi	.L5
+.L3:
+	mov.w	#2,A0
+	rts
+.L4:
+	bls	.L3
+.L5:
+	sub.w	A0,A0
+	rts
+	.end
+#endif
+#endif /* L_ucmpsi2 */
+
+#ifdef L_divhi3
+
+;; HImode divides for the H8/300.
+;; We bunch all of this into one object file since there are several
+;; "supporting routines".
+
+; general purpose normalize routine
+;
+; divisor in A0
+; dividend in A1
+; turns both into +ve numbers, and leaves what the answer sign
+; should be in A2L
+
+#ifdef __H8300__
+	.section .text
+	.align 2
+divnorm:
+	or	A0H,A0H		; is divisor > 0
+	stc	ccr,A2L
+	bge	_lab1
+	not	A0H		; no - then make it +ve
+	not	A0L
+	adds	#1,A0
+_lab1:	or	A1H,A1H	; look at dividend
+	bge	_lab2
+	not	A1H		; it is -ve, make it positive
+	not	A1L
+	adds	#1,A1
+	xor	#0x8,A2L; and toggle sign of result
+_lab2:	rts
+;; Basically the same, except that the sign of the divisor determines
+;; the sign.
+modnorm:
+	or	A0H,A0H		; is divisor > 0
+	stc	ccr,A2L
+	bge	_lab7
+	not	A0H		; no - then make it +ve
+	not	A0L
+	adds	#1,A0
+_lab7:	or	A1H,A1H	; look at dividend
+	bge	_lab8
+	not	A1H		; it is -ve, make it positive
+	not	A1L
+	adds	#1,A1
+_lab8:	rts
+
+; A0=A0/A1 signed
+
+	.global	___divhi3
+___divhi3:
+	bsr	divnorm
+	bsr	___udivhi3
+negans:	btst	#3,A2L	; should answer be negative ?
+	beq	_lab4
+	not	A0H	; yes, so make it so
+	not	A0L
+	adds	#1,A0
+_lab4:	rts
+
+; A0=A0%A1 signed
+
+	.global	___modhi3
+___modhi3:
+	bsr	modnorm
+	bsr	___udivhi3
+	mov	A3,A0
+	bra	negans
+
+; A0=A0%A1 unsigned
+
+	.global	___umodhi3
+___umodhi3:
+	bsr	___udivhi3
+	mov	A3,A0
+	rts
+
+; A0=A0/A1 unsigned
+; A3=A0%A1 unsigned
+; A2H trashed
+; D high 8 bits of denom
+; d low 8 bits of denom
+; N high 8 bits of num
+; n low 8 bits of num
+; M high 8 bits of mod
+; m low 8 bits of mod
+; Q high 8 bits of quot
+; q low 8 bits of quot
+; P preserve
+
+; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
+; see how to partition up the expression.
+
+	.global	___udivhi3
+___udivhi3:
+				; A0 A1 A2 A3
+				; Nn Dd       P
+	sub.w	A3,A3		; Nn Dd xP 00
+	or	A1H,A1H
+	bne	divlongway
+	or	A0H,A0H
+	beq	_lab6
+
+; we know that D == 0 and N is != 0
+	mov.b	A0H,A3L		; Nn Dd xP 0N
+	divxu	A1L,A3		;          MQ
+	mov.b	A3L,A0H	 	; Q
+; dealt with N, do n
+_lab6:	mov.b	A0L,A3L		;           n
+	divxu	A1L,A3		;          mq
+	mov.b	A3L,A0L		; Qq
+	mov.b	A3H,A3L         ;           m
+	mov.b	#0x0,A3H	; Qq       0m
+	rts
+
+; D != 0 - which means the denominator is
+;          loop around to get the result.
+
+divlongway:
+	mov.b	A0H,A3L		; Nn Dd xP 0N
+	mov.b	#0x0,A0H	; high byte of answer has to be zero
+	mov.b	#0x8,A2H	;       8
+div8:	add.b	A0L,A0L		; n*=2
+	rotxl	A3L		; Make remainder bigger
+	rotxl	A3H
+	sub.w	A1,A3		; Q-=N
+	bhs	setbit		; set a bit ?
+	add.w	A1,A3		;  no : too far , Q+=N
+
+	dec	A2H
+	bne	div8		; next bit
+	rts
+
+setbit:	inc	A0L		; do insert bit
+	dec	A2H
+	bne	div8		; next bit
+	rts
+
+#endif /* __H8300__ */
+#endif /* L_divhi3 */
+
+#ifdef L_divsi3
+
+;; 4 byte integer divides for the H8/300.
+;;
+;; We have one routine which does all the work and lots of
+;; little ones which prepare the args and massage the sign.
+;; We bunch all of this into one object file since there are several
+;; "supporting routines".
+
+	.section .text
+	.align 2
+
+; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest.
+; This function is here to keep branch displacements small.
+
+#ifdef __H8300__
+
+divnorm:
+	mov.b	A0H,A0H		; is the numerator -ve
+	stc	ccr,S2L		; keep the sign in bit 3 of S2L
+	bge	postive
+
+	; negate arg
+	not	A0H
+	not	A1H
+	not	A0L
+	not	A1L
+
+	add	#1,A1L
+	addx	#0,A1H
+	addx	#0,A0L
+	addx	#0,A0H
+postive:
+	mov.b	A2H,A2H		; is the denominator -ve
+	bge	postive2
+	not	A2L
+	not	A2H
+	not	A3L
+	not	A3H
+	add.b	#1,A3L
+	addx	#0,A3H
+	addx	#0,A2L
+	addx	#0,A2H
+	xor.b	#0x08,S2L	; toggle the result sign
+postive2:
+	rts
+
+;; Basically the same, except that the sign of the divisor determines
+;; the sign.
+modnorm:
+	mov.b	A0H,A0H		; is the numerator -ve
+	stc	ccr,S2L		; keep the sign in bit 3 of S2L
+	bge	mpostive
+
+	; negate arg
+	not	A0H
+	not	A1H
+	not	A0L
+	not	A1L
+
+	add	#1,A1L
+	addx	#0,A1H
+	addx	#0,A0L
+	addx	#0,A0H
+mpostive:
+	mov.b	A2H,A2H		; is the denominator -ve
+	bge	mpostive2
+	not	A2L
+	not	A2H
+	not	A3L
+	not	A3H
+	add.b	#1,A3L
+	addx	#0,A3H
+	addx	#0,A2L
+	addx	#0,A2H
+mpostive2:
+	rts
+
+#else /* __H8300H__ */
+
+divnorm:
+	mov.l	A0P,A0P		; is the numerator -ve
+	stc	ccr,S2L		; keep the sign in bit 3 of S2L
+	bge	postive
+
+	neg.l	A0P		; negate arg
+
+postive:
+	mov.l	A1P,A1P		; is the denominator -ve
+	bge	postive2
+
+	neg.l	A1P		; negate arg
+	xor.b	#0x08,S2L	; toggle the result sign
+
+postive2:
+	rts
+
+;; Basically the same, except that the sign of the divisor determines
+;; the sign.
+modnorm:
+	mov.l	A0P,A0P		; is the numerator -ve
+	stc	ccr,S2L		; keep the sign in bit 3 of S2L
+	bge	mpostive
+
+	neg.l	A0P		; negate arg
+
+mpostive:
+	mov.l	A1P,A1P		; is the denominator -ve
+	bge	mpostive2
+
+	neg.l	A1P		; negate arg
+
+mpostive2:
+	rts
+
+#endif
+
+; numerator in A0/A1
+; denominator in A2/A3
+	.global	___modsi3
+___modsi3:
+#ifdef __H8300__
+	PUSHP	S2P
+	PUSHP	S0P
+	PUSHP	S1P
+	bsr	modnorm
+	bsr	divmodsi4
+	mov	S0,A0
+	mov	S1,A1
+	bra	exitdiv
+#else
+	PUSHP	S2P
+	bsr	modnorm
+	bsr	___udivsi3
+	mov.l	er3,er0
+	bra	exitdiv
+#endif
+
+	;; H8/300H and H8S version of ___udivsi3 is defined later in
+	;; the file.
+#ifdef __H8300__
+	.global	___udivsi3
+___udivsi3:
+	PUSHP	S2P
+	PUSHP	S0P
+	PUSHP	S1P
+	bsr	divmodsi4
+	bra	reti
+#endif
+
+	.global	___umodsi3
+___umodsi3:
+#ifdef __H8300__
+	PUSHP	S2P
+	PUSHP	S0P
+	PUSHP	S1P
+	bsr	divmodsi4
+	mov	S0,A0
+	mov	S1,A1
+	bra	reti
+#else
+	bsr	___udivsi3
+	mov.l	er3,er0
+	rts
+#endif
+
+	.global	___divsi3
+___divsi3:
+#ifdef __H8300__
+	PUSHP	S2P
+	PUSHP	S0P
+	PUSHP	S1P
+	jsr	divnorm
+	jsr	divmodsi4
+#else
+	PUSHP	S2P
+	jsr	divnorm
+	bsr	___udivsi3
+#endif
+
+	; examine what the sign should be
+exitdiv:
+	btst	#3,S2L
+	beq	reti
+
+	; should be -ve
+#ifdef __H8300__
+	not	A0H
+	not	A1H
+	not	A0L
+	not	A1L
+
+	add	#1,A1L
+	addx	#0,A1H
+	addx	#0,A0L
+	addx	#0,A0H
+#else /* __H8300H__ */
+	neg.l	A0P
+#endif
+
+reti:
+#ifdef __H8300__
+	POPP	S1P
+	POPP	S0P
+#endif
+	POPP	S2P
+	rts
+
+	; takes A0/A1 numerator (A0P for H8/300H)
+	; A2/A3 denominator (A1P for H8/300H)
+	; returns A0/A1 quotient (A0P for H8/300H)
+	; S0/S1 remainder (S0P for H8/300H)
+	; trashes S2H
+
+#ifdef __H8300__
+
+divmodsi4:
+        sub.w	S0,S0		; zero play area
+        mov.w	S0,S1
+        mov.b	A2H,S2H
+        or	A2L,S2H
+        or	A3H,S2H
+        bne	DenHighNonZero
+        mov.b	A0H,A0H
+        bne	NumByte0Zero
+        mov.b	A0L,A0L
+        bne	NumByte1Zero
+        mov.b	A1H,A1H
+        bne	NumByte2Zero
+        bra	NumByte3Zero
+NumByte0Zero:
+	mov.b	A0H,S1L
+        divxu	A3L,S1
+        mov.b	S1L,A0H
+NumByte1Zero:
+	mov.b	A0L,S1L
+        divxu	A3L,S1
+        mov.b	S1L,A0L
+NumByte2Zero:
+	mov.b	A1H,S1L
+        divxu	A3L,S1
+        mov.b	S1L,A1H
+NumByte3Zero:
+	mov.b	A1L,S1L
+        divxu	A3L,S1
+        mov.b	S1L,A1L
+
+        mov.b	S1H,S1L
+        mov.b	#0x0,S1H
+        rts
+
+; have to do the divide by shift and test
+DenHighNonZero:
+	mov.b	A0H,S1L
+        mov.b	A0L,A0H
+        mov.b	A1H,A0L
+        mov.b	A1L,A1H
+
+        mov.b	#0,A1L
+        mov.b	#24,S2H	; only do 24 iterations
+
+nextbit:
+	add.w	A1,A1	; double the answer guess
+        rotxl	A0L
+        rotxl	A0H
+
+        rotxl	S1L	; double remainder
+        rotxl	S1H
+        rotxl	S0L
+        rotxl	S0H
+        sub.w	A3,S1	; does it all fit
+        subx	A2L,S0L
+        subx	A2H,S0H
+        bhs	setone
+
+        add.w	A3,S1	; no, restore mistake
+        addx	A2L,S0L
+        addx	A2H,S0H
+
+        dec	S2H
+        bne	nextbit
+        rts
+
+setone:
+	inc	A1L
+        dec	S2H
+        bne	nextbit
+        rts
+
+#else /* __H8300H__ */
+
+	;; This function also computes the remainder and stores it in er3.
+	.global	___udivsi3
+___udivsi3:
+	mov.w	A1E,A1E		; denominator top word 0?
+	bne	DenHighNonZero
+
+	; do it the easy way, see page 107 in manual
+	mov.w	A0E,A2
+	extu.l	A2P
+	divxu.w	A1,A2P
+	mov.w	A2E,A0E
+	divxu.w	A1,A0P
+	mov.w	A0E,A3
+	mov.w	A2,A0E
+	extu.l	A3P
+	rts
+
+ 	; er0 = er0 / er1
+ 	; er3 = er0 % er1
+ 	; trashes er1 er2
+ 	; expects er1 >= 2^16
+DenHighNonZero:
+	mov.l	er0,er3
+	mov.l	er1,er2
+#ifdef __H8300H__
+divmod_L21:
+	shlr.l	er0
+	shlr.l	er2		; make divisor < 2^16
+	mov.w	e2,e2
+	bne	divmod_L21
+#else
+	shlr.l	#2,er2		; make divisor < 2^16
+	mov.w	e2,e2
+	beq	divmod_L22A
+divmod_L21:
+	shlr.l	#2,er0
+divmod_L22:
+	shlr.l	#2,er2		; make divisor < 2^16
+	mov.w	e2,e2
+	bne	divmod_L21
+divmod_L22A:
+	rotxl.w	r2
+	bcs	divmod_L23
+	shlr.l	er0
+	bra	divmod_L24
+divmod_L23:
+	rotxr.w	r2
+	shlr.l	#2,er0
+divmod_L24:
+#endif
+	;; At this point,
+	;;  er0 contains shifted dividend
+	;;  er1 contains divisor
+	;;  er2 contains shifted divisor
+	;;  er3 contains dividend, later remainder
+	divxu.w	r2,er0		; r0 now contains the approximate quotient (AQ)
+	extu.l	er0
+	beq	divmod_L25
+	subs	#1,er0		; er0 = AQ - 1
+	mov.w	e1,r2
+	mulxu.w	r0,er2		; er2 = upper (AQ - 1) * divisor
+	sub.w	r2,e3		; dividend - 65536 * er2
+	mov.w	r1,r2
+	mulxu.w	r0,er2		; compute er3 = remainder (tentative)
+	sub.l	er2,er3		; er3 = dividend - (AQ - 1) * divisor
+divmod_L25:
+ 	cmp.l	er1,er3		; is divisor < remainder?
+	blo	divmod_L26
+ 	adds	#1,er0
+	sub.l	er1,er3		; correct the remainder
+divmod_L26:
+	rts
+
+#endif
+#endif /* L_divsi3 */
+
+#ifdef L_mulhi3
+
+;; HImode multiply.
+; The H8/300 only has an 8*8->16 multiply.
+; The answer is the same as:
+;
+; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
+; (we can ignore A1.h * A0.h cause that will all off the top)
+; A0 in
+; A1 in
+; A0 answer
+
+#ifdef __H8300__
+	.section .text
+	.align 2
+	.global	___mulhi3
+___mulhi3:
+	mov.b	A1L,A2L		; A2l gets srcb.l
+	mulxu	A0L,A2		; A2 gets first sub product
+
+	mov.b	A0H,A3L		; prepare for
+	mulxu	A1L,A3		; second sub product
+
+	add.b	A3L,A2H		; sum first two terms
+
+	mov.b	A1H,A3L		; third sub product
+	mulxu	A0L,A3
+
+	add.b	A3L,A2H		; almost there
+	mov.w	A2,A0		; that is
+	rts
+
+#endif
+#endif /* L_mulhi3 */
+
+#ifdef L_mulsi3
+
+;; SImode multiply.
+;;
+;; I think that shift and add may be sufficient for this.  Using the
+;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
+;; the inner loop uses maybe 20 cycles + overhead, but terminates
+;; quickly on small args.
+;;
+;; A0/A1 src_a
+;; A2/A3 src_b
+;;
+;;  while (a)
+;;    {
+;;      if (a & 1)
+;;        r += b;
+;;      a >>= 1;
+;;      b <<= 1;
+;;    }
+
+	.section .text
+	.align 2
+
+#ifdef __H8300__
+
+	.global	___mulsi3
+___mulsi3:
+	PUSHP	S0P
+	PUSHP	S1P
+
+	sub.w	S0,S0
+	sub.w	S1,S1
+
+	; while (a)
+_top:	mov.w	A0,A0
+	bne	_more
+	mov.w	A1,A1
+	beq	_done
+_more:	; if (a & 1)
+	bld	#0,A1L
+	bcc	_nobit
+	; r += b
+	add.w	A3,S1
+	addx	A2L,S0L
+	addx	A2H,S0H
+_nobit:
+	; a >>= 1
+	shlr	A0H
+	rotxr	A0L
+	rotxr	A1H
+	rotxr	A1L
+
+	; b <<= 1
+	add.w	A3,A3
+	addx	A2L,A2L
+	addx	A2H,A2H
+	bra 	_top
+
+_done:
+	mov.w	S0,A0
+	mov.w	S1,A1
+	POPP	S1P
+	POPP	S0P
+	rts
+
+#else /* __H8300H__ */
+
+;
+; mulsi3 for H8/300H - based on Renesas SH implementation
+;
+; by Toshiyasu Morita
+;
+; Old code:
+;
+; 16b * 16b = 372 states (worst case)
+; 32b * 32b = 724 states (worst case)
+;
+; New code:
+;
+; 16b * 16b =  48 states
+; 16b * 32b =  72 states
+; 32b * 32b =  92 states
+;
+
+	.global	___mulsi3
+___mulsi3:
+	mov.w	r1,r2   ; ( 2 states) b * d
+	mulxu	r0,er2  ; (22 states)
+
+	mov.w	e0,r3   ; ( 2 states) a * d
+	beq	L_skip1 ; ( 4 states)
+	mulxu	r1,er3  ; (22 states)
+	add.w	r3,e2   ; ( 2 states)
+
+L_skip1:
+	mov.w	e1,r3   ; ( 2 states) c * b
+	beq	L_skip2 ; ( 4 states)
+	mulxu	r0,er3  ; (22 states)
+	add.w	r3,e2   ; ( 2 states)
+
+L_skip2:
+	mov.l	er2,er0	; ( 2 states)
+	rts		; (10 states)
+
+#endif
+#endif /* L_mulsi3 */
+#ifdef L_fixunssfsi_asm
+/* For the h8300 we use asm to save some bytes, to
+   allow more programs to fit into the tiny address
+   space.  For the H8/300H and H8S, the C version is good enough.  */
+#ifdef __H8300__
+/* We still treat NANs different than libgcc2.c, but then, the
+   behavior is undefined anyways.  */
+	.global	___fixunssfsi
+___fixunssfsi:
+	cmp.b #0x4f,r0h
+	bge Large_num
+	jmp     @___fixsfsi
+Large_num:
+	bhi L_huge_num
+	xor.b #0x80,A0L
+	bmi L_shift8
+L_huge_num:
+	mov.w #65535,A0
+	mov.w A0,A1
+	rts
+L_shift8:
+	mov.b A0L,A0H
+	mov.b A1H,A0L
+	mov.b A1L,A1H
+	mov.b #0,A1L
+	rts
+#endif
+#endif /* L_fixunssfsi_asm */