Initial checkin of GCC 4.9.0 from trunk (r208799).

Change-Id: I48a3c08bb98542aa215912a75f03c0890e497dba
author: Ben Cheng <bccheng@google.com> 2014-03-25 22:37:19 -0700
committer: Ben Cheng <bccheng@google.com> 2014-03-25 22:37:19 -0700
commit: 1bc5aee63eb72b341f506ad058502cd0361f0d10 (patch)
tree: c607e8252f3405424ff15bc2d00aa38dadbb2518 /gcc-4.9/libgcc/config/sh
parent: 283a0bf58fcf333c58a2a92c3ebbc41fb9eb1fdb (diff)
download: toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.gz
toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.bz2
toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.zip
16 files changed, 7438 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/sh/crt1.S b/gcc-4.9/libgcc/config/sh/crt1.S
new file mode 100644
index 000000000..d8b929549
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/crt1.S
@@ -0,0 +1,1368 @@
+/* Copyright (C) 2000-2014 Free Software Foundation, Inc.
+   This file was pretty much copied from newlib.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+#ifdef MMU_SUPPORT
+	/* Reserve a private, writable stack area for the exception and
+	   timer-interrupt (VBR) handlers.  */
+	.section .data.vbr.stack,"aw"
+	.align 4
+	.global __ST_VBR
+__ST_VBR:
+	.space 2048		/* 2 KiB, zero-filled */
+	/* Highest address of the area: the handler stack grows from here.  */
+__timer_stack:
+#endif /* MMU_SUPPORT */
+	
+	/* ;----------------------------------------
+	Normal newlib crt1.S */
+
+#ifdef __SH5__
+	/* ___data labels the position reached in .data when this object
+	   is linked; the start code loads its address into r26.  */
+	.section .data,"aw"
+	.global ___data
+___data:
+
+	/* ___rodata labels the position reached in .rodata likewise; the
+	   start code loads its address into r27.  */
+	.section .rodata,"a"
+	.global ___rodata
+___rodata:
+
+/* Configuration-register bases written with putcfg: cache control
+   (instruction / operand) and the instruction / data PTE arrays.  */
+#define ICCR_BASE		0x01600000
+#define OCCR_BASE		0x01e00000
+#define MMUIR_BASE		0x00000000
+#define MMUDR_BASE		0x00800000
+
+/* Flag values OR-ed into the PTEH word by the mapping code.  */
+#define PTE_ENABLED		1
+#define PTE_DISABLED		0
+#define PTE_SHARED		(1 << 1)
+#define PTE_NOT_SHARED		0
+
+/* Cache behaviour field of the PTEL word.  */
+#define PTE_CB_UNCACHEABLE	0
+#define PTE_CB_DEVICE		1
+#define PTE_CB_CACHEABLE_WB	2
+#define PTE_CB_CACHEABLE_WT	3
+
+/* Page size field of the PTEL word.  */
+#define PTE_SZ_4KB		(0 << 3)
+#define PTE_SZ_64KB		(1 << 3)
+#define PTE_SZ_1MB		(2 << 3)
+#define PTE_SZ_512MB		(3 << 3)
+
+/* Protection bits of the PTEL word.  */
+#define PTE_PRR			(1 << 6)
+#define PTE_PRX			(1 << 7)
+#define PTE_PRW			(1 << 8)
+#define PTE_PRU			(1 << 9)
+
+/* Bit positions inside the status register.  */
+#define SR_MMU_BIT		31
+#define SR_BL_BIT		28
+
+/* Low-order address bits cleared (via andc) to align a page address.  */
+#define ALIGN_4KB		(0xfff)
+#define ALIGN_1MB		(0xfffff)
+#define ALIGN_512MB		(0x1fffffff)
+
+/* Physical memory map used when building the page tables.  */
+#define DYNACON_BASE		0x0f000000
+#define DM_CB_DLINK_BASE	0x0c000000
+#define DM_DB_DLINK_BASE	0x0b000000
+
+#define FEMI_AREA_0		0x00000000
+#define FEMI_AREA_1		0x04000000
+#define FEMI_AREA_2		0x05000000
+#define FEMI_AREA_3		0x06000000
+#define FEMI_AREA_4		0x07000000
+#define FEMI_CB			0x08000000
+
+#define EMI_BASE		0x80000000
+
+/* NOTE(review): DMA_BASE and DMAC_BASE name the same address.  */
+#define DMA_BASE		0x0e000000
+
+#define CPU_BASE		0x0d000000
+
+#define PERIPH_BASE		0x09000000
+#define DMAC_BASE		0x0e000000
+#define INTC_BASE		0x0a000000
+#define CPRC_BASE		0x0a010000
+#define TMU_BASE		0x0a020000
+#define SCIF_BASE		0x0a030000
+#define RTC_BASE		0x0a040000
+
+
+
+/* Build the 32-bit constant VAL in REG: movi supplies bits 31..16,
+   shori then shifts in bits 15..0.  */
+#define LOAD_CONST32(val, reg) \
+	movi	((val) >> 16) & 65535, reg; \
+	shori	(val) & 65535, reg
+
+/* Shared body of the two PTE word builders below:
+   REG = (address of SYM with the ALIGN bits cleared) | BITS.
+   SCRATCH_REG is clobbered.  */
+#define LOAD_PTE_VAL_COMMON(sym, align, bits, scratch_reg, reg) \
+	LOAD_ADDR (sym, reg); \
+	LOAD_CONST32 ((align), scratch_reg); \
+	andc	reg, scratch_reg, reg; \
+	LOAD_CONST32 ((bits), scratch_reg); \
+	or	reg, scratch_reg, reg
+
+/* Compute the PTEH word for a mapping of SYM into REG.  */
+#define LOAD_PTEH_VAL(sym, align, bits, scratch_reg, reg) \
+	LOAD_PTE_VAL_COMMON (sym, align, bits, scratch_reg, reg)
+
+/* Compute the PTEL word for a mapping of SYM into REG.  */
+#define LOAD_PTEL_VAL(sym, align, bits, scratch_reg, reg) \
+	LOAD_PTE_VAL_COMMON (sym, align, bits, scratch_reg, reg)
+
+/* Program one PTE slot addressed by PTE_ADDR_REG: write zero (r63) to
+   config word 0 first, then the PTEL value to word 1, and finally the
+   PTEH value to word 0.  */
+#define SET_PTE(pte_addr_reg, pteh_val_reg, ptel_val_reg) \
+	putcfg  pte_addr_reg, 0, r63; \
+	putcfg  pte_addr_reg, 1, ptel_val_reg; \
+	putcfg  pte_addr_reg, 0, pteh_val_reg
+
+/* Select the text section and define LOAD_ADDR(sym, reg), which loads
+   the address (or constant) SYM into REG 16 bits at a time.  */
+#if __SH5__ == 64
+	.section .text,"ax"
+/* 64-bit variant: one movi followed by three shori, i.e. four 16-bit
+   pieces from bits 63..48 down to bits 15..0.  */
+#define LOAD_ADDR(sym, reg) \
+	movi	(sym >> 48) & 65535, reg; \
+	shori	(sym >> 32) & 65535, reg; \
+	shori	(sym >> 16) & 65535, reg; \
+	shori	sym & 65535, reg
+#else
+	.mode	SHmedia
+	.section .text..SHmedia32,"ax"
+/* 32-bit variant: movi for bits 31..16, shori for bits 15..0.  */
+#define LOAD_ADDR(sym, reg) \
+	movi	(sym >> 16) & 65535, reg; \
+	shori	sym & 65535, reg
+#endif
+	/* Program entry point for the SH5 build.  The first action is to
+	   load r15 with the address of _stack.  */
+	.global start
+start:
+	LOAD_ADDR (_stack, r15)
+
+#ifdef MMU_SUPPORT
+	! Set up the VM using the MMU and caches
+
+	! .vm_ep is first instruction to execute
+	! after VM initialization
+	pt/l	.vm_ep, tr1
+	
+	! Configure instruction cache (ICCR)
+	/* r2 = 3 goes to config word 0 and r3 = 0 to config word 1; the
+	   meaning of the individual bits is not visible in this file.  */
+	movi	3, r2
+	movi	0, r3
+	LOAD_ADDR (ICCR_BASE, r1)
+	putcfg	r1, 0, r2
+	putcfg	r1, 1, r3
+
+	! movi	7, r2 ! write through
+	! Configure operand cache (OCCR)
+	/* Reuses the r2 / r3 values loaded for the instruction cache.  */
+	LOAD_ADDR (OCCR_BASE, r1)
+	putcfg	r1, 0, r2
+	putcfg	r1, 1, r3
+
+	! Disable all PTE translations
+	/* 64 iterations; each writes zero (r63) to config word 0 of one
+	   instruction PTE and one data PTE, stepping both by 16.  */
+	LOAD_ADDR (MMUIR_BASE, r1)
+	LOAD_ADDR (MMUDR_BASE, r2)
+	movi	64, r3
+	pt/l	.disable_ptes_loop, tr0
+.disable_ptes_loop:
+	putcfg	r1, 0, r63
+	putcfg	r2, 0, r63
+	addi	r1, 16, r1
+	addi	r2, 16, r2
+	addi	r3, -1, r3
+	bgt	r3, r63, tr0
+	/* Instruction-side mappings.  r1 walks the instruction PTE slots
+	   (16 apart), r2 / r3 receive the PTEH / PTEL words and r25 is the
+	   scratch register.  Every entry is identity mapped because the
+	   same address is used for both words.  */
+	LOAD_ADDR (MMUIR_BASE, r1)
+
+	! FEMI instruction mappings
+	!   Area 0 - 1Mb cacheable at 0x00000000
+	!   Area 1 - None
+	!   Area 2 - 1Mb cacheable at 0x05000000
+	!          - 1Mb cacheable at 0x05100000
+	!   Area 3 - None
+	!   Area 4 - None
+
+	! Map a 1Mb page for instructions at 0x00000000
+	LOAD_PTEH_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 1Mb page for instructions at 0x05000000
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 1Mb page for instructions at 0x05100000
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 512M page for instructions at EMI base
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (EMI_BASE, ALIGN_512MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (EMI_BASE, ALIGN_512MB, PTE_CB_CACHEABLE_WB | PTE_SZ_512MB | PTE_PRX | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 4K page for instructions at DM_DB_DLINK_BASE
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRX | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	/* Data-side mappings.  r1 now walks the data PTE slots (16 apart);
+	   r2 / r3 / r25 are used as for the instruction mappings.  All
+	   data pages get read + write + user permission bits.  */
+	LOAD_ADDR (MMUDR_BASE, r1)
+
+	! FEMI data mappings
+	!   Area 0 - 1Mb cacheable at 0x00000000
+	!   Area 1 - 1Mb device at 0x04000000
+	!   Area 2 - 1Mb cacheable at 0x05000000
+	!          - 1Mb cacheable at 0x05100000
+	!   Area 3 - None
+	!   Area 4 - None
+	!   CB     - 1Mb device at 0x08000000
+	/* NOTE(review): the CB entry programmed below uses a 4K page, not
+	   the 1Mb stated in the table above.  */
+
+	! Map a 1Mb page for data at 0x00000000
+	LOAD_PTEH_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 1Mb page for data at 0x04000000
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (FEMI_AREA_1, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (FEMI_AREA_1, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 1Mb page for data at 0x05000000
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 1Mb page for data at 0x05100000
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 4K page for registers at 0x08000000
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (FEMI_CB, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (FEMI_CB, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 512M page for data at EMI
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (EMI_BASE, ALIGN_512MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (EMI_BASE, ALIGN_512MB, PTE_CB_CACHEABLE_WB | PTE_SZ_512MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 4K page for DYNACON at DYNACON_BASE
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (DYNACON_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (DYNACON_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 4K page for instructions at DM_DB_DLINK_BASE
+	/* NOTE(review): this is a data-side entry with read/write
+	   permissions covering the page that the instruction side maps as
+	   executable; the word "instructions" above refers to the page
+	   contents, presumably.  */
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 4K page for data at DM_DB_DLINK_BASE+0x1000
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL ((DM_DB_DLINK_BASE+0x1000), ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((DM_DB_DLINK_BASE+0x1000), ALIGN_4KB, PTE_CB_UNCACHEABLE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 4K page for stack DM_DB_DLINK_BASE+0x2000
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL ((DM_DB_DLINK_BASE+0x2000), ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((DM_DB_DLINK_BASE+0x2000), ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	/* Five 1M device pages inside the DM_CB_DLINK window, at offsets
+	   0, 0x100000, 0x200000, 0x400000 and 0x800000 (the offsets are
+	   not contiguous), followed by one 4K page for the DMA registers.
+	   Each step advances r1 to the next data PTE slot.  */
+	! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK 
+	! 0x0c000000 - 0x0c0fffff
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (DM_CB_DLINK_BASE, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (DM_CB_DLINK_BASE, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK 
+	! 0x0c100000 - 0x0c1fffff
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x100000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK 
+	! 0x0c200000 - 0x0c2fffff
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x200000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x200000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK 
+	! 0x0c400000 - 0x0c4fffff
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x400000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x400000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK 
+	! 0x0c800000 - 0x0c8fffff
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x800000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x800000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map a 4K page for DMA control registers
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (DMA_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (DMA_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map lots of 4K pages for peripherals
+	/* One 4K device page per peripheral block, each in the next data
+	   PTE slot (r1 += 16).  */
+
+	! /* peripheral */
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (PERIPH_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (PERIPH_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+	! /* dmac */
+	/* NOTE(review): the DMAC base equals the DMA base mapped just
+	   before the peripherals, so this slot repeats that 4K mapping;
+	   confirm that a duplicate entry is acceptable to the MMU.  */
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (DMAC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (DMAC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+	! /* intc */
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (INTC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (INTC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+	! /* rtc */
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (RTC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (RTC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+	! /* tmu */
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (TMU_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (TMU_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+	! /* scif */
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (SCIF_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (SCIF_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+	! /* cprc */
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (CPRC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (CPRC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Map CPU WPC registers 
+	/* Four 1M device pages at offsets 0, 0x100000, 0x200000 and
+	   0x400000 from the CPU register base, one data PTE slot each.  */
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL (CPU_BASE, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL (CPU_BASE, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+	addi	r1, 16, r1
+
+	LOAD_PTEH_VAL ((CPU_BASE+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((CPU_BASE+0x100000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL ((CPU_BASE+0x200000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((CPU_BASE+0x200000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	addi	r1, 16, r1
+	LOAD_PTEH_VAL ((CPU_BASE+0x400000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+	LOAD_PTEL_VAL ((CPU_BASE+0x400000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+	SET_PTE (r1, r2, r3)
+
+	! Switch over to virtual addressing and enabled cache
+	/* NOTE(review): the SSR value built by the next five instructions
+	   (SR with the BL bit set) is overwritten below, because r1 is
+	   reloaded from SR and SSR is written a second time with only the
+	   MMU bit OR-ed in.  Confirm whether BL was meant to be carried
+	   into the final SSR value.  */
+	getcon	sr, r1
+	movi	1, r2
+	shlli	r2, SR_BL_BIT, r2
+	or	r1, r2, r1
+	putcon	r1, ssr
+	/* Final SSR = current SR with the MMU bit set.  */
+	getcon	sr, r1
+	movi	1, r2
+	shlli	r2, SR_MMU_BIT, r2
+	or	r1, r2, r1
+	putcon	r1, ssr
+	/* SPC = address held in tr1, which was pointed at .vm_ep before
+	   the cache setup; the rte below therefore continues at .vm_ep.  */
+	gettr	tr1, r1
+	putcon	r1, spc
+	synco
+	rte
+
+	! VM entry point.  From now on, we are in VM mode.
+.vm_ep:
+
+	! Install the trap handler, by seeding vbr with the
+	! correct value, and by assigning sr.bl = 0.
+
+	LOAD_ADDR (vbr_start, r1)
+	putcon	r1, vbr
+	/* Mask with bit 28 (the BL bit) cleared, applied to SR.  */
+	movi	~(1<<28), r1
+	getcon	sr, r2
+	and     r1, r2, r2
+	putcon	r2, sr
+#endif /* MMU_SUPPORT */
+
+	/* Preload branch targets: tr0 = bss loop, tr5 = _init,
+	   tr6 = ___setup_argv_and_call_main, tr7 = _exit.  */
+	pt/l	.Lzero_bss_loop, tr0
+	pt/l	_init, tr5
+	pt/l	___setup_argv_and_call_main, tr6
+	pt/l	_exit, tr7
+
+	! zero out bss
+	/* Stores 8 zero bytes (r63) per iteration from _edata upwards and
+	   repeats while _end is greater than the running pointer.  The
+	   store comes before the test, so at least 8 bytes are written.  */
+	LOAD_ADDR (_edata, r0)
+	LOAD_ADDR (_end, r1)
+.Lzero_bss_loop:
+	stx.q	r0, r63, r63
+	addi	r0, 8, r0
+	bgt/l	r1, r0, tr0
+
+	/* r26 / r27 = addresses of the ___data / ___rodata markers.  */
+	LOAD_ADDR (___data, r26)
+	LOAD_ADDR (___rodata, r27)
+
+#ifdef __SH_FPU_ANY__
+	getcon	sr, r0
+	! enable the FP unit, by resetting SR.FD
+	! also zero out SR.FR, SR.SZ and SR.PR, as mandated by the ABI
+	/* r1 = 0xf000, so the andc clears SR bits 12..15.  */
+	movi	0, r1
+	shori	0xf000, r1
+	andc	r0, r1, r0
+	putcon	r0, sr
+#if __SH5__ == 32
+	/* Call ___set_fpscr with r4 = 0; the return address goes in r18.  */
+	pt/l ___set_fpscr, tr0
+	movi	0, r4
+	blink	tr0, r18
+#endif
+#endif
+
+	! arrange for exit to call fini
+	/* Calls _atexit with the address of _fini in r2.  */
+	pt/l	_atexit, tr1
+	LOAD_ADDR (_fini, r2)
+	blink	tr1, r18
+
+	! call init
+	blink	tr5, r18
+
+	! call the mainline
+	blink	tr6, r18
+
+	! call exit
+	/* r2 is not reloaded here, so _exit sees whatever the mainline
+	   left in it.  */
+	blink	tr7, r18
+	! We should never return from _exit but in case we do we would enter
+	! the following tight loop. This avoids executing any data that might follow.
+limbo:
+	pt/l limbo, tr0
+	blink tr0, r63
+	
+#ifdef MMU_SUPPORT
+	! All these traps are handled in the same place. 
+	/* Vector table: each entry starts on a 256-byte boundary and just
+	   branches to the common handler, discarding the link (r63).  */
+	.balign 256
+vbr_start:
+	pt/l handler, tr0	! tr0 trashed.
+	blink tr0, r63
+	.balign 256
+vbr_100:
+	pt/l handler, tr0	! tr0 trashed.
+	blink tr0, r63
+vbr_100_end:
+	.balign 256
+vbr_200:
+	pt/l handler, tr0	! tr0 trashed.
+	blink tr0, r63
+	.balign 256
+vbr_300:
+	pt/l handler, tr0	! tr0 trashed.
+	blink tr0, r63
+	.balign 256	
+vbr_400:	! Should be at vbr+0x400
+	/* The 0x400 entry is the common handler itself (no branch).  */
+handler:
+	/* If the trap handler is there call it */
+	LOAD_ADDR (__superh_trap_handler, r2)
+	pta chandler,tr2
+	beq r2, r63, tr2 /* If zero, ie not present branch around to chandler */
+	/* Now call the trap handler with as much of the context unchanged as possible.
+	   Move trapping address into R18 to make it look like the trap point */
+	getcon spc, r18
+	pt/l __superh_trap_handler, tr0
+	/* The link address stored in r7 is the chandler label below, so a
+	   handler that returns through r7 falls into chandler.  */
+	blink tr0, r7
+chandler:	
+	/* Default action: capture SPC in r62 and EXPEVT in r2, then jump
+	   to _exit without recording a return address.  */
+	getcon	spc, r62
+	getcon expevt, r2
+	pt/l	_exit, tr0
+	blink	tr0, r63
+
+	/* Simulated trap handler */
+	/* The empty section switches below only plant the labels that the
+	   hand-written debug sections refer to.  */
+	.section	.text..SHmedia32,"ax"
+gcc2_compiled.:
+	.section	.debug_abbrev
+.Ldebug_abbrev0:
+	.section	.text..SHmedia32
+.Ltext0:
+	.section	.debug_info
+.Ldebug_info0:
+	.section	.debug_line
+.Ldebug_line0:
+	.section	.text..SHmedia32,"ax"
+	.align 5
+	/* Default trap handler.  It builds a small frame, stores r2 in
+	   it, tears the frame down again and returns to the address held
+	   in r7.  */
+	.global	__superh_trap_handler
+	.type	__superh_trap_handler,@function
+__superh_trap_handler:
+.LFB1:
+	ptabs	r18, tr0
+	/* Prologue: save r14, make r14 the frame pointer.  */
+	addi.l	r15, -8, r15
+	st.l	r15, 4, r14
+	addi.l	r15, -8, r15
+	add.l	r15, r63, r14
+	st.l	r14, 0, r2
+	/* Replaces the r18 target loaded above: return goes via r7.  */
+	 ptabs r7, tr0 
+	/* Epilogue: restore r15 and r14.  */
+	addi.l	r14, 8, r14
+	add.l	r14, r63, r15
+	ld.l	r15, 4, r14
+	addi.l	r15, 8, r15
+	blink	tr0, r63
+.LFE1:
+.Lfe1:
+	.size	__superh_trap_handler,.Lfe1-__superh_trap_handler
+
+	/* End-of-text marker used by the debug sections below.  */
+	.section	.text..SHmedia32
+.Letext0:
+
+	/* Hand-encoded debug information for __superh_trap_handler, laid
+	   out as a compiler would emit it for "trap_handler.c".  The byte
+	   values are fixed data and must stay in step with the abbrev
+	   table that follows.  */
+	.section	.debug_info
+	.ualong	0xa7
+	.uaword	0x2
+	.ualong	.Ldebug_abbrev0
+	.byte	0x4
+	.byte	0x1
+	.ualong	.Ldebug_line0
+	.ualong	.Letext0
+	.ualong	.Ltext0
+	.string	"trap_handler.c"
+
+	.string	"xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+	.string	"GNU C 2.97-sh5-010522"
+
+	.byte	0x1
+	.byte	0x2
+	.ualong	0x9a
+	.byte	0x1
+	.string	"_superh_trap_handler"
+
+	.byte	0x1
+	.byte	0x2
+	.byte	0x1
+	.ualong	.LFB1
+	.ualong	.LFE1
+	.byte	0x1
+	.byte	0x5e
+	.byte	0x3
+	.string	"trap_reason"
+
+	.byte	0x1
+	.byte	0x1
+	.ualong	0x9a
+	.byte	0x2
+	.byte	0x91
+	.byte	0x0
+	.byte	0x0
+	.byte	0x4
+	.string	"unsigned int"
+
+	.byte	0x4
+	.byte	0x7
+	.byte	0x0
+
+	/* Abbreviation table referenced from .debug_info (four entries,
+	   numbered 1 to 4, each ended by a 0,0 pair).  */
+	.section	.debug_abbrev
+	.byte	0x1
+	.byte	0x11
+	.byte	0x1
+	.byte	0x10
+	.byte	0x6
+	.byte	0x12
+	.byte	0x1
+	.byte	0x11
+	.byte	0x1
+	.byte	0x3
+	.byte	0x8
+	.byte	0x1b
+	.byte	0x8
+	.byte	0x25
+	.byte	0x8
+	.byte	0x13
+	.byte	0xb
+	.byte	0,0
+	.byte	0x2
+	.byte	0x2e
+	.byte	0x1
+	.byte	0x1
+	.byte	0x13
+	.byte	0x3f
+	.byte	0xc
+	.byte	0x3
+	.byte	0x8
+	.byte	0x3a
+	.byte	0xb
+	.byte	0x3b
+	.byte	0xb
+	.byte	0x27
+	.byte	0xc
+	.byte	0x11
+	.byte	0x1
+	.byte	0x12
+	.byte	0x1
+	.byte	0x40
+	.byte	0xa
+	.byte	0,0
+	.byte	0x3
+	.byte	0x5
+	.byte	0x0
+	.byte	0x3
+	.byte	0x8
+	.byte	0x3a
+	.byte	0xb
+	.byte	0x3b
+	.byte	0xb
+	.byte	0x49
+	.byte	0x13
+	.byte	0x2
+	.byte	0xa
+	.byte	0,0
+	.byte	0x4
+	.byte	0x24
+	.byte	0x0
+	.byte	0x3
+	.byte	0x8
+	.byte	0xb
+	.byte	0xb
+	.byte	0x3e
+	.byte	0xb
+	.byte	0,0
+	.byte	0
+
+	/* Public-name table with the single entry for the handler.  */
+	.section	.debug_pubnames
+	.ualong	0x27
+	.uaword	0x2
+	.ualong	.Ldebug_info0
+	.ualong	0xab
+	.ualong	0x5b
+	.string	"_superh_trap_handler"
+
+	.ualong	0x0
+
+	/* Address-range table covering .Ltext0 up to .Letext0.  */
+	.section	.debug_aranges
+	.ualong	0x1c
+	.uaword	0x2
+	.ualong	.Ldebug_info0
+	.byte	0x4
+	.byte	0x0
+	.uaword	0x0,0
+	.ualong	.Ltext0
+	.ualong	.Letext0-.Ltext0
+	.ualong	0x0
+	.ualong	0x0
+	.ident	"GCC: (GNU) 2.97-sh5-010522"
+#endif /* MMU_SUPPORT */
+#else /* ! __SH5__ */
+
+	! make a place to keep any previous value of the vbr register
+	! this will only have a value if it has been set by redboot (for example)
+	/* old_vbr lives in .bss, so the bss clearing loop in start wipes
+	   it; start restores vbr from it before that loop runs.  */
+	.section .bss
+old_vbr:
+	.long 0
+#ifdef PROFILE
+	/* Flag word set to 1 by start once profiling is initialised and
+	   reset to 0 by stop_profiling.  */
+profiling_enabled:
+	.long 0
+#endif
+
+
+	/* Program entry point for the non-SH5 build.  r15 is loaded from
+	   the stack_k literal (address of _stack) first.  */
+	.section .text
+	.global	start
+	/* Weak reference: rtos_start_fn holds this symbol address, and
+	   start tests that word for zero to pick the initial SR.  */
+	.import ___rtos_profiler_start_timer
+	.weak   ___rtos_profiler_start_timer
+start:
+	mov.l	stack_k,r15
+
+#if defined (__SH3__) || (defined (__SH_FPU_ANY__) && ! defined (__SH2E__) && ! defined (__SH2A__)) || defined (__SH4_NOFPU__)
+#define VBR_SETUP
+	! before zeroing the bss ...
+	! if the vbr is already set to vbr_start then the program has been restarted
+	! (i.e. it is not the first time the program has been run since reset)
+	! reset the vbr to its old value before old_vbr (in bss) is wiped
+	! this ensures that the later code does not create a circular vbr chain
+	stc	vbr, r1
+	mov.l	vbr_start_k, r2
+	cmp/eq	r1, r2
+	bf	0f
+	! reset the old vbr value
+	mov.l	old_vbr_k, r1
+	mov.l	@r1, r2
+	ldc	r2, vbr
+0:	
+#endif /* VBR_SETUP */
+	
+	! zero out bss: clear each 32-bit word from _edata up to, but not
+	! including, _end
+	mov.l	edata_k,r0
+	mov.l	end_k,r1
+	mov	#0,r2
+start_l:
+	mov.l	r2,@r0
+	add	#4,r0
+	! Unsigned, strict compare: loop only while _end is above r0.
+	! The former signed cmp/ge also looped when r0 was equal to _end,
+	! storing one word past the bss, and it compared addresses as
+	! signed values.  The store precedes the test, so one word is
+	! always written even when the bss is empty.
+	cmp/hi	r0,r1
+	bt	start_l
+
+#if defined (__SH_FPU_ANY__)
+	/* Call ___set_fpscr with r4 = 4 shifted left by 16 (0x40000); the
+	   shift sits in the jsr delay slot, so it runs before the callee.  */
+	mov.l set_fpscr_k, r1
+	mov #4,r4
+	jsr @r1
+	shll16 r4	! Set DN bit (flush denormal inputs to zero)
+	/* NOTE(review): r3 is not set in this file; presumably
+	   ___set_fpscr leaves the default-precision value in it.  */
+	lds r3,fpscr	! Switch to default precision
+#endif /* defined (__SH_FPU_ANY__) */
+
+#ifdef VBR_SETUP
+	! save the existing contents of the vbr
+	! there will only be a prior value when using something like redboot
+	! otherwise it will be zero
+	stc	vbr, r1
+	mov.l	old_vbr_k, r2
+	mov.l	r1, @r2
+	! setup vbr
+	mov.l	vbr_start_k, r1
+	ldc	r1,vbr
+#endif /* VBR_SETUP */
+
+	! if an rtos is exporting a timer start fn,
+	! then pick up an SR which does not enable ints
+	! (the rtos will take care of this)
+	/* r0 = the weak timer-start symbol (zero when undefined).  r1
+	   defaults to the bare-machine SR value and is replaced by the
+	   rtos value only when r0 is non-zero.  */
+	mov.l rtos_start_fn, r0
+	mov.l sr_initial_bare, r1
+	tst	r0, r0
+	bt	set_sr
+
+	mov.l sr_initial_rtos, r1
+
+set_sr:
+	! Set status register (sr)
+	ldc	r1, sr
+
+	! arrange for exit to call fini
+	/* Calls _atexit with r4 = address of _fini.  */
+	mov.l	atexit_k,r0
+	mov.l	fini_k,r4
+	jsr	@r0
+	nop
+
+#ifdef PROFILE
+	! arrange for exit to call _mcleanup (via stop_profiling)
+	/* The argument move sits in the jsr delay slot.  */
+	mova    stop_profiling,r0
+	mov.l   atexit_k,r1
+	jsr     @r1
+	mov	r0, r4
+
+	! Call profiler startup code
+	/* Calls ___monstartup with r4 = _start and r5 = __etext.  */
+	mov.l monstartup_k, r0
+	mov.l start_k, r4
+	mov.l etext_k, r5
+	jsr @r0
+	nop
+
+	! enable profiling trap
+	! until now any trap 33s will have been ignored
+	! This means that all library functions called before this point
+	! (directly or indirectly) may have the profiling trap at the start.
+	! Therefore, only mcount itself may not have the extra header.
+	mov.l	profiling_enabled_k2, r0
+	mov	#1, r1
+	mov.l	r1, @r0
+#endif /* PROFILE */
+
+	! call init
+	mov.l	init_k,r0
+	jsr	@r0
+	nop
+
+	! call the mainline	
+	mov.l	main_k,r0
+	jsr	@r0
+	nop
+
+	! call exit
+	/* The value left in r0 by the mainline becomes the r4 argument.
+	   NOTE(review): nothing stops execution here if _exit returns;
+	   the literal data that follows would then be executed.  */
+	mov	r0,r4
+	mov.l	exit_k,r0
+	jsr	@r0
+	nop
+	
+		.balign 4
+#ifdef PROFILE
+	/* Registered with atexit by start: clears the profiling_enabled
+	   flag and then tail-jumps to __mcleanup.  */
+stop_profiling:
+	# stop mcount counting
+	mov.l	profiling_enabled_k2, r0
+	mov	#0, r1
+	mov.l	r1, @r0
+
+	# call mcleanup
+	mov.l	mcleanup_k, r0
+	jmp	@r0
+	nop
+		
+		.balign 4
+	/* Address literals used by the profiling code.  */
+mcleanup_k:
+	.long __mcleanup
+monstartup_k:
+	.long ___monstartup
+profiling_enabled_k2:
+	.long profiling_enabled
+start_k:
+	.long _start
+etext_k:
+	.long __etext
+#endif /* PROFILE */
+
+	/* Literal pool for start: each *_k word holds the address that
+	   start loads with a pc-relative mov.l.  */
+	.align 2
+#if defined (__SH_FPU_ANY__)
+set_fpscr_k:
+	.long	___set_fpscr
+#endif /*  defined (__SH_FPU_ANY__) */
+
+stack_k:
+	.long	_stack	
+edata_k:
+	.long	_edata
+end_k:
+	.long	_end
+main_k:
+	.long	___setup_argv_and_call_main
+exit_k:
+	.long	_exit
+atexit_k:
+	.long	_atexit
+init_k:
+	.long	_init
+fini_k:
+	.long	_fini
+#ifdef VBR_SETUP
+old_vbr_k:
+	.long	old_vbr
+vbr_start_k:
+	.long	vbr_start
+#endif /* VBR_SETUP */
+	
+	/* Initial SR value used when the rtos timer-start symbol is
+	   present.  */
+sr_initial_rtos:
+	! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work.
+	! Whether profiling or not, keep interrupts masked,
+	! the RTOS will enable these if required.
+	.long 0x600000f1 
+
+	/* Address of the weak rtos timer-start function; zero when no
+	   rtos provides it.  */
+rtos_start_fn:
+	.long ___rtos_profiler_start_timer
+	
+	/* Initial SR value for the bare-machine case.  The two variants
+	   differ only in bits 4..7, which are clear when profiling.  */
+#ifdef PROFILE
+sr_initial_bare:
+	! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work.
+	! For bare machine, we need to enable interrupts to get profiling working
+	.long 0x60000001
+#else
+
+sr_initial_bare:
+	! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work.
+	! Keep interrupts disabled - the application will enable as required.
+	.long 0x600000f1
+#endif
+
+	! supplied for backward compatibility only, in case of linking
+	! code whose main() was compiled with an older version of GCC.
+	/* Empty function: returns immediately (nop fills the delay slot).  */
+	.global ___main
+___main:
+	rts
+	nop
+#ifdef VBR_SETUP
+! Exception handlers	
+	/* The vector area gets its own section; the .org directives
+	   place each entry at its fixed offset from vbr_start.  */
+	.section .text.vbr, "ax"
+vbr_start:
+
+	.org 0x100
+vbr_100:
+#ifdef PROFILE
+	! Note on register usage.
+	! we use r0..r3 as scratch in this code. If we are here due to a trapa for profiling
+	! then this is OK as we are just before executing any function code.
+	! The other r4..r7 we save explicitly on the stack
+	! Remaining registers are saved by normal ABI conventions and we assert we do not
+	! use floating point registers.
+	/* Step 1: (expevt & 0xfff) must equal 0x160, else take the
+	   default path.  */
+	mov.l expevt_k1, r1
+	mov.l @r1, r1
+	mov.l event_mask, r0
+	and r0,r1
+	mov.l trapcode_k, r2
+	cmp/eq r1,r2
+	bt 1f
+	bra handler_100   ! if not a trapa, go to default handler
+	nop
+1:	
+	/* Step 2: the trapa number must be 33, else take the default
+	   path.  */
+	mov.l trapa_k, r0
+	mov.l @r0, r0
+	shlr2 r0      ! trapa code is shifted by 2.
+	cmp/eq #33, r0
+	bt 2f
+	bra handler_100
+	nop
+2:	
+	
+	! If here then it looks like we have trap #33
+	! Now we need to call mcount with the following convention
+	! Save and restore r4..r7
+	mov.l	r4,@-r15
+	mov.l	r5,@-r15
+	mov.l	r6,@-r15
+	mov.l	r7,@-r15
+	sts.l	pr,@-r15
+
+	! r4 is frompc.
+	! r5 is selfpc
+	! r0 is the branch back address.
+	! The code sequence emitted by gcc for the profiling trap is
+	! .align 2
+	! trapa #33
+	! .align 2
+	! .long lab Where lab is planted by the compiler. This is the address
+	! of a datum that needs to be incremented. 
+	sts pr,  r4     ! frompc
+	stc spc, r5	! selfpc
+	/* The mask built here is the complement of 2, so only bit 1 of
+	   r5 is cleared.  */
+	mov #2, r2
+	not r2, r2      ! pattern to align to 4
+	and r2, r5      ! r5 now has aligned address
+!	add #4, r5      ! r5 now has address of address
+	mov r5, r2      ! Remember it.
+!	mov.l @r5, r5   ! r5 has value of label (lab in above example)
+	add #8, r2
+	ldc r2, spc     ! our return address avoiding address word
+
+	! only call mcount if profiling is enabled
+	mov.l profiling_enabled_k, r0
+	mov.l @r0, r0
+	cmp/eq #0, r0
+	bt 3f
+	! call mcount
+	mov.l mcount_k, r2
+	jsr @r2
+	nop
+3:
+	/* Restore in reverse push order and resume at the adjusted spc.  */
+	lds.l @r15+,pr
+	mov.l @r15+,r7
+	mov.l @r15+,r6
+	mov.l @r15+,r5
+	mov.l @r15+,r4
+	rte
+	nop
+	.balign 4
+	/* Literals used by the profiling trap code above.  */
+event_mask:
+	.long 0xfff
+trapcode_k:	
+	.long 0x160
+expevt_k1:
+	.long 0xff000024 ! Address of expevt
+trapa_k:	
+	.long 0xff000020
+mcount_k:
+	.long __call_mcount
+profiling_enabled_k:
+	.long profiling_enabled
+#endif
+	! Non profiling case.
+	/* Default action for the 0x100 vector: with no saved vbr go to
+	   the generic handler, otherwise jump to the saved vbr + 0x100.
+	   Only r0 is modified.  */
+handler_100:
+	mov.l 2f, r0     ! load the old vbr setting (if any)
+	mov.l @r0, r0
+	cmp/eq #0, r0
+	bf 1f
+	! no previous vbr - jump to own generic handler
+	bra handler
+	nop	
+1:	! there was a previous handler - chain them
+	add #0x7f, r0	 ! 0x7f
+	add #0x7f, r0	 ! 0xfe
+	add #0x2, r0     ! add 0x100 without corrupting another register
+	jmp @r0
+	nop
+	.balign 4
+2:	
+	.long old_vbr
+
+	.org 0x400
+	/* 0x400 vector: with no saved vbr go to the generic handler,
+	   otherwise jump to the saved vbr + 0x400.  The offset is added
+	   as 0x100 to the value rotated right by two and then rotated
+	   back, because the add immediates are kept at 0x7f or below.
+	   This assumes the two low bits of the saved vbr are zero, so the
+	   rotates through T behave as plain shifts.  */
+vbr_400:	! Should be at vbr+0x400
+	mov.l 2f, r0     ! load the old vbr setting (if any)
+	mov.l @r0, r0
+	cmp/eq #0, r0
+	! no previous vbr - jump to own generic handler
+	bt handler
+	! there was a previous handler - chain them
+	rotcr r0
+	rotcr r0
+	add #0x7f, r0	 ! 0x1fc
+	add #0x7f, r0	 ! 0x3f8
+	add #0x02, r0	 ! 0x400
+	rotcl r0
+	rotcl r0	 ! Add 0x400 without corrupting another register
+	jmp @r0
+	nop
+	.balign 4
+2:
+	.long old_vbr
+	/* Generic handler: jumps to __superh_trap_handler when its
+	   address literal is non-zero, otherwise goes to chandler.  */
+handler:
+	/* If the trap handler is there call it */
+	mov.l	superh_trap_handler_k, r0
+	cmp/eq	#0, r0       ! True if zero.
+	bf 3f
+	bra   chandler
+	nop
+3:	
+	! Here handler available, call it. 
+	/* Now call the trap handler with as much of the context unchanged as possible.
+	   Move trapping address into PR to make it look like the trap point */
+	stc spc, r1
+	lds r1, pr
+	mov.l expevt_k, r4
+	mov.l @r4, r4 ! r4 is value of expevt, first parameter.
+	mov r1, r5   ! Remember trapping pc.
+	mov r1, r6   ! Remember trapping pc.
+	/* r1 = address of chandler; the trap handler in this file copies
+	   r1 into pr before returning, so it returns to chandler.  */
+	mov.l chandler_k, r1
+	mov.l superh_trap_handler_k, r2
+	! jmp to trap handler to avoid disturbing pr. 
+	jmp @r2
+	nop
+
+	.org 0x600
+	/* 0x600 vector.  With PROFILE it saves the scratch state on the
+	   dedicated timer stack, calls __profil_counter with the
+	   interrupted pc and returns with rte.  Without PROFILE it chains
+	   to the saved vbr + 0x600, or goes to chandler when there is no
+	   saved vbr.  */
+vbr_600:
+#ifdef PROFILE	
+	! Should be at vbr+0x600
+	! Now we are in the land of interrupts so need to save more state. 
+	! Save register state
+	mov.l interrupt_stack_k, r15 ! r15 has been saved to sgr.
+	mov.l	r0,@-r15	
+	mov.l	r1,@-r15
+	mov.l	r2,@-r15
+	mov.l	r3,@-r15
+	mov.l	r4,@-r15
+	mov.l	r5,@-r15
+	mov.l	r6,@-r15
+	mov.l	r7,@-r15
+	sts.l	pr,@-r15
+	sts.l	mach,@-r15
+	sts.l	macl,@-r15
+#if defined(__SH_FPU_ANY__)
+	! Save fpul and fpscr, save fr0-fr7 in 64 bit mode
+	! and set the pervading precision for the timer_handler
+	mov	#0,r0
+	sts.l	fpul,@-r15
+	sts.l	fpscr,@-r15
+	lds	r0,fpscr	! Clear fpscr
+	fmov	fr0,@-r15
+	fmov	fr1,@-r15
+	fmov	fr2,@-r15
+	fmov	fr3,@-r15
+	/* The load of the fpscr value for the handler is interleaved
+	   with the register saves.  */
+	mov.l	pervading_precision_k,r0
+	fmov	fr4,@-r15
+	fmov	fr5,@-r15
+	mov.l	@r0,r0
+	fmov	fr6,@-r15
+	fmov	fr7,@-r15
+	lds	r0,fpscr
+#endif /* __SH_FPU_ANY__ */
+	! Pass interrupted pc to timer_handler as first parameter (r4).
+	stc    spc, r4
+	mov.l timer_handler_k, r0
+	jsr @r0
+	nop
+	/* Restore everything in the reverse order of the saves.  */
+#if defined(__SH_FPU_ANY__)
+	mov	#0,r0
+	lds	r0,fpscr	! Clear the fpscr
+	fmov	@r15+,fr7
+	fmov	@r15+,fr6
+	fmov	@r15+,fr5
+	fmov	@r15+,fr4
+	fmov	@r15+,fr3
+	fmov	@r15+,fr2
+	fmov	@r15+,fr1
+	fmov	@r15+,fr0
+	lds.l	@r15+,fpscr
+	lds.l	@r15+,fpul
+#endif /* __SH_FPU_ANY__ */
+	lds.l @r15+,macl
+	lds.l @r15+,mach
+	lds.l @r15+,pr
+	mov.l @r15+,r7
+	mov.l @r15+,r6
+	mov.l @r15+,r5
+	mov.l @r15+,r4
+	mov.l @r15+,r3
+	mov.l @r15+,r2
+	mov.l @r15+,r1
+	mov.l @r15+,r0
+	stc sgr, r15    ! Restore r15, destroyed by this sequence. 
+	rte
+	nop
+#if defined(__SH_FPU_ANY__)
+	.balign 4
+	/* Address of the word at offset 4 of the __fpscr_values object,
+	   with the user label prefix applied to the symbol name.  */
+pervading_precision_k:
+#define CONCAT1(A,B) A##B
+#define CONCAT(A,B) CONCAT1(A,B)
+	.long CONCAT(__USER_LABEL_PREFIX__,__fpscr_values)+4
+#endif
+#else
+	mov.l 2f, r0     ! Load the old vbr setting (if any).
+	mov.l @r0, r0
+	cmp/eq #0, r0
+	! no previous vbr - jump to own handler
+	bt chandler
+	! there was a previous handler - chain them
+	/* 0x180 is added to the value rotated right by two, which is
+	   0x600 after rotating back.  */
+	rotcr r0
+	rotcr r0
+	add #0x7f, r0	 ! 0x1fc
+	add #0x7f, r0	 ! 0x3f8
+	add #0x7f, r0	 ! 0x5f4
+	add #0x03, r0	 ! 0x600
+	rotcl r0
+	rotcl r0	 ! Add 0x600 without corrupting another register
+	jmp @r0
+	nop
+	.balign 4
+2:
+	.long old_vbr
+#endif	 /* PROFILE code */
+	/* Final fallback: calls _exit with the expevt value in r4.  */
+chandler:
+	mov.l expevt_k, r4
+	mov.l @r4, r4 ! r4 is value of expevt hence making this the return code
+	mov.l handler_exit_k,r0
+	jsr   @r0
+	nop
+	! We should never return from _exit but in case we do we would enter
+	! the following tight loop
+limbo:
+	bra limbo
+	nop
+	.balign 4
+	/* Literal pool for the exception handlers.  */
+#ifdef PROFILE
+interrupt_stack_k:
+	.long __timer_stack	! The high end of the stack
+timer_handler_k:
+	.long __profil_counter
+#endif
+expevt_k:
+	.long 0xff000024 ! Address of expevt
+chandler_k:	
+	.long chandler	
+superh_trap_handler_k:
+	.long	__superh_trap_handler
+handler_exit_k:
+	.long _exit
+	.align 2
+! Simulated compile of trap handler.
+	/* The empty section switches only plant the labels that the
+	   hand-written debug sections refer to.  */
+	.section	.debug_abbrev,"",@progbits
+.Ldebug_abbrev0:
+	.section	.debug_info,"",@progbits
+.Ldebug_info0:
+	.section	.debug_line,"",@progbits
+.Ldebug_line0:
+	.text
+.Ltext0:
+	.align 5
+	/* Default trap handler.  It stores r4 (set to the expevt value by
+	   the generic handler) in a 4-byte frame, copies r1 into pr and
+	   returns there.  The .LCFI labels mark the points described by
+	   the .debug_frame data.  */
+	.type	__superh_trap_handler,@function
+__superh_trap_handler:
+.LFB1:
+	mov.l	r14,@-r15
+.LCFI0:
+	add	#-4,r15
+.LCFI1:
+	mov	r15,r14
+.LCFI2:
+	mov.l	r4,@r14
+	/* Return target: the generic handler passes chandler in r1.  */
+	lds	r1, pr
+	add	#4,r14
+	mov	r14,r15
+	mov.l	@r15+,r14
+	rts	
+	nop
+.LFE1:
+.Lfe1:
+	.size	__superh_trap_handler,.Lfe1-__superh_trap_handler
+	/* Hand-encoded call-frame information for __superh_trap_handler:
+	   one CIE (.Lframe0) and one FDE covering .LFB1 up to .LFE1, with
+	   advance steps keyed to the .LCFI labels.  */
+	.section	.debug_frame,"",@progbits
+.Lframe0:
+	.ualong	.LECIE0-.LSCIE0
+.LSCIE0:
+	.ualong	0xffffffff
+	.byte	0x1
+	.string	""
+	.uleb128 0x1
+	.sleb128 -4
+	.byte	0x11
+	.byte	0xc
+	.uleb128 0xf
+	.uleb128 0x0
+	.align 2
+.LECIE0:
+.LSFDE0:
+	.ualong	.LEFDE0-.LASFDE0
+.LASFDE0:
+	.ualong	.Lframe0
+	.ualong	.LFB1
+	.ualong	.LFE1-.LFB1
+	.byte	0x4
+	.ualong	.LCFI0-.LFB1
+	.byte	0xe
+	.uleb128 0x4
+	.byte	0x4
+	.ualong	.LCFI1-.LCFI0
+	.byte	0xe
+	.uleb128 0x8
+	.byte	0x8e
+	.uleb128 0x1
+	.byte	0x4
+	.ualong	.LCFI2-.LCFI1
+	.byte	0xd
+	.uleb128 0xe
+	.align 2
+.LEFDE0:
+	/* End-of-text marker used by the debug sections below.  */
+	.text
+.Letext0:
+	/* Hand-encoded debug information for the handler, laid out as a
+	   compiler would emit it for "trap_handler.c".  The values are
+	   fixed data and must stay in step with the abbrev table.  */
+	.section	.debug_info
+	.ualong	0xb3
+	.uaword	0x2
+	.ualong	.Ldebug_abbrev0
+	.byte	0x4
+	.uleb128 0x1
+	.ualong	.Ldebug_line0
+	.ualong	.Letext0
+	.ualong	.Ltext0
+	.string	"trap_handler.c"
+	.string	"xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+	.string	"GNU C 3.2 20020529 (experimental)"
+	.byte	0x1
+	.uleb128 0x2
+	.ualong	0xa6
+	.byte	0x1
+	.string	"_superh_trap_handler"
+	.byte	0x1
+	.byte	0x2
+	.byte	0x1
+	.ualong	.LFB1
+	.ualong	.LFE1
+	.byte	0x1
+	.byte	0x5e
+	.uleb128 0x3
+	.string	"trap_reason"
+	.byte	0x1
+	.byte	0x1
+	.ualong	0xa6
+	.byte	0x2
+	.byte	0x91
+	.sleb128 0
+	.byte	0x0
+	.uleb128 0x4
+	.string	"unsigned int"
+	.byte	0x4
+	.byte	0x7
+	.byte	0x0
+	/* Abbreviation table referenced from .debug_info (four entries,
+	   numbered 1 to 4, each ended by a pair of zero bytes).  */
+	.section	.debug_abbrev
+	.uleb128 0x1
+	.uleb128 0x11
+	.byte	0x1
+	.uleb128 0x10
+	.uleb128 0x6
+	.uleb128 0x12
+	.uleb128 0x1
+	.uleb128 0x11
+	.uleb128 0x1
+	.uleb128 0x3
+	.uleb128 0x8
+	.uleb128 0x1b
+	.uleb128 0x8
+	.uleb128 0x25
+	.uleb128 0x8
+	.uleb128 0x13
+	.uleb128 0xb
+	.byte	0x0
+	.byte	0x0
+	.uleb128 0x2
+	.uleb128 0x2e
+	.byte	0x1
+	.uleb128 0x1
+	.uleb128 0x13
+	.uleb128 0x3f
+	.uleb128 0xc
+	.uleb128 0x3
+	.uleb128 0x8
+	.uleb128 0x3a
+	.uleb128 0xb
+	.uleb128 0x3b
+	.uleb128 0xb
+	.uleb128 0x27
+	.uleb128 0xc
+	.uleb128 0x11
+	.uleb128 0x1
+	.uleb128 0x12
+	.uleb128 0x1
+	.uleb128 0x40
+	.uleb128 0xa
+	.byte	0x0
+	.byte	0x0
+	.uleb128 0x3
+	.uleb128 0x5
+	.byte	0x0
+	.uleb128 0x3
+	.uleb128 0x8
+	.uleb128 0x3a
+	.uleb128 0xb
+	.uleb128 0x3b
+	.uleb128 0xb
+	.uleb128 0x49
+	.uleb128 0x13
+	.uleb128 0x2
+	.uleb128 0xa
+	.byte	0x0
+	.byte	0x0
+	.uleb128 0x4
+	.uleb128 0x24
+	.byte	0x0
+	.uleb128 0x3
+	.uleb128 0x8
+	.uleb128 0xb
+	.uleb128 0xb
+	.uleb128 0x3e
+	.uleb128 0xb
+	.byte	0x0
+	.byte	0x0
+	.byte	0x0
+	/* Public-name table with the single entry for the handler.  */
+	.section	.debug_pubnames,"",@progbits
+	.ualong	0x27
+	.uaword	0x2
+	.ualong	.Ldebug_info0
+	.ualong	0xb7
+	.ualong	0x67
+	.string	"_superh_trap_handler"
+	.ualong	0x0
+	/* Address-range table covering .Ltext0 up to .Letext0.  */
+	.section	.debug_aranges,"",@progbits
+	.ualong	0x1c
+	.uaword	0x2
+	.ualong	.Ldebug_info0
+	.byte	0x4
+	.byte	0x0
+	.uaword	0x0
+	.uaword	0x0
+	.ualong	.Ltext0
+	.ualong	.Letext0-.Ltext0
+	.ualong	0x0
+	.ualong	0x0
+#endif /* VBR_SETUP */
+#endif /* ! __SH5__ */
diff --git a/gcc-4.9/libgcc/config/sh/crti.S b/gcc-4.9/libgcc/config/sh/crti.S
new file mode 100644
index 000000000..550f63758
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/crti.S
@@ -0,0 +1,125 @@
+/* Copyright (C) 2000-2014 Free Software Foundation, Inc.
+   This file was adapted from glibc sources.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+/* The code in sections .init and .fini is supposed to be a single
+   regular function.  The function in .init is called directly from
+   start in crt1.S.  The function in .fini is atexit()ed in crt1.S
+   too.
+
+   crti.S contributes the prologue of a function to these sections,
+   and crtn.S supplies the epilogue.  STARTFILE_SPEC should list
+   crti.o before any other object files that might add code to .init
+   or .fini sections, and ENDFILE_SPEC should list crtn.o after any
+   such object files.  */
+
+	.section .init
+/* The alignment below can't be smaller, otherwise the mova below
+   breaks.  Yes, we might align just the label, but then we'd be
+   exchanging an alignment here for one there, since the code fragment
+   below ensures 4-byte alignment on __ELF__.  */
+#ifdef __ELF__
+	.p2align 2
+#else
+	.p2align 1
+#endif
+	.global	 _init
+_init:	/* prologue only: the body and the return come from later .init contributions */
+#if __SHMEDIA__
+	addi	r15, -16, r15	/* reserve 16 bytes of stack */
+	st.q	r15, 8, r14	/* save r14 at r15+8 */
+	st.q	r15, 0, r18	/* save r18 at r15+0 */
+	add	r15, r63, r14	/* r14 = r15 + r63 */
+#elif __SH5__ && ! __SHMEDIA__
+	mov	r15,r0	/* r0 = incoming stack pointer */
+	add	#-8,r15	/* reserve 8 bytes of stack */
+	mov.l	r14,@-r0	/* store r14 in the upper reserved word */
+	sts.l	pr,@-r0	/* store pr in the lower reserved word */
+	mov	r15,r14	/* r14 = r15 */
+	nop
+#else
+#ifdef __ELF__
+	mov.l	r12,@-r15	/* push r12 before it is overwritten below */
+	mova	0f,r0	/* r0 = address of the literal at 0: */
+	mov.l	0f,r12	/* r12 = contents of that literal */
+#endif
+	mov.l	r14,@-r15	/* push r14 */
+#ifdef __ELF__
+	add	r0,r12	/* r12 = literal address + literal value; presumably the GOT address - confirm */
+#endif
+	sts.l	pr,@-r15	/* push the return address held in pr */
+#ifdef __ELF__
+	bra	1f	/* jump over the literal; the next insn is the delay slot */
+#endif
+	mov	r15,r14	/* r14 = r15 */
+#ifdef __ELF__
+0:	.long	_GLOBAL_OFFSET_TABLE_
+1:
+#endif
+#endif /* __SHMEDIA__ */
+
+	.section .fini
+/* The alignment below can't be smaller, otherwise the mova below
+   breaks.  Yes, we might align just the label, but then we'd be
+   exchanging an alignment here for one there, since the code fragment
+   below ensures 4-byte alignment on __ELF__.  */
+#ifdef __ELF__
+	.p2align 2
+#else
+	.p2align 1
+#endif
+	.global  _fini
+_fini:	/* prologue only: the body and the return come from later .fini contributions */
+#if __SHMEDIA__
+	addi	r15, -16, r15	/* reserve 16 bytes of stack */
+	st.q	r15, 8, r14	/* save r14 at r15+8 */
+	st.q	r15, 0, r18	/* save r18 at r15+0 */
+	add	r15, r63, r14	/* r14 = r15 + r63 */
+#elif __SH5__ && ! __SHMEDIA__
+	mov	r15,r0	/* r0 = incoming stack pointer */
+	add	#-8,r15	/* reserve 8 bytes of stack */
+	mov.l	r14,@-r0	/* store r14 in the upper reserved word */
+	sts.l	pr,@-r0	/* store pr in the lower reserved word */
+	mov	r15,r14	/* r14 = r15 */
+	nop
+#else
+#ifdef __ELF__
+	mov.l	r12,@-r15	/* push r12 before it is overwritten below */
+	mova	0f,r0	/* r0 = address of the literal at 0: */
+	mov.l	0f,r12	/* r12 = contents of that literal */
+#endif
+	mov.l	r14,@-r15	/* push r14 */
+#ifdef __ELF__
+	add	r0,r12	/* r12 = literal address + literal value; presumably the GOT address - confirm */
+#endif
+	sts.l	pr,@-r15	/* push the return address held in pr */
+#ifdef __ELF__
+	bra	1f	/* jump over the literal; the next insn is the delay slot */
+#endif
+	mov	r15,r14	/* r14 = r15 */
+#ifdef __ELF__
+0:	.long	_GLOBAL_OFFSET_TABLE_
+1:
+#endif
+#endif /* __SHMEDIA__ */
diff --git a/gcc-4.9/libgcc/config/sh/crtn.S b/gcc-4.9/libgcc/config/sh/crtn.S
new file mode 100644
index 000000000..9d7c4eb65
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/crtn.S
@@ -0,0 +1,77 @@
+/* Copyright (C) 2000-2014 Free Software Foundation, Inc.
+   This file was adapted from glibc sources.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* See an explanation about .init and .fini in crti.S.  */
+
+	.section .init
+#if __SHMEDIA__
+	add	r14, r63, r15	/* r15 = r14 + r63 */
+	ld.q	r15, 0, r18	/* reload r18 from r15+0 */
+	ptabs	r18, tr0	/* tr0 = branch target taken from r18 */
+	ld.q	r15, 8, r14	/* reload r14 from r15+8 */
+	addi	r15, 16, r15	/* release the 16 bytes of stack */
+	blink	tr0, r63	/* branch to tr0 */
+#elif __SH5__ && ! __SHMEDIA__
+	mov	r14,r15	/* r15 = r14 */
+	lds.l	@r14+,pr	/* pr = word at r14, then r14 += 4 */
+	mov.l	@r14,r14	/* r14 = word at the advanced r14 */
+	rts
+	add	#8,r15	/* delay slot: release the 8 bytes of stack */
+#else
+	mov	r14,r15	/* r15 = r14 */
+	lds.l	@r15+,pr	/* pop pr */
+	mov.l	@r15+,r14	/* pop r14 */
+	rts
+#ifdef __ELF__
+	mov.l	@r15+,r12	/* delay slot: pop r12 */
+#else
+	nop	/* delay slot */
+#endif
+#endif /* __SHMEDIA__ */
+
+	.section .fini
+#if __SHMEDIA__
+	add	r14, r63, r15	/* r15 = r14 + r63 */
+	ld.q	r15, 0, r18	/* reload r18 from r15+0 */
+	ptabs	r18, tr0	/* tr0 = branch target taken from r18 */
+	ld.q	r15, 8, r14	/* reload r14 from r15+8 */
+	addi	r15, 16, r15	/* release the 16 bytes of stack */
+	blink	tr0, r63	/* branch to tr0 */
+#elif __SH5__ && ! __SHMEDIA__
+	mov	r14,r15	/* r15 = r14 */
+	lds.l	@r14+,pr	/* pr = word at r14, then r14 += 4 */
+	mov.l	@r14,r14	/* r14 = word at the advanced r14 */
+	rts
+	add	#8,r15	/* delay slot: release the 8 bytes of stack */
+#else
+	mov	r14,r15	/* r15 = r14 */
+	lds.l	@r15+,pr	/* pop pr */
+	mov.l	@r15+,r14	/* pop r14 */
+	rts
+#ifdef __ELF__
+	mov.l	@r15+,r12	/* delay slot: pop r12 */
+#else
+	nop	/* delay slot */
+#endif
+#endif /* __SHMEDIA__ */
diff --git a/gcc-4.9/libgcc/config/sh/lib1funcs-4-300.S b/gcc-4.9/libgcc/config/sh/lib1funcs-4-300.S
new file mode 100644
index 000000000..5f05b0812
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/lib1funcs-4-300.S
@@ -0,0 +1,936 @@
+/* Copyright (C) 2004-2014 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+/* libgcc routines for the STMicroelectronics ST40-300 CPU.
+   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
+
+#include "lib1funcs.h"
+
+#if !__SHMEDIA__
+#ifdef L_div_table
+#if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
+/* This code uses shld, and thus is not suitable for SH1 / SH2.  */
+
+/* Signed / unsigned division without use of FPU, optimized for SH4-300.
+   Uses a lookup table for divisors in the range -128 .. +127, and
+   div1 with case distinction for larger divisors in three more ranges.
+   The code is lumped together with the table to allow the use of mova.  */
+#ifdef __LITTLE_ENDIAN__
+#define L_LSB 0
+#define L_LSWMSB 1
+#define L_MSWLSB 2
+#else
+#define L_LSB 3
+#define L_LSWMSB 2
+#define L_MSWLSB 1
+#endif
+
+	.global	GLOBAL(udivsi3_i4i)
+	.global	GLOBAL(sdivsi3_i4i)
+	FUNC(GLOBAL(udivsi3_i4i))
+	FUNC(GLOBAL(sdivsi3_i4i))
+
+	.balign 4
+LOCAL(div_ge8m): ! 10 cycles up to here. Entered from div_ge32k when r5 does not fit in 24 signed bits.
+	rotcr r1 ! signed shift must use original sign from r4
+	div0s r5,r4 ! set up the signed div1 steps from the signs of r5 and r4
+	mov #24,r7
+	shld r7,r6 ! r6 <<= 24
+	shad r0,r1 ! r0 is still -7 (set in sdivsi3_i4i), so r1 is shifted right by 7
+	rotcl r6
+	div1 r5,r1
+	swap.w r5,r0 ! detect -0x80000000 : 0x800000
+	rotcl r6
+	swap.w r4,r7
+	div1 r5,r1
+	swap.b r7,r7
+	rotcl r6
+	or r7,r0
+	div1 r5,r1
+	swap.w r0,r7
+	rotcl r6
+	or r7,r0
+	div1 r5,r1
+	add #-0x80,r0
+	rotcl r6
+	extu.w r0,r0
+	div1 r5,r1
+	neg r0,r0
+	rotcl r6
+	swap.w r0,r0
+	div1 r5,r1
+	mov.l @r15+,r7 ! pop r7 (pushed at div_ge32k)
+	and r6,r0
+	rotcl r6
+	div1 r5,r1
+	shll2 r0
+	rotcl r6
+	exts.b r0,r0
+	div1 r5,r1
+	swap.w r0,r0
+	exts.w r0,r1
+	exts.b r6,r0
+	mov.l @r15+,r6 ! pop r6 (pushed at sdivsi3_i4i entry)
+	rotcl r0
+	rts
+	sub r1,r0 ! delay slot
+	! 31 cycles up to here
+
+	.balign 4
+LOCAL(udiv_ge64k): ! 3 cycles up to here. Entered from udivsi3_i4i when r5 does not fit in 16 bits.
+	mov r4,r0
+	shlr8 r0 ! r0 = dividend >> 8
+	div0u
+	cmp/hi r0,r5 ! T = divisor > (dividend >> 8), i.e. the quotient is below 256
+	bt LOCAL(udiv_r8)
+	mov.l r5,@-r15 ! push r5, it is shifted in place below
+	shll8 r5
+	! 7 cycles up to here
+	.rept 8
+	div1 r5,r0
+	.endr
+	extu.b r4,r1 ! 15 cycles up to here
+	extu.b r0,r6
+	xor r1,r0
+	xor r6,r0
+	swap.b r6,r6
+	.rept 8
+	div1 r5,r0
+	.endr ! 25 cycles up to here
+	extu.b r0,r0
+	mov.l @r15+,r5 ! pop r5
+	or r6,r0
+	mov.l @r15+,r6 ! pop r6 (pushed at udivsi3_i4i entry)
+	rts
+	rotcl r0 ! delay slot. 28 cycles up to here
+
+	.balign 4
+LOCAL(udiv_r8): ! 6 cycles up to here. Entered from udiv_ge64k with r0 = dividend >> 8.
+	mov.l r4,@-r15 ! push r4, it is shifted in place below
+	shll16 r4
+	shll8 r4 ! r4 <<= 24 in total
+	!
+	shll r4
+	mov r0,r1
+	div1 r5,r1
+	mov r4,r0
+	rotcl r0
+	mov.l @r15+,r4 ! pop r4
+	div1 r5,r1
+	! 12 cycles up to here
+	.rept 6
+	rotcl r0; div1 r5,r1
+	.endr
+	mov.l @r15+,r6 ! pop r6 (pushed at udivsi3_i4i entry). 24 cycles up to here
+	rts
+	rotcl r0 ! delay slot
+
+	.balign 4
+LOCAL(div_ge32k): ! 6 cycles up to here. Entered from sdivsi3_i4i when r5 does not fit in 16 signed bits.
+	mov.l r7,@-r15 ! push r7
+	swap.w r5,r6
+	exts.b r6,r7
+	exts.w r6,r6
+	cmp/eq r6,r7 ! T = upper half of r5 sign-extends from 8 bits, i.e. r5 fits in 24 signed bits
+	extu.b r1,r6
+	bf/s LOCAL(div_ge8m)
+	cmp/hi r1,r4 ! copy sign bit of r4 into T
+	rotcr r1 ! signed shift must use original sign from r4
+	div0s r5,r4 ! set up the signed div1 steps from the signs of r5 and r4
+	shad r0,r1 ! r0 is still -7 (set in sdivsi3_i4i), so r1 is shifted right by 7
+	shll8 r5 ! r5 <<= 8, undone in the rts delay slot below
+	div1 r5,r1
+	mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00)
+	div1 r5,r1
+	shlr8 r7
+	div1 r5,r1
+	swap.w r4,r0
+	div1 r5,r1
+	swap.b r0,r0
+	div1 r5,r1
+	or r0,r7
+	div1 r5,r1
+	add #-80,r7 ! NOTE(review): div_ge8m and sdivsi3_i4i use #-0x80 at this step - confirm decimal 80 is intended
+	div1 r5,r1
+	swap.w r7,r0
+	div1 r5,r1
+	or r0,r7
+	extu.b r1,r0
+	xor r6,r1
+	xor r0,r1
+	exts.b r0,r0
+	div1 r5,r1
+	extu.w r7,r7
+	div1 r5,r1
+	neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000
+	div1 r5,r1
+	and r0,r7
+	div1 r5,r1
+	swap.w r7,r7 ! 26 cycles up to here.
+	div1 r5,r1
+	shll8 r0
+	div1 r5,r1
+	exts.w r7,r7
+	div1 r5,r1
+	add r0,r0
+	div1 r5,r1
+	sub r7,r0
+	extu.b r1,r1
+	mov.l @r15+,r7 ! pop r7
+	rotcl r1
+	mov.l @r15+,r6 ! pop r6 (pushed at sdivsi3_i4i entry)
+	add r1,r0
+	mov #-8,r1
+	rts
+	shad r1,r5 ! delay slot: shift r5 back right by 8. 34 cycles up to here
+
+	.balign 4
+GLOBAL(udivsi3_i4i): ! unsigned divide: dividend in r4, divisor in r5, quotient returned in r0
+	mov.l r6,@-r15 ! push r6
+	extu.w r5,r6
+	cmp/eq r5,r6 ! T = divisor fits in 16 bits
+	mov #0x7f,r0
+	bf LOCAL(udiv_ge64k)
+	cmp/hi r0,r5 ! T = divisor > 0x7f
+	bf LOCAL(udiv_le128) ! divisors 0..127 go through the table lookup
+	mov r4,r1
+	shlr8 r1
+	div0u
+	shlr r1
+	shll16 r6
+	div1 r6,r1
+	extu.b r4,r0 ! 7 cycles up to here
+	.rept 8
+	div1 r6,r1
+	.endr     ! 15 cycles up to here
+	xor r1,r0 ! xor dividend with result lsb
+	.rept 6
+	div1 r6,r1
+	.endr
+	mov.l r7,@-r15 ! push r7. 21 cycles up to here
+	div1 r6,r1
+	extu.b r0,r7
+	div1 r6,r1
+	shll8 r7
+	extu.w r1,r0
+	xor r7,r1 ! replace lsb of result with lsb of dividend
+	div1 r6,r1
+	mov #0,r7
+	div1 r6,r1
+	!
+	div1 r6,r1
+	bra LOCAL(div_end) ! shared tail inside sdivsi3_i4i, it pops r7 and r6
+	div1 r6,r1 ! delay slot. 28 cycles up to here
+
+	/* This is link-compatible with a GLOBAL(sdivsi3) call,
+	   but we effectively clobber only r1, macl and mach  */
+        /* Because negative quotients are calculated as one's complements,
+	   -0x80000000 divided by the smallest positive number of a number
+	   range (0x80, 0x8000, 0x800000) causes saturation in the one's
+           complement representation, and we have to suppress the
+	   one's -> two's complement adjustment.  Since positive numbers
+	   don't get such an adjustment, it's OK to also compute one's -> two's
+	   complement adjustment suppression for a dividend of 0.  */
+	.balign 4
+GLOBAL(sdivsi3_i4i): ! signed divide: dividend in r4, divisor in r5, quotient returned in r0
+	mov.l r6,@-r15 ! push r6
+	exts.b r5,r6
+	cmp/eq r5,r6 ! T = divisor fits in 8 signed bits
+	mov #-1,r1
+	bt/s LOCAL(div_le128) ! divisors -128..127 go through the table lookup
+	cmp/pz r4 ! delay slot: T = dividend >= 0
+	addc r4,r1 ! r1 = r4 - 1 + T, i.e. r4 when non-negative and r4 - 1 when negative
+	exts.w r5,r6
+	cmp/eq r5,r6 ! T = divisor fits in 16 signed bits
+	mov #-7,r0
+	bf/s LOCAL(div_ge32k)
+	cmp/hi r1,r4 ! copy sign bit of r4 into T
+	rotcr r1
+	shll16 r6  ! 7 cycles up to here
+	shad r0,r1 ! r0 is -7, so r1 is shifted right by 7
+	div0s r5,r4 ! set up the signed div1 steps from the signs of r5 and r4
+	div1 r6,r1
+	mov.l r7,@-r15 ! push r7
+	div1 r6,r1
+	mov r4,r0 ! re-compute adjusted dividend
+	div1 r6,r1
+	mov #-31,r7
+	div1 r6,r1
+	shad r7,r0
+	div1 r6,r1
+	add r4,r0 ! adjusted dividend
+	div1 r6,r1
+	mov.l r8,@-r15 ! push r8
+	div1 r6,r1
+	swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80
+	div1 r6,r1
+	swap.b r8,r8
+	xor r1,r0 ! xor dividend with result lsb
+	div1 r6,r1
+	div1 r6,r1
+	or r5,r8
+	div1 r6,r1
+	add #-0x80,r8 ! r8 is 0 iff there is a match
+	div1 r6,r1
+	swap.w r8,r7 ! or upper 16 bits...
+	div1 r6,r1
+	or r7,r8 !...into lower 16 bits
+	div1 r6,r1
+	extu.w r8,r8
+	div1 r6,r1
+	extu.b r0,r7
+	div1 r6,r1
+	shll8 r7
+	exts.w r1,r0
+	xor r7,r1 ! replace lsb of result with lsb of dividend
+	div1 r6,r1
+	neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm.
+	div1 r6,r1
+	and r0,r8
+	div1 r6,r1
+	swap.w r8,r7
+	div1 r6,r1
+	mov.l @r15+,r8 ! pop r8. 58 insns, 29 cycles up to here
+LOCAL(div_end): ! shared tail, also entered from udivsi3_i4i with r7 = 0
+	div1 r6,r1
+	shll8 r0
+	div1 r6,r1
+	exts.w r7,r7
+	div1 r6,r1
+	add r0,r0
+	div1 r6,r1
+	sub r7,r0
+	extu.b r1,r1
+	mov.l @r15+,r7 ! pop r7
+	rotcl r1
+	mov.l @r15+,r6 ! pop r6
+	rts
+	add r1,r0 ! delay slot
+
+	.balign 4
+LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict). Entered from udivsi3_i4i with r6 = r5 <= 0x7f.
+	mova LOCAL(div_table_inv),r0
+	shll2 r6 ! r6 = divisor * 4, the byte offset of its table entry
+	mov.l @(r0,r6),r1 ! r1 = inverse table entry for this divisor
+	mova LOCAL(div_table_clz),r0
+	lds r4,mach ! mach = dividend, kept if the multiply below is skipped
+	!
+	!
+	!
+	tst r1,r1
+	!
+	bt 0f ! table entry is zero: skip the multiply
+	dmulu.l r1,r4 ! mach:macl = r1 * r4, unsigned
+0:	mov.b @(r0,r5),r1 ! r1 = shift count for this divisor (zero or negative)
+	clrt
+	!
+	!
+	sts mach,r0
+	addc r4,r0 ! add the dividend with T clear, the carry out lands in T
+	rotcr r0 ! rotate right by one, T becomes the top bit
+	mov.l @r15+,r6 ! pop r6 (pushed at udivsi3_i4i entry)
+	rts
+	shld r1,r0 ! delay slot: logical shift of r0 by the count in r1
+
+	.balign 4
+LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict). Entered from sdivsi3_i4i with r6 = r5 in -128..127.
+	mova LOCAL(div_table_inv),r0
+	shll2 r6 ! r6 = divisor * 4, a negative offset for negative divisors
+	mov.l @(r0,r6),r1 ! r1 = inverse table entry for this divisor
+	mova LOCAL(div_table_clz),r0
+	neg r4,r6
+	bf 0f ! T is still dividend >= 0, from the delay slot of the branch that got here
+	mov r4,r6
+0:	lds r6,mach ! r6 = r4 if it is non-negative, else -r4. mach is kept if the multiply is skipped
+	tst r1,r1
+	bt 0f ! table entry is zero: skip the multiply
+	dmulu.l r1,r6 ! mach:macl = r1 * r6, unsigned
+0:	div0s r4,r5 ! T = sign of r4 xor sign of r5
+	mov.b @(r0,r5),r1 ! r1 = shift count for this divisor (zero or negative)
+	bt/s LOCAL(le128_neg) ! signs differ: take the negating tail
+	clrt ! delay slot: clear T for the addc on either path
+	!
+	sts mach,r0
+	addc r6,r0 ! add r6 with T clear, the carry out lands in T
+	rotcr r0 ! rotate right by one, T becomes the top bit
+	mov.l @r15+,r6 ! pop r6 (pushed at sdivsi3_i4i entry)
+	rts
+	shld r1,r0 ! delay slot: logical shift of r0 by the count in r1
+
+/* Could trap divide by zero for the cost of one cycle more mispredict penalty:
+...
+	dmulu.l r1,r6
+0:	div0s r4,r5
+	bt/s LOCAL(le128_neg)
+	tst r5,r5
+	bt LOCAL(div_by_zero)
+	mov.b @(r0,r5),r1
+	sts mach,r0
+	addc r6,r0
+...
+LOCAL(div_by_zero):
+	trapa #
+	.balign 4
+LOCAL(le128_neg):
+	bt LOCAL(div_by_zero)
+	mov.b @(r0,r5),r1
+	sts mach,r0
+	addc r6,r0
+...  */
+
+	.balign 4
+LOCAL(le128_neg): ! tail of div_le128 for operands of opposite sign
+	sts mach,r0
+	addc r6,r0 ! add r6 with T clear, the carry out lands in T
+	rotcr r0 ! rotate right by one, T becomes the top bit
+	mov.l @r15+,r6 ! pop r6 (pushed at sdivsi3_i4i entry)
+	shad r1,r0 ! arithmetic shift of r0 by the count in r1
+	rts
+	neg r0,r0 ! delay slot: negate the result
+	ENDFUNC(GLOBAL(udivsi3_i4i))
+	ENDFUNC(GLOBAL(sdivsi3_i4i))
+
+/* This table has been generated by divtab-sh4.c.  */
+	.balign 4
+	.byte	-7
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-2
+	.byte	-2
+	.byte	-2
+	.byte	-2
+	.byte	-1
+	.byte	-1
+	.byte	0
+LOCAL(div_table_clz):
+	.byte	0
+	.byte	0
+	.byte	-1
+	.byte	-1
+	.byte	-2
+	.byte	-2
+	.byte	-2
+	.byte	-2
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+/* 1/-128 .. 1/127, normalized.  There is an implicit leading 1 in bit 32,
+   or in bit 33 for powers of two.  */
+	.balign 4
+	.long   0x0
+	.long	0x2040811
+	.long	0x4104105
+	.long	0x624DD30
+	.long	0x8421085
+	.long	0xA6810A7
+	.long	0xC9714FC
+	.long	0xECF56BF
+	.long	0x11111112
+	.long	0x135C8114
+	.long	0x15B1E5F8
+	.long	0x18118119
+	.long	0x1A7B9612
+	.long	0x1CF06ADB
+	.long	0x1F7047DD
+	.long	0x21FB7813
+	.long	0x24924925
+	.long	0x27350B89
+	.long	0x29E4129F
+	.long	0x2C9FB4D9
+	.long	0x2F684BDB
+	.long	0x323E34A3
+	.long	0x3521CFB3
+	.long	0x38138139
+	.long	0x3B13B13C
+	.long	0x3E22CBCF
+	.long	0x41414142
+	.long	0x446F8657
+	.long	0x47AE147B
+	.long	0x4AFD6A06
+	.long	0x4E5E0A73
+	.long	0x51D07EAF
+	.long	0x55555556
+	.long	0x58ED2309
+	.long	0x5C9882BA
+	.long	0x60581606
+	.long	0x642C8591
+	.long	0x68168169
+	.long	0x6C16C16D
+	.long	0x702E05C1
+	.long	0x745D1746
+	.long	0x78A4C818
+	.long	0x7D05F418
+	.long	0x81818182
+	.long	0x86186187
+	.long	0x8ACB90F7
+	.long	0x8F9C18FA
+	.long	0x948B0FCE
+	.long	0x9999999A
+	.long	0x9EC8E952
+	.long	0xA41A41A5
+	.long	0xA98EF607
+	.long	0xAF286BCB
+	.long	0xB4E81B4F
+	.long	0xBACF914D
+	.long	0xC0E07039
+	.long	0xC71C71C8
+	.long	0xCD856891
+	.long	0xD41D41D5
+	.long	0xDAE6076C
+	.long	0xE1E1E1E2
+	.long	0xE9131AC0
+	.long	0xF07C1F08
+	.long	0xF81F81F9
+	.long	0x0
+	.long	0x4104105
+	.long	0x8421085
+	.long	0xC9714FC
+	.long	0x11111112
+	.long	0x15B1E5F8
+	.long	0x1A7B9612
+	.long	0x1F7047DD
+	.long	0x24924925
+	.long	0x29E4129F
+	.long	0x2F684BDB
+	.long	0x3521CFB3
+	.long	0x3B13B13C
+	.long	0x41414142
+	.long	0x47AE147B
+	.long	0x4E5E0A73
+	.long	0x55555556
+	.long	0x5C9882BA
+	.long	0x642C8591
+	.long	0x6C16C16D
+	.long	0x745D1746
+	.long	0x7D05F418
+	.long	0x86186187
+	.long	0x8F9C18FA
+	.long	0x9999999A
+	.long	0xA41A41A5
+	.long	0xAF286BCB
+	.long	0xBACF914D
+	.long	0xC71C71C8
+	.long	0xD41D41D5
+	.long	0xE1E1E1E2
+	.long	0xF07C1F08
+	.long	0x0
+	.long	0x8421085
+	.long	0x11111112
+	.long	0x1A7B9612
+	.long	0x24924925
+	.long	0x2F684BDB
+	.long	0x3B13B13C
+	.long	0x47AE147B
+	.long	0x55555556
+	.long	0x642C8591
+	.long	0x745D1746
+	.long	0x86186187
+	.long	0x9999999A
+	.long	0xAF286BCB
+	.long	0xC71C71C8
+	.long	0xE1E1E1E2
+	.long	0x0
+	.long	0x11111112
+	.long	0x24924925
+	.long	0x3B13B13C
+	.long	0x55555556
+	.long	0x745D1746
+	.long	0x9999999A
+	.long	0xC71C71C8
+	.long	0x0
+	.long	0x24924925
+	.long	0x55555556
+	.long	0x9999999A
+	.long	0x0
+	.long	0x55555556
+	.long	0x0
+	.long	0x0
+LOCAL(div_table_inv):
+	.long	0x0
+	.long	0x0
+	.long	0x0
+	.long	0x55555556
+	.long	0x0
+	.long	0x9999999A
+	.long	0x55555556
+	.long	0x24924925
+	.long	0x0
+	.long	0xC71C71C8
+	.long	0x9999999A
+	.long	0x745D1746
+	.long	0x55555556
+	.long	0x3B13B13C
+	.long	0x24924925
+	.long	0x11111112
+	.long	0x0
+	.long	0xE1E1E1E2
+	.long	0xC71C71C8
+	.long	0xAF286BCB
+	.long	0x9999999A
+	.long	0x86186187
+	.long	0x745D1746
+	.long	0x642C8591
+	.long	0x55555556
+	.long	0x47AE147B
+	.long	0x3B13B13C
+	.long	0x2F684BDB
+	.long	0x24924925
+	.long	0x1A7B9612
+	.long	0x11111112
+	.long	0x8421085
+	.long	0x0
+	.long	0xF07C1F08
+	.long	0xE1E1E1E2
+	.long	0xD41D41D5
+	.long	0xC71C71C8
+	.long	0xBACF914D
+	.long	0xAF286BCB
+	.long	0xA41A41A5
+	.long	0x9999999A
+	.long	0x8F9C18FA
+	.long	0x86186187
+	.long	0x7D05F418
+	.long	0x745D1746
+	.long	0x6C16C16D
+	.long	0x642C8591
+	.long	0x5C9882BA
+	.long	0x55555556
+	.long	0x4E5E0A73
+	.long	0x47AE147B
+	.long	0x41414142
+	.long	0x3B13B13C
+	.long	0x3521CFB3
+	.long	0x2F684BDB
+	.long	0x29E4129F
+	.long	0x24924925
+	.long	0x1F7047DD
+	.long	0x1A7B9612
+	.long	0x15B1E5F8
+	.long	0x11111112
+	.long	0xC9714FC
+	.long	0x8421085
+	.long	0x4104105
+	.long	0x0
+	.long	0xF81F81F9
+	.long	0xF07C1F08
+	.long	0xE9131AC0
+	.long	0xE1E1E1E2
+	.long	0xDAE6076C
+	.long	0xD41D41D5
+	.long	0xCD856891
+	.long	0xC71C71C8
+	.long	0xC0E07039
+	.long	0xBACF914D
+	.long	0xB4E81B4F
+	.long	0xAF286BCB
+	.long	0xA98EF607
+	.long	0xA41A41A5
+	.long	0x9EC8E952
+	.long	0x9999999A
+	.long	0x948B0FCE
+	.long	0x8F9C18FA
+	.long	0x8ACB90F7
+	.long	0x86186187
+	.long	0x81818182
+	.long	0x7D05F418
+	.long	0x78A4C818
+	.long	0x745D1746
+	.long	0x702E05C1
+	.long	0x6C16C16D
+	.long	0x68168169
+	.long	0x642C8591
+	.long	0x60581606
+	.long	0x5C9882BA
+	.long	0x58ED2309
+	.long	0x55555556
+	.long	0x51D07EAF
+	.long	0x4E5E0A73
+	.long	0x4AFD6A06
+	.long	0x47AE147B
+	.long	0x446F8657
+	.long	0x41414142
+	.long	0x3E22CBCF
+	.long	0x3B13B13C
+	.long	0x38138139
+	.long	0x3521CFB3
+	.long	0x323E34A3
+	.long	0x2F684BDB
+	.long	0x2C9FB4D9
+	.long	0x29E4129F
+	.long	0x27350B89
+	.long	0x24924925
+	.long	0x21FB7813
+	.long	0x1F7047DD
+	.long	0x1CF06ADB
+	.long	0x1A7B9612
+	.long	0x18118119
+	.long	0x15B1E5F8
+	.long	0x135C8114
+	.long	0x11111112
+	.long	0xECF56BF
+	.long	0xC9714FC
+	.long	0xA6810A7
+	.long	0x8421085
+	.long	0x624DD30
+	.long	0x4104105
+	.long	0x2040811
+	/* maximum error: 0.987342 scaled: 0.921875*/
+
+#endif /* SH3 / SH4 */
+
+#endif /* L_div_table */
+#endif /* !__SHMEDIA__ */
diff --git a/gcc-4.9/libgcc/config/sh/lib1funcs-Os-4-200.S b/gcc-4.9/libgcc/config/sh/lib1funcs-Os-4-200.S
new file mode 100644
index 000000000..f541c81b1
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/lib1funcs-Os-4-200.S
@@ -0,0 +1,322 @@
+/* Copyright (C) 2006-2014 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Moderately Space-optimized libgcc routines for the Renesas SH /
+   STMicroelectronics ST40 CPUs.
+   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
+
+#include "lib1funcs.h"
+
+#if !__SHMEDIA__
+#ifdef L_udivsi3_i4i
+
+/* 88 bytes; sh4-200 cycle counts:
+   divisor  >= 2G: 11 cycles
+   dividend <  2G: 48 cycles
+   dividend >= 2G: divisor != 1: 54 cycles
+   dividend >= 2G, divisor == 1: 22 cycles */
+#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
+!! args in r4 and r5, result in r0, clobber r1
+
+	.global GLOBAL(udivsi3_i4i)
+	FUNC(GLOBAL(udivsi3_i4i))
+GLOBAL(udivsi3_i4i):
+	mova L1,r0 ! r0 = address of the constants at L1
+	cmp/pz r5 ! T = divisor < 2G
+	sts fpscr,r1 ! keep the caller fpscr in r1
+	lds.l @r0+,fpscr ! fpscr = first word at L1, r0 now points at the double
+	sts.l fpul,@-r15 ! push fpul
+	bf LOCAL(huge_divisor)
+	mov.l r1,@-r15 ! push the caller fpscr
+	lds r4,fpul
+	cmp/pz r4 ! T = dividend < 2G
+#ifdef FMOVD_WORKS
+	fmov.d dr0,@-r15 ! push dr0
+	float fpul,dr0 ! dr0 = dividend converted as a signed int
+	fmov.d dr2,@-r15 ! push dr2
+	bt LOCAL(dividend_adjusted)
+	mov #1,r1
+	fmov.d @r0,dr2 ! dr2 = 4294967296.0 from L1
+	cmp/eq r1,r5
+	bt LOCAL(div_by_1) ! divisor is 1: skip the adjustment and the divide
+	fadd dr2,dr0 ! add 2**32 to undo the signed conversion of a dividend >= 2G
+LOCAL(dividend_adjusted):
+	lds r5,fpul
+	float fpul,dr2 ! dr2 = divisor
+	fdiv dr2,dr0 ! dr0 = dividend / divisor
+LOCAL(div_by_1):
+	fmov.d @r15+,dr2 ! pop dr2
+	ftrc dr0,fpul ! fpul = dr0 truncated to an integer
+	fmov.d @r15+,dr0 ! pop dr0
+#else /* !FMOVD_WORKS */
+	fmov.s DR01,@-r15 ! push dr0 as two single-precision halves
+	mov #1,r1
+	fmov.s DR00,@-r15
+	float fpul,dr0 ! dr0 = dividend converted as a signed int
+	fmov.s DR21,@-r15 ! push dr2 as two single-precision halves
+	bt/s LOCAL(dividend_adjusted)
+	fmov.s DR20,@-r15 ! delay slot
+	cmp/eq r1,r5
+	bt LOCAL(div_by_1) ! divisor is 1: skip the adjustment and the divide
+	fmov.s @r0+,DR20 ! load the double at L1 into dr2, one half at a time
+	fmov.s @r0,DR21
+	fadd dr2,dr0 ! add 2**32 to undo the signed conversion of a dividend >= 2G
+LOCAL(dividend_adjusted):
+	lds r5,fpul
+	float fpul,dr2 ! dr2 = divisor
+	fdiv dr2,dr0 ! dr0 = dividend / divisor
+LOCAL(div_by_1):
+	fmov.s @r15+,DR20 ! pop dr2
+	fmov.s @r15+,DR21
+	ftrc dr0,fpul ! fpul = dr0 truncated to an integer
+	fmov.s @r15+,DR00 ! pop dr0
+	fmov.s @r15+,DR01
+#endif /* !FMOVD_WORKS */
+	lds.l @r15+,fpscr ! pop the caller fpscr
+	sts fpul,r0 ! r0 = quotient
+	rts
+	lds.l @r15+,fpul ! delay slot: pop fpul
+
+#ifdef FMOVD_WORKS
+	.p2align 3        ! make double below 8 byte aligned.
+#endif
+LOCAL(huge_divisor): ! divisor >= 2G, so the quotient is 0 or 1
+	lds r1,fpscr ! restore the caller fpscr
+	add #4,r15 ! drop the pushed fpul, it has not been modified
+	cmp/hs r5,r4 ! T = dividend >= divisor, unsigned
+	rts
+	movt r0 ! delay slot: r0 = T
+
+	.p2align 2
+L1:
+#ifndef FMOVD_WORKS
+	.long 0x80000 ! fpscr value with PR (bit 19) set
+#else
+	.long 0x180000 ! fpscr value with PR (bit 19) and SZ (bit 20) set
+#endif
+	.double 4294967296 ! 2**32
+
+	ENDFUNC(GLOBAL(udivsi3_i4i))
+#elif !defined (__sh1__)  /* !__SH_FPU_DOUBLE__ */
+
+#if 0
+/* With 36 bytes, the following would probably be the most compact
+   implementation, but with 139 cycles on an sh4-200, it is extremely slow.  */
+GLOBAL(udivsi3_i4i):
+	mov.l r2,@-r15 ! push r2
+	mov #0,r1
+	div0u
+	mov r1,r2 ! r2 = 0
+	mov.l r3,@-r15 ! push r3
+	mov r1,r3 ! r3 = 0
+	sett
+	mov r4,r0
+LOCAL(loop):
+	rotcr r2
+	;
+	bt/s LOCAL(end)
+	cmp/gt r2,r3 ! delay slot: T = r3 > r2, signed
+	rotcl r0
+	bra LOCAL(loop)
+	div1 r5,r1 ! delay slot
+LOCAL(end):
+	rotcl r0
+	mov.l @r15+,r3 ! pop r3
+	rts
+	mov.l @r15+,r2 ! delay slot: pop r2
+#endif /* 0 */
+
+/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
+   sh4-200 run times:
+   udiv small divisor: 55 cycles
+   udiv large divisor: 52 cycles
+   sdiv small divisor, positive result: 59 cycles
+   sdiv large divisor, positive result: 56 cycles
+   sdiv small divisor, negative result: 65 cycles (*)
+   sdiv large divisor, negative result: 62 cycles (*)
+   (*): r2 is restored in the rts delay slot and has a lingering latency
+        of two more cycles.  */
+	.balign 4
+	.global	GLOBAL(udivsi3_i4i)
+	FUNC(GLOBAL(udivsi3_i4i))
+	FUNC(GLOBAL(sdivsi3_i4i))
+GLOBAL(udivsi3_i4i):
+	sts pr,r1 ! keep the return address in r1, the bsr calls below overwrite pr
+	mov.l r4,@-r15 ! push r4
+	extu.w r5,r0
+	cmp/eq r5,r0 ! T = divisor fits in 16 bits
+	swap.w r4,r0
+	shlr16 r4 ! r4 = upper half of the dividend
+	bf/s LOCAL(large_divisor)
+	div0u ! delay slot
+	mov.l r5,@-r15 ! push r5
+	shll16 r5
+LOCAL(sdiv_small_divisor): ! also entered from sdivsi3_i4i
+	div1 r5,r4
+	bsr LOCAL(div6)
+	div1 r5,r4 ! delay slot
+	div1 r5,r4
+	bsr LOCAL(div6)
+	div1 r5,r4 ! delay slot
+	xtrct r4,r0
+	xtrct r0,r4
+	bsr LOCAL(div7)
+	swap.w r4,r4 ! delay slot
+	div1 r5,r4
+	bsr LOCAL(div7)
+	div1 r5,r4 ! delay slot
+	xtrct r4,r0
+	mov.l @r15+,r5 ! pop r5
+	swap.w r0,r0
+	mov.l @r15+,r4 ! pop r4
+	jmp @r1 ! return through the address saved in r1
+	rotcl r0 ! delay slot
+LOCAL(div7): ! subroutine: seven div1 steps, the last one in the rts delay slot
+	div1 r5,r4
+LOCAL(div6): ! subroutine: six div1 steps, the last one in the rts delay slot
+	            div1 r5,r4; div1 r5,r4; div1 r5,r4
+	div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4
+
+LOCAL(divx3): ! subroutine: three rotcl / div1 pairs, the last div1 in the rts delay slot
+	rotcl r0
+	div1 r5,r4
+	rotcl r0
+	div1 r5,r4
+	rotcl r0
+	rts
+	div1 r5,r4
+
+LOCAL(large_divisor):
+	mov.l r5,@-r15 ! push r5
+LOCAL(sdiv_large_divisor): ! also entered from sdivsi3_i4i, which has pushed r5 already
+	xor r4,r0
+	.rept 4
+	rotcl r0
+	bsr LOCAL(divx3)
+	div1 r5,r4
+	.endr
+	mov.l @r15+,r5 ! pop r5
+	mov.l @r15+,r4 ! pop r4
+	jmp @r1 ! return through the address saved in r1
+	rotcl r0 ! delay slot
+	ENDFUNC(GLOBAL(udivsi3_i4i))
+
+	.global	GLOBAL(sdivsi3_i4i)
+GLOBAL(sdivsi3_i4i):
+	mov.l r4,@-r15 ! push r4
+	cmp/pz r5 ! T = divisor >= 0
+	mov.l r5,@-r15 ! push r5
+	bt/s LOCAL(pos_divisor)
+	cmp/pz r4 ! delay slot: T = dividend >= 0
+	neg r5,r5 ! divisor is negative: negate it
+	extu.w r5,r0
+	bt/s LOCAL(neg_result) ! dividend >= 0 with a negative divisor
+	cmp/eq r5,r0 ! delay slot: T = negated divisor fits in 16 bits
+	neg r4,r4 ! both operands negative: negate the dividend as well
+LOCAL(pos_result):
+	swap.w r4,r0
+	bra LOCAL(sdiv_check_divisor)
+	sts pr,r1 ! delay slot: the shared code returns through r1
+LOCAL(pos_divisor):
+	extu.w r5,r0
+	bt/s LOCAL(pos_result) ! dividend >= 0 with a divisor >= 0
+	cmp/eq r5,r0 ! delay slot: T = divisor fits in 16 bits
+	neg r4,r4 ! negative dividend with a divisor >= 0: negate the dividend
+LOCAL(neg_result):
+	mova LOCAL(negate_result),r0
+	;
+	mov r0,r1 ! the shared code jumps to r1, so it lands on negate_result
+	swap.w r4,r0
+	lds r2,macl ! park r2 in macl
+	sts pr,r2 ! r2 = the real return address
+LOCAL(sdiv_check_divisor):
+	shlr16 r4 ! r4 = upper half of the dividend
+	bf/s LOCAL(sdiv_large_divisor) ! T is still the result of the cmp/eq above
+	div0u ! delay slot
+	bra LOCAL(sdiv_small_divisor)
+	shll16 r5 ! delay slot
+	.balign 4
+LOCAL(negate_result):
+	neg r0,r0 ! negate the quotient
+	jmp @r2 ! return to the caller
+	sts macl,r2 ! delay slot: restore r2
+	ENDFUNC(GLOBAL(sdivsi3_i4i))
+#endif /* !__SH_FPU_DOUBLE__ */
+#endif /* L_udivsi3_i4i */
+
+#ifdef L_sdivsi3_i4i
+#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
+/* 48 bytes, 45 cycles on sh4-200  */
+!! args in r4 and r5, result in r0, clobber r1
+
+	.global GLOBAL(sdivsi3_i4i)
+	FUNC(GLOBAL(sdivsi3_i4i))
+GLOBAL(sdivsi3_i4i):
+	sts.l fpscr,@-r15 ! push the caller fpscr
+	sts fpul,r1 ! keep the caller fpul in r1
+	mova L1,r0 ! r0 = address of the fpscr value at L1
+	lds.l @r0+,fpscr ! fpscr = word at L1
+	lds r4,fpul
+#ifdef FMOVD_WORKS
+	fmov.d dr0,@-r15 ! push dr0
+	float fpul,dr0 ! dr0 = dividend
+	lds r5,fpul
+	fmov.d dr2,@-r15 ! push dr2
+#else
+	fmov.s DR01,@-r15 ! push dr0 as two single-precision halves
+	fmov.s DR00,@-r15
+	float fpul,dr0 ! dr0 = dividend
+	lds r5,fpul
+	fmov.s DR21,@-r15 ! push dr2 as two single-precision halves
+	fmov.s DR20,@-r15
+#endif
+	float fpul,dr2 ! dr2 = divisor
+	fdiv dr2,dr0 ! dr0 = dividend / divisor
+#ifdef FMOVD_WORKS
+	fmov.d @r15+,dr2 ! pop dr2
+#else
+	fmov.s @r15+,DR20 ! pop dr2
+	fmov.s @r15+,DR21
+#endif
+	ftrc dr0,fpul ! fpul = dr0 truncated to an integer
+#ifdef FMOVD_WORKS
+	fmov.d @r15+,dr0 ! pop dr0
+#else
+	fmov.s @r15+,DR00 ! pop dr0
+	fmov.s @r15+,DR01
+#endif
+	lds.l @r15+,fpscr ! pop the caller fpscr
+	sts fpul,r0 ! r0 = quotient
+	rts
+	lds r1,fpul ! delay slot: restore the caller fpul
+
+	.p2align 2
+L1:
+#ifndef FMOVD_WORKS
+	.long 0x80000 ! fpscr value with PR (bit 19) set
+#else
+	.long 0x180000 ! fpscr value with PR (bit 19) and SZ (bit 20) set
+#endif
+
+	ENDFUNC(GLOBAL(sdivsi3_i4i))
+#endif /* __SH_FPU_DOUBLE__ */
+#endif /* L_sdivsi3_i4i */
+#endif /* !__SHMEDIA__ */
diff --git a/gcc-4.9/libgcc/config/sh/lib1funcs.S b/gcc-4.9/libgcc/config/sh/lib1funcs.S
new file mode 100644
index 000000000..3410cf7c1
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/lib1funcs.S
@@ -0,0 +1,4047 @@
+/* Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+!! libgcc routines for the Renesas / SuperH SH CPUs.
+!! Contributed by Steve Chamberlain.
+!! sac@cygnus.com
+
+!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
+!! recoded in assembly by Toshiyasu Morita
+!! tm@netcom.com
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
+/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
+   ELF local label prefixes by J"orn Rennecke
+   amylaar@cygnus.com  */
+
+#include "lib1funcs.h"
+
+/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
+   so it is more convenient to define NO_FPSCR_VALUES here than to
+   define it on the command line.  */
+#if defined __vxworks && defined __PIC__
+#define NO_FPSCR_VALUES
+#endif
+	
+#if ! __SH5__
+#ifdef L_ashiftrt
+	.global	GLOBAL(ashiftrt_r4_0)
+	.global	GLOBAL(ashiftrt_r4_1)
+	.global	GLOBAL(ashiftrt_r4_2)
+	.global	GLOBAL(ashiftrt_r4_3)
+	.global	GLOBAL(ashiftrt_r4_4)
+	.global	GLOBAL(ashiftrt_r4_5)
+	.global	GLOBAL(ashiftrt_r4_6)
+	.global	GLOBAL(ashiftrt_r4_7)
+	.global	GLOBAL(ashiftrt_r4_8)
+	.global	GLOBAL(ashiftrt_r4_9)
+	.global	GLOBAL(ashiftrt_r4_10)
+	.global	GLOBAL(ashiftrt_r4_11)
+	.global	GLOBAL(ashiftrt_r4_12)
+	.global	GLOBAL(ashiftrt_r4_13)
+	.global	GLOBAL(ashiftrt_r4_14)
+	.global	GLOBAL(ashiftrt_r4_15)
+	.global	GLOBAL(ashiftrt_r4_16)
+	.global	GLOBAL(ashiftrt_r4_17)
+	.global	GLOBAL(ashiftrt_r4_18)
+	.global	GLOBAL(ashiftrt_r4_19)
+	.global	GLOBAL(ashiftrt_r4_20)
+	.global	GLOBAL(ashiftrt_r4_21)
+	.global	GLOBAL(ashiftrt_r4_22)
+	.global	GLOBAL(ashiftrt_r4_23)
+	.global	GLOBAL(ashiftrt_r4_24)
+	.global	GLOBAL(ashiftrt_r4_25)
+	.global	GLOBAL(ashiftrt_r4_26)
+	.global	GLOBAL(ashiftrt_r4_27)
+	.global	GLOBAL(ashiftrt_r4_28)
+	.global	GLOBAL(ashiftrt_r4_29)
+	.global	GLOBAL(ashiftrt_r4_30)
+	.global	GLOBAL(ashiftrt_r4_31)
+	.global	GLOBAL(ashiftrt_r4_32)
+
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
+	HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
+
+	.align	1
+/* ashiftrt_r4_N: arithmetic (sign filling) right shift of r4 by the constant
+   N, done in place: value in r4 on entry, shifted value in r4 on return.
+   The shift instructions used also overwrite the T bit.
+   The entry points form fall-through chains: each label contributes one
+   single bit shar and drops into the entry for the next lower count, until
+   a tail finishes the shift (by 24, by 16 or by 1) and returns.  */
+GLOBAL(ashiftrt_r4_32):
+GLOBAL(ashiftrt_r4_31):
+	rotcl	r4		! T = sign bit of r4
+	rts
+	subc	r4,r4		! (delay slot) r4 = -T: all ones if negative, else 0
+
+GLOBAL(ashiftrt_r4_30):
+	shar	r4
+GLOBAL(ashiftrt_r4_29):
+	shar	r4
+GLOBAL(ashiftrt_r4_28):
+	shar	r4
+GLOBAL(ashiftrt_r4_27):
+	shar	r4
+GLOBAL(ashiftrt_r4_26):
+	shar	r4
+GLOBAL(ashiftrt_r4_25):
+	shar	r4
+GLOBAL(ashiftrt_r4_24):
+	shlr16	r4		! logical shift by 16 + 8 = 24 ...
+	shlr8	r4
+	rts
+	exts.b	r4,r4		! (delay slot) ... then sign extend the byte that is left
+
+GLOBAL(ashiftrt_r4_23):
+	shar	r4
+GLOBAL(ashiftrt_r4_22):
+	shar	r4
+GLOBAL(ashiftrt_r4_21):
+	shar	r4
+GLOBAL(ashiftrt_r4_20):
+	shar	r4
+GLOBAL(ashiftrt_r4_19):
+	shar	r4
+GLOBAL(ashiftrt_r4_18):
+	shar	r4
+GLOBAL(ashiftrt_r4_17):
+	shar	r4
+GLOBAL(ashiftrt_r4_16):
+	shlr16	r4		! logical shift by 16 ...
+	rts
+	exts.w	r4,r4		! (delay slot) ... then sign extend the word that is left
+
+GLOBAL(ashiftrt_r4_15):
+	shar	r4
+GLOBAL(ashiftrt_r4_14):
+	shar	r4
+GLOBAL(ashiftrt_r4_13):
+	shar	r4
+GLOBAL(ashiftrt_r4_12):
+	shar	r4
+GLOBAL(ashiftrt_r4_11):
+	shar	r4
+GLOBAL(ashiftrt_r4_10):
+	shar	r4
+GLOBAL(ashiftrt_r4_9):
+	shar	r4
+GLOBAL(ashiftrt_r4_8):
+	shar	r4
+GLOBAL(ashiftrt_r4_7):
+	shar	r4
+GLOBAL(ashiftrt_r4_6):
+	shar	r4
+GLOBAL(ashiftrt_r4_5):
+	shar	r4
+GLOBAL(ashiftrt_r4_4):
+	shar	r4
+GLOBAL(ashiftrt_r4_3):
+	shar	r4
+GLOBAL(ashiftrt_r4_2):
+	shar	r4
+GLOBAL(ashiftrt_r4_1):
+	rts
+	shar	r4		! (delay slot) the last single bit shift of the chain
+
+GLOBAL(ashiftrt_r4_0):
+	rts			! shift by 0: r4 is returned untouched
+	nop
+
+	ENDFUNC(GLOBAL(ashiftrt_r4_0))
+	ENDFUNC(GLOBAL(ashiftrt_r4_1))
+	ENDFUNC(GLOBAL(ashiftrt_r4_2))
+	ENDFUNC(GLOBAL(ashiftrt_r4_3))
+	ENDFUNC(GLOBAL(ashiftrt_r4_4))
+	ENDFUNC(GLOBAL(ashiftrt_r4_5))
+	ENDFUNC(GLOBAL(ashiftrt_r4_6))
+	ENDFUNC(GLOBAL(ashiftrt_r4_7))
+	ENDFUNC(GLOBAL(ashiftrt_r4_8))
+	ENDFUNC(GLOBAL(ashiftrt_r4_9))
+	ENDFUNC(GLOBAL(ashiftrt_r4_10))
+	ENDFUNC(GLOBAL(ashiftrt_r4_11))
+	ENDFUNC(GLOBAL(ashiftrt_r4_12))
+	ENDFUNC(GLOBAL(ashiftrt_r4_13))
+	ENDFUNC(GLOBAL(ashiftrt_r4_14))
+	ENDFUNC(GLOBAL(ashiftrt_r4_15))
+	ENDFUNC(GLOBAL(ashiftrt_r4_16))
+	ENDFUNC(GLOBAL(ashiftrt_r4_17))
+	ENDFUNC(GLOBAL(ashiftrt_r4_18))
+	ENDFUNC(GLOBAL(ashiftrt_r4_19))
+	ENDFUNC(GLOBAL(ashiftrt_r4_20))
+	ENDFUNC(GLOBAL(ashiftrt_r4_21))
+	ENDFUNC(GLOBAL(ashiftrt_r4_22))
+	ENDFUNC(GLOBAL(ashiftrt_r4_23))
+	ENDFUNC(GLOBAL(ashiftrt_r4_24))
+	ENDFUNC(GLOBAL(ashiftrt_r4_25))
+	ENDFUNC(GLOBAL(ashiftrt_r4_26))
+	ENDFUNC(GLOBAL(ashiftrt_r4_27))
+	ENDFUNC(GLOBAL(ashiftrt_r4_28))
+	ENDFUNC(GLOBAL(ashiftrt_r4_29))
+	ENDFUNC(GLOBAL(ashiftrt_r4_30))
+	ENDFUNC(GLOBAL(ashiftrt_r4_31))
+	ENDFUNC(GLOBAL(ashiftrt_r4_32))
+#endif
+
+#ifdef L_ashiftrt_n
+
+!
+! GLOBAL(ashrsi3)
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shift count
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! T bit, r5
+!
+
+	.global	GLOBAL(ashrsi3)
+	HIDDEN_FUNC(GLOBAL(ashrsi3))
+	.align	2
+GLOBAL(ashrsi3):
+	mov	#31,r0
+	and	r0,r5		! r5 = shift count modulo 32
+	mova	LOCAL(ashrsi3_table),r0	! r0 = address of the offset table
+	mov.b	@(r0,r5),r5	! r5 = table entry for this count (sign extended byte)
+#ifdef __sh1__
+	add	r5,r0		! r0 = table address + offset = handler address
+	jmp	@r0
+#else
+	braf	r5		! pc relative branch by the offset in r5
+#endif
+	mov	r4,r0		! (delay slot) r0 = value to shift
+
+	.align	2
+/* One byte per shift count: the distance from the start of the table to the
+   handler for that count.  The entries are read with mov.b, which sign
+   extends, so every handler has to stay within 127 bytes of the table.  */
+LOCAL(ashrsi3_table):
+	.byte		LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
+	.byte		LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
+
+/* Handlers.  They are fall-through chains: each label adds one single bit
+   shar and drops into the handler for the next lower count, until a tail
+   finishes the shift (by 24, by 16 or by 1) and returns.  */
+LOCAL(ashrsi3_31):
+	rotcl	r0		! T = sign bit of r0
+	rts
+	subc	r0,r0		! (delay slot) r0 = -T: all ones if negative, else 0
+
+LOCAL(ashrsi3_30):
+	shar	r0
+LOCAL(ashrsi3_29):
+	shar	r0
+LOCAL(ashrsi3_28):
+	shar	r0
+LOCAL(ashrsi3_27):
+	shar	r0
+LOCAL(ashrsi3_26):
+	shar	r0
+LOCAL(ashrsi3_25):
+	shar	r0
+LOCAL(ashrsi3_24):
+	shlr16	r0		! logical shift by 16 + 8 = 24 ...
+	shlr8	r0
+	rts
+	exts.b	r0,r0		! (delay slot) ... then sign extend the byte that is left
+
+LOCAL(ashrsi3_23):
+	shar	r0
+LOCAL(ashrsi3_22):
+	shar	r0
+LOCAL(ashrsi3_21):
+	shar	r0
+LOCAL(ashrsi3_20):
+	shar	r0
+LOCAL(ashrsi3_19):
+	shar	r0
+LOCAL(ashrsi3_18):
+	shar	r0
+LOCAL(ashrsi3_17):
+	shar	r0
+LOCAL(ashrsi3_16):
+	shlr16	r0		! logical shift by 16 ...
+	rts
+	exts.w	r0,r0		! (delay slot) ... then sign extend the word that is left
+
+LOCAL(ashrsi3_15):
+	shar	r0
+LOCAL(ashrsi3_14):
+	shar	r0
+LOCAL(ashrsi3_13):
+	shar	r0
+LOCAL(ashrsi3_12):
+	shar	r0
+LOCAL(ashrsi3_11):
+	shar	r0
+LOCAL(ashrsi3_10):
+	shar	r0
+LOCAL(ashrsi3_9):
+	shar	r0
+LOCAL(ashrsi3_8):
+	shar	r0
+LOCAL(ashrsi3_7):
+	shar	r0
+LOCAL(ashrsi3_6):
+	shar	r0
+LOCAL(ashrsi3_5):
+	shar	r0
+LOCAL(ashrsi3_4):
+	shar	r0
+LOCAL(ashrsi3_3):
+	shar	r0
+LOCAL(ashrsi3_2):
+	shar	r0
+LOCAL(ashrsi3_1):
+	rts
+	shar	r0		! (delay slot) the last single bit shift of the chain
+
+LOCAL(ashrsi3_0):
+	rts			! shift by 0: the value is returned as is
+	nop
+
+	ENDFUNC(GLOBAL(ashrsi3))
+#endif
+
+#ifdef L_ashiftlt
+
+!
+! GLOBAL(ashlsi3)
+! (For compatibility with older binaries, not used by compiler)
+!
+! Entry:
+!	r4: Value to shift
+!	r5: Shift count
+!
+! Exit:
+!	r0: Result
+!
+! Destroys:
+!	T bit
+!
+!
+! GLOBAL(ashlsi3_r0)
+!
+! Entry:
+!	r4: Value to shift
+!	r0: Shift count
+!
+! Exit:
+!	r0: Result
+!
+! Destroys:
+!	T bit
+
+	.global	GLOBAL(ashlsi3)
+	.global GLOBAL(ashlsi3_r0)
+	HIDDEN_FUNC(GLOBAL(ashlsi3))
+	HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
+/* Both entry points share one jump table with a 4 byte slot (two
+   instructions) per shift count.  Counts that need more than two
+   instructions branch on to one of the shared tails after the table.  */
+GLOBAL(ashlsi3):
+	mov	r5,r0		! move the count to r0 and fall into ashlsi3_r0
+	.align	2
+GLOBAL(ashlsi3_r0):
+
+#ifdef __sh1__
+	and	#31,r0		! r0 = shift count modulo 32
+	shll2	r0		! times 4: byte offset of the table slot
+	mov.l	r4,@-r15	! free r4 for use as a temporary
+	mov	r0,r4
+	mova	LOCAL(ashlsi3_table),r0
+	add	r4,r0		! r0 = address of the table slot
+	mov.l	@r15+,r4	! get the value to shift back
+	jmp	@r0
+	mov	r4,r0		! (delay slot) r0 = value to shift
+	.align 2
+#else
+	and	#31,r0		! r0 = shift count modulo 32
+	shll2	r0		! times 4: byte offset of the table slot
+	braf	r0		! pc relative branch into the table
+	mov	r4,r0		! (delay slot) r0 = value to shift
+#endif
+
+LOCAL(ashlsi3_table):
+	rts				// << 0
+	nop
+LOCAL(ashlsi_1):
+	rts				// << 1
+	shll	r0
+LOCAL(ashlsi_2):			// << 2
+	rts
+	shll2	r0
+	bra	LOCAL(ashlsi_1)		// << 3
+	shll2	r0
+	bra	LOCAL(ashlsi_2)		// << 4
+	shll2	r0
+	bra	LOCAL(ashlsi_5)		// << 5
+	shll	r0
+	bra	LOCAL(ashlsi_6)		// << 6
+	shll2	r0
+	bra	LOCAL(ashlsi_7)		// << 7
+	shll	r0
+LOCAL(ashlsi_8):			// << 8
+	rts
+	shll8	r0
+	bra	LOCAL(ashlsi_8)		// << 9
+	shll	r0
+	bra	LOCAL(ashlsi_8)		// << 10
+	shll2	r0
+	bra	LOCAL(ashlsi_11)	// << 11
+	shll	r0
+	bra	LOCAL(ashlsi_12)	// << 12
+	shll2	r0
+	bra	LOCAL(ashlsi_13)	// << 13
+	shll	r0
+	bra	LOCAL(ashlsi_14)	// << 14
+	shll8	r0
+	bra	LOCAL(ashlsi_15)	// << 15
+	shll8	r0
+LOCAL(ashlsi_16):			// << 16
+	rts
+	shll16	r0
+	bra	LOCAL(ashlsi_16)	// << 17
+	shll	r0
+	bra	LOCAL(ashlsi_16)	// << 18
+	shll2	r0
+	bra	LOCAL(ashlsi_19)	// << 19
+	shll	r0
+	bra	LOCAL(ashlsi_20)	// << 20
+	shll2	r0
+	bra	LOCAL(ashlsi_21)	// << 21
+	shll	r0
+	bra	LOCAL(ashlsi_22)	// << 22
+	shll16	r0
+	bra	LOCAL(ashlsi_23)	// << 23
+	shll16	r0
+	bra	LOCAL(ashlsi_16)	// << 24
+	shll8	r0
+	bra	LOCAL(ashlsi_25)	// << 25
+	shll	r0
+	bra	LOCAL(ashlsi_26)	// << 26
+	shll2	r0
+	bra	LOCAL(ashlsi_27)	// << 27
+	shll	r0
+	bra	LOCAL(ashlsi_28)	// << 28
+	shll2	r0
+	bra	LOCAL(ashlsi_29)	// << 29
+	shll16	r0
+	bra	LOCAL(ashlsi_30)	// << 30
+	shll16	r0
+	and	#1,r0			// << 31
+	rts
+	rotr	r0
+
+/* Shared tails.  Where an rts is not followed by an explicit delay slot
+   instruction, the first instruction of the tail that follows it in the file
+   sits in that delay slot and so counts towards the shift: for example
+   << 5 is shll (table slot) + shll2 (at ashlsi_5) + shll2 (delay slot).
+   Do not reorder or separate these tails.  */
+LOCAL(ashlsi_7):
+	shll2	r0
+LOCAL(ashlsi_5):
+LOCAL(ashlsi_6):
+	shll2	r0
+	rts
+LOCAL(ashlsi_13):
+	shll2	r0
+LOCAL(ashlsi_12):
+LOCAL(ashlsi_11):
+	shll8	r0
+	rts
+LOCAL(ashlsi_21):
+	shll2	r0
+LOCAL(ashlsi_20):
+LOCAL(ashlsi_19):
+	shll16	r0
+	rts
+LOCAL(ashlsi_28):
+LOCAL(ashlsi_27):
+	shll2	r0
+LOCAL(ashlsi_26):
+LOCAL(ashlsi_25):
+	shll16	r0
+	rts
+	shll8	r0
+
+/* The remaining tails step back with a right shift between two left shifts,
+   e.g. << 14 is 8 - 2 + 8 and << 22 is 16 - 2 + 8.  */
+LOCAL(ashlsi_22):
+LOCAL(ashlsi_14):
+	shlr2	r0
+	rts
+	shll8	r0
+
+LOCAL(ashlsi_23):
+LOCAL(ashlsi_15):
+	shlr	r0
+	rts
+	shll8	r0
+
+LOCAL(ashlsi_29):
+	shlr	r0
+LOCAL(ashlsi_30):
+	shlr2	r0
+	rts
+	shll16	r0	
+
+	ENDFUNC(GLOBAL(ashlsi3))
+	ENDFUNC(GLOBAL(ashlsi3_r0))
+#endif
+
+#ifdef L_lshiftrt
+
+!
+! GLOBAL(lshrsi3)
+! (For compatibility with older binaries, not used by compiler)
+!
+! Entry:
+!	r4: Value to shift
+!	r5: Shift count
+!
+! Exit:
+!	r0: Result
+!
+! Destroys:
+!	T bit
+!
+!
+! GLOBAL(lshrsi3_r0)
+!
+! Entry:
+!	r4: Value to shift
+!	r0: Shift count
+!
+! Exit:
+!	r0: Result
+!
+! Destroys:
+!	T bit
+
+	.global	GLOBAL(lshrsi3)
+	.global	GLOBAL(lshrsi3_r0)
+	HIDDEN_FUNC(GLOBAL(lshrsi3))
+	HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
+/* Both entry points share one jump table with a 4 byte slot (two
+   instructions) per shift count.  Counts that need more than two
+   instructions branch on to one of the shared tails after the table.  */
+GLOBAL(lshrsi3):
+	mov	r5,r0		! move the count to r0 and fall into lshrsi3_r0
+	.align	2
+GLOBAL(lshrsi3_r0):
+
+#ifdef __sh1__
+	and	#31,r0		! r0 = shift count modulo 32
+	shll2	r0		! times 4: byte offset of the table slot
+	mov.l	r4,@-r15	! free r4 for use as a temporary
+	mov	r0,r4
+	mova	LOCAL(lshrsi3_table),r0
+	add	r4,r0		! r0 = address of the table slot
+	mov.l	@r15+,r4	! get the value to shift back
+	jmp	@r0
+	mov	r4,r0		! (delay slot) r0 = value to shift
+	.align 2
+#else
+	and	#31,r0		! r0 = shift count modulo 32
+	shll2	r0		! times 4: byte offset of the table slot
+	braf	r0		! pc relative branch into the table
+	mov	r4,r0		! (delay slot) r0 = value to shift
+#endif
+LOCAL(lshrsi3_table):
+	rts				// >> 0
+	nop
+LOCAL(lshrsi_1):			// >> 1
+	rts
+	shlr	r0
+LOCAL(lshrsi_2):			// >> 2
+	rts
+	shlr2	r0
+	bra	LOCAL(lshrsi_1)		// >> 3
+	shlr2	r0
+	bra	LOCAL(lshrsi_2)		// >> 4
+	shlr2	r0
+	bra	LOCAL(lshrsi_5)		// >> 5
+	shlr	r0
+	bra	LOCAL(lshrsi_6)		// >> 6
+	shlr2	r0
+	bra	LOCAL(lshrsi_7)		// >> 7
+	shlr	r0
+LOCAL(lshrsi_8):			// >> 8
+	rts
+	shlr8	r0
+	bra	LOCAL(lshrsi_8)		// >> 9
+	shlr	r0
+	bra	LOCAL(lshrsi_8)		// >> 10
+	shlr2	r0
+	bra	LOCAL(lshrsi_11)	// >> 11
+	shlr	r0
+	bra	LOCAL(lshrsi_12)	// >> 12
+	shlr2	r0
+	bra	LOCAL(lshrsi_13)	// >> 13
+	shlr	r0
+	bra	LOCAL(lshrsi_14)	// >> 14
+	shlr8	r0
+	bra	LOCAL(lshrsi_15)	// >> 15
+	shlr8	r0
+LOCAL(lshrsi_16):			// >> 16
+	rts
+	shlr16	r0
+	bra	LOCAL(lshrsi_16)	// >> 17
+	shlr	r0
+	bra	LOCAL(lshrsi_16)	// >> 18
+	shlr2	r0
+	bra	LOCAL(lshrsi_19)	// >> 19
+	shlr	r0
+	bra	LOCAL(lshrsi_20)	// >> 20
+	shlr2	r0
+	bra	LOCAL(lshrsi_21)	// >> 21
+	shlr	r0
+	bra	LOCAL(lshrsi_22)	// >> 22
+	shlr16	r0
+	bra	LOCAL(lshrsi_23)	// >> 23
+	shlr16	r0
+	bra	LOCAL(lshrsi_16)	// >> 24
+	shlr8	r0
+	bra	LOCAL(lshrsi_25)	// >> 25
+	shlr	r0
+	bra	LOCAL(lshrsi_26)	// >> 26
+	shlr2	r0
+	bra	LOCAL(lshrsi_27)	// >> 27
+	shlr	r0
+	bra	LOCAL(lshrsi_28)	// >> 28
+	shlr2	r0
+	bra	LOCAL(lshrsi_29)	// >> 29
+	shlr16	r0
+	bra	LOCAL(lshrsi_30)	// >> 30
+	shlr16	r0
+	shll	r0			// >> 31
+	rts
+	movt	r0
+
+/* Shared tails.  Where an rts is not followed by an explicit delay slot
+   instruction, the first instruction of the tail that follows it in the file
+   sits in that delay slot and so counts towards the shift: for example
+   >> 5 is shlr (table slot) + shlr2 (at lshrsi_5) + shlr2 (delay slot).
+   Do not reorder or separate these tails.  */
+LOCAL(lshrsi_7):
+	shlr2	r0
+LOCAL(lshrsi_5):
+LOCAL(lshrsi_6):
+	shlr2	r0
+	rts
+LOCAL(lshrsi_13):
+	shlr2	r0
+LOCAL(lshrsi_12):
+LOCAL(lshrsi_11):
+	shlr8	r0
+	rts
+LOCAL(lshrsi_21):
+	shlr2	r0
+LOCAL(lshrsi_20):
+LOCAL(lshrsi_19):
+	shlr16	r0
+	rts
+LOCAL(lshrsi_28):
+LOCAL(lshrsi_27):
+	shlr2	r0
+LOCAL(lshrsi_26):
+LOCAL(lshrsi_25):
+	shlr16	r0
+	rts
+	shlr8	r0
+
+/* The remaining tails step back with a left shift between two right shifts,
+   e.g. >> 14 is 8 - 2 + 8 and >> 22 is 16 - 2 + 8.  */
+LOCAL(lshrsi_22):
+LOCAL(lshrsi_14):
+	shll2	r0
+	rts
+	shlr8	r0
+
+LOCAL(lshrsi_23):
+LOCAL(lshrsi_15):
+	shll	r0
+	rts
+	shlr8	r0
+
+LOCAL(lshrsi_29):
+	shll	r0
+LOCAL(lshrsi_30):
+	shll2	r0
+	rts
+	shlr16	r0	
+
+	ENDFUNC(GLOBAL(lshrsi3))
+	ENDFUNC(GLOBAL(lshrsi3_r0))
+#endif
+
+#ifdef L_movmem
+	.text
+	.balign	4
+	.global	GLOBAL(movmem)
+	HIDDEN_FUNC(GLOBAL(movmem))
+	HIDDEN_ALIAS(movstr,movmem)
+	/* This would be a lot simpler if r6 contained the byte count
+	   minus 64, and we wouldn't be called here for a byte count of 64.  */
+	/* In: r4 = destination, r5 = source, r6 = length code.  r6 is
+	   multiplied by 4 on entry and then reduced by 64 for every 64 byte
+	   group copied.  NOTE(review): the exact encoding of r6 is chosen by
+	   the caller and is not visible here -- confirm against the movmem
+	   expander.
+	   r0 is the copy temporary.  pr is saved on the stack because the
+	   unrolled movmemSI* chain below is entered with bsr, so that its
+	   final rts comes back to movmem_loop.  */
+GLOBAL(movmem):
+	sts.l	pr,@-r15	! save the return address
+	shll2	r6		! r6 *= 4
+	bsr	GLOBAL(movmemSI52+2)	! copy words 48..0; enters past the first load
+	mov.l	@(48,r5),r0	! (delay slot) that first load is done here
+	.balign	4
+LOCAL(movmem_loop): /* Reached with rts */
+	mov.l	@(60,r5),r0	! copy words 60, 56 and 52 of the current group
+	add	#-64,r6
+	mov.l	r0,@(60,r4)
+	tst	r6,r6
+	mov.l	@(56,r5),r0
+	bt	LOCAL(movmem_done)	! r6 reached zero: finish this group and return
+	mov.l	r0,@(56,r4)
+	cmp/pl	r6
+	mov.l	@(52,r5),r0
+	add	#64,r5		! advance both pointers to the next group
+	mov.l	r0,@(52,r4)
+	add	#64,r4
+	bt	GLOBAL(movmemSI52)	! r6 > 0: another group, rts returns to movmem_loop
+! done all the large groups, do the remainder
+! jump to movmem+
+	mova	GLOBAL(movmemSI4)+4,r0
+	add	r6,r0		! r6 is negative here: step back 4 code bytes per word
+	jmp	@r0
+LOCAL(movmem_done): ! share slot insn, works out aligned.
+	lds.l	@r15+,pr	! restore the return address (also the jmp delay slot)
+	mov.l	r0,@(56,r4)
+	mov.l	@(52,r5),r0
+	rts
+	mov.l	r0,@(52,r4)
+	.balign	4
+! ??? We need aliases movstr* for movmem* for the older libraries.  These
+! aliases will be removed at some point in the future.
+/* movmemSIn copies n bytes from @r5 to @r4 through r0, highest word first.
+   Every entry is two instructions (4 bytes of code) and falls through into
+   the entry for n - 4; movmem above relies on that fixed spacing when it
+   computes a jump into the middle of the chain.  */
+	.global	GLOBAL(movmemSI64)
+	HIDDEN_FUNC(GLOBAL(movmemSI64))
+	HIDDEN_ALIAS(movstrSI64,movmemSI64)
+GLOBAL(movmemSI64):
+	mov.l	@(60,r5),r0
+	mov.l	r0,@(60,r4)
+	.global	GLOBAL(movmemSI60)
+	HIDDEN_FUNC(GLOBAL(movmemSI60))
+	HIDDEN_ALIAS(movstrSI60,movmemSI60)
+GLOBAL(movmemSI60):
+	mov.l	@(56,r5),r0
+	mov.l	r0,@(56,r4)
+	.global	GLOBAL(movmemSI56)
+	HIDDEN_FUNC(GLOBAL(movmemSI56))
+	HIDDEN_ALIAS(movstrSI56,movmemSI56)
+GLOBAL(movmemSI56):
+	mov.l	@(52,r5),r0
+	mov.l	r0,@(52,r4)
+	.global	GLOBAL(movmemSI52)
+	HIDDEN_FUNC(GLOBAL(movmemSI52))
+	HIDDEN_ALIAS(movstrSI52,movmemSI52)
+GLOBAL(movmemSI52):
+	mov.l	@(48,r5),r0
+	mov.l	r0,@(48,r4)
+	.global	GLOBAL(movmemSI48)
+	HIDDEN_FUNC(GLOBAL(movmemSI48))
+	HIDDEN_ALIAS(movstrSI48,movmemSI48)
+GLOBAL(movmemSI48):
+	mov.l	@(44,r5),r0
+	mov.l	r0,@(44,r4)
+	.global	GLOBAL(movmemSI44)
+	HIDDEN_FUNC(GLOBAL(movmemSI44))
+	HIDDEN_ALIAS(movstrSI44,movmemSI44)
+GLOBAL(movmemSI44):
+	mov.l	@(40,r5),r0
+	mov.l	r0,@(40,r4)
+	.global	GLOBAL(movmemSI40)
+	HIDDEN_FUNC(GLOBAL(movmemSI40))
+	HIDDEN_ALIAS(movstrSI40,movmemSI40)
+GLOBAL(movmemSI40):
+	mov.l	@(36,r5),r0
+	mov.l	r0,@(36,r4)
+	.global	GLOBAL(movmemSI36)
+	HIDDEN_FUNC(GLOBAL(movmemSI36))
+	HIDDEN_ALIAS(movstrSI36,movmemSI36)
+GLOBAL(movmemSI36):
+	mov.l	@(32,r5),r0
+	mov.l	r0,@(32,r4)
+	.global	GLOBAL(movmemSI32)
+	HIDDEN_FUNC(GLOBAL(movmemSI32))
+	HIDDEN_ALIAS(movstrSI32,movmemSI32)
+GLOBAL(movmemSI32):
+	mov.l	@(28,r5),r0
+	mov.l	r0,@(28,r4)
+	.global	GLOBAL(movmemSI28)
+	HIDDEN_FUNC(GLOBAL(movmemSI28))
+	HIDDEN_ALIAS(movstrSI28,movmemSI28)
+GLOBAL(movmemSI28):
+	mov.l	@(24,r5),r0
+	mov.l	r0,@(24,r4)
+	.global	GLOBAL(movmemSI24)
+	HIDDEN_FUNC(GLOBAL(movmemSI24))
+	HIDDEN_ALIAS(movstrSI24,movmemSI24)
+GLOBAL(movmemSI24):
+	mov.l	@(20,r5),r0
+	mov.l	r0,@(20,r4)
+	.global	GLOBAL(movmemSI20)
+	HIDDEN_FUNC(GLOBAL(movmemSI20))
+	HIDDEN_ALIAS(movstrSI20,movmemSI20)
+GLOBAL(movmemSI20):
+	mov.l	@(16,r5),r0
+	mov.l	r0,@(16,r4)
+	.global	GLOBAL(movmemSI16)
+	HIDDEN_FUNC(GLOBAL(movmemSI16))
+	HIDDEN_ALIAS(movstrSI16,movmemSI16)
+GLOBAL(movmemSI16):
+	mov.l	@(12,r5),r0
+	mov.l	r0,@(12,r4)
+	.global	GLOBAL(movmemSI12)
+	HIDDEN_FUNC(GLOBAL(movmemSI12))
+	HIDDEN_ALIAS(movstrSI12,movmemSI12)
+GLOBAL(movmemSI12):
+	mov.l	@(8,r5),r0
+	mov.l	r0,@(8,r4)
+	.global	GLOBAL(movmemSI8)
+	HIDDEN_FUNC(GLOBAL(movmemSI8))
+	HIDDEN_ALIAS(movstrSI8,movmemSI8)
+GLOBAL(movmemSI8):
+	mov.l	@(4,r5),r0
+	mov.l	r0,@(4,r4)
+	.global	GLOBAL(movmemSI4)
+	HIDDEN_FUNC(GLOBAL(movmemSI4))
+	HIDDEN_ALIAS(movstrSI4,movmemSI4)
+GLOBAL(movmemSI4):
+	mov.l	@(0,r5),r0
+	rts
+	mov.l	r0,@(0,r4)	! (delay slot) store the last word
+
+	ENDFUNC(GLOBAL(movmemSI64))
+	ENDFUNC(GLOBAL(movmemSI60))
+	ENDFUNC(GLOBAL(movmemSI56))
+	ENDFUNC(GLOBAL(movmemSI52))
+	ENDFUNC(GLOBAL(movmemSI48))
+	ENDFUNC(GLOBAL(movmemSI44))
+	ENDFUNC(GLOBAL(movmemSI40))
+	ENDFUNC(GLOBAL(movmemSI36))
+	ENDFUNC(GLOBAL(movmemSI32))
+	ENDFUNC(GLOBAL(movmemSI28))
+	ENDFUNC(GLOBAL(movmemSI24))
+	ENDFUNC(GLOBAL(movmemSI20))
+	ENDFUNC(GLOBAL(movmemSI16))
+	ENDFUNC(GLOBAL(movmemSI12))
+	ENDFUNC(GLOBAL(movmemSI8))
+	ENDFUNC(GLOBAL(movmemSI4))
+	ENDFUNC(GLOBAL(movmem))
+#endif
+
+#ifdef L_movmem_i4
+	.text
+	.global	GLOBAL(movmem_i4_even)
+	.global	GLOBAL(movmem_i4_odd)
+	.global	GLOBAL(movmemSI12_i4)
+
+	HIDDEN_FUNC(GLOBAL(movmem_i4_even))
+	HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
+	HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
+
+	HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
+	HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
+	HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
+
+/* movmem_i4_even / movmem_i4_odd: software pipelined word copy.
+   In: r4 = destination, r5 = source (read with post-increment),
+   r6 = counter, decremented with dt once for every two words handled.
+   NOTE(review): how the caller derives r6 from the length is not visible
+   here -- confirm against the movmem expander.
+   Uses r0-r3 as copy temporaries and overwrites the T bit.  */
+	.p2align	5
+/* Exit taken when r6 runs out at the first dt of the loop: r0 and r1 are
+   already loaded and r4 has not been advanced yet, hence offsets 16 and 20.  */
+L_movmem_2mod4_end:
+	mov.l	r0,@(16,r4)
+	rts
+	mov.l	r1,@(20,r4)
+
+	.p2align	2
+
+GLOBAL(movmem_i4_even):
+	mov.l	@r5+,r0		! preload two words, then join the loop half way
+	bra	L_movmem_start_even
+	mov.l	@r5+,r1
+
+GLOBAL(movmem_i4_odd):
+	mov.l	@r5+,r1
+	add	#-4,r4		! bias r4 so the offsets used by the loop line up
+	mov.l	@r5+,r2
+	mov.l	@r5+,r3
+	mov.l	r1,@(4,r4)
+	mov.l	r2,@(8,r4)
+
+L_movmem_loop:
+	mov.l	r3,@(12,r4)
+	dt	r6		! T = 1 when r6 reaches zero
+	mov.l	@r5+,r0
+	bt/s	L_movmem_2mod4_end
+	mov.l	@r5+,r1		! (delay slot) runs whether or not the branch is taken
+	add	#16,r4
+L_movmem_start_even:
+	mov.l	@r5+,r2
+	mov.l	@r5+,r3
+	mov.l	r0,@r4
+	dt	r6		! T = 1 when r6 reaches zero
+	mov.l	r1,@(4,r4)
+	bf/s	L_movmem_loop
+	mov.l	r2,@(8,r4)	! (delay slot) runs whether or not the branch is taken
+	rts
+	mov.l	r3,@(12,r4)
+
+	ENDFUNC(GLOBAL(movmem_i4_even))
+	ENDFUNC(GLOBAL(movmem_i4_odd))
+
+	.p2align	4
+/* movmemSI12_i4: copy three words (12 bytes) from @r5 to @r4.  All loads
+   are issued before the first store.  Uses r0-r2.  */
+GLOBAL(movmemSI12_i4):
+	mov.l	@r5,r0
+	mov.l	@(4,r5),r1
+	mov.l	@(8,r5),r2
+	mov.l	r0,@r4
+	mov.l	r1,@(4,r4)
+	rts
+	mov.l	r2,@(8,r4)
+
+	ENDFUNC(GLOBAL(movmemSI12_i4))
+#endif
+
+#ifdef L_mulsi3
+
+
+	.global	GLOBAL(mulsi3)
+	HIDDEN_FUNC(GLOBAL(mulsi3))
+
+! 32 x 32 -> 32 bit multiply built from 16 x 16 -> 32 bit mulu.w products.
+!
+! Entry:
+!	r4 =       aabb	(two 16 bit halves)
+!	r5 =       ccdd	(two 16 bit halves)
+!
+! Exit:
+!	r0 = low 32 bits of aabb*ccdd, via partial products
+!
+! Destroys:
+!	r2, r3, macl, T bit (and r1 when a high half is non-zero)
+!
+! if aa == 0 and cc == 0
+!	r0 = bb*dd
+! else
+!	r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
+!
+! The aa*cc product only contributes to bits 32 and up, so it is never
+! computed.
+!
+
+GLOBAL(mulsi3):
+	mulu.w	r4,r5		! multiply the lsws  macl=bb*dd
+	mov	r5,r3		! r3 = ccdd
+	swap.w	r4,r2		! r2 = bbaa
+	xtrct	r2,r3		! r3 = aacc
+	tst	r3,r3		! msws zero ?
+	bf	LOCAL(mulsi3_hiset)
+	rts			! yes - then we have the answer
+	sts	macl,r0		! (delay slot) r0 = bb*dd
+
+! Slow path: at least one high half is non-zero, add in the cross products.
+! The label goes through LOCAL() like the other internal labels of this file,
+! so that it stays out of the symbol table of the object.
+LOCAL(mulsi3_hiset):
+	sts	macl,r0		! r0 = bb*dd
+	mulu.w	r2,r5		! brewing macl = aa*dd
+	sts	macl,r1
+	mulu.w	r3,r4		! brewing macl = cc*bb
+	sts	macl,r2
+	add	r1,r2		! r2 = aa*dd + cc*bb
+	shll16	r2		! scale the cross products by 65536
+	rts
+	add	r2,r0		! (delay slot) r0 = bb*dd + scaled cross products
+
+	ENDFUNC(GLOBAL(mulsi3))
+#endif
+#endif /* ! __SH5__ */
+
+/*------------------------------------------------------------------------------
+  32 bit signed integer division that uses FPU double precision division.  */
+
+#ifdef L_sdivsi3_i4
+	.title "SH DIVIDE"
+
+#if defined (__SH4__) || defined (__SH2A__)
+/* This variant is used when FPSCR.PR = 1 (double precision) is the default
+   setting.
+   Args in r4 and r5, result in fpul, clobber dr0, dr2.  */
+
+	.global	GLOBAL(sdivsi3_i4)
+	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
+GLOBAL(sdivsi3_i4):
+	lds r4,fpul
+	float fpul,dr0	! dr0 = (double) dividend
+	lds r5,fpul
+	float fpul,dr2	! dr2 = (double) divisor
+	fdiv dr2,dr0	! dr0 = dividend / divisor
+	rts
+	ftrc dr0,fpul	! (delay slot) fpul = quotient truncated to an integer
+
+	ENDFUNC(GLOBAL(sdivsi3_i4))
+
+#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
+/* This variant is used when FPSCR.PR = 0 (single precision) is the default
+   setting.
+   Args in r4 and r5, result in fpul, clobber r2, dr0, dr2.
+   For this to work, we must temporarily switch the FPU to double precision,
+   but we had better not touch FPSCR.FR.  See PR 6526.  */
+
+#if ! __SH5__ || __SH5__ == 32
+#if __SH5__
+	.mode	SHcompact
+#endif
+	.global	GLOBAL(sdivsi3_i4)
+	HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
+GLOBAL(sdivsi3_i4):
+
+#ifndef __SH4A__
+	mov.l	r3,@-r15	! r3 is needed as a temporary: save it
+	sts	fpscr,r2	! r2 = incoming fpscr, kept for the restore below
+	mov	#8,r3
+	swap.w	r3,r3		// r3 = 1 << 19 (FPSCR.PR bit)
+	or	r2,r3
+	lds	r3,fpscr	// Set FPSCR.PR = 1.
+	lds	r4,fpul
+	float	fpul,dr0	! dr0 = (double) dividend
+	lds	r5,fpul
+	float	fpul,dr2	! dr2 = (double) divisor
+	fdiv	dr2,dr0		! dr0 = dividend / divisor
+	ftrc	dr0,fpul	! fpul = quotient truncated to an integer
+	lds	r2,fpscr	! put the incoming fpscr back
+	rts
+	mov.l	@r15+,r3	! (delay slot) restore r3
+#else
+/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.  */
+	fpchg
+	lds	r4,fpul
+	float	fpul,dr0	! dr0 = (double) dividend
+	lds	r5,fpul
+	float	fpul,dr2	! dr2 = (double) divisor
+	fdiv	dr2,dr0		! dr0 = dividend / divisor
+	ftrc	dr0,fpul	! fpul = quotient truncated to an integer
+	rts
+	fpchg	
+
+#endif /* __SH4A__  */
+
+	ENDFUNC(GLOBAL(sdivsi3_i4))
+#endif /* ! __SH5__ || __SH5__ == 32 */
+#endif /* ! __SH4__ || __SH2A__  */
+#endif /* L_sdivsi3_i4  */
+
+//------------------------------------------------------------------------------
+#ifdef L_sdivsi3
+/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
+   sh2e/sh3e code.  */
+!!
+!! Steve Chamberlain
+!! sac@cygnus.com
+!!
+!!
+
+!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
+
+	.global	GLOBAL(sdivsi3)
+#if __SHMEDIA__
+#if __SH5__ == 32
+	.section	.text..SHmedia32,"ax"
+#else
+	.text
+#endif
+	.align	2
+#if 0
+/* The assembly code that follows is a hand-optimized version of the C
+   code that follows.  Note that the registers that are modified are
+   exactly those listed as clobbered in the patterns divsi3_i1 and
+   divsi3_i1_media.
+	
+int __sdivsi3 (i, j)
+     int i, j;
+{
+  register unsigned long long r18 asm ("r18");
+  register unsigned long long r19 asm ("r19");
+  register unsigned long long r0 asm ("r0") = 0;
+  register unsigned long long r1 asm ("r1") = 1;
+  register int r2 asm ("r2") = i >> 31;
+  register int r3 asm ("r3") = j >> 31;
+
+  r2 = r2 ? r2 : r1;
+  r3 = r3 ? r3 : r1;
+  r18 = i * r2;
+  r19 = j * r3;
+  r2 *= r3;
+  
+  r19 <<= 31;
+  r1 <<= 31;
+  do
+    if (r18 >= r19)
+      r0 |= r1, r18 -= r19;
+  while (r19 >>= 1, r1 >>= 1);
+
+  return r2 * (int)r0;
+}
+*/
+ // NOTE: this variant is compiled out (it sits under an #if 0).  The
+ // comments map each step to the C reference code in the comment that
+ // precedes this block.
+GLOBAL(sdivsi3):
+	pt/l	LOCAL(sdivsi3_dontadd), tr2
+	pt/l	LOCAL(sdivsi3_loop), tr1
+	ptabs/l	r18, tr0	// tr0 = target of the final blink, taken from r18
+	movi	0, r0		// r0 = 0, the quotient accumulator
+	movi	1, r1		// r1 = 1
+	shari.l	r4, 31, r2	// r2 = i >> 31
+	shari.l	r5, 31, r3	// r3 = j >> 31
+	cmveq	r2, r1, r2	// r2 = r2 ? r2 : r1
+	cmveq	r3, r1, r3	// r3 = r3 ? r3 : r1
+	muls.l	r4, r2, r18	// r18 = i * r2
+	muls.l	r5, r3, r19	// r19 = j * r3
+	muls.l	r2, r3, r2	// r2 *= r3, the sign of the result
+	shlli	r19, 31, r19	// r19 <<= 31
+	shlli	r1, 31, r1	// r1 <<= 31
+LOCAL(sdivsi3_loop):
+	bgtu	r19, r18, tr2	// skip the next two lines unless r18 >= r19
+	or	r0, r1, r0	// r0 |= r1
+	sub	r18, r19, r18	// r18 -= r19
+LOCAL(sdivsi3_dontadd):
+	shlri	r1, 1, r1	// r1 >>= 1
+	shlri	r19, 1, r19	// r19 >>= 1
+	bnei	r1, 0, tr1	// loop while r1 != 0
+	muls.l	r0, r2, r0	// r2 * (int) r0
+	add.l	r0, r63, r0
+	blink	tr0, r63
+#elif 0 /* ! 0 */
+ // inputs: r4,r5
+ // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
+ // result in r0
+ // NOTE: this variant is compiled out (it sits under an #elif 0).
+GLOBAL(sdivsi3):
+ // can create absolute value without extra latency,
+ // but dependent on proper sign extension of inputs:
+ // shari.l r5,31,r2
+ // xor r5,r2,r20
+ // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
+ shari.l r5,31,r2
+ ori r2,1,r2
+ muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
+ movi 0xffffffffffffbb0c,r19 // shift count equiv 76
+ shari.l r4,31,r3
+ nsb r20,r0
+ shlld r20,r0,r25
+ shlri r25,48,r25
+ sub r19,r25,r1
+ mmulfx.w r1,r1,r2
+ mshflo.w r1,r63,r1
+ // If r4 was to be used in-place instead of r21, could use this sequence
+ // to compute absolute:
+ // sub r63,r4,r19 // compute absolute value of r4
+ // shlri r4,32,r3 // into lower 32 bit of r4, keeping
+ // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
+ ori r3,1,r3
+ mmulfx.w r25,r2,r2
+ sub r19,r0,r0
+ muls.l r4,r3,r21
+ msub.w r1,r2,r2
+ addi r2,-2,r1
+ mulu.l r21,r1,r19
+ mmulfx.w r2,r2,r2
+ shlli r1,15,r1
+ shlrd r19,r0,r19
+ mulu.l r19,r20,r3
+ mmacnfx.wl r25,r2,r1
+ ptabs r18,tr0
+ sub r21,r3,r25
+
+ mulu.l r25,r1,r2
+ addi r0,14,r0
+ xor r4,r5,r18
+ shlrd r2,r0,r2
+ mulu.l r2,r20,r3
+ add r19,r2,r19
+ shari.l r18,31,r18
+ sub r25,r3,r25
+
+ mulu.l r25,r1,r2
+ sub r25,r20,r25
+ add r19,r18,r19
+ shlrd r2,r0,r2
+ mulu.l r2,r20,r3
+ addi r25,1,r25
+ add r19,r2,r19
+
+ cmpgt r25,r3,r25
+ add.l r19,r25,r0
+ xor r0,r18,r0
+ blink tr0,r63
+#else /* ! 0 && ! 0 */
+
+ // inputs: r4,r5
+ // clobbered: r1,r18,r19,r20,r21,r25,tr0
+ // result in r0
+	HIDDEN_FUNC(GLOBAL(sdivsi3_2))
+ // Table driven variant.  A first approximation of the reciprocal of the
+ // divisor is looked up through r20 and refined in two steps before it is
+ // multiplied with the dividend.  NOTE(review): this summary is derived
+ // from the fixed point annotations on the lines below - confirm it against
+ // the layout of div_table_internal, which is not visible here.
+ // Without __pic__ the entry points sdivsi3 and sdivsi3_1 load the address
+ // of div_table_internal into r20 and fall into sdivsi3_2; with __pic__ only
+ // sdivsi3_2 is provided and r20 has to be set up by the caller.
+#ifndef __pic__
+	FUNC(GLOBAL(sdivsi3))
+GLOBAL(sdivsi3): /* this is the shcompact entry point */
+ // The special SHmedia entry point sdivsi3_1 prevents accidental linking
+ // with the SHcompact implementation, which clobbers tr1 / tr2.
+ .global GLOBAL(sdivsi3_1)
+GLOBAL(sdivsi3_1):
+ .global GLOBAL(div_table_internal)
+ movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
+ shori GLOBAL(div_table_internal) & 65535, r20
+#endif
+ .global GLOBAL(sdivsi3_2)
+ // div_table in r20
+ // clobbered: r1,r18,r19,r21,r25,tr0
+GLOBAL(sdivsi3_2):
+ nsb r5, r1
+ shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
+ shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
+ ldx.ub r20, r21, r19 // u0.8
+ shari r25, 32, r25   // normalize to s2.30
+ shlli r21, 1, r21
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21  // s2.14
+  ptabs r18, tr0
+ shari r19, 24, r19   // truncate to s2.14
+ sub r21, r19, r19    // some 11 bit inverse in s1.14
+ muls.l r19, r19, r21 // u0.28
+  sub r63, r1, r1
+  addi r1, 92, r1
+ muls.l r25, r21, r18 // s2.58
+ shlli r19, 45, r19   // multiply by two and convert to s2.58
+  /* bubble */
+ sub r19, r18, r18
+ shari r18, 28, r18   // some 22 bit inverse in s1.30
+ muls.l r18, r25, r0  // s2.60
+  muls.l r18, r4, r25 // s32.30
+  /* bubble */
+ shari r0, 16, r19   // s-16.44
+ muls.l r19, r18, r19 // s-16.74
+  shari r25, 63, r0
+  shari r4, 14, r18   // s19.-14
+ shari r19, 30, r19   // s-16.44
+ muls.l r19, r18, r19 // s15.30
+  xor r21, r0, r21    // You could also use the constant 1 << 27.
+  add r21, r25, r21
+ sub r21, r19, r21
+ shard r21, r1, r21
+ sub r21, r0, r0
+ blink tr0, r63
+#ifndef __pic__
+	ENDFUNC(GLOBAL(sdivsi3))
+#endif
+	ENDFUNC(GLOBAL(sdivsi3_2))
+#endif
+#elif defined __SHMEDIA__
+/* m5compact-nofpu */
+ // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
+	.mode	SHmedia
+	.section	.text..SHmedia32,"ax"
+	.align	2
+	FUNC(GLOBAL(sdivsi3))
+ // Shift and subtract division on the absolute values, 32 passes of the
+ // loop, with the sign applied at the end.  In: r4 = dividend, r5 = divisor.
+ // Out: r0 = quotient.  NOTE(review): the per line notes below follow the
+ // SHmedia instruction definitions as the reviewer understands them -
+ // confirm against the SH-5 architecture manual.
+GLOBAL(sdivsi3):
+	pt/l LOCAL(sdivsi3_dontsub), tr0
+	pt/l LOCAL(sdivsi3_loop), tr1
+	ptabs/l r18,tr2		// tr2 = target of the final blink, taken from r18
+	shari.l r4,31,r18	// r18 = sign mask of the dividend (0 or -1)
+	shari.l r5,31,r19	// r19 = sign mask of the divisor (0 or -1)
+	xor r4,r18,r20
+	xor r5,r19,r21
+	sub.l r20,r18,r20	// r20 = absolute value of the dividend
+	sub.l r21,r19,r21	// r21 = absolute value of the divisor
+	xor r18,r19,r19		// r19 = sign mask of the quotient
+	shlli r21,32,r25	// r25 = abs(divisor) << 32
+	addi r25,-1,r21		// r21 = (abs(divisor) << 32) - 1
+	addz.l r20,r63,r20	// zero extend abs(dividend) to 64 bits
+LOCAL(sdivsi3_loop):
+	shlli r20,1,r20
+	bgeu/u r21,r20,tr0	// nothing to subtract on this pass
+	sub r20,r21,r20		// subtract the divisor part; the -1 in r21 adds the quotient bit
+LOCAL(sdivsi3_dontsub):
+	addi.l r25,-1,r25	// the low half of r25 doubles as the pass counter
+	bnei r25,-32,tr1
+	xor r20,r19,r20
+	sub.l r20,r19,r0	// r0 = quotient with the sign applied
+	blink tr2,r63
+	ENDFUNC(GLOBAL(sdivsi3))
+#else /* ! __SHMEDIA__ */
+	FUNC(GLOBAL(sdivsi3))
+! Signed 32 bit division with the div0s / div1 step division instructions.
+!
+! Entry:
+!	r4: dividend
+!	r5: divisor
+!
+! Exit:
+!	r0: quotient; 0 if the divisor is 0
+!
+! Destroys:
+!	r1, r2, r3, T bit
+!
+GLOBAL(sdivsi3):
+	mov	r4,r1		! r1 = dividend, becomes the quotient
+	mov	r5,r0		! r0 = divisor
+
+	tst	r0,r0
+	bt	LOCAL(sdivsi3_div0)	! division by zero: return 0
+	mov	#0,r2
+	div0s	r2,r1		! T = sign of the dividend
+	subc	r3,r3		! r3 = 0 or -1: sign extension of the dividend
+	subc	r2,r1		! subtract 1 from a negative dividend
+	div0s	r0,r3		! set up the signed step division
+	rotcl	r1
+! 32 division steps.  Each div1 produces one quotient bit in T and the rotcl
+! shifts it into r1 while feeding the next dividend bit to the following
+! div1.  This assembles to the same instructions as writing out the 32 pairs.
+	.rept	32
+	div1	r0,r3
+	rotcl	r1
+	.endr
+	addc	r2,r1		! final correction: r1 += T
+	rts
+	mov	r1,r0		! (delay slot) r0 = quotient
+
+
+! The label goes through LOCAL() like the other internal labels of this file,
+! so that it stays out of the symbol table of the object.
+LOCAL(sdivsi3_div0):
+	rts
+	mov	#0,r0		! (delay slot) result for a zero divisor
+
+	ENDFUNC(GLOBAL(sdivsi3))
+#endif /* ! __SHMEDIA__  */
+#endif /* L_sdivsi3  */
+
+/*------------------------------------------------------------------------------
+  32 bit unsigned integer division that uses FPU double precision division.  */
+
+#ifdef L_udivsi3_i4
+	.title "SH DIVIDE"
+
+#if defined (__SH4__) || defined (__SH2A__)
+/* This variant is used when FPSCR.PR = 1 (double precision) is the default
+   setting.
+   Args in r4 and r5, result in fpul,
+   clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit  */
+
+	.global	GLOBAL(udivsi3_i4)
+	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+	mov	#1,r1
+	cmp/hi	r1,r5		! T = (divisor > 1), unsigned compare
+	bf/s	trivial		! divisor is 0 or 1: the result is the dividend
+	rotr	r1		! (delay slot) r1 = 1 << 31
+	xor	r1,r4		! flip bit 31: read as signed, r4 = dividend - 2^31
+	lds	r4,fpul
+	mova	L1,r0		! r0 = address of the double constant 2^31
+#ifdef FMOVD_WORKS
+	fmov.d	@r0+,dr4
+#else
+	fmov.s	@r0+,DR40
+	fmov.s	@r0,DR41
+#endif
+	float	fpul,dr0	! dr0 = (double) (dividend - 2^31)
+	xor	r1,r5		! same bias for the divisor
+	lds	r5,fpul
+	float	fpul,dr2	! dr2 = (double) (divisor - 2^31)
+	fadd	dr4,dr0		! undo the bias: dr0 = dividend as an unsigned value
+	fadd	dr4,dr2		! undo the bias: dr2 = divisor as an unsigned value
+	fdiv	dr2,dr0
+	rts
+	ftrc	dr0,fpul	! (delay slot) fpul = quotient truncated to an integer
+
+trivial:
+	rts
+	lds	r4,fpul		! (delay slot) result = dividend, unchanged
+
+	.align 2
+#ifdef FMOVD_WORKS
+	.align 3	// Make the double below 8 byte aligned.
+#endif
+L1:
+	.double 2147483648
+
+	ENDFUNC(GLOBAL(udivsi3_i4))
+
+#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__)
+#if ! __SH5__ || __SH5__ == 32
+!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
+	.mode	SHmedia
+	.global	GLOBAL(udivsi3_i4)
+	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+ // NOTE(review): the per line notes below follow the SHmedia instruction
+ // definitions as the reviewer understands them - confirm against the SH-5
+ // architecture manual.
+GLOBAL(udivsi3_i4):
+	addz.l	r4,r63,r20	// r20 = dividend, zero extended to 64 bits
+	addz.l	r5,r63,r21	// r21 = divisor, zero extended to 64 bits
+	fmov.qd	r20,dr0
+	fmov.qd	r21,dr32
+	ptabs	r18,tr0		// tr0 = target of the final blink, taken from r18
+	float.qd dr0,dr0	// dr0 = (double) dividend
+	float.qd dr32,dr32	// dr32 = (double) divisor
+	fdiv.d	dr0,dr32,dr0	// dr0 = dividend / divisor
+	ftrc.dq dr0,dr32	// dr32 = quotient as a 64 bit integer
+	fmov.s fr33,fr32	// presumably moves the low word of the result to where the caller reads fpul - TODO confirm
+	blink tr0,r63
+
+	ENDFUNC(GLOBAL(udivsi3_i4))
+#endif /* ! __SH5__ || __SH5__ == 32 */
+
+#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+/* This variant is used when FPSCR.PR = 0 (single precision) is the default
+   setting.
+   Args in r4 and r5, result in fpul,
+   clobber r0, r1, r4, r5, dr0, dr2, dr4.
+   For this to work, we must temporarily switch the FPU to double precision,
+   but we had better not touch FPSCR.FR.  See PR 6526.  */
+
+	.global	GLOBAL(udivsi3_i4)
+	HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+
+#ifndef __SH4A__
+	mov	#1,r1
+	cmp/hi	r1,r5		/* T = (r5 > 1), unsigned compare */
+	bf/s	trivial		/* divisor is 0 or 1: return the dividend; FPSCR not yet pushed */
+	rotr	r1		// r1 = 1 << 31
+	sts.l	fpscr,@-r15	/* save the caller's FPSCR on the stack */
+	xor	r1,r4		/* flip bit 31 of the dividend */
+	mov.l	@(0,r15),r0	/* r0 = the FPSCR value just saved */
+	xor	r1,r5		/* flip bit 31 of the divisor */
+	mov.l	L2,r1		/* r1 = FPSCR bits to set (see L2) */
+	lds	r4,fpul
+	or	r0,r1
+	mova	L1,r0		/* r0 = address of the 2^31 constant */
+	lds	r1,fpscr	/* switch the FPU to double precision */
+#ifdef FMOVD_WORKS
+	fmov.d	@r0+,dr4
+#else
+	fmov.s	@r0+,DR40
+	fmov.s	@r0,DR41
+#endif
+	float	fpul,dr0
+	lds	r5,fpul
+	float	fpul,dr2
+	fadd	dr4,dr0		/* add 2^31 back: dr0 = dividend as an unsigned value */
+	fadd	dr4,dr2		/* dr2 = divisor as an unsigned value */
+	fdiv	dr2,dr0
+	ftrc	dr0,fpul	/* truncated quotient -> fpul */
+	rts
+	lds.l	@r15+,fpscr	/* delay slot: restore the caller's FPSCR */
+
+#ifdef FMOVD_WORKS
+	.align 3	// Make the double below 8 byte aligned (trivial + L2 take 8 bytes).
+#endif
+trivial:
+	rts
+	lds	r4,fpul		/* delay slot: result = dividend */
+
+	.align 2
+L2:
+#ifdef FMOVD_WORKS
+	.long 0x180000	// FPSCR.PR = 1, FPSCR.SZ = 1
+#else
+	.long 0x80000	// FPSCR.PR = 1
+#endif
+L1:
+	.double 2147483648	/* 2^31 */
+
+#else
+/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.
+   Although on SH4A fmovd usually works, it would require either additional
+   two fschg instructions or an FPSCR push + pop.  It's not worth the effort
+   for loading only one double constant.  */
+	mov	#1,r1
+	cmp/hi	r1,r5		/* T = (r5 > 1), unsigned compare */
+	bf/s	trivial		/* divisor is 0 or 1: return the dividend; PR not yet flipped */
+	rotr	r1		// r1 = 1 << 31
+	fpchg			/* flip FPSCR.PR: enter double precision */
+	mova	L1,r0		/* r0 = address of the 2^31 constant */
+	xor	r1,r4		/* flip bit 31 of the dividend */
+	fmov.s	@r0+,DR40
+	lds	r4,fpul
+	fmov.s	@r0,DR41
+	xor	r1,r5		/* flip bit 31 of the divisor */
+	float	fpul,dr0
+	lds	r5,fpul
+	float	fpul,dr2
+	fadd	dr4,dr0		/* add 2^31 back: dr0 = dividend as an unsigned value */
+	fadd	dr4,dr2		/* dr2 = divisor as an unsigned value */
+	fdiv	dr2,dr0
+	ftrc	dr0,fpul	/* truncated quotient -> fpul */
+	rts
+	fpchg			/* delay slot: flip FPSCR.PR back */
+
+trivial:
+	rts
+	lds	r4,fpul		/* delay slot: result = dividend */
+
+	.align 2
+L1:
+	.double 2147483648	/* 2^31 */
+
+#endif /* __SH4A__  */
+
+
+	ENDFUNC(GLOBAL(udivsi3_i4))
+#endif /* ! __SH4__ */
+#endif /* L_udivsi3_i4  */
+
+#ifdef L_udivsi3
+/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
+   sh2e/sh3e code.  */
+
+!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
+	.global	GLOBAL(udivsi3)
+	HIDDEN_FUNC(GLOBAL(udivsi3))
+
+#if __SHMEDIA__
+#if __SH5__ == 32
+	.section	.text..SHmedia32,"ax"
+#else
+	.text
+#endif
+	.align	2
+#if 0
+/* The assembly code that follows is a hand-optimized version of the C
+   code that follows.  Note that the registers that are modified are
+   exactly those listed as clobbered in the patterns udivsi3_i1 and
+   udivsi3_i1_media.
+	
+unsigned 
+__udivsi3 (i, j)
+    unsigned i, j; 
+{
+  register unsigned long long r0 asm ("r0") = 0;
+  register unsigned long long r18 asm ("r18") = 1;
+  register unsigned long long r4 asm ("r4") = i;
+  register unsigned long long r19 asm ("r19") = j;
+
+  r19 <<= 31;
+  r18 <<= 31;
+  do
+    if (r4 >= r19)
+      r0 |= r18, r4 -= r19;
+  while (r19 >>= 1, r18 >>= 1);
+
+  return r0;
+}
+*/
+GLOBAL(udivsi3):	/* shift-and-subtract version; disabled by the enclosing #if 0 */
+	pt/l	LOCAL(udivsi3_dontadd), tr2
+	pt/l	LOCAL(udivsi3_loop), tr1
+	ptabs/l	r18, tr0	/* tr0 = return address */
+	movi	0, r0		/* r0 = quotient accumulator */
+	movi	1, r18
+	addz.l	r5, r63, r19	/* r19 = j, zero-extended */
+	addz.l	r4, r63, r4	/* r4 = i, zero-extended */
+	shlli	r19, 31, r19	/* r19 <<= 31 */
+	shlli	r18, 31, r18	/* r18 = current quotient bit, starting at bit 31 */
+LOCAL(udivsi3_loop):
+	bgtu	r19, r4, tr2	/* skip when the shifted divisor exceeds the rest */
+	or	r0, r18, r0	/* r0 |= r18 */
+	sub	r4, r19, r4	/* r4 -= r19 */
+LOCAL(udivsi3_dontadd):
+	shlri	r18, 1, r18
+	shlri	r19, 1, r19
+	bnei	r18, 0, tr1	/* loop until the quotient bit has been shifted out */
+	blink	tr0, r63	/* return, quotient in r0 */
+#else
+GLOBAL(udivsi3):
+ // inputs: r4,r5
+ // clobbered: r18,r19,r20,r21,r22,r25,tr0
+ // result in r0.
+ addz.l r5,r63,r22 /* r22 = divisor, zero-extended to 64 bits */
+ nsb r22,r0
+ shlld r22,r0,r25 /* shift the divisor left by the count from nsb */
+ shlri r25,48,r25 /* keep the top 16 bits of the shifted divisor */
+ movi 0xffffffffffffbb0c,r20 // shift count equiv 76
+ sub r20,r25,r21
+ mmulfx.w r21,r21,r19
+ mshflo.w r21,r63,r21
+ ptabs r18,tr0 /* tr0 = return address */
+ mmulfx.w r25,r19,r19
+ sub r20,r0,r0
+ /* bubble */
+ msub.w r21,r19,r19
+ addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
+		    before the msub.w, but we need a different value for
+		    r19 to keep errors under control.  */
+ mulu.l r4,r21,r18 /* dividend * r21 */
+ mmulfx.w r19,r19,r19
+ shlli r21,15,r21
+ shlrd r18,r0,r18
+ mulu.l r18,r22,r20 /* r18 * divisor */
+ mmacnfx.wl r25,r19,r21
+ /* bubble */
+ sub r4,r20,r25 /* r25 = dividend - r18 * divisor */
+
+ mulu.l r25,r21,r19
+ addi r0,14,r0
+ /* bubble */
+ shlrd r19,r0,r19
+ mulu.l r19,r22,r20
+ add r18,r19,r18 /* accumulate the partial result into r18 */
+ /* bubble */
+ sub.l r25,r20,r25
+
+ mulu.l r25,r21,r19
+ addz.l r25,r63,r25
+ sub r25,r22,r25
+ shlrd r19,r0,r19
+ mulu.l r19,r22,r20
+ addi r25,1,r25
+ add r18,r19,r18
+
+ cmpgt r25,r20,r25 /* r25 = 1 when r25 > r20, else 0 */
+ add.l r18,r25,r0 /* result in r0 */
+ blink tr0,r63 /* return */
+#endif
+#elif defined (__SHMEDIA__)
+/* m5compact-nofpu - more emphasis on code size than on speed, but don't
+   ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
+   So use a short shmedia loop.  */
+ // clobbered: r20,r21,r25,tr0,tr1,tr2
+	.mode	SHmedia
+	.section	.text..SHmedia32,"ax"
+	.align	2
+GLOBAL(udivsi3):
+ pt/l LOCAL(udivsi3_dontsub), tr0
+ pt/l LOCAL(udivsi3_loop), tr1
+ ptabs/l r18,tr2 /* tr2 = return address */
+ shlli r5,32,r25 /* r25 = divisor << 32; its low 32 bits (zero) double as the loop counter */
+ addi r25,-1,r21 /* r21 = (divisor << 32) - 1 */
+ addz.l r4,r63,r20 /* r20 = dividend, zero-extended */
+LOCAL(udivsi3_loop):
+ shlli r20,1,r20
+ bgeu/u r21,r20,tr0 /* no subtraction while r20 <= r21 */
+ sub r20,r21,r20 /* subtracts divisor << 32 and adds 1, i.e. sets a quotient bit at the bottom */
+LOCAL(udivsi3_dontsub):
+ addi.l r25,-1,r25 /* 32-bit counter: 0, -1, ..., -32 */
+ bnei r25,-32,tr1 /* 32 iterations in total */
+ add.l r20,r63,r0 /* r0 = low 32 bits of r20 */
+ blink tr2,r63 /* return */
+#else /* ! defined (__SHMEDIA__) */
+LOCAL(div8):	/* helper: 8 div1 steps (this one plus the 7 of div7 below) */
+ div1 r5,r4
+LOCAL(div7):	/* helper: 7 div1 steps, the last one in the rts delay slot */
+ div1 r5,r4; div1 r5,r4; div1 r5,r4
+ div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
+
+LOCAL(divx4):	/* helper: 4 div1 steps; rotcl r0 follows each of the first three */
+ div1 r5,r4; rotcl r0
+ div1 r5,r4; rotcl r0
+ div1 r5,r4; rotcl r0
+ rts; div1 r5,r4
+
+GLOBAL(udivsi3):
+ sts.l pr,@-r15 /* save pr: the bsr calls below overwrite it */
+ extu.w r5,r0
+ cmp/eq r5,r0 /* T = 1 when the divisor fits in 16 bits */
+#ifdef __sh1__
+ bf LOCAL(large_divisor)
+#else
+ bf/s LOCAL(large_divisor)
+#endif
+ div0u /* delay slot of bf/s; with __sh1__ it only runs on fall-through */
+ swap.w r4,r0
+ shlr16 r4
+ bsr LOCAL(div8)
+ shll16 r5 /* delay slot: divisor moved to the upper half */
+ bsr LOCAL(div7)
+ div1 r5,r4
+ xtrct r4,r0
+ xtrct r0,r4
+ bsr LOCAL(div8)
+ swap.w r4,r4
+ bsr LOCAL(div7)
+ div1 r5,r4
+ lds.l @r15+,pr /* restore the return address */
+ xtrct r4,r0
+ swap.w r0,r0
+ rotcl r0
+ rts
+ shlr16 r5 /* delay slot: undo the shll16 on the divisor */
+
+LOCAL(large_divisor):
+#ifdef __sh1__
+ div0u
+#endif
+ mov #0,r0
+ xtrct r4,r0
+ xtrct r0,r4
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ lds.l @r15+,pr /* restore the return address */
+ rts
+ rotcl r0 /* delay slot: shift in the last quotient bit */
+
+	ENDFUNC(GLOBAL(udivsi3))
+#endif /* ! __SHMEDIA__ */
+#endif /* L_udivsi3 */
+
+#ifdef L_udivdi3
+#ifdef __SHMEDIA__
+	.mode	SHmedia
+	.section	.text..SHmedia32,"ax"
+	.align	2
+	.global	GLOBAL(udivdi3)
+	FUNC(GLOBAL(udivdi3))
+GLOBAL(udivdi3):	/* as used below: r2 = dividend, r3 = divisor; result left in r2 */
+	HIDDEN_ALIAS(udivdi3_internal,udivdi3)	/* entry name that divdi3 branches to */
+	shlri r3,1,r4
+	nsb r4,r22
+	shlld r3,r22,r6	/* r6 = divisor shifted left by r22 */
+	shlri r6,49,r5
+	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
+	sub r21,r5,r1
+	mmulfx.w r1,r1,r4
+	mshflo.w r1,r63,r1
+	sub r63,r22,r20 // r63 == 64 % 64
+	mmulfx.w r5,r4,r4
+	pta LOCAL(large_divisor),tr0
+	addi r20,32,r9	/* r9 = 32 - r22 */
+	msub.w r1,r4,r1
+	madd.w r1,r1,r1
+	mmulfx.w r1,r1,r4
+	shlri r6,32,r7	/* r7 = upper 32 bits of the shifted divisor */
+	bgt/u r9,r63,tr0 // large_divisor
+	mmulfx.w r5,r4,r4
+	shlri r2,32+14,r19
+	addi r22,-31,r0
+	msub.w r1,r4,r1
+
+	mulu.l r1,r7,r4
+	addi r1,-3,r5
+	mulu.l r5,r19,r5
+	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+	                 the case may be, %0000000000000000 000.11111111111, still */
+	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
+	mulu.l r5,r3,r8
+	mshalds.l r1,r21,r1
+	shari r4,26,r4
+	shlld r8,r0,r8
+	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+	sub r2,r8,r2
+	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
+
+	shlri r2,22,r21
+	mulu.l r21,r1,r21
+	shlld r5,r0,r8
+	addi r20,30-22,r0
+	shlrd r21,r0,r21
+	mulu.l r21,r3,r5
+	add r8,r21,r8	/* accumulate the partial quotient in r8 */
+	mcmpgt.l r21,r63,r21 // See Note 1
+	addi r20,30,r0
+	mshfhi.l r63,r21,r21
+	sub r2,r5,r2
+	andc r2,r21,r2
+
+	/* small divisor: need a third divide step */
+	mulu.l r2,r1,r7
+	ptabs r18,tr0	/* tr0 = return address */
+	addi r2,1,r2
+	shlrd r7,r0,r7
+	mulu.l r7,r3,r5
+	add r8,r7,r8
+	sub r2,r3,r2
+	cmpgt r2,r5,r5	/* r5 = 1 when one more divisor fits, else 0 */
+	add r8,r5,r2	/* final quotient in r2 */
+	/* could test r3 here to check for divide by zero.  */
+	blink tr0,r63	/* return */
+
+LOCAL(large_divisor):
+	mmulfx.w r5,r4,r4
+	shlrd r2,r9,r25
+	shlri r25,32,r8
+	msub.w r1,r4,r1
+
+	mulu.l r1,r7,r4
+	addi r1,-3,r5
+	mulu.l r5,r8,r5
+	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+	                 the case may be, %0000000000000000 000.11111111111, still */
+	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
+	shlri r5,14-1,r8
+	mulu.l r8,r7,r5
+	mshalds.l r1,r21,r1
+	shari r4,26,r4
+	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+	sub r25,r5,r25
+	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
+
+	shlri r25,22,r21
+	mulu.l r21,r1,r21
+	pta LOCAL(no_lo_adj),tr0
+	addi r22,32,r0
+	shlri r21,40,r21
+	mulu.l r21,r7,r5
+	add r8,r21,r8
+	shlld r2,r0,r2
+	sub r25,r5,r25
+	bgtu/u r7,r25,tr0 // no_lo_adj
+	addi r8,1,r8
+	sub r25,r7,r25
+LOCAL(no_lo_adj):
+	mextr4 r2,r25,r2
+
+	/* large_divisor: only needs a few adjustments.  */
+	mulu.l r8,r6,r5
+	ptabs r18,tr0	/* tr0 = return address */
+	/* bubble */
+	cmpgtu r5,r2,r5	/* r5 = 1 when the estimate r8 is one too large */
+	sub r8,r5,r2	/* final quotient in r2 */
+	blink tr0,r63	/* return */
+	ENDFUNC(GLOBAL(udivdi3))
+/* Note 1: To shift the result of the second divide stage so that the result
+   always fits into 32 bits, yet we still reduce the rest sufficiently
+   would require a lot of instructions to do the shifts just right.  Using
+   the full 64 bit shift result to multiply with the divisor would require
+   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
+   Fortunately, if the upper 32 bits of the shift result are nonzero, we
+   know that the rest after taking this partial result into account will
+   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
+   upper 32 bits of the partial result are nonzero.  */
+#endif /* __SHMEDIA__ */
+#endif /* L_udivdi3 */
+
+#ifdef L_divdi3
+#ifdef __SHMEDIA__
+	.mode	SHmedia
+	.section	.text..SHmedia32,"ax"
+	.align	2
+	.global	GLOBAL(divdi3)
+	FUNC(GLOBAL(divdi3))
+GLOBAL(divdi3):
+	pta GLOBAL(udivdi3_internal),tr0	/* tr0 = unsigned divide routine */
+	shari r2,63,r22	/* r22 = 0 or -1 according to the sign of r2 */
+	shari r3,63,r23	/* r23 = 0 or -1 according to the sign of r3 */
+	xor r2,r22,r2
+	xor r3,r23,r3
+	sub r2,r22,r2	/* r2 = |r2| */
+	sub r3,r23,r3	/* r3 = |r3| */
+	beq/u r22,r23,tr0	/* same signs: branch to the unsigned divide, which returns to our caller */
+	ptabs r18,tr1	/* signs differ: keep our return address in tr1 */
+	blink tr0,r18	/* call the unsigned divide */
+	sub r63,r2,r2	/* negate the quotient */
+	blink tr1,r63	/* return */
+	ENDFUNC(GLOBAL(divdi3))
+#endif /* __SHMEDIA__ */
+#endif /* L_divdi3 */
+
+#ifdef L_umoddi3
+#ifdef __SHMEDIA__
+	.mode	SHmedia
+	.section	.text..SHmedia32,"ax"
+	.align	2
+	.global	GLOBAL(umoddi3)
+	FUNC(GLOBAL(umoddi3))
+GLOBAL(umoddi3):	/* as used below: r2 = dividend, r3 = divisor; result left in r2 */
+	HIDDEN_ALIAS(umoddi3_internal,umoddi3)	/* entry name that moddi3 branches to */
+	shlri r3,1,r4
+	nsb r4,r22
+	shlld r3,r22,r6	/* r6 = divisor shifted left by r22 */
+	shlri r6,49,r5
+	movi 0xffffffffffffbaf1,r21 /* .l shift count 17.  */
+	sub r21,r5,r1
+	mmulfx.w r1,r1,r4
+	mshflo.w r1,r63,r1
+	sub r63,r22,r20 // r63 == 64 % 64
+	mmulfx.w r5,r4,r4
+	pta LOCAL(large_divisor),tr0
+	addi r20,32,r9	/* r9 = 32 - r22 */
+	msub.w r1,r4,r1
+	madd.w r1,r1,r1
+	mmulfx.w r1,r1,r4
+	shlri r6,32,r7	/* r7 = upper 32 bits of the shifted divisor */
+	bgt/u r9,r63,tr0 // large_divisor
+	mmulfx.w r5,r4,r4
+	shlri r2,32+14,r19
+	addi r22,-31,r0
+	msub.w r1,r4,r1
+
+	mulu.l r1,r7,r4
+	addi r1,-3,r5
+	mulu.l r5,r19,r5
+	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+	                 the case may be, %0000000000000000 000.11111111111, still */
+	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
+	mulu.l r5,r3,r5
+	mshalds.l r1,r21,r1
+	shari r4,26,r4
+	shlld r5,r0,r5
+	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+	sub r2,r5,r2
+	/* Can do second step of 64 : 32 div now, using r1 and the rest in r2.  */
+
+	shlri r2,22,r21
+	mulu.l r21,r1,r21
+	addi r20,30-22,r0
+	/* bubble */ /* could test r3 here to check for divide by zero.  */
+	shlrd r21,r0,r21
+	mulu.l r21,r3,r5
+	mcmpgt.l r21,r63,r21 // See Note 1
+	addi r20,30,r0
+	mshfhi.l r63,r21,r21
+	sub r2,r5,r2
+	andc r2,r21,r2
+
+	/* small divisor: need a third divide step */
+	mulu.l r2,r1,r7
+	ptabs r18,tr0	/* tr0 = return address */
+	sub r2,r3,r8 /* re-use r8 here for rest - r3 */
+	shlrd r7,r0,r7
+	mulu.l r7,r3,r5
+	/* bubble */
+	addi r8,1,r7
+	cmpgt r7,r5,r7	/* r7 = 1 when one more divisor fits into the rest */
+	cmvne r7,r8,r2	/* if so, continue with rest - r3 */
+	sub r2,r5,r2	/* final remainder in r2 */
+	blink tr0,r63	/* return */
+
+LOCAL(large_divisor):
+	mmulfx.w r5,r4,r4
+	shlrd r2,r9,r25
+	shlri r25,32,r8
+	msub.w r1,r4,r1
+
+	mulu.l r1,r7,r4
+	addi r1,-3,r5
+	mulu.l r5,r8,r5
+	sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+	shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+	                 the case may be, %0000000000000000 000.11111111111, still */
+	muls.l r1,r4,r4 /* leaving at least one sign bit.  */
+	shlri r5,14-1,r8
+	mulu.l r8,r7,r5
+	mshalds.l r1,r21,r1
+	shari r4,26,r4
+	add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+	sub r25,r5,r25
+	/* Can do second step of 64 : 32 div now, using r1 and the rest in r25.  */
+
+	shlri r25,22,r21
+	mulu.l r21,r1,r21
+	pta LOCAL(no_lo_adj),tr0
+	addi r22,32,r0
+	shlri r21,40,r21
+	mulu.l r21,r7,r5
+	add r8,r21,r8
+	shlld r2,r0,r2
+	sub r25,r5,r25
+	bgtu/u r7,r25,tr0 // no_lo_adj
+	addi r8,1,r8
+	sub r25,r7,r25
+LOCAL(no_lo_adj):
+	mextr4 r2,r25,r2
+
+	/* large_divisor: only needs a few adjustments.  */
+	mulu.l r8,r6,r5
+	ptabs r18,tr0	/* tr0 = return address */
+	add r2,r6,r7
+	cmpgtu r5,r2,r8	/* r8 = 1 when the quotient estimate was one too large */
+	cmvne r8,r7,r2	/* if so, add the shifted divisor r6 back first */
+	sub r2,r5,r2
+	shlrd r2,r22,r2	/* undo the left shift by r22 applied to the divisor at entry */
+	blink tr0,r63	/* return */
+	ENDFUNC(GLOBAL(umoddi3))
+/* Note 1: To shift the result of the second divide stage so that the result
+   always fits into 32 bits, yet we still reduce the rest sufficiently
+   would require a lot of instructions to do the shifts just right.  Using
+   the full 64 bit shift result to multiply with the divisor would require
+   four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
+   Fortunately, if the upper 32 bits of the shift result are nonzero, we
+   know that the rest after taking this partial result into account will
+   fit into 32 bits.  So we just clear the upper 32 bits of the rest if the
+   upper 32 bits of the partial result are nonzero.  */
+#endif /* __SHMEDIA__ */
+#endif /* L_umoddi3 */
+
+#ifdef L_moddi3
+#ifdef __SHMEDIA__
+	.mode	SHmedia
+	.section	.text..SHmedia32,"ax"
+	.align	2
+	.global	GLOBAL(moddi3)
+	FUNC(GLOBAL(moddi3))
+GLOBAL(moddi3):
+	pta GLOBAL(umoddi3_internal),tr0	/* tr0 = unsigned modulo routine */
+	shari r2,63,r22	/* r22 = 0 or -1 according to the sign of r2 */
+	shari r3,63,r23	/* r23 = 0 or -1 according to the sign of r3 */
+	xor r2,r22,r2
+	xor r3,r23,r3
+	sub r2,r22,r2	/* r2 = |r2| */
+	sub r3,r23,r3	/* r3 = |r3| */
+	beq/u r22,r63,tr0	/* r2 was non-negative: branch to the unsigned modulo, which returns to our caller */
+	ptabs r18,tr1	/* r2 was negative: keep our return address in tr1 */
+	blink tr0,r18	/* call the unsigned modulo */
+	sub r63,r2,r2	/* negate the remainder */
+	blink tr1,r63	/* return */
+	ENDFUNC(GLOBAL(moddi3))
+#endif /* __SHMEDIA__ */
+#endif /* L_moddi3 */
+
+#ifdef L_set_fpscr
+#if !defined (__SH2A_NOFPU__)
+#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
+#ifdef __SH5__
+	.mode	SHcompact
+#endif
+	.global GLOBAL(set_fpscr)
+	HIDDEN_FUNC(GLOBAL(set_fpscr))
+GLOBAL(set_fpscr):
+	lds r4,fpscr	/* install the value passed in r4 as the new FPSCR */
+#ifdef __PIC__
+	mov.l	r12,@-r15	/* r12 is used as scratch for the GOT address; save it */
+#ifdef __vxworks
+	mov.l	LOCAL(set_fpscr_L0_base),r12
+	mov.l	LOCAL(set_fpscr_L0_index),r0
+	mov.l	@r12,r12
+	mov.l	@(r0,r12),r12
+#else
+	mova	LOCAL(set_fpscr_L0),r0
+	mov.l	LOCAL(set_fpscr_L0),r12
+	add	r0,r12
+#endif
+	mov.l	LOCAL(set_fpscr_L1),r0
+	mov.l	@(r0,r12),r1	/* r1 = address of fpscr_values, read from the GOT */
+	mov.l	@r15+,r12	/* restore r12 */
+#else
+	mov.l LOCAL(set_fpscr_L1),r1	/* r1 = address of fpscr_values */
+#endif
+	swap.w r4,r0	/* swap halves: FPSCR bits 19/20 become bits 3/4, reachable by 8-bit immediates */
+	or #24,r0	/* set bits 3 and 4 (bits 19 and 20 once swapped back) */
+#ifndef FMOVD_WORKS
+	xor #16,r0	/* clear bit 4 (bit 20) again */
+#endif
+#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
+	swap.w r0,r3
+	mov.l r3,@(4,r1)	/* this variant goes to the second word of fpscr_values */
+#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
+	swap.w r0,r2
+	mov.l r2,@r1	/* this variant goes to the first word of fpscr_values */
+#endif
+#ifndef FMOVD_WORKS
+	xor #8,r0	/* clear bit 3 (bit 19) */
+#else
+	xor #24,r0	/* clear bits 3 and 4 (bits 19 and 20) */
+#endif
+#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
+	swap.w r0,r2
+	rts
+	mov.l r2,@r1	/* delay slot: the other variant goes to the first word */
+#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
+	swap.w r0,r3
+	rts
+	mov.l r3,@(4,r1)	/* delay slot: the other variant goes to the second word */
+#endif
+	.align 2
+#ifdef __PIC__
+#ifdef __vxworks
+LOCAL(set_fpscr_L0_base):
+	.long ___GOTT_BASE__
+LOCAL(set_fpscr_L0_index):
+	.long ___GOTT_INDEX__
+#else
+LOCAL(set_fpscr_L0):
+	.long _GLOBAL_OFFSET_TABLE_
+#endif
+LOCAL(set_fpscr_L1):
+	.long GLOBAL(fpscr_values@GOT)
+#else
+LOCAL(set_fpscr_L1):
+	.long GLOBAL(fpscr_values)
+#endif
+
+	ENDFUNC(GLOBAL(set_fpscr))
+#ifndef NO_FPSCR_VALUES
+#ifdef __ELF__
+        .comm   GLOBAL(fpscr_values),8,4
+#else
+        .comm   GLOBAL(fpscr_values),8
+#endif /* ELF */
+#endif /* NO_FPSCR_VALUES */
+#endif /* SH2E / SH3E / SH4 */
+#endif /* __SH2A_NOFPU__ */
+#endif /* L_set_fpscr */
+#ifdef L_ic_invalidate
+#if __SH5__ == 32
+	.mode	SHmedia
+	.section	.text..SHmedia32,"ax"
+	.align	2
+	.global	GLOBAL(init_trampoline)
+	HIDDEN_FUNC(GLOBAL(init_trampoline))
+GLOBAL(init_trampoline):	/* fills the 16 bytes at r0 from a constant, r2 and r3 */
+	st.l	r0,8,r2		/* word at r0+8 = r2 */
+#ifdef __LITTLE_ENDIAN__
+	movi	9,r20		/* build r20 = 0x0009402bd101d002 */
+	shori	0x402b,r20
+	shori	0xd101,r20
+	shori	0xd002,r20
+#else
+	movi	0xffffffffffffd002,r20	/* build r20 = 0xd002d101402b0009 */
+	shori	0xd101,r20
+	shori	0x402b,r20
+	shori	9,r20
+#endif
+	st.q	r0,0,r20	/* 8 bytes at r0+0 = the constant; presumably four SHcompact opcodes - TODO confirm */
+	st.l	r0,12,r3	/* word at r0+12 = r3; no return here, execution continues into the code that follows */
+	ENDFUNC(GLOBAL(init_trampoline))
+	.global	GLOBAL(ic_invalidate)
+	HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):	/* address of the line to invalidate is in r0 */
+	ocbwb	r0,0		/* write back the data cache block at r0 */
+	synco
+	icbi	r0, 0		/* invalidate the instruction cache block at r0 */
+	ptabs	r18, tr0	/* tr0 = return address */
+	synci
+	blink	tr0, r63	/* return */
+	ENDFUNC(GLOBAL(ic_invalidate))
+#elif defined(__SH4A__)
+	.global GLOBAL(ic_invalidate)
+	HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):	/* address of the line to invalidate is in r4 */
+	ocbwb	@r4		/* write back the data cache block at r4 */
+	synco
+	icbi	@r4		/* invalidate the instruction cache block at r4 */
+	rts
+	  nop			/* delay slot */
+	ENDFUNC(GLOBAL(ic_invalidate))
+#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
+	/* For system code, we use ic_invalidate_line_i, but user code
+	   needs a different mechanism.  A kernel call is generally not
+	   available, and it would also be slow.  Different SH4 variants use
+	   different sizes and associativities of the Icache.  We use a small
+	   bit of dispatch code that can be put hidden in every shared object,
+	   which calls the actual processor-specific invalidation code in a
+	   separate module.
+	   Or if you have operating system support, the OS could mmap the
+	   processor-specific code from a single page, since it is highly
+	   repetitive.  */
+	.global GLOBAL(ic_invalidate)
+	HIDDEN_FUNC(GLOBAL(ic_invalidate))
+	/* Dispatcher: r4 holds the address whose cache line is to be
+	   invalidated.  Loads the address of ic_invalidate_array into r1,
+	   then jumps into that array at an offset derived from r4.
+	   Uses r0, r1, r2 (r2 only when __pic__) and modifies r4.  */
+GLOBAL(ic_invalidate):
+#ifdef __pic__
+#ifdef __vxworks
+	mov.l	1f,r1
+	mov.l	2f,r0
+	mov.l	@r1,r1
+	mov.l	0f,r2
+	mov.l	@(r0,r1),r0	/* r0 = GOT address */
+#else
+	mov.l	1f,r1
+	mova	1f,r0
+	mov.l	0f,r2
+	add	r1,r0		/* r0 = GOT address */
+#endif
+	mov.l	@(r0,r2),r1	/* r1 = address of ic_invalidate_array, read from the GOT */
+#else
+	mov.l	0f,r1		/* r1 = address of ic_invalidate_array */
+#endif
+	ocbwb	@r4		/* write back the data cache block at r4 */
+	mov.l	@(8,r1),r0	/* r0 = mask word stored at offset 8 of the array */
+	sub	r1,r4		/* r4 = address relative to the array */
+	and	r4,r0
+	add	r1,r0		/* r0 = array + (relative address & mask) */
+	jmp	@r0
+	mov.l	@(4,r1),r0	/* delay slot: r0 = word at offset 4 of the array */
+	.align	2
+#ifndef __pic__
+0:	.long   GLOBAL(ic_invalidate_array)
+#else /* __pic__ */
+	.global GLOBAL(ic_invalidate_array)
+0:	.long   GLOBAL(ic_invalidate_array)@GOT
+#ifdef __vxworks
+1:	.long	___GOTT_BASE__
+2:	.long	___GOTT_INDEX__
+#else
+1:	.long   _GLOBAL_OFFSET_TABLE_
+#endif
+#endif /* __pic__ */
+	/* Close the function in the non-PIC build too; ENDFUNC used to sit
+	   inside the __pic__ arm only.  */
+	ENDFUNC(GLOBAL(ic_invalidate))
+#endif /* SH4 */
+#endif /* L_ic_invalidate */
+
+#ifdef L_ic_invalidate_array
+#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
+	.global GLOBAL(ic_invalidate_array)
+	/* This is needed when an SH4 dso with trampolines is used on SH4A.  */
+	.global GLOBAL(ic_invalidate_array)
+	FUNC(GLOBAL(ic_invalidate_array))
+GLOBAL(ic_invalidate_array):
+	add	r1,r4		/* the SH4 ic_invalidate dispatcher did "sub r1,r4"; undo it */
+	synco
+	icbi	@r4		/* invalidate the instruction cache block at r4 */
+	rts
+	  nop			/* delay slot */
+	.align 2
+	.long	0	/* NOTE(review): the SH4 dispatcher reads its mask from offset 8 of this array; with five 2-byte instructions above, this word appears to land at offset 12 - confirm */
+	ENDFUNC(GLOBAL(ic_invalidate_array))
+#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
+	.global GLOBAL(ic_invalidate_array)
+	.p2align 5
+	FUNC(GLOBAL(ic_invalidate_array))
+/* This must be aligned to the beginning of a cache line.  */
+GLOBAL(ic_invalidate_array):
+#ifndef WAYS
+#define WAYS 4
+#define WAY_SIZE 0x4000
+#endif
+#if WAYS == 1
+	.rept	WAY_SIZE * WAYS / 32	/* one 32-byte entry per repetition */
+	rts
+	nop
+	.rept	7
+	.long	WAY_SIZE - 32
+	.endr
+	.endr
+#elif WAYS <= 6
+	.rept	WAY_SIZE * WAYS / 32	/* one 32-byte entry per repetition */
+	braf	r0
+	add	#-8,r0
+	.long	WAY_SIZE + 8	/* offset 4 of each entry: the dispatcher loads this into r0 */
+	.long	WAY_SIZE - 32	/* offset 8 of each entry: the dispatcher loads this as its mask */
+	.rept	WAYS-2
+	braf	r0
+	nop
+	.endr
+	.rept	7 - WAYS	/* fills the rest of the 32-byte entry */
+	rts
+	nop
+	.endr
+	.endr
+#else /* WAYS > 6 */
+	/* This variant needs two different pages for mmap-ing.  */
+ 	.rept	WAYS-1
+	.rept	WAY_SIZE / 32	/* one 32-byte entry per repetition */
+	braf	r0
+	nop
+	.long	WAY_SIZE
+	.rept 6
+	.long	WAY_SIZE - 32
+	.endr
+	.endr
+	.endr
+	.rept	WAY_SIZE / 32	/* last way: 32-byte entries that just return */
+	rts
+	.rept	15
+	nop
+	.endr
+	.endr
+#endif /* WAYS */
+	ENDFUNC(GLOBAL(ic_invalidate_array))
+#endif /* SH4 */
+#endif /* L_ic_invalidate_array */
+
+#if defined (__SH5__) && __SH5__ == 32
+#ifdef L_shcompact_call_trampoline
+	.section	.rodata
+	.align	1
+LOCAL(ct_main_table):	/* 33 16-bit handler offsets, each relative to ct_main_label */
+.word	LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
+.word	LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
+	.mode	SHmedia
+	.section	.text..SHmedia32, "ax"
+	.align	2
+	
+     /* This function loads 64-bit general-purpose registers from the
+	stack, from a memory address contained in them or from an FP
+	register, according to a cookie passed in r1.  Its execution
+	time is linear on the number of registers that actually have
+	to be copied.  See sh.h for details on the actual bit pattern.
+
+	The function to be called is passed in r0.  If a 32-bit return
+	value is expected, the actual function will be tail-called,
+	otherwise the return address will be stored in r10 (that the
+	caller should expect to be clobbered) and the return value
+	will be expanded into r2/r3 upon return.  */
+	
+	.global	GLOBAL(GCC_shcompact_call_trampoline)
+	FUNC(GLOBAL(GCC_shcompact_call_trampoline))
+GLOBAL(GCC_shcompact_call_trampoline):
+	ptabs/l	r0, tr0	/* Prepare to call the actual function.  */
+	movi	((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0	/* r0 = table address biased by 31 entries (upper half) */
+	pt/l	LOCAL(ct_loop), tr1	/* tr1 = dispatch loop, the handlers branch back here */
+	addz.l	r1, r63, r1	/* zero-extend the 32-bit cookie */
+	shori	((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0	/* lower half of the biased table address */
+LOCAL(ct_loop):
+	nsb	r1, r28		/* r28 depends on the highest bit still set in the cookie */
+	shlli	r28, 1, r29	/* scale to 16-bit table entries */
+	ldx.w	r0, r29, r30	/* r30 = handler offset relative to ct_main_label */
+LOCAL(ct_main_label):
+	ptrel/l	r30, tr2
+	blink	tr2, r63	/* jump to the selected handler */
+LOCAL(ct_r2_fp):	/* Copy r2 from an FP register.  */
+	/* It must be dr0, so just do it.  */
+	fmov.dq	dr0, r2
+	movi	7, r30
+	shlli	r30, 29, r31
+	andc	r1, r31, r1	/* clear cookie bits 29-31 */
+	blink	tr1, r63	/* back to the dispatch loop */
+LOCAL(ct_r3_fp):	/* Copy r3 from an FP register.  */
+	/* It is either dr0 or dr2.  */
+	movi	7, r30
+	shlri	r1, 26, r32
+	shlli	r30, 26, r31
+	andc	r1, r31, r1	/* clear cookie bits 26-28 */
+	fmov.dq	dr0, r3
+	beqi/l	r32, 4, tr1	/* field value 4: dr0 was right, back to the loop */
+	fmov.dq	dr2, r3
+	blink	tr1, r63
+LOCAL(ct_r4_fp):	/* Copy r4 from an FP register.  */
+	shlri	r1, 23 - 3, r34
+	andi	r34, 3 << 3, r33	/* r33 = (low two bits of the field) * 8, the size of one copy entry */
+	addi	r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
+LOCAL(ct_r4_fp_base):
+	ptrel/l	r32, tr2	/* tr2 = selected entry of ct_r4_fp_copy */
+	movi	7, r30
+	shlli	r30, 23, r31
+	andc	r1, r31, r1	/* clear cookie bits 23-25 */
+	blink	tr2, r63
+LOCAL(ct_r4_fp_copy):	/* two-instruction entries, one per source register */
+	fmov.dq	dr0, r4
+	blink	tr1, r63
+	fmov.dq	dr2, r4
+	blink	tr1, r63
+	fmov.dq	dr4, r4
+	blink	tr1, r63
+LOCAL(ct_r5_fp):	/* Copy r5 from an FP register.  */
+	shlri	r1, 20 - 3, r34
+	andi	r34, 3 << 3, r33	/* r33 = (low two bits of the field) * 8 */
+	addi	r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
+LOCAL(ct_r5_fp_base):
+	ptrel/l	r32, tr2	/* tr2 = selected entry of ct_r5_fp_copy */
+	movi	7, r30
+	shlli	r30, 20, r31
+	andc	r1, r31, r1	/* clear cookie bits 20-22 */
+	blink	tr2, r63
+LOCAL(ct_r5_fp_copy):
+	fmov.dq	dr0, r5
+	blink	tr1, r63
+	fmov.dq	dr2, r5
+	blink	tr1, r63
+	fmov.dq	dr4, r5
+	blink	tr1, r63
+	fmov.dq	dr6, r5
+	blink	tr1, r63
+LOCAL(ct_r6_fph):	/* Copy r6 from a high FP register.  */
+	/* It must be dr8.  */
+	fmov.dq	dr8, r6
+	movi	15, r30
+	shlli	r30, 16, r31
+	andc	r1, r31, r1	/* clear cookie bits 16-19 */
+	blink	tr1, r63
+LOCAL(ct_r6_fpl):	/* Copy r6 from a low FP register.  */
+	shlri	r1, 16 - 3, r34
+	andi	r34, 3 << 3, r33	/* r33 = (low two bits of the field) * 8 */
+	addi	r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
+LOCAL(ct_r6_fp_base):
+	ptrel/l	r32, tr2	/* tr2 = selected entry of ct_r6_fp_copy */
+	movi	7, r30
+	shlli	r30, 16, r31
+	andc	r1, r31, r1	/* clear cookie bits 16-18 */
+	blink	tr2, r63
+LOCAL(ct_r6_fp_copy):
+	fmov.dq	dr0, r6
+	blink	tr1, r63
+	fmov.dq	dr2, r6
+	blink	tr1, r63
+	fmov.dq	dr4, r6
+	blink	tr1, r63
+	fmov.dq	dr6, r6
+	blink	tr1, r63
+LOCAL(ct_r7_fph):	/* Copy r7 from a high FP register.  */
+	/* It is either dr8 or dr10.  */
+	movi	15 << 12, r31
+	shlri	r1, 12, r32
+	andc	r1, r31, r1	/* clear cookie bits 12-15 */
+	fmov.dq	dr8, r7
+	beqi/l	r32, 8, tr1	/* field value 8: dr8 was right, back to the loop */
+	fmov.dq	dr10, r7
+	blink	tr1, r63
+LOCAL(ct_r7_fpl):	/* Copy r7 from a low FP register.  */
+	shlri	r1, 12 - 3, r34
+	andi	r34, 3 << 3, r33	/* r33 = (low two bits of the field) * 8 */
+	addi	r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
+LOCAL(ct_r7_fp_base):
+	ptrel/l	r32, tr2	/* tr2 = selected entry of ct_r7_fp_copy */
+	movi	7 << 12, r31
+	andc	r1, r31, r1	/* clear cookie bits 12-14 */
+	blink	tr2, r63
+LOCAL(ct_r7_fp_copy):
+	fmov.dq	dr0, r7
+	blink	tr1, r63
+	fmov.dq	dr2, r7
+	blink	tr1, r63
+	fmov.dq	dr4, r7
+	blink	tr1, r63
+	fmov.dq	dr6, r7
+	blink	tr1, r63
+LOCAL(ct_r8_fph):	/* Copy r8 from a high FP register.  */
+	/* It is either dr8 or dr10.  */
+	movi	15 << 8, r31
+	andi	r1, 1 << 8, r32	/* r32 = cookie bit 8, selects dr10 when set */
+	andc	r1, r31, r1	/* clear cookie bits 8-11 */
+	fmov.dq	dr8, r8
+	beq/l	r32, r63, tr1	/* bit clear: dr8 was right, back to the loop */
+	fmov.dq	dr10, r8
+	blink	tr1, r63
+LOCAL(ct_r8_fpl):	/* Copy r8 from a low FP register.  */
+	shlri	r1, 8 - 3, r34
+	andi	r34, 3 << 3, r33	/* r33 = (low two bits of the field) * 8 */
+	addi	r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
+LOCAL(ct_r8_fp_base):
+	ptrel/l	r32, tr2	/* tr2 = selected entry of ct_r8_fp_copy */
+	movi	7 << 8, r31
+	andc	r1, r31, r1	/* clear cookie bits 8-10 */
+	blink	tr2, r63
+LOCAL(ct_r8_fp_copy):
+	fmov.dq	dr0, r8
+	blink	tr1, r63
+	fmov.dq	dr2, r8
+	blink	tr1, r63
+	fmov.dq	dr4, r8
+	blink	tr1, r63
+	fmov.dq	dr6, r8
+	blink	tr1, r63
+LOCAL(ct_r9_fph):	/* Copy r9 from a high FP register.  */
+	/* It is either dr8 or dr10.  */
+	movi	15 << 4, r31
+	andi	r1, 1 << 4, r32	/* r32 = cookie bit 4, selects dr10 when set */
+	andc	r1, r31, r1	/* clear cookie bits 4-7 */
+	fmov.dq	dr8, r9
+	beq/l	r32, r63, tr1	/* bit clear: dr8 was right, back to the loop */
+	fmov.dq	dr10, r9
+	blink	tr1, r63
+LOCAL(ct_r9_fpl):	/* Copy r9 from a low FP register.  */
+	shlri	r1, 4 - 3, r34
+	andi	r34, 3 << 3, r33	/* r33 = (low two bits of the field) * 8 */
+	addi	r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
+LOCAL(ct_r9_fp_base):
+	ptrel/l	r32, tr2	/* tr2 = selected entry of ct_r9_fp_copy */
+	movi	7 << 4, r31
+	andc	r1, r31, r1	/* clear cookie bits 4-6 */
+	blink	tr2, r63
+LOCAL(ct_r9_fp_copy):
+	fmov.dq	dr0, r9
+	blink	tr1, r63
+	fmov.dq	dr2, r9
+	blink	tr1, r63
+	fmov.dq	dr4, r9
+	blink	tr1, r63
+	fmov.dq	dr6, r9
+	blink	tr1, r63
+LOCAL(ct_r2_ld):	/* Copy r2 from a memory address.  */
+	pt/l	LOCAL(ct_r2_load), tr2
+	movi	3, r30
+	shlli	r30, 29, r31
+	and	r1, r31, r32
+	andc	r1, r31, r1	/* clear cookie bits 29-30 */
+	beq/l	r31, r32, tr2	/* both bits were set: load r2 only, then back to the loop */
+	addi.l	r2, 8, r3	/* otherwise the next register's address is 8 bytes further on */
+	ldx.q	r2, r63, r2
+	/* Fall through.  */
+LOCAL(ct_r3_ld):	/* Copy r3 from a memory address.  */
+	pt/l	LOCAL(ct_r3_load), tr2
+	movi	3, r30
+	shlli	r30, 26, r31
+	and	r1, r31, r32
+	andc	r1, r31, r1	/* clear cookie bits 26-27 */
+	beq/l	r31, r32, tr2	/* both bits were set: load r3 only */
+	addi.l	r3, 8, r4
+	ldx.q	r3, r63, r3	/* falls through to ct_r4_ld */
+LOCAL(ct_r4_ld):	/* Copy r4 from a memory address.  */
+	pt/l	LOCAL(ct_r4_load), tr2
+	movi	3, r30
+	shlli	r30, 23, r31
+	and	r1, r31, r32
+	andc	r1, r31, r1	/* clear cookie bits 23-24 */
+	beq/l	r31, r32, tr2	/* both bits were set: load r4 only */
+	addi.l	r4, 8, r5
+	ldx.q	r4, r63, r4	/* falls through to ct_r5_ld */
+LOCAL(ct_r5_ld):	/* Copy r5 from a memory address.  */
+	pt/l	LOCAL(ct_r5_load), tr2
+	movi	3, r30
+	shlli	r30, 20, r31
+	and	r1, r31, r32
+	andc	r1, r31, r1	/* clear cookie bits 20-21 */
+	beq/l	r31, r32, tr2	/* both bits were set: load r5 only */
+	addi.l	r5, 8, r6
+	ldx.q	r5, r63, r5	/* falls through to ct_r6_ld */
+LOCAL(ct_r6_ld):	/* Copy r6 from a memory address.  */
+	pt/l	LOCAL(ct_r6_load), tr2
+	movi	3 << 16, r31
+	and	r1, r31, r32
+	andc	r1, r31, r1	/* clear cookie bits 16-17 */
+	beq/l	r31, r32, tr2	/* both bits were set: load r6 only */
+	addi.l	r6, 8, r7
+	ldx.q	r6, r63, r6	/* falls through to ct_r7_ld */
+LOCAL(ct_r7_ld):	/* Copy r7 from a memory address.  */
+	pt/l	LOCAL(ct_r7_load), tr2
+	movi	3 << 12, r31
+	and	r1, r31, r32
+	andc	r1, r31, r1	/* clear cookie bits 12-13 */
+	beq/l	r31, r32, tr2	/* both bits were set: load r7 only */
+	addi.l	r7, 8, r8
+	ldx.q	r7, r63, r7	/* falls through to ct_r8_ld */
+LOCAL(ct_r8_ld):	/* Copy r8 from a memory address.  */
+	pt/l	LOCAL(ct_r8_load), tr2
+	movi	3 << 8, r31
+	and	r1, r31, r32
+	andc	r1, r31, r1	/* clear cookie bits 8-9 */
+	beq/l	r31, r32, tr2	/* both bits were set: load r8 only */
+	addi.l	r8, 8, r9
+	ldx.q	r8, r63, r8	/* falls through to ct_r9_ld */
+LOCAL(ct_r9_ld):	/* Copy r9 from a memory address.  */
+	pt/l	LOCAL(ct_check_tramp), tr2
+	ldx.q	r9, r63, r9
+	blink	tr2, r63	/* r9 is the last register: go on to the final check */
+LOCAL(ct_r2_load):	/* single-register loads; each returns to the dispatch loop */
+	ldx.q	r2, r63, r2
+	blink	tr1, r63
+LOCAL(ct_r3_load):
+	ldx.q	r3, r63, r3
+	blink	tr1, r63
+LOCAL(ct_r4_load):
+	ldx.q	r4, r63, r4
+	blink	tr1, r63
+LOCAL(ct_r5_load):
+	ldx.q	r5, r63, r5
+	blink	tr1, r63
+LOCAL(ct_r6_load):
+	ldx.q	r6, r63, r6
+	blink	tr1, r63
+LOCAL(ct_r7_load):
+	ldx.q	r7, r63, r7
+	blink	tr1, r63
+LOCAL(ct_r8_load):
+	ldx.q	r8, r63, r8
+	blink	tr1, r63
+LOCAL(ct_r2_pop):	/* Pop r2 from the stack.  */
+	movi	1, r30
+	ldx.q	r15, r63, r2	/* r2 = 64-bit value at the stack pointer */
+	shlli	r30, 29, r31
+	addi.l	r15, 8, r15	/* advance the stack pointer by 8 bytes */
+	andc	r1, r31, r1	/* clear cookie bit 29 */
+	blink	tr1, r63	/* back to the dispatch loop */
+LOCAL(ct_r3_pop):	/* Pop r3 from the stack.  */
+	movi	1, r30
+	ldx.q	r15, r63, r3
+	shlli	r30, 26, r31
+	addi.l	r15, 8, r15
+	andc	r1, r31, r1	/* clear cookie bit 26 */
+	blink	tr1, r63
+LOCAL(ct_r4_pop):	/* Pop r4 from the stack.  */
+	movi	1, r30
+	ldx.q	r15, r63, r4
+	shlli	r30, 23, r31
+	addi.l	r15, 8, r15
+	andc	r1, r31, r1	/* clear cookie bit 23 */
+	blink	tr1, r63
+LOCAL(ct_r5_pop):	/* Pop r5 from the stack.  */
+	movi	1, r30
+	ldx.q	r15, r63, r5
+	shlli	r30, 20, r31
+	addi.l	r15, 8, r15
+	andc	r1, r31, r1	/* clear cookie bit 20 */
+	blink	tr1, r63
+LOCAL(ct_r6_pop):	/* Pop r6 from the stack.  */
+	movi	1, r30
+	ldx.q	r15, r63, r6
+	shlli	r30, 16, r31
+	addi.l	r15, 8, r15
+	andc	r1, r31, r1	/* clear cookie bit 16 */
+	blink	tr1, r63
+LOCAL(ct_r7_pop):	/* Pop r7 from the stack.  */
+	ldx.q	r15, r63, r7
+	movi	1 << 12, r31
+	addi.l	r15, 8, r15
+	andc	r1, r31, r1	/* clear cookie bit 12 */
+	blink	tr1, r63
+LOCAL(ct_r8_pop):	/* Pop r8 from the stack.  */
+	ldx.q	r15, r63, r8
+	movi	1 << 8, r31
+	addi.l	r15, 8, r15
+	andc	r1, r31, r1	/* clear cookie bit 8 */
+	blink	tr1, r63
+LOCAL(ct_pop_seq):	/* Pop a sequence of registers off the stack.  */
+	andi	r1, 7 << 1, r30	/* r30 = 2 * (value of cookie bits 1-3) */
+	movi	(LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
+	shlli	r30, 2, r31	/* r31 = 8 bytes (one ldx.q/addi.l pair) per register to pop */
+	shori	LOCAL(ct_end_of_pop_seq) & 65535, r32
+	sub.l	r32, r31, r33	/* r33 = address of the first pop to execute */
+	ptabs/l	r33, tr2
+	blink	tr2, r63
+LOCAL(ct_start_of_pop_seq):	/* Beginning of pop sequence.  */
+	ldx.q	r15, r63, r3
+	addi.l	r15, 8, r15
+	ldx.q	r15, r63, r4
+	addi.l	r15, 8, r15
+	ldx.q	r15, r63, r5
+	addi.l	r15, 8, r15
+	ldx.q	r15, r63, r6
+	addi.l	r15, 8, r15
+	ldx.q	r15, r63, r7
+	addi.l	r15, 8, r15
+	ldx.q	r15, r63, r8
+	addi.l	r15, 8, r15
+LOCAL(ct_r9_pop):	/* Pop r9 from the stack.  */
+	ldx.q	r15, r63, r9
+	addi.l	r15, 8, r15
+LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction.  */
+LOCAL(ct_check_tramp):	/* Check whether we need a trampoline.  */
+	pt/u	LOCAL(ct_ret_wide), tr2
+	andi	r1, 1, r1	/* keep only cookie bit 0 */
+	bne/u	r1, r63, tr2	/* bit 0 set: call through ct_ret_wide */
+LOCAL(ct_call_func):	/* Just branch to the function.  */
+	blink	tr0, r63
+LOCAL(ct_ret_wide):	/* Call the function, so that we can unpack its 
+			   64-bit return value.  */
+	add.l	r18, r63, r10	/* keep our caller's return address in r10 */
+	blink	tr0, r18	/* call the function; it returns to the next instruction */
+	ptabs	r10, tr0	/* tr0 = our caller's return address */
+#if __LITTLE_ENDIAN__
+	shari	r2, 32, r3	/* r3 = upper 32 bits of the result */
+	add.l	r2, r63, r2	/* r2 = lower 32 bits of the result */
+#else
+	add.l	r2, r63, r3	/* r3 = lower 32 bits of the result */
+	shari	r2, 32, r2	/* r2 = upper 32 bits of the result */
+#endif
+	blink	tr0, r63	/* return to our caller */
+
+	ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
+#endif /* L_shcompact_call_trampoline */
+
+#ifdef L_shcompact_return_trampoline
+     /* This function does the converse of the code in `ret_wide'
+	above.  It is tail-called by SHcompact functions returning
+	64-bit non-floating-point values, to pack the 32-bit values in
+	r2 and r3 into r2.  */
+
+	.mode	SHmedia
+	.section	.text..SHmedia32, "ax"
+	.align	2
+	.global	GLOBAL(GCC_shcompact_return_trampoline)
+	HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
+GLOBAL(GCC_shcompact_return_trampoline):
+	ptabs/l	r18, tr0	/* tr0 = return address */
+#if __LITTLE_ENDIAN__
+	addz.l	r2, r63, r2	/* r2 = low word, zero-extended */
+	shlli	r3, 32, r3	/* r3 = high word moved to bits 32-63 */
+#else
+	addz.l	r3, r63, r3	/* r3 = low word, zero-extended */
+	shlli	r2, 32, r2	/* r2 = high word moved to bits 32-63 */
+#endif
+	or	r3, r2, r2	/* r2 = packed 64-bit value */
+	blink	tr0, r63	/* return */
+
+	ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
+#endif /* L_shcompact_return_trampoline */
+
+#ifdef L_shcompact_incoming_args
+	.section	.rodata
+	.align	1
+LOCAL(ia_main_table):	/* Dispatch table: one 16-bit handler offset (relative to ia_main_label) per position of the highest set cookie bit, bit 31 first; the last entry serves a zero cookie.  */
+.word	1 /* Bit 31: Invalid, just loop */
+.word	LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) /* Bit 30 */
+.word	LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) /* Bit 29 */
+.word	1 /* Bit 28: Invalid, just loop */
+.word	LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) /* Bit 27 */
+.word	LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) /* Bit 26 */
+.word	1 /* Bit 25: Invalid, just loop */
+.word	LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) /* Bit 24 */
+.word	LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) /* Bit 23 */
+.word	1 /* Bit 22: Invalid, just loop */
+.word	LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) /* Bit 21 */
+.word	LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) /* Bit 20 */
+.word	1 /* Bit 19: Invalid, just loop */
+.word	1 /* Bit 18: Invalid, just loop */
+.word	LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) /* Bit 17 */
+.word	LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) /* Bit 16 */
+.word	1 /* Bit 15: Invalid, just loop */
+.word	1 /* Bit 14: Invalid, just loop */
+.word	LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) /* Bit 13 */
+.word	LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) /* Bit 12 */
+.word	1 /* Bit 11: Invalid, just loop */
+.word	1 /* Bit 10: Invalid, just loop */
+.word	LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) /* Bit 9 */
+.word	LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) /* Bit 8 */
+.word	1 /* Bit 7: Invalid, just loop */
+.word	1 /* Bit 6: Invalid, just loop */
+.word	LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) /* Bit 5 */
+.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) /* Bit 4 */
+.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) /* Bit 3 */
+.word	LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) /* Bit 2 */
+.word	LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) /* Bit 1 */
+.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label) /* Bit 0 */
+.word	LOCAL(ia_return) - datalabel LOCAL(ia_main_label) /* Cookie is zero: nothing left to do */
+	.mode	SHmedia
+	.section	.text..SHmedia32, "ax"
+	.align	2
+	
+     /* This function stores 64-bit general-purpose registers back in
+	the stack, and loads the address in which each register
+	was stored into itself.  The lower 32 bits of r17 hold the address
+	to begin storing, and the upper 32 bits of r17 hold the cookie;
+	r18 holds the return address.  Its execution time is linear in the
+	number of registers that actually have to be copied, and it is
+	optimized for structures larger than 64 bits, as opposed to
+	individual `long long' arguments.  See sh.h for details on the
+	actual bit pattern.  */
+	
+	.global	GLOBAL(GCC_shcompact_incoming_args)
+ 	FUNC(GLOBAL(GCC_shcompact_incoming_args))
+GLOBAL(GCC_shcompact_incoming_args):
+	ptabs/l	r18, tr0	/* Prepare to return.  */
+	shlri	r17, 32, r0	/* Load the cookie.  */
+	movi	((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43	/* r43 = table address biased by 31 entries (upper half).  */
+	pt/l	LOCAL(ia_loop), tr1	/* tr1 = dispatch loop, re-entered by the handlers.  */
+	add.l	r17, r63, r17	/* Keep only the low 32 bits of r17: the store address.  */
+	shori	((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43	/* ... lower half.  */
+LOCAL(ia_loop):
+	nsb	r0, r36		/* Locate the highest bit still set in the cookie.  */
+	shlli	r36, 1, r37	/* Scale to a byte offset into the 16-bit table.  */
+	ldx.w	r43, r37, r38	/* r38 = offset of the handler for that bit.  */
+LOCAL(ia_main_label):
+	ptrel/l	r38, tr2	/* Table offsets are relative to this instruction.  */
+	blink	tr2, r63	/* Jump to the handler.  */
+LOCAL(ia_r2_ld):	/* Store r2 and load its address.  */
+	movi	3, r38
+	shlli	r38, 29, r39	/* r39 = mask of the two-bit r2 field.  */
+	and	r0, r39, r40	/* r40 = current value of that field.  */
+	andc	r0, r39, r0	/* Clear the field in the cookie.  */
+	stx.q	r17, r63, r2	/* Store r2 at the current address ...  */
+	add.l	r17, r63, r2	/* ... and replace r2 by that address.  */
+	addi.l	r17, 8, r17	/* Advance to the next 8-byte slot.  */
+	beq/u	r39, r40, tr1	/* Both field bits set: back to ia_loop; else fall through to r3.  */
+LOCAL(ia_r3_ld):	/* Store r3 and load its address.  */
+	movi	3, r38
+	shlli	r38, 26, r39	/* r39 = mask of the r3 field.  */
+	and	r0, r39, r40
+	andc	r0, r39, r0
+	stx.q	r17, r63, r3
+	add.l	r17, r63, r3
+	addi.l	r17, 8, r17
+	beq/u	r39, r40, tr1	/* Both bits set: ia_loop; else fall through to r4.  */
+LOCAL(ia_r4_ld):	/* Store r4 and load its address.  */
+	movi	3, r38
+	shlli	r38, 23, r39	/* r39 = mask of the r4 field.  */
+	and	r0, r39, r40
+	andc	r0, r39, r0
+	stx.q	r17, r63, r4
+	add.l	r17, r63, r4
+	addi.l	r17, 8, r17
+	beq/u	r39, r40, tr1	/* Both bits set: ia_loop; else fall through to r5.  */
+LOCAL(ia_r5_ld):	/* Store r5 and load its address.  */
+	movi	3, r38
+	shlli	r38, 20, r39	/* r39 = mask of the r5 field.  */
+	and	r0, r39, r40
+	andc	r0, r39, r0
+	stx.q	r17, r63, r5
+	add.l	r17, r63, r5
+	addi.l	r17, 8, r17
+	beq/u	r39, r40, tr1	/* Both bits set: ia_loop; else fall through to r6.  */
+LOCAL(ia_r6_ld):	/* Store r6 and load its address.  */
+	movi	3, r38
+	shlli	r38, 16, r39	/* r39 = mask of the r6 field.  */
+	and	r0, r39, r40
+	andc	r0, r39, r0
+	stx.q	r17, r63, r6
+	add.l	r17, r63, r6
+	addi.l	r17, 8, r17
+	beq/u	r39, r40, tr1	/* Both bits set: ia_loop; else fall through to r7.  */
+LOCAL(ia_r7_ld):	/* Store r7 and load its address.  */
+	movi	3 << 12, r39	/* r39 = mask of the r7 field.  */
+	and	r0, r39, r40
+	andc	r0, r39, r0
+	stx.q	r17, r63, r7
+	add.l	r17, r63, r7
+	addi.l	r17, 8, r17
+	beq/u	r39, r40, tr1	/* Both bits set: ia_loop; else fall through to r8.  */
+LOCAL(ia_r8_ld):	/* Store r8 and load its address.  */
+	movi	3 << 8, r39	/* r39 = mask of the r8 field.  */
+	and	r0, r39, r40
+	andc	r0, r39, r0
+	stx.q	r17, r63, r8
+	add.l	r17, r63, r8
+	addi.l	r17, 8, r17
+	beq/u	r39, r40, tr1	/* Both bits set: ia_loop; else fall through to r9.  */
+LOCAL(ia_r9_ld):	/* Store r9 and load its address.  */
+	stx.q	r17, r63, r9
+	add.l	r17, r63, r9
+	blink	tr0, r63	/* r9 is the last register: return.  */
+LOCAL(ia_r2_push):	/* Push r2 onto the stack.  */
+	movi	1, r38
+	shlli	r38, 29, r39	/* r39 = the cookie bit that selected this handler.  */
+	andc	r0, r39, r0	/* Clear it so that ia_loop moves on.  */
+	stx.q	r17, r63, r2	/* Store r2; unlike ia_r2_ld, r2 itself is left unchanged.  */
+	addi.l	r17, 8, r17	/* Advance to the next 8-byte slot.  */
+	blink	tr1, r63	/* Back to ia_loop.  */
+LOCAL(ia_r3_push):	/* Push r3 onto the stack.  */
+	movi	1, r38
+	shlli	r38, 26, r39
+	andc	r0, r39, r0
+	stx.q	r17, r63, r3
+	addi.l	r17, 8, r17
+	blink	tr1, r63	/* Back to ia_loop.  */
+LOCAL(ia_r4_push):	/* Push r4 onto the stack.  */
+	movi	1, r38
+	shlli	r38, 23, r39
+	andc	r0, r39, r0
+	stx.q	r17, r63, r4
+	addi.l	r17, 8, r17
+	blink	tr1, r63	/* Back to ia_loop.  */
+LOCAL(ia_r5_push):	/* Push r5 onto the stack.  */
+	movi	1, r38
+	shlli	r38, 20, r39
+	andc	r0, r39, r0
+	stx.q	r17, r63, r5
+	addi.l	r17, 8, r17
+	blink	tr1, r63	/* Back to ia_loop.  */
+LOCAL(ia_r6_push):	/* Push r6 onto the stack.  */
+	movi	1, r38
+	shlli	r38, 16, r39
+	andc	r0, r39, r0
+	stx.q	r17, r63, r6
+	addi.l	r17, 8, r17
+	blink	tr1, r63	/* Back to ia_loop.  */
+LOCAL(ia_r7_push):	/* Push r7 onto the stack.  */
+	movi	1 << 12, r39
+	andc	r0, r39, r0
+	stx.q	r17, r63, r7
+	addi.l	r17, 8, r17
+	blink	tr1, r63	/* Back to ia_loop.  */
+LOCAL(ia_r8_push):	/* Push r8 onto the stack.  */
+	movi	1 << 8, r39
+	andc	r0, r39, r0
+	stx.q	r17, r63, r8
+	addi.l	r17, 8, r17
+	blink	tr1, r63	/* Back to ia_loop.  */
+LOCAL(ia_push_seq):	/* Push a sequence of registers onto the stack.  */
+	andi	r0, 7 << 1, r38	/* r38 = 2 * N, N being cookie bits 3..1.  */
+	movi	(LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40	/* r40 = end of the sequence below (upper half).  */
+	shlli	r38, 2, r39	/* r39 = 8 * N bytes, i.e. N two-instruction steps.  */
+	shori	LOCAL(ia_end_of_push_seq) & 65535, r40	/* ... lower half.  */
+	sub.l	r40, r39, r41	/* Entry point: N steps before the end.  */
+	ptabs/l	r41, tr2
+	blink	tr2, r63	/* Run the last N store steps, ending in the return.  */
+LOCAL(ia_stack_of_push_seq):	 /* Beginning of push sequence.  */
+	stx.q	r17, r63, r3
+	addi.l	r17, 8, r17
+	stx.q	r17, r63, r4
+	addi.l	r17, 8, r17
+	stx.q	r17, r63, r5
+	addi.l	r17, 8, r17
+	stx.q	r17, r63, r6
+	addi.l	r17, 8, r17
+	stx.q	r17, r63, r7
+	addi.l	r17, 8, r17
+	stx.q	r17, r63, r8
+	addi.l	r17, 8, r17
+LOCAL(ia_r9_push):	/* Push r9 onto the stack.  */
+	stx.q	r17, r63, r9
+LOCAL(ia_return):	/* Return.  */
+	blink	tr0, r63
+LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction.  */
+	ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
+#endif /* L_shcompact_incoming_args */
+#endif
+#if __SH5__
+#ifdef L_nested_trampoline
+#if __SH5__ == 32
+	.section	.text..SHmedia32,"ax"
+#else
+	.text
+#endif
+	.align	3 /* It is copied in units of 8 bytes in SHmedia mode.  */
+	.global	GLOBAL(GCC_nested_trampoline)
+	HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
+GLOBAL(GCC_nested_trampoline):	/* Trampoline template: six instructions (24 bytes) that read two data words expected right after the code, at offsets 24 and 32 (28 when __SH5__ == 32).  */
+	.mode	SHmedia
+	ptrel/u	r63, tr0	/* Zero displacement: tr0 = address of this instruction.  */
+	gettr	tr0, r0		/* r0 = base address of this trampoline copy.  */
+#if __SH5__ == 64
+	ld.q	r0, 24, r1	/* r1 = first data word: the address to branch to.  */
+#else
+	ld.l	r0, 24, r1	/* r1 = first data word: the address to branch to.  */
+#endif
+	ptabs/l	r1, tr1		/* tr1 = branch target.  */
+#if __SH5__ == 64
+	ld.q	r0, 32, r1	/* r1 = second data word (presumably the static chain value -- confirm against the trampoline setup in the compiler).  */
+#else
+	ld.l	r0, 28, r1	/* r1 = second data word (presumably the static chain value -- confirm against the trampoline setup in the compiler).  */
+#endif
+	blink	tr1, r63	/* Jump to the target without linking.  */
+
+	ENDFUNC(GLOBAL(GCC_nested_trampoline))
+#endif /* L_nested_trampoline */
+#endif /* __SH5__ */
+#if __SH5__ == 32
+#ifdef L_push_pop_shmedia_regs
+	.section	.text..SHmedia32,"ax"
+	.mode	SHmedia
+	.align	2
+#ifndef __SH4_NOFPU__	
+	.global	GLOBAL(GCC_push_shmedia_regs)
+	FUNC(GLOBAL(GCC_push_shmedia_regs))
+GLOBAL(GCC_push_shmedia_regs):	/* Push dr36..dr62, then fall into the integer part below; 41 slots of 8 bytes in total.  r18 = return address.  */
+	addi.l	r15, -14*8, r15	/* Room for the 14 double-precision registers.  */
+	fst.d	r15, 13*8, dr62
+	fst.d	r15, 12*8, dr60
+	fst.d	r15, 11*8, dr58
+	fst.d	r15, 10*8, dr56
+	fst.d	r15,  9*8, dr54
+	fst.d	r15,  8*8, dr52
+	fst.d	r15,  7*8, dr50
+	fst.d	r15,  6*8, dr48
+	fst.d	r15,  5*8, dr46
+	fst.d	r15,  4*8, dr44
+	fst.d	r15,  3*8, dr42
+	fst.d	r15,  2*8, dr40
+	fst.d	r15,  1*8, dr38
+	fst.d	r15,  0*8, dr36
+#else /* ! __SH4_NOFPU__ */
+	.global	GLOBAL(GCC_push_shmedia_regs_nofpu)
+	FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
+GLOBAL(GCC_push_shmedia_regs_nofpu):	/* Integer-only variant: 27 slots of 8 bytes.  r18 = return address.  */
+#endif /* ! __SH4_NOFPU__ */
+	ptabs/l	r18, tr0	/* Prepare to return.  */
+	addi.l	r15, -27*8, r15	/* Room for tr5..tr7, r44..r59 and r28..r35.  */
+	gettr	tr7, r62	/* r60..r62 are overwritten here, so they serve as scratch and are not preserved.  */
+	gettr	tr6, r61
+	gettr	tr5, r60
+	st.q	r15, 26*8, r62	/* Slot 26 = tr7.  */
+	st.q	r15, 25*8, r61	/* Slot 25 = tr6.  */
+	st.q	r15, 24*8, r60	/* Slot 24 = tr5.  */
+	st.q	r15, 23*8, r59
+	st.q	r15, 22*8, r58
+	st.q	r15, 21*8, r57
+	st.q	r15, 20*8, r56
+	st.q	r15, 19*8, r55
+	st.q	r15, 18*8, r54
+	st.q	r15, 17*8, r53
+	st.q	r15, 16*8, r52
+	st.q	r15, 15*8, r51
+	st.q	r15, 14*8, r50
+	st.q	r15, 13*8, r49
+	st.q	r15, 12*8, r48
+	st.q	r15, 11*8, r47
+	st.q	r15, 10*8, r46
+	st.q	r15,  9*8, r45
+	st.q	r15,  8*8, r44
+	st.q	r15,  7*8, r35
+	st.q	r15,  6*8, r34
+	st.q	r15,  5*8, r33
+	st.q	r15,  4*8, r32
+	st.q	r15,  3*8, r31
+	st.q	r15,  2*8, r30
+	st.q	r15,  1*8, r29
+	st.q	r15,  0*8, r28
+	blink	tr0, r63	/* Return with r15 pointing at slot 0.  */
+#ifndef __SH4_NOFPU__	
+	ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
+#else
+	ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
+#endif
+#ifndef __SH4_NOFPU__	
+	.global	GLOBAL(GCC_pop_shmedia_regs)
+	FUNC(GLOBAL(GCC_pop_shmedia_regs))
+GLOBAL(GCC_pop_shmedia_regs):	/* Inverse of GCC_push_shmedia_regs; r0 is used to hold the frame size.  */
+	pt	.L0, tr1	/* tr1 = shared integer restore code.  */
+	movi	41*8, r0	/* Frame size: 14 FP slots + 27 integer slots.  */
+	fld.d	r15, 40*8, dr62
+	fld.d	r15, 39*8, dr60
+	fld.d	r15, 38*8, dr58
+	fld.d	r15, 37*8, dr56
+	fld.d	r15, 36*8, dr54
+	fld.d	r15, 35*8, dr52
+	fld.d	r15, 34*8, dr50
+	fld.d	r15, 33*8, dr48
+	fld.d	r15, 32*8, dr46
+	fld.d	r15, 31*8, dr44
+	fld.d	r15, 30*8, dr42
+	fld.d	r15, 29*8, dr40
+	fld.d	r15, 28*8, dr38
+	fld.d	r15, 27*8, dr36
+	blink	tr1, r63	/* Skip the integer-only frame size below.  */
+#else /* ! __SH4_NOFPU__	*/
+	.global	GLOBAL(GCC_pop_shmedia_regs_nofpu)
+	FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
+GLOBAL(GCC_pop_shmedia_regs_nofpu):	/* Inverse of GCC_push_shmedia_regs_nofpu; r0 is used to hold the frame size.  */
+#endif /* ! __SH4_NOFPU__	*/
+	movi	27*8, r0	/* Frame size when only the integer slots exist.  */
+.L0:
+	ptabs	r18, tr0	/* Prepare to return.  */
+	ld.q	r15, 26*8, r62	/* Saved tr7.  */
+	ld.q	r15, 25*8, r61	/* Saved tr6.  */
+	ld.q	r15, 24*8, r60	/* Saved tr5.  */
+	ptabs	r62, tr7
+	ptabs	r61, tr6
+	ptabs	r60, tr5
+	ld.q	r15, 23*8, r59
+	ld.q	r15, 22*8, r58
+	ld.q	r15, 21*8, r57
+	ld.q	r15, 20*8, r56
+	ld.q	r15, 19*8, r55
+	ld.q	r15, 18*8, r54
+	ld.q	r15, 17*8, r53
+	ld.q	r15, 16*8, r52
+	ld.q	r15, 15*8, r51
+	ld.q	r15, 14*8, r50
+	ld.q	r15, 13*8, r49
+	ld.q	r15, 12*8, r48
+	ld.q	r15, 11*8, r47
+	ld.q	r15, 10*8, r46
+	ld.q	r15,  9*8, r45
+	ld.q	r15,  8*8, r44
+	ld.q	r15,  7*8, r35
+	ld.q	r15,  6*8, r34
+	ld.q	r15,  5*8, r33
+	ld.q	r15,  4*8, r32
+	ld.q	r15,  3*8, r31
+	ld.q	r15,  2*8, r30
+	ld.q	r15,  1*8, r29
+	ld.q	r15,  0*8, r28
+	add.l	r15, r0, r15	/* Release the whole frame (27 or 41 slots).  */
+	blink	tr0, r63
+
+#ifndef __SH4_NOFPU__
+	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
+#else
+	ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
+#endif
+#endif /* L_push_pop_shmedia_regs */
+#endif /* __SH5__ == 32 */
+
+#ifdef L_div_table
+#if __SH5__
+#if defined(__pic__) && defined(__SHMEDIA__)
+	.global	GLOBAL(sdivsi3)
+	FUNC(GLOBAL(sdivsi3))
+#if __SH5__ == 32
+	.section	.text..SHmedia32,"ax"
+#else
+	.text
+#endif
+#if 0
+/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
+   in a text section does not work (at least for shared libraries):
+   the linker sets the LSB of the address as if this was SHmedia code.  */
+#define TEXT_DATA_BUG
+#endif
+	.align	2
+ // inputs: r4,r5
+ // clobbered: r1,r18,r19,r20,r21,r25,tr0
+ // result in r0
+ .global GLOBAL(sdivsi3)
+GLOBAL(sdivsi3):	/* Signed division through the inverse table defined below: r0 = r4 / r5, r18 = return address.  */
+#ifdef TEXT_DATA_BUG
+ ptb datalabel Local_div_table,tr0
+#else
+ ptb GLOBAL(div_table_internal),tr0	/* tr0 = address of the table label; used as a data pointer, not as a branch target.  */
+#endif
+ nsb r5, r1	/* r1 = shift count that normalizes the divisor.  */
+ shlld r5, r1, r25    // normalize; [-2 ..1, 1..2) in s2.62
+ shari r25, 58, r21   // extract 5(6) bit index (s2.4 with hole -1..1)
+ /* bubble */
+ gettr tr0,r20	/* r20 = table base.  */
+ ldx.ub r20, r21, r19 // u0.8
+ shari r25, 32, r25   // normalize to s2.30
+ shlli r21, 1, r21	/* Scale the index for the 16-bit constants.  */
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21  // s2.14
+  ptabs r18, tr0	/* tr0 = return address; r18 is reused as scratch below.  */
+ shari r19, 24, r19   // truncate to s2.14
+ sub r21, r19, r19    // some 11 bit inverse in s1.14
+ muls.l r19, r19, r21 // u0.28
+  sub r63, r1, r1
+  addi r1, 92, r1	/* r1 = 92 - normalization shift: the final right-shift count.  */
+ muls.l r25, r21, r18 // s2.58
+ shlli r19, 45, r19   // multiply by two and convert to s2.58
+  /* bubble */
+ sub r19, r18, r18
+ shari r18, 28, r18   // some 22 bit inverse in s1.30
+ muls.l r18, r25, r0  // s2.60
+  muls.l r18, r4, r25 // s32.30
+  /* bubble */
+ shari r0, 16, r19   // s-16.44
+ muls.l r19, r18, r19 // s-16.74
+  shari r25, 63, r0	/* r0 = 0 or -1 according to the sign of r25.  */
+  shari r4, 14, r18   // s19.-14
+ shari r19, 30, r19   // s-16.44
+ muls.l r19, r18, r19 // s15.30
+  xor r21, r0, r21    // You could also use the constant 1 << 27.
+  add r21, r25, r21
+ sub r21, r19, r21
+ shard r21, r1, r21
+ sub r21, r0, r0
+ blink tr0, r63	/* Return; result in r0.  */
+	ENDFUNC(GLOBAL(sdivsi3))
+/* This table has been generated by divtab.c .
+Defects for bias -330:
+   Max defect: 6.081536e-07 at -1.000000e+00
+   Min defect: 2.849516e-08 at 1.030651e+00
+   Max 2nd step defect: 9.606539e-12 at -1.000000e+00
+   Min 2nd step defect: 0.000000e+00 at 0.000000e+00
+   Defect at 1: 1.238659e-07
+   Defect at -2: 1.061708e-07 */
+#else /* ! __pic__ || ! __SHMEDIA__ */
+	.section	.rodata
+#endif /* __pic__ */
+#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
+	.balign 2
+	.type	Local_div_table,@object
+	.size	Local_div_table,128
+/* negative division constants (16-bit, at label-64 .. label-34) */
+	.word	-16638
+	.word	-17135
+	.word	-17737
+	.word	-18433
+	.word	-19103
+	.word	-19751
+	.word	-20583
+	.word	-21383
+	.word	-22343
+	.word	-23353
+	.word	-24407
+	.word	-25582
+	.word	-26863
+	.word	-28382
+	.word	-29965
+	.word	-31800
+/* negative division factors (8-bit, at label-32 .. label-17) */
+	.byte	66
+	.byte	70
+	.byte	75
+	.byte	81
+	.byte	87
+	.byte	93
+	.byte	101
+	.byte	109
+	.byte	119
+	.byte	130
+	.byte	142
+	.byte	156
+	.byte	172
+	.byte	192
+	.byte	214
+	.byte	241
+	.skip 16	/* Unused: label-16 .. label-1.  */
+Local_div_table:	/* The label sits in the middle of the 128-byte table; it is indexed with signed offsets.  */
+	.skip 16	/* Unused: label+0 .. label+15.  */
+/* positive division factors (8-bit, at label+16 .. label+31) */
+	.byte	241
+	.byte	214
+	.byte	192
+	.byte	172
+	.byte	156
+	.byte	142
+	.byte	130
+	.byte	119
+	.byte	109
+	.byte	101
+	.byte	93
+	.byte	87
+	.byte	81
+	.byte	75
+	.byte	70
+	.byte	66
+/* positive division constants (16-bit, at label+32 .. label+62) */
+	.word	31801
+	.word	29966
+	.word	28383
+	.word	26864
+	.word	25583
+	.word	24408
+	.word	23354
+	.word	22344
+	.word	21384
+	.word	20584
+	.word	19752
+	.word	19104
+	.word	18434
+	.word	17738
+	.word	17136
+	.word	16639
+	.section	.rodata
+#endif /* TEXT_DATA_BUG */
+	.balign 2
+	.type	GLOBAL(div_table),@object
+	.size	GLOBAL(div_table),128
+/* negative division constants (16-bit, at label-64 .. label-34) */
+	.word	-16638
+	.word	-17135
+	.word	-17737
+	.word	-18433
+	.word	-19103
+	.word	-19751
+	.word	-20583
+	.word	-21383
+	.word	-22343
+	.word	-23353
+	.word	-24407
+	.word	-25582
+	.word	-26863
+	.word	-28382
+	.word	-29965
+	.word	-31800
+/* negative division factors (8-bit, at label-32 .. label-17) */
+	.byte	66
+	.byte	70
+	.byte	75
+	.byte	81
+	.byte	87
+	.byte	93
+	.byte	101
+	.byte	109
+	.byte	119
+	.byte	130
+	.byte	142
+	.byte	156
+	.byte	172
+	.byte	192
+	.byte	214
+	.byte	241
+	.skip 16	/* Unused: label-16 .. label-1.  */
+	.global	GLOBAL(div_table)
+GLOBAL(div_table):	/* The label sits in the middle of the 128-byte table; it is indexed with signed offsets.  */
+	HIDDEN_ALIAS(div_table_internal,div_table)	/* Hidden second name for the same address.  */
+	.skip 16	/* Unused: label+0 .. label+15.  */
+/* positive division factors (8-bit, at label+16 .. label+31) */
+	.byte	241
+	.byte	214
+	.byte	192
+	.byte	172
+	.byte	156
+	.byte	142
+	.byte	130
+	.byte	119
+	.byte	109
+	.byte	101
+	.byte	93
+	.byte	87
+	.byte	81
+	.byte	75
+	.byte	70
+	.byte	66
+/* positive division constants (16-bit, at label+32 .. label+62) */
+	.word	31801
+	.word	29966
+	.word	28383
+	.word	26864
+	.word	25583
+	.word	24408
+	.word	23354
+	.word	22344
+	.word	21384
+	.word	20584
+	.word	19752
+	.word	19104
+	.word	18434
+	.word	17738
+	.word	17136
+	.word	16639
+
+
+#elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
+/* This code uses shld, thus is not suitable for SH1 / SH2.  */
+
+/* Signed / unsigned division without use of FPU, optimized for SH4.
+   Uses a lookup table for divisors in the range -128 .. +128, and
+   div1 with case distinction for larger divisors in three more ranges.
+   The code is lumped together with the table to allow the use of mova.  */
+#ifdef __LITTLE_ENDIAN__	/* Byte offsets inside a 32-bit word held in memory.  */
+#define L_LSB 0
+#define L_LSWMSB 1
+#define L_MSWLSB 2
+#else
+#define L_LSB 3
+#define L_LSWMSB 2
+#define L_MSWLSB 1
+#endif
+
+	.balign 4
+	.global	GLOBAL(udivsi3_i4i)
+	FUNC(GLOBAL(udivsi3_i4i))
+GLOBAL(udivsi3_i4i):	/* Unsigned r0 = r4 / r5.  r4 and r5 are saved on the stack and restored on every path; r1 is scratch.  */
+	mov.w LOCAL(c128_w), r1	/* r1 = 128.  */
+	div0u
+	mov r4,r0
+	shlr8 r0	/* r0 = dividend >> 8.  */
+	cmp/hi r1,r5	/* T = divisor > 128 (unsigned).  */
+	extu.w r5,r1	/* r1 = low 16 bits of the divisor.  */
+	bf LOCAL(udiv_le128)	/* Divisor <= 128: use the inverse tables.  */
+	cmp/eq r5,r1	/* T = divisor fits in 16 bits.  */
+	bf LOCAL(udiv_ge64k)	/* Divisor >= 64K.  */
+	shlr r0
+	mov r5,r1	/* Keep the original divisor so it can be saved below.  */
+	shll16 r5	/* Move the 16-bit divisor into the upper half for div1.  */
+	mov.l r4,@-r15	/* Save the dividend.  */
+	div1 r5,r0
+	mov.l r1,@-r15	/* Save the original divisor.  */
+	div1 r5,r0
+	div1 r5,r0
+	bra LOCAL(udiv_25)	/* Continue in the code shared with sdivsi3_i4i.  */
+	div1 r5,r0	/* (delay slot)  */
+
+LOCAL(div_le128):	/* Entry from sdivsi3_i4i, which has already saved r4 and r5.  */
+	mova LOCAL(div_table_ix),r0
+	bra LOCAL(div_le128_2)
+	mov.b @(r0,r5),r1	/* (delay slot) r1 = byte offset of the inverse for this divisor.  */
+LOCAL(udiv_le128):
+	mov.l r4,@-r15	/* Save the dividend.  */
+	mova LOCAL(div_table_ix),r0
+	mov.b @(r0,r5),r1	/* r1 = byte offset of the inverse for this divisor.  */
+	mov.l r5,@-r15	/* Save the divisor.  */
+LOCAL(div_le128_2):
+	mova LOCAL(div_table_inv),r0
+	mov.l @(r0,r1),r1	/* r1 = normalized inverse.  */
+	mov r5,r0
+	tst #0xfe,r0	/* T = no bit other than bit 0 is set, i.e. divisor <= 1.  */
+	mova LOCAL(div_table_clz),r0
+	dmulu.l r1,r4	/* MACH:MACL = inverse * dividend.  */
+	mov.b @(r0,r5),r1	/* r1 = final shift count for this divisor.  */
+	bt/s LOCAL(div_by_1)
+	mov r4,r0	/* (delay slot) r0 = dividend, the result when dividing by 1.  */
+	mov.l @r15+,r5	/* Restore the divisor.  */
+	sts mach,r0	/* r0 = high word of the product.  */
+	/* clrt */
+	addc r4,r0	/* Add the dividend for the implicit leading 1 of the inverse; the carry goes to T.  */
+	mov.l @r15+,r4	/* Restore the dividend.  */
+	rotcr r0	/* Shift right by one, bringing the carry in at the top.  */
+	rts
+	shld r1,r0	/* (delay slot) Apply the per-divisor shift.  */
+
+LOCAL(div_by_1_neg):
+	neg r4,r0	/* Result is the negated dividend.  */
+LOCAL(div_by_1):
+	mov.l @r15+,r5	/* Restore the divisor.  */
+	rts
+	mov.l @r15+,r4	/* (delay slot) Restore the dividend.  */
+
+LOCAL(div_ge64k):	/* Entry from sdivsi3_i4i; T = divisor > (dividend >> 8).  */
+	bt/s LOCAL(div_r8)	/* If so, the quotient fits in 8 bits.  */
+	div0u	/* (delay slot)  */
+	shll8 r5
+	bra LOCAL(div_ge64k_2)
+	div1 r5,r0	/* (delay slot)  */
+LOCAL(udiv_ge64k):
+	cmp/hi r0,r5	/* T = divisor > (dividend >> 8): quotient fits in 8 bits.  */
+	mov r5,r1	/* Keep the original divisor so it can be saved below.  */
+	bt LOCAL(udiv_r8)
+	shll8 r5
+	mov.l r4,@-r15	/* Save the dividend.  */
+	div1 r5,r0
+	mov.l r1,@-r15	/* Save the original divisor.  */
+LOCAL(div_ge64k_2):
+	div1 r5,r0
+	mov.l LOCAL(zero_l),r1	/* r1 = 0.  */
+	.rept 4
+	div1 r5,r0
+	.endr
+	mov.l r1,@-r15	/* Zeroed scratch word in which quotient bytes are assembled.  */
+	div1 r5,r0
+	mov.w LOCAL(m256_w),r1	/* r1 = mask with the low 8 bits clear.  */
+	div1 r5,r0
+	mov.b r0,@(L_LSWMSB,r15)	/* Park 8 quotient bits in byte 1 of the scratch word.  */
+	xor r4,r0
+	and r1,r0
+	bra LOCAL(div_ge64k_end)	/* Finish in the code shared with sdivsi3_i4i.  */
+	xor r4,r0	/* (delay slot) With the two preceding lines: keep r0 above bit 7, take the low 8 bits from r4.  */
+	
+LOCAL(div_r8):	/* Entry from sdivsi3_i4i, which has already saved r4 and r5.  */
+	shll16 r4
+	bra LOCAL(div_r8_2)
+	shll8 r4	/* (delay slot) r4 = dividend << 24.  */
+LOCAL(udiv_r8):
+	mov.l r4,@-r15	/* Save the dividend.  */
+	shll16 r4
+	clrt
+	shll8 r4	/* r4 = dividend << 24.  */
+	mov.l r5,@-r15	/* Save the divisor.  */
+LOCAL(div_r8_2):
+	rotcl r4
+	mov r0,r1
+	div1 r5,r1
+	mov r4,r0
+	rotcl r0
+	mov r5,r4	/* The divisor moves to r4 so that r5 can be restored early.  */
+	div1 r5,r1
+	.rept 5
+	rotcl r0; div1 r5,r1
+	.endr
+	rotcl r0
+	mov.l @r15+,r5	/* Restore the divisor.  */
+	div1 r4,r1
+	mov.l @r15+,r4	/* Restore the dividend.  */
+	rts
+	rotcl r0	/* (delay slot) Shift in the last quotient bit.  */
+
+	ENDFUNC(GLOBAL(udivsi3_i4i))
+
+	.global	GLOBAL(sdivsi3_i4i)
+	FUNC(GLOBAL(sdivsi3_i4i))
+	/* This is link-compatible with a GLOBAL(sdivsi3) call,
+	   but we effectively clobber only r1.  */
+GLOBAL(sdivsi3_i4i):	/* Signed r0 = r4 / r5.  Divides the magnitudes and negates the quotient when the operand signs differ.  */
+	mov.l r4,@-r15	/* Save the dividend.  */
+	cmp/pz r5	/* T = divisor >= 0.  */
+	mov.w LOCAL(c128_w), r1	/* r1 = 128.  */
+	bt/s LOCAL(pos_divisor)
+	cmp/pz r4	/* (delay slot) T = dividend >= 0.  */
+	mov.l r5,@-r15	/* Save the (negative) divisor.  */
+	neg r5,r5	/* r5 = magnitude of the divisor.  */
+	bt/s LOCAL(neg_result)	/* Dividend >= 0, divisor < 0: negative quotient.  */
+	cmp/hi r1,r5	/* (delay slot) T = magnitude of divisor > 128.  */
+	neg r4,r4	/* Both negative: r4 = magnitude of the dividend.  */
+LOCAL(pos_result):
+	extu.w r5,r0	/* r0 = low 16 bits of the divisor.  */
+	bf LOCAL(div_le128)	/* Divisor <= 128: use the inverse tables.  */
+	cmp/eq r5,r0	/* T = divisor fits in 16 bits.  */
+	mov r4,r0
+	shlr8 r0	/* r0 = dividend >> 8.  */
+	bf/s LOCAL(div_ge64k)	/* Divisor >= 64K.  */
+	cmp/hi r0,r5	/* (delay slot) T = divisor > (dividend >> 8).  */
+	div0u
+	shll16 r5	/* Move the 16-bit divisor into the upper half for div1.  */
+	div1 r5,r0
+	div1 r5,r0
+	div1 r5,r0
+LOCAL(udiv_25):	/* udivsi3_i4i joins here for 16-bit divisors above 128.  */
+	mov.l LOCAL(zero_l),r1	/* r1 = 0.  */
+	div1 r5,r0
+	div1 r5,r0
+	mov.l r1,@-r15	/* Zeroed scratch word in which quotient bytes are assembled.  */
+	.rept 3
+	div1 r5,r0
+	.endr
+	mov.b r0,@(L_MSWLSB,r15)	/* Park 8 quotient bits in byte 2 of the scratch word.  */
+	xtrct r4,r0
+	swap.w r0,r0
+	.rept 8
+	div1 r5,r0
+	.endr
+	mov.b r0,@(L_LSWMSB,r15)	/* Park 8 quotient bits in byte 1 of the scratch word.  */
+LOCAL(div_ge64k_end):
+	.rept 8
+	div1 r5,r0
+	.endr
+	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
+	extu.b r0,r0	/* Keep the newest 8 quotient bits.  */
+	mov.l @r15+,r5	/* Restore the divisor.  */
+	or r4,r0	/* Merge with the bytes parked earlier.  */
+	mov.l @r15+,r4	/* Restore the dividend.  */
+	rts
+	rotcl r0	/* (delay slot) Shift in the last quotient bit.  */
+
+LOCAL(div_le128_neg):	/* Table path for a negative quotient; r0 = low 16 bits of the divisor magnitude.  */
+	tst #0xfe,r0	/* T = no bit other than bit 0 is set, i.e. divisor magnitude <= 1.  */
+	mova LOCAL(div_table_ix),r0
+	mov.b @(r0,r5),r1	/* r1 = byte offset of the inverse for this divisor.  */
+	mova LOCAL(div_table_inv),r0
+	bt/s LOCAL(div_by_1_neg)
+	mov.l @(r0,r1),r1	/* (delay slot) r1 = normalized inverse.  */
+	mova LOCAL(div_table_clz),r0
+	dmulu.l r1,r4	/* MACH:MACL = inverse * dividend magnitude.  */
+	mov.b @(r0,r5),r1	/* r1 = final shift count for this divisor.  */
+	mov.l @r15+,r5	/* Restore the divisor.  */
+	sts mach,r0	/* r0 = high word of the product.  */
+	/* clrt */
+	addc r4,r0	/* Add the dividend for the implicit leading 1 of the inverse; the carry goes to T.  */
+	mov.l @r15+,r4	/* Restore the dividend.  */
+	rotcr r0	/* Shift right by one, bringing the carry in at the top.  */
+	shld r1,r0	/* Apply the per-divisor shift.  */
+	rts
+	neg r0,r0	/* (delay slot) Negate the quotient.  */
+
+LOCAL(pos_divisor):	/* Divisor >= 0; T = dividend >= 0.  */
+	mov.l r5,@-r15	/* Save the divisor.  */
+	bt/s LOCAL(pos_result)
+	cmp/hi r1,r5	/* (delay slot) T = divisor > 128.  */
+	neg r4,r4	/* Dividend < 0: r4 = its magnitude; the quotient will be negative.  */
+LOCAL(neg_result):
+	extu.w r5,r0	/* r0 = low 16 bits of the divisor.  */
+	bf LOCAL(div_le128_neg)	/* Divisor <= 128: use the inverse tables.  */
+	cmp/eq r5,r0	/* T = divisor fits in 16 bits.  */
+	mov r4,r0
+	shlr8 r0	/* r0 = dividend >> 8.  */
+	bf/s LOCAL(div_ge64k_neg)	/* Divisor >= 64K.  */
+	cmp/hi r0,r5	/* (delay slot) T = divisor > (dividend >> 8).  */
+	div0u
+	mov.l LOCAL(zero_l),r1	/* r1 = 0.  */
+	shll16 r5	/* Move the 16-bit divisor into the upper half for div1.  */
+	div1 r5,r0
+	mov.l r1,@-r15	/* Zeroed scratch word in which quotient bytes are assembled.  */
+	.rept 7
+	div1 r5,r0
+	.endr
+	mov.b r0,@(L_MSWLSB,r15)	/* Park 8 quotient bits in byte 2 of the scratch word.  */
+	xtrct r4,r0
+	swap.w r0,r0
+	.rept 8
+	div1 r5,r0
+	.endr
+	mov.b r0,@(L_LSWMSB,r15)	/* Park 8 quotient bits in byte 1 of the scratch word.  */
+LOCAL(div_ge64k_neg_end):
+	.rept 8
+	div1 r5,r0
+	.endr
+	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
+	extu.b r0,r1	/* Keep the newest 8 quotient bits.  */
+	mov.l @r15+,r5	/* Restore the divisor.  */
+	or r4,r1	/* Merge with the bytes parked earlier.  */
+LOCAL(div_r8_neg_end):
+	mov.l @r15+,r4	/* Restore the dividend.  */
+	rotcl r1	/* Shift in the last quotient bit.  */
+	rts
+	neg r1,r0	/* (delay slot) r0 = negated quotient.  */
+
+LOCAL(div_ge64k_neg):	/* T = divisor > (dividend >> 8).  */
+	bt/s LOCAL(div_r8_neg)	/* If so, the quotient magnitude fits in 8 bits.  */
+	div0u	/* (delay slot)  */
+	shll8 r5
+	mov.l LOCAL(zero_l),r1	/* r1 = 0.  */
+	.rept 6
+	div1 r5,r0
+	.endr
+	mov.l r1,@-r15	/* Zeroed scratch word in which quotient bytes are assembled.  */
+	div1 r5,r0
+	mov.w LOCAL(m256_w),r1	/* r1 = mask with the low 8 bits clear.  */
+	div1 r5,r0
+	mov.b r0,@(L_LSWMSB,r15)	/* Park 8 quotient bits in byte 1 of the scratch word.  */
+	xor r4,r0
+	and r1,r0
+	bra LOCAL(div_ge64k_neg_end)
+	xor r4,r0	/* (delay slot) With the two preceding lines: keep r0 above bit 7, take the low 8 bits from r4.  */
+
+LOCAL(c128_w):	/* Threshold below which the inverse tables are used.  */
+	.word 128
+
+LOCAL(div_r8_neg):
+	clrt
+	shll16 r4
+	mov r4,r1
+	shll8 r1	/* r1 = dividend << 24.  */
+	mov r5,r4	/* The divisor moves to r4 so that r5 can be restored early.  */
+	.rept 7
+	rotcl r1; div1 r5,r0
+	.endr
+	mov.l @r15+,r5	/* Restore the divisor.  */
+	rotcl r1
+	bra LOCAL(div_r8_neg_end)
+	div1 r4,r0	/* (delay slot)  */
+
+LOCAL(m256_w):	/* Loaded with mov.w, i.e. sign-extended to 0xffffff00 (-256).  */
+	.word 0xff00
+/* This table has been generated by divtab-sh4.c.  */
+	.balign 4
+LOCAL(div_table_clz):
+	.byte	0
+	.byte	1
+	.byte	0
+	.byte	-1
+	.byte	-1
+	.byte	-2
+	.byte	-2
+	.byte	-2
+	.byte	-2
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-3
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-4
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-5
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+	.byte	-6
+/* Lookup table translating positive divisor to index into table of
+   normalized inverse.  N.B. the '0' entry is also the last entry of the
+   previous table, and causes an unaligned access for division by zero.  */
+LOCAL(div_table_ix):	/* Indexed by divisor 0..128: signed byte offset from div_table_inv of the 32-bit inverse to use.  */
+	.byte	-6	/* divisor 0; doubles as div_table_clz entry 128.  Not a multiple of 4.  */
+	.byte	-128	/* divisor 1 */
+	.byte	-128	/* divisor 2 */
+	.byte	0
+	.byte	-128	/* divisor 4 */
+	.byte	-64
+	.byte	0
+	.byte	64
+	.byte	-128	/* divisor 8 */
+	.byte	-96
+	.byte	-64
+	.byte	-32
+	.byte	0
+	.byte	32
+	.byte	64
+	.byte	96
+	.byte	-128	/* divisor 16 */
+	.byte	-112
+	.byte	-96
+	.byte	-80
+	.byte	-64
+	.byte	-48
+	.byte	-32
+	.byte	-16
+	.byte	0
+	.byte	16
+	.byte	32
+	.byte	48
+	.byte	64
+	.byte	80
+	.byte	96
+	.byte	112
+	.byte	-128	/* divisor 32 */
+	.byte	-120
+	.byte	-112
+	.byte	-104
+	.byte	-96
+	.byte	-88
+	.byte	-80
+	.byte	-72
+	.byte	-64
+	.byte	-56
+	.byte	-48
+	.byte	-40
+	.byte	-32
+	.byte	-24
+	.byte	-16
+	.byte	-8
+	.byte	0
+	.byte	8
+	.byte	16
+	.byte	24
+	.byte	32
+	.byte	40
+	.byte	48
+	.byte	56
+	.byte	64
+	.byte	72
+	.byte	80
+	.byte	88
+	.byte	96
+	.byte	104
+	.byte	112
+	.byte	120
+	.byte	-128	/* divisor 64 */
+	.byte	-124
+	.byte	-120
+	.byte	-116
+	.byte	-112
+	.byte	-108
+	.byte	-104
+	.byte	-100
+	.byte	-96
+	.byte	-92
+	.byte	-88
+	.byte	-84
+	.byte	-80
+	.byte	-76
+	.byte	-72
+	.byte	-68
+	.byte	-64
+	.byte	-60
+	.byte	-56
+	.byte	-52
+	.byte	-48
+	.byte	-44
+	.byte	-40
+	.byte	-36
+	.byte	-32
+	.byte	-28
+	.byte	-24
+	.byte	-20
+	.byte	-16
+	.byte	-12
+	.byte	-8
+	.byte	-4
+	.byte	0
+	.byte	4
+	.byte	8
+	.byte	12
+	.byte	16
+	.byte	20
+	.byte	24
+	.byte	28
+	.byte	32
+	.byte	36
+	.byte	40
+	.byte	44
+	.byte	48
+	.byte	52
+	.byte	56
+	.byte	60
+	.byte	64
+	.byte	68
+	.byte	72
+	.byte	76
+	.byte	80
+	.byte	84
+	.byte	88
+	.byte	92
+	.byte	96
+	.byte	100
+	.byte	104
+	.byte	108
+	.byte	112
+	.byte	116
+	.byte	120
+	.byte	124
+	.byte	-128	/* divisor 128 */
+/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
+	.balign 4
+LOCAL(zero_l):	/* First half of the inverse table (byte offsets -128 .. -4 from div_table_inv); its first word, the entry for powers of two, also serves as the 32-bit zero constant.  */
+	.long	0x0
+	.long	0xF81F81F9
+	.long	0xF07C1F08
+	.long	0xE9131AC0
+	.long	0xE1E1E1E2
+	.long	0xDAE6076C
+	.long	0xD41D41D5
+	.long	0xCD856891
+	.long	0xC71C71C8
+	.long	0xC0E07039
+	.long	0xBACF914D
+	.long	0xB4E81B4F
+	.long	0xAF286BCB
+	.long	0xA98EF607
+	.long	0xA41A41A5
+	.long	0x9EC8E952
+	.long	0x9999999A
+	.long	0x948B0FCE
+	.long	0x8F9C18FA
+	.long	0x8ACB90F7
+	.long	0x86186187
+	.long	0x81818182
+	.long	0x7D05F418
+	.long	0x78A4C818
+	.long	0x745D1746
+	.long	0x702E05C1
+	.long	0x6C16C16D
+	.long	0x68168169
+	.long	0x642C8591
+	.long	0x60581606
+	.long	0x5C9882BA
+	.long	0x58ED2309
+LOCAL(div_table_inv):	/* Second half (byte offsets 0 .. 124); the label is the base for the signed offsets stored in div_table_ix.  */
+	.long	0x55555556
+	.long	0x51D07EAF
+	.long	0x4E5E0A73
+	.long	0x4AFD6A06
+	.long	0x47AE147B
+	.long	0x446F8657
+	.long	0x41414142
+	.long	0x3E22CBCF
+	.long	0x3B13B13C
+	.long	0x38138139
+	.long	0x3521CFB3
+	.long	0x323E34A3
+	.long	0x2F684BDB
+	.long	0x2C9FB4D9
+	.long	0x29E4129F
+	.long	0x27350B89
+	.long	0x24924925
+	.long	0x21FB7813
+	.long	0x1F7047DD
+	.long	0x1CF06ADB
+	.long	0x1A7B9612
+	.long	0x18118119
+	.long	0x15B1E5F8
+	.long	0x135C8114
+	.long	0x11111112
+	.long	0xECF56BF
+	.long	0xC9714FC
+	.long	0xA6810A7
+	.long	0x8421085
+	.long	0x624DD30
+	.long	0x4104105
+	.long	0x2040811
+	/* maximum error: 0.987342 scaled: 0.921875*/
+
+	ENDFUNC(GLOBAL(sdivsi3_i4i))
+#endif /* SH3 / SH4 */
+
+#endif /* L_div_table */
+
+#ifdef L_udiv_qrnnd_16
+#if !__SHMEDIA__
+	HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
+	/* Out: r0: rn r1: qn */ /* In: r0: n1 r4: n0 r5: d r6: d1 */ /* Scratch: r2: __m */
+	/* n1 < d, but n1 might be larger than d1.  */
+	.global GLOBAL(udiv_qrnnd_16)
+	.balign 8
+GLOBAL(udiv_qrnnd_16):
+	div0u
+	cmp/hi r6,r0	/* T = n1 > d1 (unsigned).  */
+	bt .Lots	/* If so, take the out-of-line path below.  */
+	.rept 16
+	div1 r6,r0 
+	.endr
+	extu.w r0,r1	/* r1 = the 16 quotient bits shifted into r0 by div1.  */
+	bt 0f
+	add r6,r0
+0:	rotcl r1	/* Shift in the last quotient bit from T.  */
+	mulu.w r1,r5	/* MACL = r1 * (low 16 bits of d).  */
+	xtrct r4,r0
+	swap.w r0,r0
+	sts macl,r2	/* r2 = __m.  */
+	cmp/hs r2,r0	/* T = r0 >= __m (unsigned): the subtraction below does not wrap.  */
+	sub r2,r0
+	bt 0f	/* No wrap: qn and rn are final.  */
+	addc r5,r0	/* Wrapped: add d back; T receives the carry.  */
+	add #-1,r1	/* ... and decrement the quotient.  */
+	bt 0f	/* Carry out: one correction was enough.  */
+1:	add #-1,r1	/* Second correction of the quotient.  */
+	rts
+	add r5,r0	/* (delay slot) ... and of the remainder.  */
+	.balign 8
+.Lots:
+	sub r5,r0
+	swap.w r4,r1
+	xtrct r0,r1
+	clrt
+	mov r1,r0
+	addc r5,r0
+	mov #-1,r1
+	SL1(bf, 1b,
+	shlr16 r1)
+0:	rts
+	nop	/* (delay slot)  */
+	ENDFUNC(GLOBAL(udiv_qrnnd_16))
+#endif /* !__SHMEDIA__ */
+#endif /* L_udiv_qrnnd_16 */
diff --git a/gcc-4.9/libgcc/config/sh/lib1funcs.h b/gcc-4.9/libgcc/config/sh/lib1funcs.h
new file mode 100644
index 000000000..be25dc825
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/lib1funcs.h
@@ -0,0 +1,74 @@
+/* Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifdef __ELF__ /* ELF: local labels start with .L; symbol type, size and visibility directives are emitted.  */
+#define LOCAL(X)	.L_##X /* File-local label name.  */
+#define FUNC(X)		.type X,@function /* Mark X as a function symbol.  */
+#define HIDDEN_FUNC(X)	FUNC(X); .hidden X /* Function symbol with hidden visibility.  */
+#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) /* GLOBAL(X) becomes a hidden second name for GLOBAL(Y).  */
+#define ENDFUNC0(X)	.Lfe_##X: .size X,.Lfe_##X-X /* End label plus symbol size.  */
+#define ENDFUNC(X)	ENDFUNC0(X) /* Indirection so that X is macro-expanded before being pasted.  */
+#else /* Not ELF: no type/size/visibility directives.  */
+#define LOCAL(X)	L_##X
+#define FUNC(X)
+#define HIDDEN_FUNC(X)
+#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y)
+#define ENDFUNC(X)
+#endif
+
+#define	CONCAT(A,B)	A##B
+#define	GLOBAL0(U,X)	CONCAT(U,__##X) /* U followed by __X.  */
+#define	GLOBAL(X)	GLOBAL0(__USER_LABEL_PREFIX__,X) /* Assembler name of the libgcc symbol __X, including the user label prefix.  */
+
+#define ALIAS(X,Y)	.global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) /* Export GLOBAL(X) with the same value as GLOBAL(Y).  */
+
+#if defined __SH2A__ && defined __FMOVD_ENABLED__
+#undef  FMOVD_WORKS
+#define FMOVD_WORKS
+#endif
+
+#ifdef __LITTLE_ENDIAN__ /* DRn0 / DRn1 name the two fr registers of each pair; which is which swaps with endianness.  */
+#define DR00 fr1
+#define DR01 fr0
+#define DR20 fr3
+#define DR21 fr2
+#define DR40 fr5
+#define DR41 fr4
+#else /* !__LITTLE_ENDIAN__ */
+#define DR00 fr0
+#define DR01 fr1
+#define DR20 fr2
+#define DR21 fr3
+#define DR40 fr4
+#define DR41 fr5
+#endif /* !__LITTLE_ENDIAN__ */
+
+#ifdef __sh1__ /* SL / SL1 pair a branch with one instruction: here the instruction is emitted first, followed by the plain branch.  */
+#define SL(branch, dest, in_slot, in_slot_arg2) \
+	in_slot, in_slot_arg2; branch dest
+#define SL1(branch, dest, in_slot) \
+	in_slot; branch dest
+#else /* ! __sh1__ */ /* Here the .s or /s branch form is emitted first, followed by the instruction.  */
+#define SL(branch, dest, in_slot, in_slot_arg2) \
+	branch##.s dest; in_slot, in_slot_arg2
+#define SL1(branch, dest, in_slot) \
+	branch##/s dest; in_slot
+#endif /* !__sh1__ */
diff --git a/gcc-4.9/libgcc/config/sh/libgcc-excl.ver b/gcc-4.9/libgcc/config/sh/libgcc-excl.ver
new file mode 100644
index 000000000..325c74054
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/libgcc-excl.ver
@@ -0,0 +1,8 @@
+# Exclude various symbols which should not be visible in libgcc.so for SH.
+%exclude {
+  __ashlsi3
+  __ashrsi3
+  __lshrsi3
+  __mulsi3 # this is an SH1-only symbol.
+  __udivsi3
+}
diff --git a/gcc-4.9/libgcc/config/sh/libgcc-glibc.ver b/gcc-4.9/libgcc/config/sh/libgcc-glibc.ver
new file mode 100644
index 000000000..17a1d809e
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/libgcc-glibc.ver
@@ -0,0 +1,48 @@
+# Copyright (C) 2002-2014 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# In order to work around the very problems that force us to now generally
+# create a libgcc.so, glibc reexported a number of routines from libgcc.a.
+# By now choosing the same version tags for these specific routines, we
+# maintain enough binary compatibility to allow future versions of glibc
+# to defer implementation of these routines to libgcc.so via DT_AUXILIARY.
+
+# Note that we cannot use the default libgcc-glibc.ver file on sh,
+# because GLIBC_2.0 does not exist on this architecture, as the first 
+# ever glibc release on the platform was GLIBC_2.2.
+
+%exclude {
+  __register_frame
+  __register_frame_table
+  __deregister_frame
+  __register_frame_info
+  __deregister_frame_info
+  __frame_state_for
+  __register_frame_info_table
+}
+
+%inherit GCC_3.0 GLIBC_2.2
+GLIBC_2.2 {
+  __register_frame
+  __register_frame_table
+  __deregister_frame
+  __register_frame_info
+  __deregister_frame_info
+  __frame_state_for
+  __register_frame_info_table
+}
diff --git a/gcc-4.9/libgcc/config/sh/linux-atomic.c b/gcc-4.9/libgcc/config/sh/linux-atomic.c
new file mode 100644
index 000000000..1ef02eaa6
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/linux-atomic.c
@@ -0,0 +1,81 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Atomic built-in C functions for link compatibility with older code that
+   was compiled to emit function calls for atomic built-ins.
+   Notice that if no atomic model has been selected the functions in this
+   file must not be generated, or else they will result in infinite no-op
+   loops.
+   Notice also, that all the generated functions below take three parameters,
+   which is not actually true for some of the built-in functions.  However,
+   on SH this does not matter, since the first four parameters are always
+   passed in call clobbered registers.
+   The return type for the sync_bool_compare_and_swap functions is also
+   actually supposed to be a bool, but this also doesn't matter since any
+   int return type <= 32 bit is returned in R0 on SH.  */
+
+#if !__SH_ATOMIC_MODEL_NONE__
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+/* Operand size in bytes of each type; it becomes the function name suffix.  */
+#define uint8_t_sz 1
+#define uint16_t_sz 2
+#define uint32_t_sz 4
+/* typesz (T) names the matching T_sz macro above.  */
+#define typesz(x) x##_sz
+/* genname (f, T) yields __f_N, e.g. __sync_fetch_and_add_4 for uint32_t.  */
+#define concat(x,y) __ ## x ## _ ## y
+#define eval(x,y) concat (x,y)
+#define genname(f,t) eval(f, typesz (t))
+/* Define a hidden __NAME_N (x, y, z) that forwards to the __NAME built-in.  */
+#define func1(name, type) \
+  type __attribute__((visibility("hidden"))) \
+  genname (name, type) (type* x, type y, type z) \
+  { \
+    return __##name (x, y, z); \
+  }
+/* Instantiate func1 once for each of the 1, 2 and 4 byte operand types.  */
+#define genfuncs(name) \
+  func1 (name, uint8_t) \
+  func1 (name, uint16_t) \
+  func1 (name, uint32_t)
+
+genfuncs (sync_lock_test_and_set)
+genfuncs (sync_val_compare_and_swap)
+genfuncs (sync_bool_compare_and_swap)
+genfuncs (sync_fetch_and_add)
+genfuncs (sync_fetch_and_or)
+genfuncs (sync_fetch_and_and)
+genfuncs (sync_fetch_and_xor)
+genfuncs (sync_fetch_and_sub)
+genfuncs (sync_fetch_and_nand)
+genfuncs (sync_add_and_fetch)
+genfuncs (sync_or_and_fetch)
+genfuncs (sync_and_and_fetch)
+genfuncs (sync_xor_and_fetch)
+genfuncs (sync_sub_and_fetch)
+genfuncs (sync_nand_and_fetch)
+
+#endif
diff --git a/gcc-4.9/libgcc/config/sh/linux-unwind.h b/gcc-4.9/libgcc/config/sh/linux-unwind.h
new file mode 100644
index 000000000..4875706d4
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/linux-unwind.h
@@ -0,0 +1,255 @@
+/* DWARF2 EH unwinding support for SH Linux.
+   Copyright (C) 2004-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+
+/* Do code reading to identify a signal frame, and set the frame
+   state data appropriately.  See unwind-dw2.c for the structs.
+   Don't use this at all if inhibit_libc is used.  */
+
+#ifndef inhibit_libc
+
+#include <signal.h>
+#include <sys/ucontext.h>
+#include "insn-constants.h"
+
+# if defined (__SH5__)
+#define SH_DWARF_FRAME_GP0	0	/* Not referenced in this file.  */
+#define SH_DWARF_FRAME_FP0	77	/* First of 32 columns for sc_fpregs.  */
+#define SH_DWARF_FRAME_BT0	68	/* First of 8 columns for sc_tregs.  */
+#define SH_DWARF_FRAME_PR_MEDIA	18	/* Not referenced in this file.  */
+#define SH_DWARF_FRAME_SR	65	/* Column for sc_sr.  */
+#define SH_DWARF_FRAME_FPSCR	76	/* Column for sc_fpscr.  */
+#else
+#define SH_DWARF_FRAME_GP0	0	/* Not referenced in this file.  */
+#define SH_DWARF_FRAME_FP0	25	/* First of 16 columns for sc_fpregs.  */
+#define SH_DWARF_FRAME_XD0	87	/* First of 8 columns for sc_xfpregs.  */
+#define SH_DWARF_FRAME_PR	17	/* Column for sc_pr.  */
+#define SH_DWARF_FRAME_GBR	18	/* Column for sc_gbr.  */
+#define SH_DWARF_FRAME_MACH	20	/* Column for sc_mach.  */
+#define SH_DWARF_FRAME_MACL	21	/* Column for sc_macl.  */
+#define SH_DWARF_FRAME_PC	16	/* Column for sc_pc; the return address.  */
+#define SH_DWARF_FRAME_SR	22	/* Column for sc_sr.  */
+#define SH_DWARF_FRAME_FPUL	23	/* Column for sc_fpul.  */
+#define SH_DWARF_FRAME_FPSCR	24	/* Column for sc_fpscr.  */
+#endif /* defined (__SH5__) */
+
+#if defined (__SH5__)
+
+#define MD_FALLBACK_FRAME_STATE_FOR shmedia_fallback_frame_state
+/* Fill in FS for a signal frame; _URC_END_OF_STACK if RA is no trampoline.  */
+static _Unwind_Reason_Code
+shmedia_fallback_frame_state (struct _Unwind_Context *context,
+			      _Unwind_FrameState *fs)
+{
+  unsigned char *pc = context->ra;
+  struct sigcontext *sc;
+  long new_cfa;
+  int i, r;
+  /* Compare the code at the return address with both trampolines:  */
+  /* movi 0x10,r9; shori 0x77,r9; trapa	r9; nop (sigreturn)  */
+  /* movi 0x10,r9; shori 0xad,r9; trapa	r9; nop (rt_sigreturn)  */
+  if ((*(unsigned long *) (pc-1)  == 0xcc004090)
+      && (*(unsigned long *) (pc+3)  == 0xc801dc90)
+      && (*(unsigned long *) (pc+7)  == 0x6c91fff0)
+      && (*(unsigned long *) (pc+11)  == 0x6ff0fff0))
+    sc = context->cfa;
+  else if ((*(unsigned long *) (pc-1)  == 0xcc004090)
+	   && (*(unsigned long *) (pc+3)  == 0xc802b490)
+	   && (*(unsigned long *) (pc+7)  == 0x6c91fff0)
+	   && (*(unsigned long *) (pc+11)  == 0x6ff0fff0))
+    {
+      struct rt_sigframe {
+	siginfo_t *pinfo;
+	void *puc;
+	siginfo_t info;
+	ucontext_t uc;	/* glibc 2.26 renamed the 'struct ucontext' tag.  */
+      } *rt_ = context->cfa;
+      /* The void * cast is necessary to avoid an aliasing warning.
+         The aliasing warning is correct, but should not be a problem
+         because it does not alias anything.  */
+      sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext;
+    }
+  else
+    return _URC_END_OF_STACK;
+  /* sc_regs[15] (register 15) becomes the new CFA.  */
+  new_cfa = sc->sc_regs[15];
+  fs->regs.cfa_how = CFA_REG_OFFSET;
+  fs->regs.cfa_reg = 15;
+  fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+  /* Columns 0..62 come from sc_regs; 15 is the CFA register set above.  */
+  for (i = 0; i < 63; i++)
+    {
+      if (i == 15)
+	continue;
+
+      fs->regs.reg[i].how = REG_SAVED_OFFSET;
+      fs->regs.reg[i].loc.offset
+	= (long)&(sc->sc_regs[i]) - new_cfa;
+    }
+
+  fs->regs.reg[SH_DWARF_FRAME_SR].how = REG_SAVED_OFFSET;
+  fs->regs.reg[SH_DWARF_FRAME_SR].loc.offset
+    = (long)&(sc->sc_sr) - new_cfa;
+  /* Eight sc_tregs entries, starting at column SH_DWARF_FRAME_BT0.  */
+  r = SH_DWARF_FRAME_BT0;
+  for (i = 0; i < 8; i++)
+    {
+      fs->regs.reg[r+i].how = REG_SAVED_OFFSET;
+      fs->regs.reg[r+i].loc.offset
+	= (long)&(sc->sc_tregs[i]) - new_cfa;
+    }
+  /* Thirty-two sc_fpregs entries, starting at column SH_DWARF_FRAME_FP0.  */
+  r = SH_DWARF_FRAME_FP0;
+  for (i = 0; i < 32; i++)
+    {
+      fs->regs.reg[r+i].how = REG_SAVED_OFFSET;
+      fs->regs.reg[r+i].loc.offset
+	= (long)&(sc->sc_fpregs[i]) - new_cfa;
+    }
+
+  fs->regs.reg[SH_DWARF_FRAME_FPSCR].how = REG_SAVED_OFFSET;
+  fs->regs.reg[SH_DWARF_FRAME_FPSCR].loc.offset
+    = (long)&(sc->sc_fpscr) - new_cfa;
+
+  /* We use the slot for the zero register to save return address.  */
+  fs->regs.reg[63].how = REG_SAVED_OFFSET;
+  fs->regs.reg[63].loc.offset
+    = (long)&(sc->sc_pc) - new_cfa;
+  fs->retaddr_column = 63;
+  fs->signal_frame = 1;
+  return _URC_NO_REASON;
+}
+
+#else /* defined (__SH5__) */
+
+#define MD_FALLBACK_FRAME_STATE_FOR sh_fallback_frame_state
+/* Fill in FS for a signal frame; _URC_END_OF_STACK if RA is no trampoline.  */
+static _Unwind_Reason_Code
+sh_fallback_frame_state (struct _Unwind_Context *context,
+			 _Unwind_FrameState *fs)
+{
+  unsigned char *pc = context->ra;
+  struct sigcontext *sc;
+  long new_cfa;
+  int i;
+#if defined (__SH3E__) || defined (__SH4__)
+  int r;
+#endif
+  /* Trampoline encodings looked for at the return address:  */
+  /* mov.w 1f,r3; trapa #0x10; 1: .short 0x77  (sigreturn)  */
+  /* mov.w 1f,r3; trapa #0x10; 1: .short 0xad  (rt_sigreturn)  */
+  /* Newer kernel uses pad instructions to avoid an SH-4 core bug.  */
+  /* mov.w 1f,r3; trapa #0x10; or r0,r0; or r0,r0; or r0,r0; or r0,r0;
+     or r0,r0; 1: .short 0x77  (sigreturn)  */
+  /* mov.w 1f,r3; trapa #0x10; or r0,r0; or r0,r0; or r0,r0; or r0,r0;
+     or r0,r0; 1: .short 0xad  (rt_sigreturn)  */
+  if (((*(unsigned short *) (pc+0)  == 0x9300)
+       && (*(unsigned short *) (pc+2)  == 0xc310)
+       && (*(unsigned short *) (pc+4)  == 0x0077))
+      || (((*(unsigned short *) (pc+0)  == 0x9305)
+	   && (*(unsigned short *) (pc+2)  == 0xc310)
+	   && (*(unsigned short *) (pc+14)  == 0x0077))))
+    sc = context->cfa;
+  else if (((*(unsigned short *) (pc+0) == 0x9300)
+	    && (*(unsigned short *) (pc+2)  == 0xc310)
+	    && (*(unsigned short *) (pc+4)  == 0x00ad))
+	   || (((*(unsigned short *) (pc+0) == 0x9305)
+		&& (*(unsigned short *) (pc+2)  == 0xc310)
+		&& (*(unsigned short *) (pc+14)  == 0x00ad))))
+    {
+      struct rt_sigframe {
+	siginfo_t info;
+	ucontext_t uc;	/* glibc 2.26 renamed the 'struct ucontext' tag.  */
+      } *rt_ = context->cfa;
+      /* The void * cast is necessary to avoid an aliasing warning.
+         The aliasing warning is correct, but should not be a problem
+         because it does not alias anything.  */
+      sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext;
+    }
+  else
+    return _URC_END_OF_STACK;
+  /* sc_regs[15] (register 15) becomes the new CFA.  */
+  new_cfa = sc->sc_regs[15];
+  fs->regs.cfa_how = CFA_REG_OFFSET;
+  fs->regs.cfa_reg = 15;
+  fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+  /* Columns 0..14 come from sc_regs; 15 is the CFA register set above.  */
+  for (i = 0; i < 15; i++)
+    {
+      fs->regs.reg[i].how = REG_SAVED_OFFSET;
+      fs->regs.reg[i].loc.offset
+	= (long)&(sc->sc_regs[i]) - new_cfa;
+    }
+  /* PR, SR, GBR, MACH and MACL each have their own sigcontext field.  */
+  fs->regs.reg[SH_DWARF_FRAME_PR].how = REG_SAVED_OFFSET;
+  fs->regs.reg[SH_DWARF_FRAME_PR].loc.offset
+    = (long)&(sc->sc_pr) - new_cfa;
+  fs->regs.reg[SH_DWARF_FRAME_SR].how = REG_SAVED_OFFSET;
+  fs->regs.reg[SH_DWARF_FRAME_SR].loc.offset
+    = (long)&(sc->sc_sr) - new_cfa;
+  fs->regs.reg[SH_DWARF_FRAME_GBR].how = REG_SAVED_OFFSET;
+  fs->regs.reg[SH_DWARF_FRAME_GBR].loc.offset
+    = (long)&(sc->sc_gbr) - new_cfa;
+  fs->regs.reg[SH_DWARF_FRAME_MACH].how = REG_SAVED_OFFSET;
+  fs->regs.reg[SH_DWARF_FRAME_MACH].loc.offset
+    = (long)&(sc->sc_mach) - new_cfa;
+  fs->regs.reg[SH_DWARF_FRAME_MACL].how = REG_SAVED_OFFSET;
+  fs->regs.reg[SH_DWARF_FRAME_MACL].loc.offset
+    = (long)&(sc->sc_macl) - new_cfa;
+
+#if defined (__SH3E__) || defined (__SH4__)
+  r = SH_DWARF_FRAME_FP0;
+  for (i = 0; i < 16; i++)
+    {
+      fs->regs.reg[r+i].how = REG_SAVED_OFFSET;
+      fs->regs.reg[r+i].loc.offset
+	= (long)&(sc->sc_fpregs[i]) - new_cfa;
+    }
+  /* Each XD column maps to every second sc_xfpregs entry.  */
+  r = SH_DWARF_FRAME_XD0;
+  for (i = 0; i < 8; i++)
+    {
+      fs->regs.reg[r+i].how = REG_SAVED_OFFSET;
+      fs->regs.reg[r+i].loc.offset
+	= (long)&(sc->sc_xfpregs[2*i]) - new_cfa;
+    }
+
+  fs->regs.reg[SH_DWARF_FRAME_FPUL].how = REG_SAVED_OFFSET;
+  fs->regs.reg[SH_DWARF_FRAME_FPUL].loc.offset
+    = (long)&(sc->sc_fpul) - new_cfa;
+  fs->regs.reg[SH_DWARF_FRAME_FPSCR].how = REG_SAVED_OFFSET;
+  fs->regs.reg[SH_DWARF_FRAME_FPSCR].loc.offset
+    = (long)&(sc->sc_fpscr) - new_cfa;
+#endif
+  /* The saved sc_pc is used as the return address column.  */
+  fs->regs.reg[SH_DWARF_FRAME_PC].how = REG_SAVED_OFFSET;
+  fs->regs.reg[SH_DWARF_FRAME_PC].loc.offset
+    = (long)&(sc->sc_pc) - new_cfa;
+  fs->retaddr_column = SH_DWARF_FRAME_PC;
+  fs->signal_frame = 1;
+  return _URC_NO_REASON;
+}
+#endif /* defined (__SH5__) */
+
+#endif /* inhibit_libc */
diff --git a/gcc-4.9/libgcc/config/sh/t-linux b/gcc-4.9/libgcc/config/sh/t-linux
new file mode 100644
index 000000000..d316db937
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/t-linux
@@ -0,0 +1,18 @@
+LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
+
+LIB2ADD = $(srcdir)/config/sh/linux-atomic.c
+
+HOST_LIBGCC2_CFLAGS += -DNO_FPSCR_VALUES
+
+# Silence atomic built-in related warnings in linux-atomic.c.
+# Unfortunately the conflicting types warning can't be disabled selectively.
+HOST_LIBGCC2_CFLAGS += -w -Wno-sync-nand
+
+# Override t-slibgcc-elf-ver to export some libgcc symbols with
+# the symbol versions that glibc used, and hide some lib1func
+# routines which should not be called via PLT.  We have to create
+# the list from scratch.
+SHLIB_MAPFILES = \
+	libgcc-std.ver \
+	$(srcdir)/config/sh/libgcc-excl.ver \
+	$(srcdir)/config/sh/libgcc-glibc.ver
diff --git a/gcc-4.9/libgcc/config/sh/t-netbsd b/gcc-4.9/libgcc/config/sh/t-netbsd
new file mode 100644
index 000000000..93fe287e5
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/t-netbsd
@@ -0,0 +1,2 @@
+LIB1ASMFUNCS_CACHE = _ic_invalidate
+
diff --git a/gcc-4.9/libgcc/config/sh/t-sh b/gcc-4.9/libgcc/config/sh/t-sh
new file mode 100644
index 000000000..111bbf603
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/t-sh
@@ -0,0 +1,60 @@
+# Copyright (C) 1993-2014 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = sh/lib1funcs.S
+LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \
+  _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+  _div_table _udiv_qrnnd_16 \
+  $(LIB1ASMFUNCS_CACHE)
+LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
+# crt1.o is compiled straight from crt1.S with no extra defines.
+crt1.o: $(srcdir)/config/sh/crt1.S
+	$(gcc_compile) -c $<
+# _ic_invalidate_array built with WAYS=1, placed in an archive of its own.
+ic_invalidate_array_4-100.o: $(srcdir)/config/sh/lib1funcs.S
+	$(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=1 -DWAY_SIZE=0x2000 $<
+libic_invalidate_array_4-100.a: ic_invalidate_array_4-100.o
+	$(AR_CREATE_FOR_TARGET) $@ $<
+# Same source and WAY_SIZE, but built with WAYS=2.
+ic_invalidate_array_4-200.o: $(srcdir)/config/sh/lib1funcs.S
+	$(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=2 -DWAY_SIZE=0x2000 $<
+libic_invalidate_array_4-200.a: ic_invalidate_array_4-200.o
+	$(AR_CREATE_FOR_TARGET) $@ $<
+# Same source, built with __FORCE_SH4A__ instead of WAYS/WAY_SIZE.
+ic_invalidate_array_4a.o: $(srcdir)/config/sh/lib1funcs.S
+	$(gcc_compile) -c -DL_ic_invalidate_array -D__FORCE_SH4A__ $<
+libic_invalidate_array_4a.a: ic_invalidate_array_4a.o
+	$(AR_CREATE_FOR_TARGET) $@ $<
+# Members of libgcc-Os-4-200.a; unwind-dw2.c is recompiled with -Os.
+sdivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S
+	$(gcc_compile) -c -DL_sdivsi3_i4i $<
+udivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S
+	$(gcc_compile) -c -DL_udivsi3_i4i $<
+unwind-dw2-Os-4-200.o: $(srcdir)/unwind-dw2.c
+	$(gcc_compile) $(LIBGCC2_CFLAGS) $(vis_hide) -fexceptions -Os -c $<
+
+OBJS_Os_4_200=sdivsi3_i4i-Os-4-200.o udivsi3_i4i-Os-4-200.o unwind-dw2-Os-4-200.o
+libgcc-Os-4-200.a: $(OBJS_Os_4_200)
+	$(AR_CREATE_FOR_TARGET) $@ $(OBJS_Os_4_200)
+# libgcc-4-300.a holds only the _div_table from lib1funcs-4-300.S.
+div_table-4-300.o: $(srcdir)/config/sh/lib1funcs-4-300.S
+	$(gcc_compile) -c -DL_div_table $<
+
+libgcc-4-300.a: div_table-4-300.o
+	$(AR_CREATE_FOR_TARGET) $@ div_table-4-300.o
+
diff --git a/gcc-4.9/libgcc/config/sh/t-sh64 b/gcc-4.9/libgcc/config/sh/t-sh64
new file mode 100644
index 000000000..fa9950e03
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/t-sh64
@@ -0,0 +1,6 @@
+LIB1ASMFUNCS = \
+  _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+  _shcompact_call_trampoline _shcompact_return_trampoline \
+  _shcompact_incoming_args _ic_invalidate _nested_trampoline \
+  _push_pop_shmedia_regs \
+  _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table
diff --git a/gcc-4.9/libgcc/config/sh/t-superh b/gcc-4.9/libgcc/config/sh/t-superh
new file mode 100644
index 000000000..b87aa5a31
--- /dev/null
+++ b/gcc-4.9/libgcc/config/sh/t-superh
@@ -0,0 +1,11 @@
+# Compile crt1-mmu.o as crt1.o with -DMMU_SUPPORT
+crt1-mmu.o: $(srcdir)/config/sh/crt1.S
+	$(gcc_compile) -c -DMMU_SUPPORT $<
+
+# Compile gcrt1-mmu.o as crt1-mmu.o with -DPROFILE
+gcrt1-mmu.o: $(srcdir)/config/sh/crt1.S
+	$(gcc_compile) -c -DPROFILE -DMMU_SUPPORT $<
+
+# For sh4-400: Compile gcrt1.o as crt1.o with -DPROFILE
+gcrt1.o: $(srcdir)/config/sh/crt1.S
+	$(gcc_compile) -c -DPROFILE $<
author	Ben Cheng <bccheng@google.com>	2014-03-25 22:37:19 -0700
committer	Ben Cheng <bccheng@google.com>	2014-03-25 22:37:19 -0700
commit	1bc5aee63eb72b341f506ad058502cd0361f0d10 (patch)
tree	c607e8252f3405424ff15bc2d00aa38dadbb2518 /gcc-4.9/libgcc/config/sh
parent	283a0bf58fcf333c58a2a92c3ebbc41fb9eb1fdb (diff)
download	toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.gz toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.bz2 toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.zip