diff options
author | Ben Cheng <bccheng@google.com> | 2014-03-25 22:37:19 -0700 |
---|---|---|
committer | Ben Cheng <bccheng@google.com> | 2014-03-25 22:37:19 -0700 |
commit | 1bc5aee63eb72b341f506ad058502cd0361f0d10 (patch) | |
tree | c607e8252f3405424ff15bc2d00aa38dadbb2518 /gcc-4.9/libgcc/config/sh | |
parent | 283a0bf58fcf333c58a2a92c3ebbc41fb9eb1fdb (diff) | |
download | toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.gz toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.tar.bz2 toolchain_gcc-1bc5aee63eb72b341f506ad058502cd0361f0d10.zip |
Initial checkin of GCC 4.9.0 from trunk (r208799).
Change-Id: I48a3c08bb98542aa215912a75f03c0890e497dba
Diffstat (limited to 'gcc-4.9/libgcc/config/sh')
-rw-r--r-- | gcc-4.9/libgcc/config/sh/crt1.S | 1368 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/crti.S | 125 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/crtn.S | 77 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/lib1funcs-4-300.S | 936 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/lib1funcs-Os-4-200.S | 322 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/lib1funcs.S | 4047 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/lib1funcs.h | 74 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/libgcc-excl.ver | 8 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/libgcc-glibc.ver | 48 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/linux-atomic.c | 81 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/linux-unwind.h | 255 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/t-linux | 18 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/t-netbsd | 2 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/t-sh | 60 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/t-sh64 | 6 | ||||
-rw-r--r-- | gcc-4.9/libgcc/config/sh/t-superh | 11 |
16 files changed, 7438 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/sh/crt1.S b/gcc-4.9/libgcc/config/sh/crt1.S new file mode 100644 index 000000000..d8b929549 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/crt1.S @@ -0,0 +1,1368 @@ +/* Copyright (C) 2000-2014 Free Software Foundation, Inc. + This file was pretty much copied from newlib. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#ifdef MMU_SUPPORT + /* Section used for exception/timer interrupt stack area */ + .section .data.vbr.stack,"aw" + .align 4 + .global __ST_VBR +__ST_VBR: + .zero 1024 * 2 /* ; 2k for VBR handlers */ +/* Label at the highest stack address where the stack grows from */ +__timer_stack: +#endif /* MMU_SUPPORT */ + + /* ;---------------------------------------- + Normal newlib crt1.S */ + +#ifdef __SH5__ + .section .data,"aw" + .global ___data +___data: + + .section .rodata,"a" + .global ___rodata +___rodata: + +#define ICCR_BASE 0x01600000 +#define OCCR_BASE 0x01e00000 +#define MMUIR_BASE 0x00000000 +#define MMUDR_BASE 0x00800000 + +#define PTE_ENABLED 1 +#define PTE_DISABLED 0 + +#define PTE_SHARED (1 << 1) +#define PTE_NOT_SHARED 0 + +#define PTE_CB_UNCACHEABLE 0 +#define PTE_CB_DEVICE 1 +#define PTE_CB_CACHEABLE_WB 2 +#define PTE_CB_CACHEABLE_WT 3 + +#define PTE_SZ_4KB (0 << 3) +#define PTE_SZ_64KB (1 << 3) +#define PTE_SZ_1MB (2 << 3) +#define PTE_SZ_512MB (3 << 3) + +#define PTE_PRR (1 << 6) +#define PTE_PRX (1 << 7) +#define PTE_PRW (1 << 8) +#define PTE_PRU (1 << 9) + +#define SR_MMU_BIT 31 +#define SR_BL_BIT 28 + +#define ALIGN_4KB (0xfff) +#define ALIGN_1MB (0xfffff) +#define ALIGN_512MB (0x1fffffff) + +#define DYNACON_BASE 0x0f000000 +#define DM_CB_DLINK_BASE 0x0c000000 +#define DM_DB_DLINK_BASE 0x0b000000 + +#define FEMI_AREA_0 0x00000000 +#define FEMI_AREA_1 0x04000000 +#define FEMI_AREA_2 0x05000000 +#define FEMI_AREA_3 0x06000000 +#define FEMI_AREA_4 0x07000000 +#define FEMI_CB 0x08000000 + +#define EMI_BASE 0X80000000 + +#define DMA_BASE 0X0e000000 + +#define CPU_BASE 0X0d000000 + +#define PERIPH_BASE 0X09000000 +#define DMAC_BASE 0x0e000000 +#define INTC_BASE 0x0a000000 +#define CPRC_BASE 0x0a010000 +#define TMU_BASE 0x0a020000 +#define SCIF_BASE 0x0a030000 +#define RTC_BASE 0x0a040000 + + + +#define LOAD_CONST32(val, reg) \ + movi ((val) >> 16) & 65535, reg; \ + shori (val) & 65535, reg + +#define LOAD_PTEH_VAL(sym, align, bits, scratch_reg, reg) \ + LOAD_ADDR (sym, reg); \ + LOAD_CONST32 ((align), scratch_reg); \ + andc reg, scratch_reg, reg; \ + LOAD_CONST32 ((bits), scratch_reg); \ + or reg, scratch_reg, reg + +#define LOAD_PTEL_VAL(sym, align, bits, scratch_reg, reg) \ + LOAD_ADDR (sym, reg); \ + LOAD_CONST32 ((align), scratch_reg); \ + andc reg, scratch_reg, reg; \ + LOAD_CONST32 ((bits), scratch_reg); \ + or reg, scratch_reg, reg + +#define SET_PTE(pte_addr_reg, pteh_val_reg, ptel_val_reg) \ + putcfg pte_addr_reg, 0, r63; \ + putcfg pte_addr_reg, 1, ptel_val_reg; \ + putcfg pte_addr_reg, 0, pteh_val_reg + +#if __SH5__ == 64 + .section .text,"ax" +#define LOAD_ADDR(sym, reg) \ + movi (sym >> 48) & 65535, reg; \ + shori (sym >> 32) & 65535, reg; \ + shori (sym >> 16) & 65535, reg; \ + shori sym & 65535, reg +#else + .mode SHmedia + .section .text..SHmedia32,"ax" +#define LOAD_ADDR(sym, reg) \ + movi (sym >> 16) & 65535, reg; \ + shori sym & 65535, reg +#endif + .global start +start: + LOAD_ADDR (_stack, r15) + +#ifdef MMU_SUPPORT + ! Set up the VM using the MMU and caches + + ! .vm_ep is first instruction to execute + ! after VM initialization + pt/l .vm_ep, tr1 + + ! Configure instruction cache (ICCR) + movi 3, r2 + movi 0, r3 + LOAD_ADDR (ICCR_BASE, r1) + putcfg r1, 0, r2 + putcfg r1, 1, r3 + + ! movi 7, r2 ! write through + ! Configure operand cache (OCCR) + LOAD_ADDR (OCCR_BASE, r1) + putcfg r1, 0, r2 + putcfg r1, 1, r3 + + ! Disable all PTE translations + LOAD_ADDR (MMUIR_BASE, r1) + LOAD_ADDR (MMUDR_BASE, r2) + movi 64, r3 + pt/l .disable_ptes_loop, tr0 +.disable_ptes_loop: + putcfg r1, 0, r63 + putcfg r2, 0, r63 + addi r1, 16, r1 + addi r2, 16, r2 + addi r3, -1, r3 + bgt r3, r63, tr0 + + LOAD_ADDR (MMUIR_BASE, r1) + + ! FEMI instruction mappings + ! Area 0 - 1Mb cacheable at 0x00000000 + ! Area 1 - None + ! Area 2 - 1Mb cacheable at 0x05000000 + ! - 1Mb cacheable at 0x05100000 + ! Area 3 - None + ! Area 4 - None + + ! Map a 1Mb page for instructions at 0x00000000 + LOAD_PTEH_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1Mb page for instructions at 0x05000000 + addi r1, 16, r1 + LOAD_PTEH_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1Mb page for instructions at 0x05100000 + addi r1, 16, r1 + LOAD_PTEH_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 512M page for instructions at EMI base + addi r1, 16, r1 + LOAD_PTEH_VAL (EMI_BASE, ALIGN_512MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (EMI_BASE, ALIGN_512MB, PTE_CB_CACHEABLE_WB | PTE_SZ_512MB | PTE_PRX | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for instructions at DM_DB_DLINK_BASE + addi r1, 16, r1 + LOAD_PTEH_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRX | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + LOAD_ADDR (MMUDR_BASE, r1) + + ! FEMI data mappings + ! Area 0 - 1Mb cacheable at 0x00000000 + ! Area 1 - 1Mb device at 0x04000000 + ! Area 2 - 1Mb cacheable at 0x05000000 + ! - 1Mb cacheable at 0x05100000 + ! Area 3 - None + ! Area 4 - None + ! CB - 1Mb device at 0x08000000 + + ! Map a 1Mb page for data at 0x00000000 + LOAD_PTEH_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1Mb page for data at 0x04000000 + addi r1, 16, r1 + LOAD_PTEH_VAL (FEMI_AREA_1, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_AREA_1, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1Mb page for data at 0x05000000 + addi r1, 16, r1 + LOAD_PTEH_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1Mb page for data at 0x05100000 + addi r1, 16, r1 + LOAD_PTEH_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for registers at 0x08000000 + addi r1, 16, r1 + LOAD_PTEH_VAL (FEMI_CB, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (FEMI_CB, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 512M page for data at EMI + addi r1, 16, r1 + LOAD_PTEH_VAL (EMI_BASE, ALIGN_512MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (EMI_BASE, ALIGN_512MB, PTE_CB_CACHEABLE_WB | PTE_SZ_512MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for DYNACON at DYNACON_BASE + addi r1, 16, r1 + LOAD_PTEH_VAL (DYNACON_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DYNACON_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for instructions at DM_DB_DLINK_BASE + addi r1, 16, r1 + LOAD_PTEH_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for data at DM_DB_DLINK_BASE+0x1000 + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_DB_DLINK_BASE+0x1000), ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_DB_DLINK_BASE+0x1000), ALIGN_4KB, PTE_CB_UNCACHEABLE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for stack DM_DB_DLINK_BASE+0x2000 + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_DB_DLINK_BASE+0x2000), ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_DB_DLINK_BASE+0x2000), ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK + ! 0x0c000000 - 0x0c0fffff + addi r1, 16, r1 + LOAD_PTEH_VAL (DM_CB_DLINK_BASE, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DM_CB_DLINK_BASE, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK + ! 0x0c100000 - 0x0c1fffff + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x100000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK + ! 0x0c200000 - 0x0c2fffff + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x200000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x200000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK + ! 0x0c400000 - 0x0c4fffff + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x400000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x400000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK + ! 0x0c800000 - 0x0c8fffff + addi r1, 16, r1 + LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x800000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x800000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map a 4K page for DMA control registers + addi r1, 16, r1 + LOAD_PTEH_VAL (DMA_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DMA_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map lots of 4K pages for peripherals + + ! /* peripheral */ + addi r1, 16, r1 + LOAD_PTEH_VAL (PERIPH_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (PERIPH_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* dmac */ + addi r1, 16, r1 + LOAD_PTEH_VAL (DMAC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (DMAC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* intc */ + addi r1, 16, r1 + LOAD_PTEH_VAL (INTC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (INTC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* rtc */ + addi r1, 16, r1 + LOAD_PTEH_VAL (RTC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (RTC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* dmac */ + addi r1, 16, r1 + LOAD_PTEH_VAL (TMU_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (TMU_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* scif */ + addi r1, 16, r1 + LOAD_PTEH_VAL (SCIF_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (SCIF_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + ! /* cprc */ + addi r1, 16, r1 + LOAD_PTEH_VAL (CPRC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (CPRC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Map CPU WPC registers + addi r1, 16, r1 + LOAD_PTEH_VAL (CPU_BASE, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL (CPU_BASE, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + addi r1, 16, r1 + + LOAD_PTEH_VAL ((CPU_BASE+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((CPU_BASE+0x100000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + addi r1, 16, r1 + LOAD_PTEH_VAL ((CPU_BASE+0x200000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((CPU_BASE+0x200000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + addi r1, 16, r1 + LOAD_PTEH_VAL ((CPU_BASE+0x400000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2) + LOAD_PTEL_VAL ((CPU_BASE+0x400000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3) + SET_PTE (r1, r2, r3) + + ! Switch over to virtual addressing and enabled cache + getcon sr, r1 + movi 1, r2 + shlli r2, SR_BL_BIT, r2 + or r1, r2, r1 + putcon r1, ssr + getcon sr, r1 + movi 1, r2 + shlli r2, SR_MMU_BIT, r2 + or r1, r2, r1 + putcon r1, ssr + gettr tr1, r1 + putcon r1, spc + synco + rte + + ! VM entry point. From now on, we are in VM mode. +.vm_ep: + + ! Install the trap handler, by seeding vbr with the + ! correct value, and by assigning sr.bl = 0. + + LOAD_ADDR (vbr_start, r1) + putcon r1, vbr + movi ~(1<<28), r1 + getcon sr, r2 + and r1, r2, r2 + putcon r2, sr +#endif /* MMU_SUPPORT */ + + pt/l .Lzero_bss_loop, tr0 + pt/l _init, tr5 + pt/l ___setup_argv_and_call_main, tr6 + pt/l _exit, tr7 + + ! zero out bss + LOAD_ADDR (_edata, r0) + LOAD_ADDR (_end, r1) +.Lzero_bss_loop: + stx.q r0, r63, r63 + addi r0, 8, r0 + bgt/l r1, r0, tr0 + + LOAD_ADDR (___data, r26) + LOAD_ADDR (___rodata, r27) + +#ifdef __SH_FPU_ANY__ + getcon sr, r0 + ! enable the FP unit, by resetting SR.FD + ! also zero out SR.FR, SR.SZ and SR.PR, as mandated by the ABI + movi 0, r1 + shori 0xf000, r1 + andc r0, r1, r0 + putcon r0, sr +#if __SH5__ == 32 + pt/l ___set_fpscr, tr0 + movi 0, r4 + blink tr0, r18 +#endif +#endif + + ! arrange for exit to call fini + pt/l _atexit, tr1 + LOAD_ADDR (_fini, r2) + blink tr1, r18 + + ! call init + blink tr5, r18 + + ! call the mainline + blink tr6, r18 + + ! call exit + blink tr7, r18 + ! We should never return from _exit but in case we do we would enter the + ! the following tight loop. This avoids executing any data that might follow. +limbo: + pt/l limbo, tr0 + blink tr0, r63 + +#ifdef MMU_SUPPORT + ! All these traps are handled in the same place. + .balign 256 +vbr_start: + pt/l handler, tr0 ! tr0 trashed. + blink tr0, r63 + .balign 256 +vbr_100: + pt/l handler, tr0 ! tr0 trashed. + blink tr0, r63 +vbr_100_end: + .balign 256 +vbr_200: + pt/l handler, tr0 ! tr0 trashed. + blink tr0, r63 + .balign 256 +vbr_300: + pt/l handler, tr0 ! tr0 trashed. + blink tr0, r63 + .balign 256 +vbr_400: ! Should be at vbr+0x400 +handler: + /* If the trap handler is there call it */ + LOAD_ADDR (__superh_trap_handler, r2) + pta chandler,tr2 + beq r2, r63, tr2 /* If zero, ie not present branch around to chandler */ + /* Now call the trap handler with as much of the context unchanged as possible. + Move trapping address into R18 to make it look like the trap point */ + getcon spc, r18 + pt/l __superh_trap_handler, tr0 + blink tr0, r7 +chandler: + getcon spc, r62 + getcon expevt, r2 + pt/l _exit, tr0 + blink tr0, r63 + + /* Simulated trap handler */ + .section .text..SHmedia32,"ax" +gcc2_compiled.: + .section .debug_abbrev +.Ldebug_abbrev0: + .section .text..SHmedia32 +.Ltext0: + .section .debug_info +.Ldebug_info0: + .section .debug_line +.Ldebug_line0: + .section .text..SHmedia32,"ax" + .align 5 + .global __superh_trap_handler + .type __superh_trap_handler,@function +__superh_trap_handler: +.LFB1: + ptabs r18, tr0 + addi.l r15, -8, r15 + st.l r15, 4, r14 + addi.l r15, -8, r15 + add.l r15, r63, r14 + st.l r14, 0, r2 + ptabs r7, tr0 + addi.l r14, 8, r14 + add.l r14, r63, r15 + ld.l r15, 4, r14 + addi.l r15, 8, r15 + blink tr0, r63 +.LFE1: +.Lfe1: + .size __superh_trap_handler,.Lfe1-__superh_trap_handler + + .section .text..SHmedia32 +.Letext0: + + .section .debug_info + .ualong 0xa7 + .uaword 0x2 + .ualong .Ldebug_abbrev0 + .byte 0x4 + .byte 0x1 + .ualong .Ldebug_line0 + .ualong .Letext0 + .ualong .Ltext0 + .string "trap_handler.c" + + .string "xxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + .string "GNU C 2.97-sh5-010522" + + .byte 0x1 + .byte 0x2 + .ualong 0x9a + .byte 0x1 + .string "_superh_trap_handler" + + .byte 0x1 + .byte 0x2 + .byte 0x1 + .ualong .LFB1 + .ualong .LFE1 + .byte 0x1 + .byte 0x5e + .byte 0x3 + .string "trap_reason" + + .byte 0x1 + .byte 0x1 + .ualong 0x9a + .byte 0x2 + .byte 0x91 + .byte 0x0 + .byte 0x0 + .byte 0x4 + .string "unsigned int" + + .byte 0x4 + .byte 0x7 + .byte 0x0 + + .section .debug_abbrev + .byte 0x1 + .byte 0x11 + .byte 0x1 + .byte 0x10 + .byte 0x6 + .byte 0x12 + .byte 0x1 + .byte 0x11 + .byte 0x1 + .byte 0x3 + .byte 0x8 + .byte 0x1b + .byte 0x8 + .byte 0x25 + .byte 0x8 + .byte 0x13 + .byte 0xb + .byte 0,0 + .byte 0x2 + .byte 0x2e + .byte 0x1 + .byte 0x1 + .byte 0x13 + .byte 0x3f + .byte 0xc + .byte 0x3 + .byte 0x8 + .byte 0x3a + .byte 0xb + .byte 0x3b + .byte 0xb + .byte 0x27 + .byte 0xc + .byte 0x11 + .byte 0x1 + .byte 0x12 + .byte 0x1 + .byte 0x40 + .byte 0xa + .byte 0,0 + .byte 0x3 + .byte 0x5 + .byte 0x0 + .byte 0x3 + .byte 0x8 + .byte 0x3a + .byte 0xb + .byte 0x3b + .byte 0xb + .byte 0x49 + .byte 0x13 + .byte 0x2 + .byte 0xa + .byte 0,0 + .byte 0x4 + .byte 0x24 + .byte 0x0 + .byte 0x3 + .byte 0x8 + .byte 0xb + .byte 0xb + .byte 0x3e + .byte 0xb + .byte 0,0 + .byte 0 + + .section .debug_pubnames + .ualong 0x27 + .uaword 0x2 + .ualong .Ldebug_info0 + .ualong 0xab + .ualong 0x5b + .string "_superh_trap_handler" + + .ualong 0x0 + + .section .debug_aranges + .ualong 0x1c + .uaword 0x2 + .ualong .Ldebug_info0 + .byte 0x4 + .byte 0x0 + .uaword 0x0,0 + .ualong .Ltext0 + .ualong .Letext0-.Ltext0 + .ualong 0x0 + .ualong 0x0 + .ident "GCC: (GNU) 2.97-sh5-010522" +#endif /* MMU_SUPPORT */ +#else /* ! __SH5__ */ + + ! make a place to keep any previous value of the vbr register + ! this will only have a value if it has been set by redboot (for example) + .section .bss +old_vbr: + .long 0 +#ifdef PROFILE +profiling_enabled: + .long 0 +#endif + + + .section .text + .global start + .import ___rtos_profiler_start_timer + .weak ___rtos_profiler_start_timer +start: + mov.l stack_k,r15 + +#if defined (__SH3__) || (defined (__SH_FPU_ANY__) && ! defined (__SH2E__) && ! defined (__SH2A__)) || defined (__SH4_NOFPU__) +#define VBR_SETUP + ! before zeroing the bss ... + ! if the vbr is already set to vbr_start then the program has been restarted + ! (i.e. it is not the first time the program has been run since reset) + ! reset the vbr to its old value before old_vbr (in bss) is wiped + ! this ensures that the later code does not create a circular vbr chain + stc vbr, r1 + mov.l vbr_start_k, r2 + cmp/eq r1, r2 + bf 0f + ! reset the old vbr value + mov.l old_vbr_k, r1 + mov.l @r1, r2 + ldc r2, vbr +0: +#endif /* VBR_SETUP */ + + ! zero out bss + mov.l edata_k,r0 + mov.l end_k,r1 + mov #0,r2 +start_l: + mov.l r2,@r0 + add #4,r0 + cmp/ge r0,r1 + bt start_l + +#if defined (__SH_FPU_ANY__) + mov.l set_fpscr_k, r1 + mov #4,r4 + jsr @r1 + shll16 r4 ! Set DN bit (flush denormal inputs to zero) + lds r3,fpscr ! Switch to default precision +#endif /* defined (__SH_FPU_ANY__) */ + +#ifdef VBR_SETUP + ! save the existing contents of the vbr + ! there will only be a prior value when using something like redboot + ! otherwise it will be zero + stc vbr, r1 + mov.l old_vbr_k, r2 + mov.l r1, @r2 + ! setup vbr + mov.l vbr_start_k, r1 + ldc r1,vbr +#endif /* VBR_SETUP */ + + ! if an rtos is exporting a timer start fn, + ! then pick up an SR which does not enable ints + ! (the rtos will take care of this) + mov.l rtos_start_fn, r0 + mov.l sr_initial_bare, r1 + tst r0, r0 + bt set_sr + + mov.l sr_initial_rtos, r1 + +set_sr: + ! Set status register (sr) + ldc r1, sr + + ! arrange for exit to call fini + mov.l atexit_k,r0 + mov.l fini_k,r4 + jsr @r0 + nop + +#ifdef PROFILE + ! arrange for exit to call _mcleanup (via stop_profiling) + mova stop_profiling,r0 + mov.l atexit_k,r1 + jsr @r1 + mov r0, r4 + + ! Call profiler startup code + mov.l monstartup_k, r0 + mov.l start_k, r4 + mov.l etext_k, r5 + jsr @r0 + nop + + ! enable profiling trap + ! until now any trap 33s will have been ignored + ! This means that all library functions called before this point + ! (directly or indirectly) may have the profiling trap at the start. + ! Therefore, only mcount itself may not have the extra header. + mov.l profiling_enabled_k2, r0 + mov #1, r1 + mov.l r1, @r0 +#endif /* PROFILE */ + + ! call init + mov.l init_k,r0 + jsr @r0 + nop + + ! call the mainline + mov.l main_k,r0 + jsr @r0 + nop + + ! call exit + mov r0,r4 + mov.l exit_k,r0 + jsr @r0 + nop + + .balign 4 +#ifdef PROFILE +stop_profiling: + # stop mcount counting + mov.l profiling_enabled_k2, r0 + mov #0, r1 + mov.l r1, @r0 + + # call mcleanup + mov.l mcleanup_k, r0 + jmp @r0 + nop + + .balign 4 +mcleanup_k: + .long __mcleanup +monstartup_k: + .long ___monstartup +profiling_enabled_k2: + .long profiling_enabled +start_k: + .long _start +etext_k: + .long __etext +#endif /* PROFILE */ + + .align 2 +#if defined (__SH_FPU_ANY__) +set_fpscr_k: + .long ___set_fpscr +#endif /* defined (__SH_FPU_ANY__) */ + +stack_k: + .long _stack +edata_k: + .long _edata +end_k: + .long _end +main_k: + .long ___setup_argv_and_call_main +exit_k: + .long _exit +atexit_k: + .long _atexit +init_k: + .long _init +fini_k: + .long _fini +#ifdef VBR_SETUP +old_vbr_k: + .long old_vbr +vbr_start_k: + .long vbr_start +#endif /* VBR_SETUP */ + +sr_initial_rtos: + ! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work. + ! Whether profiling or not, keep interrupts masked, + ! the RTOS will enable these if required. + .long 0x600000f1 + +rtos_start_fn: + .long ___rtos_profiler_start_timer + +#ifdef PROFILE +sr_initial_bare: + ! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work. + ! For bare machine, we need to enable interrupts to get profiling working + .long 0x60000001 +#else + +sr_initial_bare: + ! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work. + ! Keep interrupts disabled - the application will enable as required. + .long 0x600000f1 +#endif + + ! supplied for backward compatibility only, in case of linking + ! code whose main() was compiled with an older version of GCC. + .global ___main +___main: + rts + nop +#ifdef VBR_SETUP +! Exception handlers + .section .text.vbr, "ax" +vbr_start: + + .org 0x100 +vbr_100: +#ifdef PROFILE + ! Note on register usage. + ! we use r0..r3 as scratch in this code. If we are here due to a trapa for profiling + ! then this is OK as we are just before executing any function code. + ! The other r4..r7 we save explicityl on the stack + ! Remaining registers are saved by normal ABI conventions and we assert we do not + ! use floating point registers. + mov.l expevt_k1, r1 + mov.l @r1, r1 + mov.l event_mask, r0 + and r0,r1 + mov.l trapcode_k, r2 + cmp/eq r1,r2 + bt 1f + bra handler_100 ! if not a trapa, go to default handler + nop +1: + mov.l trapa_k, r0 + mov.l @r0, r0 + shlr2 r0 ! trapa code is shifted by 2. + cmp/eq #33, r0 + bt 2f + bra handler_100 + nop +2: + + ! If here then it looks like we have trap #33 + ! Now we need to call mcount with the following convention + ! Save and restore r4..r7 + mov.l r4,@-r15 + mov.l r5,@-r15 + mov.l r6,@-r15 + mov.l r7,@-r15 + sts.l pr,@-r15 + + ! r4 is frompc. + ! r5 is selfpc + ! r0 is the branch back address. + ! The code sequence emitted by gcc for the profiling trap is + ! .align 2 + ! trapa #33 + ! .align 2 + ! .long lab Where lab is planted by the compiler. This is the address + ! of a datum that needs to be incremented. + sts pr, r4 ! frompc + stc spc, r5 ! selfpc + mov #2, r2 + not r2, r2 ! pattern to align to 4 + and r2, r5 ! r5 now has aligned address +! add #4, r5 ! r5 now has address of address + mov r5, r2 ! Remember it. +! mov.l @r5, r5 ! r5 has value of lable (lab in above example) + add #8, r2 + ldc r2, spc ! our return address avoiding address word + + ! only call mcount if profiling is enabled + mov.l profiling_enabled_k, r0 + mov.l @r0, r0 + cmp/eq #0, r0 + bt 3f + ! call mcount + mov.l mcount_k, r2 + jsr @r2 + nop +3: + lds.l @r15+,pr + mov.l @r15+,r7 + mov.l @r15+,r6 + mov.l @r15+,r5 + mov.l @r15+,r4 + rte + nop + .balign 4 +event_mask: + .long 0xfff +trapcode_k: + .long 0x160 +expevt_k1: + .long 0xff000024 ! Address of expevt +trapa_k: + .long 0xff000020 +mcount_k: + .long __call_mcount +profiling_enabled_k: + .long profiling_enabled +#endif + ! Non profiling case. +handler_100: + mov.l 2f, r0 ! load the old vbr setting (if any) + mov.l @r0, r0 + cmp/eq #0, r0 + bf 1f + ! no previous vbr - jump to own generic handler + bra handler + nop +1: ! there was a previous handler - chain them + add #0x7f, r0 ! 0x7f + add #0x7f, r0 ! 0xfe + add #0x2, r0 ! add 0x100 without corrupting another register + jmp @r0 + nop + .balign 4 +2: + .long old_vbr + + .org 0x400 +vbr_400: ! Should be at vbr+0x400 + mov.l 2f, r0 ! load the old vbr setting (if any) + mov.l @r0, r0 + cmp/eq #0, r0 + ! no previous vbr - jump to own generic handler + bt handler + ! there was a previous handler - chain them + rotcr r0 + rotcr r0 + add #0x7f, r0 ! 0x1fc + add #0x7f, r0 ! 0x3f8 + add #0x02, r0 ! 0x400 + rotcl r0 + rotcl r0 ! Add 0x400 without corrupting another register + jmp @r0 + nop + .balign 4 +2: + .long old_vbr +handler: + /* If the trap handler is there call it */ + mov.l superh_trap_handler_k, r0 + cmp/eq #0, r0 ! True if zero. + bf 3f + bra chandler + nop +3: + ! Here handler available, call it. + /* Now call the trap handler with as much of the context unchanged as possible. + Move trapping address into PR to make it look like the trap point */ + stc spc, r1 + lds r1, pr + mov.l expevt_k, r4 + mov.l @r4, r4 ! r4 is value of expevt, first parameter. + mov r1, r5 ! Remember trapping pc. + mov r1, r6 ! Remember trapping pc. + mov.l chandler_k, r1 + mov.l superh_trap_handler_k, r2 + ! jmp to trap handler to avoid disturbing pr. + jmp @r2 + nop + + .org 0x600 +vbr_600: +#ifdef PROFILE + ! Should be at vbr+0x600 + ! Now we are in the land of interrupts so need to save more state. + ! Save register state + mov.l interrupt_stack_k, r15 ! r15 has been saved to sgr. + mov.l r0,@-r15 + mov.l r1,@-r15 + mov.l r2,@-r15 + mov.l r3,@-r15 + mov.l r4,@-r15 + mov.l r5,@-r15 + mov.l r6,@-r15 + mov.l r7,@-r15 + sts.l pr,@-r15 + sts.l mach,@-r15 + sts.l macl,@-r15 +#if defined(__SH_FPU_ANY__) + ! Save fpul and fpscr, save fr0-fr7 in 64 bit mode + ! and set the pervading precision for the timer_handler + mov #0,r0 + sts.l fpul,@-r15 + sts.l fpscr,@-r15 + lds r0,fpscr ! Clear fpscr + fmov fr0,@-r15 + fmov fr1,@-r15 + fmov fr2,@-r15 + fmov fr3,@-r15 + mov.l pervading_precision_k,r0 + fmov fr4,@-r15 + fmov fr5,@-r15 + mov.l @r0,r0 + fmov fr6,@-r15 + fmov fr7,@-r15 + lds r0,fpscr +#endif /* __SH_FPU_ANY__ */ + ! Pass interrupted pc to timer_handler as first parameter (r4). + stc spc, r4 + mov.l timer_handler_k, r0 + jsr @r0 + nop +#if defined(__SH_FPU_ANY__) + mov #0,r0 + lds r0,fpscr ! Clear the fpscr + fmov @r15+,fr7 + fmov @r15+,fr6 + fmov @r15+,fr5 + fmov @r15+,fr4 + fmov @r15+,fr3 + fmov @r15+,fr2 + fmov @r15+,fr1 + fmov @r15+,fr0 + lds.l @r15+,fpscr + lds.l @r15+,fpul +#endif /* __SH_FPU_ANY__ */ + lds.l @r15+,macl + lds.l @r15+,mach + lds.l @r15+,pr + mov.l @r15+,r7 + mov.l @r15+,r6 + mov.l @r15+,r5 + mov.l @r15+,r4 + mov.l @r15+,r3 + mov.l @r15+,r2 + mov.l @r15+,r1 + mov.l @r15+,r0 + stc sgr, r15 ! Restore r15, destroyed by this sequence. + rte + nop +#if defined(__SH_FPU_ANY__) + .balign 4 +pervading_precision_k: +#define CONCAT1(A,B) A##B +#define CONCAT(A,B) CONCAT1(A,B) + .long CONCAT(__USER_LABEL_PREFIX__,__fpscr_values)+4 +#endif +#else + mov.l 2f, r0 ! Load the old vbr setting (if any). + mov.l @r0, r0 + cmp/eq #0, r0 + ! no previous vbr - jump to own handler + bt chandler + ! there was a previous handler - chain them + rotcr r0 + rotcr r0 + add #0x7f, r0 ! 0x1fc + add #0x7f, r0 ! 0x3f8 + add #0x7f, r0 ! 0x5f4 + add #0x03, r0 ! 0x600 + rotcl r0 + rotcl r0 ! Add 0x600 without corrupting another register + jmp @r0 + nop + .balign 4 +2: + .long old_vbr +#endif /* PROFILE code */ +chandler: + mov.l expevt_k, r4 + mov.l @r4, r4 ! r4 is value of expevt hence making this the return code + mov.l handler_exit_k,r0 + jsr @r0 + nop + ! We should never return from _exit but in case we do we would enter the + ! the following tight loop +limbo: + bra limbo + nop + .balign 4 +#ifdef PROFILE +interrupt_stack_k: + .long __timer_stack ! The high end of the stack +timer_handler_k: + .long __profil_counter +#endif +expevt_k: + .long 0xff000024 ! Address of expevt +chandler_k: + .long chandler +superh_trap_handler_k: + .long __superh_trap_handler +handler_exit_k: + .long _exit + .align 2 +! Simulated compile of trap handler. + .section .debug_abbrev,"",@progbits +.Ldebug_abbrev0: + .section .debug_info,"",@progbits +.Ldebug_info0: + .section .debug_line,"",@progbits +.Ldebug_line0: + .text +.Ltext0: + .align 5 + .type __superh_trap_handler,@function +__superh_trap_handler: +.LFB1: + mov.l r14,@-r15 +.LCFI0: + add #-4,r15 +.LCFI1: + mov r15,r14 +.LCFI2: + mov.l r4,@r14 + lds r1, pr + add #4,r14 + mov r14,r15 + mov.l @r15+,r14 + rts + nop +.LFE1: +.Lfe1: + .size __superh_trap_handler,.Lfe1-__superh_trap_handler + .section .debug_frame,"",@progbits +.Lframe0: + .ualong .LECIE0-.LSCIE0 +.LSCIE0: + .ualong 0xffffffff + .byte 0x1 + .string "" + .uleb128 0x1 + .sleb128 -4 + .byte 0x11 + .byte 0xc + .uleb128 0xf + .uleb128 0x0 + .align 2 +.LECIE0: +.LSFDE0: + .ualong .LEFDE0-.LASFDE0 +.LASFDE0: + .ualong .Lframe0 + .ualong .LFB1 + .ualong .LFE1-.LFB1 + .byte 0x4 + .ualong .LCFI0-.LFB1 + .byte 0xe + .uleb128 0x4 + .byte 0x4 + .ualong .LCFI1-.LCFI0 + .byte 0xe + .uleb128 0x8 + .byte 0x8e + .uleb128 0x1 + .byte 0x4 + .ualong .LCFI2-.LCFI1 + .byte 0xd + .uleb128 0xe + .align 2 +.LEFDE0: + .text +.Letext0: + .section .debug_info + .ualong 0xb3 + .uaword 0x2 + .ualong .Ldebug_abbrev0 + .byte 0x4 + .uleb128 0x1 + .ualong .Ldebug_line0 + .ualong .Letext0 + .ualong .Ltext0 + .string "trap_handler.c" + .string "xxxxxxxxxxxxxxxxxxxxxxxxxxxx" + .string "GNU C 3.2 20020529 (experimental)" + .byte 0x1 + .uleb128 0x2 + .ualong 0xa6 + .byte 0x1 + .string "_superh_trap_handler" + .byte 0x1 + .byte 0x2 + .byte 0x1 + .ualong .LFB1 + .ualong .LFE1 + .byte 0x1 + .byte 0x5e + .uleb128 0x3 + .string "trap_reason" + .byte 0x1 + .byte 0x1 + .ualong 0xa6 + .byte 0x2 + .byte 0x91 + .sleb128 0 + .byte 0x0 + .uleb128 0x4 + .string "unsigned int" + .byte 0x4 + .byte 0x7 + .byte 0x0 + .section .debug_abbrev + .uleb128 0x1 + .uleb128 0x11 + .byte 0x1 + .uleb128 0x10 + .uleb128 0x6 + .uleb128 0x12 + .uleb128 0x1 + .uleb128 0x11 + .uleb128 0x1 + .uleb128 0x3 + .uleb128 0x8 + .uleb128 0x1b + .uleb128 0x8 + .uleb128 0x25 + .uleb128 0x8 + .uleb128 0x13 + .uleb128 0xb + .byte 0x0 + .byte 0x0 + .uleb128 0x2 + .uleb128 0x2e + .byte 0x1 + .uleb128 0x1 + .uleb128 0x13 + .uleb128 0x3f + .uleb128 0xc + .uleb128 0x3 + .uleb128 0x8 + .uleb128 0x3a + .uleb128 0xb + .uleb128 0x3b + .uleb128 0xb + .uleb128 0x27 + .uleb128 0xc + .uleb128 0x11 + .uleb128 0x1 + .uleb128 0x12 + .uleb128 0x1 + .uleb128 0x40 + .uleb128 0xa + .byte 0x0 + .byte 0x0 + .uleb128 0x3 + .uleb128 0x5 + .byte 0x0 + .uleb128 0x3 + .uleb128 0x8 + .uleb128 0x3a + .uleb128 0xb + .uleb128 0x3b + .uleb128 0xb + .uleb128 0x49 + .uleb128 0x13 + .uleb128 0x2 + .uleb128 0xa + .byte 0x0 + .byte 0x0 + .uleb128 0x4 + .uleb128 0x24 + .byte 0x0 + .uleb128 0x3 + .uleb128 0x8 + .uleb128 0xb + .uleb128 0xb + .uleb128 0x3e + .uleb128 0xb + .byte 0x0 + .byte 0x0 + .byte 0x0 + .section .debug_pubnames,"",@progbits + .ualong 0x27 + .uaword 0x2 + .ualong .Ldebug_info0 + .ualong 0xb7 + .ualong 0x67 + .string "_superh_trap_handler" + .ualong 0x0 + .section .debug_aranges,"",@progbits + .ualong 0x1c + .uaword 0x2 + .ualong .Ldebug_info0 + .byte 0x4 + .byte 0x0 + .uaword 0x0 + .uaword 0x0 + .ualong .Ltext0 + .ualong .Letext0-.Ltext0 + .ualong 0x0 + .ualong 0x0 +#endif /* VBR_SETUP */ +#endif /* ! __SH5__ */ diff --git a/gcc-4.9/libgcc/config/sh/crti.S b/gcc-4.9/libgcc/config/sh/crti.S new file mode 100644 index 000000000..550f63758 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/crti.S @@ -0,0 +1,125 @@ +/* Copyright (C) 2000-2014 Free Software Foundation, Inc. + This file was adapted from glibc sources. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* The code in sections .init and .fini is supposed to be a single + regular function. The function in .init is called directly from + start in crt1.S. The function in .fini is atexit()ed in crt1.S + too. + + crti.S contributes the prologue of a function to these sections, + and crtn.S comes up the epilogue. STARTFILE_SPEC should list + crti.o before any other object files that might add code to .init + or .fini sections, and ENDFILE_SPEC should list crtn.o after any + such object files. */ + + .section .init +/* The alignment below can't be smaller, otherwise the mova below + breaks. Yes, we might align just the label, but then we'd be + exchanging an alignment here for one there, since the code fragment + below ensures 4-byte alignment on __ELF__. */ +#ifdef __ELF__ + .p2align 2 +#else + .p2align 1 +#endif + .global _init +_init: +#if __SHMEDIA__ + addi r15, -16, r15 + st.q r15, 8, r14 + st.q r15, 0, r18 + add r15, r63, r14 +#elif __SH5__ && ! __SHMEDIA__ + mov r15,r0 + add #-8,r15 + mov.l r14,@-r0 + sts.l pr,@-r0 + mov r15,r14 + nop +#else +#ifdef __ELF__ + mov.l r12,@-r15 + mova 0f,r0 + mov.l 0f,r12 +#endif + mov.l r14,@-r15 +#ifdef __ELF__ + add r0,r12 +#endif + sts.l pr,@-r15 +#ifdef __ELF__ + bra 1f +#endif + mov r15,r14 +#ifdef __ELF__ +0: .long _GLOBAL_OFFSET_TABLE_ +1: +#endif +#endif /* __SHMEDIA__ */ + + .section .fini +/* The alignment below can't be smaller, otherwise the mova below + breaks. Yes, we might align just the label, but then we'd be + exchanging an alignment here for one there, since the code fragment + below ensures 4-byte alignment on __ELF__. */ +#ifdef __ELF__ + .p2align 2 +#else + .p2align 1 +#endif + .global _fini +_fini: +#if __SHMEDIA__ + addi r15, -16, r15 + st.q r15, 8, r14 + st.q r15, 0, r18 + add r15, r63, r14 +#elif __SH5__ && ! __SHMEDIA__ + mov r15,r0 + add #-8,r15 + mov.l r14,@-r0 + sts.l pr,@-r0 + mov r15,r14 + nop +#else +#ifdef __ELF__ + mov.l r12,@-r15 + mova 0f,r0 + mov.l 0f,r12 +#endif + mov.l r14,@-r15 +#ifdef __ELF__ + add r0,r12 +#endif + sts.l pr,@-r15 +#ifdef __ELF__ + bra 1f +#endif + mov r15,r14 +#ifdef __ELF__ +0: .long _GLOBAL_OFFSET_TABLE_ +1: +#endif +#endif /* __SHMEDIA__ */ diff --git a/gcc-4.9/libgcc/config/sh/crtn.S b/gcc-4.9/libgcc/config/sh/crtn.S new file mode 100644 index 000000000..9d7c4eb65 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/crtn.S @@ -0,0 +1,77 @@ +/* Copyright (C) 2000-2014 Free Software Foundation, Inc. + This file was adapted from glibc sources. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* See an explanation about .init and .fini in crti.S. */ + + .section .init +#if __SHMEDIA__ + add r14, r63, r15 + ld.q r15, 0, r18 + ptabs r18, tr0 + ld.q r15, 8, r14 + addi r15, 16, r15 + blink tr0, r63 +#elif __SH5__ && ! __SHMEDIA__ + mov r14,r15 + lds.l @r14+,pr + mov.l @r14,r14 + rts + add #8,r15 +#else + mov r14,r15 + lds.l @r15+,pr + mov.l @r15+,r14 + rts +#ifdef __ELF__ + mov.l @r15+,r12 +#else + nop +#endif +#endif /* __SHMEDIA__ */ + + .section .fini +#if __SHMEDIA__ + add r14, r63, r15 + ld.q r15, 0, r18 + ptabs r18, tr0 + ld.q r15, 8, r14 + addi r15, 16, r15 + blink tr0, r63 +#elif __SH5__ && ! __SHMEDIA__ + mov r14,r15 + lds.l @r14+,pr + mov.l @r14,r14 + rts + add #8,r15 +#else + mov r14,r15 + lds.l @r15+,pr + mov.l @r15+,r14 + rts +#ifdef __ELF__ + mov.l @r15+,r12 +#else + nop +#endif +#endif /* __SHMEDIA__ */ diff --git a/gcc-4.9/libgcc/config/sh/lib1funcs-4-300.S b/gcc-4.9/libgcc/config/sh/lib1funcs-4-300.S new file mode 100644 index 000000000..5f05b0812 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/lib1funcs-4-300.S @@ -0,0 +1,936 @@ +/* Copyright (C) 2004-2014 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* libgcc routines for the STMicroelectronics ST40-300 CPU. + Contributed by J"orn Rennecke joern.rennecke@st.com. */ + +#include "lib1funcs.h" + +#if !__SHMEDIA__ +#ifdef L_div_table +#if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) +/* This code used shld, thus is not suitable for SH1 / SH2. */ + +/* Signed / unsigned division without use of FPU, optimized for SH4-300. + Uses a lookup table for divisors in the range -128 .. +127, and + div1 with case distinction for larger divisors in three more ranges. + The code is lumped together with the table to allow the use of mova. */ +#ifdef __LITTLE_ENDIAN__ +#define L_LSB 0 +#define L_LSWMSB 1 +#define L_MSWLSB 2 +#else +#define L_LSB 3 +#define L_LSWMSB 2 +#define L_MSWLSB 1 +#endif + + .global GLOBAL(udivsi3_i4i) + .global GLOBAL(sdivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) + FUNC(GLOBAL(sdivsi3_i4i)) + + .balign 4 +LOCAL(div_ge8m): ! 10 cycles up to here + rotcr r1 ! signed shift must use original sign from r4 + div0s r5,r4 + mov #24,r7 + shld r7,r6 + shad r0,r1 + rotcl r6 + div1 r5,r1 + swap.w r5,r0 ! detect -0x80000000 : 0x800000 + rotcl r6 + swap.w r4,r7 + div1 r5,r1 + swap.b r7,r7 + rotcl r6 + or r7,r0 + div1 r5,r1 + swap.w r0,r7 + rotcl r6 + or r7,r0 + div1 r5,r1 + add #-0x80,r0 + rotcl r6 + extu.w r0,r0 + div1 r5,r1 + neg r0,r0 + rotcl r6 + swap.w r0,r0 + div1 r5,r1 + mov.l @r15+,r7 + and r6,r0 + rotcl r6 + div1 r5,r1 + shll2 r0 + rotcl r6 + exts.b r0,r0 + div1 r5,r1 + swap.w r0,r0 + exts.w r0,r1 + exts.b r6,r0 + mov.l @r15+,r6 + rotcl r0 + rts + sub r1,r0 + ! 31 cycles up to here + + .balign 4 +LOCAL(udiv_ge64k): ! 3 cycles up to here + mov r4,r0 + shlr8 r0 + div0u + cmp/hi r0,r5 + bt LOCAL(udiv_r8) + mov.l r5,@-r15 + shll8 r5 + ! 7 cycles up to here + .rept 8 + div1 r5,r0 + .endr + extu.b r4,r1 ! 15 cycles up to here + extu.b r0,r6 + xor r1,r0 + xor r6,r0 + swap.b r6,r6 + .rept 8 + div1 r5,r0 + .endr ! 25 cycles up to here + extu.b r0,r0 + mov.l @r15+,r5 + or r6,r0 + mov.l @r15+,r6 + rts + rotcl r0 ! 28 cycles up to here + + .balign 4 +LOCAL(udiv_r8): ! 6 cycles up to here + mov.l r4,@-r15 + shll16 r4 + shll8 r4 + ! + shll r4 + mov r0,r1 + div1 r5,r1 + mov r4,r0 + rotcl r0 + mov.l @r15+,r4 + div1 r5,r1 + ! 12 cycles up to here + .rept 6 + rotcl r0; div1 r5,r1 + .endr + mov.l @r15+,r6 ! 24 cycles up to here + rts + rotcl r0 + + .balign 4 +LOCAL(div_ge32k): ! 6 cycles up to here + mov.l r7,@-r15 + swap.w r5,r6 + exts.b r6,r7 + exts.w r6,r6 + cmp/eq r6,r7 + extu.b r1,r6 + bf/s LOCAL(div_ge8m) + cmp/hi r1,r4 ! copy sign bit of r4 into T + rotcr r1 ! signed shift must use original sign from r4 + div0s r5,r4 + shad r0,r1 + shll8 r5 + div1 r5,r1 + mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00) + div1 r5,r1 + shlr8 r7 + div1 r5,r1 + swap.w r4,r0 + div1 r5,r1 + swap.b r0,r0 + div1 r5,r1 + or r0,r7 + div1 r5,r1 + add #-80,r7 + div1 r5,r1 + swap.w r7,r0 + div1 r5,r1 + or r0,r7 + extu.b r1,r0 + xor r6,r1 + xor r0,r1 + exts.b r0,r0 + div1 r5,r1 + extu.w r7,r7 + div1 r5,r1 + neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000 + div1 r5,r1 + and r0,r7 + div1 r5,r1 + swap.w r7,r7 ! 26 cycles up to here. + div1 r5,r1 + shll8 r0 + div1 r5,r1 + exts.w r7,r7 + div1 r5,r1 + add r0,r0 + div1 r5,r1 + sub r7,r0 + extu.b r1,r1 + mov.l @r15+,r7 + rotcl r1 + mov.l @r15+,r6 + add r1,r0 + mov #-8,r1 + rts + shad r1,r5 ! 34 cycles up to here + + .balign 4 +GLOBAL(udivsi3_i4i): + mov.l r6,@-r15 + extu.w r5,r6 + cmp/eq r5,r6 + mov #0x7f,r0 + bf LOCAL(udiv_ge64k) + cmp/hi r0,r5 + bf LOCAL(udiv_le128) + mov r4,r1 + shlr8 r1 + div0u + shlr r1 + shll16 r6 + div1 r6,r1 + extu.b r4,r0 ! 7 cycles up to here + .rept 8 + div1 r6,r1 + .endr ! 15 cycles up to here + xor r1,r0 ! xor dividend with result lsb + .rept 6 + div1 r6,r1 + .endr + mov.l r7,@-r15 ! 21 cycles up to here + div1 r6,r1 + extu.b r0,r7 + div1 r6,r1 + shll8 r7 + extu.w r1,r0 + xor r7,r1 ! replace lsb of result with lsb of dividend + div1 r6,r1 + mov #0,r7 + div1 r6,r1 + ! + div1 r6,r1 + bra LOCAL(div_end) + div1 r6,r1 ! 28 cycles up to here + + /* This is link-compatible with a GLOBAL(sdivsi3) call, + but we effectively clobber only r1, macl and mach */ + /* Because negative quotients are calculated as one's complements, + -0x80000000 divided by the smallest positive number of a number + range (0x80, 0x8000, 0x800000) causes saturation in the one's + complement representation, and we have to suppress the + one's -> two's complement adjustment. Since positive numbers + don't get such an adjustment, it's OK to also compute one's -> two's + complement adjustment suppression for a dividend of 0. */ + .balign 4 +GLOBAL(sdivsi3_i4i): + mov.l r6,@-r15 + exts.b r5,r6 + cmp/eq r5,r6 + mov #-1,r1 + bt/s LOCAL(div_le128) + cmp/pz r4 + addc r4,r1 + exts.w r5,r6 + cmp/eq r5,r6 + mov #-7,r0 + bf/s LOCAL(div_ge32k) + cmp/hi r1,r4 ! copy sign bit of r4 into T + rotcr r1 + shll16 r6 ! 7 cycles up to here + shad r0,r1 + div0s r5,r4 + div1 r6,r1 + mov.l r7,@-r15 + div1 r6,r1 + mov r4,r0 ! re-compute adjusted dividend + div1 r6,r1 + mov #-31,r7 + div1 r6,r1 + shad r7,r0 + div1 r6,r1 + add r4,r0 ! adjusted dividend + div1 r6,r1 + mov.l r8,@-r15 + div1 r6,r1 + swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80 + div1 r6,r1 + swap.b r8,r8 + xor r1,r0 ! xor dividend with result lsb + div1 r6,r1 + div1 r6,r1 + or r5,r8 + div1 r6,r1 + add #-0x80,r8 ! r8 is 0 iff there is a match + div1 r6,r1 + swap.w r8,r7 ! or upper 16 bits... + div1 r6,r1 + or r7,r8 !...into lower 16 bits + div1 r6,r1 + extu.w r8,r8 + div1 r6,r1 + extu.b r0,r7 + div1 r6,r1 + shll8 r7 + exts.w r1,r0 + xor r7,r1 ! replace lsb of result with lsb of dividend + div1 r6,r1 + neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm. + div1 r6,r1 + and r0,r8 + div1 r6,r1 + swap.w r8,r7 + div1 r6,r1 + mov.l @r15+,r8 ! 58 insns, 29 cycles up to here +LOCAL(div_end): + div1 r6,r1 + shll8 r0 + div1 r6,r1 + exts.w r7,r7 + div1 r6,r1 + add r0,r0 + div1 r6,r1 + sub r7,r0 + extu.b r1,r1 + mov.l @r15+,r7 + rotcl r1 + mov.l @r15+,r6 + rts + add r1,r0 + + .balign 4 +LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict) + mova LOCAL(div_table_inv),r0 + shll2 r6 + mov.l @(r0,r6),r1 + mova LOCAL(div_table_clz),r0 + lds r4,mach + ! + ! + ! + tst r1,r1 + ! + bt 0f + dmulu.l r1,r4 +0: mov.b @(r0,r5),r1 + clrt + ! + ! + sts mach,r0 + addc r4,r0 + rotcr r0 + mov.l @r15+,r6 + rts + shld r1,r0 + + .balign 4 +LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict) + mova LOCAL(div_table_inv),r0 + shll2 r6 + mov.l @(r0,r6),r1 + mova LOCAL(div_table_clz),r0 + neg r4,r6 + bf 0f + mov r4,r6 +0: lds r6,mach + tst r1,r1 + bt 0f + dmulu.l r1,r6 +0: div0s r4,r5 + mov.b @(r0,r5),r1 + bt/s LOCAL(le128_neg) + clrt + ! + sts mach,r0 + addc r6,r0 + rotcr r0 + mov.l @r15+,r6 + rts + shld r1,r0 + +/* Could trap divide by zero for the cost of one cycle more mispredict penalty: +... + dmulu.l r1,r6 +0: div0s r4,r5 + bt/s LOCAL(le128_neg) + tst r5,r5 + bt LOCAL(div_by_zero) + mov.b @(r0,r5),r1 + sts mach,r0 + addc r6,r0 +... +LOCAL(div_by_zero): + trapa # + .balign 4 +LOCAL(le128_neg): + bt LOCAL(div_by_zero) + mov.b @(r0,r5),r1 + sts mach,r0 + addc r6,r0 +... */ + + .balign 4 +LOCAL(le128_neg): + sts mach,r0 + addc r6,r0 + rotcr r0 + mov.l @r15+,r6 + shad r1,r0 + rts + neg r0,r0 + ENDFUNC(GLOBAL(udivsi3_i4i)) + ENDFUNC(GLOBAL(sdivsi3_i4i)) + +/* This table has been generated by divtab-sh4.c. */ + .balign 4 + .byte -7 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -1 + .byte -1 + .byte 0 +LOCAL(div_table_clz): + .byte 0 + .byte 0 + .byte -1 + .byte -1 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 +/* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32, + or in bit 33 for powers of two. */ + .balign 4 + .long 0x0 + .long 0x2040811 + .long 0x4104105 + .long 0x624DD30 + .long 0x8421085 + .long 0xA6810A7 + .long 0xC9714FC + .long 0xECF56BF + .long 0x11111112 + .long 0x135C8114 + .long 0x15B1E5F8 + .long 0x18118119 + .long 0x1A7B9612 + .long 0x1CF06ADB + .long 0x1F7047DD + .long 0x21FB7813 + .long 0x24924925 + .long 0x27350B89 + .long 0x29E4129F + .long 0x2C9FB4D9 + .long 0x2F684BDB + .long 0x323E34A3 + .long 0x3521CFB3 + .long 0x38138139 + .long 0x3B13B13C + .long 0x3E22CBCF + .long 0x41414142 + .long 0x446F8657 + .long 0x47AE147B + .long 0x4AFD6A06 + .long 0x4E5E0A73 + .long 0x51D07EAF + .long 0x55555556 + .long 0x58ED2309 + .long 0x5C9882BA + .long 0x60581606 + .long 0x642C8591 + .long 0x68168169 + .long 0x6C16C16D + .long 0x702E05C1 + .long 0x745D1746 + .long 0x78A4C818 + .long 0x7D05F418 + .long 0x81818182 + .long 0x86186187 + .long 0x8ACB90F7 + .long 0x8F9C18FA + .long 0x948B0FCE + .long 0x9999999A + .long 0x9EC8E952 + .long 0xA41A41A5 + .long 0xA98EF607 + .long 0xAF286BCB + .long 0xB4E81B4F + .long 0xBACF914D + .long 0xC0E07039 + .long 0xC71C71C8 + .long 0xCD856891 + .long 0xD41D41D5 + .long 0xDAE6076C + .long 0xE1E1E1E2 + .long 0xE9131AC0 + .long 0xF07C1F08 + .long 0xF81F81F9 + .long 0x0 + .long 0x4104105 + .long 0x8421085 + .long 0xC9714FC + .long 0x11111112 + .long 0x15B1E5F8 + .long 0x1A7B9612 + .long 0x1F7047DD + .long 0x24924925 + .long 0x29E4129F + .long 0x2F684BDB + .long 0x3521CFB3 + .long 0x3B13B13C + .long 0x41414142 + .long 0x47AE147B + .long 0x4E5E0A73 + .long 0x55555556 + .long 0x5C9882BA + .long 0x642C8591 + .long 0x6C16C16D + .long 0x745D1746 + .long 0x7D05F418 + .long 0x86186187 + .long 0x8F9C18FA + .long 0x9999999A + .long 0xA41A41A5 + .long 0xAF286BCB + .long 0xBACF914D + .long 0xC71C71C8 + .long 0xD41D41D5 + .long 0xE1E1E1E2 + .long 0xF07C1F08 + .long 0x0 + .long 0x8421085 + .long 0x11111112 + .long 0x1A7B9612 + .long 0x24924925 + .long 0x2F684BDB + .long 0x3B13B13C + .long 0x47AE147B + .long 0x55555556 + .long 0x642C8591 + .long 0x745D1746 + .long 0x86186187 + .long 0x9999999A + .long 0xAF286BCB + .long 0xC71C71C8 + .long 0xE1E1E1E2 + .long 0x0 + .long 0x11111112 + .long 0x24924925 + .long 0x3B13B13C + .long 0x55555556 + .long 0x745D1746 + .long 0x9999999A + .long 0xC71C71C8 + .long 0x0 + .long 0x24924925 + .long 0x55555556 + .long 0x9999999A + .long 0x0 + .long 0x55555556 + .long 0x0 + .long 0x0 +LOCAL(div_table_inv): + .long 0x0 + .long 0x0 + .long 0x0 + .long 0x55555556 + .long 0x0 + .long 0x9999999A + .long 0x55555556 + .long 0x24924925 + .long 0x0 + .long 0xC71C71C8 + .long 0x9999999A + .long 0x745D1746 + .long 0x55555556 + .long 0x3B13B13C + .long 0x24924925 + .long 0x11111112 + .long 0x0 + .long 0xE1E1E1E2 + .long 0xC71C71C8 + .long 0xAF286BCB + .long 0x9999999A + .long 0x86186187 + .long 0x745D1746 + .long 0x642C8591 + .long 0x55555556 + .long 0x47AE147B + .long 0x3B13B13C + .long 0x2F684BDB + .long 0x24924925 + .long 0x1A7B9612 + .long 0x11111112 + .long 0x8421085 + .long 0x0 + .long 0xF07C1F08 + .long 0xE1E1E1E2 + .long 0xD41D41D5 + .long 0xC71C71C8 + .long 0xBACF914D + .long 0xAF286BCB + .long 0xA41A41A5 + .long 0x9999999A + .long 0x8F9C18FA + .long 0x86186187 + .long 0x7D05F418 + .long 0x745D1746 + .long 0x6C16C16D + .long 0x642C8591 + .long 0x5C9882BA + .long 0x55555556 + .long 0x4E5E0A73 + .long 0x47AE147B + .long 0x41414142 + .long 0x3B13B13C + .long 0x3521CFB3 + .long 0x2F684BDB + .long 0x29E4129F + .long 0x24924925 + .long 0x1F7047DD + .long 0x1A7B9612 + .long 0x15B1E5F8 + .long 0x11111112 + .long 0xC9714FC + .long 0x8421085 + .long 0x4104105 + .long 0x0 + .long 0xF81F81F9 + .long 0xF07C1F08 + .long 0xE9131AC0 + .long 0xE1E1E1E2 + .long 0xDAE6076C + .long 0xD41D41D5 + .long 0xCD856891 + .long 0xC71C71C8 + .long 0xC0E07039 + .long 0xBACF914D + .long 0xB4E81B4F + .long 0xAF286BCB + .long 0xA98EF607 + .long 0xA41A41A5 + .long 0x9EC8E952 + .long 0x9999999A + .long 0x948B0FCE + .long 0x8F9C18FA + .long 0x8ACB90F7 + .long 0x86186187 + .long 0x81818182 + .long 0x7D05F418 + .long 0x78A4C818 + .long 0x745D1746 + .long 0x702E05C1 + .long 0x6C16C16D + .long 0x68168169 + .long 0x642C8591 + .long 0x60581606 + .long 0x5C9882BA + .long 0x58ED2309 + .long 0x55555556 + .long 0x51D07EAF + .long 0x4E5E0A73 + .long 0x4AFD6A06 + .long 0x47AE147B + .long 0x446F8657 + .long 0x41414142 + .long 0x3E22CBCF + .long 0x3B13B13C + .long 0x38138139 + .long 0x3521CFB3 + .long 0x323E34A3 + .long 0x2F684BDB + .long 0x2C9FB4D9 + .long 0x29E4129F + .long 0x27350B89 + .long 0x24924925 + .long 0x21FB7813 + .long 0x1F7047DD + .long 0x1CF06ADB + .long 0x1A7B9612 + .long 0x18118119 + .long 0x15B1E5F8 + .long 0x135C8114 + .long 0x11111112 + .long 0xECF56BF + .long 0xC9714FC + .long 0xA6810A7 + .long 0x8421085 + .long 0x624DD30 + .long 0x4104105 + .long 0x2040811 + /* maximum error: 0.987342 scaled: 0.921875*/ + +#endif /* SH3 / SH4 */ + +#endif /* L_div_table */ +#endif /* !__SHMEDIA__ */ diff --git a/gcc-4.9/libgcc/config/sh/lib1funcs-Os-4-200.S b/gcc-4.9/libgcc/config/sh/lib1funcs-Os-4-200.S new file mode 100644 index 000000000..f541c81b1 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/lib1funcs-Os-4-200.S @@ -0,0 +1,322 @@ +/* Copyright (C) 2006-2014 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Moderately Space-optimized libgcc routines for the Renesas SH / + STMicroelectronics ST40 CPUs. + Contributed by J"orn Rennecke joern.rennecke@st.com. */ + +#include "lib1funcs.h" + +#if !__SHMEDIA__ +#ifdef L_udivsi3_i4i + +/* 88 bytes; sh4-200 cycle counts: + divisor >= 2G: 11 cycles + dividend < 2G: 48 cycles + dividend >= 2G: divisor != 1: 54 cycles + dividend >= 2G, divisor == 1: 22 cycles */ +#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__) +!! args in r4 and r5, result in r0, clobber r1 + + .global GLOBAL(udivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) +GLOBAL(udivsi3_i4i): + mova L1,r0 + cmp/pz r5 + sts fpscr,r1 + lds.l @r0+,fpscr + sts.l fpul,@-r15 + bf LOCAL(huge_divisor) + mov.l r1,@-r15 + lds r4,fpul + cmp/pz r4 +#ifdef FMOVD_WORKS + fmov.d dr0,@-r15 + float fpul,dr0 + fmov.d dr2,@-r15 + bt LOCAL(dividend_adjusted) + mov #1,r1 + fmov.d @r0,dr2 + cmp/eq r1,r5 + bt LOCAL(div_by_1) + fadd dr2,dr0 +LOCAL(dividend_adjusted): + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 +LOCAL(div_by_1): + fmov.d @r15+,dr2 + ftrc dr0,fpul + fmov.d @r15+,dr0 +#else /* !FMOVD_WORKS */ + fmov.s DR01,@-r15 + mov #1,r1 + fmov.s DR00,@-r15 + float fpul,dr0 + fmov.s DR21,@-r15 + bt/s LOCAL(dividend_adjusted) + fmov.s DR20,@-r15 + cmp/eq r1,r5 + bt LOCAL(div_by_1) + fmov.s @r0+,DR20 + fmov.s @r0,DR21 + fadd dr2,dr0 +LOCAL(dividend_adjusted): + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 +LOCAL(div_by_1): + fmov.s @r15+,DR20 + fmov.s @r15+,DR21 + ftrc dr0,fpul + fmov.s @r15+,DR00 + fmov.s @r15+,DR01 +#endif /* !FMOVD_WORKS */ + lds.l @r15+,fpscr + sts fpul,r0 + rts + lds.l @r15+,fpul + +#ifdef FMOVD_WORKS + .p2align 3 ! make double below 8 byte aligned. +#endif +LOCAL(huge_divisor): + lds r1,fpscr + add #4,r15 + cmp/hs r5,r4 + rts + movt r0 + + .p2align 2 +L1: +#ifndef FMOVD_WORKS + .long 0x80000 +#else + .long 0x180000 +#endif + .double 4294967296 + + ENDFUNC(GLOBAL(udivsi3_i4i)) +#elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */ + +#if 0 +/* With 36 bytes, the following would probably be the most compact + implementation, but with 139 cycles on an sh4-200, it is extremely slow. */ +GLOBAL(udivsi3_i4i): + mov.l r2,@-r15 + mov #0,r1 + div0u + mov r1,r2 + mov.l r3,@-r15 + mov r1,r3 + sett + mov r4,r0 +LOCAL(loop): + rotcr r2 + ; + bt/s LOCAL(end) + cmp/gt r2,r3 + rotcl r0 + bra LOCAL(loop) + div1 r5,r1 +LOCAL(end): + rotcl r0 + mov.l @r15+,r3 + rts + mov.l @r15+,r2 +#endif /* 0 */ + +/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i + sh4-200 run times: + udiv small divisor: 55 cycles + udiv large divisor: 52 cycles + sdiv small divisor, positive result: 59 cycles + sdiv large divisor, positive result: 56 cycles + sdiv small divisor, negative result: 65 cycles (*) + sdiv large divisor, negative result: 62 cycles (*) + (*): r2 is restored in the rts delay slot and has a lingering latency + of two more cycles. */ + .balign 4 + .global GLOBAL(udivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) + FUNC(GLOBAL(sdivsi3_i4i)) +GLOBAL(udivsi3_i4i): + sts pr,r1 + mov.l r4,@-r15 + extu.w r5,r0 + cmp/eq r5,r0 + swap.w r4,r0 + shlr16 r4 + bf/s LOCAL(large_divisor) + div0u + mov.l r5,@-r15 + shll16 r5 +LOCAL(sdiv_small_divisor): + div1 r5,r4 + bsr LOCAL(div6) + div1 r5,r4 + div1 r5,r4 + bsr LOCAL(div6) + div1 r5,r4 + xtrct r4,r0 + xtrct r0,r4 + bsr LOCAL(div7) + swap.w r4,r4 + div1 r5,r4 + bsr LOCAL(div7) + div1 r5,r4 + xtrct r4,r0 + mov.l @r15+,r5 + swap.w r0,r0 + mov.l @r15+,r4 + jmp @r1 + rotcl r0 +LOCAL(div7): + div1 r5,r4 +LOCAL(div6): + div1 r5,r4; div1 r5,r4; div1 r5,r4 + div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 + +LOCAL(divx3): + rotcl r0 + div1 r5,r4 + rotcl r0 + div1 r5,r4 + rotcl r0 + rts + div1 r5,r4 + +LOCAL(large_divisor): + mov.l r5,@-r15 +LOCAL(sdiv_large_divisor): + xor r4,r0 + .rept 4 + rotcl r0 + bsr LOCAL(divx3) + div1 r5,r4 + .endr + mov.l @r15+,r5 + mov.l @r15+,r4 + jmp @r1 + rotcl r0 + ENDFUNC(GLOBAL(udivsi3_i4i)) + + .global GLOBAL(sdivsi3_i4i) +GLOBAL(sdivsi3_i4i): + mov.l r4,@-r15 + cmp/pz r5 + mov.l r5,@-r15 + bt/s LOCAL(pos_divisor) + cmp/pz r4 + neg r5,r5 + extu.w r5,r0 + bt/s LOCAL(neg_result) + cmp/eq r5,r0 + neg r4,r4 +LOCAL(pos_result): + swap.w r4,r0 + bra LOCAL(sdiv_check_divisor) + sts pr,r1 +LOCAL(pos_divisor): + extu.w r5,r0 + bt/s LOCAL(pos_result) + cmp/eq r5,r0 + neg r4,r4 +LOCAL(neg_result): + mova LOCAL(negate_result),r0 + ; + mov r0,r1 + swap.w r4,r0 + lds r2,macl + sts pr,r2 +LOCAL(sdiv_check_divisor): + shlr16 r4 + bf/s LOCAL(sdiv_large_divisor) + div0u + bra LOCAL(sdiv_small_divisor) + shll16 r5 + .balign 4 +LOCAL(negate_result): + neg r0,r0 + jmp @r2 + sts macl,r2 + ENDFUNC(GLOBAL(sdivsi3_i4i)) +#endif /* !__SH_FPU_DOUBLE__ */ +#endif /* L_udivsi3_i4i */ + +#ifdef L_sdivsi3_i4i +#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__) +/* 48 bytes, 45 cycles on sh4-200 */ +!! args in r4 and r5, result in r0, clobber r1 + + .global GLOBAL(sdivsi3_i4i) + FUNC(GLOBAL(sdivsi3_i4i)) +GLOBAL(sdivsi3_i4i): + sts.l fpscr,@-r15 + sts fpul,r1 + mova L1,r0 + lds.l @r0+,fpscr + lds r4,fpul +#ifdef FMOVD_WORKS + fmov.d dr0,@-r15 + float fpul,dr0 + lds r5,fpul + fmov.d dr2,@-r15 +#else + fmov.s DR01,@-r15 + fmov.s DR00,@-r15 + float fpul,dr0 + lds r5,fpul + fmov.s DR21,@-r15 + fmov.s DR20,@-r15 +#endif + float fpul,dr2 + fdiv dr2,dr0 +#ifdef FMOVD_WORKS + fmov.d @r15+,dr2 +#else + fmov.s @r15+,DR20 + fmov.s @r15+,DR21 +#endif + ftrc dr0,fpul +#ifdef FMOVD_WORKS + fmov.d @r15+,dr0 +#else + fmov.s @r15+,DR00 + fmov.s @r15+,DR01 +#endif + lds.l @r15+,fpscr + sts fpul,r0 + rts + lds r1,fpul + + .p2align 2 +L1: +#ifndef FMOVD_WORKS + .long 0x80000 +#else + .long 0x180000 +#endif + + ENDFUNC(GLOBAL(sdivsi3_i4i)) +#endif /* __SH_FPU_DOUBLE__ */ +#endif /* L_sdivsi3_i4i */ +#endif /* !__SHMEDIA__ */ diff --git a/gcc-4.9/libgcc/config/sh/lib1funcs.S b/gcc-4.9/libgcc/config/sh/lib1funcs.S new file mode 100644 index 000000000..3410cf7c1 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/lib1funcs.S @@ -0,0 +1,4047 @@ +/* Copyright (C) 1994-2014 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +!! libgcc routines for the Renesas / SuperH SH CPUs. +!! Contributed by Steve Chamberlain. +!! sac@cygnus.com + +!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines +!! recoded in assembly by Toshiyasu Morita +!! tm@netcom.com + +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and + ELF local label prefixes by J"orn Rennecke + amylaar@cygnus.com */ + +#include "lib1funcs.h" + +/* t-vxworks needs to build both PIC and non-PIC versions of libgcc, + so it is more convenient to define NO_FPSCR_VALUES here than to + define it on the command line. */ +#if defined __vxworks && defined __PIC__ +#define NO_FPSCR_VALUES +#endif + +#if ! __SH5__ +#ifdef L_ashiftrt + .global GLOBAL(ashiftrt_r4_0) + .global GLOBAL(ashiftrt_r4_1) + .global GLOBAL(ashiftrt_r4_2) + .global GLOBAL(ashiftrt_r4_3) + .global GLOBAL(ashiftrt_r4_4) + .global GLOBAL(ashiftrt_r4_5) + .global GLOBAL(ashiftrt_r4_6) + .global GLOBAL(ashiftrt_r4_7) + .global GLOBAL(ashiftrt_r4_8) + .global GLOBAL(ashiftrt_r4_9) + .global GLOBAL(ashiftrt_r4_10) + .global GLOBAL(ashiftrt_r4_11) + .global GLOBAL(ashiftrt_r4_12) + .global GLOBAL(ashiftrt_r4_13) + .global GLOBAL(ashiftrt_r4_14) + .global GLOBAL(ashiftrt_r4_15) + .global GLOBAL(ashiftrt_r4_16) + .global GLOBAL(ashiftrt_r4_17) + .global GLOBAL(ashiftrt_r4_18) + .global GLOBAL(ashiftrt_r4_19) + .global GLOBAL(ashiftrt_r4_20) + .global GLOBAL(ashiftrt_r4_21) + .global GLOBAL(ashiftrt_r4_22) + .global GLOBAL(ashiftrt_r4_23) + .global GLOBAL(ashiftrt_r4_24) + .global GLOBAL(ashiftrt_r4_25) + .global GLOBAL(ashiftrt_r4_26) + .global GLOBAL(ashiftrt_r4_27) + .global GLOBAL(ashiftrt_r4_28) + .global GLOBAL(ashiftrt_r4_29) + .global GLOBAL(ashiftrt_r4_30) + .global GLOBAL(ashiftrt_r4_31) + .global GLOBAL(ashiftrt_r4_32) + + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31)) + HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32)) + + .align 1 +GLOBAL(ashiftrt_r4_32): +GLOBAL(ashiftrt_r4_31): + rotcl r4 + rts + subc r4,r4 + +GLOBAL(ashiftrt_r4_30): + shar r4 +GLOBAL(ashiftrt_r4_29): + shar r4 +GLOBAL(ashiftrt_r4_28): + shar r4 +GLOBAL(ashiftrt_r4_27): + shar r4 +GLOBAL(ashiftrt_r4_26): + shar r4 +GLOBAL(ashiftrt_r4_25): + shar r4 +GLOBAL(ashiftrt_r4_24): + shlr16 r4 + shlr8 r4 + rts + exts.b r4,r4 + +GLOBAL(ashiftrt_r4_23): + shar r4 +GLOBAL(ashiftrt_r4_22): + shar r4 +GLOBAL(ashiftrt_r4_21): + shar r4 +GLOBAL(ashiftrt_r4_20): + shar r4 +GLOBAL(ashiftrt_r4_19): + shar r4 +GLOBAL(ashiftrt_r4_18): + shar r4 +GLOBAL(ashiftrt_r4_17): + shar r4 +GLOBAL(ashiftrt_r4_16): + shlr16 r4 + rts + exts.w r4,r4 + +GLOBAL(ashiftrt_r4_15): + shar r4 +GLOBAL(ashiftrt_r4_14): + shar r4 +GLOBAL(ashiftrt_r4_13): + shar r4 +GLOBAL(ashiftrt_r4_12): + shar r4 +GLOBAL(ashiftrt_r4_11): + shar r4 +GLOBAL(ashiftrt_r4_10): + shar r4 +GLOBAL(ashiftrt_r4_9): + shar r4 +GLOBAL(ashiftrt_r4_8): + shar r4 +GLOBAL(ashiftrt_r4_7): + shar r4 +GLOBAL(ashiftrt_r4_6): + shar r4 +GLOBAL(ashiftrt_r4_5): + shar r4 +GLOBAL(ashiftrt_r4_4): + shar r4 +GLOBAL(ashiftrt_r4_3): + shar r4 +GLOBAL(ashiftrt_r4_2): + shar r4 +GLOBAL(ashiftrt_r4_1): + rts + shar r4 + +GLOBAL(ashiftrt_r4_0): + rts + nop + + ENDFUNC(GLOBAL(ashiftrt_r4_0)) + ENDFUNC(GLOBAL(ashiftrt_r4_1)) + ENDFUNC(GLOBAL(ashiftrt_r4_2)) + ENDFUNC(GLOBAL(ashiftrt_r4_3)) + ENDFUNC(GLOBAL(ashiftrt_r4_4)) + ENDFUNC(GLOBAL(ashiftrt_r4_5)) + ENDFUNC(GLOBAL(ashiftrt_r4_6)) + ENDFUNC(GLOBAL(ashiftrt_r4_7)) + ENDFUNC(GLOBAL(ashiftrt_r4_8)) + ENDFUNC(GLOBAL(ashiftrt_r4_9)) + ENDFUNC(GLOBAL(ashiftrt_r4_10)) + ENDFUNC(GLOBAL(ashiftrt_r4_11)) + ENDFUNC(GLOBAL(ashiftrt_r4_12)) + ENDFUNC(GLOBAL(ashiftrt_r4_13)) + ENDFUNC(GLOBAL(ashiftrt_r4_14)) + ENDFUNC(GLOBAL(ashiftrt_r4_15)) + ENDFUNC(GLOBAL(ashiftrt_r4_16)) + ENDFUNC(GLOBAL(ashiftrt_r4_17)) + ENDFUNC(GLOBAL(ashiftrt_r4_18)) + ENDFUNC(GLOBAL(ashiftrt_r4_19)) + ENDFUNC(GLOBAL(ashiftrt_r4_20)) + ENDFUNC(GLOBAL(ashiftrt_r4_21)) + ENDFUNC(GLOBAL(ashiftrt_r4_22)) + ENDFUNC(GLOBAL(ashiftrt_r4_23)) + ENDFUNC(GLOBAL(ashiftrt_r4_24)) + ENDFUNC(GLOBAL(ashiftrt_r4_25)) + ENDFUNC(GLOBAL(ashiftrt_r4_26)) + ENDFUNC(GLOBAL(ashiftrt_r4_27)) + ENDFUNC(GLOBAL(ashiftrt_r4_28)) + ENDFUNC(GLOBAL(ashiftrt_r4_29)) + ENDFUNC(GLOBAL(ashiftrt_r4_30)) + ENDFUNC(GLOBAL(ashiftrt_r4_31)) + ENDFUNC(GLOBAL(ashiftrt_r4_32)) +#endif + +#ifdef L_ashiftrt_n + +! +! GLOBAL(ashrsi3) +! +! Entry: +! +! r4: Value to shift +! r5: Shift count +! +! Exit: +! +! r0: Result +! +! Destroys: +! +! T bit, r5 +! + + .global GLOBAL(ashrsi3) + HIDDEN_FUNC(GLOBAL(ashrsi3)) + .align 2 +GLOBAL(ashrsi3): + mov #31,r0 + and r0,r5 + mova LOCAL(ashrsi3_table),r0 + mov.b @(r0,r5),r5 +#ifdef __sh1__ + add r5,r0 + jmp @r0 +#else + braf r5 +#endif + mov r4,r0 + + .align 2 +LOCAL(ashrsi3_table): + .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) + .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) + +LOCAL(ashrsi3_31): + rotcl r0 + rts + subc r0,r0 + +LOCAL(ashrsi3_30): + shar r0 +LOCAL(ashrsi3_29): + shar r0 +LOCAL(ashrsi3_28): + shar r0 +LOCAL(ashrsi3_27): + shar r0 +LOCAL(ashrsi3_26): + shar r0 +LOCAL(ashrsi3_25): + shar r0 +LOCAL(ashrsi3_24): + shlr16 r0 + shlr8 r0 + rts + exts.b r0,r0 + +LOCAL(ashrsi3_23): + shar r0 +LOCAL(ashrsi3_22): + shar r0 +LOCAL(ashrsi3_21): + shar r0 +LOCAL(ashrsi3_20): + shar r0 +LOCAL(ashrsi3_19): + shar r0 +LOCAL(ashrsi3_18): + shar r0 +LOCAL(ashrsi3_17): + shar r0 +LOCAL(ashrsi3_16): + shlr16 r0 + rts + exts.w r0,r0 + +LOCAL(ashrsi3_15): + shar r0 +LOCAL(ashrsi3_14): + shar r0 +LOCAL(ashrsi3_13): + shar r0 +LOCAL(ashrsi3_12): + shar r0 +LOCAL(ashrsi3_11): + shar r0 +LOCAL(ashrsi3_10): + shar r0 +LOCAL(ashrsi3_9): + shar r0 +LOCAL(ashrsi3_8): + shar r0 +LOCAL(ashrsi3_7): + shar r0 +LOCAL(ashrsi3_6): + shar r0 +LOCAL(ashrsi3_5): + shar r0 +LOCAL(ashrsi3_4): + shar r0 +LOCAL(ashrsi3_3): + shar r0 +LOCAL(ashrsi3_2): + shar r0 +LOCAL(ashrsi3_1): + rts + shar r0 + +LOCAL(ashrsi3_0): + rts + nop + + ENDFUNC(GLOBAL(ashrsi3)) +#endif + +#ifdef L_ashiftlt + +! +! GLOBAL(ashlsi3) +! (For compatibility with older binaries, not used by compiler) +! +! Entry: +! r4: Value to shift +! r5: Shift count +! +! Exit: +! r0: Result +! +! Destroys: +! T bit +! +! +! GLOBAL(ashlsi3_r0) +! +! Entry: +! r4: Value to shift +! r0: Shift count +! +! Exit: +! r0: Result +! +! Destroys: +! T bit + + .global GLOBAL(ashlsi3) + .global GLOBAL(ashlsi3_r0) + HIDDEN_FUNC(GLOBAL(ashlsi3)) + HIDDEN_FUNC(GLOBAL(ashlsi3_r0)) +GLOBAL(ashlsi3): + mov r5,r0 + .align 2 +GLOBAL(ashlsi3_r0): + +#ifdef __sh1__ + and #31,r0 + shll2 r0 + mov.l r4,@-r15 + mov r0,r4 + mova LOCAL(ashlsi3_table),r0 + add r4,r0 + mov.l @r15+,r4 + jmp @r0 + mov r4,r0 + .align 2 +#else + and #31,r0 + shll2 r0 + braf r0 + mov r4,r0 +#endif + +LOCAL(ashlsi3_table): + rts // << 0 + nop +LOCAL(ashlsi_1): + rts // << 1 + shll r0 +LOCAL(ashlsi_2): // << 2 + rts + shll2 r0 + bra LOCAL(ashlsi_1) // << 3 + shll2 r0 + bra LOCAL(ashlsi_2) // << 4 + shll2 r0 + bra LOCAL(ashlsi_5) // << 5 + shll r0 + bra LOCAL(ashlsi_6) // << 6 + shll2 r0 + bra LOCAL(ashlsi_7) // << 7 + shll r0 +LOCAL(ashlsi_8): // << 8 + rts + shll8 r0 + bra LOCAL(ashlsi_8) // << 9 + shll r0 + bra LOCAL(ashlsi_8) // << 10 + shll2 r0 + bra LOCAL(ashlsi_11) // << 11 + shll r0 + bra LOCAL(ashlsi_12) // << 12 + shll2 r0 + bra LOCAL(ashlsi_13) // << 13 + shll r0 + bra LOCAL(ashlsi_14) // << 14 + shll8 r0 + bra LOCAL(ashlsi_15) // << 15 + shll8 r0 +LOCAL(ashlsi_16): // << 16 + rts + shll16 r0 + bra LOCAL(ashlsi_16) // << 17 + shll r0 + bra LOCAL(ashlsi_16) // << 18 + shll2 r0 + bra LOCAL(ashlsi_19) // << 19 + shll r0 + bra LOCAL(ashlsi_20) // << 20 + shll2 r0 + bra LOCAL(ashlsi_21) // << 21 + shll r0 + bra LOCAL(ashlsi_22) // << 22 + shll16 r0 + bra LOCAL(ashlsi_23) // << 23 + shll16 r0 + bra LOCAL(ashlsi_16) // << 24 + shll8 r0 + bra LOCAL(ashlsi_25) // << 25 + shll r0 + bra LOCAL(ashlsi_26) // << 26 + shll2 r0 + bra LOCAL(ashlsi_27) // << 27 + shll r0 + bra LOCAL(ashlsi_28) // << 28 + shll2 r0 + bra LOCAL(ashlsi_29) // << 29 + shll16 r0 + bra LOCAL(ashlsi_30) // << 30 + shll16 r0 + and #1,r0 // << 31 + rts + rotr r0 + +LOCAL(ashlsi_7): + shll2 r0 +LOCAL(ashlsi_5): +LOCAL(ashlsi_6): + shll2 r0 + rts +LOCAL(ashlsi_13): + shll2 r0 +LOCAL(ashlsi_12): +LOCAL(ashlsi_11): + shll8 r0 + rts +LOCAL(ashlsi_21): + shll2 r0 +LOCAL(ashlsi_20): +LOCAL(ashlsi_19): + shll16 r0 + rts +LOCAL(ashlsi_28): +LOCAL(ashlsi_27): + shll2 r0 +LOCAL(ashlsi_26): +LOCAL(ashlsi_25): + shll16 r0 + rts + shll8 r0 + +LOCAL(ashlsi_22): +LOCAL(ashlsi_14): + shlr2 r0 + rts + shll8 r0 + +LOCAL(ashlsi_23): +LOCAL(ashlsi_15): + shlr r0 + rts + shll8 r0 + +LOCAL(ashlsi_29): + shlr r0 +LOCAL(ashlsi_30): + shlr2 r0 + rts + shll16 r0 + + ENDFUNC(GLOBAL(ashlsi3)) + ENDFUNC(GLOBAL(ashlsi3_r0)) +#endif + +#ifdef L_lshiftrt + +! +! GLOBAL(lshrsi3) +! (For compatibility with older binaries, not used by compiler) +! +! Entry: +! r4: Value to shift +! r5: Shift count +! +! Exit: +! r0: Result +! +! Destroys: +! T bit +! +! +! GLOBAL(lshrsi3_r0) +! +! Entry: +! r4: Value to shift +! r0: Shift count +! +! Exit: +! r0: Result +! +! Destroys: +! T bit + + .global GLOBAL(lshrsi3) + .global GLOBAL(lshrsi3_r0) + HIDDEN_FUNC(GLOBAL(lshrsi3)) + HIDDEN_FUNC(GLOBAL(lshrsi3_r0)) +GLOBAL(lshrsi3): + mov r5,r0 + .align 2 +GLOBAL(lshrsi3_r0): + +#ifdef __sh1__ + and #31,r0 + shll2 r0 + mov.l r4,@-r15 + mov r0,r4 + mova LOCAL(lshrsi3_table),r0 + add r4,r0 + mov.l @r15+,r4 + jmp @r0 + mov r4,r0 + .align 2 +#else + and #31,r0 + shll2 r0 + braf r0 + mov r4,r0 +#endif +LOCAL(lshrsi3_table): + rts // >> 0 + nop +LOCAL(lshrsi_1): // >> 1 + rts + shlr r0 +LOCAL(lshrsi_2): // >> 2 + rts + shlr2 r0 + bra LOCAL(lshrsi_1) // >> 3 + shlr2 r0 + bra LOCAL(lshrsi_2) // >> 4 + shlr2 r0 + bra LOCAL(lshrsi_5) // >> 5 + shlr r0 + bra LOCAL(lshrsi_6) // >> 6 + shlr2 r0 + bra LOCAL(lshrsi_7) // >> 7 + shlr r0 +LOCAL(lshrsi_8): // >> 8 + rts + shlr8 r0 + bra LOCAL(lshrsi_8) // >> 9 + shlr r0 + bra LOCAL(lshrsi_8) // >> 10 + shlr2 r0 + bra LOCAL(lshrsi_11) // >> 11 + shlr r0 + bra LOCAL(lshrsi_12) // >> 12 + shlr2 r0 + bra LOCAL(lshrsi_13) // >> 13 + shlr r0 + bra LOCAL(lshrsi_14) // >> 14 + shlr8 r0 + bra LOCAL(lshrsi_15) // >> 15 + shlr8 r0 +LOCAL(lshrsi_16): // >> 16 + rts + shlr16 r0 + bra LOCAL(lshrsi_16) // >> 17 + shlr r0 + bra LOCAL(lshrsi_16) // >> 18 + shlr2 r0 + bra LOCAL(lshrsi_19) // >> 19 + shlr r0 + bra LOCAL(lshrsi_20) // >> 20 + shlr2 r0 + bra LOCAL(lshrsi_21) // >> 21 + shlr r0 + bra LOCAL(lshrsi_22) // >> 22 + shlr16 r0 + bra LOCAL(lshrsi_23) // >> 23 + shlr16 r0 + bra LOCAL(lshrsi_16) // >> 24 + shlr8 r0 + bra LOCAL(lshrsi_25) // >> 25 + shlr r0 + bra LOCAL(lshrsi_26) // >> 26 + shlr2 r0 + bra LOCAL(lshrsi_27) // >> 27 + shlr r0 + bra LOCAL(lshrsi_28) // >> 28 + shlr2 r0 + bra LOCAL(lshrsi_29) // >> 29 + shlr16 r0 + bra LOCAL(lshrsi_30) // >> 30 + shlr16 r0 + shll r0 // >> 31 + rts + movt r0 + +LOCAL(lshrsi_7): + shlr2 r0 +LOCAL(lshrsi_5): +LOCAL(lshrsi_6): + shlr2 r0 + rts +LOCAL(lshrsi_13): + shlr2 r0 +LOCAL(lshrsi_12): +LOCAL(lshrsi_11): + shlr8 r0 + rts +LOCAL(lshrsi_21): + shlr2 r0 +LOCAL(lshrsi_20): +LOCAL(lshrsi_19): + shlr16 r0 + rts +LOCAL(lshrsi_28): +LOCAL(lshrsi_27): + shlr2 r0 +LOCAL(lshrsi_26): +LOCAL(lshrsi_25): + shlr16 r0 + rts + shlr8 r0 + +LOCAL(lshrsi_22): +LOCAL(lshrsi_14): + shll2 r0 + rts + shlr8 r0 + +LOCAL(lshrsi_23): +LOCAL(lshrsi_15): + shll r0 + rts + shlr8 r0 + +LOCAL(lshrsi_29): + shll r0 +LOCAL(lshrsi_30): + shll2 r0 + rts + shlr16 r0 + + ENDFUNC(GLOBAL(lshrsi3)) + ENDFUNC(GLOBAL(lshrsi3_r0)) +#endif + +#ifdef L_movmem + .text + .balign 4 + .global GLOBAL(movmem) + HIDDEN_FUNC(GLOBAL(movmem)) + HIDDEN_ALIAS(movstr,movmem) + /* This would be a lot simpler if r6 contained the byte count + minus 64, and we wouldn't be called here for a byte count of 64. */ +GLOBAL(movmem): + sts.l pr,@-r15 + shll2 r6 + bsr GLOBAL(movmemSI52+2) + mov.l @(48,r5),r0 + .balign 4 +LOCAL(movmem_loop): /* Reached with rts */ + mov.l @(60,r5),r0 + add #-64,r6 + mov.l r0,@(60,r4) + tst r6,r6 + mov.l @(56,r5),r0 + bt LOCAL(movmem_done) + mov.l r0,@(56,r4) + cmp/pl r6 + mov.l @(52,r5),r0 + add #64,r5 + mov.l r0,@(52,r4) + add #64,r4 + bt GLOBAL(movmemSI52) +! done all the large groups, do the remainder +! jump to movmem+ + mova GLOBAL(movmemSI4)+4,r0 + add r6,r0 + jmp @r0 +LOCAL(movmem_done): ! share slot insn, works out aligned. + lds.l @r15+,pr + mov.l r0,@(56,r4) + mov.l @(52,r5),r0 + rts + mov.l r0,@(52,r4) + .balign 4 +! ??? We need aliases movstr* for movmem* for the older libraries. These +! aliases will be removed at the some point in the future. + .global GLOBAL(movmemSI64) + HIDDEN_FUNC(GLOBAL(movmemSI64)) + HIDDEN_ALIAS(movstrSI64,movmemSI64) +GLOBAL(movmemSI64): + mov.l @(60,r5),r0 + mov.l r0,@(60,r4) + .global GLOBAL(movmemSI60) + HIDDEN_FUNC(GLOBAL(movmemSI60)) + HIDDEN_ALIAS(movstrSI60,movmemSI60) +GLOBAL(movmemSI60): + mov.l @(56,r5),r0 + mov.l r0,@(56,r4) + .global GLOBAL(movmemSI56) + HIDDEN_FUNC(GLOBAL(movmemSI56)) + HIDDEN_ALIAS(movstrSI56,movmemSI56) +GLOBAL(movmemSI56): + mov.l @(52,r5),r0 + mov.l r0,@(52,r4) + .global GLOBAL(movmemSI52) + HIDDEN_FUNC(GLOBAL(movmemSI52)) + HIDDEN_ALIAS(movstrSI52,movmemSI52) +GLOBAL(movmemSI52): + mov.l @(48,r5),r0 + mov.l r0,@(48,r4) + .global GLOBAL(movmemSI48) + HIDDEN_FUNC(GLOBAL(movmemSI48)) + HIDDEN_ALIAS(movstrSI48,movmemSI48) +GLOBAL(movmemSI48): + mov.l @(44,r5),r0 + mov.l r0,@(44,r4) + .global GLOBAL(movmemSI44) + HIDDEN_FUNC(GLOBAL(movmemSI44)) + HIDDEN_ALIAS(movstrSI44,movmemSI44) +GLOBAL(movmemSI44): + mov.l @(40,r5),r0 + mov.l r0,@(40,r4) + .global GLOBAL(movmemSI40) + HIDDEN_FUNC(GLOBAL(movmemSI40)) + HIDDEN_ALIAS(movstrSI40,movmemSI40) +GLOBAL(movmemSI40): + mov.l @(36,r5),r0 + mov.l r0,@(36,r4) + .global GLOBAL(movmemSI36) + HIDDEN_FUNC(GLOBAL(movmemSI36)) + HIDDEN_ALIAS(movstrSI36,movmemSI36) +GLOBAL(movmemSI36): + mov.l @(32,r5),r0 + mov.l r0,@(32,r4) + .global GLOBAL(movmemSI32) + HIDDEN_FUNC(GLOBAL(movmemSI32)) + HIDDEN_ALIAS(movstrSI32,movmemSI32) +GLOBAL(movmemSI32): + mov.l @(28,r5),r0 + mov.l r0,@(28,r4) + .global GLOBAL(movmemSI28) + HIDDEN_FUNC(GLOBAL(movmemSI28)) + HIDDEN_ALIAS(movstrSI28,movmemSI28) +GLOBAL(movmemSI28): + mov.l @(24,r5),r0 + mov.l r0,@(24,r4) + .global GLOBAL(movmemSI24) + HIDDEN_FUNC(GLOBAL(movmemSI24)) + HIDDEN_ALIAS(movstrSI24,movmemSI24) +GLOBAL(movmemSI24): + mov.l @(20,r5),r0 + mov.l r0,@(20,r4) + .global GLOBAL(movmemSI20) + HIDDEN_FUNC(GLOBAL(movmemSI20)) + HIDDEN_ALIAS(movstrSI20,movmemSI20) +GLOBAL(movmemSI20): + mov.l @(16,r5),r0 + mov.l r0,@(16,r4) + .global GLOBAL(movmemSI16) + HIDDEN_FUNC(GLOBAL(movmemSI16)) + HIDDEN_ALIAS(movstrSI16,movmemSI16) +GLOBAL(movmemSI16): + mov.l @(12,r5),r0 + mov.l r0,@(12,r4) + .global GLOBAL(movmemSI12) + HIDDEN_FUNC(GLOBAL(movmemSI12)) + HIDDEN_ALIAS(movstrSI12,movmemSI12) +GLOBAL(movmemSI12): + mov.l @(8,r5),r0 + mov.l r0,@(8,r4) + .global GLOBAL(movmemSI8) + HIDDEN_FUNC(GLOBAL(movmemSI8)) + HIDDEN_ALIAS(movstrSI8,movmemSI8) +GLOBAL(movmemSI8): + mov.l @(4,r5),r0 + mov.l r0,@(4,r4) + .global GLOBAL(movmemSI4) + HIDDEN_FUNC(GLOBAL(movmemSI4)) + HIDDEN_ALIAS(movstrSI4,movmemSI4) +GLOBAL(movmemSI4): + mov.l @(0,r5),r0 + rts + mov.l r0,@(0,r4) + + ENDFUNC(GLOBAL(movmemSI64)) + ENDFUNC(GLOBAL(movmemSI60)) + ENDFUNC(GLOBAL(movmemSI56)) + ENDFUNC(GLOBAL(movmemSI52)) + ENDFUNC(GLOBAL(movmemSI48)) + ENDFUNC(GLOBAL(movmemSI44)) + ENDFUNC(GLOBAL(movmemSI40)) + ENDFUNC(GLOBAL(movmemSI36)) + ENDFUNC(GLOBAL(movmemSI32)) + ENDFUNC(GLOBAL(movmemSI28)) + ENDFUNC(GLOBAL(movmemSI24)) + ENDFUNC(GLOBAL(movmemSI20)) + ENDFUNC(GLOBAL(movmemSI16)) + ENDFUNC(GLOBAL(movmemSI12)) + ENDFUNC(GLOBAL(movmemSI8)) + ENDFUNC(GLOBAL(movmemSI4)) + ENDFUNC(GLOBAL(movmem)) +#endif + +#ifdef L_movmem_i4 + .text + .global GLOBAL(movmem_i4_even) + .global GLOBAL(movmem_i4_odd) + .global GLOBAL(movmemSI12_i4) + + HIDDEN_FUNC(GLOBAL(movmem_i4_even)) + HIDDEN_FUNC(GLOBAL(movmem_i4_odd)) + HIDDEN_FUNC(GLOBAL(movmemSI12_i4)) + + HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) + HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) + HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) + + .p2align 5 +L_movmem_2mod4_end: + mov.l r0,@(16,r4) + rts + mov.l r1,@(20,r4) + + .p2align 2 + +GLOBAL(movmem_i4_even): + mov.l @r5+,r0 + bra L_movmem_start_even + mov.l @r5+,r1 + +GLOBAL(movmem_i4_odd): + mov.l @r5+,r1 + add #-4,r4 + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r1,@(4,r4) + mov.l r2,@(8,r4) + +L_movmem_loop: + mov.l r3,@(12,r4) + dt r6 + mov.l @r5+,r0 + bt/s L_movmem_2mod4_end + mov.l @r5+,r1 + add #16,r4 +L_movmem_start_even: + mov.l @r5+,r2 + mov.l @r5+,r3 + mov.l r0,@r4 + dt r6 + mov.l r1,@(4,r4) + bf/s L_movmem_loop + mov.l r2,@(8,r4) + rts + mov.l r3,@(12,r4) + + ENDFUNC(GLOBAL(movmem_i4_even)) + ENDFUNC(GLOBAL(movmem_i4_odd)) + + .p2align 4 +GLOBAL(movmemSI12_i4): + mov.l @r5,r0 + mov.l @(4,r5),r1 + mov.l @(8,r5),r2 + mov.l r0,@r4 + mov.l r1,@(4,r4) + rts + mov.l r2,@(8,r4) + + ENDFUNC(GLOBAL(movmemSI12_i4)) +#endif + +#ifdef L_mulsi3 + + + .global GLOBAL(mulsi3) + HIDDEN_FUNC(GLOBAL(mulsi3)) + +! r4 = aabb +! r5 = ccdd +! r0 = aabb*ccdd via partial products +! +! if aa == 0 and cc = 0 +! r0 = bb*dd +! +! else +! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536) +! + +GLOBAL(mulsi3): + mulu.w r4,r5 ! multiply the lsws macl=bb*dd + mov r5,r3 ! r3 = ccdd + swap.w r4,r2 ! r2 = bbaa + xtrct r2,r3 ! r3 = aacc + tst r3,r3 ! msws zero ? + bf hiset + rts ! yes - then we have the answer + sts macl,r0 + +hiset: sts macl,r0 ! r0 = bb*dd + mulu.w r2,r5 ! brewing macl = aa*dd + sts macl,r1 + mulu.w r3,r4 ! brewing macl = cc*bb + sts macl,r2 + add r1,r2 + shll16 r2 + rts + add r2,r0 + + ENDFUNC(GLOBAL(mulsi3)) +#endif +#endif /* ! __SH5__ */ + +/*------------------------------------------------------------------------------ + 32 bit signed integer division that uses FPU double precision division. */ + +#ifdef L_sdivsi3_i4 + .title "SH DIVIDE" + +#if defined (__SH4__) || defined (__SH2A__) +/* This variant is used when FPSCR.PR = 1 (double precision) is the default + setting. + Args in r4 and r5, result in fpul, clobber dr0, dr2. */ + + .global GLOBAL(sdivsi3_i4) + HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) +GLOBAL(sdivsi3_i4): + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + + ENDFUNC(GLOBAL(sdivsi3_i4)) + +#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) +/* This variant is used when FPSCR.PR = 0 (sigle precision) is the default + setting. + Args in r4 and r5, result in fpul, clobber r2, dr0, dr2. + For this to work, we must temporarily switch the FPU do double precision, + but we better do not touch FPSCR.FR. See PR 6526. */ + +#if ! __SH5__ || __SH5__ == 32 +#if __SH5__ + .mode SHcompact +#endif + .global GLOBAL(sdivsi3_i4) + HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) +GLOBAL(sdivsi3_i4): + +#ifndef __SH4A__ + mov.l r3,@-r15 + sts fpscr,r2 + mov #8,r3 + swap.w r3,r3 // r3 = 1 << 19 (FPSCR.PR bit) + or r2,r3 + lds r3,fpscr // Set FPSCR.PR = 1. + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + lds r2,fpscr + rts + mov.l @r15+,r3 +#else +/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. */ + fpchg + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + fpchg + +#endif /* __SH4A__ */ + + ENDFUNC(GLOBAL(sdivsi3_i4)) +#endif /* ! __SH5__ || __SH5__ == 32 */ +#endif /* ! __SH4__ || __SH2A__ */ +#endif /* L_sdivsi3_i4 */ + +//------------------------------------------------------------------------------ +#ifdef L_sdivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh2e/sh3e code. */ +!! +!! Steve Chamberlain +!! sac@cygnus.com +!! +!! + +!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit + + .global GLOBAL(sdivsi3) +#if __SHMEDIA__ +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 2 +#if 0 +/* The assembly code that follows is a hand-optimized version of the C + code that follows. Note that the registers that are modified are + exactly those listed as clobbered in the patterns divsi3_i1 and + divsi3_i1_media. + +int __sdivsi3 (i, j) + int i, j; +{ + register unsigned long long r18 asm ("r18"); + register unsigned long long r19 asm ("r19"); + register unsigned long long r0 asm ("r0") = 0; + register unsigned long long r1 asm ("r1") = 1; + register int r2 asm ("r2") = i >> 31; + register int r3 asm ("r3") = j >> 31; + + r2 = r2 ? r2 : r1; + r3 = r3 ? r3 : r1; + r18 = i * r2; + r19 = j * r3; + r2 *= r3; + + r19 <<= 31; + r1 <<= 31; + do + if (r18 >= r19) + r0 |= r1, r18 -= r19; + while (r19 >>= 1, r1 >>= 1); + + return r2 * (int)r0; +} +*/ +GLOBAL(sdivsi3): + pt/l LOCAL(sdivsi3_dontadd), tr2 + pt/l LOCAL(sdivsi3_loop), tr1 + ptabs/l r18, tr0 + movi 0, r0 + movi 1, r1 + shari.l r4, 31, r2 + shari.l r5, 31, r3 + cmveq r2, r1, r2 + cmveq r3, r1, r3 + muls.l r4, r2, r18 + muls.l r5, r3, r19 + muls.l r2, r3, r2 + shlli r19, 31, r19 + shlli r1, 31, r1 +LOCAL(sdivsi3_loop): + bgtu r19, r18, tr2 + or r0, r1, r0 + sub r18, r19, r18 +LOCAL(sdivsi3_dontadd): + shlri r1, 1, r1 + shlri r19, 1, r19 + bnei r1, 0, tr1 + muls.l r0, r2, r0 + add.l r0, r63, r0 + blink tr0, r63 +#elif 0 /* ! 0 */ + // inputs: r4,r5 + // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 + // result in r0 +GLOBAL(sdivsi3): + // can create absolute value without extra latency, + // but dependent on proper sign extension of inputs: + // shari.l r5,31,r2 + // xor r5,r2,r20 + // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. + shari.l r5,31,r2 + ori r2,1,r2 + muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. + movi 0xffffffffffffbb0c,r19 // shift count eqiv 76 + shari.l r4,31,r3 + nsb r20,r0 + shlld r20,r0,r25 + shlri r25,48,r25 + sub r19,r25,r1 + mmulfx.w r1,r1,r2 + mshflo.w r1,r63,r1 + // If r4 was to be used in-place instead of r21, could use this sequence + // to compute absolute: + // sub r63,r4,r19 // compute absolute value of r4 + // shlri r4,32,r3 // into lower 32 bit of r4, keeping + // mcmv r19,r3,r4 // the sign in the upper 32 bits intact. + ori r3,1,r3 + mmulfx.w r25,r2,r2 + sub r19,r0,r0 + muls.l r4,r3,r21 + msub.w r1,r2,r2 + addi r2,-2,r1 + mulu.l r21,r1,r19 + mmulfx.w r2,r2,r2 + shlli r1,15,r1 + shlrd r19,r0,r19 + mulu.l r19,r20,r3 + mmacnfx.wl r25,r2,r1 + ptabs r18,tr0 + sub r21,r3,r25 + + mulu.l r25,r1,r2 + addi r0,14,r0 + xor r4,r5,r18 + shlrd r2,r0,r2 + mulu.l r2,r20,r3 + add r19,r2,r19 + shari.l r18,31,r18 + sub r25,r3,r25 + + mulu.l r25,r1,r2 + sub r25,r20,r25 + add r19,r18,r19 + shlrd r2,r0,r2 + mulu.l r2,r20,r3 + addi r25,1,r25 + add r19,r2,r19 + + cmpgt r25,r3,r25 + add.l r19,r25,r0 + xor r0,r18,r0 + blink tr0,r63 +#else /* ! 0 && ! 0 */ + + // inputs: r4,r5 + // clobbered: r1,r18,r19,r20,r21,r25,tr0 + // result in r0 + HIDDEN_FUNC(GLOBAL(sdivsi3_2)) +#ifndef __pic__ + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): /* this is the shcompact entry point */ + // The special SHmedia entry point sdivsi3_1 prevents accidental linking + // with the SHcompact implementation, which clobbers tr1 / tr2. + .global GLOBAL(sdivsi3_1) +GLOBAL(sdivsi3_1): + .global GLOBAL(div_table_internal) + movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 + shori GLOBAL(div_table_internal) & 65535, r20 +#endif + .global GLOBAL(sdivsi3_2) + // div_table in r20 + // clobbered: r1,r18,r19,r21,r25,tr0 +GLOBAL(sdivsi3_2): + nsb r5, r1 + shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 + shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) + ldx.ub r20, r21, r19 // u0.8 + shari r25, 32, r25 // normalize to s2.30 + shlli r21, 1, r21 + muls.l r25, r19, r19 // s2.38 + ldx.w r20, r21, r21 // s2.14 + ptabs r18, tr0 + shari r19, 24, r19 // truncate to s2.14 + sub r21, r19, r19 // some 11 bit inverse in s1.14 + muls.l r19, r19, r21 // u0.28 + sub r63, r1, r1 + addi r1, 92, r1 + muls.l r25, r21, r18 // s2.58 + shlli r19, 45, r19 // multiply by two and convert to s2.58 + /* bubble */ + sub r19, r18, r18 + shari r18, 28, r18 // some 22 bit inverse in s1.30 + muls.l r18, r25, r0 // s2.60 + muls.l r18, r4, r25 // s32.30 + /* bubble */ + shari r0, 16, r19 // s-16.44 + muls.l r19, r18, r19 // s-16.74 + shari r25, 63, r0 + shari r4, 14, r18 // s19.-14 + shari r19, 30, r19 // s-16.44 + muls.l r19, r18, r19 // s15.30 + xor r21, r0, r21 // You could also use the constant 1 << 27. + add r21, r25, r21 + sub r21, r19, r21 + shard r21, r1, r21 + sub r21, r0, r0 + blink tr0, r63 +#ifndef __pic__ + ENDFUNC(GLOBAL(sdivsi3)) +#endif + ENDFUNC(GLOBAL(sdivsi3_2)) +#endif +#elif defined __SHMEDIA__ +/* m5compact-nofpu */ + // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): + pt/l LOCAL(sdivsi3_dontsub), tr0 + pt/l LOCAL(sdivsi3_loop), tr1 + ptabs/l r18,tr2 + shari.l r4,31,r18 + shari.l r5,31,r19 + xor r4,r18,r20 + xor r5,r19,r21 + sub.l r20,r18,r20 + sub.l r21,r19,r21 + xor r18,r19,r19 + shlli r21,32,r25 + addi r25,-1,r21 + addz.l r20,r63,r20 +LOCAL(sdivsi3_loop): + shlli r20,1,r20 + bgeu/u r21,r20,tr0 + sub r20,r21,r20 +LOCAL(sdivsi3_dontsub): + addi.l r25,-1,r25 + bnei r25,-32,tr1 + xor r20,r19,r20 + sub.l r20,r19,r0 + blink tr2,r63 + ENDFUNC(GLOBAL(sdivsi3)) +#else /* ! __SHMEDIA__ */ + FUNC(GLOBAL(sdivsi3)) +GLOBAL(sdivsi3): + mov r4,r1 + mov r5,r0 + + tst r0,r0 + bt div0 + mov #0,r2 + div0s r2,r1 + subc r3,r3 + subc r2,r1 + div0s r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + div1 r0,r3 + rotcl r1 + addc r2,r1 + rts + mov r1,r0 + + +div0: rts + mov #0,r0 + + ENDFUNC(GLOBAL(sdivsi3)) +#endif /* ! __SHMEDIA__ */ +#endif /* L_sdivsi3 */ + +/*------------------------------------------------------------------------------ + 32 bit unsigned integer division that uses FPU double precision division. */ + +#ifdef L_udivsi3_i4 + .title "SH DIVIDE" + +#if defined (__SH4__) || defined (__SH2A__) +/* This variant is used when FPSCR.PR = 1 (double precision) is the default + setting. + Args in r4 and r5, result in fpul, + clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit */ + + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + mov #1,r1 + cmp/hi r1,r5 + bf/s trivial + rotr r1 + xor r1,r4 + lds r4,fpul + mova L1,r0 +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else + fmov.s @r0+,DR40 + fmov.s @r0,DR41 +#endif + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + rts + ftrc dr0,fpul + +trivial: + rts + lds r4,fpul + + .align 2 +#ifdef FMOVD_WORKS + .align 3 // Make the double below 8 byte aligned. +#endif +L1: + .double 2147483648 + + ENDFUNC(GLOBAL(udivsi3_i4)) + +#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__) +#if ! __SH5__ || __SH5__ == 32 +!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 + .mode SHmedia + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + addz.l r4,r63,r20 + addz.l r5,r63,r21 + fmov.qd r20,dr0 + fmov.qd r21,dr32 + ptabs r18,tr0 + float.qd dr0,dr0 + float.qd dr32,dr32 + fdiv.d dr0,dr32,dr0 + ftrc.dq dr0,dr32 + fmov.s fr33,fr32 + blink tr0,r63 + + ENDFUNC(GLOBAL(udivsi3_i4)) +#endif /* ! __SH5__ || __SH5__ == 32 */ + +#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) +/* This variant is used when FPSCR.PR = 0 (sigle precision) is the default + setting. + Args in r4 and r5, result in fpul, + clobber r0, r1, r4, r5, dr0, dr2, dr4. + For this to work, we must temporarily switch the FPU do double precision, + but we better do not touch FPSCR.FR. See PR 6526. */ + + .global GLOBAL(udivsi3_i4) + HIDDEN_FUNC(GLOBAL(udivsi3_i4)) +GLOBAL(udivsi3_i4): + +#ifndef __SH4A__ + mov #1,r1 + cmp/hi r1,r5 + bf/s trivial + rotr r1 // r1 = 1 << 31 + sts.l fpscr,@-r15 + xor r1,r4 + mov.l @(0,r15),r0 + xor r1,r5 + mov.l L2,r1 + lds r4,fpul + or r0,r1 + mova L1,r0 + lds r1,fpscr +#ifdef FMOVD_WORKS + fmov.d @r0+,dr4 +#else + fmov.s @r0+,DR40 + fmov.s @r0,DR41 +#endif + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + lds.l @r15+,fpscr + +#ifdef FMOVD_WORKS + .align 3 // Make the double below 8 byte aligned. +#endif +trivial: + rts + lds r4,fpul + + .align 2 +L2: +#ifdef FMOVD_WORKS + .long 0x180000 // FPSCR.PR = 1, FPSCR.SZ = 1 +#else + .long 0x80000 // FPSCR.PR = 1 +#endif +L1: + .double 2147483648 + +#else +/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. + Although on SH4A fmovd usually works, it would require either additional + two fschg instructions or an FPSCR push + pop. It's not worth the effort + for loading only one double constant. */ + mov #1,r1 + cmp/hi r1,r5 + bf/s trivial + rotr r1 // r1 = 1 << 31 + fpchg + mova L1,r0 + xor r1,r4 + fmov.s @r0+,DR40 + lds r4,fpul + fmov.s @r0,DR41 + xor r1,r5 + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + fpchg + +trivial: + rts + lds r4,fpul + + .align 2 +L1: + .double 2147483648 + +#endif /* __SH4A__ */ + + + ENDFUNC(GLOBAL(udivsi3_i4)) +#endif /* ! __SH4__ */ +#endif /* L_udivsi3_i4 */ + +#ifdef L_udivsi3 +/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with + sh2e/sh3e code. */ + +!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit + .global GLOBAL(udivsi3) + HIDDEN_FUNC(GLOBAL(udivsi3)) + +#if __SHMEDIA__ +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 2 +#if 0 +/* The assembly code that follows is a hand-optimized version of the C + code that follows. Note that the registers that are modified are + exactly those listed as clobbered in the patterns udivsi3_i1 and + udivsi3_i1_media. + +unsigned +__udivsi3 (i, j) + unsigned i, j; +{ + register unsigned long long r0 asm ("r0") = 0; + register unsigned long long r18 asm ("r18") = 1; + register unsigned long long r4 asm ("r4") = i; + register unsigned long long r19 asm ("r19") = j; + + r19 <<= 31; + r18 <<= 31; + do + if (r4 >= r19) + r0 |= r18, r4 -= r19; + while (r19 >>= 1, r18 >>= 1); + + return r0; +} +*/ +GLOBAL(udivsi3): + pt/l LOCAL(udivsi3_dontadd), tr2 + pt/l LOCAL(udivsi3_loop), tr1 + ptabs/l r18, tr0 + movi 0, r0 + movi 1, r18 + addz.l r5, r63, r19 + addz.l r4, r63, r4 + shlli r19, 31, r19 + shlli r18, 31, r18 +LOCAL(udivsi3_loop): + bgtu r19, r4, tr2 + or r0, r18, r0 + sub r4, r19, r4 +LOCAL(udivsi3_dontadd): + shlri r18, 1, r18 + shlri r19, 1, r19 + bnei r18, 0, tr1 + blink tr0, r63 +#else +GLOBAL(udivsi3): + // inputs: r4,r5 + // clobbered: r18,r19,r20,r21,r22,r25,tr0 + // result in r0. + addz.l r5,r63,r22 + nsb r22,r0 + shlld r22,r0,r25 + shlri r25,48,r25 + movi 0xffffffffffffbb0c,r20 // shift count eqiv 76 + sub r20,r25,r21 + mmulfx.w r21,r21,r19 + mshflo.w r21,r63,r21 + ptabs r18,tr0 + mmulfx.w r25,r19,r19 + sub r20,r0,r0 + /* bubble */ + msub.w r21,r19,r19 + addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21 + before the msub.w, but we need a different value for + r19 to keep errors under control. */ + mulu.l r4,r21,r18 + mmulfx.w r19,r19,r19 + shlli r21,15,r21 + shlrd r18,r0,r18 + mulu.l r18,r22,r20 + mmacnfx.wl r25,r19,r21 + /* bubble */ + sub r4,r20,r25 + + mulu.l r25,r21,r19 + addi r0,14,r0 + /* bubble */ + shlrd r19,r0,r19 + mulu.l r19,r22,r20 + add r18,r19,r18 + /* bubble */ + sub.l r25,r20,r25 + + mulu.l r25,r21,r19 + addz.l r25,r63,r25 + sub r25,r22,r25 + shlrd r19,r0,r19 + mulu.l r19,r22,r20 + addi r25,1,r25 + add r18,r19,r18 + + cmpgt r25,r20,r25 + add.l r18,r25,r0 + blink tr0,r63 +#endif +#elif defined (__SHMEDIA__) +/* m5compact-nofpu - more emphasis on code size than on speed, but don't + ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4. + So use a short shmedia loop. */ + // clobbered: r20,r21,r25,tr0,tr1,tr2 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 +GLOBAL(udivsi3): + pt/l LOCAL(udivsi3_dontsub), tr0 + pt/l LOCAL(udivsi3_loop), tr1 + ptabs/l r18,tr2 + shlli r5,32,r25 + addi r25,-1,r21 + addz.l r4,r63,r20 +LOCAL(udivsi3_loop): + shlli r20,1,r20 + bgeu/u r21,r20,tr0 + sub r20,r21,r20 +LOCAL(udivsi3_dontsub): + addi.l r25,-1,r25 + bnei r25,-32,tr1 + add.l r20,r63,r0 + blink tr2,r63 +#else /* ! defined (__SHMEDIA__) */ +LOCAL(div8): + div1 r5,r4 +LOCAL(div7): + div1 r5,r4; div1 r5,r4; div1 r5,r4 + div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 + +LOCAL(divx4): + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + div1 r5,r4; rotcl r0 + rts; div1 r5,r4 + +GLOBAL(udivsi3): + sts.l pr,@-r15 + extu.w r5,r0 + cmp/eq r5,r0 +#ifdef __sh1__ + bf LOCAL(large_divisor) +#else + bf/s LOCAL(large_divisor) +#endif + div0u + swap.w r4,r0 + shlr16 r4 + bsr LOCAL(div8) + shll16 r5 + bsr LOCAL(div7) + div1 r5,r4 + xtrct r4,r0 + xtrct r0,r4 + bsr LOCAL(div8) + swap.w r4,r4 + bsr LOCAL(div7) + div1 r5,r4 + lds.l @r15+,pr + xtrct r4,r0 + swap.w r0,r0 + rotcl r0 + rts + shlr16 r5 + +LOCAL(large_divisor): +#ifdef __sh1__ + div0u +#endif + mov #0,r0 + xtrct r4,r0 + xtrct r0,r4 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + bsr LOCAL(divx4) + rotcl r0 + lds.l @r15+,pr + rts + rotcl r0 + + ENDFUNC(GLOBAL(udivsi3)) +#endif /* ! __SHMEDIA__ */ +#endif /* L_udivsi3 */ + +#ifdef L_udivdi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(udivdi3) + FUNC(GLOBAL(udivdi3)) +GLOBAL(udivdi3): + HIDDEN_ALIAS(udivdi3_internal,udivdi3) + shlri r3,1,r4 + nsb r4,r22 + shlld r3,r22,r6 + shlri r6,49,r5 + movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ + sub r21,r5,r1 + mmulfx.w r1,r1,r4 + mshflo.w r1,r63,r1 + sub r63,r22,r20 // r63 == 64 % 64 + mmulfx.w r5,r4,r4 + pta LOCAL(large_divisor),tr0 + addi r20,32,r9 + msub.w r1,r4,r1 + madd.w r1,r1,r1 + mmulfx.w r1,r1,r4 + shlri r6,32,r7 + bgt/u r9,r63,tr0 // large_divisor + mmulfx.w r5,r4,r4 + shlri r2,32+14,r19 + addi r22,-31,r0 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r19,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + mulu.l r5,r3,r8 + mshalds.l r1,r21,r1 + shari r4,26,r4 + shlld r8,r0,r8 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r2,r8,r2 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ + + shlri r2,22,r21 + mulu.l r21,r1,r21 + shlld r5,r0,r8 + addi r20,30-22,r0 + shlrd r21,r0,r21 + mulu.l r21,r3,r5 + add r8,r21,r8 + mcmpgt.l r21,r63,r21 // See Note 1 + addi r20,30,r0 + mshfhi.l r63,r21,r21 + sub r2,r5,r2 + andc r2,r21,r2 + + /* small divisor: need a third divide step */ + mulu.l r2,r1,r7 + ptabs r18,tr0 + addi r2,1,r2 + shlrd r7,r0,r7 + mulu.l r7,r3,r5 + add r8,r7,r8 + sub r2,r3,r2 + cmpgt r2,r5,r5 + add r8,r5,r2 + /* could test r3 here to check for divide by zero. */ + blink tr0,r63 + +LOCAL(large_divisor): + mmulfx.w r5,r4,r4 + shlrd r2,r9,r25 + shlri r25,32,r8 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r8,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + shlri r5,14-1,r8 + mulu.l r8,r7,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r25,r5,r25 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ + + shlri r25,22,r21 + mulu.l r21,r1,r21 + pta LOCAL(no_lo_adj),tr0 + addi r22,32,r0 + shlri r21,40,r21 + mulu.l r21,r7,r5 + add r8,r21,r8 + shlld r2,r0,r2 + sub r25,r5,r25 + bgtu/u r7,r25,tr0 // no_lo_adj + addi r8,1,r8 + sub r25,r7,r25 +LOCAL(no_lo_adj): + mextr4 r2,r25,r2 + + /* large_divisor: only needs a few adjustments. */ + mulu.l r8,r6,r5 + ptabs r18,tr0 + /* bubble */ + cmpgtu r5,r2,r5 + sub r8,r5,r2 + blink tr0,r63 + ENDFUNC(GLOBAL(udivdi3)) +/* Note 1: To shift the result of the second divide stage so that the result + always fits into 32 bits, yet we still reduce the rest sufficiently + would require a lot of instructions to do the shifts just right. Using + the full 64 bit shift result to multiply with the divisor would require + four extra instructions for the upper 32 bits (shift / mulu / shift / sub). + Fortunately, if the upper 32 bits of the shift result are nonzero, we + know that the rest after taking this partial result into account will + fit into 32 bits. So we just clear the upper 32 bits of the rest if the + upper 32 bits of the partial result are nonzero. */ +#endif /* __SHMEDIA__ */ +#endif /* L_udivdi3 */ + +#ifdef L_divdi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(divdi3) + FUNC(GLOBAL(divdi3)) +GLOBAL(divdi3): + pta GLOBAL(udivdi3_internal),tr0 + shari r2,63,r22 + shari r3,63,r23 + xor r2,r22,r2 + xor r3,r23,r3 + sub r2,r22,r2 + sub r3,r23,r3 + beq/u r22,r23,tr0 + ptabs r18,tr1 + blink tr0,r18 + sub r63,r2,r2 + blink tr1,r63 + ENDFUNC(GLOBAL(divdi3)) +#endif /* __SHMEDIA__ */ +#endif /* L_divdi3 */ + +#ifdef L_umoddi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(umoddi3) + FUNC(GLOBAL(umoddi3)) +GLOBAL(umoddi3): + HIDDEN_ALIAS(umoddi3_internal,umoddi3) + shlri r3,1,r4 + nsb r4,r22 + shlld r3,r22,r6 + shlri r6,49,r5 + movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ + sub r21,r5,r1 + mmulfx.w r1,r1,r4 + mshflo.w r1,r63,r1 + sub r63,r22,r20 // r63 == 64 % 64 + mmulfx.w r5,r4,r4 + pta LOCAL(large_divisor),tr0 + addi r20,32,r9 + msub.w r1,r4,r1 + madd.w r1,r1,r1 + mmulfx.w r1,r1,r4 + shlri r6,32,r7 + bgt/u r9,r63,tr0 // large_divisor + mmulfx.w r5,r4,r4 + shlri r2,32+14,r19 + addi r22,-31,r0 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r19,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + mulu.l r5,r3,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + shlld r5,r0,r5 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r2,r5,r2 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ + + shlri r2,22,r21 + mulu.l r21,r1,r21 + addi r20,30-22,r0 + /* bubble */ /* could test r3 here to check for divide by zero. */ + shlrd r21,r0,r21 + mulu.l r21,r3,r5 + mcmpgt.l r21,r63,r21 // See Note 1 + addi r20,30,r0 + mshfhi.l r63,r21,r21 + sub r2,r5,r2 + andc r2,r21,r2 + + /* small divisor: need a third divide step */ + mulu.l r2,r1,r7 + ptabs r18,tr0 + sub r2,r3,r8 /* re-use r8 here for rest - r3 */ + shlrd r7,r0,r7 + mulu.l r7,r3,r5 + /* bubble */ + addi r8,1,r7 + cmpgt r7,r5,r7 + cmvne r7,r8,r2 + sub r2,r5,r2 + blink tr0,r63 + +LOCAL(large_divisor): + mmulfx.w r5,r4,r4 + shlrd r2,r9,r25 + shlri r25,32,r8 + msub.w r1,r4,r1 + + mulu.l r1,r7,r4 + addi r1,-3,r5 + mulu.l r5,r8,r5 + sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 + shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as + the case may be, %0000000000000000 000.11111111111, still */ + muls.l r1,r4,r4 /* leaving at least one sign bit. */ + shlri r5,14-1,r8 + mulu.l r8,r7,r5 + mshalds.l r1,r21,r1 + shari r4,26,r4 + add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) + sub r25,r5,r25 + /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ + + shlri r25,22,r21 + mulu.l r21,r1,r21 + pta LOCAL(no_lo_adj),tr0 + addi r22,32,r0 + shlri r21,40,r21 + mulu.l r21,r7,r5 + add r8,r21,r8 + shlld r2,r0,r2 + sub r25,r5,r25 + bgtu/u r7,r25,tr0 // no_lo_adj + addi r8,1,r8 + sub r25,r7,r25 +LOCAL(no_lo_adj): + mextr4 r2,r25,r2 + + /* large_divisor: only needs a few adjustments. */ + mulu.l r8,r6,r5 + ptabs r18,tr0 + add r2,r6,r7 + cmpgtu r5,r2,r8 + cmvne r8,r7,r2 + sub r2,r5,r2 + shlrd r2,r22,r2 + blink tr0,r63 + ENDFUNC(GLOBAL(umoddi3)) +/* Note 1: To shift the result of the second divide stage so that the result + always fits into 32 bits, yet we still reduce the rest sufficiently + would require a lot of instructions to do the shifts just right. Using + the full 64 bit shift result to multiply with the divisor would require + four extra instructions for the upper 32 bits (shift / mulu / shift / sub). + Fortunately, if the upper 32 bits of the shift result are nonzero, we + know that the rest after taking this partial result into account will + fit into 32 bits. So we just clear the upper 32 bits of the rest if the + upper 32 bits of the partial result are nonzero. */ +#endif /* __SHMEDIA__ */ +#endif /* L_umoddi3 */ + +#ifdef L_moddi3 +#ifdef __SHMEDIA__ + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(moddi3) + FUNC(GLOBAL(moddi3)) +GLOBAL(moddi3): + pta GLOBAL(umoddi3_internal),tr0 + shari r2,63,r22 + shari r3,63,r23 + xor r2,r22,r2 + xor r3,r23,r3 + sub r2,r22,r2 + sub r3,r23,r3 + beq/u r22,r63,tr0 + ptabs r18,tr1 + blink tr0,r18 + sub r63,r2,r2 + blink tr1,r63 + ENDFUNC(GLOBAL(moddi3)) +#endif /* __SHMEDIA__ */ +#endif /* L_moddi3 */ + +#ifdef L_set_fpscr +#if !defined (__SH2A_NOFPU__) +#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32 +#ifdef __SH5__ + .mode SHcompact +#endif + .global GLOBAL(set_fpscr) + HIDDEN_FUNC(GLOBAL(set_fpscr)) +GLOBAL(set_fpscr): + lds r4,fpscr +#ifdef __PIC__ + mov.l r12,@-r15 +#ifdef __vxworks + mov.l LOCAL(set_fpscr_L0_base),r12 + mov.l LOCAL(set_fpscr_L0_index),r0 + mov.l @r12,r12 + mov.l @(r0,r12),r12 +#else + mova LOCAL(set_fpscr_L0),r0 + mov.l LOCAL(set_fpscr_L0),r12 + add r0,r12 +#endif + mov.l LOCAL(set_fpscr_L1),r0 + mov.l @(r0,r12),r1 + mov.l @r15+,r12 +#else + mov.l LOCAL(set_fpscr_L1),r1 +#endif + swap.w r4,r0 + or #24,r0 +#ifndef FMOVD_WORKS + xor #16,r0 +#endif +#if defined(__SH4__) || defined (__SH2A_DOUBLE__) + swap.w r0,r3 + mov.l r3,@(4,r1) +#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r2 + mov.l r2,@r1 +#endif +#ifndef FMOVD_WORKS + xor #8,r0 +#else + xor #24,r0 +#endif +#if defined(__SH4__) || defined (__SH2A_DOUBLE__) + swap.w r0,r2 + rts + mov.l r2,@r1 +#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ + swap.w r0,r3 + rts + mov.l r3,@(4,r1) +#endif + .align 2 +#ifdef __PIC__ +#ifdef __vxworks +LOCAL(set_fpscr_L0_base): + .long ___GOTT_BASE__ +LOCAL(set_fpscr_L0_index): + .long ___GOTT_INDEX__ +#else +LOCAL(set_fpscr_L0): + .long _GLOBAL_OFFSET_TABLE_ +#endif +LOCAL(set_fpscr_L1): + .long GLOBAL(fpscr_values@GOT) +#else +LOCAL(set_fpscr_L1): + .long GLOBAL(fpscr_values) +#endif + + ENDFUNC(GLOBAL(set_fpscr)) +#ifndef NO_FPSCR_VALUES +#ifdef __ELF__ + .comm GLOBAL(fpscr_values),8,4 +#else + .comm GLOBAL(fpscr_values),8 +#endif /* ELF */ +#endif /* NO_FPSCR_VALUES */ +#endif /* SH2E / SH3E / SH4 */ +#endif /* __SH2A_NOFPU__ */ +#endif /* L_set_fpscr */ +#ifdef L_ic_invalidate +#if __SH5__ == 32 + .mode SHmedia + .section .text..SHmedia32,"ax" + .align 2 + .global GLOBAL(init_trampoline) + HIDDEN_FUNC(GLOBAL(init_trampoline)) +GLOBAL(init_trampoline): + st.l r0,8,r2 +#ifdef __LITTLE_ENDIAN__ + movi 9,r20 + shori 0x402b,r20 + shori 0xd101,r20 + shori 0xd002,r20 +#else + movi 0xffffffffffffd002,r20 + shori 0xd101,r20 + shori 0x402b,r20 + shori 9,r20 +#endif + st.q r0,0,r20 + st.l r0,12,r3 + ENDFUNC(GLOBAL(init_trampoline)) + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): + ocbwb r0,0 + synco + icbi r0, 0 + ptabs r18, tr0 + synci + blink tr0, r63 + ENDFUNC(GLOBAL(ic_invalidate)) +#elif defined(__SH4A__) + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): + ocbwb @r4 + synco + icbi @r4 + rts + nop + ENDFUNC(GLOBAL(ic_invalidate)) +#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) + /* For system code, we use ic_invalidate_line_i, but user code + needs a different mechanism. A kernel call is generally not + available, and it would also be slow. Different SH4 variants use + different sizes and associativities of the Icache. We use a small + bit of dispatch code that can be put hidden in every shared object, + which calls the actual processor-specific invalidation code in a + separate module. + Or if you have operating system support, the OS could mmap the + procesor-specific code from a single page, since it is highly + repetitive. */ + .global GLOBAL(ic_invalidate) + HIDDEN_FUNC(GLOBAL(ic_invalidate)) +GLOBAL(ic_invalidate): +#ifdef __pic__ +#ifdef __vxworks + mov.l 1f,r1 + mov.l 2f,r0 + mov.l @r1,r1 + mov.l 0f,r2 + mov.l @(r0,r1),r0 +#else + mov.l 1f,r1 + mova 1f,r0 + mov.l 0f,r2 + add r1,r0 +#endif + mov.l @(r0,r2),r1 +#else + mov.l 0f,r1 +#endif + ocbwb @r4 + mov.l @(8,r1),r0 + sub r1,r4 + and r4,r0 + add r1,r0 + jmp @r0 + mov.l @(4,r1),r0 + .align 2 +#ifndef __pic__ +0: .long GLOBAL(ic_invalidate_array) +#else /* __pic__ */ + .global GLOBAL(ic_invalidate_array) +0: .long GLOBAL(ic_invalidate_array)@GOT +#ifdef __vxworks +1: .long ___GOTT_BASE__ +2: .long ___GOTT_INDEX__ +#else +1: .long _GLOBAL_OFFSET_TABLE_ +#endif + ENDFUNC(GLOBAL(ic_invalidate)) +#endif /* __pic__ */ +#endif /* SH4 */ +#endif /* L_ic_invalidate */ + +#ifdef L_ic_invalidate_array +#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)))) + .global GLOBAL(ic_invalidate_array) + /* This is needed when an SH4 dso with trampolines is used on SH4A. */ + .global GLOBAL(ic_invalidate_array) + FUNC(GLOBAL(ic_invalidate_array)) +GLOBAL(ic_invalidate_array): + add r1,r4 + synco + icbi @r4 + rts + nop + .align 2 + .long 0 + ENDFUNC(GLOBAL(ic_invalidate_array)) +#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) + .global GLOBAL(ic_invalidate_array) + .p2align 5 + FUNC(GLOBAL(ic_invalidate_array)) +/* This must be aligned to the beginning of a cache line. */ +GLOBAL(ic_invalidate_array): +#ifndef WAYS +#define WAYS 4 +#define WAY_SIZE 0x4000 +#endif +#if WAYS == 1 + .rept WAY_SIZE * WAYS / 32 + rts + nop + .rept 7 + .long WAY_SIZE - 32 + .endr + .endr +#elif WAYS <= 6 + .rept WAY_SIZE * WAYS / 32 + braf r0 + add #-8,r0 + .long WAY_SIZE + 8 + .long WAY_SIZE - 32 + .rept WAYS-2 + braf r0 + nop + .endr + .rept 7 - WAYS + rts + nop + .endr + .endr +#else /* WAYS > 6 */ + /* This variant needs two different pages for mmap-ing. */ + .rept WAYS-1 + .rept WAY_SIZE / 32 + braf r0 + nop + .long WAY_SIZE + .rept 6 + .long WAY_SIZE - 32 + .endr + .endr + .endr + .rept WAY_SIZE / 32 + rts + .rept 15 + nop + .endr + .endr +#endif /* WAYS */ + ENDFUNC(GLOBAL(ic_invalidate_array)) +#endif /* SH4 */ +#endif /* L_ic_invalidate_array */ + +#if defined (__SH5__) && __SH5__ == 32 +#ifdef L_shcompact_call_trampoline + .section .rodata + .align 1 +LOCAL(ct_main_table): +.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) +.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + + /* This function loads 64-bit general-purpose registers from the + stack, from a memory address contained in them or from an FP + register, according to a cookie passed in r1. Its execution + time is linear on the number of registers that actually have + to be copied. See sh.h for details on the actual bit pattern. + + The function to be called is passed in r0. If a 32-bit return + value is expected, the actual function will be tail-called, + otherwise the return address will be stored in r10 (that the + caller should expect to be clobbered) and the return value + will be expanded into r2/r3 upon return. */ + + .global GLOBAL(GCC_shcompact_call_trampoline) + FUNC(GLOBAL(GCC_shcompact_call_trampoline)) +GLOBAL(GCC_shcompact_call_trampoline): + ptabs/l r0, tr0 /* Prepare to call the actual function. */ + movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 + pt/l LOCAL(ct_loop), tr1 + addz.l r1, r63, r1 + shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 +LOCAL(ct_loop): + nsb r1, r28 + shlli r28, 1, r29 + ldx.w r0, r29, r30 +LOCAL(ct_main_label): + ptrel/l r30, tr2 + blink tr2, r63 +LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */ + /* It must be dr0, so just do it. */ + fmov.dq dr0, r2 + movi 7, r30 + shlli r30, 29, r31 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ + /* It is either dr0 or dr2. */ + movi 7, r30 + shlri r1, 26, r32 + shlli r30, 26, r31 + andc r1, r31, r1 + fmov.dq dr0, r3 + beqi/l r32, 4, tr1 + fmov.dq dr2, r3 + blink tr1, r63 +LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ + shlri r1, 23 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 +LOCAL(ct_r4_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 23, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r4_fp_copy): + fmov.dq dr0, r4 + blink tr1, r63 + fmov.dq dr2, r4 + blink tr1, r63 + fmov.dq dr4, r4 + blink tr1, r63 +LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ + shlri r1, 20 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 +LOCAL(ct_r5_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 20, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r5_fp_copy): + fmov.dq dr0, r5 + blink tr1, r63 + fmov.dq dr2, r5 + blink tr1, r63 + fmov.dq dr4, r5 + blink tr1, r63 + fmov.dq dr6, r5 + blink tr1, r63 +LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ + /* It must be dr8. */ + fmov.dq dr8, r6 + movi 15, r30 + shlli r30, 16, r31 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */ + shlri r1, 16 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 +LOCAL(ct_r6_fp_base): + ptrel/l r32, tr2 + movi 7, r30 + shlli r30, 16, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r6_fp_copy): + fmov.dq dr0, r6 + blink tr1, r63 + fmov.dq dr2, r6 + blink tr1, r63 + fmov.dq dr4, r6 + blink tr1, r63 + fmov.dq dr6, r6 + blink tr1, r63 +LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 12, r31 + shlri r1, 12, r32 + andc r1, r31, r1 + fmov.dq dr8, r7 + beqi/l r32, 8, tr1 + fmov.dq dr10, r7 + blink tr1, r63 +LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ + shlri r1, 12 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 +LOCAL(ct_r7_fp_base): + ptrel/l r32, tr2 + movi 7 << 12, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r7_fp_copy): + fmov.dq dr0, r7 + blink tr1, r63 + fmov.dq dr2, r7 + blink tr1, r63 + fmov.dq dr4, r7 + blink tr1, r63 + fmov.dq dr6, r7 + blink tr1, r63 +LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 8, r31 + andi r1, 1 << 8, r32 + andc r1, r31, r1 + fmov.dq dr8, r8 + beq/l r32, r63, tr1 + fmov.dq dr10, r8 + blink tr1, r63 +LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */ + shlri r1, 8 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 +LOCAL(ct_r8_fp_base): + ptrel/l r32, tr2 + movi 7 << 8, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r8_fp_copy): + fmov.dq dr0, r8 + blink tr1, r63 + fmov.dq dr2, r8 + blink tr1, r63 + fmov.dq dr4, r8 + blink tr1, r63 + fmov.dq dr6, r8 + blink tr1, r63 +LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ + /* It is either dr8 or dr10. */ + movi 15 << 4, r31 + andi r1, 1 << 4, r32 + andc r1, r31, r1 + fmov.dq dr8, r9 + beq/l r32, r63, tr1 + fmov.dq dr10, r9 + blink tr1, r63 +LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */ + shlri r1, 4 - 3, r34 + andi r34, 3 << 3, r33 + addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 +LOCAL(ct_r9_fp_base): + ptrel/l r32, tr2 + movi 7 << 4, r31 + andc r1, r31, r1 + blink tr2, r63 +LOCAL(ct_r9_fp_copy): + fmov.dq dr0, r9 + blink tr1, r63 + fmov.dq dr2, r9 + blink tr1, r63 + fmov.dq dr4, r9 + blink tr1, r63 + fmov.dq dr6, r9 + blink tr1, r63 +LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ + pt/l LOCAL(ct_r2_load), tr2 + movi 3, r30 + shlli r30, 29, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r2, 8, r3 + ldx.q r2, r63, r2 + /* Fall through. */ +LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ + pt/l LOCAL(ct_r3_load), tr2 + movi 3, r30 + shlli r30, 26, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r3, 8, r4 + ldx.q r3, r63, r3 +LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ + pt/l LOCAL(ct_r4_load), tr2 + movi 3, r30 + shlli r30, 23, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r4, 8, r5 + ldx.q r4, r63, r4 +LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */ + pt/l LOCAL(ct_r5_load), tr2 + movi 3, r30 + shlli r30, 20, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r5, 8, r6 + ldx.q r5, r63, r5 +LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ + pt/l LOCAL(ct_r6_load), tr2 + movi 3 << 16, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r6, 8, r7 + ldx.q r6, r63, r6 +LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ + pt/l LOCAL(ct_r7_load), tr2 + movi 3 << 12, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r7, 8, r8 + ldx.q r7, r63, r7 +LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */ + pt/l LOCAL(ct_r8_load), tr2 + movi 3 << 8, r31 + and r1, r31, r32 + andc r1, r31, r1 + beq/l r31, r32, tr2 + addi.l r8, 8, r9 + ldx.q r8, r63, r8 +LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ + pt/l LOCAL(ct_check_tramp), tr2 + ldx.q r9, r63, r9 + blink tr2, r63 +LOCAL(ct_r2_load): + ldx.q r2, r63, r2 + blink tr1, r63 +LOCAL(ct_r3_load): + ldx.q r3, r63, r3 + blink tr1, r63 +LOCAL(ct_r4_load): + ldx.q r4, r63, r4 + blink tr1, r63 +LOCAL(ct_r5_load): + ldx.q r5, r63, r5 + blink tr1, r63 +LOCAL(ct_r6_load): + ldx.q r6, r63, r6 + blink tr1, r63 +LOCAL(ct_r7_load): + ldx.q r7, r63, r7 + blink tr1, r63 +LOCAL(ct_r8_load): + ldx.q r8, r63, r8 + blink tr1, r63 +LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r2 + shlli r30, 29, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r3 + shlli r30, 26, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r4_pop): /* Pop r4 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r4 + shlli r30, 23, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r5 + shlli r30, 20, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ + movi 1, r30 + ldx.q r15, r63, r6 + shlli r30, 16, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ + ldx.q r15, r63, r7 + movi 1 << 12, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_r8_pop): /* Pop r8 from the stack. */ + ldx.q r15, r63, r8 + movi 1 << 8, r31 + addi.l r15, 8, r15 + andc r1, r31, r1 + blink tr1, r63 +LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ + andi r1, 7 << 1, r30 + movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 + shlli r30, 2, r31 + shori LOCAL(ct_end_of_pop_seq) & 65535, r32 + sub.l r32, r31, r33 + ptabs/l r33, tr2 + blink tr2, r63 +LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ + ldx.q r15, r63, r3 + addi.l r15, 8, r15 + ldx.q r15, r63, r4 + addi.l r15, 8, r15 + ldx.q r15, r63, r5 + addi.l r15, 8, r15 + ldx.q r15, r63, r6 + addi.l r15, 8, r15 + ldx.q r15, r63, r7 + addi.l r15, 8, r15 + ldx.q r15, r63, r8 + addi.l r15, 8, r15 +LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ + ldx.q r15, r63, r9 + addi.l r15, 8, r15 +LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ +LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ + pt/u LOCAL(ct_ret_wide), tr2 + andi r1, 1, r1 + bne/u r1, r63, tr2 +LOCAL(ct_call_func): /* Just branch to the function. */ + blink tr0, r63 +LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its + 64-bit return value. */ + add.l r18, r63, r10 + blink tr0, r18 + ptabs r10, tr0 +#if __LITTLE_ENDIAN__ + shari r2, 32, r3 + add.l r2, r63, r2 +#else + add.l r2, r63, r3 + shari r2, 32, r2 +#endif + blink tr0, r63 + + ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) +#endif /* L_shcompact_call_trampoline */ + +#ifdef L_shcompact_return_trampoline + /* This function does the converse of the code in `ret_wide' + above. It is tail-called by SHcompact functions returning + 64-bit non-floating-point values, to pack the 32-bit values in + r2 and r3 into r2. */ + + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + .global GLOBAL(GCC_shcompact_return_trampoline) + HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) +GLOBAL(GCC_shcompact_return_trampoline): + ptabs/l r18, tr0 +#if __LITTLE_ENDIAN__ + addz.l r2, r63, r2 + shlli r3, 32, r3 +#else + addz.l r3, r63, r3 + shlli r2, 32, r2 +#endif + or r3, r2, r2 + blink tr0, r63 + + ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) +#endif /* L_shcompact_return_trampoline */ + +#ifdef L_shcompact_incoming_args + .section .rodata + .align 1 +LOCAL(ia_main_table): +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) +.word 1 /* Invalid, just loop */ +.word 1 /* Invalid, just loop */ +.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) +.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) + .mode SHmedia + .section .text..SHmedia32, "ax" + .align 2 + + /* This function stores 64-bit general-purpose registers back in + the stack, and loads the address in which each register + was stored into itself. The lower 32 bits of r17 hold the address + to begin storing, and the upper 32 bits of r17 hold the cookie. + Its execution time is linear on the + number of registers that actually have to be copied, and it is + optimized for structures larger than 64 bits, as opposed to + individual `long long' arguments. See sh.h for details on the + actual bit pattern. */ + + .global GLOBAL(GCC_shcompact_incoming_args) + FUNC(GLOBAL(GCC_shcompact_incoming_args)) +GLOBAL(GCC_shcompact_incoming_args): + ptabs/l r18, tr0 /* Prepare to return. */ + shlri r17, 32, r0 /* Load the cookie. */ + movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 + pt/l LOCAL(ia_loop), tr1 + add.l r17, r63, r17 + shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 +LOCAL(ia_loop): + nsb r0, r36 + shlli r36, 1, r37 + ldx.w r43, r37, r38 +LOCAL(ia_main_label): + ptrel/l r38, tr2 + blink tr2, r63 +LOCAL(ia_r2_ld): /* Store r2 and load its address. */ + movi 3, r38 + shlli r38, 29, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r2 + add.l r17, r63, r2 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r3_ld): /* Store r3 and load its address. */ + movi 3, r38 + shlli r38, 26, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r3 + add.l r17, r63, r3 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r4_ld): /* Store r4 and load its address. */ + movi 3, r38 + shlli r38, 23, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r4 + add.l r17, r63, r4 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r5_ld): /* Store r5 and load its address. */ + movi 3, r38 + shlli r38, 20, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r5 + add.l r17, r63, r5 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r6_ld): /* Store r6 and load its address. */ + movi 3, r38 + shlli r38, 16, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r6 + add.l r17, r63, r6 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r7_ld): /* Store r7 and load its address. */ + movi 3 << 12, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r7 + add.l r17, r63, r7 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r8_ld): /* Store r8 and load its address. */ + movi 3 << 8, r39 + and r0, r39, r40 + andc r0, r39, r0 + stx.q r17, r63, r8 + add.l r17, r63, r8 + addi.l r17, 8, r17 + beq/u r39, r40, tr1 +LOCAL(ia_r9_ld): /* Store r9 and load its address. */ + stx.q r17, r63, r9 + add.l r17, r63, r9 + blink tr0, r63 +LOCAL(ia_r2_push): /* Push r2 onto the stack. */ + movi 1, r38 + shlli r38, 29, r39 + andc r0, r39, r0 + stx.q r17, r63, r2 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r3_push): /* Push r3 onto the stack. */ + movi 1, r38 + shlli r38, 26, r39 + andc r0, r39, r0 + stx.q r17, r63, r3 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r4_push): /* Push r4 onto the stack. */ + movi 1, r38 + shlli r38, 23, r39 + andc r0, r39, r0 + stx.q r17, r63, r4 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r5_push): /* Push r5 onto the stack. */ + movi 1, r38 + shlli r38, 20, r39 + andc r0, r39, r0 + stx.q r17, r63, r5 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r6_push): /* Push r6 onto the stack. */ + movi 1, r38 + shlli r38, 16, r39 + andc r0, r39, r0 + stx.q r17, r63, r6 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r7_push): /* Push r7 onto the stack. */ + movi 1 << 12, r39 + andc r0, r39, r0 + stx.q r17, r63, r7 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_r8_push): /* Push r8 onto the stack. */ + movi 1 << 8, r39 + andc r0, r39, r0 + stx.q r17, r63, r8 + addi.l r17, 8, r17 + blink tr1, r63 +LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ + andi r0, 7 << 1, r38 + movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 + shlli r38, 2, r39 + shori LOCAL(ia_end_of_push_seq) & 65535, r40 + sub.l r40, r39, r41 + ptabs/l r41, tr2 + blink tr2, r63 +LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */ + stx.q r17, r63, r3 + addi.l r17, 8, r17 + stx.q r17, r63, r4 + addi.l r17, 8, r17 + stx.q r17, r63, r5 + addi.l r17, 8, r17 + stx.q r17, r63, r6 + addi.l r17, 8, r17 + stx.q r17, r63, r7 + addi.l r17, 8, r17 + stx.q r17, r63, r8 + addi.l r17, 8, r17 +LOCAL(ia_r9_push): /* Push r9 onto the stack. */ + stx.q r17, r63, r9 +LOCAL(ia_return): /* Return. */ + blink tr0, r63 +LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ + ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) +#endif /* L_shcompact_incoming_args */ +#endif +#if __SH5__ +#ifdef L_nested_trampoline +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif + .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ + .global GLOBAL(GCC_nested_trampoline) + HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) +GLOBAL(GCC_nested_trampoline): + .mode SHmedia + ptrel/u r63, tr0 + gettr tr0, r0 +#if __SH5__ == 64 + ld.q r0, 24, r1 +#else + ld.l r0, 24, r1 +#endif + ptabs/l r1, tr1 +#if __SH5__ == 64 + ld.q r0, 32, r1 +#else + ld.l r0, 28, r1 +#endif + blink tr1, r63 + + ENDFUNC(GLOBAL(GCC_nested_trampoline)) +#endif /* L_nested_trampoline */ +#endif /* __SH5__ */ +#if __SH5__ == 32 +#ifdef L_push_pop_shmedia_regs + .section .text..SHmedia32,"ax" + .mode SHmedia + .align 2 +#ifndef __SH4_NOFPU__ + .global GLOBAL(GCC_push_shmedia_regs) + FUNC(GLOBAL(GCC_push_shmedia_regs)) +GLOBAL(GCC_push_shmedia_regs): + addi.l r15, -14*8, r15 + fst.d r15, 13*8, dr62 + fst.d r15, 12*8, dr60 + fst.d r15, 11*8, dr58 + fst.d r15, 10*8, dr56 + fst.d r15, 9*8, dr54 + fst.d r15, 8*8, dr52 + fst.d r15, 7*8, dr50 + fst.d r15, 6*8, dr48 + fst.d r15, 5*8, dr46 + fst.d r15, 4*8, dr44 + fst.d r15, 3*8, dr42 + fst.d r15, 2*8, dr40 + fst.d r15, 1*8, dr38 + fst.d r15, 0*8, dr36 +#else /* ! __SH4_NOFPU__ */ + .global GLOBAL(GCC_push_shmedia_regs_nofpu) + FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) +GLOBAL(GCC_push_shmedia_regs_nofpu): +#endif /* ! __SH4_NOFPU__ */ + ptabs/l r18, tr0 + addi.l r15, -27*8, r15 + gettr tr7, r62 + gettr tr6, r61 + gettr tr5, r60 + st.q r15, 26*8, r62 + st.q r15, 25*8, r61 + st.q r15, 24*8, r60 + st.q r15, 23*8, r59 + st.q r15, 22*8, r58 + st.q r15, 21*8, r57 + st.q r15, 20*8, r56 + st.q r15, 19*8, r55 + st.q r15, 18*8, r54 + st.q r15, 17*8, r53 + st.q r15, 16*8, r52 + st.q r15, 15*8, r51 + st.q r15, 14*8, r50 + st.q r15, 13*8, r49 + st.q r15, 12*8, r48 + st.q r15, 11*8, r47 + st.q r15, 10*8, r46 + st.q r15, 9*8, r45 + st.q r15, 8*8, r44 + st.q r15, 7*8, r35 + st.q r15, 6*8, r34 + st.q r15, 5*8, r33 + st.q r15, 4*8, r32 + st.q r15, 3*8, r31 + st.q r15, 2*8, r30 + st.q r15, 1*8, r29 + st.q r15, 0*8, r28 + blink tr0, r63 +#ifndef __SH4_NOFPU__ + ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) +#else + ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) +#endif +#ifndef __SH4_NOFPU__ + .global GLOBAL(GCC_pop_shmedia_regs) + FUNC(GLOBAL(GCC_pop_shmedia_regs)) +GLOBAL(GCC_pop_shmedia_regs): + pt .L0, tr1 + movi 41*8, r0 + fld.d r15, 40*8, dr62 + fld.d r15, 39*8, dr60 + fld.d r15, 38*8, dr58 + fld.d r15, 37*8, dr56 + fld.d r15, 36*8, dr54 + fld.d r15, 35*8, dr52 + fld.d r15, 34*8, dr50 + fld.d r15, 33*8, dr48 + fld.d r15, 32*8, dr46 + fld.d r15, 31*8, dr44 + fld.d r15, 30*8, dr42 + fld.d r15, 29*8, dr40 + fld.d r15, 28*8, dr38 + fld.d r15, 27*8, dr36 + blink tr1, r63 +#else /* ! __SH4_NOFPU__ */ + .global GLOBAL(GCC_pop_shmedia_regs_nofpu) + FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) +GLOBAL(GCC_pop_shmedia_regs_nofpu): +#endif /* ! __SH4_NOFPU__ */ + movi 27*8, r0 +.L0: + ptabs r18, tr0 + ld.q r15, 26*8, r62 + ld.q r15, 25*8, r61 + ld.q r15, 24*8, r60 + ptabs r62, tr7 + ptabs r61, tr6 + ptabs r60, tr5 + ld.q r15, 23*8, r59 + ld.q r15, 22*8, r58 + ld.q r15, 21*8, r57 + ld.q r15, 20*8, r56 + ld.q r15, 19*8, r55 + ld.q r15, 18*8, r54 + ld.q r15, 17*8, r53 + ld.q r15, 16*8, r52 + ld.q r15, 15*8, r51 + ld.q r15, 14*8, r50 + ld.q r15, 13*8, r49 + ld.q r15, 12*8, r48 + ld.q r15, 11*8, r47 + ld.q r15, 10*8, r46 + ld.q r15, 9*8, r45 + ld.q r15, 8*8, r44 + ld.q r15, 7*8, r35 + ld.q r15, 6*8, r34 + ld.q r15, 5*8, r33 + ld.q r15, 4*8, r32 + ld.q r15, 3*8, r31 + ld.q r15, 2*8, r30 + ld.q r15, 1*8, r29 + ld.q r15, 0*8, r28 + add.l r15, r0, r15 + blink tr0, r63 + +#ifndef __SH4_NOFPU__ + ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) +#else + ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) +#endif +#endif /* __SH5__ == 32 */ +#endif /* L_push_pop_shmedia_regs */ + +#ifdef L_div_table +#if __SH5__ +#if defined(__pic__) && defined(__SHMEDIA__) + .global GLOBAL(sdivsi3) + FUNC(GLOBAL(sdivsi3)) +#if __SH5__ == 32 + .section .text..SHmedia32,"ax" +#else + .text +#endif +#if 0 +/* ??? FIXME: Presumably due to a linker bug, exporting data symbols + in a text section does not work (at least for shared libraries): + the linker sets the LSB of the address as if this was SHmedia code. */ +#define TEXT_DATA_BUG +#endif + .align 2 + // inputs: r4,r5 + // clobbered: r1,r18,r19,r20,r21,r25,tr0 + // result in r0 + .global GLOBAL(sdivsi3) +GLOBAL(sdivsi3): +#ifdef TEXT_DATA_BUG + ptb datalabel Local_div_table,tr0 +#else + ptb GLOBAL(div_table_internal),tr0 +#endif + nsb r5, r1 + shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 + shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) + /* bubble */ + gettr tr0,r20 + ldx.ub r20, r21, r19 // u0.8 + shari r25, 32, r25 // normalize to s2.30 + shlli r21, 1, r21 + muls.l r25, r19, r19 // s2.38 + ldx.w r20, r21, r21 // s2.14 + ptabs r18, tr0 + shari r19, 24, r19 // truncate to s2.14 + sub r21, r19, r19 // some 11 bit inverse in s1.14 + muls.l r19, r19, r21 // u0.28 + sub r63, r1, r1 + addi r1, 92, r1 + muls.l r25, r21, r18 // s2.58 + shlli r19, 45, r19 // multiply by two and convert to s2.58 + /* bubble */ + sub r19, r18, r18 + shari r18, 28, r18 // some 22 bit inverse in s1.30 + muls.l r18, r25, r0 // s2.60 + muls.l r18, r4, r25 // s32.30 + /* bubble */ + shari r0, 16, r19 // s-16.44 + muls.l r19, r18, r19 // s-16.74 + shari r25, 63, r0 + shari r4, 14, r18 // s19.-14 + shari r19, 30, r19 // s-16.44 + muls.l r19, r18, r19 // s15.30 + xor r21, r0, r21 // You could also use the constant 1 << 27. + add r21, r25, r21 + sub r21, r19, r21 + shard r21, r1, r21 + sub r21, r0, r0 + blink tr0, r63 + ENDFUNC(GLOBAL(sdivsi3)) +/* This table has been generated by divtab.c . +Defects for bias -330: + Max defect: 6.081536e-07 at -1.000000e+00 + Min defect: 2.849516e-08 at 1.030651e+00 + Max 2nd step defect: 9.606539e-12 at -1.000000e+00 + Min 2nd step defect: 0.000000e+00 at 0.000000e+00 + Defect at 1: 1.238659e-07 + Defect at -2: 1.061708e-07 */ +#else /* ! __pic__ || ! __SHMEDIA__ */ + .section .rodata +#endif /* __pic__ */ +#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__) + .balign 2 + .type Local_div_table,@object + .size Local_div_table,128 +/* negative division constants */ + .word -16638 + .word -17135 + .word -17737 + .word -18433 + .word -19103 + .word -19751 + .word -20583 + .word -21383 + .word -22343 + .word -23353 + .word -24407 + .word -25582 + .word -26863 + .word -28382 + .word -29965 + .word -31800 +/* negative division factors */ + .byte 66 + .byte 70 + .byte 75 + .byte 81 + .byte 87 + .byte 93 + .byte 101 + .byte 109 + .byte 119 + .byte 130 + .byte 142 + .byte 156 + .byte 172 + .byte 192 + .byte 214 + .byte 241 + .skip 16 +Local_div_table: + .skip 16 +/* positive division factors */ + .byte 241 + .byte 214 + .byte 192 + .byte 172 + .byte 156 + .byte 142 + .byte 130 + .byte 119 + .byte 109 + .byte 101 + .byte 93 + .byte 87 + .byte 81 + .byte 75 + .byte 70 + .byte 66 +/* positive division constants */ + .word 31801 + .word 29966 + .word 28383 + .word 26864 + .word 25583 + .word 24408 + .word 23354 + .word 22344 + .word 21384 + .word 20584 + .word 19752 + .word 19104 + .word 18434 + .word 17738 + .word 17136 + .word 16639 + .section .rodata +#endif /* TEXT_DATA_BUG */ + .balign 2 + .type GLOBAL(div_table),@object + .size GLOBAL(div_table),128 +/* negative division constants */ + .word -16638 + .word -17135 + .word -17737 + .word -18433 + .word -19103 + .word -19751 + .word -20583 + .word -21383 + .word -22343 + .word -23353 + .word -24407 + .word -25582 + .word -26863 + .word -28382 + .word -29965 + .word -31800 +/* negative division factors */ + .byte 66 + .byte 70 + .byte 75 + .byte 81 + .byte 87 + .byte 93 + .byte 101 + .byte 109 + .byte 119 + .byte 130 + .byte 142 + .byte 156 + .byte 172 + .byte 192 + .byte 214 + .byte 241 + .skip 16 + .global GLOBAL(div_table) +GLOBAL(div_table): + HIDDEN_ALIAS(div_table_internal,div_table) + .skip 16 +/* positive division factors */ + .byte 241 + .byte 214 + .byte 192 + .byte 172 + .byte 156 + .byte 142 + .byte 130 + .byte 119 + .byte 109 + .byte 101 + .byte 93 + .byte 87 + .byte 81 + .byte 75 + .byte 70 + .byte 66 +/* positive division constants */ + .word 31801 + .word 29966 + .word 28383 + .word 26864 + .word 25583 + .word 24408 + .word 23354 + .word 22344 + .word 21384 + .word 20584 + .word 19752 + .word 19104 + .word 18434 + .word 17738 + .word 17136 + .word 16639 + +#elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) +/* This code uses shld, thus is not suitable for SH1 / SH2. */ + +/* Signed / unsigned division without use of FPU, optimized for SH4. + Uses a lookup table for divisors in the range -128 .. +128, and + div1 with case distinction for larger divisors in three more ranges. + The code is lumped together with the table to allow the use of mova. */ +#ifdef __LITTLE_ENDIAN__ +#define L_LSB 0 +#define L_LSWMSB 1 +#define L_MSWLSB 2 +#else +#define L_LSB 3 +#define L_LSWMSB 2 +#define L_MSWLSB 1 +#endif + + .balign 4 + .global GLOBAL(udivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) +GLOBAL(udivsi3_i4i): + mov.w LOCAL(c128_w), r1 + div0u + mov r4,r0 + shlr8 r0 + cmp/hi r1,r5 + extu.w r5,r1 + bf LOCAL(udiv_le128) + cmp/eq r5,r1 + bf LOCAL(udiv_ge64k) + shlr r0 + mov r5,r1 + shll16 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 + div1 r5,r0 + div1 r5,r0 + bra LOCAL(udiv_25) + div1 r5,r0 + +LOCAL(div_le128): + mova LOCAL(div_table_ix),r0 + bra LOCAL(div_le128_2) + mov.b @(r0,r5),r1 +LOCAL(udiv_le128): + mov.l r4,@-r15 + mova LOCAL(div_table_ix),r0 + mov.b @(r0,r5),r1 + mov.l r5,@-r15 +LOCAL(div_le128_2): + mova LOCAL(div_table_inv),r0 + mov.l @(r0,r1),r1 + mov r5,r0 + tst #0xfe,r0 + mova LOCAL(div_table_clz),r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + bt/s LOCAL(div_by_1) + mov r4,r0 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + rts + shld r1,r0 + +LOCAL(div_by_1_neg): + neg r4,r0 +LOCAL(div_by_1): + mov.l @r15+,r5 + rts + mov.l @r15+,r4 + +LOCAL(div_ge64k): + bt/s LOCAL(div_r8) + div0u + shll8 r5 + bra LOCAL(div_ge64k_2) + div1 r5,r0 +LOCAL(udiv_ge64k): + cmp/hi r0,r5 + mov r5,r1 + bt LOCAL(udiv_r8) + shll8 r5 + mov.l r4,@-r15 + div1 r5,r0 + mov.l r1,@-r15 +LOCAL(div_ge64k_2): + div1 r5,r0 + mov.l LOCAL(zero_l),r1 + .rept 4 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w LOCAL(m256_w),r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra LOCAL(div_ge64k_end) + xor r4,r0 + +LOCAL(div_r8): + shll16 r4 + bra LOCAL(div_r8_2) + shll8 r4 +LOCAL(udiv_r8): + mov.l r4,@-r15 + shll16 r4 + clrt + shll8 r4 + mov.l r5,@-r15 +LOCAL(div_r8_2): + rotcl r4 + mov r0,r1 + div1 r5,r1 + mov r4,r0 + rotcl r0 + mov r5,r4 + div1 r5,r1 + .rept 5 + rotcl r0; div1 r5,r1 + .endr + rotcl r0 + mov.l @r15+,r5 + div1 r4,r1 + mov.l @r15+,r4 + rts + rotcl r0 + + ENDFUNC(GLOBAL(udivsi3_i4i)) + + .global GLOBAL(sdivsi3_i4i) + FUNC(GLOBAL(sdivsi3_i4i)) + /* This is link-compatible with a GLOBAL(sdivsi3) call, + but we effectively clobber only r1. */ +GLOBAL(sdivsi3_i4i): + mov.l r4,@-r15 + cmp/pz r5 + mov.w LOCAL(c128_w), r1 + bt/s LOCAL(pos_divisor) + cmp/pz r4 + mov.l r5,@-r15 + neg r5,r5 + bt/s LOCAL(neg_result) + cmp/hi r1,r5 + neg r4,r4 +LOCAL(pos_result): + extu.w r5,r0 + bf LOCAL(div_le128) + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s LOCAL(div_ge64k) + cmp/hi r0,r5 + div0u + shll16 r5 + div1 r5,r0 + div1 r5,r0 + div1 r5,r0 +LOCAL(udiv_25): + mov.l LOCAL(zero_l),r1 + div1 r5,r0 + div1 r5,r0 + mov.l r1,@-r15 + .rept 3 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +LOCAL(div_ge64k_end): + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. + extu.b r0,r0 + mov.l @r15+,r5 + or r4,r0 + mov.l @r15+,r4 + rts + rotcl r0 + +LOCAL(div_le128_neg): + tst #0xfe,r0 + mova LOCAL(div_table_ix),r0 + mov.b @(r0,r5),r1 + mova LOCAL(div_table_inv),r0 + bt/s LOCAL(div_by_1_neg) + mov.l @(r0,r1),r1 + mova LOCAL(div_table_clz),r0 + dmulu.l r1,r4 + mov.b @(r0,r5),r1 + mov.l @r15+,r5 + sts mach,r0 + /* clrt */ + addc r4,r0 + mov.l @r15+,r4 + rotcr r0 + shld r1,r0 + rts + neg r0,r0 + +LOCAL(pos_divisor): + mov.l r5,@-r15 + bt/s LOCAL(pos_result) + cmp/hi r1,r5 + neg r4,r4 +LOCAL(neg_result): + extu.w r5,r0 + bf LOCAL(div_le128_neg) + cmp/eq r5,r0 + mov r4,r0 + shlr8 r0 + bf/s LOCAL(div_ge64k_neg) + cmp/hi r0,r5 + div0u + mov.l LOCAL(zero_l),r1 + shll16 r5 + div1 r5,r0 + mov.l r1,@-r15 + .rept 7 + div1 r5,r0 + .endr + mov.b r0,@(L_MSWLSB,r15) + xtrct r4,r0 + swap.w r0,r0 + .rept 8 + div1 r5,r0 + .endr + mov.b r0,@(L_LSWMSB,r15) +LOCAL(div_ge64k_neg_end): + .rept 8 + div1 r5,r0 + .endr + mov.l @r15+,r4 ! zero-extension and swap using LS unit. + extu.b r0,r1 + mov.l @r15+,r5 + or r4,r1 +LOCAL(div_r8_neg_end): + mov.l @r15+,r4 + rotcl r1 + rts + neg r1,r0 + +LOCAL(div_ge64k_neg): + bt/s LOCAL(div_r8_neg) + div0u + shll8 r5 + mov.l LOCAL(zero_l),r1 + .rept 6 + div1 r5,r0 + .endr + mov.l r1,@-r15 + div1 r5,r0 + mov.w LOCAL(m256_w),r1 + div1 r5,r0 + mov.b r0,@(L_LSWMSB,r15) + xor r4,r0 + and r1,r0 + bra LOCAL(div_ge64k_neg_end) + xor r4,r0 + +LOCAL(c128_w): + .word 128 + +LOCAL(div_r8_neg): + clrt + shll16 r4 + mov r4,r1 + shll8 r1 + mov r5,r4 + .rept 7 + rotcl r1; div1 r5,r0 + .endr + mov.l @r15+,r5 + rotcl r1 + bra LOCAL(div_r8_neg_end) + div1 r4,r0 + +LOCAL(m256_w): + .word 0xff00 +/* This table has been generated by divtab-sh4.c. */ + .balign 4 +LOCAL(div_table_clz): + .byte 0 + .byte 1 + .byte 0 + .byte -1 + .byte -1 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 +/* Lookup table translating positive divisor to index into table of + normalized inverse. N.B. the '0' entry is also the last entry of the + previous table, and causes an unaligned access for division by zero. */ +LOCAL(div_table_ix): + .byte -6 + .byte -128 + .byte -128 + .byte 0 + .byte -128 + .byte -64 + .byte 0 + .byte 64 + .byte -128 + .byte -96 + .byte -64 + .byte -32 + .byte 0 + .byte 32 + .byte 64 + .byte 96 + .byte -128 + .byte -112 + .byte -96 + .byte -80 + .byte -64 + .byte -48 + .byte -32 + .byte -16 + .byte 0 + .byte 16 + .byte 32 + .byte 48 + .byte 64 + .byte 80 + .byte 96 + .byte 112 + .byte -128 + .byte -120 + .byte -112 + .byte -104 + .byte -96 + .byte -88 + .byte -80 + .byte -72 + .byte -64 + .byte -56 + .byte -48 + .byte -40 + .byte -32 + .byte -24 + .byte -16 + .byte -8 + .byte 0 + .byte 8 + .byte 16 + .byte 24 + .byte 32 + .byte 40 + .byte 48 + .byte 56 + .byte 64 + .byte 72 + .byte 80 + .byte 88 + .byte 96 + .byte 104 + .byte 112 + .byte 120 + .byte -128 + .byte -124 + .byte -120 + .byte -116 + .byte -112 + .byte -108 + .byte -104 + .byte -100 + .byte -96 + .byte -92 + .byte -88 + .byte -84 + .byte -80 + .byte -76 + .byte -72 + .byte -68 + .byte -64 + .byte -60 + .byte -56 + .byte -52 + .byte -48 + .byte -44 + .byte -40 + .byte -36 + .byte -32 + .byte -28 + .byte -24 + .byte -20 + .byte -16 + .byte -12 + .byte -8 + .byte -4 + .byte 0 + .byte 4 + .byte 8 + .byte 12 + .byte 16 + .byte 20 + .byte 24 + .byte 28 + .byte 32 + .byte 36 + .byte 40 + .byte 44 + .byte 48 + .byte 52 + .byte 56 + .byte 60 + .byte 64 + .byte 68 + .byte 72 + .byte 76 + .byte 80 + .byte 84 + .byte 88 + .byte 92 + .byte 96 + .byte 100 + .byte 104 + .byte 108 + .byte 112 + .byte 116 + .byte 120 + .byte 124 + .byte -128 +/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ + .balign 4 +LOCAL(zero_l): + .long 0x0 + .long 0xF81F81F9 + .long 0xF07C1F08 + .long 0xE9131AC0 + .long 0xE1E1E1E2 + .long 0xDAE6076C + .long 0xD41D41D5 + .long 0xCD856891 + .long 0xC71C71C8 + .long 0xC0E07039 + .long 0xBACF914D + .long 0xB4E81B4F + .long 0xAF286BCB + .long 0xA98EF607 + .long 0xA41A41A5 + .long 0x9EC8E952 + .long 0x9999999A + .long 0x948B0FCE + .long 0x8F9C18FA + .long 0x8ACB90F7 + .long 0x86186187 + .long 0x81818182 + .long 0x7D05F418 + .long 0x78A4C818 + .long 0x745D1746 + .long 0x702E05C1 + .long 0x6C16C16D + .long 0x68168169 + .long 0x642C8591 + .long 0x60581606 + .long 0x5C9882BA + .long 0x58ED2309 +LOCAL(div_table_inv): + .long 0x55555556 + .long 0x51D07EAF + .long 0x4E5E0A73 + .long 0x4AFD6A06 + .long 0x47AE147B + .long 0x446F8657 + .long 0x41414142 + .long 0x3E22CBCF + .long 0x3B13B13C + .long 0x38138139 + .long 0x3521CFB3 + .long 0x323E34A3 + .long 0x2F684BDB + .long 0x2C9FB4D9 + .long 0x29E4129F + .long 0x27350B89 + .long 0x24924925 + .long 0x21FB7813 + .long 0x1F7047DD + .long 0x1CF06ADB + .long 0x1A7B9612 + .long 0x18118119 + .long 0x15B1E5F8 + .long 0x135C8114 + .long 0x11111112 + .long 0xECF56BF + .long 0xC9714FC + .long 0xA6810A7 + .long 0x8421085 + .long 0x624DD30 + .long 0x4104105 + .long 0x2040811 + /* maximum error: 0.987342 scaled: 0.921875*/ + + ENDFUNC(GLOBAL(sdivsi3_i4i)) +#endif /* SH3 / SH4 */ + +#endif /* L_div_table */ + +#ifdef L_udiv_qrnnd_16 +#if !__SHMEDIA__ + HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) + /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ + /* n1 < d, but n1 might be larger than d1. */ + .global GLOBAL(udiv_qrnnd_16) + .balign 8 +GLOBAL(udiv_qrnnd_16): + div0u + cmp/hi r6,r0 + bt .Lots + .rept 16 + div1 r6,r0 + .endr + extu.w r0,r1 + bt 0f + add r6,r0 +0: rotcl r1 + mulu.w r1,r5 + xtrct r4,r0 + swap.w r0,r0 + sts macl,r2 + cmp/hs r2,r0 + sub r2,r0 + bt 0f + addc r5,r0 + add #-1,r1 + bt 0f +1: add #-1,r1 + rts + add r5,r0 + .balign 8 +.Lots: + sub r5,r0 + swap.w r4,r1 + xtrct r0,r1 + clrt + mov r1,r0 + addc r5,r0 + mov #-1,r1 + SL1(bf, 1b, + shlr16 r1) +0: rts + nop + ENDFUNC(GLOBAL(udiv_qrnnd_16)) +#endif /* !__SHMEDIA__ */ +#endif /* L_udiv_qrnnd_16 */ diff --git a/gcc-4.9/libgcc/config/sh/lib1funcs.h b/gcc-4.9/libgcc/config/sh/lib1funcs.h new file mode 100644 index 000000000..be25dc825 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/lib1funcs.h @@ -0,0 +1,74 @@ +/* Copyright (C) 1994-2014 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifdef __ELF__ +#define LOCAL(X) .L_##X +#define FUNC(X) .type X,@function +#define HIDDEN_FUNC(X) FUNC(X); .hidden X +#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) +#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X +#define ENDFUNC(X) ENDFUNC0(X) +#else +#define LOCAL(X) L_##X +#define FUNC(X) +#define HIDDEN_FUNC(X) +#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y) +#define ENDFUNC(X) +#endif + +#define CONCAT(A,B) A##B +#define GLOBAL0(U,X) CONCAT(U,__##X) +#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X) + +#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) + +#if defined __SH2A__ && defined __FMOVD_ENABLED__ +#undef FMOVD_WORKS +#define FMOVD_WORKS +#endif + +#ifdef __LITTLE_ENDIAN__ +#define DR00 fr1 +#define DR01 fr0 +#define DR20 fr3 +#define DR21 fr2 +#define DR40 fr5 +#define DR41 fr4 +#else /* !__LITTLE_ENDIAN__ */ +#define DR00 fr0 +#define DR01 fr1 +#define DR20 fr2 +#define DR21 fr3 +#define DR40 fr4 +#define DR41 fr5 +#endif /* !__LITTLE_ENDIAN__ */ + +#ifdef __sh1__ +#define SL(branch, dest, in_slot, in_slot_arg2) \ + in_slot, in_slot_arg2; branch dest +#define SL1(branch, dest, in_slot) \ + in_slot; branch dest +#else /* ! __sh1__ */ +#define SL(branch, dest, in_slot, in_slot_arg2) \ + branch##.s dest; in_slot, in_slot_arg2 +#define SL1(branch, dest, in_slot) \ + branch##/s dest; in_slot +#endif /* !__sh1__ */ diff --git a/gcc-4.9/libgcc/config/sh/libgcc-excl.ver b/gcc-4.9/libgcc/config/sh/libgcc-excl.ver new file mode 100644 index 000000000..325c74054 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/libgcc-excl.ver @@ -0,0 +1,8 @@ +# Exclude various symbols which should not be visible in libgcc.so for SH. +%exclude { + __ashlsi3 + __ashrsi3 + __lshrsi3 + __mulsi3 # this is an SH1-only symbol. + __udivsi3 +} diff --git a/gcc-4.9/libgcc/config/sh/libgcc-glibc.ver b/gcc-4.9/libgcc/config/sh/libgcc-glibc.ver new file mode 100644 index 000000000..17a1d809e --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/libgcc-glibc.ver @@ -0,0 +1,48 @@ +# Copyright (C) 2002-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# In order to work around the very problems that force us to now generally +# create a libgcc.so, glibc reexported a number of routines from libgcc.a. +# By now choosing the same version tags for these specific routines, we +# maintain enough binary compatibility to allow future versions of glibc +# to defer implementation of these routines to libgcc.so via DT_AUXILIARY. + +# Note that we cannot use the default libgcc-glibc.ver file on sh, +# because GLIBC_2.0 does not exist on this architecture, as the first +# ever glibc release on the platform was GLIBC_2.2. + +%exclude { + __register_frame + __register_frame_table + __deregister_frame + __register_frame_info + __deregister_frame_info + __frame_state_for + __register_frame_info_table +} + +%inherit GCC_3.0 GLIBC_2.2 +GLIBC_2.2 { + __register_frame + __register_frame_table + __deregister_frame + __register_frame_info + __deregister_frame_info + __frame_state_for + __register_frame_info_table +} diff --git a/gcc-4.9/libgcc/config/sh/linux-atomic.c b/gcc-4.9/libgcc/config/sh/linux-atomic.c new file mode 100644 index 000000000..1ef02eaa6 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/linux-atomic.c @@ -0,0 +1,81 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Atomic built-in C functions for link compatibility with older code that + was compiled to emit function calls for atomic built-ins. + Notice that if no atomic model has been selected the functions in this + file must not be generated, or else they will result in infinite no-op + loops. + Notice also, that all the generated functions below take three parameters, + which is not actually true for some of the built-in functions. However, + on SH this does not matter, since the first four parameters are always + passed in call clobbered registers. + The return type for the sync_bool_compare_and_swap functions is also + actually supposed to be a bool, but this also doesn't matter since any + int return type <= 32 bit is returned in R0 on SH. */ + +#if !__SH_ATOMIC_MODEL_NONE__ + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; + +#define uint8_t_sz 1 +#define uint16_t_sz 2 +#define uint32_t_sz 4 + +#define typesz(x) x##_sz + +#define concat(x,y) __ ## x ## _ ## y +#define eval(x,y) concat (x,y) +#define genname(f,t) eval(f, typesz (t)) + +#define func1(name, type) \ + type __attribute__((visibility("hidden"))) \ + genname (name, type) (type* x, type y, type z) \ + { \ + return __##name (x, y, z); \ + } + +#define genfuncs(name) \ + func1 (name, uint8_t) \ + func1 (name, uint16_t) \ + func1 (name, uint32_t) + +genfuncs (sync_lock_test_and_set) +genfuncs (sync_val_compare_and_swap) +genfuncs (sync_bool_compare_and_swap) +genfuncs (sync_fetch_and_add) +genfuncs (sync_fetch_and_or) +genfuncs (sync_fetch_and_and) +genfuncs (sync_fetch_and_xor) +genfuncs (sync_fetch_and_sub) +genfuncs (sync_fetch_and_nand) +genfuncs (sync_add_and_fetch) +genfuncs (sync_or_and_fetch) +genfuncs (sync_and_and_fetch) +genfuncs (sync_xor_and_fetch) +genfuncs (sync_sub_and_fetch) +genfuncs (sync_nand_and_fetch) + +#endif diff --git a/gcc-4.9/libgcc/config/sh/linux-unwind.h b/gcc-4.9/libgcc/config/sh/linux-unwind.h new file mode 100644 index 000000000..4875706d4 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/linux-unwind.h @@ -0,0 +1,255 @@ +/* DWARF2 EH unwinding support for SH Linux. + Copyright (C) 2004-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. + Don't use this at all if inhibit_libc is used. */ + +#ifndef inhibit_libc + +#include <signal.h> +#include <sys/ucontext.h> +#include "insn-constants.h" + +# if defined (__SH5__) +#define SH_DWARF_FRAME_GP0 0 +#define SH_DWARF_FRAME_FP0 77 +#define SH_DWARF_FRAME_BT0 68 +#define SH_DWARF_FRAME_PR_MEDIA 18 +#define SH_DWARF_FRAME_SR 65 +#define SH_DWARF_FRAME_FPSCR 76 +#else +#define SH_DWARF_FRAME_GP0 0 +#define SH_DWARF_FRAME_FP0 25 +#define SH_DWARF_FRAME_XD0 87 +#define SH_DWARF_FRAME_PR 17 +#define SH_DWARF_FRAME_GBR 18 +#define SH_DWARF_FRAME_MACH 20 +#define SH_DWARF_FRAME_MACL 21 +#define SH_DWARF_FRAME_PC 16 +#define SH_DWARF_FRAME_SR 22 +#define SH_DWARF_FRAME_FPUL 23 +#define SH_DWARF_FRAME_FPSCR 24 +#endif /* defined (__SH5__) */ + +#if defined (__SH5__) + +#define MD_FALLBACK_FRAME_STATE_FOR shmedia_fallback_frame_state + +static _Unwind_Reason_Code +shmedia_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned char *pc = context->ra; + struct sigcontext *sc; + long new_cfa; + int i, r; + + /* movi 0x10,r9; shori 0x77,r9; trapa r9; nop (sigreturn) */ + /* movi 0x10,r9; shori 0xad,r9; trapa r9; nop (rt_sigreturn) */ + if ((*(unsigned long *) (pc-1) == 0xcc004090) + && (*(unsigned long *) (pc+3) == 0xc801dc90) + && (*(unsigned long *) (pc+7) == 0x6c91fff0) + && (*(unsigned long *) (pc+11) == 0x6ff0fff0)) + sc = context->cfa; + else if ((*(unsigned long *) (pc-1) == 0xcc004090) + && (*(unsigned long *) (pc+3) == 0xc802b490) + && (*(unsigned long *) (pc+7) == 0x6c91fff0) + && (*(unsigned long *) (pc+11) == 0x6ff0fff0)) + { + struct rt_sigframe { + siginfo_t *pinfo; + void *puc; + siginfo_t info; + struct ucontext uc; + } *rt_ = context->cfa; + /* The void * cast is necessary to avoid an aliasing warning. + The aliasing warning is correct, but should not be a problem + because it does not alias anything. */ + sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + new_cfa = sc->sc_regs[15]; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 15; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + + for (i = 0; i < 63; i++) + { + if (i == 15) + continue; + + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset + = (long)&(sc->sc_regs[i]) - new_cfa; + } + + fs->regs.reg[SH_DWARF_FRAME_SR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_SR].loc.offset + = (long)&(sc->sc_sr) - new_cfa; + + r = SH_DWARF_FRAME_BT0; + for (i = 0; i < 8; i++) + { + fs->regs.reg[r+i].how = REG_SAVED_OFFSET; + fs->regs.reg[r+i].loc.offset + = (long)&(sc->sc_tregs[i]) - new_cfa; + } + + r = SH_DWARF_FRAME_FP0; + for (i = 0; i < 32; i++) + { + fs->regs.reg[r+i].how = REG_SAVED_OFFSET; + fs->regs.reg[r+i].loc.offset + = (long)&(sc->sc_fpregs[i]) - new_cfa; + } + + fs->regs.reg[SH_DWARF_FRAME_FPSCR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_FPSCR].loc.offset + = (long)&(sc->sc_fpscr) - new_cfa; + + /* We use the slot for the zero register to save return address. */ + fs->regs.reg[63].how = REG_SAVED_OFFSET; + fs->regs.reg[63].loc.offset + = (long)&(sc->sc_pc) - new_cfa; + fs->retaddr_column = 63; + fs->signal_frame = 1; + return _URC_NO_REASON; +} + +#else /* defined (__SH5__) */ + +#define MD_FALLBACK_FRAME_STATE_FOR sh_fallback_frame_state + +static _Unwind_Reason_Code +sh_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned char *pc = context->ra; + struct sigcontext *sc; + long new_cfa; + int i; +#if defined (__SH3E__) || defined (__SH4__) + int r; +#endif + + /* mov.w 1f,r3; trapa #0x10; 1: .short 0x77 (sigreturn) */ + /* mov.w 1f,r3; trapa #0x10; 1: .short 0xad (rt_sigreturn) */ + /* Newer kernel uses pad instructions to avoid an SH-4 core bug. */ + /* mov.w 1f,r3; trapa #0x10; or r0,r0; or r0,r0; or r0,r0; or r0,r0; + or r0,r0; 1: .short 0x77 (sigreturn) */ + /* mov.w 1f,r3; trapa #0x10; or r0,r0; or r0,r0; or r0,r0; or r0,r0; + or r0,r0; 1: .short 0xad (rt_sigreturn) */ + if (((*(unsigned short *) (pc+0) == 0x9300) + && (*(unsigned short *) (pc+2) == 0xc310) + && (*(unsigned short *) (pc+4) == 0x0077)) + || (((*(unsigned short *) (pc+0) == 0x9305) + && (*(unsigned short *) (pc+2) == 0xc310) + && (*(unsigned short *) (pc+14) == 0x0077)))) + sc = context->cfa; + else if (((*(unsigned short *) (pc+0) == 0x9300) + && (*(unsigned short *) (pc+2) == 0xc310) + && (*(unsigned short *) (pc+4) == 0x00ad)) + || (((*(unsigned short *) (pc+0) == 0x9305) + && (*(unsigned short *) (pc+2) == 0xc310) + && (*(unsigned short *) (pc+14) == 0x00ad)))) + { + struct rt_sigframe { + siginfo_t info; + struct ucontext uc; + } *rt_ = context->cfa; + /* The void * cast is necessary to avoid an aliasing warning. + The aliasing warning is correct, but should not be a problem + because it does not alias anything. */ + sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + new_cfa = sc->sc_regs[15]; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 15; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + + for (i = 0; i < 15; i++) + { + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset + = (long)&(sc->sc_regs[i]) - new_cfa; + } + + fs->regs.reg[SH_DWARF_FRAME_PR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_PR].loc.offset + = (long)&(sc->sc_pr) - new_cfa; + fs->regs.reg[SH_DWARF_FRAME_SR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_SR].loc.offset + = (long)&(sc->sc_sr) - new_cfa; + fs->regs.reg[SH_DWARF_FRAME_GBR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_GBR].loc.offset + = (long)&(sc->sc_gbr) - new_cfa; + fs->regs.reg[SH_DWARF_FRAME_MACH].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_MACH].loc.offset + = (long)&(sc->sc_mach) - new_cfa; + fs->regs.reg[SH_DWARF_FRAME_MACL].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_MACL].loc.offset + = (long)&(sc->sc_macl) - new_cfa; + +#if defined (__SH3E__) || defined (__SH4__) + r = SH_DWARF_FRAME_FP0; + for (i = 0; i < 16; i++) + { + fs->regs.reg[r+i].how = REG_SAVED_OFFSET; + fs->regs.reg[r+i].loc.offset + = (long)&(sc->sc_fpregs[i]) - new_cfa; + } + + r = SH_DWARF_FRAME_XD0; + for (i = 0; i < 8; i++) + { + fs->regs.reg[r+i].how = REG_SAVED_OFFSET; + fs->regs.reg[r+i].loc.offset + = (long)&(sc->sc_xfpregs[2*i]) - new_cfa; + } + + fs->regs.reg[SH_DWARF_FRAME_FPUL].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_FPUL].loc.offset + = (long)&(sc->sc_fpul) - new_cfa; + fs->regs.reg[SH_DWARF_FRAME_FPSCR].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_FPSCR].loc.offset + = (long)&(sc->sc_fpscr) - new_cfa; +#endif + + fs->regs.reg[SH_DWARF_FRAME_PC].how = REG_SAVED_OFFSET; + fs->regs.reg[SH_DWARF_FRAME_PC].loc.offset + = (long)&(sc->sc_pc) - new_cfa; + fs->retaddr_column = SH_DWARF_FRAME_PC; + fs->signal_frame = 1; + return _URC_NO_REASON; +} +#endif /* defined (__SH5__) */ + +#endif /* inhibit_libc */ diff --git a/gcc-4.9/libgcc/config/sh/t-linux b/gcc-4.9/libgcc/config/sh/t-linux new file mode 100644 index 000000000..d316db937 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/t-linux @@ -0,0 +1,18 @@ +LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array + +LIB2ADD = $(srcdir)/config/sh/linux-atomic.c + +HOST_LIBGCC2_CFLAGS += -DNO_FPSCR_VALUES + +# Silence atomic built-in related warnings in linux-atomic.c. +# Unfortunately the conflicting types warning can't be disabled selectively. +HOST_LIBGCC2_CFLAGS += -w -Wno-sync-nand + +# Override t-slibgcc-elf-ver to export some libgcc symbols with +# the symbol versions that glibc used, and hide some lib1func +# routines which should not be called via PLT. We have to create +# the list from scratch. +SHLIB_MAPFILES = \ + libgcc-std.ver \ + $(srcdir)/config/sh/libgcc-excl.ver \ + $(srcdir)/config/sh/libgcc-glibc.ver diff --git a/gcc-4.9/libgcc/config/sh/t-netbsd b/gcc-4.9/libgcc/config/sh/t-netbsd new file mode 100644 index 000000000..93fe287e5 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/t-netbsd @@ -0,0 +1,2 @@ +LIB1ASMFUNCS_CACHE = _ic_invalidate + diff --git a/gcc-4.9/libgcc/config/sh/t-sh b/gcc-4.9/libgcc/config/sh/t-sh new file mode 100644 index 000000000..111bbf603 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/t-sh @@ -0,0 +1,60 @@ +# Copyright (C) 1993-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +LIB1ASMSRC = sh/lib1funcs.S +LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \ + _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ + _div_table _udiv_qrnnd_16 \ + $(LIB1ASMFUNCS_CACHE) +LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array + +crt1.o: $(srcdir)/config/sh/crt1.S + $(gcc_compile) -c $< + +ic_invalidate_array_4-100.o: $(srcdir)/config/sh/lib1funcs.S + $(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=1 -DWAY_SIZE=0x2000 $< +libic_invalidate_array_4-100.a: ic_invalidate_array_4-100.o + $(AR_CREATE_FOR_TARGET) $@ $< + +ic_invalidate_array_4-200.o: $(srcdir)/config/sh/lib1funcs.S + $(gcc_compile) -c -DL_ic_invalidate_array -DWAYS=2 -DWAY_SIZE=0x2000 $< +libic_invalidate_array_4-200.a: ic_invalidate_array_4-200.o + $(AR_CREATE_FOR_TARGET) $@ $< + +ic_invalidate_array_4a.o: $(srcdir)/config/sh/lib1funcs.S + $(gcc_compile) -c -DL_ic_invalidate_array -D__FORCE_SH4A__ $< +libic_invalidate_array_4a.a: ic_invalidate_array_4a.o + $(AR_CREATE_FOR_TARGET) $@ $< + +sdivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S + $(gcc_compile) -c -DL_sdivsi3_i4i $< +udivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.S + $(gcc_compile) -c -DL_udivsi3_i4i $< +unwind-dw2-Os-4-200.o: $(srcdir)/unwind-dw2.c + $(gcc_compile) $(LIBGCC2_CFLAGS) $(vis_hide) -fexceptions -Os -c $< + +OBJS_Os_4_200=sdivsi3_i4i-Os-4-200.o udivsi3_i4i-Os-4-200.o unwind-dw2-Os-4-200.o +libgcc-Os-4-200.a: $(OBJS_Os_4_200) + $(AR_CREATE_FOR_TARGET) $@ $(OBJS_Os_4_200) + +div_table-4-300.o: $(srcdir)/config/sh/lib1funcs-4-300.S + $(gcc_compile) -c -DL_div_table $< + +libgcc-4-300.a: div_table-4-300.o + $(AR_CREATE_FOR_TARGET) $@ div_table-4-300.o + diff --git a/gcc-4.9/libgcc/config/sh/t-sh64 b/gcc-4.9/libgcc/config/sh/t-sh64 new file mode 100644 index 000000000..fa9950e03 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/t-sh64 @@ -0,0 +1,6 @@ +LIB1ASMFUNCS = \ + _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ + _shcompact_call_trampoline _shcompact_return_trampoline \ + _shcompact_incoming_args _ic_invalidate _nested_trampoline \ + _push_pop_shmedia_regs \ + _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table diff --git a/gcc-4.9/libgcc/config/sh/t-superh b/gcc-4.9/libgcc/config/sh/t-superh new file mode 100644 index 000000000..b87aa5a31 --- /dev/null +++ b/gcc-4.9/libgcc/config/sh/t-superh @@ -0,0 +1,11 @@ +# Compile crt1-mmu.o as crt1.o with -DMMU_SUPPORT +crt1-mmu.o: $(srcdir)/config/sh/crt1.S + $(gcc_compile) -c -DMMU_SUPPORT $< + +# Compile gcrt1-mmu.o as crt1-mmu.o with -DPROFILE +gcrt1-mmu.o: $(srcdir)/config/sh/crt1.S + $(gcc_compile) -c -DPROFILE -DMMU_SUPPORT $< + +# For sh4-400: Compile gcrt1.o as crt1.o with -DPROFILE +gcrt1.o: $(srcdir)/config/sh/crt1.S + $(gcc_compile) -c -DPROFILE $< |