aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.4.3/gcc/config/sh/lib1funcs-Os-4-200.asm
diff options
context:
space:
mode:
authorJing Yu <jingyu@google.com>2010-07-22 14:03:48 -0700
committerJing Yu <jingyu@google.com>2010-07-22 14:03:48 -0700
commitb094d6c4bf572654a031ecc4afe675154c886dc5 (patch)
tree89394c56b05e13a5413ee60237d65b0214fd98e2 /gcc-4.4.3/gcc/config/sh/lib1funcs-Os-4-200.asm
parentdc34721ac3bf7e3c406fba8cfe9d139393345ec5 (diff)
downloadtoolchain_gcc-b094d6c4bf572654a031ecc4afe675154c886dc5.tar.gz
toolchain_gcc-b094d6c4bf572654a031ecc4afe675154c886dc5.tar.bz2
toolchain_gcc-b094d6c4bf572654a031ecc4afe675154c886dc5.zip
commit gcc-4.4.3 which is used to build gcc-4.4.3 Android toolchain in master.
The source is based on fsf gcc-4.4.3 and contains local patches which are recorded in gcc-4.4.3/README.google. Change-Id: Id8c6d6927df274ae9749196a1cc24dbd9abc9887
Diffstat (limited to 'gcc-4.4.3/gcc/config/sh/lib1funcs-Os-4-200.asm')
-rw-r--r--gcc-4.4.3/gcc/config/sh/lib1funcs-Os-4-200.asm322
1 files changed, 322 insertions, 0 deletions
diff --git a/gcc-4.4.3/gcc/config/sh/lib1funcs-Os-4-200.asm b/gcc-4.4.3/gcc/config/sh/lib1funcs-Os-4-200.asm
new file mode 100644
index 000000000..aae57ccd3
--- /dev/null
+++ b/gcc-4.4.3/gcc/config/sh/lib1funcs-Os-4-200.asm
@@ -0,0 +1,322 @@
+/* Copyright (C) 2006, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Moderately Space-optimized libgcc routines for the Renesas SH /
+ STMicroelectronics ST40 CPUs.
+ Contributed by J"orn Rennecke joern.rennecke@st.com. */
+
+#include "lib1funcs.h"
+
+#if !__SHMEDIA__
+#ifdef L_udivsi3_i4i
+
+/* 88 bytes; sh4-200 cycle counts:
+ divisor >= 2G: 11 cycles
+ dividend < 2G: 48 cycles
+ dividend >= 2G: divisor != 1: 54 cycles
+ dividend >= 2G, divisor == 1: 22 cycles */
+#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
+!! args in r4 and r5, result in r0, clobber r1
+
+ .global GLOBAL(udivsi3_i4i)
+ FUNC(GLOBAL(udivsi3_i4i))
+GLOBAL(udivsi3_i4i):
+ mova L1,r0
+ cmp/pz r5
+ sts fpscr,r1
+ lds.l @r0+,fpscr
+ sts.l fpul,@-r15
+ bf LOCAL(huge_divisor)
+ mov.l r1,@-r15
+ lds r4,fpul
+ cmp/pz r4
+#ifdef FMOVD_WORKS
+ fmov.d dr0,@-r15
+ float fpul,dr0
+ fmov.d dr2,@-r15
+ bt LOCAL(dividend_adjusted)
+ mov #1,r1
+ fmov.d @r0,dr2
+ cmp/eq r1,r5
+ bt LOCAL(div_by_1)
+ fadd dr2,dr0
+LOCAL(dividend_adjusted):
+ lds r5,fpul
+ float fpul,dr2
+ fdiv dr2,dr0
+LOCAL(div_by_1):
+ fmov.d @r15+,dr2
+ ftrc dr0,fpul
+ fmov.d @r15+,dr0
+#else /* !FMOVD_WORKS */
+ fmov.s DR01,@-r15
+ mov #1,r1
+ fmov.s DR00,@-r15
+ float fpul,dr0
+ fmov.s DR21,@-r15
+ bt/s LOCAL(dividend_adjusted)
+ fmov.s DR20,@-r15
+ cmp/eq r1,r5
+ bt LOCAL(div_by_1)
+ fmov.s @r0+,DR20
+ fmov.s @r0,DR21
+ fadd dr2,dr0
+LOCAL(dividend_adjusted):
+ lds r5,fpul
+ float fpul,dr2
+ fdiv dr2,dr0
+LOCAL(div_by_1):
+ fmov.s @r15+,DR20
+ fmov.s @r15+,DR21
+ ftrc dr0,fpul
+ fmov.s @r15+,DR00
+ fmov.s @r15+,DR01
+#endif /* !FMOVD_WORKS */
+ lds.l @r15+,fpscr
+ sts fpul,r0
+ rts
+ lds.l @r15+,fpul
+
+#ifdef FMOVD_WORKS
+ .p2align 3 ! make double below 8 byte aligned.
+#endif
+LOCAL(huge_divisor):
+ lds r1,fpscr
+ add #4,r15
+ cmp/hs r5,r4
+ rts
+ movt r0
+
+ .p2align 2
+L1:
+#ifndef FMOVD_WORKS
+ .long 0x80000
+#else
+ .long 0x180000
+#endif
+ .double 4294967296
+
+ ENDFUNC(GLOBAL(udivsi3_i4i))
+#elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */
+
+#if 0
+/* With 36 bytes, the following would probably be the most compact
+ implementation, but with 139 cycles on an sh4-200, it is extremely slow. */
+GLOBAL(udivsi3_i4i):
+ mov.l r2,@-r15
+ mov #0,r1
+ div0u
+ mov r1,r2
+ mov.l r3,@-r15
+ mov r1,r3
+ sett
+ mov r4,r0
+LOCAL(loop):
+ rotcr r2
+ ;
+ bt/s LOCAL(end)
+ cmp/gt r2,r3
+ rotcl r0
+ bra LOCAL(loop)
+ div1 r5,r1
+LOCAL(end):
+ rotcl r0
+ mov.l @r15+,r3
+ rts
+ mov.l @r15+,r2
+#endif /* 0 */
+
+/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
+ sh4-200 run times:
+ udiv small divisor: 55 cycles
+ udiv large divisor: 52 cycles
+ sdiv small divisor, positive result: 59 cycles
+ sdiv large divisor, positive result: 56 cycles
+ sdiv small divisor, negative result: 65 cycles (*)
+ sdiv large divisor, negative result: 62 cycles (*)
+ (*): r2 is restored in the rts delay slot and has a lingering latency
+ of two more cycles. */
+ .balign 4
+ .global GLOBAL(udivsi3_i4i)
+ FUNC(GLOBAL(udivsi3_i4i))
+ FUNC(GLOBAL(sdivsi3_i4i))
+GLOBAL(udivsi3_i4i):
+ sts pr,r1
+ mov.l r4,@-r15
+ extu.w r5,r0
+ cmp/eq r5,r0
+ swap.w r4,r0
+ shlr16 r4
+ bf/s LOCAL(large_divisor)
+ div0u
+ mov.l r5,@-r15
+ shll16 r5
+LOCAL(sdiv_small_divisor):
+ div1 r5,r4
+ bsr LOCAL(div6)
+ div1 r5,r4
+ div1 r5,r4
+ bsr LOCAL(div6)
+ div1 r5,r4
+ xtrct r4,r0
+ xtrct r0,r4
+ bsr LOCAL(div7)
+ swap.w r4,r4
+ div1 r5,r4
+ bsr LOCAL(div7)
+ div1 r5,r4
+ xtrct r4,r0
+ mov.l @r15+,r5
+ swap.w r0,r0
+ mov.l @r15+,r4
+ jmp @r1
+ rotcl r0
+LOCAL(div7):
+ div1 r5,r4
+LOCAL(div6):
+ div1 r5,r4; div1 r5,r4; div1 r5,r4
+ div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
+
+LOCAL(divx3):
+ rotcl r0
+ div1 r5,r4
+ rotcl r0
+ div1 r5,r4
+ rotcl r0
+ rts
+ div1 r5,r4
+
+LOCAL(large_divisor):
+ mov.l r5,@-r15
+LOCAL(sdiv_large_divisor):
+ xor r4,r0
+ .rept 4
+ rotcl r0
+ bsr LOCAL(divx3)
+ div1 r5,r4
+ .endr
+ mov.l @r15+,r5
+ mov.l @r15+,r4
+ jmp @r1
+ rotcl r0
+ ENDFUNC(GLOBAL(udivsi3_i4i))
+
+ .global GLOBAL(sdivsi3_i4i)
+GLOBAL(sdivsi3_i4i):
+ mov.l r4,@-r15
+ cmp/pz r5
+ mov.l r5,@-r15
+ bt/s LOCAL(pos_divisor)
+ cmp/pz r4
+ neg r5,r5
+ extu.w r5,r0
+ bt/s LOCAL(neg_result)
+ cmp/eq r5,r0
+ neg r4,r4
+LOCAL(pos_result):
+ swap.w r4,r0
+ bra LOCAL(sdiv_check_divisor)
+ sts pr,r1
+LOCAL(pos_divisor):
+ extu.w r5,r0
+ bt/s LOCAL(pos_result)
+ cmp/eq r5,r0
+ neg r4,r4
+LOCAL(neg_result):
+ mova LOCAL(negate_result),r0
+ ;
+ mov r0,r1
+ swap.w r4,r0
+ lds r2,macl
+ sts pr,r2
+LOCAL(sdiv_check_divisor):
+ shlr16 r4
+ bf/s LOCAL(sdiv_large_divisor)
+ div0u
+ bra LOCAL(sdiv_small_divisor)
+ shll16 r5
+ .balign 4
+LOCAL(negate_result):
+ neg r0,r0
+ jmp @r2
+ sts macl,r2
+ ENDFUNC(GLOBAL(sdivsi3_i4i))
+#endif /* !__SH_FPU_DOUBLE__ */
+#endif /* L_udivsi3_i4i */
+
+#ifdef L_sdivsi3_i4i
+#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
+/* 48 bytes, 45 cycles on sh4-200 */
+!! args in r4 and r5, result in r0, clobber r1
+
+ .global GLOBAL(sdivsi3_i4i)
+ FUNC(GLOBAL(sdivsi3_i4i))
+GLOBAL(sdivsi3_i4i):
+ sts.l fpscr,@-r15
+ sts fpul,r1
+ mova L1,r0
+ lds.l @r0+,fpscr
+ lds r4,fpul
+#ifdef FMOVD_WORKS
+ fmov.d dr0,@-r15
+ float fpul,dr0
+ lds r5,fpul
+ fmov.d dr2,@-r15
+#else
+ fmov.s DR01,@-r15
+ fmov.s DR00,@-r15
+ float fpul,dr0
+ lds r5,fpul
+ fmov.s DR21,@-r15
+ fmov.s DR20,@-r15
+#endif
+ float fpul,dr2
+ fdiv dr2,dr0
+#ifdef FMOVD_WORKS
+ fmov.d @r15+,dr2
+#else
+ fmov.s @r15+,DR20
+ fmov.s @r15+,DR21
+#endif
+ ftrc dr0,fpul
+#ifdef FMOVD_WORKS
+ fmov.d @r15+,dr0
+#else
+ fmov.s @r15+,DR00
+ fmov.s @r15+,DR01
+#endif
+ lds.l @r15+,fpscr
+ sts fpul,r0
+ rts
+ lds r1,fpul
+
+ .p2align 2
+L1:
+#ifndef FMOVD_WORKS
+ .long 0x80000
+#else
+ .long 0x180000
+#endif
+
+ ENDFUNC(GLOBAL(sdivsi3_i4i))
+#endif /* __SH_FPU_DOUBLE__ */
+#endif /* L_sdivsi3_i4i */
+#endif /* !__SHMEDIA__ */