diff options
Diffstat (limited to 'gcc-4.9/libgcc/config')
1137 files changed, 351456 insertions, 0 deletions
diff --git a/gcc-4.9/libgcc/config/aarch64/crti.S b/gcc-4.9/libgcc/config/aarch64/crti.S new file mode 100644 index 0000000..9f607b7 --- /dev/null +++ b/gcc-4.9/libgcc/config/aarch64/crti.S @@ -0,0 +1,68 @@ +# Machine description for AArch64 architecture. +# Copyright (C) 2009-2014 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) any +# later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + +/* An executable stack is *not* required for these functions. */ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +# This file creates a stack frame for the contents of the .fini and +# .init sections. Users may put any desired instructions in those +# sections. + +#ifdef __ELF__ +#define TYPE(x) .type x,function +#else +#define TYPE(x) +#endif + + # Note - this macro is complemented by the FUNC_END macro + # in crtn.S. If you change this macro you must also change + # that macro to match. +.macro FUNC_START + # Create a stack frame and save any call-preserved registers + stp x29, x30, [sp, #-16]! + stp x27, x28, [sp, #-16]! + stp x25, x26, [sp, #-16]! 
+ stp x23, x24, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x19, x20, [sp, #-16]! +.endm + + .section ".init" + .align 2 + .global _init + TYPE(_init) +_init: + FUNC_START + + + .section ".fini" + .align 2 + .global _fini + TYPE(_fini) +_fini: + FUNC_START + +# end of crti.S diff --git a/gcc-4.9/libgcc/config/aarch64/crtn.S b/gcc-4.9/libgcc/config/aarch64/crtn.S new file mode 100644 index 0000000..2a41239 --- /dev/null +++ b/gcc-4.9/libgcc/config/aarch64/crtn.S @@ -0,0 +1,61 @@ +# Machine description for AArch64 architecture. +# Copyright (C) 2009-2014 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) any +# later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + +/* An executable stack is *not* required for these functions. */ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +# This file just makes sure that the .fini and .init sections do in +# fact return. Users may put any desired instructions in those sections. +# This file is the last thing linked into any executable. 
+ + # Note - this macro is complemented by the FUNC_START macro + # in crti.S. If you change this macro you must also change + # that macro to match. + # + # Note - we do not try any fancy optimizations of the return + # sequences here, it is just not worth it. Instead keep things + # simple. Restore all the saved registers, including the link + # register and then perform the correct function return instruction. +.macro FUNC_END + ldp x19, x20, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x25, x26, [sp], #16 + ldp x27, x28, [sp], #16 + ldp x29, x30, [sp], #16 + ret +.endm + + + .section ".init" + ;; + FUNC_END + + .section ".fini" + ;; + FUNC_END + +# end of crtn.S diff --git a/gcc-4.9/libgcc/config/aarch64/linux-unwind.h b/gcc-4.9/libgcc/config/aarch64/linux-unwind.h new file mode 100644 index 0000000..6b5b3cd --- /dev/null +++ b/gcc-4.9/libgcc/config/aarch64/linux-unwind.h @@ -0,0 +1,156 @@ +/* Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef inhibit_libc + +#include <signal.h> +#include <sys/ucontext.h> + + +/* Since insns are always stored LE, on a BE system the opcodes will + be loaded byte-reversed. Therefore, define two sets of opcodes, + one for LE and one for BE. */ + +#if __AARCH64EB__ +#define MOVZ_X8_8B 0x681180d2 +#define SVC_0 0x010000d4 +#else +#define MOVZ_X8_8B 0xd2801168 +#define SVC_0 0xd4000001 +#endif + +#define MD_FALLBACK_FRAME_STATE_FOR aarch64_fallback_frame_state + +static _Unwind_Reason_Code +aarch64_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState * fs) +{ + /* The kernel creates an rt_sigframe on the stack immediately prior + to delivering a signal. + + This structure must have the same shape as the linux kernel + equivalent. */ + struct rt_sigframe + { + siginfo_t info; + struct ucontext uc; + }; + + struct rt_sigframe *rt_; + _Unwind_Ptr new_cfa; + unsigned *pc = context->ra; + struct sigcontext *sc; + struct _aarch64_ctx *extension_marker; + int i; + + /* A signal frame will have a return address pointing to + __default_sa_restorer. This code is hardwired as: + + 0xd2801168 movz x8, #0x8b + 0xd4000001 svc 0x0 + */ + if (pc[0] != MOVZ_X8_8B || pc[1] != SVC_0) + { + return _URC_END_OF_STACK; + } + + rt_ = context->cfa; + sc = &rt_->uc.uc_mcontext; + +/* This define duplicates the definition in aarch64.md */ +#define SP_REGNUM 31 + + new_cfa = (_Unwind_Ptr) sc; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = STACK_POINTER_REGNUM; + fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa; + + for (i = 0; i < AARCH64_DWARF_NUMBER_R; i++) + { + fs->regs.reg[AARCH64_DWARF_R0 + i].how = REG_SAVED_OFFSET; + fs->regs.reg[AARCH64_DWARF_R0 + i].loc.offset = + (_Unwind_Ptr) & (sc->regs[i]) - new_cfa; + } + + /* The core context may be extended with an arbitrary set of + additional contexts appended sequentially. Each additional + context contains a magic identifier and size in bytes. 
The size + field can be used to skip over unrecognized context extensions. + The end of the context sequence is marked by a context with magic + 0 or size 0. */ + for (extension_marker = (struct _aarch64_ctx *) &sc->__reserved; + extension_marker->magic; + extension_marker = (struct _aarch64_ctx *) + ((unsigned char *) extension_marker + extension_marker->size)) + { + if (extension_marker->magic == FPSIMD_MAGIC) + { + struct fpsimd_context *ctx = + (struct fpsimd_context *) extension_marker; + int i; + + for (i = 0; i < AARCH64_DWARF_NUMBER_V; i++) + { + _Unwind_Sword offset; + + fs->regs.reg[AARCH64_DWARF_V0 + i].how = REG_SAVED_OFFSET; + + /* sigcontext contains 32 128bit registers for V0 to + V31. The kernel will have saved the contents of the + V registers. We want to unwind the callee save D + registers. Each D register comprises the least + significant half of the corresponding V register. We + need to offset into the saved V register dependent on + our endianness to find the saved D register. */ + + offset = (_Unwind_Ptr) & (ctx->vregs[i]) - new_cfa; + + /* The endianness adjustment code below expects that a + saved V register is 16 bytes. */ + gcc_assert (sizeof (ctx->vregs[0]) == 16); +#if defined (__AARCH64EB__) + offset = offset + 8; +#endif + fs->regs.reg[AARCH64_DWARF_V0 + i].loc.offset = offset; + } + } + else + { + /* There is context provided that we do not recognize! 
*/ + } + } + + fs->regs.reg[31].how = REG_SAVED_OFFSET; + fs->regs.reg[31].loc.offset = (_Unwind_Ptr) & (sc->sp) - new_cfa; + + fs->signal_frame = 1; + + fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].how = REG_SAVED_VAL_OFFSET; + fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].loc.offset = + (_Unwind_Ptr) (sc->pc) - new_cfa; + + fs->retaddr_column = DWARF_ALT_FRAME_RETURN_COLUMN; + + return _URC_NO_REASON; +} + +#endif diff --git a/gcc-4.9/libgcc/config/aarch64/sfp-exceptions.c b/gcc-4.9/libgcc/config/aarch64/sfp-exceptions.c new file mode 100644 index 0000000..f3a95e8 --- /dev/null +++ b/gcc-4.9/libgcc/config/aarch64/sfp-exceptions.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2012-2014 Free Software Foundation, Inc. + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) any + * later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
+ */ + +#include "sfp-machine.h" + +void +__sfp_handle_exceptions (int _fex) +{ + const float fp_max = __FLT_MAX__; + const float fp_min = __FLT_MIN__; + const float fp_1e32 = 1.0e32f; + const float fp_zero = 0.0; + const float fp_one = 1.0; + unsigned fpsr; + + if (_fex & FP_EX_INVALID) + { + __asm__ __volatile__ ("fdiv\ts0, %s0, %s0" + : + : "w" (fp_zero) + : "s0"); + __asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr)); + } + if (_fex & FP_EX_DIVZERO) + { + __asm__ __volatile__ ("fdiv\ts0, %s0, %s1" + : + : "w" (fp_one), "w" (fp_zero) + : "s0"); + __asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr)); + } + if (_fex & FP_EX_OVERFLOW) + { + __asm__ __volatile__ ("fadd\ts0, %s0, %s1" + : + : "w" (fp_max), "w" (fp_1e32) + : "s0"); + __asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr)); + } + if (_fex & FP_EX_UNDERFLOW) + { + __asm__ __volatile__ ("fmul\ts0, %s0, %s0" + : + : "w" (fp_min) + : "s0"); + __asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr)); + } + if (_fex & FP_EX_INEXACT) + { + __asm__ __volatile__ ("fsub\ts0, %s0, %s1" + : + : "w" (fp_max), "w" (fp_one) + : "s0"); + __asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr)); + } +} diff --git a/gcc-4.9/libgcc/config/aarch64/sfp-machine.h b/gcc-4.9/libgcc/config/aarch64/sfp-machine.h new file mode 100644 index 0000000..203e478 --- /dev/null +++ b/gcc-4.9/libgcc/config/aarch64/sfp-machine.h @@ -0,0 +1,125 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long long +#define _FP_WS_TYPE signed long long +#define _FP_I_TYPE long long + +typedef int TItype __attribute__ ((mode (TI))); +typedef unsigned int UTItype __attribute__ ((mode (TI))); +#define TI_BITS (__CHAR_BIT__ * (int)sizeof(TItype)) + +/* The type of the result of a floating point comparison. This must + match __libgcc_cmp_return__ in GCC for the target. */ +typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); +#define CMPtype __gcc_CMPtype + +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +/* This appears to be in line with the VFP conventions in the v7-a + ARM-ARM. Need to check with the v8 version. 
*/ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define FP_EX_INVALID 0x01 +#define FP_EX_DIVZERO 0x02 +#define FP_EX_OVERFLOW 0x04 +#define FP_EX_UNDERFLOW 0x08 +#define FP_EX_INEXACT 0x10 +#define FP_EX_SHIFT 8 +#define FP_EX_ALL \ + (FP_EX_INVALID | FP_EX_DIVZERO | FP_EX_OVERFLOW | FP_EX_UNDERFLOW \ + | FP_EX_INEXACT) + +#define _FP_TININESS_AFTER_ROUNDING 0 + +void __sfp_handle_exceptions (int); + +#define FP_HANDLE_EXCEPTIONS \ + do { \ + if (__builtin_expect (_fex, 0)) \ + __sfp_handle_exceptions (_fex); \ + } while (0); + +#define FP_TRAPPING_EXCEPTIONS ((_fpcr >> FP_EX_SHIFT) & FP_EX_ALL) + +#define FP_RND_NEAREST 0x000000 +#define FP_RND_PINF 0x400000 +#define FP_RND_MINF 0x800000 +#define FP_RND_ZERO 0xc00000 +#define FP_RND_MASK 0xc00000 + +#define _FP_DECL_EX \ + unsigned long int _fpcr __attribute__ ((unused)) = FP_RND_NEAREST + +#define FP_INIT_ROUNDMODE \ + do { \ + __asm__ __volatile__ ("mrs %0, fpcr" \ + : "=r" (_fpcr)); \ + } while (0) + +#define FP_ROUNDMODE (_fpcr & FP_RND_MASK) + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 + +#if defined __AARCH64EB__ +# define __BYTE_ORDER __BIG_ENDIAN +#else +# define __BYTE_ORDER __LITTLE_ENDIAN +#endif + + +/* Define ALIASNAME as a strong alias for NAME. */ +# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +# define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); diff --git a/gcc-4.9/libgcc/config/aarch64/sync-cache.c b/gcc-4.9/libgcc/config/aarch64/sync-cache.c new file mode 100644 index 0000000..8d206c9 --- /dev/null +++ b/gcc-4.9/libgcc/config/aarch64/sync-cache.c @@ -0,0 +1,72 @@ +/* Machine description for AArch64 architecture. 
+ Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +void __aarch64_sync_cache_range (const void *, const void *); + +void +__aarch64_sync_cache_range (const void *base, const void *end) +{ + unsigned icache_lsize; + unsigned dcache_lsize; + static unsigned int cache_info = 0; + const char *address; + + if (! cache_info) + /* CTR_EL0 [3:0] contains log2 of icache line size in words. + CTR_EL0 [19:16] contains log2 of dcache line size in words. */ + asm volatile ("mrs\t%0, ctr_el0":"=r" (cache_info)); + + icache_lsize = 4 << (cache_info & 0xF); + dcache_lsize = 4 << ((cache_info >> 16) & 0xF); + + /* Loop over the address range, clearing one cache line at once. + Data cache must be flushed to unification first to make sure the + instruction cache fetches the updated data. 'end' is exclusive, + as per the GNU definition of __clear_cache. */ + + /* Make the start address of the loop cache aligned. 
*/ + address = (const char*) ((__UINTPTR_TYPE__) base + & ~ (__UINTPTR_TYPE__) (dcache_lsize - 1)); + + for (; address < (const char *) end; address += dcache_lsize) + asm volatile ("dc\tcvau, %0" + : + : "r" (address) + : "memory"); + + asm volatile ("dsb\tish" : : : "memory"); + + /* Make the start address of the loop cache aligned. */ + address = (const char*) ((__UINTPTR_TYPE__) base + & ~ (__UINTPTR_TYPE__) (icache_lsize - 1)); + + for (; address < (const char *) end; address += icache_lsize) + asm volatile ("ic\tivau, %0" + : + : "r" (address) + : "memory"); + + asm volatile ("dsb\tish; isb" : : : "memory"); +} diff --git a/gcc-4.9/libgcc/config/aarch64/t-aarch64 b/gcc-4.9/libgcc/config/aarch64/t-aarch64 new file mode 100644 index 0000000..118cc43 --- /dev/null +++ b/gcc-4.9/libgcc/config/aarch64/t-aarch64 @@ -0,0 +1,21 @@ +# Machine description for AArch64 architecture. +# Copyright (C) 2012-2014 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. 
+ +LIB2ADD += $(srcdir)/config/aarch64/sync-cache.c diff --git a/gcc-4.9/libgcc/config/aarch64/t-softfp b/gcc-4.9/libgcc/config/aarch64/t-softfp new file mode 100644 index 0000000..586dca2 --- /dev/null +++ b/gcc-4.9/libgcc/config/aarch64/t-softfp @@ -0,0 +1,9 @@ +softfp_float_modes := tf +softfp_int_modes := si di ti +softfp_extensions := sftf dftf +softfp_truncations := tfsf tfdf +softfp_exclude_libgcc2 := n + +TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes + +LIB2ADD += $(srcdir)/config/aarch64/sfp-exceptions.c diff --git a/gcc-4.9/libgcc/config/alpha/crtfastmath.c b/gcc-4.9/libgcc/config/alpha/crtfastmath.c new file mode 100644 index 0000000..8a71176 --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/crtfastmath.c @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2001-2014 Free Software Foundation, Inc. + * Contributed by Richard Henderson (rth@redhat.com) + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) any + * later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + */ + +/* Assume OSF/1 compatible interfaces. 
*/ + +extern void __ieee_set_fp_control (unsigned long int); + +#define IEEE_MAP_DMZ (1UL<<12) /* Map denorm inputs to zero */ +#define IEEE_MAP_UMZ (1UL<<13) /* Map underflowed outputs to zero */ + +static void __attribute__((constructor)) +set_fast_math (void) +{ + __ieee_set_fp_control (IEEE_MAP_DMZ | IEEE_MAP_UMZ); +} diff --git a/gcc-4.9/libgcc/config/alpha/libgcc-alpha-ldbl.ver b/gcc-4.9/libgcc/config/alpha/libgcc-alpha-ldbl.ver new file mode 100644 index 0000000..aa7f7c2 --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/libgcc-alpha-ldbl.ver @@ -0,0 +1,50 @@ +# Copyright (C) 2006-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +%ifdef __LONG_DOUBLE_128__ + +# long double 128 bit support in libgcc_s.so.1 is only available +# when configured with --with-long-double-128. Make sure all the +# symbols are available at @@GCC_LDBL_* versions to make it clear +# there is a configurable symbol set. 
+ +%exclude { + __fixtfdi + __fixunstfdi + __floatditf + + __divtc3 + __multc3 + __powitf2 +} + +%inherit GCC_LDBL_3.0 GCC_3.0 +GCC_LDBL_3.0 { + __fixtfdi + __fixunstfdi + __floatditf +} + +%inherit GCC_LDBL_4.0.0 GCC_4.0.0 +GCC_LDBL_4.0.0 { + __divtc3 + __multc3 + __powitf2 +} + +%endif diff --git a/gcc-4.9/libgcc/config/alpha/linux-unwind.h b/gcc-4.9/libgcc/config/alpha/linux-unwind.h new file mode 100644 index 0000000..b5bfd1c --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/linux-unwind.h @@ -0,0 +1,101 @@ +/* DWARF2 EH unwinding support for Alpha Linux. + Copyright (C) 2004-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifndef inhibit_libc +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. 
*/ + +#include <signal.h> +#include <sys/ucontext.h> + +#define MD_FALLBACK_FRAME_STATE_FOR alpha_fallback_frame_state + +static _Unwind_Reason_Code +alpha_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned int *pc = context->ra; + struct sigcontext *sc; + long new_cfa; + int i; + + if (pc[0] != 0x47fe0410 /* mov $30,$16 */ + || pc[2] != 0x00000083) /* callsys */ + return _URC_END_OF_STACK; + if (context->cfa == 0) + return _URC_END_OF_STACK; + if (pc[1] == 0x201f0067) /* lda $0,NR_sigreturn */ + sc = context->cfa; + else if (pc[1] == 0x201f015f) /* lda $0,NR_rt_sigreturn */ + { + struct rt_sigframe { + siginfo_t info; + struct ucontext uc; + } *rt_ = context->cfa; + sc = &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + new_cfa = sc->sc_regs[30]; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 30; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + for (i = 0; i < 30; ++i) + { + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset + = (long) &sc->sc_regs[i] - new_cfa; + } + for (i = 0; i < 31; ++i) + { + fs->regs.reg[i+32].how = REG_SAVED_OFFSET; + fs->regs.reg[i+32].loc.offset + = (long) &sc->sc_fpregs[i] - new_cfa; + } + fs->regs.reg[64].how = REG_SAVED_OFFSET; + fs->regs.reg[64].loc.offset = (long)&sc->sc_pc - new_cfa; + fs->retaddr_column = 64; + fs->signal_frame = 1; + + return _URC_NO_REASON; +} + +#define MD_FROB_UPDATE_CONTEXT alpha_frob_update_context + +/* Fix up for signal handlers that don't have S flag set. 
*/ + +static void +alpha_frob_update_context (struct _Unwind_Context *context, + _Unwind_FrameState *fs ATTRIBUTE_UNUSED) +{ + unsigned int *pc = context->ra; + + if (pc[0] == 0x47fe0410 /* mov $30,$16 */ + && pc[2] == 0x00000083 /* callsys */ + && (pc[1] == 0x201f0067 /* lda $0,NR_sigreturn */ + || pc[1] == 0x201f015f)) /* lda $0,NR_rt_sigreturn */ + _Unwind_SetSignalFrame (context, 1); +} +#endif diff --git a/gcc-4.9/libgcc/config/alpha/qrnnd.S b/gcc-4.9/libgcc/config/alpha/qrnnd.S new file mode 100644 index 0000000..358ed4c --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/qrnnd.S @@ -0,0 +1,175 @@ + # Alpha 21064 __udiv_qrnnd + # Copyright (C) 1992-2014 Free Software Foundation, Inc. + + # This file is part of GCC. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by + # the Free Software Foundation; either version 3 of the License, or (at your + # option) any later version. + + # This file is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # Under Section 7 of GPL version 3, you are granted additional + # permissions described in the GCC Runtime Library Exception, version + # 3.1, as published by the Free Software Foundation. + + # You should have received a copy of the GNU General Public License and + # a copy of the GCC Runtime Library Exception along with this program; + # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + # <http://www.gnu.org/licenses/>. 
+ +#ifdef __ELF__ +.section .note.GNU-stack,"" +#endif + + .set noreorder + .set noat + + .text + + .globl __udiv_qrnnd + .ent __udiv_qrnnd +#ifdef __VMS__ +__udiv_qrnnd..en: + .frame $29,0,$26,0 + .prologue +#else +__udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 +#endif + +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 +#define AT $at + + ldiq cnt,16 + blt d,$largedivisor + +$loop1: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop1 + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addq $5,$6,$5 + +$loop2: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop2 + + addq n1,n1,n1 + addq $4,n1,n1 + bne $6,$Odd + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$Odd: + /* q' in n0. 
r' in n1 */ + addq n1,n0,n1 + + cmpult n1,n0,tmp # tmp := carry from addq + subq n1,d,AT + addq n0,tmp,n0 + cmovne tmp,AT,n1 + + cmpult n1,d,tmp + addq n0,1,AT + cmoveq tmp,AT,n0 + subq n1,d,AT + cmoveq tmp,AT,n1 + + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +#ifdef __VMS__ + .link + .align 3 +__udiv_qrnnd: + .pdesc __udiv_qrnnd..en,null +#endif + .end __udiv_qrnnd diff --git a/gcc-4.9/libgcc/config/alpha/t-alpha b/gcc-4.9/libgcc/config/alpha/t-alpha new file mode 100644 index 0000000..0b6ffb1 --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/t-alpha @@ -0,0 +1,2 @@ +# This is a support routine for longlong.h, used by libgcc2.c. +LIB2ADD += $(srcdir)/config/alpha/qrnnd.S diff --git a/gcc-4.9/libgcc/config/alpha/t-ieee b/gcc-4.9/libgcc/config/alpha/t-ieee new file mode 100644 index 0000000..5fdc729 --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/t-ieee @@ -0,0 +1,2 @@ +# All alphas get an IEEE compliant set of libraries. +HOST_LIBGCC2_CFLAGS += -mieee diff --git a/gcc-4.9/libgcc/config/alpha/t-linux b/gcc-4.9/libgcc/config/alpha/t-linux new file mode 100644 index 0000000..fabf38f --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/t-linux @@ -0,0 +1 @@ +SHLIB_MAPFILES += $(srcdir)/config/alpha/libgcc-alpha-ldbl.ver diff --git a/gcc-4.9/libgcc/config/alpha/t-vms b/gcc-4.9/libgcc/config/alpha/t-vms new file mode 100644 index 0000000..870e44c --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/t-vms @@ -0,0 +1,11 @@ +# This object must be linked with in order to make the executable debuggable. +# vms-ld handles it automatically when passed -g. 
+vms-dwarf2.o: $(srcdir)/config/alpha/vms-dwarf2.S + $(gcc_compile) -c -x assembler-with-cpp $< + +vms-dwarf2eh.o: $(srcdir)/config/alpha/vms-dwarf2eh.S + $(gcc_compile) -c -x assembler-with-cpp $< + +LIB2ADD += $(srcdir)/config/alpha/vms-gcc_shell_handler.c + +HOST_LIBGCC2_CFLAGS=-mpointer-size=64 diff --git a/gcc-4.9/libgcc/config/alpha/vms-dwarf2.S b/gcc-4.9/libgcc/config/alpha/vms-dwarf2.S new file mode 100644 index 0000000..2c1751c --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/vms-dwarf2.S @@ -0,0 +1,77 @@ +/* VMS dwarf2 section sequentializer. + Copyright (C) 2001-2014 Free Software Foundation, Inc. + Contributed by Douglas B. Rupp (rupp@gnat.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Linking with this file forces Dwarf2 debug sections to be + sequentially loaded by the VMS linker, enabling GDB to read them. 
*/ + +.section .debug_abbrev,NOWRT + .align 0 + .globl $dwarf2.debug_abbrev +$dwarf2.debug_abbrev: + +.section .debug_aranges,NOWRT + .align 0 + .globl $dwarf2.debug_aranges +$dwarf2.debug_aranges: + +.section .debug_frame,NOWRT + .align 0 + .globl $dwarf2.debug_frame +$dwarf2.debug_frame: + +.section .debug_info,NOWRT + .align 0 + .globl $dwarf2.debug_info +$dwarf2.debug_info: + +.section .debug_line,NOWRT + .align 0 + .globl $dwarf2.debug_line +$dwarf2.debug_line: + +.section .debug_loc,NOWRT + .align 0 + .globl $dwarf2.debug_loc +$dwarf2.debug_loc: + +.section .debug_macinfo,NOWRT + .align 0 + .globl $dwarf2.debug_macinfo +$dwarf2.debug_macinfo: + +.section .debug_pubnames,NOWRT + .align 0 + .globl $dwarf2.debug_pubnames +$dwarf2.debug_pubnames: + +.section .debug_str,NOWRT + .align 0 + .globl $dwarf2.debug_str +$dwarf2.debug_str: + +.section .debug_zzzzzz,NOWRT + .align 0 + .globl $dwarf2.debug_zzzzzz +$dwarf2.debug_zzzzzz: diff --git a/gcc-4.9/libgcc/config/alpha/vms-dwarf2eh.S b/gcc-4.9/libgcc/config/alpha/vms-dwarf2eh.S new file mode 100644 index 0000000..8f8072f --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/vms-dwarf2eh.S @@ -0,0 +1,30 @@ +/* VMS dwarf2 exception handling section sequentializer. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + Contributed by Douglas B. Rupp (rupp@gnat.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Linking with this file forces the Dwarf2 EH section to be + individually loaded by the VMS linker and the unwinder to read it. */ + +.section .eh_frame,NOWRT + .align 0 diff --git a/gcc-4.9/libgcc/config/alpha/vms-gcc_shell_handler.c b/gcc-4.9/libgcc/config/alpha/vms-gcc_shell_handler.c new file mode 100644 index 0000000..199c416 --- /dev/null +++ b/gcc-4.9/libgcc/config/alpha/vms-gcc_shell_handler.c @@ -0,0 +1,123 @@ +/* Static condition handler for Alpha/VMS. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file implements __gcc_shell_handler, the static VMS condition handler + used as the indirection wrapper around user level handlers installed with + establish_vms_condition_handler GCC builtin.
+
+   [ABI] in comments refers to the "HP OpenVMS calling standard" document
+   dated January 2005. */
+
+#include <vms/chfdef.h>
+#include <vms/pdscdef.h>
+#include <vms/ssdef.h>
+
+typedef void * ADDR;
+typedef unsigned long long REG;
+
+#define REG_AT(addr) (*(REG *)(addr))
+
+/* Compute pointer to procedure descriptor (Procedure Value) from Frame
+   Pointer FP, according to the rules in [ABI-3.5.1 Current Procedure].
+   Evaluates to 0 when FP is 0, so callers must check before dereferencing. */
+#define PV_FOR(FP) \
+  (((FP) != 0) \
+    ? (((REG_AT (FP) & 0x7) == 0) ? *(PDSCDEF **)(FP) : (PDSCDEF *)(FP)) : 0)
+
+long
+__gcc_shell_handler (struct chf$signal_array *sig_arr,
+                     struct chf$mech_array *mech_arr);
+
+/* Helper for __gcc_shell_handler. Fetch the pointer to procedure currently
+   registered as the VMS condition handler for the live function with a frame
+   pointer FP. Return 0 if FP is 0 or the procedure kind is not handled. */
+
+static ADDR
+get_dyn_handler_pointer (REG fp)
+{
+  /* From the frame pointer we find the procedure descriptor, and fetch
+     the handler_data field from there. This field contains the offset
+     from FP at which the address of the currently installed handler is
+     to be found. */
+
+  PDSCDEF * pd = PV_FOR (fp);
+  /* Procedure descriptor pointer for the live subprogram with FP as the frame
+     pointer, and to which __gcc_shell_handler is attached as a condition
+     handler. Null when FP is 0. */
+
+  REG handler_slot_offset;
+  /* Offset from FP at which the address of the currently established real
+     condition handler is to be found. This offset is available from the
+     handler_data field of the procedure descriptor. */
+
+  REG handler_data_offset;
+  /* The handler_data field position in the procedure descriptor, which
+     depends on the kind of procedure at hand (kind taken as 0 if PD is null). */
+
+  switch (pd ? pd->pdsc$w_flags & 0xf : 0)
+    {
+    case PDSC$K_KIND_FP_STACK: /* [3.4.2 PD for stack frame procedures] */
+      handler_data_offset = 40;
+      break;
+
+    case PDSC$K_KIND_FP_REGISTER: /* [3.4.5 PD for reg frame procedures] */
+      handler_data_offset = 32;
+      break;
+
+    default: /* No descriptor, or a procedure kind we do not handle. */
+      handler_data_offset = 0;
+      break;
+    }
+
+  /* If we couldn't determine the handler_data field position, give up. */
+  if (handler_data_offset == 0)
+    return 0;
+
+  /* Otherwise, fetch the fp offset at which the real handler address is to be
+     found, then fetch and return the latter in turn. */
+
+  handler_slot_offset = REG_AT ((REG)pd + handler_data_offset);
+
+  return (ADDR) REG_AT (fp + handler_slot_offset);
+}
+
+/* The static VMS condition handler for GCC code. Fetch the address of the
+   currently established condition handler, then resignal if there is none or
+   call the handler with the VMS condition arguments. */
+
+long
+__gcc_shell_handler (struct chf$signal_array *sig_arr,
+                     struct chf$mech_array *mech_arr)
+{
+  long ret;
+  long (*user_handler) (struct chf$signal_array *, struct chf$mech_array *);
+
+  user_handler = get_dyn_handler_pointer (mech_arr->chf$q_mch_frame);
+  if (!user_handler)
+    ret = SS$_RESIGNAL;
+  else
+    ret = user_handler (sig_arr, mech_arr);
+
+  return ret;
+}
+
diff --git a/gcc-4.9/libgcc/config/alpha/vms-unwind.h b/gcc-4.9/libgcc/config/alpha/vms-unwind.h
new file mode 100644
index 0000000..8bb4777
--- /dev/null
+++ b/gcc-4.9/libgcc/config/alpha/vms-unwind.h
@@ -0,0 +1,292 @@
+/* Fallback frame unwinding for Alpha/VMS.
+   Copyright (C) 1996-2014 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <stdio.h> +#include <vms/pdscdef.h> +#include <vms/libicb.h> +#include <vms/chfctxdef.h> +#include <vms/chfdef.h> + +#define MD_FALLBACK_FRAME_STATE_FOR alpha_vms_fallback_frame_state + +typedef void * ADDR; +typedef unsigned long long REG; +typedef PDSCDEF * PV; + +#define REG_AT(addr) (*(REG *)(addr)) +#define ADDR_AT(addr) (*(ADDR *)(addr)) + +/* Compute pointer to procedure descriptor (Procedure Value) from Frame + Pointer FP, according to the rules in [ABI-3.5.1 Current Procedure]. */ +#define PV_FOR(FP) \ + (((FP) != 0) \ + ? (((REG_AT (FP) & 0x7) == 0) ? *(PDSCDEF **)(FP) : (PDSCDEF *)(FP)) : 0) + +extern int SYS$GL_CALL_HANDL; +/* This is actually defined as a "long", but in system code where longs + are always 4bytes while GCC longs might be 8bytes. 
*/
+
+#define UPDATE_FS_FOR_CFA_GR(FS, GRN, LOC, CFA) \
+do { /* Record register GRN as saved at address LOC, expressed relative to CFA. */ \
+(FS)->regs.reg[GRN].how = REG_SAVED_OFFSET; \
+(FS)->regs.reg[GRN].loc.offset = (_Unwind_Sword) ((REG) (LOC) - (REG) (CFA)); \
+} while (0) /* No trailing ';' here: each use site supplies its own. */
+
+#define GIVEUP_ON_FAILURE(STATUS) \
+  { if ((((STATUS) & 1) != 1)) return _URC_END_OF_STACK; }
+#define DENOTES_EXC_DISPATCHER(PV) ((PV) == (ADDR) (REG) SYS$GL_CALL_HANDL)
+
+#define RA_COLUMN (DWARF_ALT_FRAME_RETURN_COLUMN)
+
+static int
+alpha_vms_fallback_frame_state (struct _Unwind_Context *context,
+                                _Unwind_FrameState *fs)
+{
+  static int eh_debug = -1;
+
+  /* Our goal is to update FS to reflect the state one step up CONTEXT, that
+     is: the CFA, return address and *saved* registers locations associated
+     with the function designated by CONTEXT->ra. We are called when the
+     libgcc unwinder has not found any dwarf FDE for this address, which
+     typically happens when trying to propagate a language exception through a
+     signal global vector or frame based handler.
+
+     The CONTEXT->reg[] entries reflect the state/location of register saves
+     so designate values live at the CONTEXT->ra point. Of precious value to
+     us here is the frame pointer (r29), which gets us a procedure value. */
+
+  PV pv = (context->reg[29] != 0) ? PV_FOR (ADDR_AT (context->reg[29])) : 0;
+
+  int pkind = pv ? pv->pdsc$w_flags & 0xf : 0;
+  /* VMS procedure kind, as indicated by the procedure descriptor. We only
+     know how to deal with FP_STACK or FP_REGISTER here. */
+
+  ADDR new_cfa = 0;
+  /* CFA we will establish for the caller, computed in different ways,
+     e.g. depending whether we cross an exception dispatcher frame. */
+
+  CHFCTX *chfctx = 0;
+  /* Pointer to the VMS CHF context associated with an exception dispatcher
+     frame, if we happen to come across one. */
+
+  int i,j;
+
+  if (eh_debug == -1)
+    {
+      char * eh_debug_env = getenv ("EH_DEBUG");
+      eh_debug = eh_debug_env ? atoi (eh_debug_env) : 0;
+    }
+
+  if (eh_debug)
+    printf ("MD_FALLBACK running ...\n");
+
+  /* We only know how to deal with stack or reg frame procedures, so give
+     up if we're handed anything else. */
+  if (pkind != PDSC$K_KIND_FP_STACK && pkind != PDSC$K_KIND_FP_REGISTER)
+    return _URC_END_OF_STACK;
+
+  if (eh_debug)
+    printf ("FALLBACK: CTX FP = 0x%p, PV = 0x%p, EN = 0x%llx, RA = 0x%p\n",
+            ADDR_AT (context->reg[29]), pv, pv->pdsc$q_entry, context->ra);
+
+  fs->retaddr_column = RA_COLUMN;
+
+  /* If PV designates a VMS exception vector or condition handler, we need to
+     do as if the caller was the signaling point and establish the state of the
+     intermediate VMS code (CFA, RA and saved register locations) as if it was
+     a single regular function. This requires special processing.
+
+     The data structures available from a condition dispatcher frame (signal
+     context) do not contain the values of most callee-saved registers, so
+     whatever PV designates, we need to account for the registers it saves.
+
+     Besides, we need to express all the locations with respect to a
+     consistent CFA value, so we compute this first. */
+
+  if (DENOTES_EXC_DISPATCHER (pv))
+    {
+      /* The CFA to establish is the signaling point's stack pointer. We
+         compute it using the system invocation context unwinding services and
+         save the CHF context data pointer along the way for later uses. */
+
+      INVO_CONTEXT_BLK icb;
+      int status, invo_handle;
+
+      if (eh_debug)
+        printf ("FALLBACK: SYS$HANDLER\n");
+
+      icb.libicb$q_ireg [29] = REG_AT (context->reg[29]);
+      icb.libicb$q_ireg [30] = 0;
+      invo_handle = LIB$GET_INVO_HANDLE (&icb);
+
+      status = LIB$GET_INVO_CONTEXT (invo_handle, &icb);
+      GIVEUP_ON_FAILURE (status);
+
+      chfctx = (CHFCTX *) icb.libicb$ph_chfctx_addr;
+
+      status = LIB$GET_PREV_INVO_CONTEXT (&icb);
+      GIVEUP_ON_FAILURE (status);
+
+      new_cfa = (ADDR) icb.libicb$q_ireg[30];
+    }
+  else
+    {
+      /* The CFA to establish is the SP value on entry of the procedure
+         designated by PV, which we compute as the corresponding frame base
+         register value + frame size. Note that the frame base may differ
+         from CONTEXT->cfa, typically if the caller has performed dynamic
+         stack allocations. */
+
+      int base_reg = pv->pdsc$w_flags & PDSC$M_BASE_REG_IS_FP ? 29 : 30;
+      ADDR base_addr = ADDR_AT (context->reg[base_reg]);
+
+      new_cfa = base_addr + pv->pdsc$l_size;
+    }
+
+  /* State to compute the caller's CFA by adding an offset to the current
+     one in CONTEXT. */
+  fs->regs.cfa_how = CFA_REG_OFFSET;
+  fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+  fs->regs.cfa_offset = new_cfa - context->cfa;
+
+  /* Regular unwind first, accounting for the register saves performed by
+     the procedure designated by PV. */
+
+  switch (pkind)
+    {
+    case PDSC$K_KIND_FP_STACK:
+      {
+        /* The saved registers are all located in the Register Save Area,
+           except for the procedure value register (R27) found at the frame
+           base address. */
+
+        int base_reg = pv->pdsc$w_flags & PDSC$M_BASE_REG_IS_FP ? 29 : 30;
+        ADDR base_addr = ADDR_AT (context->reg[base_reg]);
+        ADDR rsa_addr = base_addr + pv->pdsc$w_rsa_offset;
+
+        if (eh_debug)
+          printf ("FALLBACK: STACK frame procedure\n");
+
+        UPDATE_FS_FOR_CFA_GR (fs, 27, base_addr, new_cfa);
+
+        /* The first RSA entry is for the return address register, R26. */
+
+        UPDATE_FS_FOR_CFA_GR (fs, 26, rsa_addr, new_cfa);
+        UPDATE_FS_FOR_CFA_GR (fs, RA_COLUMN, rsa_addr, new_cfa);
+
+        /* The following entries are for registers marked as saved according
+           to ireg_mask. The probe bit is unsigned so bit 31 is well defined. */
+        for (i = 0, j = 0; i < 32; i++)
+          if ((1U << i) & pv->pdsc$l_ireg_mask)
+            UPDATE_FS_FOR_CFA_GR (fs, i, rsa_addr + 8 * ++j, new_cfa);
+
+        /* ??? floating point registers ? */
+
+        break;
+      }
+
+    case PDSC$K_KIND_FP_REGISTER:
+      {
+        if (eh_debug)
+          printf ("FALLBACK: REGISTER frame procedure\n");
+
+        fs->regs.reg[RA_COLUMN].how = REG_SAVED_REG;
+        fs->regs.reg[RA_COLUMN].loc.reg = pv->pdsc$b_save_ra;
+
+        fs->regs.reg[29].how = REG_SAVED_REG;
+        fs->regs.reg[29].loc.reg = pv->pdsc$b_save_fp;
+
+        break;
+      }
+
+    default:
+      /* Should never reach here. */
+      return _URC_END_OF_STACK;
+    }
+
+  /* If PV designates an exception dispatcher, we have to adjust the return
+     address column to get at the signal occurrence point, and account for
+     what the CHF context contains. */
+
+  if (DENOTES_EXC_DISPATCHER (pv))
+    {
+      /* The PC of the instruction causing the condition is available from the
+         signal argument vector. Extra saved register values are available
+         from the mechargs array. */
+
+      CHF$SIGNAL_ARRAY *sigargs
+        = (CHF$SIGNAL_ARRAY *) chfctx->chfctx$q_sigarglst;
+
+      CHF$MECH_ARRAY *mechargs
+        = (CHF$MECH_ARRAY *) chfctx->chfctx$q_mcharglst;
+
+      ADDR condpc_addr
+        = &((int *)(&sigargs->chf$l_sig_name)) [sigargs->chf$is_sig_args-2];
+
+      ADDR rei_frame_addr = (void *) mechargs->chf$q_mch_esf_addr;
+
+      /* Adjust the return address location. */
+
+      UPDATE_FS_FOR_CFA_GR (fs, RA_COLUMN, condpc_addr, new_cfa);
+
+      /* The frame pointer at the condition point is available from the
+         chf context directly. */
+
+      UPDATE_FS_FOR_CFA_GR (fs, 29, &chfctx->chfctx$q_expt_fp, new_cfa);
+
+      /* Registers available from the mechargs array. */
+
+      UPDATE_FS_FOR_CFA_GR (fs, 0, &mechargs->chf$q_mch_savr0, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 1, &mechargs->chf$q_mch_savr1, new_cfa);
+
+      UPDATE_FS_FOR_CFA_GR (fs, 16, &mechargs->chf$q_mch_savr16, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 17, &mechargs->chf$q_mch_savr17, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 18, &mechargs->chf$q_mch_savr18, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 19, &mechargs->chf$q_mch_savr19, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 20, &mechargs->chf$q_mch_savr20, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 21, &mechargs->chf$q_mch_savr21, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 22, &mechargs->chf$q_mch_savr22, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 23, &mechargs->chf$q_mch_savr23, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 24, &mechargs->chf$q_mch_savr24, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 25, &mechargs->chf$q_mch_savr25, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 26, &mechargs->chf$q_mch_savr26, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 27, &mechargs->chf$q_mch_savr27, new_cfa);
+      UPDATE_FS_FOR_CFA_GR (fs, 28, &mechargs->chf$q_mch_savr28, new_cfa);
+
+      /* Registers R2 to R7 are available from the rei frame pointer. */
+
+      for (i = 2; i <= 7; i ++)
+        UPDATE_FS_FOR_CFA_GR (fs, i, rei_frame_addr+(i - 2)*8, new_cfa);
+
+      /* ??? floating point registers ? */
+    }
+
+  fs->signal_frame = 1;
+
+  return _URC_NO_REASON;
+}
+
+
+
diff --git a/gcc-4.9/libgcc/config/arc/asm.h b/gcc-4.9/libgcc/config/arc/asm.h
new file mode 100644
index 0000000..447f22c
--- /dev/null
+++ b/gcc-4.9/libgcc/config/arc/asm.h
@@ -0,0 +1,29 @@
+/* Assembler macros for the Synopsys DesignWare ARC CPU.
+
+   Copyright (C) 1994-2014 Free Software Foundation, Inc.
+   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+                on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#define FUNC(X) .type X,@function +#define ENDFUNC(X) .size X, .-X diff --git a/gcc-4.9/libgcc/config/arc/crtg.S b/gcc-4.9/libgcc/config/arc/crtg.S new file mode 100644 index 0000000..c375cee --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/crtg.S @@ -0,0 +1,51 @@ +/* Code to start and stop profiling for the Synopsys DesignWare ARC CPU. + + Copyright (C) 1994-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + .section .init + .global _init + .global _fini + .global __monstartup + mov_s r0,_init + mov_s r1,_fini + jl __monstartup + + .section .__arc_profile_desc, "a" + .global __arc_profile_desc_secstart + .balign 4 +__arc_profile_desc_secstart: + .section .__arc_profile_forward, "a" + .global __arc_profile_forward_secstart + .balign 4 +__arc_profile_forward_secstart: + .section .__arc_profile_counters, "aw" + .global __arc_profile_counters_secstart + .balign 4 +__arc_profile_counters_secstart: + + .section .fini + .global _mcleanup + jl _mcleanup diff --git a/gcc-4.9/libgcc/config/arc/crtgend.S b/gcc-4.9/libgcc/config/arc/crtgend.S new file mode 100644 index 0000000..667222b --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/crtgend.S @@ -0,0 +1,33 @@ +/* Code to start and stop profiling for the Synopsys DesignWare ARC CPU. + + Copyright (C) 1994-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + .section .__arc_profile_desc, "a" + .global __arc_profile_desc_secend +__arc_profile_desc_secend: + .section .__arc_profile_forward, "a" + .global __arc_profile_forward_secend +__arc_profile_forward_secend: diff --git a/gcc-4.9/libgcc/config/arc/crti.S b/gcc-4.9/libgcc/config/arc/crti.S new file mode 100644 index 0000000..71d67cd --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/crti.S @@ -0,0 +1,41 @@ +/* .fini/.init stack frame setup for the Synopsys DesignWare ARC CPU. + + Copyright (C) 1994-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +# This file contains the stack frame setup for contents of the .fini and +# .init sections. 
+ + .section .init + .global _init + .word 0 +_init: + push_s blink + + .section .fini + .global _fini + .word 0 +_fini: + push_s blink diff --git a/gcc-4.9/libgcc/config/arc/crtn.S b/gcc-4.9/libgcc/config/arc/crtn.S new file mode 100644 index 0000000..d4be36b --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/crtn.S @@ -0,0 +1,38 @@ +/* Ensure .fini/.init return for the Synopsys DesignWare ARC CPU. + + Copyright (C) 1994-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +# This file just makes sure that the .fini and .init sections do in +# fact return. This file is the last thing linked into any executable. + + .section .init + pop_s blink + j_s [blink] + + + .section .fini + pop_s blink + j_s [blink] diff --git a/gcc-4.9/libgcc/config/arc/divtab-arc700.c b/gcc-4.9/libgcc/config/arc/divtab-arc700.c new file mode 100644 index 0000000..0074819 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/divtab-arc700.c @@ -0,0 +1,70 @@ +/* Copyright (C) 2004-2014 Free Software Foundation, Inc. 
+   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+                on behalf of Synopsys Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Calculate division table for ARC700 integer division: print, as assembler
+   source on stdout, one .long reciprocal entry for each divisor 256 down to 2.
+   Contributed by Joern Rennecke joern.rennecke@arc.com */
+
+#include <stdio.h>
+#include <math.h>
+
+int
+main ()
+{
+  int i, j;
+  unsigned x;
+  double q, r, err, max_err = -1;
+
+  puts("/* This table has been generated by divtab-arc700.c. */");
+  puts("\
+/* 1/512 .. 1/256, normalized. There is a leading 1 in bit 31.\n\
+ For powers of two, we list unnormalized numbers instead. The values\n\
+ for powers of 2 are loaded, but not used. The value for 1 is actually\n\
+ the first instruction after .Lmuldiv. */\n\
+ .balign 4");
+  puts (".Ldivtab:\n");
+  for (i = 256; i >= 2; --i)
+    {
+      j = i < 0 ? -i : i;
+      if (j & (j-1))
+        while (j < 128) /* Not a power of two: scale the divisor up to >= 128. */
+          j += j;
+      else
+        /* Power of two. */
+        j *= 128;
+      q = 4.*(1<<30)*128/j;
+      r = ceil (q); /* Round the entry up; ERR tracks the rounding excess. */
+      printf ("\t.long\t0x%X\n", (unsigned) r);
+      err = r - q;
+      if (err > max_err)
+        max_err = err;
+    }
+#if 0
+  printf ("\t/* maximum error: %f */\n", max_err);
+#endif
+  return 0; /* Not exit (0): <stdlib.h> is not included, so exit is undeclared. */
+}
diff --git a/gcc-4.9/libgcc/config/arc/dp-hack.h b/gcc-4.9/libgcc/config/arc/dp-hack.h
new file mode 100644
index 0000000..30cd068
--- /dev/null
+++ b/gcc-4.9/libgcc/config/arc/dp-hack.h
@@ -0,0 +1,101 @@
+/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+                on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This file selects the double-precision parts of fp-bit.c that are
+   still needed for some ARC hardware variants; it also renames functions
+   that duplicate asm-coded functionality so that their results can be
+   used to compare with the optimized versions for debugging.
*/ + +#define FINE_GRAINED_LIBRARIES +#define ARC_DP_DEBUG 1 +#if !defined (__ARC_NORM__) || ARC_DP_DEBUG +#define L_pack_df +#define L_unpack_df +#define L_make_df +#define L_thenan_df +#define L_sf_to_df +#endif +#ifndef __ARC_NORM__ +#define L_addsub_df +#elif ARC_DP_DEBUG +#define L_addsub_df +#define __adddf3 __adddf3_c +#define __subdf3 __subdf3_c +#endif +#ifndef __ARC_NORM__ +#define L_mul_df +#define L_div_df +#elif (!defined (__ARC700__) && !defined (__ARC_MUL64__) \ + && !defined(__ARC_MUL32BY16__)) +#define L_mul_df +#define L_div_df +#undef QUIET_NAN +#define QUIET_NAN 0xfffffffffffffLL +#elif ARC_DP_DEBUG +#define L_mul_df +#define __muldf3 __muldf3_c +#define L_div_df +#define __divdf3 __divdf3_c +#endif +#ifndef __ARC_NORM__ +#define L_df_to_sf +#define L_si_to_df +#define L_df_to_si +#define L_tf_to_usi /* need to define this instead of df_to_usi */ +#define L_usi_to_df +#elif ARC_DP_DEBUG +#define L_df_to_sf +#define __truncdfsf2 __truncdfsf2_c +#define L_si_to_df +#define __floatsidf __floatsidf_c +#define L_df_to_si +#define __fixdfsi __fixdfsi_c +#define L_tf_to_usi +#define __fixunsdfsi __fixunsdfsi_c +#define L_usi_to_df +#define __floatunsidf __floatunsidf_c +#endif +#ifndef __ARC_NORM__ +#define L_fpcmp_parts_df +#define L_compare_df +#define L_eq_df +#define L_ne_df +#define L_gt_df +#define L_ge_df +#define L_lt_df +#define L_le_df +#define L_unord_df +#define L_negate_df +#elif ARC_DP_DEBUG +#define L_fpcmp_parts_df +#define L_eq_df +#define __eqdf2 __eqdf2_c +#define L_gt_df +#define __gtdf2 __gtdf2_c +#define L_ge_df +#define __gedf2 __gedf2_c +#define L_unord_df +#define __unorddf2 __unorddf2_c +#endif diff --git a/gcc-4.9/libgcc/config/arc/fp-hack.h b/gcc-4.9/libgcc/config/arc/fp-hack.h new file mode 100644 index 0000000..dbd9f0c --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/fp-hack.h @@ -0,0 +1,85 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc.
+ Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* This file selects the single-precision parts of fp-bit.c that are + still needed for some ARC hardware variants; it also renames functions + that duplicate asm-coded functionality so that their results can be + used to compare with the optimized versions for debugging. 
*/ + +#define ARC_FP_DEBUG 1 +#define FINE_GRAINED_LIBRARIES +#if !defined (__ARC_NORM__) || ARC_FP_DEBUG +#define L_pack_sf +#define L_unpack_sf +#define L_make_sf +#define L_thenan_sf +#endif +#ifndef __ARC_NORM__ +#define L_addsub_sf +#define L_mul_sf +#define L_div_sf +#define L_sf_to_df +#define L_si_to_sf +#define L_sf_to_si +#define L_usi_to_sf +#elif ARC_FP_DEBUG +#define L_addsub_sf +#define __addsf3 __addsf3_c +#define __subsf3 __subsf3_c +#define L_mul_sf +#define __mulsf3 __mulsf3_c +#define L_div_sf +#define __divsf3 __divsf3_c +#define L_sf_to_df +#define __extendsfdf2 __extendsfdf2_c +#define L_si_to_sf +#define __floatsisf __floatsisf_c +#define L_sf_to_si +#define __fixsfsi __fixsfsi_c +#define L_usi_to_sf +#define __floatunsisf __floatunsisf_c +#endif +#ifndef __ARC_NORM__ +#define L_fpcmp_parts_sf +#define L_compare_sf +#define L_eq_sf +#define L_ne_sf +#define L_gt_sf +#define L_ge_sf +#define L_lt_sf +#define L_le_sf +#define L_unord_sf +#define L_negate_sf +#elif ARC_FP_DEBUG +#define L_fpcmp_parts_sf +#define L_eq_sf +#define __eqsf2 __eqsf2_c +#define L_gt_sf +#define __gtsf2 __gtsf2_c +#define L_ge_sf +#define __gesf2 __gesf2_c +#define L_unord_sf +#define __unordsf2 __unordsf2_c +#endif diff --git a/gcc-4.9/libgcc/config/arc/gmon/atomic.h b/gcc-4.9/libgcc/config/arc/gmon/atomic.h new file mode 100644 index 0000000..5db8378 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/atomic.h @@ -0,0 +1,26 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* File deliberately left blank. */ diff --git a/gcc-4.9/libgcc/config/arc/gmon/auxreg.h b/gcc-4.9/libgcc/config/arc/gmon/auxreg.h new file mode 100644 index 0000000..b23f7b7 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/auxreg.h @@ -0,0 +1,35 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +#define LP_START 0x02 +#define LP_END 0x03 +#define IDENTITY 0x04 +#define STATUS32 0x0a +#define COUNT0 0x21 /* Timer 0 count */ +#define CONTROL0 0x22 /* Timer 0 control */ +#define LIMIT0 0x23 /* Timer 0 limit */ +#define INT_VECTOR_BASE 0x25 +#define D_CACHE_BUILD 0x72 +#define DC_FLDL 0x4c diff --git a/gcc-4.9/libgcc/config/arc/gmon/dcache_linesz.S b/gcc-4.9/libgcc/config/arc/gmon/dcache_linesz.S new file mode 100644 index 0000000..e532d37 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/dcache_linesz.S @@ -0,0 +1,55 @@ +/* This file contains code to do profiling. + + Copyright (C) 2007-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../asm.h" +#include "auxreg.h" +/* This file contains code to do profiling. */ + .weak __profile_timer_cycles + .global __profile_timer_cycles + .set __profile_timer_cycles, 200 + .text + ; For Arctangent-A5, if no data cache is present, a read of the + ; cache build register returns the ID register. 
For ARC600 and + ; later, the version field will be zero. + .global __dcache_linesz + .balign 4 +__dcache_linesz: + lr r12,[D_CACHE_BUILD] + extb_s r0,r12 + breq_s r0,0,.Lsz_nocache + brge r0,0x20,.Lsz_havecache + lr r0,[IDENTITY] + breq r12,r0,.Lsz_nocache +.Lsz_havecache: + lsr_s r12,r12,16 + mov_s r0,16 + bmsk_s r12,r12,3 + asl_s r0,r0,r12 + j_s [blink] +.Lsz_nocache: + mov_s r0,1 + j_s [blink] diff --git a/gcc-4.9/libgcc/config/arc/gmon/gmon.c b/gcc-4.9/libgcc/config/arc/gmon/gmon.c new file mode 100644 index 0000000..3fd6083 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/gmon.c @@ -0,0 +1,450 @@ +/*- + * Copyright (c) 1983, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (C) 2007-2014 Free Software Foundation, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#if 0 +#include <sys/param.h> +#include <sys/time.h> +#endif +#include <sys/gmon.h> +#include <sys/gmon_out.h> + +#include <stddef.h> +#include <errno.h> +#include <stdio.h> +#include <fcntl.h> +#include <unistd.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#if 0 +#include <libc-internal.h> +#include <not-cancel.h> + +#ifdef USE_IN_LIBIO +# include <wchar.h> +#endif +#endif +#define internal_function +#define weak_alias(fun,aliasid) extern __typeof(fun) aliasid __attribute__ ((weak, alias (#fun))); +#define __libc_enable_secure 0 + +/* Head of basic-block list or NULL. */ +struct __bb *__bb_head attribute_hidden; + +struct gmonparam _gmonparam attribute_hidden = { GMON_PROF_OFF }; + +/* + * See profil(2) where this is described: + */ +static int s_scale; +#define SCALE_1_TO_1 0x10000L + +#define ERR(s) write (STDERR_FILENO, s, sizeof (s) - 1) + +void moncontrol (int mode); +void __moncontrol (int mode); +static void write_hist (int fd) internal_function; +static void write_call_graph (int fd) internal_function; +static void write_bb_counts (int fd) internal_function; + +/* + * Control profiling + * profiling is what mcount checks to see if + * all the data structures are ready. + */ +void +__moncontrol (int mode) +{ + struct gmonparam *p = &_gmonparam; + + /* Don't change the state if we ran into an error. 
*/ + if (p->state == GMON_PROF_ERROR) + return; + + if (mode) + { + /* start */ + __profil((void *) p->kcount, p->kcountsize, p->lowpc, s_scale); + p->state = GMON_PROF_ON; + } + else + { + /* stop */ + __profil(NULL, 0, 0, 0); + p->state = GMON_PROF_OFF; + } +} +weak_alias (__moncontrol, moncontrol) + + +void +__monstartup (u_long lowpc, u_long highpc) +{ + register int o; + char *cp; + struct gmonparam *p = &_gmonparam; + int linesz; + + /* + * round lowpc and highpc to multiples of the density we're using + * so the rest of the scaling (here and in gprof) stays in ints. + */ + p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER)); + if (sizeof *p->froms % sizeof(HISTCOUNTER) != 0) + { + p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER)); + p->textsize = p->highpc - p->lowpc; + p->kcountsize = ROUNDUP((p->textsize + HISTFRACTION - 1) / HISTFRACTION, + sizeof (*p->froms)); + } + else + { + /* Avoid odd scales by rounding up highpc to get kcountsize rounded. */ + p->textsize = ROUNDUP (highpc - p->lowpc, + HISTFRACTION * sizeof (*p->froms)); + p->highpc = p->lowpc + p->textsize; + p->kcountsize = p->textsize / HISTFRACTION; + } + p->hashfraction = HASHFRACTION; + p->log_hashfraction = -1; + /* The following test must be kept in sync with the corresponding + test in mcount.c. */ + if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) { + /* if HASHFRACTION is a power of two, mcount can use shifting + instead of integer division. Precompute shift amount. */ + p->log_hashfraction = ffs(p->hashfraction * sizeof(*p->froms)) - 1; + } + p->tolimit = p->textsize * ARCDENSITY / 100; + if (p->tolimit < MINARCS) + p->tolimit = MINARCS; + else if (p->tolimit > MAXARCS) + p->tolimit = MAXARCS; + p->tossize = p->tolimit * sizeof(struct tostruct); + + /* p->kcount must not share cache lines with the adjacent data, because + we use uncached accesses while profiling. 
*/ + linesz = __dcache_linesz (); + cp = calloc (ROUNDUP (p->kcountsize, linesz) + p->tossize + + (linesz - 1), 1); + if (! cp) + { + ERR("monstartup: out of memory\n"); + p->tos = NULL; + p->state = GMON_PROF_ERROR; + /* In case we lose the error state due to a race, + prevent invalid writes also by clearing tolimit. */ + p->tolimit = 0; + return; + } + p->tos = (struct tostruct *)cp; + cp += p->tossize; + cp = (char *) ROUNDUP ((ptrdiff_t) cp, linesz); + p->kcount = (HISTCOUNTER *)cp; + cp += ROUNDUP (p->kcountsize, linesz); + + p->tos[0].link = 0; + + o = p->highpc - p->lowpc; + if (p->kcountsize < (u_long) o) + { +#ifndef hp300 + s_scale = ((float)p->kcountsize / o ) * SCALE_1_TO_1; +#else + /* avoid floating point operations */ + int quot = o / p->kcountsize; + + if (quot >= 0x10000) + s_scale = 1; + else if (quot >= 0x100) + s_scale = 0x10000 / quot; + else if (o >= 0x800000) + s_scale = 0x1000000 / (o / (p->kcountsize >> 8)); + else + s_scale = 0x1000000 / ((o << 8) / p->kcountsize); +#endif + } else + s_scale = SCALE_1_TO_1; + + __moncontrol(1); +} +weak_alias (__monstartup, monstartup) + + +static void +internal_function +write_hist (int fd) +{ + u_char tag = GMON_TAG_TIME_HIST; + struct arc_gmon_hist_hdr thdr __attribute__ ((aligned (__alignof__ (char *)))); + int r; + + if (_gmonparam.kcountsize > 0) + { + *(char **) thdr.low_pc = (char *) _gmonparam.lowpc; + *(char **) thdr.high_pc = (char *) _gmonparam.highpc; + *(int32_t *) thdr.hist_size = (_gmonparam.kcountsize + / sizeof (HISTCOUNTER)); + *(int32_t *) thdr.prof_rate = __profile_frequency (); + strncpy (thdr.dimen, "seconds", sizeof (thdr.dimen)); + thdr.dimen_abbrev = 's'; + + r = write (fd, &tag, sizeof tag); + if (r != sizeof tag) + return; + r = write (fd, &thdr, sizeof thdr); + if (r != sizeof thdr) + return; + r = write (fd,_gmonparam.kcount, _gmonparam.kcountsize); + if ((unsigned) r != _gmonparam.kcountsize) + return; + } +} + + +static void +internal_function +write_call_graph (int fd) +{ 
+#define NARCS_PER_WRITE 64 +#define BYTES_PER_ARC (1 + sizeof (struct gmon_cg_arc_record)) +#define BYTES_PER_WRITE (BYTES_PER_ARC * NARCS_PER_WRITE) + ARCINDEX to_index; + u_long frompc, selfpc, count; + char buffer[BYTES_PER_WRITE], *p; + u_long *prof_desc = __arc_profile_desc_secstart; + u_long *prof_count = __arc_profile_counters_secstart; + u_long *prof_desc_end = __arc_profile_desc_secend; + u_long *prof_forward = __arc_profile_forward_secstart; + + for (p = buffer; p < buffer + BYTES_PER_WRITE; p += BYTES_PER_ARC) + *p = GMON_TAG_CG_ARC; + p = buffer; + frompc = *prof_desc++ & -2; + while (prof_desc < prof_desc_end) + { + selfpc = *prof_desc++; + if (selfpc & 1) + { + frompc = selfpc & -2; + selfpc = *prof_desc++; + } + count = *prof_count++; + if (selfpc) + { + struct arc + { + char *frompc; + char *selfpc; + int32_t count; + } + arc; + + if (!count) + continue; + arc.frompc = (char *) frompc; + arc.selfpc = (char *) selfpc; + arc.count = count; + memcpy (p + 1, &arc, sizeof arc); + p += 1 + sizeof arc; + + if (p == buffer + BYTES_PER_WRITE) + { + write (fd, buffer, BYTES_PER_WRITE); + p = buffer; + } + } + else + { + for (to_index = count; + to_index != 0; + to_index = _gmonparam.tos[to_index].link) + { + struct arc + { + char *frompc; + char *selfpc; + int32_t count; + } + arc; + + arc.frompc = (char *) frompc; + arc.selfpc = (char *) _gmonparam.tos[to_index].selfpc; + arc.count = _gmonparam.tos[to_index].count; + memcpy (p + 1, &arc, sizeof arc); + p += 1 + sizeof arc; + + if (p == buffer + BYTES_PER_WRITE) + { + write (fd, buffer, BYTES_PER_WRITE); + p = buffer; + } + } + } + } + while (prof_forward < __arc_profile_forward_secend) + { + /* ??? The 'call count' is actually supposed to be a fixed point + factor, with 16 bits each before and after the point. + It would be much nicer if we figured out the actual number + of calls to the caller, and multiplied that with the fixed point + factor to arrive at the estimated calls for the callee. 
*/ + memcpy (p + 1, prof_forward, 3 * sizeof *prof_forward); + prof_forward += 3; + p += 1 + 3 * sizeof *prof_forward; + if (p == buffer + BYTES_PER_WRITE) + { + write (fd, buffer, BYTES_PER_WRITE); + p = buffer; + } + } + if (p != buffer) + write (fd, buffer, p - buffer); +} + + +static void +internal_function +write_bb_counts (int fd) +{ + struct __bb *grp; + u_char tag = GMON_TAG_BB_COUNT; + size_t ncounts; + size_t i; + + struct { unsigned long address; long count; } bbbody[8]; + size_t nfilled; + + /* Write each group of basic-block info (all basic-blocks in a + compilation unit form a single group). */ + + for (grp = __bb_head; grp; grp = grp->next) + { + ncounts = grp->ncounts; + write (fd, &tag, 1); + write (fd, &ncounts, sizeof ncounts); + for (nfilled = i = 0; i < ncounts; ++i) + { + if (nfilled == sizeof (bbbody) / sizeof (bbbody[0])) + { + write (fd, bbbody, sizeof bbbody); + nfilled = 0; + } + + bbbody[nfilled].address = grp->addresses[i]; + bbbody[nfilled++].count = grp->counts[i]; + } + if (nfilled > 0) + write (fd, bbbody, nfilled * sizeof bbbody[0]); + } +} + + +static void +write_gmon (void) +{ + struct gmon_hdr ghdr __attribute__ ((aligned (__alignof__ (int)))); + int fd = -1; + char *env; + +#ifndef O_NOFOLLOW +# define O_NOFOLLOW 0 +#endif + + env = getenv ("GMON_OUT_PREFIX"); + if (env != NULL && !__libc_enable_secure) + { + size_t len = strlen (env); + char buf[len + 20]; + snprintf (buf, sizeof (buf), "%s.%u", env, getpid ()); + fd = open (buf, O_CREAT|O_TRUNC|O_WRONLY|O_NOFOLLOW, 0666); + } + + if (fd == -1) + { + fd = open ("gmon.out", O_CREAT|O_TRUNC|O_WRONLY|O_NOFOLLOW, + 0666); + if (fd < 0) + { + perror ("_mcleanup: gmon.out"); + return; + } + } + + /* write gmon.out header: */ + memset (&ghdr, '\0', sizeof (struct gmon_hdr)); + memcpy (&ghdr.cookie[0], GMON_MAGIC, sizeof (ghdr.cookie)); + *(int32_t *) ghdr.version = GMON_VERSION; + write (fd, &ghdr, sizeof (struct gmon_hdr)); + + /* write PC histogram: */ + write_hist (fd); + + /* 
write call-graph: */ + write_call_graph (fd); + + /* write basic-block execution counts: */ + write_bb_counts (fd); + + close (fd); +} + + +void +__write_profiling (void) +{ + int save = _gmonparam.state; + _gmonparam.state = GMON_PROF_OFF; + if (save == GMON_PROF_ON) + write_gmon (); + _gmonparam.state = save; +} +#ifndef SHARED +/* This symbol isn't used anywhere in the DSO and it is not exported. + This would normally mean it should be removed to get the same API + in static libraries. But since profiling is special in static libs + anyway we keep it. But not when building the DSO since some + quality assurance tests will otherwise trigger. */ +weak_alias (__write_profiling, write_profiling) +#endif + + +void +_mcleanup (void) +{ + __moncontrol (0); + + if (_gmonparam.state != GMON_PROF_ERROR) + write_gmon (); + + /* free the memory. */ + if (_gmonparam.tos != NULL) + free (_gmonparam.tos); +} diff --git a/gcc-4.9/libgcc/config/arc/gmon/machine-gmon.h b/gcc-4.9/libgcc/config/arc/gmon/machine-gmon.h new file mode 100644 index 0000000..2199d5a --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/machine-gmon.h @@ -0,0 +1,65 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifndef MACHINE_GMON_H +#define MACHINE_GMON_H + +/* We can't fake our own <sys/types.h> header because the newlib / uclibc + headers in GCC_FOR_TARGET take precedence. */ + +#define __BEGIN_DECLS +#define __END_DECLS + +#define __THROW + +extern int __dcache_linesz (void); + +#define _MCOUNT_DECL(countp, selfpc) \ + static inline void _mcount_internal (void *countp, u_long selfpc) + +extern void _mcount (void); +extern void _mcount_call (void); + +/* N.B.: the calling point might be a sibcall, thus blink does not necessarily + hold the caller's address. r8 doesn't hold the caller's address, either, + but rather a pointer to the counter data structure associated with the + caller. + This function must be compiled with optimization turned on in order to + enable a sibcall for the final call to selfpc; this is important when trying + to profile a program with deep tail-recursion that would get a stack + overflow otherwise. */ +#define MCOUNT \ +void \ +_mcount_call (void) \ +{ \ + register void *countp __asm("r8"); \ + register u_long selfpc __asm("r9"); \ + _mcount_internal (countp, selfpc); \ + ((void (*)(void)) selfpc) (); \ +} + +extern int __profil (u_short *,size_t, size_t, u_int); + +#endif /* MACHINE_GMON_H */ diff --git a/gcc-4.9/libgcc/config/arc/gmon/mcount.c b/gcc-4.9/libgcc/config/arc/gmon/mcount.c new file mode 100644 index 0000000..ebd71e0 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/mcount.c @@ -0,0 +1,206 @@ +/*- + * Copyright (c) 1983, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Copyright (C) 2007-2014 Free Software Foundation, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if !defined(lint) && !defined(KERNEL) && defined(LIBC_SCCS) +static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93"; +#endif + +#if 0 +#include <unistd.h> +#include <sys/param.h> +#endif +#include <sys/gmon.h> + +/* This file provides the machine-dependent definitions of the _MCOUNT_DECL + and MCOUNT macros. */ +#include <machine-gmon.h> + +#include <atomic.h> + +/* + * mcount is called on entry to each function compiled with the profiling + * switch set. 
_mcount(), which is declared in a machine-dependent way + * with _MCOUNT_DECL, does the actual work and is either inlined into a + * C routine or called by an assembly stub. In any case, this magic is + * taken care of by the MCOUNT definition in <machine/profile.h>. + * + * _mcount updates data structures that represent traversals of the + * program's call graph edges. frompc and selfpc are the return + * address and function address that represents the given call graph edge. + * + * Note: the original BSD code used the same variable (frompcindex) for + * both frompcindex and frompc. Any reasonable, modern compiler will + * perform this optimization. + */ +_MCOUNT_DECL(count_ptr, selfpc) /* _mcount; may be static, inline, etc */ +{ + register ARCINDEX *frompcindex; + register struct tostruct *top, *prevtop; + register struct gmonparam *p; + register ARCINDEX toindex; + + /* Check for nested function trampoline. */ + if (selfpc & 2) + selfpc = *(u_long *) (selfpc + 10); + + p = &_gmonparam; + /* + * check that we are profiling + * and that we aren't recursively invoked. + */ +#if 0 + if (catomic_compare_and_exchange_bool_acq (&p->state, GMON_PROF_BUSY, + GMON_PROF_ON)) + return; +#elif defined (__ARC700__) +/* ??? This could temporarily lose the ERROR / OFF condition in a race, + but doing an actual compare_and_exchange would be too costly. It would + be better if we had a semaphore independent of the 'sticky' state, but + then we could run into ABI compatibility problems with the size of struct + gmonparam. */ + { + u_long old_state; + + __asm ("ex %0,%1": "=r" (old_state), "+m" (p->state) + : "0" (GMON_PROF_BUSY)); + if (old_state != GMON_PROF_ON) + { + switch (old_state) + { + case GMON_PROF_OFF: + __asm ("ex %0,%1": "+r" (old_state), "+m" (p->state)); + if (old_state == GMON_PROF_BUSY + /* Switching off while we say we are busy while profiling + was actually already switched off is all right. 
*/ + || old_state == GMON_PROF_OFF) + break; + /* It is not clear if we should allow switching on + profiling at this point, and how to handle further races. + For now, record an error in this case. */ + /* Fall through. */ + default: /* We expect here only GMON_PROF_ERROR. */ + p->state = GMON_PROF_ERROR; + break; + case GMON_PROF_BUSY: break; + } + return; + } + } +#else /* ??? No semaphore primitives available. */ + if (p->state != GMON_PROF_ON) + return; + p->state = GMON_PROF_BUSY; +#endif + + frompcindex = count_ptr; + toindex = *frompcindex; + if (toindex == 0) { + /* + * first time traversing this arc + */ + toindex = ++p->tos[0].link; + if (toindex >= (ARCINDEX) p->tolimit) + /* halt further profiling */ + goto overflow; + + *frompcindex = toindex; + top = &p->tos[toindex]; + top->selfpc = selfpc; + top->count = 1; + top->link = 0; + goto done; + } + top = &p->tos[toindex]; + if (top->selfpc == selfpc) { + /* + * arc at front of chain; usual case. + */ + top->count++; + goto done; + } + /* + * have to go looking down chain for it. + * top points to what we are looking at, + * prevtop points to previous top. + * we know it is not at the head of the chain. + */ + for (; /* goto done */; ) { + if (top->link == 0) { + /* + * top is end of the chain and none of the chain + * had top->selfpc == selfpc. + * so we allocate a new tostruct + * and link it to the head of the chain. + */ + toindex = ++p->tos[0].link; + if (toindex >= (ARCINDEX) p->tolimit) + goto overflow; + + top = &p->tos[toindex]; + top->selfpc = selfpc; + top->count = 1; + top->link = *frompcindex; + *frompcindex = toindex; + goto done; + } + /* + * otherwise, check the next arc on the chain. + */ + prevtop = top; + top = &p->tos[top->link]; + if (top->selfpc == selfpc) { + /* + * there it is. + * increment its count + * move it to the head of the chain. 
+ */ + top->count++; + toindex = prevtop->link; + prevtop->link = top->link; + top->link = *frompcindex; + *frompcindex = toindex; + goto done; + } + + } +done: + p->state = GMON_PROF_ON; + return; +overflow: + p->state = GMON_PROF_ERROR; + return; +} + +/* + * Actual definition of mcount function. Defined in <machine/profile.h>, + * which is included by <sys/gmon.h>. + */ +MCOUNT diff --git a/gcc-4.9/libgcc/config/arc/gmon/prof-freq-stub.S b/gcc-4.9/libgcc/config/arc/gmon/prof-freq-stub.S new file mode 100644 index 0000000..4a48c52 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/prof-freq-stub.S @@ -0,0 +1,40 @@ +/* This file contains code to do profiling. + + Copyright (C) 2007-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../asm.h" +/* This file contains code to do profiling. 
*/ + .weak __profile_frequency_value + .global __profile_frequency_value + .set __profile_frequency_value, 1000 + .text + .balign 4 + .global __profile_frequency + FUNC(__profile_frequency) +__profile_frequency: + mov_s r0,__profile_frequency_value + j_s [blink] + ENDFUNC(__profile_frequency) diff --git a/gcc-4.9/libgcc/config/arc/gmon/prof-freq.c b/gcc-4.9/libgcc/config/arc/gmon/prof-freq.c new file mode 100644 index 0000000..4ae82d8 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/prof-freq.c @@ -0,0 +1,60 @@ +/* Return frequency of ticks reported by profil. Generic version. */ +/*- + * Copyright (c) 1983, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Copyright (C) 2007-2014 Free Software Foundation, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +#include <sys/types.h> +#include <sys/time.h> +#if 0 +#include <libc-internal.h> +#else +#include "sys/gmon.h" +#endif + +int +__profile_frequency (void) +{ + /* + * Discover the tick frequency of the machine if something goes wrong, + * we return 0, an impossible hertz. + */ + struct itimerval tim; + + tim.it_interval.tv_sec = 0; + tim.it_interval.tv_usec = 1; + tim.it_value.tv_sec = 0; + tim.it_value.tv_usec = 0; + setitimer(ITIMER_REAL, &tim, 0); + setitimer(ITIMER_REAL, 0, &tim); + if (tim.it_interval.tv_usec < 2) + return 0; + return (1000000 / tim.it_interval.tv_usec); +} diff --git a/gcc-4.9/libgcc/config/arc/gmon/profil.S b/gcc-4.9/libgcc/config/arc/gmon/profil.S new file mode 100644 index 0000000..b9ca79e --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/profil.S @@ -0,0 +1,153 @@ +/* This file contains code to do profiling. + + Copyright (C) 2007-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../asm.h" +#include "auxreg.h" +/* This file contains code to do profiling. */ + .weak __profile_timer_cycles + .global __profile_timer_cycles + .set __profile_timer_cycles, 200 + + .section .bss + .global __profil_offset + .align 4 + .type __profil_offset, @object + .size __profil_offset, 4 +__profil_offset: + .zero 4 + + .text + .global __dcache_linesz + .global __profil + FUNC(__profil) +.Lstop_profiling: + sr r0,[CONTROL0] + j_s [blink] + .balign 4 +__profil: +.Lprofil: + breq_s r0,0,.Lstop_profiling + ; r0: buf r1: bufsiz r2: offset r3: scale + bxor.f r3,r3,15; scale must be 0x8000, i.e. 1/2; generate 0. 
+ push_s blink + lsr_s r2,r2,1 + mov_s r8,r0 + flag.ne 1 ; halt if wrong scale + sub_s r0,r0,r2 + st r0,[__profil_offset] + bl __dcache_linesz + pop_s blink + bbit1.d r0,0,nocache + mov_s r0,r8 +#ifdef __ARC700__ + add_s r1,r1,31 + lsr.f lp_count,r1,5 + lpne 2f + sr r0,[DC_FLDL] + add_s r0,r0,32 +#else /* !__ARC700__ */ +# FIX ME: set up loop according to cache line size + lr r12,[D_CACHE_BUILD] + sub_s r0,r0,16 + sub_s r1,r1,1 + lsr_s r12,r12,16 + asr_s r1,r1,4 + bmsk_s r12,r12,3 + asr_s r1,r1,r12 + add.f lp_count,r1,1 + mov_s r1,16 + asl_s r1,r1,r12 + lpne 2f + add r0,r0,r1 + sr r0,[DC_FLDL] +#endif /* __ARC700__ */ +2: b_s .Lcounters_cleared +nocache: +.Lcounters_cleared: + lr r1,[INT_VECTOR_BASE] ; disable timer0 interrupts + sr r3,[CONTROL0] + sr r3,[COUNT0] +0: ld_s r0,[pcl,1f-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF +0: ld_s r12,[pcl,1f+4-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF + 4 + st_s r0,[r1,24]; timer0 uses vector3 + st_s r12,[r1,24+4]; timer0 uses vector3 + ;sr 10000,[LIMIT0] + sr __profile_timer_cycles,[LIMIT0] + mov_s r12,3 ; enable timer interrupts; count only when not halted. 
+ sr r12,[CONTROL0] + lr r12,[STATUS32] + bset_s r12,r12,1 ; allow level 1 interrupts + flag r12 + mov_s r0,0 + j_s [blink] + .balign 4 +1: j __profil_irq + ENDFUNC(__profil) + + FUNC(__profil_irq) + .balign 4 ; make final jump unaligned to avoid delay penalty + .balign 32,0,12 ; make sure the code spans no more than two cache lines + nop_s +__profil_irq: + push_s r0 ; r0, r1 and r2 are used as scratch and saved on the stack + ld r0,[__profil_offset] ; r0 = word stored at __profil_offset + push_s r1 + lsr r1,ilink1,2 ; r1 = ilink1 >> 2 (ilink1 is the level 1 interrupt link register) + push_s r2 + ldw.as.di r2,[r0,r1] ; r2 = 16-bit counter at r0 plus r1*2 (.as scales the index, .di bypasses the data cache) + add1 r0,r0,r1 ; r0 = r0 plus r1*2, the address just loaded from + ld_s r1,[sp,4] ; reload saved r1 (r2 sits at offset 0, r1 at 4, r0 at 8) + add_s r2,r2,1 ; bump the counter + bbit1 r2,16,nostore ; bit 16 set: the counter was already 0xffff, leave it saturated + stw.di r2,[r0] ; write the counter back, again bypassing the cache +nostore:ld.ab r2,[sp,8] ; reload saved r2, then advance sp by 8 to the saved r0 + pop_s r0 + j.f [ilink1] ; return from the interrupt; .f restores the flags as well + ENDFUNC(__profil_irq) + +; could save one cycle if the counters were allocated at link time and +; the contents of __profil_offset were pre-computed at link time, like this: +#if 0 +; __profil_offset needs to be PROVIDEd as __profile_base-text/4 + .global __profil_offset + .balign 4 +__profil_irq: + push_s r0 + lsr r0,ilink1,2 + add1 r0,__profil_offset,r0 + push_s r1 + ldw.di r1,[r0] + + + add_s r1,r1,1 + bbit1 r1,16,nostore + stw.di r1,[r0] +nostore:pop_s r1 + pop_s r0 + j [ilink1] +#endif /* 0 */ diff --git a/gcc-4.9/libgcc/config/arc/gmon/sys/gmon.h b/gcc-4.9/libgcc/config/arc/gmon/sys/gmon.h new file mode 100644 index 0000000..33564ba --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/sys/gmon.h @@ -0,0 +1,217 @@ +/*- + * Copyright (c) 1982, 1986, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (C) 2007-2014 Free Software Foundation, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)gmon.h 8.2 (Berkeley) 1/4/94 + */ + +#ifndef _SYS_GMON_H +#define _SYS_GMON_H 1 + +#if 0 +#include <features.h> +#include <sys/types.h> +#else +#include <sys/types.h> +#include "machine-gmon.h" +#define attribute_hidden __attribute__ ((visibility("hidden"))) +#endif + +#include <stdint.h> + +/* + * See gmon_out.h for gmon.out format. + */ + +/* structure emitted by "gcc -a". This must match struct bb in + gcc/libgcc2.c. It is OK for gcc to declare a longer structure as + long as the members below are present. */ +struct __bb +{ + long zero_word; + const char *filename; + long *counts; + long ncounts; + struct __bb *next; + const unsigned long *addresses; +}; + +extern struct __bb *__bb_head; + +/* + * histogram counters are unsigned shorts (according to the kernel). 
+ */ +#define HISTCOUNTER unsigned short + +/* + * fraction of text space to allocate for histogram counters here, 1/2 + */ +#define HISTFRACTION 2 + +/* + * Fraction of text space to allocate for from hash buckets. + * The value of HASHFRACTION is based on the minimum number of bytes + * of separation between two subroutine call points in the object code. + * Given MIN_SUBR_SEPARATION bytes of separation the value of + * HASHFRACTION is calculated as: + * + * HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1); + * + * For example, on the VAX, the shortest two call sequence is: + * + * calls $0,(r0) + * calls $0,(r0) + * + * which is separated by only three bytes, thus HASHFRACTION is + * calculated as: + * + * HASHFRACTION = 3 / (2 * 2 - 1) = 1 + * + * Note that the division above rounds down, thus if MIN_SUBR_SEPARATION + * is less than three, this algorithm will not work! + * + * In practice, however, call instructions are rarely at a minimal + * distance. Hence, we will define HASHFRACTION to be 2 across all + * architectures. This saves a reasonable amount of space for + * profiling data structures without (in practice) sacrificing + * any granularity. + */ +#define HASHFRACTION 2 + +/* + * Percent of text space to allocate for tostructs. + * This is a heuristic; we will fail with a warning when profiling programs + * with a very large number of very small functions, but that's + * normally OK. + * 2 is probably still a good value for normal programs. + * Profiling a test case with 64000 small functions will work if + * you raise this value to 3 and link statically (which bloats the + * text size, thus raising the number of arcs expected by the heuristic). + */ +#define ARCDENSITY 3 /* set to 3 here, i.e. the raised value mentioned above */ + +/* + * Always allocate at least this many tostructs. This + * hides the inadequacy of the ARCDENSITY heuristic, at least + * for small programs. + */ +#define MINARCS 50 + +/* + * The type used to represent indices into gmonparam.tos[].
+ */ +#define ARCINDEX u_long + +/* + * Maximum number of arcs we want to allow. + * Used to be max representable value of ARCINDEX minus 2, but now + * that ARCINDEX is a long, that's too large; we don't really want + * to allow a 48 gigabyte table. + * The old value of 1<<16 wasn't high enough in practice for large C++ + * programs; will 1<<20 be adequate for long? FIXME + */ +#define MAXARCS (1 << 20) + +struct tostruct { + u_long selfpc; + long count; + ARCINDEX link; +}; + +/* + * a raw arc, with pointers to the calling site and + * the called site and a count. + */ +struct rawarc { + u_long raw_frompc; + u_long raw_selfpc; + long raw_count; +}; + +/* + * general rounding functions. + */ +#define ROUNDDOWN(x,y) (((x)/(y))*(y)) +#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) + +/* + * The profiling data structures are housed in this structure. + */ +struct gmonparam { + long int state; + u_short *kcount; + u_long kcountsize; + ARCINDEX *froms; + u_long fromssize; + struct tostruct *tos; + u_long tossize; + long tolimit; + u_long lowpc; + u_long highpc; + u_long textsize; + u_long hashfraction; + long log_hashfraction; +}; +extern struct gmonparam _gmonparam; + +/* + * Possible states of profiling. + */ +#define GMON_PROF_ON 0 +#define GMON_PROF_BUSY 1 +#define GMON_PROF_ERROR 2 +#define GMON_PROF_OFF 3 + +/* + * Sysctl definitions for extracting profiling information from the kernel. + */ +#define GPROF_STATE 0 /* int: profiling enabling variable */ +#define GPROF_COUNT 1 /* struct: profile tick count buffer */ +#define GPROF_FROMS 2 /* struct: from location hash bucket */ +#define GPROF_TOS 3 /* struct: destination/count structure */ +#define GPROF_GMONPARAM 4 /* struct: profiling parameters (see above) */ + +__BEGIN_DECLS + +/* Set up data structures and start profiling. */ +extern void __monstartup (u_long __lowpc, u_long __highpc) __THROW; +extern void monstartup (u_long __lowpc, u_long __highpc) __THROW; + +/* Clean up profiling and write out gmon.out. 
*/ +extern void _mcleanup (void) __THROW; + +extern void __write_profiling (void); +extern int attribute_hidden __profile_frequency (void); + +extern u_long __arc_profile_desc_secstart[], __arc_profile_desc_secend[]; +extern u_long __arc_profile_forward_secstart[], __arc_profile_forward_secend[]; +extern u_long __arc_profile_counters_secstart[]; + +__END_DECLS + +#endif /* sys/gmon.h */ diff --git a/gcc-4.9/libgcc/config/arc/gmon/sys/gmon_out.h b/gcc-4.9/libgcc/config/arc/gmon/sys/gmon_out.h new file mode 100644 index 0000000..13feb00 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/gmon/sys/gmon_out.h @@ -0,0 +1,55 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +#define GMON_TAG_TIME_HIST 0 +#define GMON_TAG_CG_ARC 1 +#define GMON_TAG_BB_COUNT 2 + +#define GMON_MAGIC "gmon" +#define GMON_VERSION 1 + +struct arc_gmon_hist_hdr +{ + char low_pc[4]; + char high_pc[4]; + char hist_size[4]; + char prof_rate[4]; + char dimen[15]; + char dimen_abbrev; +}; + +struct gmon_cg_arc_record +{ + char afrompc[4]; + char selfpc[4]; + char count[4]; +}; + +struct gmon_hdr +{ + char cookie[4]; + char version[4]; + char c[12]; +}; diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S new file mode 100644 index 0000000..80b6455 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/adddf3.S @@ -0,0 +1,524 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +#include "arc-ieee-754.h" +#if 0 /* DEBUG */ + .global __adddf3 + .balign 4 +__adddf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __adddf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __adddf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + bl abort + .global __subdf3 + .balign 4 +__subdf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __subdf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __subdf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + bl abort +#define __adddf3 __adddf3_asm +#define __subdf3 __subdf3_asm +#endif /* DEBUG */ +/* N.B. This is optimized for ARC700. + ARC600 has very different scheduling / instruction selection criteria. */ + +/* inputs: DBL0, DBL1 (r0-r3) + output: DBL0 (r0, r1) + clobber: r2-r10, r12, flags + All NaN highword bits must be 1. NaN low word is random. */ + + .balign 4 + .global __adddf3 + .global __subdf3 + .long 0x7ff00000 ; exponent mask + FUNC(__adddf3) + FUNC(__subdf3) +__subdf3: + bxor_l DBL1H,DBL1H,31 +__adddf3: + ld r9,[pcl,-8] + bmsk r4,DBL0H,30 + xor r10,DBL0H,DBL1H + and r6,DBL1H,r9 + sub.f r12,r4,r6 + asr_s r12,r12,20 + blo .Ldbl1_gt + brhs r4,r9,.Linf_nan + brhs r12,32,.Large_shift + brne r12,0,.Lsmall_shift + brge r10,0,.Ladd_same_exp ; r12 == 0 + +/* After subtracting, we need to normalize; when shifting to place the + leading 1 into position for the implicit 1 and adding that to DBL0H, + we increment the exponent. Thus, we have to subtract one more than + the shift count from the exponent beforehand. Iff the exponent drops thus + below zero (before adding in the fraction with the leading one), we have + generated a denormal number. 
Denormal handling is basically reducing the + shift count so that we produce a zero exponent instead; however, this way + the shift count can become zero (if we started out with exponent 1). + Therefore, a simple min operation is not good enough, since we don't + want to handle a zero normalizing shift in the main path. + On the plus side, we don't need to check for denorm input, the result + of subtracting these looks just the same as denormals generated during + subtraction. */ + bmsk r7,DBL1H,30 + cmp r4,r7 + cmp.eq DBL0L,DBL1L + blo .L_rsub_same_exp + sub.f DBL0L,DBL0L,DBL1L + bmsk r12,DBL0H,19 + bic DBL1H,DBL0H,r12 + sbc.f r4,r4,r7 + beq_l .Large_cancel + norm DBL1L,r4 + b.d .Lsub_done_same_exp + sub r12,DBL1L,9 + + .balign 4 +.Linf_nan: + ; If both inputs are inf, but with different signs, the result is NaN. + asr r12,r10,31 + or_s DBL1H,DBL1H,r12 + j_s.d [blink] + or.eq DBL0H,DBL0H,DBL1H + + .balign 4 +.L_rsub_same_exp: + rsub.f DBL0L,DBL0L,DBL1L + bmsk r12,DBL1H,19 + bic_s DBL1H,DBL1H,r12 + sbc.f r4,r7,r4 + beq_l .Large_cancel + norm DBL1L,r4 + + sub r12,DBL1L,9 +.Lsub_done_same_exp: + asl_s r12,r12,20 + sub_s DBL1L,DBL1L,10 + sub DBL0H,DBL1H,r12 + xor.f 0,DBL0H,DBL1H + bmi .Ldenorm +.Lpast_denorm: + neg_s r12,DBL1L + lsr r7,DBL0L,r12 + asl r12,r4,DBL1L + asl_s DBL0L,DBL0L,DBL1L + add_s r12,r12,r7 + j_s.d [blink] + add_l DBL0H,DBL0H,r12 + .balign 4 +.Ladd_same_exp: + /* This is a special case because we can't test for need to shift + down by checking if bit 20 of DBL0H changes. OTOH, here we know + that we always need to shift down.
*/ + ; The implicit 1 of DBL0 is not shifted together with the + ; fraction, thus effectively doubled, compensating for not setting + ; implicit1 for DBL1 + add_s r12,DBL0L,DBL1L + lsr.f 0,r12,2 ; round to even + breq r6,0,.Ldenorm_add + adc.f DBL0L,DBL0L,DBL1L + sub r7,DBL1H,DBL0H + sub1 r7,r7,r9 ; boost exponent by 2/2 + rrc DBL0L,DBL0L + asr.f r7,r7 ; DBL1.fraction/2 - DBL0.fraction/2 ; exp++ + add.cs.f DBL0L,DBL0L,0x80000000 + add_l DBL0H,DBL0H,r7 ; DBL0.implicit1 not shifted for DBL1.implicit1 + add.cs DBL0H,DBL0H,1 + bic.f 0,r9,DBL0H ; check for overflow -> infinity. + jne_l [blink] + and DBL0H,DBL0H,0xfff00000 + j_s.d [blink] + mov_s DBL0L,0 + .balign 4 +.Large_shift: + brhs r12,55,.Lret_dbl0 + bmsk_s DBL1H,DBL1H,19 + brne r6,0,.Lno_denorm_large_shift + brhi.d r12,33,.Lfixed_denorm_large_shift + sub_s r12,r12,1 + breq r12,31, .Lfixed_denorm_small_shift +.Lshift32: + mov_s r12,DBL1L + mov_s DBL1L,DBL1H + brlt.d r10,0,.Lsub + mov_s DBL1H,0 + b_s .Ladd +.Ldenorm_add: + cmp_s r12,DBL1L + mov_s DBL0L,r12 + j_s.d [blink] + adc DBL0H,r4,DBL1H + +.Lret_dbl0: + j_s [blink] + .balign 4 +.Lsmall_shift: + breq.d r6,0,.Ldenorm_small_shift + bmsk_s DBL1H,DBL1H,19 + bset_s DBL1H,DBL1H,20 +.Lfixed_denorm_small_shift: + neg r8,r12 + asl r4,DBL1H,r8 + lsr_l DBL1H,DBL1H,r12 + lsr r5,DBL1L,r12 + asl r12,DBL1L,r8 + brge.d r10,0,.Ladd + or DBL1L,r4,r5 +/* subtract, abs(DBL0) > abs(DBL1) */ +/* DBL0H, DBL0L: original values + DBL1H, DBL1L: fraction with explicit leading 1, shifted into place + r4: orig. DBL0H & 0x7fffffff + r6: orig. DBL1H & 0x7ff00000 + r9: 0x7ff00000 + r10: orig. 
DBL0H ^ DBL1H + r12: guard bits */ + .balign 4 +.Lsub: + neg.f r12,r12 + mov_s r7,DBL1H + bmsk r5,DBL0H,19 + sbc.f DBL0L,DBL0L,DBL1L + bic DBL1H,DBL0H,r5 + bset r5,r5,20 + sbc.f r4,r5,r7 + beq_l .Large_cancel_sub + norm DBL1L,r4 + bmsk r6,DBL1H,30 +.Lsub_done: + sub_s DBL1L,DBL1L,9 + breq DBL1L,1,.Lsub_done_noshift + asl r5,DBL1L,20 + sub_s DBL1L,DBL1L,1 + brlo r6,r5,.Ldenorm_sub + sub DBL0H,DBL1H,r5 +.Lpast_denorm_sub: + neg_s DBL1H,DBL1L + lsr r6,r12,DBL1H + asl_s r12,r12,DBL1L + and r8,r6,1 + add1.f 0,r8,r12 + add.ne.f r12,r12,r12 + asl r8,DBL0L,DBL1L + lsr r12,DBL0L,DBL1H + adc.f DBL0L,r8,r6 + asl r5,r4,DBL1L + add_s DBL0H,DBL0H,r12 + j_s.d [blink] + adc DBL0H,DBL0H,r5 + + .balign 4 +.Lno_denorm_large_shift: + breq.d r12,32,.Lshift32 + bset_l DBL1H,DBL1H,20 +.Lfixed_denorm_large_shift: + neg r8,r12 + asl r4,DBL1H,r8 + lsr r5,DBL1L,r12 + asl.f 0,DBL1L,r8 + lsr DBL1L,DBL1H,r12 + or r12,r4,r5 + tst.eq r12,1 + or.ne r12,r12,2 + brlt.d r10,0,.Lsub + mov_s DBL1H,0 + b_l .Ladd + + ; If a denorm is produced without shifting, we have an exact result - + ; no need for rounding. + .balign 4 +.Ldenorm_sub: + lsr DBL1L,r6,20 + xor DBL0H,r6,DBL1H + brne.d DBL1L,1,.Lpast_denorm_sub + sub_s DBL1L,DBL1L,1 +.Lsub_done_noshift: + add.f 0,r12,r12 + btst.eq DBL0L,0 + cmp.eq r12,r12 + add.cs.f DBL0L,DBL0L,1 + bclr r4,r4,20 + j_s.d [blink] + adc DBL0H,DBL1H,r4 + + .balign 4 +.Ldenorm_small_shift: + brne.d r12,1,.Lfixed_denorm_small_shift + sub_l r12,r12,1 + brlt r10,0,.Lsub +.Ladd: ; bit 20 of DBL1H is clear and bit 0 of r12 does not matter + add.f DBL0L,DBL0L,DBL1L + add_s DBL1H,DBL1H,DBL0H + add.cs DBL1H,DBL1H,1 + xor_l DBL0H,DBL0H,DBL1H + bbit0 DBL0H,20,.Lno_shiftdown + lsr.f DBL0H,DBL1H + and r4,DBL0L,2 + bmsk DBL0H,DBL0H,18 + sbc DBL0H,DBL1H,DBL0H + rrc.f DBL0L,DBL0L + or.f r12,r12,r4 + cmp.eq r12,r12 + add.cs.f DBL0L,DBL0L,1 + bic.f 0,r9,DBL0H ; check for generating infinity with possible ... + jne.d [blink] ; ... 
non-zero fraction + add.cs DBL0H,DBL0H,1 + mov_s DBL0L,0 + bmsk DBL1H,DBL0H,19 + j_s.d [blink] + bic_s DBL0H,DBL0H,DBL1H +.Lno_shiftdown: + mov_s DBL0H,DBL1H + add.f 0,r12,r12 + btst.eq DBL0L,0 + cmp.eq r12,r12 + add.cs.f DBL0L,DBL0L,1 + j_s.d [blink] + add.cs DBL0H,DBL0H,1 + .balign 4 +.Ldenorm: + bmsk DBL0H,DBL1H,30 + lsr r12,DBL0H,20 + xor_s DBL0H,DBL0H,DBL1H + sub_l DBL1L,r12,1 + bgt .Lpast_denorm + j_s.d [blink] + add_l DBL0H,DBL0H,r4 + + .balign 4 +.Large_cancel: + ;DBL0L: mantissa DBL1H: sign & exponent + norm.f DBL1L,DBL0L + bmsk DBL0H,DBL1H,30 + add_s DBL1L,DBL1L,22 + mov.mi DBL1L,21 + add_s r12,DBL1L,1 + asl_s r12,r12,20 + beq_s .Lret0 + brhs.d DBL0H,r12,.Lpast_denorm_large_cancel + sub DBL0H,DBL1H,r12 + bmsk DBL0H,DBL1H,30 + lsr r12,DBL0H,20 + xor_s DBL0H,DBL0H,DBL1H + sub.f DBL1L,r12,1 + jle [blink] +.Lpast_denorm_large_cancel: + rsub.f r7,DBL1L,32 + lsr r7,DBL0L,r7 + asl_s DBL0L,DBL0L,DBL1L + mov.ls r7,DBL0L + add_s DBL0H,DBL0H,r7 + j_s.d [blink] + mov.ls DBL0L,0 +.Lret0: + j_s.d [blink] + mov_l DBL0H,0 + +/* r4:DBL0L:r12 : unnormalized result fraction + DBL1H: result sign and exponent */ +/* When seeing large cancellation, only the topmost guard bit might be set. */ + .balign 4 +.Large_cancel_sub: + norm.f DBL1L,DBL0L + bpnz.d 0f + bmsk DBL0H,DBL1H,30 + mov r5,22<<20 + bne.d 1f + mov_s DBL1L,21 + bset r5,r5,5+20 + add_s DBL1L,DBL1L,32 + brne r12,0,1f + j_s.d [blink] + mov_l DBL0H,0 + .balign 4 +0: add r5,DBL1L,23 + asl r5,r5,20 + add_s DBL1L,DBL1L,22 +1: brlo DBL0H,r5,.Ldenorm_large_cancel_sub + sub DBL0H,DBL1H,r5 +.Lpast_denorm_large_cancel_sub: + rsub.f r7,DBL1L,32 + lsr r12,r12,r7 + lsr r7,DBL0L,r7 + asl_s DBL0L,DBL0L,DBL1L + add.ge DBL0H,DBL0H,r7 + add_s DBL0L,DBL0L,r12 + add.lt DBL0H,DBL0H,DBL0L + mov.eq DBL0L,r12 + j_s.d [blink] + mov.lt DBL0L,0 + .balign 4 +.Ldenorm_large_cancel_sub: + lsr r5,DBL0H,20 + xor_s DBL0H,DBL0H,DBL1H + brgt.d r5,1,.Lpast_denorm_large_cancel_sub + sub DBL1L,r5,1 + j_l [blink] ; denorm, no shift -> no rounding needed. 
+ +/* r4: DBL0H & 0x7fffffff + r6: DBL1H & 0x7ff00000 + r9: 0x7ff00000 + r10: sign difference + r12: shift count (negative) */ + .balign 4 +.Ldbl1_gt: + brhs r6,r9,.Lret_dbl1 ; inf or NaN + neg r8,r12 + brhs r8,32,.Large_shift_dbl0 +.Lsmall_shift_dbl0: + breq.d r6,0,.Ldenorm_small_shift_dbl0 + bmsk_s DBL0H,DBL0H,19 + bset_s DBL0H,DBL0H,20 +.Lfixed_denorm_small_shift_dbl0: + asl r4,DBL0H,r12 + lsr DBL0H,DBL0H,r8 + lsr r5,DBL0L,r8 + asl r12,DBL0L,r12 + brge.d r10,0,.Ladd_dbl1_gt + or DBL0L,r4,r5 +/* subtract, abs(DBL0) < abs(DBL1) */ +/* DBL0H, DBL0L: fraction with explicit leading 1, shifted into place + DBL1H, DBL1L: original values + r6: orig. DBL1H & 0x7ff00000 + r9: 0x7ff00000 + r12: guard bits */ + .balign 4 +.Lrsub: + neg.f r12,r12 + bmsk r7,DBL1H,19 + mov_s r5,DBL0H + sbc.f DBL0L,DBL1L,DBL0L + bic DBL1H,DBL1H,r7 + bset r7,r7,20 + sbc.f r4,r7,r5 + beq_l .Large_cancel_sub + norm DBL1L,r4 + b_l .Lsub_done ; note: r6 is already set up. + +.Lret_dbl1: + mov_s DBL0H,DBL1H + j_s.d [blink] + mov_l DBL0L,DBL1L + .balign 4 +.Ldenorm_small_shift_dbl0: + sub.f r8,r8,1 + bne.d .Lfixed_denorm_small_shift_dbl0 + add_s r12,r12,1 + brlt r10,0,.Lrsub +.Ladd_dbl1_gt: ; bit 20 of DBL0H is clear and bit 0 of r12 does not matter + add.f DBL0L,DBL0L,DBL1L + add_s DBL0H,DBL0H,DBL1H + add.cs DBL0H,DBL0H,1 + xor DBL1H,DBL0H,DBL1H + bbit0 DBL1H,20,.Lno_shiftdown_dbl1_gt + lsr.f DBL1H,DBL0H + and r4,DBL0L,2 + bmsk DBL1H,DBL1H,18 + sbc DBL0H,DBL0H,DBL1H + rrc.f DBL0L,DBL0L + or.f r12,r12,r4 + cmp.eq r12,r12 + add.cs.f DBL0L,DBL0L,1 + bic.f 0,r9,DBL0H ; check for generating infinity with possible ... + jne.d [blink] ; ... 
non-zero fraction + add.cs DBL0H,DBL0H,1 + mov_s DBL0L,0 + bmsk DBL1H,DBL0H,19 + j_s.d [blink] + bic_s DBL0H,DBL0H,DBL1H +.Lno_shiftdown_dbl1_gt: + add.f 0,r12,r12 + btst.eq DBL0L,0 + cmp.eq r12,r12 + add.cs.f DBL0L,DBL0L,1 + j_s.d [blink] + add.cs DBL0H,DBL0H,1 + + .balign 4 +.Large_shift_dbl0: + brhs r8,55,.Lret_dbl1 + bmsk_s DBL0H,DBL0H,19 + brne r6,0,.Lno_denorm_large_shift_dbl0 + add_s r12,r12,1 + brne.d r8,33,.Lfixed_denorm_large_shift_dbl0 + sub r8,r8,1 + bset_s DBL0H,DBL0H,20 +.Lshift32_dbl0: + mov_s r12,DBL0L + mov_s DBL0L,DBL0H + brlt.d r10,0,.Lrsub + mov_s DBL0H,0 + b_s .Ladd_dbl1_gt + + .balign 4 +.Lno_denorm_large_shift_dbl0: + breq.d r8,32,.Lshift32_dbl0 + bset_l DBL0H,DBL0H,20 +.Lfixed_denorm_large_shift_dbl0: + asl r4,DBL0H,r12 + lsr r5,DBL0L,r8 + asl.f 0,DBL0L,r12 + lsr DBL0L,DBL0H,r8 + or r12,r4,r5 + tst.eq r12,1 + or.ne r12,r12,2 + brlt.d r10,0,.Lrsub + mov_s DBL0H,0 + b_l .Ladd_dbl1_gt + ENDFUNC(__adddf3) + ENDFUNC(__subdf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/addsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/addsf3.S new file mode 100644 index 0000000..45ed093 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/addsf3.S @@ -0,0 +1,344 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +#if 0 /* DEBUG */ + .global __addsf3 + FUNC(__addsf3) + .balign 4 +__addsf3: + push_s blink + push_s r1 + bl.d __addsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __addsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__addsf3) + .global __subsf3 + FUNC(__subsf3) + .balign 4 +__subsf3: + push_s blink + push_s r1 + bl.d __subsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __subsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__subsf3) +#define __addsf3 __addsf3_asm +#define __subsf3 __subsf3_asm +#endif /* DEBUG */ +/* N.B. This is optimized for ARC700. + ARC600 has very different scheduling / instruction selection criteria. */ + +/* inputs: r0, r1 + output: r0 + clobber: r1-r10, r12, flags */ + + .balign 4 + .global __addsf3 + .global __subsf3 + FUNC(__addsf3) + FUNC(__subsf3) + .long 0x7f800000 ; exponent mask +__subsf3: + bxor_l r1,r1,31 +__addsf3: + ld r9,[pcl,-8] + bmsk r4,r0,30 + xor r10,r0,r1 + and r6,r1,r9 + sub.f r12,r4,r6 + asr_s r12,r12,23 + blo .Ldbl1_gt + brhs r4,r9,.Linf_nan + brne r12,0,.Lsmall_shift + brge r10,0,.Ladd_same_exp ; r12 == 0 +/* After subtracting, we need to normalize; when shifting to place the + leading 1 into position for the implicit 1 and adding that to DBL0, + we increment the exponent. Thus, we have to subtract one more than + the shift count from the exponent beforehand. 
Iff the exponent drops thus + below zero (before adding in the fraction with the leading one), we have + generated a denormal number. Denormal handling is basically reducing the + shift count so that we produce a zero exponent instead; FWIW, this way + the shift count can become zero (if we started out with exponent 1). + On the plus side, we don't need to check for denorm input, the result + of subtracting these looks just the same as denormals generated during + subtraction. */ + bmsk r7,r1,30 + breq r4,r7,.Lret0 + sub.f r5,r4,r7 + lsr r12,r4,23 + neg.cs r5,r5 + norm r3,r5 + bmsk r2,r0,22 + sub_s r3,r3,6 + min r12,r12,r3 + bic r1,r0,r2 + sub_s r3,r12,1 + asl_s r12,r12,23 + asl r2,r5,r3 + sub_s r1,r1,r12 + add_s r0,r1,r2 + j_s.d [blink] + bxor.cs r0,r0,31 + .balign 4 +.Linf_nan: + ; If both inputs are inf, but with different signs, the result is NaN. + asr r12,r10,31 + or_s r1,r1,r12 + j_s.d [blink] + or.eq r0,r0,r1 + .balign 4 +.Ladd_same_exp: + /* This is a special case because we can't test for need to shift + down by checking if bit 23 of DBL0 changes. OTOH, here we know + that we always need to shift down. */ + ; adding the two floating point numbers together makes the sign + ; cancel out and appear as carry; the exponent is doubled, and the + ; fraction also in need of shifting left by one. The two implicit + ; ones of the sources make an implicit 1 of the result, again + ; non-existent in a place shifted by one. + add.f r0,r0,r1 + btst_s r0,1 + breq r6,0,.Ldenorm_add + add.ne r0,r0,1 ; round to even. + rrc r0,r0 + bmsk r1,r9,23 + add r0,r0,r1 ; increment exponent + bic.f 0,r9,r0; check for overflow -> infinity.
+ jne_l [blink] + mov_s r0,r9 + j_s.d [blink] + bset.cs r0,r0,31 + +.Ldenorm_add: + j_s.d [blink] + add r0,r4,r1 + +.Lret_dbl0: + j_s [blink] + + .balign 4 +.Lsmall_shift: + brhi r12,25,.Lret_dbl0 + breq.d r6,0,.Ldenorm_small_shift + bmsk_s r1,r1,22 + bset_s r1,r1,23 +.Lfixed_denorm_small_shift: + neg r8,r12 + asl r5,r1,r8 + brge.d r10,0,.Ladd + lsr_l r1,r1,r12 +/* subtract, abs(DBL0) > abs(DBL1) */ +/* DBL0: original values + DBL1: fraction with explicit leading 1, shifted into place + r4: orig. DBL0 & 0x7fffffff + r6: orig. DBL1 & 0x7f800000 + r9: 0x7f800000 + r10: orig. DBL0H ^ DBL1H + r5 : guard bits */ + .balign 4 +.Lsub: + neg.f r12,r5 + bmsk r3,r0,22 + bset r5,r3,23 + sbc.f r4,r5,r1 + beq.d .Large_cancel_sub + bic r7,r0,r3 + norm r3,r4 + bmsk r6,r7,30 +.Lsub_done: + sub_s r3,r3,6 + breq r3,1,.Lsub_done_noshift + asl r5,r3,23 + sub_l r3,r3,1 + brlo r6,r5,.Ldenorm_sub + sub r0,r7,r5 + neg_s r1,r3 + lsr.f r2,r12,r1 + asl_s r12,r12,r3 + btst_s r2,0 + bmsk.eq.f r12,r12,30 + asl r5,r4,r3 + add_s r0,r0,r2 + adc.ne r0,r0,0 + j_s.d [blink] + add_l r0,r0,r5 + +.Lret0: + j_s.d [blink] + mov_l r0,0 + + .balign 4 +.Ldenorm_small_shift: + brne.d r12,1,.Lfixed_denorm_small_shift + sub_s r12,r12,1 + brlt.d r10,0,.Lsub + mov_s r5,r12 ; zero r5, and align following code +.Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear. 
+ bmsk r2,r0,22 + add_s r2,r2,r1 + bbit0.d r2,23,.Lno_shiftdown + add_s r0,r0,r1 + bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity + bmsk r1,r2,22 + lsr.ne.f r2,r2,2; cc: even ; hi: might round down + lsr.ne r1,r1,1 + rcmp.hi r5,1; hi : round down + bclr.hi r0,r0,0 + j_l.d [blink] + sub_s r0,r0,r1 + +/* r4: DBL0H & 0x7fffffff + r6: DBL1H & 0x7f800000 + r9: 0x7f800000 + r10: sign difference + r12: shift count (negative) */ + .balign 4 +.Ldbl1_gt: + brhs r6,r9,.Lret_dbl1 ; inf or NaN + neg r8,r12 + brhi r8,25,.Lret_dbl1 +.Lsmall_shift_dbl0: + breq.d r6,0,.Ldenorm_small_shift_dbl0 + bmsk_s r0,r0,22 + bset_s r0,r0,23 +.Lfixed_denorm_small_shift_dbl0: + asl r5,r0,r12 + brge.d r10,0,.Ladd_dbl1_gt + lsr r0,r0,r8 +/* subtract, abs(DBL0) < abs(DBL1) */ +/* DBL0: fraction with explicit leading 1, shifted into place + DBL1: original value + r6: orig. DBL1 & 0x7f800000 + r9: 0x7f800000 + r5: guard bits */ + .balign 4 +.Lrsub: + neg.f r12,r5 + bmsk r5,r1,22 + bic r7,r1,r5 + bset r5,r5,23 + sbc.f r4,r5,r0 + bne.d .Lsub_done ; note: r6 is already set up. + norm r3,r4 + /* Fall through */ + +/* r4:r12 : unnormalized result fraction + r7: result sign and exponent */ +/* When seeing large cancellation, only the topmost guard bit might be set. */ + .balign 4 +.Large_cancel_sub: + breq_s r12,0,.Lret0 + sub r0,r7,24<<23 + xor.f 0,r0,r7 ; test if exponent is negative + tst.pl r9,r0 ; test if exponent is zero + jpnz [blink] ; return if non-denormal result + bmsk r6,r7,30 + lsr r3,r6,23 + xor r0,r6,r7 + sub_s r3,r3,24-22 + j_s.d [blink] + bset r0,r0,r3 + + ; If a denorm is produced, we have an exact result - + ; no need for rounding. 
+ .balign 4 +.Ldenorm_sub: + sub r3,r6,1 + lsr.f r3,r3,23 + xor r0,r6,r7 + neg_s r1,r3 + asl.ne r4,r4,r3 + lsr_s r12,r12,r1 + add_s r0,r0,r4 + j_s.d [blink] + add.ne r0,r0,r12 + + .balign 4 +.Lsub_done_noshift: + add.f 0,r12,r12 + btst.eq r4,0 + bclr r4,r4,23 + add r0,r7,r4 + j_s.d [blink] + adc.ne r0,r0,0 + + .balign 4 +.Lno_shiftdown: + add.f 0,r5,r5 + btst.eq r0,0 + cmp.eq r5,r5 + j_s.d [blink] + add.cs r0,r0,1 + +.Lret_dbl1: + j_s.d [blink] + mov_l r0,r1 + .balign 4 +.Ldenorm_small_shift_dbl0: + sub.f r8,r8,1 + bne.d .Lfixed_denorm_small_shift_dbl0 + add_s r12,r12,1 + brlt.d r10,0,.Lrsub + mov r5,0 +.Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear. + bmsk r2,r1,22 + add_s r2,r2,r0 + bbit0.d r2,23,.Lno_shiftdown_dbl1_gt + add_s r0,r1,r0 + bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity + bmsk r1,r2,22 + lsr.ne.f r2,r2,2; cc: even ; hi: might round down + lsr.ne r1,r1,1 + rcmp.hi r5,1; hi : round down + bclr.hi r0,r0,0 + j_l.d [blink] + sub_s r0,r0,r1 + + .balign 4 +.Lno_shiftdown_dbl1_gt: + add.f 0,r5,r5 + btst.eq r0,0 + cmp.eq r5,r5 + j_s.d [blink] + add.cs r0,r0,1 + ENDFUNC(__addsf3) + ENDFUNC(__subsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc-ieee-754.h b/gcc-4.9/libgcc/config/arc/ieee-754/arc-ieee-754.h new file mode 100644 index 0000000..64e6d23 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc-ieee-754.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifdef __LITTLE_ENDIAN__ +#define DBL0L r0 +#define DBL0H r1 +#define DBL1L r2 +#define DBL1H r3 +#else +#define DBL0L r1 +#define DBL0H r0 +#define DBL1L r3 +#define DBL1H r2 +#endif +#define add_l add +#define asr_l asr +#define j_l j +#define jne_l jne +#define jeq_l jeq +#define or_l or +#define mov_l mov +#define b_l b +#define beq_l beq +#define bne_l bne +#define brne_l brne +#define bset_l bset +#define sub_l sub +#define sub1_l sub1 +#define lsr_l lsr +#define xor_l xor +#define bic_l bic +#define bmsk_l bmsk +#define bxor_l bxor +#define bcs_s blo_s diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divdf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divdf3.S new file mode 100644 index 0000000..2381ba6 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divdf3.S @@ -0,0 +1,421 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + to calculate a := b/x as b*y, with y := 1/x: + - x is in the range [1..2) + - calculate 15..18 bit inverse y0 using a table of approximating polynomials. + Precision is higher for polynomials used to evaluate input with larger + value. + - Do one Newton-Raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. + The truncation error for either is less than 1 + x/2 ulp. + A 31 bit inverse can be simply calculated by using x with implicit 1 + and chaining the multiplies. For a 32 bit inverse, we multiply y0^2 + with the bare fraction part of x, then add in y0^2 for the implicit + 1 of x. + - If calculating a 31 bit inverse, the systematic error is less than + -1 ulp; likewise, for 32 bit, it is less than -2 ulp. + - If we calculate our seed with a 32 bit fraction, we can achieve a + tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we + only need to take the step to calculate the 2nd stage rest and + rounding adjust 1/32nd of the time. However, if we use a 20 bit + fraction for the seed, the negative error can exceed -2 ulp/128, (2) + thus for a simple add / tst check, we need to do the 2nd stage + rest calculation/ rounding adjust 1/16th of the time. + (1): The inexactness of the 32 bit inverse contributes an error in the + range of (-1 .. +(1+x/2) ) ulp/128.
Leaving out the low word of the + rest contributes an error < +1/x ulp/128 . In the interval [1,2), + x/2 + 1/x <= 1.5 . + (2): Unless proven otherwise. I have not actually looked for an + example where -2 ulp/128 is exceeded, and my calculations indicate + that the excess, if existent, is less than -1/512 ulp. + ??? The algorithm is still based on the ARC700 optimized code. + Maybe we could make better use of 32x16 bit multiply, or 64 bit multiply + results. + */ +#include "../arc-ieee-754.h" +#define mlo acc2 +#define mhi acc1 +#define mul64(b,c) mullw 0,b,c` machlw 0,b,c +#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c + +/* N.B. fp-bit.c does double rounding on denormal numbers. */ +#if 0 /* DEBUG */ + .global __divdf3 + FUNC(__divdf3) + .balign 4 +__divdf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __divdf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __divdf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + and r12,DBL0H,DBL1H + bic.f 0,0x7ff80000,r12 ; both NaN -> OK + jeq_s [blink] + bl abort + ENDFUNC(__divdf3) +#define __divdf3 __divdf3_asm +#endif /* DEBUG */ + + FUNC(__divdf3) + .balign 4 +.L7ff00000: + .long 0x7ff00000 +.Ldivtab: + .long 0xfc0fffe1 + .long 0xf46ffdfb + .long 0xed1ffa54 + .long 0xe61ff515 + .long 0xdf7fee75 + .long 0xd91fe680 + .long 0xd2ffdd52 + .long 0xcd1fd30c + .long 0xc77fc7cd + .long 0xc21fbbb6 + .long 0xbcefaec0 + .long 0xb7efa100 + .long 0xb32f92bf + .long 0xae8f83b7 + .long 0xaa2f7467 + .long 0xa5ef6479 + .long 0xa1cf53fa + .long 0x9ddf433e + .long 0x9a0f3216 + .long 0x965f2091 + .long 0x92df0f11 + .long 0x8f6efd05 + .long 0x8c1eeacc + .long 0x88eed876 + .long 0x85dec615 + .long 0x82eeb3b9 + .long 0x800ea10b + .long 0x7d3e8e0f + .long 0x7a8e7b3f + .long 0x77ee6836 + .long 0x756e5576 + .long 0x72fe4293 + .long 0x709e2f93 + .long 0x6e4e1c7f + .long 0x6c0e095e + .long 0x69edf6c5 + .long 0x67cde3a5 + .long 
0x65cdd125 + .long 0x63cdbe25 + .long 0x61ddab3f + .long 0x600d991f + .long 0x5e3d868c + .long 0x5c6d7384 + .long 0x5abd615f + .long 0x590d4ecd + .long 0x576d3c83 + .long 0x55dd2a89 + .long 0x545d18e9 + .long 0x52dd06e9 + .long 0x516cf54e + .long 0x4ffce356 + .long 0x4e9cd1ce + .long 0x4d3cbfec + .long 0x4becae86 + .long 0x4aac9da4 + .long 0x496c8c73 + .long 0x483c7bd3 + .long 0x470c6ae8 + .long 0x45dc59af + .long 0x44bc4915 + .long 0x43ac3924 + .long 0x428c27fb + .long 0x418c187a + .long 0x407c07bd + +__divdf3_support: /* This label makes debugger output saner. */ + .balign 4 +.Ldenorm_dbl1: + brge r6, \ + 0x43500000,.Linf_NaN ; large number / denorm -> Inf + bmsk.f r12,DBL1H,19 + mov.eq r12,DBL1L + mov.eq DBL1L,0 + sub.eq r7,r7,32 + norm.f r11,r12 ; flag for x/0 -> Inf check + beq_s .Linf_NaN + mov.mi r11,0 + add.pl r11,r11,1 + add_s r12,r12,r12 + asl r8,r12,r11 + rsub r12,r11,31 + lsr r12,DBL1L,r12 + tst_s DBL1H,DBL1H + or r8,r8,r12 + lsr r4,r8,26 + lsr DBL1H,r8,12 + ld.as r4,[r10,r4] + bxor.mi DBL1H,DBL1H,31 + sub r11,r11,11 + asl DBL1L,DBL1L,r11 + sub r11,r11,1 + mulu64 (r4,r8) + sub r7,r7,r11 + b.d .Lpast_denorm_dbl1 + asl r7,r7,20 + +.Linf_NaN: + tst_s DBL0L,DBL0L ; 0/0 -> NaN + xor_s DBL1H,DBL1H,DBL0H + bclr.eq.f DBL0H,DBL0H,31 + bmsk DBL0H,DBL1H,30 + xor_s DBL0H,DBL0H,DBL1H + sub.eq DBL0H,DBL0H,1 + mov_s DBL0L,0 + j_s.d [blink] + or DBL0H,DBL0H,r9 + .balign 4 +.Lret0_2: + xor_s DBL1H,DBL1H,DBL0H + mov_s DBL0L,0 + bmsk DBL0H,DBL1H,30 + j_s.d [blink] + xor_s DBL0H,DBL0H,DBL1H + .balign 4 + .global __divdf3 +/* N.B. the spacing between divtab and the sub3 to get its address must + be a multiple of 8. 
*/ +__divdf3: + asl r8,DBL1H,12 + lsr r4,r8,26 + sub3 r10,pcl,51;(.-.Ldivtab) >> 3 + ld.as r9,[pcl,-104]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000 + ld.as r4,[r10,r4] + lsr r12,DBL1L,20 + and.f r7,DBL1H,r9 + or r8,r8,r12 + mulu64 (r4,r8) + beq.d .Ldenorm_dbl1 +.Lpast_denorm_dbl1: + and.f r6,DBL0H,r9 + breq.d r7,r9,.Linf_nan_dbl1 + asl r4,r4,12 + sub r4,r4,mhi + mululw 0,r4,r4 + machulw r5,r4,r4 + bne.d .Lnormal_dbl0 + lsr r8,r8,1 + + .balign 4 +.Ldenorm_dbl0: + bmsk.f r12,DBL0H,19 + ; wb stall + mov.eq r12,DBL0L + sub.eq r6,r6,32 + norm.f r11,r12 ; flag for 0/x -> 0 check + brge r7, \ + 0x43500000, .Lret0_2 ; denorm/large number -> 0 + beq_s .Lret0_2 + mov.mi r11,0 + add.pl r11,r11,1 + asl r12,r12,r11 + sub r6,r6,r11 + add.f 0,r6,31 + lsr r10,DBL0L,r6 + mov.mi r10,0 + add r6,r6,11+32 + neg.f r11,r6 + asl DBL0L,DBL0L,r11 + mov.pl DBL0L,0 + sub r6,r6,32-1 + b.d .Lpast_denorm_dbl0 + asl r6,r6,20 + + .balign 4 +.Linf_nan_dbl1: ; 0/Inf -> NaN Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN + or.f 0,r6,DBL0L + cmp.ne r6,r9 + not_s DBL0L,DBL1H + sub_s.ne DBL0L,DBL0L,DBL0L + tst_s DBL0H,DBL0H + add_s DBL0H,DBL1H,DBL0L + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + + .balign 4 +.Lnormal_dbl0: + breq.d r6,r9,.Linf_nan_dbl0 + asl r12,DBL0H,11 + lsr r10,DBL0L,21 +.Lpast_denorm_dbl0: + bset r8,r8,31 + mulu64 (r5,r8) + add_s r12,r12,r10 + bset r5,r12,31 + cmp r5,r8 + cmp.eq DBL0L,DBL1L + lsr.cc r5,r5,1 + sub r4,r4,mhi ; u1.31 inverse, about 30 bit + mululw 0,r5,r4 + machulw r11,r5,r4 ; result fraction highpart + lsr r8,r8,2 ; u3.29 + add r5,r6, /* wait for immediate */ \ + 0x3fe00000 + mulu64 (r11,r8) ; u-28.31 + asl_s DBL1L,DBL1L,9 ; u-29.23:9 + sbc r6,r5,r7 + mov r12,mlo ; u-28.31 + mulu64 (r11,DBL1L) ; mhi: u-28.23:9 + add.cs DBL0L,DBL0L,DBL0L + asl_s DBL0L,DBL0L,6 ; u-26.25:7 + asl r10,r11,23 + sub_l DBL0L,DBL0L,r12 + lsr r7,r11,9 + sub r5,DBL0L,mhi ; rest msw ; u-26.31:0 + mul64 (r5,r4) ; mhi: result fraction lowpart + xor.f 0,DBL0H,DBL1H + and DBL0H,r6,r9 + add_s DBL0H,DBL0H,r7 + 
bclr r12,r9,20 ; 0x7fe00000 + brhs.d r6,r12,.Linf_denorm + bxor.mi DBL0H,DBL0H,31 + add.f r12,mhi,0x11 + asr r9,r12,5 + sub.mi DBL0H,DBL0H,1 + add.f DBL0L,r9,r10 + tst r12,0x1c + jne.d [blink] + add.cs DBL0H,DBL0H,1 + /* work out exact rounding if we fall through here. */ + /* We know that the exact result cannot be represented in double + precision. Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. Since we want to know + only the sign bit, it is sufficient to calculate only the + highpart of the lower 64 bits. */ + mulu64 (r11,DBL1L) ; rest before considering r12 in r5 : -mlo + sub.f DBL0L,DBL0L,1 + asl r12,r9,2 ; u-22.30:2 + sub.cs DBL0H,DBL0H,1 + sub.f r12,r12,2 + mov r10,mlo ; rest before considering r12 in r5 : -r10 + mululw 0,r12,DBL1L + machulw r7,r12,DBL1L ; mhi: u-51.32 + asl r5,r5,25 ; s-51.7:25 + lsr r10,r10,7 ; u-51.30:2 + mulu64 (r12,r8) ; mlo: u-51.31:1 + sub r5,r5,r10 + add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L + bset r7,r7,0 ; make sure that the result is not zero, and that + sub r5,r5,r7 ; a highpart zero appears negative + sub.f r5,r5,mlo ; rest msw + add.pl.f DBL0L,DBL0L,1 + j_s.d [blink] + add.eq DBL0H,DBL0H,1 + +.Linf_nan_dbl0: + tst_s DBL1H,DBL1H + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + .balign 4 +.Linf_denorm: + lsr r12,r6,28 + brlo.d r12,0xc,.Linf +.Ldenorm: + asr r6,r6,20 + neg r9,r6 + mov_s DBL0H,0 + brhs.d r9,54,.Lret0 + bxor.mi DBL0H,DBL0H,31 + add r12,mhi,1 + and r12,r12,-4 + rsub r7,r6,5 + asr r10,r12,28 + bmsk r4,r12,27 + min r7,r7,31 + asr DBL0L,r4,r7 + add DBL1H,r11,r10 + abs.f r10,r4 + sub.mi r10,r10,1 + add.f r7,r6,32-5 + asl r4,r4,r7 + mov.mi r4,r10 + add.f r10,r6,23 + rsub r7,r6,9 + lsr r7,DBL1H,r7 + asl r10,DBL1H,r10 + or.pnz DBL0H,DBL0H,r7 + or.mi r4,r4,r10 + mov.mi r10,r7 + add.f DBL0L,r10,DBL0L + add.cs.f DBL0H,DBL0H,1 ; carry clear after this point + bxor.f 0,r4,31 + add.pnz.f DBL0L,DBL0L,1 + add.cs.f 
DBL0H,DBL0H,1 + jne_s [blink] + /* Calculation so far was not conclusive; calculate further rest. */ + mulu64 (r11,DBL1L) ; rest before considering r12 in r5 : -mlo + asr.f r12,r12,3 + asl r5,r5,25 ; s-51.7:25 + mov r11,mlo ; rest before considering r12 in r5 : -r11 + mulu64 (r12,r8) ; u-51.31:1 + and r9,DBL0L,1 ; tie-breaker: round to even + lsr r11,r11,7 ; u-51.30:2 + mov DBL1H,mlo ; u-51.31:1 + mulu64 (r12,DBL1L) ; u-51.62:2 + sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L + add_s DBL1H,DBL1H,r11 + sub DBL1H,DBL1H,r5 ; -rest msw + add_s DBL1H,DBL1H,mhi ; -rest msw + add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-( + tst_s DBL1H,DBL1H + cmp.eq mlo,r9 + add.cs.f DBL0L,DBL0L,1 + j_s.d [blink] + add.cs DBL0H,DBL0H,1 + +.Lret0: + /* return +- 0 */ + j_s.d [blink] + mov_s DBL0L,0 +.Linf: + mov_s DBL0H,r9 + mov_s DBL0L,0 + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + ENDFUNC(__divdf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divsf3.S new file mode 100644 index 0000000..77b257d --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/divsf3.S @@ -0,0 +1,274 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + - calculate 15..18 bit inverse using a table of approximating polynoms. + precision is higher for polynoms used to evaluate input with larger + value. + - do one newton-raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. + */ +#include "../arc-ieee-754.h" +#define mlo acc2 +#define mhi acc1 +#define mul64(b,c) mullw 0,b,c` machlw 0,b,c +#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c + +#if 0 /* DEBUG */ + .global __divsf3 + FUNC(__divsf3) + .balign 4 +__divsf3: + push_s blink + push_s r1 + bl.d __divsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __divsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 +#if 1 + bne abort + jeq_s [blink] + b abort +#else + bne abort + j_s [blink] +#endif + ENDFUNC(__divsf3) +#define __divsf3 __divsf3_asm +#endif /* DEBUG */ + + FUNC(__divsf3) + .balign 4 +.Ldivtab: + .long 0xfc0ffff0 + .long 0xf46ffefd + .long 0xed1ffd2a + .long 0xe627fa8e + .long 0xdf7ff73b + .long 0xd917f33b + .long 0xd2f7eea3 + .long 0xcd1fe986 + .long 0xc77fe3e7 + .long 0xc21fdddb + .long 0xbcefd760 + .long 0xb7f7d08c + .long 0xb32fc960 + .long 0xae97c1ea + .long 0xaa27ba26 + .long 0xa5e7b22e + .long 0xa1cfa9fe + .long 0x9ddfa1a0 + .long 0x9a0f990c + .long 0x9667905d + .long 0x92df878a + .long 0x8f6f7e84 + .long 0x8c27757e + .long 0x88f76c54 + .long 0x85df630c + .long 0x82e759c5 + .long 0x8007506d + .long 0x7d3f470a + .long 0x7a8f3da2 + .long 0x77ef341e + .long 0x756f2abe + .long 0x72f7212d + .long 0x709717ad + .long 0x6e4f0e44 + .long 0x6c1704d6 + .long 0x69e6fb44 + .long 
0x67cef1d7 + .long 0x65c6e872 + .long 0x63cedf18 + .long 0x61e6d5cd + .long 0x6006cc6d + .long 0x5e36c323 + .long 0x5c76b9f3 + .long 0x5abeb0b7 + .long 0x5916a79b + .long 0x57769e77 + .long 0x55de954d + .long 0x54568c4e + .long 0x52d6834d + .long 0x51667a7f + .long 0x4ffe71b5 + .long 0x4e9e68f1 + .long 0x4d466035 + .long 0x4bf65784 + .long 0x4aae4ede + .long 0x496e4646 + .long 0x48363dbd + .long 0x47063547 + .long 0x45de2ce5 + .long 0x44be2498 + .long 0x43a61c64 + .long 0x4296144a + .long 0x41860c0e + .long 0x407e03ee +.L7f800000: + .long 0x7f800000 + .balign 4 + .global __divsf3_support +__divsf3_support: +.Linf_NaN: + bclr.f 0,r0,31 ; 0/0 -> NaN + xor_s r0,r0,r1 + bmsk r1,r0,30 + bic_s r0,r0,r1 + sub.eq r0,r0,1 + j_s.d [blink] + or r0,r0,r9 +.Lret0: + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_s r0,r0,r1 +/* N.B. the spacing between divtab and the sub3 to get its address must + be a multiple of 8. */ +__divsf3: + ld.as r9,[pcl,-9]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000 + sub3 r3,pcl,37;(.-.Ldivtab) >> 3 + lsr r2,r1,17 + and.f r11,r1,r9 + bmsk r5,r2,5 + beq.d .Ldenorm_fp1 + asl r6,r1,8 + and.f r2,r0,r9 + ld.as r5,[r3,r5] + asl r4,r1,9 + bset r6,r6,31 + breq.d r11,r9,.Linf_nan_fp1 +.Lpast_denorm_fp1: + mululw 0,r5,r4 + machulw r8,r5,r4 + breq.d r2,r9,.Linf_nan_fp0 + asl r5,r5,13 + sub r7,r5,r8 + mululw 0,r7,r6 + machulw r8,r7,r6 + beq.d .Ldenorm_fp0 + asl r12,r0,8 + mulu64 (r8,r7) + bset r3,r12,31 +.Lpast_denorm_fp0: + cmp_s r3,r6 + lsr.cc r3,r3,1 + add_s r2,r2, /* wait for immediate */ \ + 0x3f000000 + sub r7,r7,mhi ; u1.31 inverse, about 30 bit + mulu64 (r3,r7) + sbc r2,r2,r11 + xor.f 0,r0,r1 + and r0,r2,r9 + bclr r3,r9,23 ; 0x7f000000 + brhs.d r2,r3,.Linf_denorm + bxor.mi r0,r0,31 +.Lpast_denorm: + add r3,mhi,0x22 ; round to nearest or higher + tst r3,0x3c ; check if rounding was unsafe + lsr r3,r3,6 + jne.d [blink] ; return if rounding was safe. + add_s r0,r0,r3 + /* work out exact rounding if we fall through here. 
*/ + /* We know that the exact result cannot be represented in single + precision. Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. */ + add_s r3,r3,r3 + sub_s r3,r3,1 + mulu64 (r3,r6) + asr.f 0,r0,1 ; for round-to-even in case this is a denorm + rsub r2,r9,25 + asl_s r12,r12,r2 + sub.f 0,r12,mlo + j_s.d [blink] + sub.mi r0,r0,1 +.Linf_nan_fp1: + lsr_s r0,r0,31 + bmsk.f 0,r1,22 + asl_s r0,r0,31 + bne_s 0f ; inf/inf -> nan + brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan +0: j_s.d [blink] + mov r0,-1 +.Lsigned0: +.Linf_nan_fp0: + tst_s r1,r1 + j_s.d [blink] + bxor.mi r0,r0,31 + .balign 4 + .global __divsf3 +/* For denormal results, it is possible that an exact result needs + rounding, and thus the round-to-even rule has to come into play. */ +.Linf_denorm: + brlo r2,0xc0000000,.Linf +.Ldenorm: + asr_s r2,r2,23 + bic r0,r0,r9 + neg r9,r2 + brlo.d r9,25,.Lpast_denorm + lsr r3,mlo,r9 + /* Fall through: return +- 0 */ + j_s [blink] +.Linf: + j_s.d [blink] + or r0,r0,r9 + .balign 4 +.Ldenorm_fp1: + norm.f r12,r6 ; flag for x/0 -> Inf check + add r6,r6,r6 + rsub r5,r12,16 + ror r5,r1,r5 + bmsk r5,r5,5 + bic.ne.f 0, \ + 0x60000000,r0 ; large number / denorm -> Inf + ld.as r5,[r3,r5] + asl r6,r6,r12 + beq.d .Linf_NaN + and.f r2,r0,r9 + add r4,r6,r6 + asl_s r12,r12,23 + bne.d .Lpast_denorm_fp1 + add_s r2,r2,r12 +.Ldenorm_fp0: + mulu64 (r8,r7) + bclr r12,r12,31 + norm.f r3,r12 ; flag for 0/x -> 0 check + bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 + beq_s .Lret0 + asl_s r12,r12,r3 + asl_s r3,r3,23 + add_s r12,r12,r12 + add r11,r11,r3 + b.d .Lpast_denorm_fp0 + mov_s r3,r12 + ENDFUNC(__divsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/muldf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/muldf3.S new file mode 100644 index 0000000..59df772 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/muldf3.S @@ -0,0 +1,231 @@ +/* Copyright 
(C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __muldf3 + .balign 4 +__muldf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __muldf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __muldf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + b abort +#define __muldf3 __muldf3_asm +#endif /* DEBUG */ + +__muldf3_support: /* This label makes debugger output saner. */ + .balign 4 + FUNC(__muldf3) +.Ldenorm_2: + breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output + norm.f r12,DBL1L + mov.mi r12,21 + add.pl r12,r12,22 + neg r11,r12 + asl_s r12,r12,20 + lsr.f DBL1H,DBL1L,r11 + ror DBL1L,DBL1L,r11 + sub_s DBL0H,DBL0H,r12 + mov.eq DBL1H,DBL1L + sub_l DBL1L,DBL1L,DBL1H + /* Fall through. 
*/ + .global __muldf3 + .balign 4 +__muldf3: + mululw 0,DBL0L,DBL1L + machulw r4,DBL0L,DBL1L + ld.as r9,[pcl,0x67] ; ((.L7ff00000-.+2)/4)] + bmsk r6,DBL0H,19 + bset r6,r6,20 + mov r8,acc2 + mululw 0,r4,1 + and r11,DBL0H,r9 + breq.d r11,0,.Ldenorm_dbl0 + and r12,DBL1H,r9 + breq.d r12,0,.Ldenorm_dbl1 + maclw 0,r6,DBL1L + machulw 0,r6,DBL1L + breq.d r11,r9,.Linf_nan + bmsk r10,DBL1H,19 + breq.d r12,r9,.Linf_nan + bset r10,r10,20 + maclw 0,r10,DBL0L + machulw r5,r10,DBL0L + add_s r12,r12,r11 ; add exponents + mov r4,acc2 + mululw 0,r5,1 + maclw 0,r6,r10 + machulw r7,r6,r10 ; fraction product in r7:acc2:r4:r8 + tst r8,r8 + bclr r8,r9,30 ; 0x3ff00000 + bset.ne r4,r4,0 ; put least significant word into sticky bit + bclr r6,r9,20 ; 0x7fe00000 + lsr.f r10,r7,9 + rsub.eq r8,r8,r9 ; 0x40000000 + sub r12,r12,r8 ; subtract bias + implicit 1 + brhs.d r12,r6,.Linf_denorm + rsub r10,r10,12 +.Lshift_frac: + neg r8,r10 + asl r6,r4,r10 + lsr DBL0L,r4,r8 + add.f 0,r6,r6 + btst.eq DBL0L,0 + cmp.eq r4,r4 ; round to nearest / round to even + asl r4,acc2,r10 + lsr r5,acc2,r8 + adc.f DBL0L,DBL0L,r4 + xor.f 0,DBL0H,DBL1H + asl r7,r7,r10 + add_s r12,r12,r5 + adc DBL0H,r12,r7 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +/* N.B. This is optimized for ARC700. + ARC600 has very different scheduling / instruction selection criteria. */ + +/* If one number is denormal, subtract some from the exponent of the other + one (if the other exponent is too small, return 0), and normalize the + denormal. Then re-run the computation. 
*/ +.Lret0_2: + lsr_s DBL0H,DBL0H,31 + asl_s DBL0H,DBL0H,31 + j_s.d [blink] + mov_s DBL0L,0 + .balign 4 +.Ldenorm_dbl0: + mov_s r12,DBL0L + mov_s DBL0L,DBL1L + mov_s DBL1L,r12 + mov_s r12,DBL0H + mov_s DBL0H,DBL1H + mov_s DBL1H,r12 + and r11,DBL0H,r9 +.Ldenorm_dbl1: + brhs r11,r9,.Linf_nan + brhs 0x3ca00001,r11,.Lret0 + sub_s DBL0H,DBL0H,DBL1H + bmsk.f DBL1H,DBL1H,30 + add_s DBL0H,DBL0H,DBL1H + beq.d .Ldenorm_2 + norm r12,DBL1H + sub_s r12,r12,10 + asl r5,r12,20 + asl_s DBL1H,DBL1H,r12 + sub DBL0H,DBL0H,r5 + neg r5,r12 + lsr r6,DBL1L,r5 + asl_s DBL1L,DBL1L,r12 + b.d __muldf3 + add_s DBL1H,DBL1H,r6 + +.Lret0: xor_s DBL0H,DBL0H,DBL1H + bclr DBL1H,DBL0H,31 + xor_s DBL0H,DBL0H,DBL1H + j_s.d [blink] + mov_s DBL0L,0 + + .balign 4 +.Linf_nan: + bclr r12,DBL1H,31 + xor_s DBL1H,DBL1H,DBL0H + bclr_s DBL0H,DBL0H,31 + max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf + or.f 0,DBL0H,DBL0L + mov_s DBL0L,0 + or.ne.f DBL1L,DBL1L,r12 + not_s DBL0H,DBL0L ; inf * 0 -> NaN + mov.ne DBL0H,r8 + tst_s DBL1H,DBL1H + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +/* We have checked for infinity / NaN input before, and transformed + denormalized inputs into normalized inputs. Thus, the worst case + exponent overflows are: + 1 + 1 - 0x400 == 0xc02 : maximum underflow + 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow + N.B. 0x7e and 0x7f are also values for overflow. + + If (r12 <= -54), we have an underflow to zero. 
*/ + .balign 4 +.Linf_denorm: + lsr r6,r12,28 + brlo.d r6,0xc,.Linf + asr r6,r12,20 + add.f r10,r10,r6 + brgt.d r10,0,.Lshift_frac + mov_s r12,0 + beq.d .Lround_frac + add r10,r10,32 +.Lshift32_frac: + tst r4,r4 + mov r4,acc2 + bset.ne r4,r4,1 + mululw 0,r7,1 + brge.d r10,1,.Lshift_frac + mov r7,0 + breq.d r10,0,.Lround_frac + add r10,r10,32 + brgt r10,21,.Lshift32_frac + b_s .Lret0 + +.Lround_frac: + add.f 0,r4,r4 + btst.eq acc2,0 + mov_s DBL0L,acc2 + mov_s DBL0H,r7 + adc.eq.f DBL0L,DBL0L,0 + j_s.d [blink] + adc.eq DBL0H,DBL0H,0 + +.Linf: mov_s DBL0L,0 + xor.f DBL1H,DBL1H,DBL0H + mov_s DBL0H,r9 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + ENDFUNC(__muldf3) + + .balign 4 +.L7ff00000: + .long 0x7ff00000 diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/mulsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/mulsf3.S new file mode 100644 index 0000000..41681f5 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-dsp/mulsf3.S @@ -0,0 +1,176 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __mulsf3 + FUNC(__mulsf3) + .balign 4 +__mulsf3: + push_s blink + push_s r1 + bl.d __mulsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __mulsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 + bne 0f + bmsk.f 0,r0,22 + bmsk.ne.f r1,r1,22 + jne_s [blink] ; both NaN -> OK +0: bl abort + ENDFUNC(__mulsf3) +#define __mulsf3 __mulsf3_asm +#endif /* DEBUG */ + + .balign 4 + .global __mulsf3 + FUNC(__mulsf3) +__mulsf3: + ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)] + bmsk r4,r1,22 + bset r2,r0,23 + asl_s r2,r2,8 + bset r3,r4,23 + and r11,r0,r9 + breq.d r11,0,.Ldenorm_dbl0 + and r12,r1,r9 + breq.d r12,0,.Ldenorm_dbl1 + xor_s r0,r0,r1 + mululw 0,r2,r3 + machulw r6,r2,r3 + breq.d r11,r9,.Linf_nan_dbl0 + ld.as r4,[pcl,69]; [pcl,((.L7fffffff-.+2)/4)] + breq.d r12,r9,.Linf_nan_dbl1 +.Lpast_denorm: + asl.f 0,r6,8 + mov r7,acc2 + add.pl r6,r6,r6 + bclr.pl r6,r6,23 + add.pl.f r7,r7,r7 + add.cs r6,r6,1 + lsr.f 0,r6,1 + add_s r12,r12,r11 + adc.f 0,r7,r4 + add_s r12,r12, \ + -0x3f800000 + adc.f r8,r6,r12 + tst.pl r8,r9 + bic r0,r0,r4 + min r3,r8,r9 + jpnz.d [blink] + add.pnz r0,r0,r3 +; infinity or denormal number + add.ne.f r3,r3,r3 + asr_s r3,r3,23+1 + bset r6,r6,23 + bpnz.d .Linfinity + sub_s r3,r3,1 + neg_s r2,r3 + brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0 + lsr r2,r6,r2 + asl r9,r6,r3 + lsr.f 0,r2,1 + tst r7,r7 + add_s r0,r0,r2 + bset.ne r9,r9,0 + adc.f 0,r9,r4 + j_s.d [blink] + add.cs r0,r0,1 +.Linfinity: + j_s.d [blink] + add_s r0,r0,r9 + +.Lret_r0: j_s [blink] + + .balign 4 +.Ldenorm_dbl0: + bclr_s r2,r2,31 + norm.f r4,r2 + add_s r2,r2,r2 + asl r2,r2,r4 + breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1 + asl r4,r4,23 + mululw 0,r2,r3 + machulw r6,r2,r3 + sub.ne.f r12,r12,r4 + ld.as r4,[pcl,28]; [pcl,((.L7fffffff-.+2)/4)] + bhi.d .Lpast_denorm + xor_s r0,r0,r1 + bmsk r1,r0,30 + 
j_s.d [blink] + bic_s r0,r0,r1 + + .balign 4 +.Ldenorm_dbl0_inf_nan_dbl1: + bmsk.f 0,r0,30 + mov.eq r1,-1 +.Linf_nan_dbl1: + xor_s r1,r1,r0 +.Linf_nan_dbl0: + bclr_s r1,r1,31 + j_s.d [blink] + xor_s r0,r0,r1 + + .balign 4 +.Ldenorm_dbl1: + breq.d r11,r9,.Linf_nan_dbl0_2 + norm.f r3,r4 + sub_s r3,r3,7 + asl r4,r4,r3 + mululw 0,r2,r4 + machulw r6,r2,r4 + sub_s r3,r3,1 + asl_s r3,r3,23 + sub.ne.f r11,r11,r3 + ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)] + bhi.d .Lpast_denorm + bmsk r8,r0,30 + j_s.d [blink] + bic r0,r0,r8 + + .balign 4 +.Linf_nan_dbl0_2: + bclr_s r1,r1,31 + xor_s r0,r0,r1 + sub.eq r1,r1,1 ; inf/nan * 0 -> nan + bic.f 0,r9,r1 + j_s.d [blink] + or.eq r0,r0,r1 ; r1 nan -> result nan + + .balign 4 +.L7f800000: + .long 0x7f800000 +.L7fffffff: + .long 0x7fffffff + ENDFUNC(__mulsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S new file mode 100644 index 0000000..100c8bc --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S @@ -0,0 +1,410 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + to calculate a := b/x as b*y, with y := 1/x: + - x is in the range [1..2) + - calculate 15..18 bit inverse y0 using a table of approximating polynomials. + Precision is higher for polynomials used to evaluate input with larger + value. + - Do one Newton-Raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. + The truncation error for the either is less than 1 + x/2 ulp. + A 31 bit inverse can be simply calculated by using x with implicit 1 + and chaining the multiplies. For a 32 bit inverse, we multiply y0^2 + with the bare fraction part of x, then add in y0^2 for the implicit + 1 of x. + - If calculating a 31 bit inverse, the systematic error is less than + -1 ulp; likewise, for 32 bit, it is less than -2 ulp. + - If we calculate our seed with a 32 bit fraction, we can achieve a + tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we + only need to take the step to calculate the 2nd stage rest and + rounding adjust 1/32nd of the time. However, if we use a 20 bit + fraction for the seed, the negative error can exceed -2 ulp/128, (2) + thus for a simple add / tst check, we need to do the 2nd stage + rest calculation/ rounding adjust 1/16th of the time. + (1): The inexactness of the 32 bit inverse contributes an error in the + range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the + rest contributes an error < +1/x ulp/128 . In the interval [1,2), + x/2 + 1/x <= 1.5 . + (2): Unless proven otherwise.
I have not actually looked for an + example where -2 ulp/128 is exceeded, and my calculations indicate + that the excess, if existent, is less than -1/512 ulp. + ??? The algorithm is still based on the ARC700 optimized code. + Maybe we could make better use of 64 bit multiply results and/or mmed . + */ +#include "../arc-ieee-754.h" + +/* N.B. fp-bit.c does double rounding on denormal numbers. */ +#if 0 /* DEBUG */ + .global __divdf3 + FUNC(__divdf3) + .balign 4 +__divdf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __divdf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __divdf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + and r12,DBL0H,DBL1H + bic.f 0,0x7ff80000,r12 ; both NaN -> OK + jeq_s [blink] + bl abort + ENDFUNC(__divdf3) +#define __divdf3 __divdf3_asm +#endif /* DEBUG */ + + FUNC(__divdf3) + .balign 4 +.L7ff00000: + .long 0x7ff00000 +.Ldivtab: + .long 0xfc0fffe1 + .long 0xf46ffdfb + .long 0xed1ffa54 + .long 0xe61ff515 + .long 0xdf7fee75 + .long 0xd91fe680 + .long 0xd2ffdd52 + .long 0xcd1fd30c + .long 0xc77fc7cd + .long 0xc21fbbb6 + .long 0xbcefaec0 + .long 0xb7efa100 + .long 0xb32f92bf + .long 0xae8f83b7 + .long 0xaa2f7467 + .long 0xa5ef6479 + .long 0xa1cf53fa + .long 0x9ddf433e + .long 0x9a0f3216 + .long 0x965f2091 + .long 0x92df0f11 + .long 0x8f6efd05 + .long 0x8c1eeacc + .long 0x88eed876 + .long 0x85dec615 + .long 0x82eeb3b9 + .long 0x800ea10b + .long 0x7d3e8e0f + .long 0x7a8e7b3f + .long 0x77ee6836 + .long 0x756e5576 + .long 0x72fe4293 + .long 0x709e2f93 + .long 0x6e4e1c7f + .long 0x6c0e095e + .long 0x69edf6c5 + .long 0x67cde3a5 + .long 0x65cdd125 + .long 0x63cdbe25 + .long 0x61ddab3f + .long 0x600d991f + .long 0x5e3d868c + .long 0x5c6d7384 + .long 0x5abd615f + .long 0x590d4ecd + .long 0x576d3c83 + .long 0x55dd2a89 + .long 0x545d18e9 + .long 0x52dd06e9 + .long 0x516cf54e + .long 0x4ffce356 + .long 0x4e9cd1ce + .long 0x4d3cbfec + .long 
0x4becae86 + .long 0x4aac9da4 + .long 0x496c8c73 + .long 0x483c7bd3 + .long 0x470c6ae8 + .long 0x45dc59af + .long 0x44bc4915 + .long 0x43ac3924 + .long 0x428c27fb + .long 0x418c187a + .long 0x407c07bd + +__divdf3_support: /* This label makes debugger output saner. */ + .balign 4 +.Ldenorm_dbl1: + brge r6, \ + 0x43500000,.Linf_NaN ; large number / denorm -> Inf + bmsk.f r12,DBL1H,19 + mov.eq r12,DBL1L + mov.eq DBL1L,0 + sub.eq r7,r7,32 + norm.f r11,r12 ; flag for x/0 -> Inf check + beq_s .Linf_NaN + mov.mi r11,0 + add.pl r11,r11,1 + add_s r12,r12,r12 + asl r8,r12,r11 + rsub r12,r11,31 + lsr r12,DBL1L,r12 + tst_s DBL1H,DBL1H + or r8,r8,r12 + lsr r4,r8,26 + lsr DBL1H,r8,12 + ld.as r4,[r10,r4] + bxor.mi DBL1H,DBL1H,31 + sub r11,r11,11 + asl DBL1L,DBL1L,r11 + sub r11,r11,1 + mulu64 r4,r8 + sub r7,r7,r11 + b.d .Lpast_denorm_dbl1 + asl r7,r7,20 + + .balign 4 +.Ldenorm_dbl0: + bmsk.f r12,DBL0H,19 + ; wb stall + mov.eq r12,DBL0L + sub.eq r6,r6,32 + norm.f r11,r12 ; flag for 0/x -> 0 check + brge r7, \ + 0x43500000, .Lret0_2 ; denorm/large number -> 0 + beq_s .Lret0_2 + mov.mi r11,0 + add.pl r11,r11,1 + asl r12,r12,r11 + sub r6,r6,r11 + add.f 0,r6,31 + lsr r10,DBL0L,r6 + mov.mi r10,0 + add r6,r6,11+32 + neg.f r11,r6 + asl DBL0L,DBL0L,r11 + mov.pl DBL0L,0 + sub r6,r6,32-1 + b.d .Lpast_denorm_dbl0 + asl r6,r6,20 + +.Linf_NaN: + tst_s DBL0L,DBL0L ; 0/0 -> NaN + xor_s DBL1H,DBL1H,DBL0H + bclr.eq.f DBL0H,DBL0H,31 + bmsk DBL0H,DBL1H,30 + xor_s DBL0H,DBL0H,DBL1H + sub.eq DBL0H,DBL0H,1 + mov_s DBL0L,0 + j_s.d [blink] + or DBL0H,DBL0H,r9 + .balign 4 +.Lret0_2: + xor_s DBL1H,DBL1H,DBL0H + mov_s DBL0L,0 + bmsk DBL0H,DBL1H,30 + j_s.d [blink] + xor_s DBL0H,DBL0H,DBL1H + .balign 4 + .global __divdf3 +/* N.B. the spacing between divtab and the sub3 to get its address must + be a multiple of 8. 
*/ +__divdf3: + asl r8,DBL1H,12 + lsr r4,r8,26 + sub3 r10,pcl,61; (.-.Ldivtab) >> 3 + ld.as r9,[pcl,-124]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000 + ld.as r4,[r10,r4] + lsr r12,DBL1L,20 + and.f r7,DBL1H,r9 + or r8,r8,r12 + mulu64 r4,r8 + beq.d .Ldenorm_dbl1 +.Lpast_denorm_dbl1: + and.f r6,DBL0H,r9 + breq.d r7,r9,.Linf_nan_dbl1 + asl r4,r4,12 + sub r4,r4,mhi + mulu64 r4,r4 + beq.d .Ldenorm_dbl0 + lsr r8,r8,1 + breq.d r6,r9,.Linf_nan_dbl0 + asl r12,DBL0H,11 + lsr r10,DBL0L,21 +.Lpast_denorm_dbl0: + bset r8,r8,31 + mulu64 mhi,r8 + add_s r12,r12,r10 + bset r5,r12,31 + cmp r5,r8 + cmp.eq DBL0L,DBL1L + lsr.cc r5,r5,1 + sub r4,r4,mhi ; u1.31 inverse, about 30 bit + mulu64 r5,r4 ; result fraction highpart + lsr r8,r8,2 ; u3.29 + add r5,r6, /* wait for immediate */ \ + 0x3fe00000 + mov r11,mhi ; result fraction highpart + mulu64 r11,r8 ; u-28.31 + asl_s DBL1L,DBL1L,9 ; u-29.23:9 + sbc r6,r5,r7 + mov r12,mlo ; u-28.31 + mulu64 r11,DBL1L ; mhi: u-28.23:9 + add.cs DBL0L,DBL0L,DBL0L + asl_s DBL0L,DBL0L,6 ; u-26.25:7 + asl r10,r11,23 + sub_l DBL0L,DBL0L,r12 + lsr r7,r11,9 + sub r5,DBL0L,mhi ; rest msw ; u-26.31:0 + mul64 r5,r4 ; mhi: result fraction lowpart + xor.f 0,DBL0H,DBL1H + and DBL0H,r6,r9 + add_s DBL0H,DBL0H,r7 + bclr r12,r9,20 ; 0x7fe00000 + brhs.d r6,r12,.Linf_denorm + bxor.mi DBL0H,DBL0H,31 + add.f r12,mhi,0x11 + asr r9,r12,5 + sub.mi DBL0H,DBL0H,1 + add.f DBL0L,r9,r10 + tst r12,0x1c + jne.d [blink] + add.cs DBL0H,DBL0H,1 + /* work out exact rounding if we fall through here. */ + /* We know that the exact result cannot be represented in double + precision. Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. Since we want to know + only the sign bit, it is sufficient to calculate only the + highpart of the lower 64 bits. 
*/ + mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo + sub.f DBL0L,DBL0L,1 + asl r12,r9,2 ; u-22.30:2 + sub.cs DBL0H,DBL0H,1 + sub.f r12,r12,2 + mov r10,mlo ; rest before considering r12 in r5 : -r10 + mulu64 r12,DBL1L ; mhi: u-51.32 + asl r5,r5,25 ; s-51.7:25 + lsr r10,r10,7 ; u-51.30:2 + mov r7,mhi ; u-51.32 + mulu64 r12,r8 ; mlo: u-51.31:1 + sub r5,r5,r10 + add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L + bset r7,r7,0 ; make sure that the result is not zero, and that + sub r5,r5,r7 ; a highpart zero appears negative + sub.f r5,r5,mlo ; rest msw + add.pl.f DBL0L,DBL0L,1 + j_s.d [blink] + add.eq DBL0H,DBL0H,1 + +.Linf_nan_dbl1: ; 0/Inf -> NaN Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN + or.f 0,r6,DBL0L + cmp.ne r6,r9 + not_s DBL0L,DBL1H + sub_s.ne DBL0L,DBL0L,DBL0L + tst_s DBL0H,DBL0H + add_s DBL0H,DBL1H,DBL0L + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 +.Linf_nan_dbl0: + tst_s DBL1H,DBL1H + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + .balign 4 +.Linf_denorm: + lsr r12,r6,28 + brlo.d r12,0xc,.Linf +.Ldenorm: + asr r6,r6,20 + neg r9,r6 + mov_s DBL0H,0 + brhs.d r9,54,.Lret0 + bxor.mi DBL0H,DBL0H,31 + add r12,mhi,1 + and r12,r12,-4 + rsub r7,r6,5 + asr r10,r12,28 + bmsk r4,r12,27 + min r7,r7,31 + asr DBL0L,r4,r7 + add DBL1H,r11,r10 + abs.f r10,r4 + sub.mi r10,r10,1 + add.f r7,r6,32-5 + asl r4,r4,r7 + mov.mi r4,r10 + add.f r10,r6,23 + rsub r7,r6,9 + lsr r7,DBL1H,r7 + asl r10,DBL1H,r10 + or.pnz DBL0H,DBL0H,r7 + or.mi r4,r4,r10 + mov.mi r10,r7 + add.f DBL0L,r10,DBL0L + add.cs.f DBL0H,DBL0H,1 ; carry clear after this point + bxor.f 0,r4,31 + add.pnz.f DBL0L,DBL0L,1 + add.cs.f DBL0H,DBL0H,1 + jne_s [blink] + /* Calculation so far was not conclusive; calculate further rest. 
*/ + mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo + asr.f r12,r12,3 + asl r5,r5,25 ; s-51.7:25 + mov r11,mlo ; rest before considering r12 in r5 : -r11 + mulu64 r12,r8 ; u-51.31:1 + and r9,DBL0L,1 ; tie-breaker: round to even + lsr r11,r11,7 ; u-51.30:2 + mov DBL1H,mlo ; u-51.31:1 + mulu64 r12,DBL1L ; u-51.62:2 + sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L + add_s DBL1H,DBL1H,r11 + sub DBL1H,DBL1H,r5 ; -rest msw + add_s DBL1H,DBL1H,mhi ; -rest msw + add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-( + tst_s DBL1H,DBL1H + cmp.eq mlo,r9 + add.cs.f DBL0L,DBL0L,1 + j_s.d [blink] + add.cs DBL0H,DBL0H,1 + +.Lret0: + /* return +- 0 */ + j_s.d [blink] + mov_s DBL0L,0 +.Linf: + mov_s DBL0H,r9 + mov_s DBL0L,0 + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + ENDFUNC(__divdf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S new file mode 100644 index 0000000..ef54ffd --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S @@ -0,0 +1,274 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + - calculate 15..18 bit inverse using a table of approximating polynomials. + precision is higher for polynomials used to evaluate input with larger + value. + - do one Newton-Raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. + */ +#include "../arc-ieee-754.h" + +/* Self-check harness, compiled out by the #if 0 below: it runs __divsf3_c + and __divsf3_asm on the same operands and branches to abort unless the + two results in r0 / r1 are bit-identical. */ +#if 0 /* DEBUG */ + .global __divsf3 + FUNC(__divsf3) + .balign 4 +__divsf3: + push_s blink + push_s r1 + bl.d __divsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __divsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 +#if 1 + bne abort + jeq_s [blink] + b abort +#else + bne abort + j_s [blink] +#endif + ENDFUNC(__divsf3) +#define __divsf3 __divsf3_asm +#endif /* DEBUG */ + + FUNC(__divsf3) + .balign 4 +/* Inverse seed table: 64 words, indexed in __divsf3 by bits 22..17 of the + divisor in r1 (lsr by 17, then bmsk 5). */ +.Ldivtab: + .long 0xfc0ffff0 + .long 0xf46ffefd + .long 0xed1ffd2a + .long 0xe627fa8e + .long 0xdf7ff73b + .long 0xd917f33b + .long 0xd2f7eea3 + .long 0xcd1fe986 + .long 0xc77fe3e7 + .long 0xc21fdddb + .long 0xbcefd760 + .long 0xb7f7d08c + .long 0xb32fc960 + .long 0xae97c1ea + .long 0xaa27ba26 + .long 0xa5e7b22e + .long 0xa1cfa9fe + .long 0x9ddfa1a0 + .long 0x9a0f990c + .long 0x9667905d + .long 0x92df878a + .long 0x8f6f7e84 + .long 0x8c27757e + .long 0x88f76c54 + .long 0x85df630c + .long 0x82e759c5 + .long 0x8007506d + .long 0x7d3f470a + .long 0x7a8f3da2 + .long 0x77ef341e + .long 0x756f2abe + .long 0x72f7212d + .long 0x709717ad + .long 0x6e4f0e44 + .long 0x6c1704d6 + .long 0x69e6fb44 + .long 0x67cef1d7 + .long 0x65c6e872 + .long 0x63cedf18 + .long 0x61e6d5cd + .long 0x6006cc6d + .long 0x5e36c323 + .long 0x5c76b9f3 + .long
0x5abeb0b7 + .long 0x5916a79b + .long 0x57769e77 + .long 0x55de954d + .long 0x54568c4e + .long 0x52d6834d + .long 0x51667a7f + .long 0x4ffe71b5 + .long 0x4e9e68f1 + .long 0x4d466035 + .long 0x4bf65784 + .long 0x4aae4ede + .long 0x496e4646 + .long 0x48363dbd + .long 0x47063547 + .long 0x45de2ce5 + .long 0x44be2498 + .long 0x43a61c64 + .long 0x4296144a + .long 0x41860c0e + .long 0x407e03ee +.L7f800000: + .long 0x7f800000 + .balign 4 + .global __divsf3_support +__divsf3_support: +.Linf_NaN: + bclr.f 0,r0,31 ; 0/0 -> NaN + xor_s r0,r0,r1 + bmsk r1,r0,30 + bic_s r0,r0,r1 + sub.eq r0,r0,1 + j_s.d [blink] + or r0,r0,r9 +.Lret0: + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_s r0,r0,r1 +/* N.B. the spacing between divtab and the sub3 to get its address must + be a multiple of 8. */ +__divsf3: + lsr r2,r1,17 + sub3 r3,pcl,37 ; (.-.Ldivtab) >> 3 + bmsk_s r2,r2,5 + ld.as r5,[r3,r2] + asl r4,r1,9 + ld.as r9,[pcl,-13]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000 + mulu64 r5,r4 + and.f r11,r1,r9 + asl r6,r1,8 + bset r6,r6,31 + beq.d .Ldenorm_fp1 + asl r5,r5,13 + breq.d r11,r9,.Linf_nan_fp1 + and.f r2,r0,r9 + sub r7,r5,mhi + mulu64 r7,r6 + beq.d .Ldenorm_fp0 + asl r12,r0,8 + breq.d r2,r9,.Linf_nan_fp0 + mulu64 mhi,r7 +.Lpast_denorm_fp1: + bset r3,r12,31 +.Lpast_denorm_fp0: + cmp_s r3,r6 + lsr.cc r3,r3,1 + add_s r2,r2, /* wait for immediate */ \ + 0x3f000000 + sub r7,r7,mhi ; u1.31 inverse, about 30 bit + mulu64 r3,r7 + sbc r2,r2,r11 + xor.f 0,r0,r1 + and r0,r2,r9 + bclr r3,r9,23 ; 0x7f000000 + brhs.d r2,r3,.Linf_denorm + bxor.mi r0,r0,31 +.Lpast_denorm: + add r3,mhi,0x22 ; round to nearest or higher + tst r3,0x3c ; check if rounding was unsafe + lsr r3,r3,6 + jne.d [blink] ; return if rounding was safe. + add_s r0,r0,r3 + /* work out exact rounding if we fall through here. */ + /* We know that the exact result cannot be represented in single + precision. 
Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. */ + add_s r3,r3,r3 + sub_s r3,r3,1 + mulu64 r3,r6 + asr.f 0,r0,1 ; for round-to-even in case this is a denorm + rsub r2,r9,25 + asl_s r12,r12,r2 + sub.f 0,r12,mlo + j_s.d [blink] + sub.mi r0,r0,1 +.Linf_nan_fp1: + lsr_s r0,r0,31 + bmsk.f 0,r1,22 + asl_s r0,r0,31 + bne_s 0f ; inf/inf -> nan + brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan +0: j_s.d [blink] + mov r0,-1 +.Lsigned0: +.Linf_nan_fp0: + tst_s r1,r1 + j_s.d [blink] + bxor.mi r0,r0,31 + .balign 4 + .global __divsf3 +/* For denormal results, it is possible that an exact result needs + rounding, and thus the round-to-even rule has to come into play. */ +.Linf_denorm: + brlo r2,0xc0000000,.Linf +.Ldenorm: + asr_s r2,r2,23 + bic r0,r0,r9 + neg r9,r2 + brlo.d r9,25,.Lpast_denorm + lsr r3,mlo,r9 + /* Fall through: return +- 0 */ + j_s [blink] +.Linf: + j_s.d [blink] + or r0,r0,r9 + .balign 4 +.Ldenorm_fp1: + bclr r6,r6,31 + norm.f r12,r6 ; flag for x/0 -> Inf check + add r6,r6,r6 + rsub r5,r12,16 + ror r5,r1,r5 + asl r6,r6,r12 + bmsk r5,r5,5 + ld.as r5,[r3,r5] + add r4,r6,r6 + ; load latency + mulu64 r5,r4 + bic.ne.f 0, \ + 0x60000000,r0 ; large number / denorm -> Inf + asl r5,r5,13 + sub r7,r5,mhi + beq.d .Linf_NaN + mulu64 r7,r6 + asl_s r12,r12,23 + and.f r2,r0,r9 + add_s r2,r2,r12 + asl r12,r0,8 + bne.d .Lpast_denorm_fp1 +.Ldenorm_fp0: mulu64 mhi,r7 + bclr r12,r12,31 + norm.f r3,r12 ; flag for 0/x -> 0 check + bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 + beq_s .Lret0 + asl_s r12,r12,r3 + asl_s r3,r3,23 + add_s r12,r12,r12 + add r11,r11,r3 + b.d .Lpast_denorm_fp0 + mov_s r3,r12 + ENDFUNC(__divsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S new file mode 100644 index 0000000..132b009 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S @@ -0,0 
+1,234 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __muldf3 + .balign 4 +__muldf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __muldf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __muldf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + and r12,DBL0H,DBL1H + bic.f 0,0x7ff80000,r12 ; both NaN -> OK + jeq_s [blink] + b abort +#define __muldf3 __muldf3_asm +#endif /* DEBUG */ + +__muldf3_support: /* This label makes debugger output saner. */ + .balign 4 + FUNC(__muldf3) +.Ldenorm_2: + breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output + norm.f r12,DBL1L + mov.mi r12,21 + add.pl r12,r12,22 + neg r11,r12 + asl_s r12,r12,20 + lsr.f DBL1H,DBL1L,r11 + ror DBL1L,DBL1L,r11 + sub_s DBL0H,DBL0H,r12 + mov.eq DBL1H,DBL1L + sub_l DBL1L,DBL1L,DBL1H + /* Fall through. 
*/ + .global __muldf3 + .balign 4 +__muldf3: + mulu64 DBL0L,DBL1L + ld.as r9,[pcl,0x68] ; ((.L7ff00000-.+2)/4)] + bmsk r6,DBL0H,19 + bset r6,r6,20 + and r11,DBL0H,r9 + breq.d r11,0,.Ldenorm_dbl0 + and r12,DBL1H,r9 + breq.d r12,0,.Ldenorm_dbl1 + mov r8,mlo + mov r4,mhi + mulu64 r6,DBL1L + breq.d r11,r9,.Linf_nan + bmsk r10,DBL1H,19 + breq.d r12,r9,.Linf_nan + bset r10,r10,20 + add.f r4,r4,mlo + adc r5,mhi,0 + mulu64 r10,DBL0L + add_s r12,r12,r11 ; add exponents + add.f r4,r4,mlo + adc r5,r5,mhi + mulu64 r6,r10 + tst r8,r8 + bclr r8,r9,30 ; 0x3ff00000 + bset.ne r4,r4,0 ; put least significant word into sticky bit + bclr r6,r9,20 ; 0x7fe00000 + add.f r5,r5,mlo + adc r7,mhi,0 ; fraction product in r7:r5:r4 + lsr.f r10,r7,9 + rsub.eq r8,r8,r9 ; 0x40000000 + sub r12,r12,r8 ; subtract bias + implicit 1 + brhs.d r12,r6,.Linf_denorm + rsub r10,r10,12 +.Lshift_frac: + neg r8,r10 + asl r6,r4,r10 + lsr DBL0L,r4,r8 + add.f 0,r6,r6 + btst.eq DBL0L,0 + cmp.eq r4,r4 ; round to nearest / round to even + asl r4,r5,r10 + lsr r5,r5,r8 + adc.f DBL0L,DBL0L,r4 + xor.f 0,DBL0H,DBL1H + asl r7,r7,r10 + add_s r12,r12,r5 + adc DBL0H,r12,r7 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +/* N.B. This is optimized for ARC700. + ARC600 has very different scheduling / instruction selection criteria. */ + +/* If one number is denormal, subtract some from the exponent of the other + one (if the other exponent is too small, return 0), and normalize the + denormal. Then re-run the computation. 
*/ +.Lret0_2: + lsr_s DBL0H,DBL0H,31 + asl_s DBL0H,DBL0H,31 + j_s.d [blink] + mov_s DBL0L,0 + .balign 4 +.Ldenorm_dbl0: + mov_s r12,DBL0L + mov_s DBL0L,DBL1L + mov_s DBL1L,r12 + mov_s r12,DBL0H + mov_s DBL0H,DBL1H + mov_s DBL1H,r12 + and r11,DBL0H,r9 +.Ldenorm_dbl1: + brhs r11,r9,.Linf_nan + brhs 0x3ca00001,r11,.Lret0 + sub_s DBL0H,DBL0H,DBL1H + bmsk.f DBL1H,DBL1H,30 + add_s DBL0H,DBL0H,DBL1H + beq.d .Ldenorm_2 + norm r12,DBL1H + sub_s r12,r12,10 + asl r5,r12,20 + asl_s DBL1H,DBL1H,r12 + sub DBL0H,DBL0H,r5 + neg r5,r12 + lsr r6,DBL1L,r5 + asl_s DBL1L,DBL1L,r12 + b.d __muldf3 + add_s DBL1H,DBL1H,r6 + +.Lret0: xor_s DBL0H,DBL0H,DBL1H + bclr DBL1H,DBL0H,31 + xor_s DBL0H,DBL0H,DBL1H + j_s.d [blink] + mov_s DBL0L,0 + + .balign 4 +.Linf_nan: + bclr r12,DBL1H,31 + xor_s DBL1H,DBL1H,DBL0H + bclr_s DBL0H,DBL0H,31 + max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf + or.f 0,DBL0H,DBL0L + mov_s DBL0L,0 + or.ne.f DBL1L,DBL1L,r12 + not_s DBL0H,DBL0L ; inf * 0 -> NaN + mov.ne DBL0H,r8 + tst_s DBL1H,DBL1H + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +/* We have checked for infinity / NaN input before, and transformed + denormalized inputs into normalized inputs. Thus, the worst case + exponent overflows are: + 1 + 1 - 0x400 == 0xc02 : maximum underflow + 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow + N.B. 0x7e and 0x7f are also values for overflow. + + If (r12 <= -54), we have an underflow to zero. 
*/ + .balign 4 +.Linf_denorm: + lsr r6,r12,28 + brlo.d r6,0xc,.Linf + asr r6,r12,20 + add.f r10,r10,r6 + brgt.d r10,0,.Lshift_frac + mov_s r12,0 + beq.d .Lround_frac + add r10,r10,32 +.Lshift32_frac: + tst r4,r4 + mov r4,r5 + bset.ne r4,r4,1 + mov r5,r7 + brge.d r10,1,.Lshift_frac + mov r7,0 + breq.d r10,0,.Lround_frac + add r10,r10,32 + brgt r10,21,.Lshift32_frac + b_s .Lret0 + +.Lround_frac: + add.f 0,r4,r4 + btst.eq r5,0 + mov_s DBL0L,r5 + mov_s DBL0H,r7 + adc.eq.f DBL0L,DBL0L,0 + j_s.d [blink] + adc.eq DBL0H,DBL0H,0 + +.Linf: mov_s DBL0L,0 + xor.f DBL1H,DBL1H,DBL0H + mov_s DBL0H,r9 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + ENDFUNC(__muldf3) + + .balign 4 +.L7ff00000: + .long 0x7ff00000 diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S new file mode 100644 index 0000000..ac346de --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S @@ -0,0 +1,180 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __mulsf3 + FUNC(__mulsf3) + .balign 4 +__mulsf3: + push_s blink + push_s r1 + bl.d __mulsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __mulsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 + bne 0f + bmsk.f 0,r0,22 + bmsk.ne.f r1,r1,22 + jne_s [blink] ; both NaN -> OK +0: bl abort + ENDFUNC(__mulsf3) +#define __mulsf3 __mulsf3_asm +#endif /* DEBUG */ + + .balign 4 + .global __mulsf3 + FUNC(__mulsf3) +__mulsf3: + ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)] + bmsk r4,r1,22 + bset r2,r0,23 + asl_s r2,r2,8 + bset r3,r4,23 + mulu64 r2,r3 + and r11,r0,r9 + breq.d r11,0,.Ldenorm_dbl0 + and r12,r1,r9 + breq.d r12,0,.Ldenorm_dbl1 + xor_s r0,r0,r1 + breq.d r11,r9,.Linf_nan_dbl0 + ld.as r4,[pcl,70]; [pcl,((.L7fffffff-.+2)/4)] + breq.d r12,r9,.Linf_nan_dbl1 +.Lpast_denorm: + asl.f 0,mhi,8 + mov r6,mhi + mov r7,mlo + add.pl r6,r6,r6 + bclr.pl r6,r6,23 + add.pl.f r7,r7,r7 + add.cs r6,r6,1 + lsr.f 0,r6,1 + add_s r12,r12,r11 + adc.f 0,r7,r4 + add_s r12,r12, \ + -0x3f800000 + adc.f r8,r6,r12 + tst.pl r8,r9 + bic r0,r0,r4 + min r3,r8,r9 + jpnz.d [blink] + add.pnz r0,r0,r3 +; infinity or denormal number + add.ne.f r3,r3,r3 + asr_s r3,r3,23+1 + bset r6,r6,23 + bpnz.d .Linfinity + sub_s r3,r3,1 + neg_s r2,r3 + brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0 + lsr r2,r6,r2 + asl r9,r6,r3 + lsr.f 0,r2,1 + tst r7,r7 + add_s r0,r0,r2 + bset.ne r9,r9,0 + adc.f 0,r9,r4 + j_s.d [blink] + add.cs r0,r0,1 +.Linfinity: + j_s.d [blink] + add_s r0,r0,r9 + +.Lret_r0: j_s [blink] + + .balign 4 +.Ldenorm_dbl0: + bclr_s r2,r2,31 + norm.f r4,r2 + add_s r2,r2,r2 + asl r2,r2,r4 + mulu64 r2,r3 + breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1 + asl r4,r4,23 + sub.ne.f r12,r12,r4 + ld.as r4,[pcl,29]; [pcl,((.L7fffffff-.+2)/4)] + bhi.d .Lpast_denorm + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_s r0,r0,r1 + 
+ .balign 4 +.Ldenorm_dbl0_inf_nan_dbl1: + bmsk.f 0,r0,30 + beq_s .Lretnan + xor_s r0,r0,r1 +.Linf_nan_dbl1: + xor_s r1,r1,r0 +.Linf_nan_dbl0: + bclr_s r1,r1,31 + cmp_s r1,r9 + jls.d [blink] + xor_s r0,r0,r1 +; r1 NaN -> result NaN +.Lretnan: + j_s.d [blink] + mov r0,-1 + + .balign 4 +.Ldenorm_dbl1: + breq.d r11,r9,.Linf_nan_dbl0_2 + norm.f r3,r4 + sub_s r3,r3,7 + asl r4,r4,r3 + mulu64 r2,r4 + sub_s r3,r3,1 + asl_s r3,r3,23 + sub.ne.f r11,r11,r3 + ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)] + bhi.d .Lpast_denorm + bmsk r8,r0,30 + j_s.d [blink] + bic r0,r0,r8 + + .balign 4 +.Linf_nan_dbl0_2: + bclr_s r1,r1,31 + xor_s r0,r0,r1 + sub.eq r1,r1,1 ; inf/nan * 0 -> nan + bic.f 0,r9,r1 + j_s.d [blink] + or.eq r0,r0,r1 ; r1 nan -> result nan + + .balign 4 +.L7f800000: + .long 0x7f800000 +.L7fffffff: + .long 0x7fffffff + ENDFUNC(__mulsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600/divsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600/divsf3.S new file mode 100644 index 0000000..d8ea881 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600/divsf3.S @@ -0,0 +1,227 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +/* Self-check harness, compiled out by the #if 0 below: it runs __divsf3_c + and __divsf3_asm on the same operands, returns when the two results are + bit-identical or when both have every exponent bit set, and otherwise + calls abort. */ +#if 0 /* DEBUG */ + .global __divsf3 + FUNC(__divsf3) + .balign 4 +__divsf3: + push_s blink + push_s r1 + bl.d __divsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __divsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 ; both NaN -> OK + jeq_s [blink] + bl abort + ENDFUNC(__divsf3) +#define __divsf3 __divsf3_asm +#endif /* DEBUG */ + + .balign 4 +/* Local (non-.global) marker label; named after this file's routine so that + single-precision divide code is not attributed to __divdf3. */ +__divsf3_support: /* This label makes debugger output saner. */ + FUNC(__divsf3) +/* Entered from __divsf3 when the exponent field of the dividend (r0 & r9) + is zero; r2 holds the dividend's fraction bits. */ +.Ldenorm_fp0: + norm.f r12,r2 ; flag for 0/x -> 0 check + bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 + beq_s .Lret0_NaN + tst r1,r9 + add_s r2,r2,r2 + sub_s r12,r12,8 + asl_s r2,r2,r12 + asl_l r12,r12,23 + bne.d .Lpast_denorm_fp0 + add r5,r5,r12 +/* r0 is subnormal, r1 is subnormal or 0.
*/ + + .balign 4 +.Ldenorm_fp1: + norm.f r12,r3 ; flag for x/0 -> Inf check + bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf + beq_s .Linf + add_s r3,r3,r3 + sub_s r12,r12,8 + asl_s r3,r3,r12 + asl_s r12,r12,23 + b.d .Lpast_denorm_fp1 + add r4,r4,r12 + +.Lret0_NaN: + bclr.f 0,r1,31 ; 0/0 -> NaN + bic r0,r10,r9 + j_s.d [blink] + sub.eq r0,r0,1 + + .balign 4 +.Linf_nan_fp0: + bic.f 0,r9,r1 ; fp1 Inf -> result NaN + bic r1,r5,r9 ; fp1 sign + sub.eq r1,r1,1 + j_s.d [blink] + xor_s r0,r0,r1 +.Linf_nan_fp1: + bic r0,r4,r9 ; fp0 sign + bmsk.f 0,r1,22 ; x/inf -> 0, x/nan -> nan + xor.eq r1,r1,r9 + j_s.d [blink] + xor_s r0,r0,r1 + + .global __divsf3 + .balign 4 + .long 0x7f800000 ; exponent mask +__divsf3: + ld r9,[pcl,-4] + bmsk r2,r0,22 + xor r4,r0,r2 + bmsk r3,r1,22 + xor r5,r1,r3 + and r11,r0,r9 + breq.d r11,0,.Ldenorm_fp0 + xor r10,r4,r5 + breq r11,r9,.Linf_nan_fp0 + bset_s r2,r2,23 + and r11,r1,r9 + breq r11,0,.Ldenorm_fp1 + breq r11,r9,.Linf_nan_fp1 +.Lpast_denorm_fp0: + bset_s r3,r3,23 +.Lpast_denorm_fp1: + cmp r2,r3 + asl_s r2,r2,6+1 + asl_s r3,r3,7 + add.lo r2,r2,r2 + bclr r8,r9,30 ; exponent bias + bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted + sub r4,r4,r5 + add r4,r4,r8 + xor.f 0,r10,r4 + bmi .Linf_denorm + and.f r12,r4,r9 + beq .Ldenorm + sub_s r2,r2,r3 ; discard implicit 1 + rsub r3,r3,1 ; prime r3 for two-insn divide-step use +.Ldiv_23bit: + .rep 6 + add1.f r2,r3,r2 + sub.cc r2,r2,r3 + .endr + breq r12,r9,.Linf + bmsk r0,r2,6 + xor_s r2,r2,r0 +.Ldiv_17bit: + .rep 7 + add1.f r2,r3,r2 + sub.cc r2,r2,r3 + .endr + asl_s r0,r0,7 + bmsk r1,r2,6 + xor_s r2,r2,r1 + or_s r0,r0,r1 +.Ldiv_10bit: + .rep 7 + add1.f r2,r3,r2 + sub.cc r2,r2,r3 + .endr + asl_s r0,r0,7 + bmsk r1,r2,6 + xor_s r2,r2,r1 + or_s r0,r0,r1 +.Ldiv_3bit: + .rep 3 + add1.f r2,r3,r2 + sub.cc r2,r2,r3 + .endr + asl_s r0,r0,3 +.Ldiv_0bit: + add1.f r1,r3,r2 + sub.cc r1,r1,r3 + bmsk_s r2,r2,2 + tst r1,-0x7e ; 0xffffff82, test for rest or odd + bmsk_s r1,r1,0 + add_s r0,r0,r2 ; 
assemble fraction + add_s r0,r0,r4 ; add in sign & exponent + j_s.d [blink] + add.ne r0,r0,r1 ; round to nearest / even + + .balign 4 +.Linf: + j_s.d [blink] + or r0,r10,r9 + +.Lret_r4: + j_s.d [blink] + mov_s r0,r4 + .balign 4 +.Linf_denorm: + add.f r12,r4,r4 + asr_l r12,r12,24 + bpl .Linf + max r12,r12,-24 +.Ldenorm: + rsub r3,r3,1 + add r1,pcl,68; .Ldenorm_tab-. + ldw.as r12,[r1,r12] + mov_s r0,0 + lsr_s r2,r2 + sub_s r1,r1,r12 + j_s.d [r1] + bic r4,r10,r9 +/* Dispatch table for .Ldenorm: 25 halfword offsets read with the index in + r12, which is 0 or negative (clamped to -24 by the max above), so the + last entry sits at .Ldenorm_tab itself. The loaded offset is subtracted + from the address of .Ldenorm_tab to form the jump target. A "-k*8" term + moves the target k*8 bytes past the named label, i.e. it skips k of the + add1.f / sub.cc divide-step pairs of the .rep block at that label + (presumably 8 bytes per pair), so within each group k has to step down + to 0 one at a time with no value repeated or left out. */ + .short .Ldenorm_tab-.Lret_r4 + .short .Ldenorm_tab-.Ldiv_0bit + .short .Ldenorm_tab-.Ldiv_3bit-2*8 + .short .Ldenorm_tab-.Ldiv_3bit-1*8 + .short .Ldenorm_tab-.Ldiv_3bit + .short .Ldenorm_tab-.Ldiv_10bit-6*8 + .short .Ldenorm_tab-.Ldiv_10bit-5*8 + .short .Ldenorm_tab-.Ldiv_10bit-4*8 + .short .Ldenorm_tab-.Ldiv_10bit-3*8 + .short .Ldenorm_tab-.Ldiv_10bit-2*8 + .short .Ldenorm_tab-.Ldiv_10bit-1*8 + .short .Ldenorm_tab-.Ldiv_10bit + .short .Ldenorm_tab-.Ldiv_17bit-6*8 + .short .Ldenorm_tab-.Ldiv_17bit-5*8 + .short .Ldenorm_tab-.Ldiv_17bit-4*8 + .short .Ldenorm_tab-.Ldiv_17bit-3*8 + .short .Ldenorm_tab-.Ldiv_17bit-2*8 + .short .Ldenorm_tab-.Ldiv_17bit-1*8 + .short .Ldenorm_tab-.Ldiv_17bit + .short .Ldenorm_tab-.Ldiv_23bit-5*8 + .short .Ldenorm_tab-.Ldiv_23bit-4*8 + .short .Ldenorm_tab-.Ldiv_23bit-3*8 + .short .Ldenorm_tab-.Ldiv_23bit-2*8 + .short .Ldenorm_tab-.Ldiv_23bit-1*8 +.Ldenorm_tab: + .short .Ldenorm_tab-.Ldiv_23bit + ENDFUNC(__divsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/arc600/mulsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/arc600/mulsf3.S new file mode 100644 index 0000000..4a0736d --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/arc600/mulsf3.S @@ -0,0 +1,179 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC.
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "../arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __mulsf3 + FUNC(__mulsf3) + .balign 4 +__mulsf3: /* Debug wrapper: calls __mulsf3_c and __mulsf3_asm on the same operands and aborts on a mismatch, unless both results are NaN. */ + push_s blink + push_s r1 + bl.d __mulsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __mulsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 + bne 0f + bmsk.f 0,r0,22 + bmsk.ne.f r1,r1,22 + jne_s [blink] ; both NaN -> OK +0: bl abort + ENDFUNC(__mulsf3) +#define __mulsf3 __mulsf3_asm +#endif /* DEBUG */ + + .balign 4 + .global __mulsf3 + FUNC(__mulsf3) +__mulsf3: /* Single-precision multiply: the operand bit patterns are read from r0 and r1, the product is left in r0. */ + ld.as r9,[pcl,76]; [pcl,((.L7f800000-.+2)/4)] + bmsk r4,r1,22 + bset r3,r4,23 + bmsk r2,r0,22 + and r11,r0,r9 + breq.d r11,0,.Ldenorm_dbl0 + and r12,r1,r9 + xor_s r0,r0,r1 + breq.d r11,r9,.Linf_nan_dbl0 + bset_s r2,r2,23 + breq r12,0,.Ldenorm_dbl1 + breq r12,r9,.Linf_nan_dbl1 +.Lpast_denorm: + mov r6,0 + lsr.f r7,r2 +; We could do this a bit faster here with a 32 bit shift register and +; inserting the r2 factor / retrieving the low result a byte at a time, +; but that'd increase code size. 
+ mov lp_count,24 /* 24 passes of the shift-and-add loop that ends at label 0 */ + .balign 4 + lp 0f + add.cs r6,r6,r3 + lsr.f r6,r6 + rrc.f r7,r7 +0: + ld.as r4,[pcl,59]; [pcl,((.L7fffffff-.+2)/4)] + asl.f 0,r6,8 + add.pl r6,r6,r6 + bclr.pl r6,r6,23 + add.pl.f r7,r7,r7 + add.cs r6,r6,1 + lsr.f 0,r6,1 + add_s r12,r12,r11 + adc.f 0,r7,r4 + add_s r12,r12, \ + -0x3f800000 + adc.f r8,r6,r12 + tst.pl r8,r9 + bic r0,r0,r4 + min r3,r8,r9 + jpnz.d [blink] + add.pnz r0,r0,r3 +; infinity or denormal number + add.ne.f r3,r3,r3 + asr_s r3,r3,23+1 + bset r6,r6,23 + bpnz.d .Linfinity + sub_s r3,r3,1 + neg_s r2,r3 + brhi.d r2,24,.Lret_r0 ; right shift > 24 -> return +-0 + lsr r2,r6,r2 + asl r9,r6,r3 + lsr.f 0,r2,1 + tst r7,r7 + add_s r0,r0,r2 + bset.ne r9,r9,0 + adc.f 0,r9,r4 + j_s.d [blink] + add.cs r0,r0,1 +.Linfinity: + j_s.d [blink] + add_s r0,r0,r9 + +.Lret_r0: j_s [blink] + + .balign 4 +.Ldenorm_dbl0: + asl_s r2,r2,8 + norm.f r4,r2 + lsr_s r2,r2,7 + asl r2,r2,r4 + breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1 + asl r4,r4,23 + sub.ne.f r12,r12,r4 + bhi.d .Lpast_denorm + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_s r0,r0,r1 + + .balign 4 +.Ldenorm_dbl0_inf_nan_dbl1: + bmsk.f 0,r0,30 + beq_s .Lretnan + xor_s r0,r0,r1 +.Linf_nan_dbl1: + xor_s r1,r1,r0 + bclr_s r1,r1,31 + j_s.d [blink] + xor_s r0,r0,r1 +.Linf_nan_dbl0: + sub_s r2,r1,1 ; inf/nan * 0 -> nan; inf * nan -> nan (use |r2| >= inf) + bic.f 0,r9,r2 + xor_s r0,r0,r1 + bclr_s r1,r1,31 + xor_s r0,r0,r1 + jne_s [blink] +.Lretnan: + j_s.d [blink] + mov r0,-1 /* the all-ones bit pattern is the NaN that gets returned */ + .balign 4 +.Ldenorm_dbl1: + norm.f r3,r4 + sub_s r3,r3,7 + asl r4,r4,r3 + sub_s r3,r3,1 + asl_s r3,r3,23 + sub.ne.f r11,r11,r3 + bhi.d .Lpast_denorm + mov_s r3,r4 + bmsk r3,r0,30 + j_s.d [blink] + bic_s r0,r0,r3 + + .balign 4 +.L7f800000: + .long 0x7f800000 /* loaded into r9 at entry */ +.L7fffffff: + .long 0x7fffffff /* loaded into r4 after the multiply loop */ + ENDFUNC(__mulsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/divdf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/divdf3.S new file mode 100644 index 0000000..dd74ba6 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/divdf3.S @@ 
-0,0 +1,416 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + to calculate a := b/x as b*y, with y := 1/x: + - x is in the range [1..2) + - calculate 15..18 bit inverse y0 using a table of approximating polynoms. + Precision is higher for polynoms used to evaluate input with larger + value. + - Do one Newton-Raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. + The truncation error for either is less than 1 + x/2 ulp. + A 31 bit inverse can be simply calculated by using x with implicit 1 + and chaining the multiplies. For a 32 bit inverse, we multiply y0^2 + with the bare fraction part of x, then add in y0^2 for the implicit + 1 of x. 
+ - If calculating a 31 bit inverse, the systematic error is less than + -1 ulp; likewise, for 32 bit, it is less than -2 ulp. + - If we calculate our seed with a 32 bit fraction, we can achieve a + tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we + only need to take the step to calculate the 2nd stage rest and + rounding adjust 1/32th of the time. However, if we use a 20 bit + fraction for the seed, the negative error can exceed -2 ulp/128, (2) + thus for a simple add / tst check, we need to do the 2nd stage + rest calculation/ rounding adjust 1/16th of the time. + (1): The inexactness of the 32 bit inverse contributes an error in the + range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the + rest contributes an error < +1/x ulp/128 . In the interval [1,2), + x/2 + 1/x <= 1.5 . + (2): Unless proven otherwise. I have not actually looked for an + example where -2 ulp/128 is exceeded, and my calculations indicate + that the excess, if existent, is less than -1/512 ulp. + */ +#include "arc-ieee-754.h" + +/* N.B. fp-bit.c does double rounding on denormal numbers. */ +#if 0 /* DEBUG */ + .global __divdf3 + FUNC(__divdf3) + .balign 4 +__divdf3: /* Debug wrapper: runs __divdf3_c and __divdf3_asm on the same operands and aborts on a mismatch, unless both results are NaN. */ + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __divdf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __divdf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + and r12,DBL0H,DBL1H + bic.f 0,0x7ff80000,r12 ; both NaN -> OK + jeq_s [blink] + bl abort + ENDFUNC(__divdf3) +#define __divdf3 __divdf3_asm +#endif /* DEBUG */ + + FUNC(__divdf3) +__divdf3_support: /* This label makes debugger output saner. 
*/ + .balign 4 +.Ldenorm_dbl1: + brge r6, \ + 0x43500000,.Linf_NaN ; large number / denorm -> Inf + bmsk.f r12,DBL1H,19 + mov.eq r12,DBL1L + mov.eq DBL1L,0 + sub.eq r7,r7,32 + norm.f r11,r12 ; flag for x/0 -> Inf check + beq_s .Linf_NaN + mov.mi r11,0 + add.pl r11,r11,1 + add_s r12,r12,r12 + asl r8,r12,r11 + rsub r12,r11,31 + lsr r12,DBL1L,r12 + tst_s DBL1H,DBL1H + or r8,r8,r12 + lsr r4,r8,26 + lsr DBL1H,r8,12 + ld.as r4,[r10,r4] + bxor.mi DBL1H,DBL1H,31 + sub r11,r11,11 + asl DBL1L,DBL1L,r11 + sub r11,r11,1 + mpyhu r5,r4,r8 + sub r7,r7,r11 + asl r4,r4,12 + b.d .Lpast_denorm_dbl1 + asl r7,r7,20 + ; wb stall + + .balign 4 +.Ldenorm_dbl0: + bmsk.f r12,DBL0H,19 + ; wb stall + mov.eq r12,DBL0L + sub.eq r6,r6,32 + norm.f r11,r12 ; flag for 0/x -> 0 check + brge r7, \ + 0x43500000, .Lret0_NaN ; denorm/large number -> 0 + beq_s .Lret0_NaN + mov.mi r11,0 + add.pl r11,r11,1 + asl r12,r12,r11 + sub r6,r6,r11 + add.f 0,r6,31 + lsr r10,DBL0L,r6 + mov.mi r10,0 + add r6,r6,11+32 + neg.f r11,r6 + asl DBL0L,DBL0L,r11 + mov.pl DBL0L,0 + sub r6,r6,32-1 + b.d .Lpast_denorm_dbl0 + asl r6,r6,20 + +.Linf_NaN: + tst_s DBL0L,DBL0L ; 0/0 -> NaN + xor_s DBL1H,DBL1H,DBL0H + bclr.eq.f DBL0H,DBL0H,31 + bmsk DBL0H,DBL1H,30 + xor_s DBL0H,DBL0H,DBL1H + sub.eq DBL0H,DBL0H,1 + mov_s DBL0L,0 + j_s.d [blink] + or DBL0H,DBL0H,r9 + .balign 4 +.Lret0_NaN: + xor_s DBL1H,DBL1H,DBL0H + cmp_s r12,r9 + mov_s DBL0L,0 + bmsk DBL0H,DBL1H,30 + xor_s DBL0H,DBL0H,DBL1H + j_s.d [blink] + sub.hi DBL0H,DBL0H,1 +.Linf_nan_dbl1: ; Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN + not_s DBL0L,DBL1H + cmp r6,r9 + sub_s.ne DBL0L,DBL0L,DBL0L + tst_s DBL0H,DBL0H + add_s DBL0H,DBL1H,DBL0L + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 +.Linf_nan_dbl0: + tst_s DBL1H,DBL1H + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + .balign 4 + .global __divdf3 +/* N.B. the spacing between divtab and the add3 to get its address must + be a multiple of 8. */ +__divdf3: /* Entry point: DBL0H:DBL0L is the dividend, DBL1H:DBL1L the divisor; the quotient is left in DBL0H:DBL0L. */ + asl r8,DBL1H,12 + lsr r12,DBL1L,20 + lsr r4,r8,26 + add3 r10,pcl,59 ; (.Ldivtab-.) 
>> 3 + ld.as r4,[r10,r4] + ld.as r9,[pcl,180]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000 + or r8,r8,r12 + mpyhu r5,r4,r8 + and.f r7,DBL1H,r9 + asl r4,r4,12 ; having the asl here is a concession to the XMAC pipeline. + beq.d .Ldenorm_dbl1 + and r6,DBL0H,r9 +.Lpast_denorm_dbl1: ; wb stall + sub r4,r4,r5 + mpyhu r5,r4,r4 + breq.d r6,0,.Ldenorm_dbl0 + lsr r8,r8,1 + asl r12,DBL0H,11 + lsr r10,DBL0L,21 +.Lpast_denorm_dbl0: ; wb stall + bset r8,r8,31 + mpyhu r11,r5,r8 + add_s r12,r12,r10 + bset r5,r12,31 + cmp r5,r8 + cmp.eq DBL0L,DBL1L + ; wb stall + lsr.cc r5,r5,1 + sub r4,r4,r11 ; u1.31 inverse, about 30 bit + mpyhu r11,r5,r4 ; result fraction highpart + breq r7,r9,.Linf_nan_dbl1 + lsr r8,r8,2 ; u3.29 + add r5,r6, /* wait for immediate / XMAC wb stall */ \ + 0x3fe00000 + ; wb stall (not for XMAC) + breq r6,r9,.Linf_nan_dbl0 + mpyu r12,r11,r8 ; u-28.31 + asl_s DBL1L,DBL1L,9 ; u-29.23:9 + sbc r6,r5,r7 + ; resource conflict (not for XMAC) + mpyhu r5,r11,DBL1L ; u-28.23:9 + add.cs DBL0L,DBL0L,DBL0L + asl_s DBL0L,DBL0L,6 ; u-26.25:7 + asl r10,r11,23 + sub_l DBL0L,DBL0L,r12 + ; wb stall (before 'and' for XMAC) + lsr r7,r11,9 + sub r5,DBL0L,r5 ; rest msw ; u-26.31:0 + mpyh r12,r5,r4 ; result fraction lowpart + xor.f 0,DBL0H,DBL1H + and DBL0H,r6,r9 + add_s DBL0H,DBL0H,r7 ; (XMAC wb stall) + bxor.mi DBL0H,DBL0H,31 + brhs r6, /* wb stall / wait for immediate */ \ + 0x7fe00000,.Linf_denorm + add.f r12,r12,0x11 + asr r9,r12,5 + sub.mi DBL0H,DBL0H,1 + add.f DBL0L,r9,r10 + tst r12,0x1c + jne.d [blink] + add.cs DBL0H,DBL0H,1 + /* work out exact rounding if we fall through here. */ + /* We know that the exact result cannot be represented in double + precision. Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. Since we want to know + only the sign bit, it is sufficient to calculate only the + highpart of the lower 64 bits. 
*/ + sub.f DBL0L,DBL0L,1 + asl r12,r9,2 ; u-22.30:2 + mpyu r10,r11,DBL1L ; rest before considering r12 in r5 : -r10 + sub.cs DBL0H,DBL0H,1 + sub.f r12,r12,2 + ; resource conflict (not for XMAC) + mpyhu r7,r12,DBL1L ; u-51.32 + asl r5,r5,25 ; s-51.7:25 + lsr r10,r10,7 ; u-51.30:2 + ; resource conflict (not for XMAC) + ; resource conflict (not for XMAC) + mpyu r9,r12,r8 ; u-51.31:1 + sub r5,r5,r10 + add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L + bset r7,r7,0 ; make sure that the result is not zero, and that + ; wb stall (one earlier for XMAC) + sub r5,r5,r7 ; a highpart zero appears negative + sub.f r5,r5,r9 ; rest msw + add.pl.f DBL0L,DBL0L,1 + j_s.d [blink] + add.eq DBL0H,DBL0H,1 + + .balign 4 +.Linf_denorm: + brlo r6,0xc0000000,.Linf +.Ldenorm: + asr r6,r6,20 + neg r9,r6 + mov_s DBL0H,0 + brhs.d r9,54,.Lret0 + bxor.mi DBL0H,DBL0H,31 + add_l r12,r12,1 + and r12,r12,-4 + rsub r7,r6,5 + asr r10,r12,28 + bmsk r4,r12,27 + asrs DBL0L,r4,r7 + add DBL1H,r11,r10 + add.f r7,r6,32-5 + abss r10,r4 + asl r4,r4,r7 + mov.mi r4,r10 + add.f r10,r6,23 + rsub r7,r6,9 + lsr r7,DBL1H,r7 + asl r10,DBL1H,r10 + or.pnz DBL0H,DBL0H,r7 + or.mi r4,r4,r10 + mov.mi r10,r7 + add.f DBL0L,r10,DBL0L + add.cs.f DBL0H,DBL0H,1 ; carry clear after this point + bxor.f 0,r4,31 + add.pnz.f DBL0L,DBL0L,1 + add.cs.f DBL0H,DBL0H,1 + jne_l [blink] + /* Calculation so far was not conclusive; calculate further rest. 
*/ + mpyu r11,r11,DBL1L ; rest before considering r12 in r5 : -r11 + asr.f r12,r12,3 + asl r5,r5,25 ; s-51.7:25 + ; resource conflict (not for XMAC) + mpyu DBL1H,r12,r8 ; u-51.31:1 + and r9,DBL0L,1 ; tie-breaker: round to even + lsr r11,r11,7 ; u-51.30:2 + ; resource conflict (not for XMAC) + mpyhu r8,r12,DBL1L ; u-51.32 + sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L + add_s DBL1H,DBL1H,r11 + ; resource conflict (not for XMAC) + ; resource conflict (not for XMAC) + mpyu r12,r12,DBL1L ; u-83.30:2 + sub DBL1H,DBL1H,r5 ; -rest msw + add_s DBL1H,DBL1H,r8 ; -rest msw + add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-( + ; wb stall (XMAC: Before add.f) + tst_s DBL1H,DBL1H + cmp.eq r12,r9 + add.cs.f DBL0L,DBL0L,1 + j_s.d [blink] + add.cs DBL0H,DBL0H,1 + +.Lret0: + /* return +- 0 */ + j_s.d [blink] + mov_s DBL0L,0 +.Linf: + mov_s DBL0H,r9 + mov_s DBL0L,0 + j_s.d [blink] + bxor.mi DBL0H,DBL0H,31 + + .balign 4 +.Ldivtab: /* 64 seed entries, indexed by the top six fraction bits of the divisor (lsr r4,r8,26) */ + .long 0xfc0fffe1 + .long 0xf46ffdfb + .long 0xed1ffa54 + .long 0xe61ff515 + .long 0xdf7fee75 + .long 0xd91fe680 + .long 0xd2ffdd52 + .long 0xcd1fd30c + .long 0xc77fc7cd + .long 0xc21fbbb6 + .long 0xbcefaec0 + .long 0xb7efa100 + .long 0xb32f92bf + .long 0xae8f83b7 + .long 0xaa2f7467 + .long 0xa5ef6479 + .long 0xa1cf53fa + .long 0x9ddf433e + .long 0x9a0f3216 + .long 0x965f2091 + .long 0x92df0f11 + .long 0x8f6efd05 + .long 0x8c1eeacc + .long 0x88eed876 + .long 0x85dec615 + .long 0x82eeb3b9 + .long 0x800ea10b + .long 0x7d3e8e0f + .long 0x7a8e7b3f + .long 0x77ee6836 + .long 0x756e5576 + .long 0x72fe4293 + .long 0x709e2f93 + .long 0x6e4e1c7f + .long 0x6c0e095e + .long 0x69edf6c5 + .long 0x67cde3a5 + .long 0x65cdd125 + .long 0x63cdbe25 + .long 0x61ddab3f + .long 0x600d991f + .long 0x5e3d868c + .long 0x5c6d7384 + .long 0x5abd615f + .long 0x590d4ecd + .long 0x576d3c83 + .long 0x55dd2a89 + .long 0x545d18e9 + .long 0x52dd06e9 + .long 0x516cf54e + .long 0x4ffce356 + .long 0x4e9cd1ce + .long 0x4d3cbfec + .long 0x4becae86 + .long 0x4aac9da4 + .long 0x496c8c73 + 
.long 0x483c7bd3 + .long 0x470c6ae8 + .long 0x45dc59af + .long 0x44bc4915 + .long 0x43ac3924 + .long 0x428c27fb + .long 0x418c187a + .long 0x407c07bd +.L7ff00000: + .long 0x7ff00000 /* loaded into r9 at entry and ANDed with the high words to isolate the exponent field */ + ENDFUNC(__divdf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/divsf3-stdmul.S b/gcc-4.9/libgcc/config/arc/ieee-754/divsf3-stdmul.S new file mode 100644 index 0000000..620209d --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/divsf3-stdmul.S @@ -0,0 +1,281 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + - calculate 15..18 bit inverse using a table of approximating polynoms. + precision is higher for polynoms used to evaluate input with larger + value. + - do one newton-raphson iteration step to double the precision, + then multiply this with the divisor + -> more time to decide if dividend is subnormal + - the worst error propagation is on the side of the value range + with the least initial defect, thus giving us about 30 bits precision. 
+ */ +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __divsf3 + FUNC(__divsf3) + .balign 4 +__divsf3: /* Debug wrapper: runs __divsf3_c and __divsf3_asm on the same operands and branches to abort when the results differ. */ + push_s blink + push_s r1 + bl.d __divsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __divsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 +#if 1 + bne abort + jeq_s [blink] + b abort +#else + bne abort + j_s [blink] +#endif + ENDFUNC(__divsf3) +#define __divsf3 __divsf3_asm +#endif /* DEBUG */ + + FUNC(__divsf3) + .balign 4 +.L7f800000: + .long 0x7f800000 /* loaded into r9 at entry and ANDed with the operands to isolate the exponent field */ +.Ldivtab: /* 64 seed entries, indexed by bits 17..22 of the divisor (lsr r2,r1,17 / bmsk_s r2,r2,5) */ + .long 0xfc0ffff0 + .long 0xf46ffefd + .long 0xed1ffd2a + .long 0xe627fa8e + .long 0xdf7ff73b + .long 0xd917f33b + .long 0xd2f7eea3 + .long 0xcd1fe986 + .long 0xc77fe3e7 + .long 0xc21fdddb + .long 0xbcefd760 + .long 0xb7f7d08c + .long 0xb32fc960 + .long 0xae97c1ea + .long 0xaa27ba26 + .long 0xa5e7b22e + .long 0xa1cfa9fe + .long 0x9ddfa1a0 + .long 0x9a0f990c + .long 0x9667905d + .long 0x92df878a + .long 0x8f6f7e84 + .long 0x8c27757e + .long 0x88f76c54 + .long 0x85df630c + .long 0x82e759c5 + .long 0x8007506d + .long 0x7d3f470a + .long 0x7a8f3da2 + .long 0x77ef341e + .long 0x756f2abe + .long 0x72f7212d + .long 0x709717ad + .long 0x6e4f0e44 + .long 0x6c1704d6 + .long 0x69e6fb44 + .long 0x67cef1d7 + .long 0x65c6e872 + .long 0x63cedf18 + .long 0x61e6d5cd + .long 0x6006cc6d + .long 0x5e36c323 + .long 0x5c76b9f3 + .long 0x5abeb0b7 + .long 0x5916a79b + .long 0x57769e77 + .long 0x55de954d + .long 0x54568c4e + .long 0x52d6834d + .long 0x51667a7f + .long 0x4ffe71b5 + .long 0x4e9e68f1 + .long 0x4d466035 + .long 0x4bf65784 + .long 0x4aae4ede + .long 0x496e4646 + .long 0x48363dbd + .long 0x47063547 + .long 0x45de2ce5 + .long 0x44be2498 + .long 0x43a61c64 + .long 0x4296144a + .long 0x41860c0e + .long 0x407e03ee +__divsf3_support: /* This label makes debugger output saner. 
*/ +.Ldenorm_fp1: + bclr r6,r6,31 + norm.f r12,r6 ; flag for x/0 -> Inf check + add r6,r6,r6 + rsub r5,r12,16 + ror r5,r1,r5 + asl r6,r6,r12 + bmsk r5,r5,5 + ld.as r5,[r3,r5] + add r4,r6,r6 + ; load latency + mpyhu r7,r5,r4 + bic.ne.f 0, \ + 0x60000000,r0 ; large number / denorm -> Inf + beq_s .Linf_NaN + asl r5,r5,13 + ; wb stall + ; slow track + sub r7,r5,r7 + mpyhu r8,r7,r6 + asl_s r12,r12,23 + and.f r2,r0,r9 + add r2,r2,r12 + asl r12,r0,8 + ; wb stall + bne.d .Lpast_denorm_fp1 +.Ldenorm_fp0: + mpyhu r8,r8,r7 + bclr r12,r12,31 + norm.f r3,r12 ; flag for 0/x -> 0 check + bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 + beq_s .Lret0 + asl_s r12,r12,r3 + asl_s r3,r3,23 + add_s r12,r12,r12 + add r11,r11,r3 + b.d .Lpast_denorm_fp0 + mov_s r3,r12 + .balign 4 +.Linf_NaN: + bclr.f 0,r0,31 ; 0/0 -> NaN + xor_s r0,r0,r1 + bmsk r1,r0,30 + bic_s r0,r0,r1 + sub.eq r0,r0,1 + j_s.d [blink] + or r0,r0,r9 +.Lret0: + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_s r0,r0,r1 +.Linf_nan_fp1: + lsr_s r0,r0,31 + bmsk.f 0,r1,22 + asl_s r0,r0,31 + bne_s 0f ; inf/inf -> nan + brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan +0: j_s.d [blink] + mov r0,-1 +.Lsigned0: +.Linf_nan_fp0: + tst_s r1,r1 + j_s.d [blink] + bxor.mi r0,r0,31 + .balign 4 + .global __divsf3 +/* N.B. the spacing between divtab and the sub3 to get its address must + be a multiple of 8. 
*/ +__divsf3: /* Entry point: r0 holds the dividend and r1 the divisor (bit patterns); the quotient is left in r0. */ + lsr r2,r1,17 + sub3 r3,pcl,55;(.-.Ldivtab) >> 3 + bmsk_s r2,r2,5 + ld.as r5,[r3,r2] + asl r4,r1,9 + ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000 + mpyhu r7,r5,r4 + asl r6,r1,8 + and.f r11,r1,r9 + bset r6,r6,31 + asl r5,r5,13 + ; wb stall + beq .Ldenorm_fp1 + sub r7,r5,r7 + mpyhu r8,r7,r6 + breq.d r11,r9,.Linf_nan_fp1 + and.f r2,r0,r9 + beq.d .Ldenorm_fp0 + asl r12,r0,8 + ; wb stall + breq r2,r9,.Linf_nan_fp0 + mpyhu r8,r8,r7 +.Lpast_denorm_fp1: + bset r3,r12,31 +.Lpast_denorm_fp0: + cmp_s r3,r6 + lsr.cc r3,r3,1 + add_s r2,r2, /* wait for immediate */ \ + /* wb stall */ \ + 0x3f000000 + sub r7,r7,r8 ; u1.31 inverse, about 30 bit + mpyhu r3,r3,r7 + sbc r2,r2,r11 + xor.f 0,r0,r1 + and r0,r2,r9 + bxor.mi r0,r0,31 + brhs r2, /* wb stall / wait for immediate */ \ + 0x7f000000,.Linf_denorm +.Lpast_denorm: + add_s r3,r3,0x22 ; round to nearest or higher + tst r3,0x3c ; check if rounding was unsafe + lsr r3,r3,6 + jne.d [blink] ; return if rounding was safe. + add_s r0,r0,r3 + /* work out exact rounding if we fall through here. */ + /* We know that the exact result cannot be represented in single + precision. Find the mid-point between the two nearest + representable values, multiply with the divisor, and check if + the result is larger than the dividend. */ + add_s r3,r3,r3 + sub_s r3,r3,1 + mpyu r3,r3,r6 + asr.f 0,r0,1 ; for round-to-even in case this is a denorm + rsub r2,r9,25 + asl_s r12,r12,r2 + ; wb stall + ; slow track + sub.f 0,r12,r3 + j_s.d [blink] + sub.mi r0,r0,1 +/* For denormal results, it is possible that an exact result needs + rounding, and thus the round-to-even rule has to come into play. 
*/ +.Linf_denorm: + brlo r2,0xc0000000,.Linf +.Ldenorm: + asr_s r2,r2,23 + bic r0,r0,r9 + neg r9,r2 + brlo.d r9,25,.Lpast_denorm + lsr r3,r3,r9 + /* Fall through: return +- 0 */ + j_s [blink] +.Linf: + j_s.d [blink] + or r0,r0,r9 + ENDFUNC(__divsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/divsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/divsf3.S new file mode 100644 index 0000000..edc16a8 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/divsf3.S @@ -0,0 +1,221 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ + .global __divsf3 + FUNC(__divsf3) + .balign 4 +__divsf3: /* Debug wrapper: runs __divsf3_c and __divsf3_asm on the same operands and branches to abort when the results differ. */ + push_s blink + push_s r1 + bl.d __divsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __divsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 +#if 1 + bne abort + jeq_s [blink] + b abort +#else + bne abort + j_s [blink] +#endif + ENDFUNC(__divsf3) +#define __divsf3 __divsf3_asm +#endif /* DEBUG */ + + .balign 4 +__divsf3_support: /* This label makes debugger output saner. */ + FUNC(__divsf3) +.Ldenorm_fp0: + norm.f r12,r2 ; flag for 0/x -> 0 check + bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0 + beq_s .Lret0_NaN + tst r1,r9 + add_s r2,r2,r2 + sub_s r12,r12,8 + asl_s r2,r2,r12 + asl_l r12,r12,23 + bne.d .Lpast_denorm_fp0 + add r5,r5,r12 +/* r0 is subnormal, r1 is subnormal or 0. */ + + .balign 4 +.Ldenorm_fp1: + norm.f r12,r3 ; flag for x/0 -> Inf check + bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf + beq_s .Linf + add_s r3,r3,r3 + sub_s r12,r12,8 + asl_s r3,r3,r12 + asl_s r12,r12,23 + b.d .Lpast_denorm_fp1 + add r4,r4,r12 + +.Lret0_NaN: + bclr.f 0,r1,31 ; 0/0 -> NaN + bic r0,r10,r9 + j_s.d [blink] + sub.eq r0,r0,1 + + .global __divsf3 + .balign 4 + .long 0x7f800000 ; exponent mask +__divsf3: /* Entry point: r0 holds the dividend and r1 the divisor (bit patterns); the quotient is left in r0. */ + ld r9,[pcl,-4] + bmsk r2,r0,22 + xor r4,r0,r2 + bmsk r3,r1,22 + xor r5,r1,r3 + and r11,r0,r9 + breq.d r11,0,.Ldenorm_fp0 + xor r10,r4,r5 + breq r11,r9,.Linf_nan_fp0 + bset_s r2,r2,23 + and r11,r1,r9 + breq r11,0,.Ldenorm_fp1 + breq r11,r9,.Linf_nan_fp1 +.Lpast_denorm_fp0: + bset_s r3,r3,23 +.Lpast_denorm_fp1: + cmp r2,r3 + asl_s r2,r2,6+1 + asl_s r3,r3,7 + add.lo r2,r2,r2 + bclr r8,r9,30 ; exponent bias + bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted + sub r4,r4,r5 + add r4,r4,r8 + xor.f 0,r10,r4 + bmi .Linf_denorm + and r12,r4,r9 + breq r12,0,.Ldenorm + sub_s r2,r2,r3 ; discard implicit 1 +.Ldiv_23bit: + .rep 6 + divaw r2,r2,r3 + .endr + breq r12,r9,.Linf + bmsk r0,r2,6 + xor_s r2,r2,r0 +.Ldiv_17bit: + .rep 7 + divaw 
r2,r2,r3 + .endr + asl_s r0,r0,7 + bmsk r1,r2,6 + xor_s r2,r2,r1 + or_s r0,r0,r1 +.Ldiv_10bit: + .rep 7 + divaw r2,r2,r3 + .endr + asl_s r0,r0,7 + bmsk r1,r2,6 + xor_s r2,r2,r1 + or_s r0,r0,r1 +.Ldiv_3bit: + .rep 3 + divaw r2,r2,r3 + .endr + asl_s r0,r0,3 +.Ldiv_0bit: + divaw r1,r2,r3 + bmsk_s r2,r2,2 + tst r1,-0x7e ; 0xffffff82, test for rest or odd + bmsk_s r1,r1,0 + add_s r0,r0,r2 ; assemble fraction + add_s r0,r0,r4 ; add in sign & exponent + j_s.d [blink] + add.ne r0,r0,r1 ; round to nearest / even + + .balign 4 +.Linf_nan_fp0: + bic.f 0,r9,r1 ; fp1 Inf -> result NaN + bic r1,r5,r9 ; fp1 sign + sub.eq r1,r1,1 + j_s.d [blink] + xor_s r0,r0,r1 +.Linf_nan_fp1: + bic r0,r4,r9 ; fp0 sign + bmsk.f 0,r1,22 ; x/inf -> 0, x/nan -> nan + xor.eq r1,r1,r9 + j_s.d [blink] + xor_s r0,r0,r1 +.Linf: + j_s.d [blink] + or r0,r10,r9 + +.Lret_r4: + j_s.d [blink] + mov_s r0,r4 + .balign 4 +.Linf_denorm: + add.f r12,r4,r4 + asr_l r12,r12,24 + bpl .Linf + max r12,r12,-24 +.Ldenorm: + add r1,pcl,42; .Ldenorm_tab-. 
+ ldb_s r12,[r12,r1] + mov_s r0,0 + lsr_s r2,r2 + sub_s r1,r1,r12 + j_s.d [r1] + bic r4,r10,r9 + .byte .Ldenorm_tab-.Lret_r4 /* each entry is .Ldenorm_tab minus the jump target; "-4*k" enters k divaw steps past the label */ + .byte .Ldenorm_tab-.Ldiv_0bit + .byte .Ldenorm_tab-.Ldiv_3bit-8 + .byte .Ldenorm_tab-.Ldiv_3bit-4 + .byte .Ldenorm_tab-.Ldiv_3bit + .byte .Ldenorm_tab-.Ldiv_10bit-24 + .byte .Ldenorm_tab-.Ldiv_10bit-20 + .byte .Ldenorm_tab-.Ldiv_10bit-16 + .byte .Ldenorm_tab-.Ldiv_10bit-12 + .byte .Ldenorm_tab-.Ldiv_10bit-8 + .byte .Ldenorm_tab-.Ldiv_10bit-4 + .byte .Ldenorm_tab-.Ldiv_10bit + .byte .Ldenorm_tab-.Ldiv_17bit-24 + .byte .Ldenorm_tab-.Ldiv_17bit-20 + .byte .Ldenorm_tab-.Ldiv_17bit-16 + .byte .Ldenorm_tab-.Ldiv_17bit-12 + .byte .Ldenorm_tab-.Ldiv_17bit-8 + .byte .Ldenorm_tab-.Ldiv_17bit-4 + .byte .Ldenorm_tab-.Ldiv_17bit + .byte .Ldenorm_tab-.Ldiv_23bit-20 + .byte .Ldenorm_tab-.Ldiv_23bit-16 + .byte .Ldenorm_tab-.Ldiv_23bit-12 + .byte .Ldenorm_tab-.Ldiv_23bit-8 + .byte .Ldenorm_tab-.Ldiv_23bit-4 +.Ldenorm_tab: + .byte .Ldenorm_tab-.Ldiv_23bit + ENDFUNC(__divsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-df.c b/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-df.c new file mode 100644 index 0000000..9142b45 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-df.c @@ -0,0 +1,161 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* We use a polynom similar to a Tchebycheff polynom to get an initial + seed, and then use a newton-raphson iteration step to get an + approximate result + If this result can't be rounded to the exact result with confidence, we + round to the value between the two closest representable values, and + test if the correctly rounded value is above or below this value. + + Because of the Newton-raphson iteration step, an error in the seed at X + is amplified by X. Therefore, we don't want a Tchebycheff polynom + or a polynom that is close to optimal according to the maximum norm + on the errro of the seed value; we want one that is close to optimal + according to the maximum norm on the error of the result, i.e. we + want the maxima of the polynom to increase linearily. + Given an interval [X0,X2) over which to approximate, + with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have, + like for Tchebycheff polynoms: + P(0) := 1 + but then we have: + P(1) := X + S*D + P(2) := 2 * X^2 + S*D * X - D^2 + Then again: + P(n+1) := 2 * X * P(n) - D^2 * P (n-1) + */ + +static long double merr = 42.; + +double +err (long double a0, long double a1, long double x) +{ + long double y0 = a0 + (x-1)*a1; + + long double approx = 2. * y0 - y0 * x * y0; + long double true = 1./x; + long double err = approx - true; + + if (err <= -1./65536./16384.) 
+ printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n", + (double)x, (double)approx, (double)true); + if (merr > err) + merr = err; + return err; +} + +int +main (void) +{ + long double T[5]; /* Taylor polynom */ + long double P[5][5]; + int i, j; + long double X0, X1, X2, S; + long double inc = 1./64; + long double D = inc*0.5; + long i0, i1, i2, io; + + memset (P, 0, sizeof (P)); + P[0][0] = 1.; + for (i = 1; i < 5; i++) + P[i][i] = 1 << i-1; + P[2][0] = -D*D; + for (X0 = 1.; X0 < 2.; X0 += inc) + { + X1 = X0 + inc * 0.5; + X2 = X0 + inc; + S = D / X1; + T[0] = 1./X1; + for (i = 1; i < 5; i++) + T[i] = T[i-1] * -T[0]; +#if 0 + printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2], +(double)T[3], (double)T[4]); +#endif + P[1][0] = S*D; + P[2][1] = S*D; + for (i = 3; i < 5; i++) + { + P[i][0] = -D*D*P[i-2][0]; + for (j = 1; j < i; j++) + P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j]; + } +#if 0 + printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2], +(double)P[3][3], (double)P[3][4]); + printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2], +(double)P[4][3], (double)P[4][4]); +#endif + for (i = 4; i > 1; i--) + { + long double a = T[i]/P[i][i]; + + for (j = 0; j < i; j++) + T[j] -= a * P[i][j]; + } +#if 0 + printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]); +#endif +#if 0 + i2 = T[2]*1024; + long double a = (T[2]-i/1024.)/P[2][2]; + for (j = 0; j < 2; j++) + T[j] -= a * P[2][j]; +#else + i2 = 0; +#endif + long double T0, Ti1; + for (i = 0, i0 = 0; i < 4; i++) + { + + i1 = T[1]*4096. 
+ i0 / (long double)(1 << 20) - 0.5; + i1 = - (-i1 & 0x0fff); + Ti1 = ((unsigned)(-i1 << 20) | i0) /-(long double)(1LL<<32LL); + T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1; + i0 = T0 * 1024 * 1024 + 0.5; + i0 &= 0xfffff; + } +#if 0 + printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]); +#endif + io = (unsigned)(-i1 << 20) | i0; + long double A1 = (unsigned)io/-65536./65536.; + long double A0 = (unsigned)(io << 12)/65536./65536.; + long double Xm0 = 1./sqrt (-A1); + long double Xm1 = 0.5+0.5*-A0/A1; +#if 0 + printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0); + printf ("%.12f %.12f %.12f\n", + err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2)); + printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1); + printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1)); +#endif + printf ("\t.long 0x%x\n", io); + } +#if 0 + printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2)); +#endif + return 0; +} diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-sf.c b/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-sf.c new file mode 100644 index 0000000..ff0f08b --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/divtab-arc-sf.c @@ -0,0 +1,127 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* We use a polynom similar to a Tchebycheff polynom to get an initial + seed, and then use a Newton-Raphson iteration step to get an + approximate result. + If this result can't be rounded to the exact result with confidence, we + round to the value between the two closest representable values, and + test if the correctly rounded value is above or below this value. + + Because of the Newton-Raphson iteration step, an error in the seed at X + is amplified by X. Therefore, we don't want a Tchebycheff polynom + or a polynom that is close to optimal according to the maximum norm + on the error of the seed value; we want one that is close to optimal + according to the maximum norm on the error of the result, i.e. we + want the maxima of the polynom to increase linearly. 
+ Given an interval [X0,X2) over which to approximate, + with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have, + like for Tchebycheff polynoms: + P(0) := 1 + but then we have: + P(1) := X + S*D + P(2) := 2 * X^2 + S*D * X - D^2 + Then again: + P(n+1) := 2 * X * P(n) - D^2 * P (n-1) + */ + +int +main (void) /* Prints one "\t.long 0x..." line for each interval [X0, X0+inc) of width 1/64 in [1,2). */ +{ + long double T[5]; /* Taylor polynom */ + long double P[5][5]; + int i, j; + long double X0, X1, X2, S; + long double inc = 1./64; + long double D = inc*0.5; + long i0, i1, i2; + + memset (P, 0, sizeof (P)); + P[0][0] = 1.; + for (i = 1; i < 5; i++) + P[i][i] = 1 << i-1; + P[2][0] = -D*D; + for (X0 = 1.; X0 < 2.; X0 += inc) + { + X1 = X0 + inc * 0.5; /* midpoint of the interval */ + X2 = X0 + inc; /* upper end of [X0,X2), so that X1 == (X0+X2)/2 as stated above; currently unused */ + S = D / X1; + T[0] = 1./X1; + for (i = 1; i < 5; i++) + T[i] = T[i-1] * -T[0]; +#if 0 + printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2], +(double)T[3], (double)T[4]); +#endif + P[1][0] = S*D; + P[2][1] = S*D; + for (i = 3; i < 5; i++) /* P(n+1) := 2 * X * P(n) - D^2 * P(n-1) */ + { + P[i][0] = -D*D*P[i-2][0]; + for (j = 1; j < i; j++) + P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j]; + } +#if 0 + printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2], +(double)P[3][3], (double)P[3][4]); + printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2], +(double)P[4][3], (double)P[4][4]); +#endif + for (i = 4; i > 1; i--) /* eliminate the degree 4..2 terms of T using P[i] */ + { + long double a = T[i]/P[i][i]; + + for (j = 0; j < i; j++) + T[j] -= a * P[i][j]; + } +#if 0 + printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]); +#endif +#if 0 + i2 = T[2]*512; + long double a = (T[2]-i/512.)/P[2][2]; + for (j = 0; j < 2; j++) + T[j] -= a * P[2][j]; +#else + i2 = 0; +#endif + for (i = 0, i0 = 0; i < 4; i++) /* four refinement passes over the quantized pair (i1, i0) */ + { + long double T0, Ti1; + + i1 = T[1]*8192. 
+ i0 / (long double)(1 << 19) - 0.5; + i1 = - (-i1 & 0x1fff); /* keep 13 bits of the negated value */ + Ti1 = ((unsigned)(-i1 << 19) | i0) /-(long double)(1LL<<32LL); + T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1; + i0 = T0 * 512 * 1024 + 0.5; + i0 &= 0x7ffff; /* keep 19 bits */ + } +#if 0 + printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]); +#endif + printf ("\t.long 0x%x\n", (unsigned) ((-i1 << 19) | i0)); /* operands are long; %x requires an unsigned int argument */ + } + return 0; +} diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/eqdf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/eqdf2.S new file mode 100644 index 0000000..1040153 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/eqdf2.S @@ -0,0 +1,76 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: DBL0, DBL1 + output: z flag + clobber: r12, flags + For NaNs, bit 19.. bit 30 of the high word must be set. 
*/ +#if 0 /* DEBUG */ /* Disabled self-check: calls __eqdf2_c, then __eqdf2_asm with the same arguments, and calls abort unless the z flag left by __eqdf2_asm agrees with (__eqdf2_c result == 0). */ + .global __eqdf2 + .balign 4 + FUNC(__eqdf2) +__eqdf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __eqdf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __eqdf2_asm` ld.ab r10,[sp,4] + pop_s blink + breq.d r11,0,0f + ld.ab r11,[sp,4] + jne_s [blink] + bl abort +0: jeq_s [blink] + bl abort + ENDFUNC(__eqdf2) +#define __eqdf2 __eqdf2_asm +#endif /* DEBUG */ + .global __eqdf2 + .balign 4 + HIDDEN_FUNC(__eqdf2) + /* Good performance as long as the difference in high word is + well predictable (as seen from the branch predictor). */ +__eqdf2: + brne.d DBL0H,DBL1H,.Lhighdiff /* high words differ: handled at .Lhighdiff */ + bmsk r12,DBL0H,20 /* delay slot: r12 = bits 0..20 of DBL0H */ +#ifdef DPFP_COMPAT + or.f 0,DBL0L,DBL1L + bset.ne r12,r12,21 +#endif /* DPFP_COMPAT */ + add1.f r12,r12,DBL0H /* set c iff NaN; also, clear z if NaN. */ + j_s.d [blink] + cmp.cc DBL0L,DBL1L /* delay slot: only when c is clear (no NaN), compare the low words */ + .balign 4 +.Lhighdiff: /* reached when the high words differ */ + or r12,DBL0H,DBL1H + or.f 0,DBL0L,DBL1L + j_s.d [blink] + bmsk.eq.f r12,r12,30 /* delay slot: only when both low words are zero, test (DBL0H|DBL1H) with bit 31 masked off */ + ENDFUNC(__eqdf2) +/* ??? could we do better by speeding up some 'common' case of inequality? */ diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/eqsf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/eqsf2.S new file mode 100644 index 0000000..8a56132 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/eqsf2.S @@ -0,0 +1,69 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: r0, r1 + output: z flag + clobber: r12, flags + For NaNs, bit 22 .. bit 30 must be set. */ +#if 0 /* DEBUG */ /* Disabled self-check: calls __eqsf2_c, then __eqsf2_asm with the same arguments, and calls abort unless the z flag left by __eqsf2_asm agrees with (__eqsf2_c result == 0). */ + .global __eqsf2 + .balign 4 + FUNC(__eqsf2) +__eqsf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __eqsf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __eqsf2_asm` ld.ab r10,[sp,4] + pop_s blink + breq.d r11,0,0f + ld.ab r11,[sp,4] + jne_s [blink] + bl abort +0: jeq_s [blink] + bl abort + ENDFUNC(__eqsf2) +#define __eqsf2 __eqsf2_asm +#endif /* DEBUG */ + /* Good performance as long as the binary difference is + well predictable (as seen from the branch predictor). */ + .global __eqsf2 + .balign 4 + HIDDEN_FUNC(__eqsf2) +__eqsf2: + breq r0, r1,.Lno_bdiff /* identical bit patterns: only the NaN check remains */ + or r12,r0,r1 + j_s.d [blink] + bmsk.f 0,r12,30 /* delay slot: z set iff (r0|r1) has no bit set below bit 31 */ +.Lno_bdiff: /* r0 == r1 bit for bit */ + bmsk r12,r0,23 + add1.f r12,r12,r0 /* set c iff NaN; also, clear z if NaN. 
*/ + j_s.d [blink] + cmp.cc r0,r1 /* delay slot: only when c is clear (no NaN) */ + ENDFUNC(__eqsf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/extendsfdf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/extendsfdf2.S new file mode 100644 index 0000000..cf6c98d --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/extendsfdf2.S @@ -0,0 +1,122 @@ +/* Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ /* Disabled self-check: calls __extendsfdf2_c and __extendsfdf2_asm on the same r0 and calls abort unless both r0 and r1 match. */ + .global __extendsfdf2 + .balign 4 + FUNC(__extendsfdf2) +__extendsfdf2: + push_s blink + bl.d __extendsfdf2_c + push_s r0 + ld_s r2,[sp] + st_s r1,[sp] + push_s r0 + bl.d __extendsfdf2_asm + mov_s r0,r2 + pop_s r2 + pop_s r3 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + bl abort + ENDFUNC(__extendsfdf2) +#define __extendsfdf2 __extendsfdf2_asm +#endif /* DEBUG */ +#if 0 /* ARC600 */ /* disabled, incomplete fragment; note the ".." placeholder line */ +__extendsfdf2: + lsr r2,r0,23 + tst r2,0xff + bic.ne.f r2,0xff + beq_s .Linf_nan_denorm_0 +.. 
+.Linf_nan_denorm: + bbit1 r0,30,.Linf_nan +#endif + .global __extendsfdf2 + .balign 4 + FUNC(__extendsfdf2) +__extendsfdf2: /* input in r0; result written to DBL0L/DBL0H */ + add.f r1,r0,r0 /* r1 = r0 << 1; the carry receives bit 31 of r0 */ + norm r3,r1 +#ifdef __LITTLE_ENDIAN__ + lsr_s DBL0H,r1,4 + brhs r3,7,.Linf_nan_denorm_0 + asl_s DBL0L,r0,29 + add_s DBL0H,DBL0H, \ + 0x38000000 +#else + lsr r2,r1,4 + brhs r3,7,.Linf_nan_denorm_0 + asl_s DBL0L,r1,28 + add DBL0H,r2, \ + 0x38000000 +#endif + j_s.d [blink] + bxor.cs DBL0H,DBL0H,31 /* delay slot: toggle bit 31 when the carry from add.f is set */ + .balign 4 +.Linf_nan_denorm_0: /* taken when norm r3 >= 7 (unsigned compare); label name suggests inf, NaN, denormal or zero input */ +#ifdef __LITTLE_ENDIAN__ + mov_s DBL0H,r0 + jeq.d [blink] + mov.eq DBL0L,0 +#else + jeq_s [blink] +#endif + bmi .Linf_nan + asl_s r0,r0,r3 + rsub r3,r3,0x380+6 +#ifdef __LITTLE_ENDIAN__ + asl_s r3,r3,20 + lsr DBL0H,r0,9 + asl_s DBL0L,r0,23 + add_s DBL0H,DBL0H,r3 + j_s.d [blink] + bxor.cs DBL0H,DBL0H,31 +#else + asl DBL0L,r0,23 + lsr_s DBL0H,r0,9 + asl_s r3,r3,20 + bxor.cs DBL0H,DBL0H,31 + j_s.d [blink] + add_l DBL0H,DBL0H,r3 +#endif +.Linf_nan: /* DBL0H = r0 | (r0 >> 3), DBL0L = 0 */ +#ifdef __LITTLE_ENDIAN__ + lsr DBL0H,r0,3 + + or_s DBL0H,DBL0H,r0 + j_s.d [blink] + mov_l DBL0L,0 +#else + lsr r3,r0,3 + mov_s DBL0L,0 + j_s.d [blink] + or_l DBL0H,r0,r3 +#endif + ENDFUNC(__extendsfdf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/fixdfsi.S b/gcc-4.9/libgcc/config/arc/ieee-754/fixdfsi.S new file mode 100644 index 0000000..82c2c02 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/fixdfsi.S @@ -0,0 +1,85 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ /* Disabled self-check: calls __fixdfsi_c and __fixdfsi_asm on the same r0/r1 and calls abort unless the r0 results match. */ + FUNC(__fixdfsi) + .global __fixdfsi + .balign 4 +__fixdfsi: + push_s blink + push_s r0 + bl.d __fixdfsi_c + push_s r1 + mov_s r2,r0 + pop_s r1 + ld r0,[sp] + bl.d __fixdfsi_asm + st r2,[sp] + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__fixdfsi) +#define __fixdfsi __fixdfsi_asm +#endif /* DEBUG */ + +/* If the fraction has to be shifted left by a positive non-zero amount, + we have to combine bits from DBL0L and DBL0H. If we shift right, + or shift by zero, we only want to have the bits from DBL0H in r0. */ + + .global __fixdfsi + FUNC(__fixdfsi) + .balign 4 +__fixdfsi: /* input in DBL0L/DBL0H; result in r0; also writes r1, r2, r3 and flags */ + bbit0 DBL0H,30,.Lret0or1 /* bit 30 of the high word clear: small-magnitude path */ + asr r2,DBL0H,20 + bmsk_s DBL0H,DBL0H,19 /* keep bits 0..19 of the high word */ + sub_s r2,r2,19; 0x3ff+20-0x400 + neg_s r3,r2 /* r3 = -r2, used as the right-shift count */ + asr.f 0,r3,11 + bset_s DBL0H,DBL0H,20 /* set bit 20 above the 20 kept bits */ +#ifdef __LITTLE_ENDIAN__ + mov.cs DBL0L,DBL0H + asl DBL0H,DBL0H,r2 +#else + asl.cc DBL0H,DBL0H,r2 + lsr.cs DBL0H,DBL0H,r3 +#endif + lsr_s DBL0L,DBL0L,r3 + + add.cc r0,r0,r1 /* left-shift case only: combine both words, see the comment above the function */ + j_s.d [blink] + neg.pl r0,r0 /* delay slot: negate when the n flag from asr.f is clear */ +.Lret0or1: /* taken when bit 30 of DBL0H is clear; label name suggests the result is 0 or +-1 */ + add.f r0,DBL0H,0x100000 + lsr_s r0,r0,30 + + bmsk_s r0,r0,0 /* keep bit 0 only */ + j_s.d [blink] + neg.mi r0,r0 /* delay slot: negate when the n flag from add.f is set */ + ENDFUNC(__fixdfsi) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/fixsfsi.S b/gcc-4.9/libgcc/config/arc/ieee-754/fixsfsi.S new file mode 100644 index 0000000..56ab2fd --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/fixsfsi.S @@ -0,0 +1,71 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ /* Disabled self-check: calls __fixsfsi_c and __fixsfsi_asm on the same r0 and calls abort unless the r0 results match. */ + .global __fixsfsi + FUNC(__fixsfsi) + .balign 4 +__fixsfsi: + push_s blink + bl.d __fixsfsi_c + push_s r0 + ld_s r1,[sp] + st_s r0,[sp] + bl.d __fixsfsi_asm + mov_s r0,r1 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__fixsfsi) +#define __fixsfsi __fixsfsi_asm +#endif /* DEBUG */ + + .global __fixsfsi + FUNC(__fixsfsi) + .balign 4 +__fixsfsi: /* input in r0; result in r0; also writes r2, r3 and flags */ + bbit0 r0,30,.Lret0or1 /* bit 30 clear: small-magnitude path */ + lsr r2,r0,23 + bmsk_s r0,r0,22 /* keep bits 0..22 */ + bset_s r0,r0,23 /* set bit 23 above the kept bits */ + sub_s r2,r2,22;0x7f+23-0x80 + asl.f 0,r2,24 + neg r3,r2 /* r3 = -r2, used as the right-shift count */ + asl.mi r0,r0,r2 + lsr.pl r0,r0,r3 + j_s.d [blink] + neg.cs r0,r0 /* delay slot: negate when the carry from asl.f is set */ +.Lret0or1: /* taken when bit 30 of r0 is clear; label name suggests the result is 0 or +-1 */ + add.f r0,r0,0x800000 + lsr_s r0,r0,30 + + bmsk_s r0,r0,0 /* keep bit 0 only */ + j_s.d [blink] + neg.mi r0,r0 /* delay slot: negate when the n flag from add.f is set */ + ENDFUNC(__fixsfsi) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/fixunsdfsi.S b/gcc-4.9/libgcc/config/arc/ieee-754/fixunsdfsi.S new file mode 100644 index 0000000..13af5dc --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/fixunsdfsi.S @@ -0,0 +1,80 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. 
+ Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ /* Disabled self-check: calls __fixunsdfsi_c and __fixunsdfsi_asm on the same r0/r1 and calls abort unless the r0 results match. */ + FUNC(__fixunsdfsi) + .global __fixunsdfsi + .balign 4 +__fixunsdfsi: + push_s blink + push_s r0 + bl.d __fixunsdfsi_c + push_s r1 + mov_s r2,r0 + pop_s r1 + ld r0,[sp] + bl.d __fixunsdfsi_asm + st r2,[sp] + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__fixunsdfsi) +#define __fixunsdfsi __fixunsdfsi_asm +#endif /* DEBUG */ + + .global __fixunsdfsi + FUNC(__fixunsdfsi) + .balign 4 +__fixunsdfsi: /* input in DBL0L/DBL0H; result in r0; also writes r1, r2, r3 and flags */ + bbit0 DBL0H,30,.Lret0or1 /* bit 30 of the high word clear: small-magnitude path */ + lsr r2,DBL0H,20 + bmsk_s DBL0H,DBL0H,19 /* keep bits 0..19 of the high word */ + sub_s r2,r2,19; 0x3ff+20-0x400 + neg_s r3,r2 /* r3 = -r2, used as the right-shift count */ + btst_s r3,10 /* z is set when bit 10 of r3 is clear; selects the .eq / .ne variants below */ + bset_s DBL0H,DBL0H,20 /* set bit 20 above the 20 kept bits */ +#ifdef __LITTLE_ENDIAN__ + mov.ne DBL0L,DBL0H + asl DBL0H,DBL0H,r2 +#else + asl.eq DBL0H,DBL0H,r2 + lsr.ne DBL0H,DBL0H,r3 +#endif + lsr DBL0L,DBL0L,r3 + j_s.d [blink] + add.eq r0,r0,r1 /* delay slot: combine both words in the .eq case */ +.Lret0: /* NOTE(review): no branch to .Lret0 is visible in this function; looks unreachable - confirm */ + j_s.d [blink] + mov_l r0,0 +.Lret0or1: /* taken when bit 30 of DBL0H is clear; label name suggests the result is 0 or 1 */ + add_s DBL0H,DBL0H,0x100000 + lsr_s DBL0H,DBL0H,30 + j_s.d [blink] + bmsk_l r0,DBL0H,0 /* delay slot: r0 = bit 0 of the shifted value */ + ENDFUNC(__fixunsdfsi) diff --git 
a/gcc-4.9/libgcc/config/arc/ieee-754/floatsidf.S b/gcc-4.9/libgcc/config/arc/ieee-754/floatsidf.S new file mode 100644 index 0000000..2e2363a --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/floatsidf.S @@ -0,0 +1,77 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ /* Disabled self-check: calls __floatsidf_c and __floatsidf_asm on the same r0 and calls abort unless both r0 and r1 match. */ + .global __floatsidf + .balign 4 + FUNC(__floatsidf) +__floatsidf: + push_s blink + bl.d __floatsidf_c + push_s r0 + ld_s r2,[sp] + st_s r1,[sp] + push_s r0 + bl.d __floatsidf_asm + mov_s r0,r2 + pop_s r2 + pop_s r3 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + bl abort + ENDFUNC(__floatsidf) +#define __floatsidf __floatsidf_asm +#endif /* DEBUG */ + + .global __floatsidf + .balign 4 + FUNC(__floatsidf) +__floatsidf: /* input in r0; result written to DBL0L/DBL0H; also writes r2, r3, r12 and flags */ + abs.f r1,r0 /* r1 = |r0| */ + jeq_s [blink] /* r0 == 0: return at once (r1 was just set to 0 as well) */ + lsr r2,r1 + mov r12,-0x41d ; -(0x3ff+31-1) + norm r2,r2 + bclr.cs r12,r12,11 /* executed only when the carry left by abs.f is set */ + rsub.f r3,r2,11 /* r3 = 11 - r2; its flags drive the .lo / .ls instructions below */ + add_s r12,r2,r12 + add_s r2,r2,21 +#ifdef __LITTLE_ENDIAN__ + asl DBL0L,r1,r2 + lsr_s DBL0H,r1,r3 +#else + lsr DBL0H,r1,r3 + asl_s DBL0L,r1,r2 +#endif + asl_s r12,r12,20 /* move the value in r12 up to bit 20 and above */ + mov.lo DBL0H,DBL0L + sub_s DBL0H,DBL0H,r12 + j_s.d [blink] + mov.ls DBL0L,0 /* delay slot */ + ENDFUNC(__floatsidf) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/floatsisf.S b/gcc-4.9/libgcc/config/arc/ieee-754/floatsisf.S new file mode 100644 index 0000000..0e35fe0 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/floatsisf.S @@ -0,0 +1,99 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ /* Disabled self-checks: each wrapper calls the _c and the _asm variant on the same r0 and calls abort unless the r0 results match. */ + .global __floatsisf + FUNC(__floatsisf) + .balign 4 +__floatsisf: + push_s blink + bl.d __floatsisf_c + push_s r0 + ld_s r1,[sp] + st_s r0,[sp] + bl.d __floatsisf_asm + mov_s r0,r1 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__floatsisf) + .global __floatunsisf + FUNC(__floatunsisf) + .balign 4 +__floatunsisf: + push_s blink + bl.d __floatunsisf_c + push_s r0 + ld_s r1,[sp] + st_s r0,[sp] + bl.d __floatunsisf_asm + mov_s r0,r1 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + bl abort + ENDFUNC(__floatunsisf) +#define __floatsisf __floatsisf_asm +#define __floatunsisf __floatunsisf_asm +#endif /* DEBUG */ + + .global __floatunsisf + .global __floatsisf + FUNC(__floatsisf) + FUNC(__floatunsisf) + .balign 4 +__floatunsisf: /* input in r0; result in r0; shares the tail at local label 0 with __floatsisf */ + lsr_s r2,r0 + mov_l r12,0x9d ; 0x7f + 31 - 1 + norm r2,r2 + brne_l r0,0,0f /* non-zero input: continue in the shared tail */ + j_s [blink] /* r0 == 0: return it unchanged */ + .balign 4 +__floatsisf: /* input in r0; result in r0; also writes r1, r2, r3, r12 and flags */ + abs.f r0,r0 /* r0 = |r0| */ + jeq_s [blink] /* r0 == 0: return at once */ + lsr_s r2,r0 + mov_s r12,0x9d ; 0x7f + 31 - 1 + norm r2,r2 + bset.cs r12,r12,8 /* executed only when the carry left by abs.f is set: set bit 8 of r12 */ +0: rsub.f r3,r2,8 /* shared tail; r3 = 8 - r2, its sign selects the .pl / .mi shifts below */ + bmsk r1,r0,r3 + ror r1,r1,r3 + lsr.pl r0,r0,r3 + neg_s r3,r3 + asl.mi r0,r0,r3 + sub_s r12,r12,r2 + asl_s r12,r12,23 /* move the value in r12 up to bit 23 and above */ + bxor.pl.f r1,r1,31 + add_s r0,r0,r12 + j_s.d [blink] + add.pnz r0,r0,1 /* delay slot: conditional increment, presumably the rounding step - confirm */ + ENDFUNC(__floatunsisf) + ENDFUNC(__floatsisf) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/floatunsidf.S b/gcc-4.9/libgcc/config/arc/ieee-754/floatunsidf.S new file mode 100644 index 0000000..4bdb965 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/floatunsidf.S @@ -0,0 +1,75 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ /* Disabled self-check: calls __floatunsidf_c and __floatunsidf_asm on the same r0 and calls abort unless both r0 and r1 match. */ + .global __floatunsidf + .balign 4 + FUNC(__floatunsidf) +__floatunsidf: + push_s blink + bl.d __floatunsidf_c + push_s r0 + ld_s r2,[sp] + st_s r1,[sp] + push_s r0 + bl.d __floatunsidf_asm + mov_s r0,r2 + pop_s r2 + pop_s r3 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + bl abort + ENDFUNC(__floatunsidf) +#define __floatunsidf __floatunsidf_asm +#endif /* DEBUG */ + + .global __floatunsidf + .balign 4 + FUNC(__floatunsidf) +__floatunsidf: /* input in r0; result written to DBL0L/DBL0H; also writes r1, r2, r3, r12 and flags */ + lsr_s r1,r0 /* r1 = r0 >> 1 */ + breq_s r0,0,.Lret0 /* zero input: go straight to the return at .Lret0 */ + norm r2,r1 + mov r12,-0x41d ; -(0x3ff+31-1) + rsub.f r3,r2,11 /* r3 = 11 - r2; its flags drive the .lo / .ls instructions below */ + add_s r12,r2,r12 + add_s r2,r2,21 +#ifdef __LITTLE_ENDIAN__ + lsr DBL0H,r0,r3 + asl_s DBL0L,r0,r2 +#else + asl DBL0L,r0,r2 + lsr_s DBL0H,r0,r3 +#endif + asl_s r12,r12,20 /* move the value in r12 up to bit 20 and above */ + mov.lo DBL0H,DBL0L + sub_s DBL0H,DBL0H,r12 +.Lret0: j_s.d [blink] + mov.ls DBL0L,0 /* delay slot; also executed on the zero-input path */ + ENDFUNC(__floatunsidf) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/gedf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/gedf2.S new file mode 100644 index 0000000..72b71af --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/gedf2.S @@ -0,0 +1,86 @@ 
+/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: DBL0, DBL1 + output: c flags to be used for 'hs' condition + clobber: r12, flags */ +/* For NaNs, bit 19.. bit 30 of the high word must be set. 
*/ +#if 0 /* DEBUG */ /* Disabled self-check: calls __gedf2_c, then __gedf2_asm with the same arguments, and calls abort unless the 'hs' condition left by __gedf2_asm agrees with (__gedf2_c result >= 0). */ + .global __gedf2 + .balign 4 + FUNC(__gedf2) +__gedf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __gedf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __gedf2_asm` ld.ab r10,[sp,4] + pop_s blink + brge.d r11,0,0f + ld.ab r11,[sp,4] + jlo [blink] + bl abort +0: jhs [blink] + bl abort + ENDFUNC(__gedf2) +#define __gedf2 __gedf2_asm +#endif /* DEBUG */ + .global __gedf2 + .balign 4 + HIDDEN_FUNC(__gedf2) +__gedf2: + or.f r12,DBL0H,DBL1H /* n flag = bit 31 of either high word */ + bmi.d .Lneg /* at least one operand has bit 31 set */ + bmsk_s r12,r12,20 /* delay slot: keep bits 0..20 of the OR */ + add1.f 0,r12,DBL0H ; clear z; set c iff NaN + add1.cc.f r12,r12,DBL1H ; clear z; set c iff NaN + bbit1 DBL0H,31,.Lneg + cmp.cc DBL0H,DBL1H /* no NaN seen: compare the high words */ + j_s.d [blink] + cmp.eq DBL0L,DBL1L /* delay slot: high words equal, so compare the low words */ + .balign 4 +.Lneg: breq.d DBL1H,0,.L0 /* DBL1H == 0 is handled at .L0 */ + add1.f 0,r12,DBL0H + add1.cc.f r12,r12,DBL1H + cmp.cc DBL1H,DBL0H /* operands swapped relative to the non-negative path */ + j_s.d [blink] + cmp.eq DBL1L,DBL0L + .balign 4 +.L0: + bxor.f 0,DBL0H,31 ; check for high word of -0. + beq_s .Lcheck_0 + cmp.cc DBL1H,DBL0H + j_s.d [blink] + cmp.eq DBL1L,DBL0L +.Lcheck_0: + ; high words suggest DBL0 may be -0, DBL1 +0; check low words. + cmp_s DBL1H,DBL0L + j_s.d [blink] + cmp.cc DBL1H,DBL1L + ENDFUNC(__gedf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/gesf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/gesf2.S new file mode 100644 index 0000000..896901a --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/gesf2.S @@ -0,0 +1,75 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: r0, r1 + output: c flag to be used for 'hs' condition + clobber: r12,flags */ +/* For NaNs, bit 22.. bit 30 must be set. 
*/ +#if 0 /* DEBUG */ /* Disabled self-check: calls __gesf2_c, then __gesf2_asm with the same arguments, and calls abort unless the 'hs' condition left by __gesf2_asm agrees with (__gesf2_c result >= 0). */ + .global __gesf2 + .balign 4 + FUNC(__gesf2) +__gesf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __gesf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __gesf2_asm` ld.ab r10,[sp,4] + pop_s blink + brge.d r11,0,0f + ld.ab r11,[sp,4] + jlo [blink] + bl abort +0: jhs [blink] + bl abort + ENDFUNC(__gesf2) +#define __gesf2 __gesf2_asm +#endif /* DEBUG */ + .global __gesf2 + .balign 4 + HIDDEN_FUNC(__gesf2) +__gesf2: + or.f r12,r0,r1 /* n flag = bit 31 of either operand */ + bmi.d .Lneg /* at least one operand has bit 31 set */ + bmsk_s r12,r12,23 /* delay slot: keep bits 0..23 of the OR */ + add1.f 0,r12,r0 ; check for NaN + add1.cc.f r12,r12,r1 + j_s.d [blink] + cmp.cc r0,r1 /* delay slot: only when c is clear after the NaN checks */ + .balign 4 +.Lneg: breq.d r1,0,.L0 /* r1 == 0 is handled at .L0 */ + add1.f 0,r12,r0 ; check for NaN + add1.cc.f r12,r12,r1 + j_s.d [blink] + cmp.cc r1,r0 /* delay slot: operands swapped relative to the non-negative path */ + .balign 4 +.L0: bxor.f 0,r0,31 ; check for -0 + j_s.d [blink] + cmp.hi r1,r0 + ENDFUNC(__gesf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/gtdf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/gtdf2.S new file mode 100644 index 0000000..56c2a29 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/gtdf2.S @@ -0,0 +1,86 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: DBL0, DBL1 + output: c,z flags to be used for 'hi' condition + clobber: r12, flags */ +/* For NaNs, bit 19.. bit 30 of the high word must be set. */ +#if 0 /* DEBUG */ /* Disabled self-check: calls __gtdf2_c, then __gtdf2_asm with the same arguments, and calls abort unless the 'hi' condition left by __gtdf2_asm agrees with (__gtdf2_c result > 0). */ + .global __gtdf2 + .balign 4 + FUNC(__gtdf2) +__gtdf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __gtdf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __gtdf2_asm` ld.ab r10,[sp,4] + pop_s blink + brgt.d r11,0,0f + ld.ab r11,[sp,4] + jls [blink] + bl abort +0: jhi [blink] + bl abort + ENDFUNC(__gtdf2) +#define __gtdf2 __gtdf2_asm +#endif /* DEBUG */ + .global __gtdf2 + .balign 4 + HIDDEN_FUNC(__gtdf2) +__gtdf2: + or.f r12,DBL0H,DBL1H /* n flag = bit 31 of either high word */ + bmi.d .Lneg /* at least one operand has bit 31 set */ + bmsk_s r12,r12,20 /* delay slot: keep bits 0..20 of the OR */ + add1.f 0,r12,DBL0H ; clear z; set c iff NaN + add1.cc.f r12,r12,DBL1H ; clear z; set c iff NaN + ; don't care: z may or may not be cleared if there is no NaN event + cmp.cc DBL0H,DBL1H /* no NaN seen: compare the high words */ + j_s.d [blink] + cmp.eq DBL0L,DBL1L /* delay slot: high words equal, so compare the low words */ + .balign 4 +.Lneg: breq.d DBL0H,0,.L0 /* DBL0H == 0 is handled at .L0 */ + add1.f 0,r12,DBL1H + add1.cc.f r12,r12,DBL0H + cmp.cc DBL1H,DBL0H /* operands swapped relative to the non-negative path */ + j_s.d [blink] + cmp.eq DBL1L,DBL0L + .balign 4 +.L0: + bxor.f 0,DBL1H,31 /* z set iff DBL1H is exactly 0x80000000 */ + beq_s .Lcheck_0 + cmp.cc DBL1H,DBL0H + j_s.d [blink] + cmp.eq DBL1L,DBL0L + .balign 4 +.Lcheck_0: + ; high words suggest DBL0 may be +0, DBL1 -0; check low words. 
+ j_s.d [blink] + or.f 0,DBL0L,DBL1L /* delay slot: z set iff both low words are zero */ + ENDFUNC(__gtdf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/gtsf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/gtsf2.S new file mode 100644 index 0000000..6253d06 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/gtsf2.S @@ -0,0 +1,75 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: r0, r1 + output: c, z flags to be used for 'hi' condition + clobber: r12,flags */ +/* For NaNs, bit 22.. bit 30 must be set. 
*/ +#if 0 /* DEBUG: disabled self-check wrapper -- runs __gtsf2_c and __gtsf2_asm on the same operands and calls abort unless the asm flags satisfy 'hi' exactly when the C result is > 0. */ + .global __gtsf2 + .balign 4 + FUNC(__gtsf2) +__gtsf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __gtsf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __gtsf2_asm` ld.ab r10,[sp,4] + pop_s blink + brgt.d r11,0,0f + ld.ab r11,[sp,4] + jls [blink] + bl abort +0: jhi [blink] + bl abort + ENDFUNC(__gtsf2) +#define __gtsf2 __gtsf2_asm +#endif /* DEBUG */ + .global __gtsf2 + .balign 4 + HIDDEN_FUNC(__gtsf2) +__gtsf2: + or.f r12,r0,r1 + bmi.d .Lneg + bmsk_s r12,r12,23 + add1.f 0,r12,r0 ; check for NaN + add1.cc.f r12,r12,r1 + j_s.d [blink] + cmp.cc r0,r1 + .balign 4 +.Lneg: breq.d r0,0,.L0 + add1.f 0,r12,r0 ; check for NaN + add1.cc.f r12,r12,r1 + j_s.d [blink] + cmp.cc r1,r0 + .balign 4 +.L0: bxor.f 0,r1,31 ; check for -0 + j_s.d [blink] + cmp.hi r1,r0 + ENDFUNC(__gtsf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S new file mode 100644 index 0000000..7826fe7 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/muldf3.S @@ -0,0 +1,235 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* XMAC schedule: directly back-to-back multiplies stall; the third + instruction after a multiply stalls unless it is also a multiply. */ +#include "arc-ieee-754.h" + +#if 0 /* DEBUG: disabled self-check wrapper -- multiplies the same operands with __muldf3_c and __muldf3_asm and branches to abort unless both result words match (r0 against r2, r1 against r3). */ + .global __muldf3 + .balign 4 +__muldf3: + push_s blink + push_s r2 + push_s r3 + push_s r0 + bl.d __muldf3_c + push_s r1 + ld_s r2,[sp,12] + ld_s r3,[sp,8] + st_s r0,[sp,12] + st_s r1,[sp,8] + pop_s r1 + bl.d __muldf3_asm + pop_s r0 + pop_s r3 + pop_s r2 + pop_s blink + cmp r0,r2 + cmp.eq r1,r3 + jeq_s [blink] + b abort +#define __muldf3 __muldf3_asm +#endif /* DEBUG */ +/* N.B. This is optimized for ARC700. + ARC600 has very different scheduling / instruction selection criteria. */ +/* For the standard multiplier, instead of mpyu rx,DBL0L,DBL1L; tst rx,rx , + we can do: + sub rx,DBL0L,1; bic rx,DBL0L,rx; lsr rx,rx; norm rx,rx; asl.f 0,DBL1L,rx */ + +__muldf3_support: /* This label makes debugger output saner. */ +/* If one number is denormal, subtract some from the exponent of the other + one (if the other exponent is too small, return 0), and normalize the + denormal. Then re-run the computation. 
*/ + .balign 4 + FUNC(__muldf3) +.Ldenorm_dbl0: + mov_s r12,DBL0L + mov_s DBL0L,DBL1L + mov_s DBL1L,r12 + mov_s r12,DBL0H + mov_s DBL0H,DBL1H + mov_s DBL1H,r12 + and r11,DBL0H,r9 +.Ldenorm_dbl1: + brhs r11,r9,.Linf_nan + brhs 0x3ca00001,r11,.Lret0 + sub_s DBL0H,DBL0H,DBL1H + bmsk_s DBL1H,DBL1H,30 + add_s DBL0H,DBL0H,DBL1H + breq_s DBL1H,0,.Ldenorm_2 + norm r12,DBL1H + + sub_s r12,r12,10 + asl r5,r12,20 + asl_s DBL1H,DBL1H,r12 + sub DBL0H,DBL0H,r5 + neg r5,r12 + lsr r6,DBL1L,r5 + asl_s DBL1L,DBL1L,r12 + b.d __muldf3 + add_s DBL1H,DBL1H,r6 + + .balign 4 +.Linf_nan: + bclr r12,DBL1H,31 + xor_s DBL1H,DBL1H,DBL0H + bclr_s DBL0H,DBL0H,31 + max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf + or.f 0,DBL0H,DBL0L + mov_s DBL0L,0 + or.ne.f DBL1L,DBL1L,r12 + not_s DBL0H,DBL0L ; inf * 0 -> NaN + mov.ne DBL0H,r8 + tst_s DBL1H,DBL1H + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +.Lret0: xor_s DBL0H,DBL0H,DBL1H + bclr DBL1H,DBL0H,31 + xor_s DBL0H,DBL0H,DBL1H + j_s.d [blink] + mov_l DBL0L,0 + + .balign 4 +.Ldenorm_2: + breq_s DBL1L,0,.Lret0 ; 0 input -> 0 output + norm.f r12,DBL1L + + mov.mi r12,21 + add.pl r12,r12,22 + neg r11,r12 + asl_s r12,r12,20 + lsr.f DBL1H,DBL1L,r11 + ror DBL1L,DBL1L,r11 + sub_s DBL0H,DBL0H,r12 + mov.eq DBL1H,DBL1L + sub_s DBL1L,DBL1L,DBL1H + /* Fall through. */ + .global __muldf3 + .balign 4 +__muldf3: + ld.as r9,[pcl,0x4b] ; [pcl,((.L7ff00000-.+2)/4)] + mpyhu r4,DBL0L,DBL1L + bmsk r6,DBL0H,19 + bset r6,r6,20 + mpyu r7,r6,DBL1L + and r11,DBL0H,r9 + breq r11,0,.Ldenorm_dbl0 + mpyhu r8,r6,DBL1L + bmsk r10,DBL1H,19 + bset r10,r10,20 + mpyhu r5,r10,DBL0L + add.f r4,r4,r7 + and r12,DBL1H,r9 + mpyhu r7,r6,r10 + breq r12,0,.Ldenorm_dbl1 + adc.f r5,r5,r8 + mpyu r8,r10,DBL0L + breq r11,r9,.Linf_nan + breq r12,r9,.Linf_nan + mpyu r6,r6,r10 + add.cs r7,r7,1 + add.f r4,r4,r8 + mpyu r10,DBL1L,DBL0L + bclr r8,r9,30 ; 0x3ff00000 + adc.f r5,r5,r6 + ; XMAC write-back stall / std. 
mult stall is one cycle later + bclr r6,r9,20 ; 0x7fe00000 + add.cs r7,r7,1 ; fraction product in r7:r5:r4 + tst r10,r10 + bset.ne r4,r4,0 ; put least significant word into sticky bit + lsr.f r10,r7,9 + add_l r12,r12,r11 ; add exponents + rsub.eq r8,r8,r9 ; 0x40000000 + sub r12,r12,r8 ; subtract bias + implicit 1 + brhs.d r12,r6,.Linf_denorm + rsub r10,r10,12 +.Lshift_frac: + neg r8,r10 + asl r6,r4,r10 + lsr DBL0L,r4,r8 + add.f 0,r6,r6 + btst.eq DBL0L,0 + cmp.eq r4,r4 ; round to nearest / round to even + asl r4,r5,r10 + lsr r5,r5,r8 + adc.f DBL0L,DBL0L,r4 + xor.f 0,DBL0H,DBL1H + asl r7,r7,r10 + add_s r12,r12,r5 + adc DBL0H,r12,r7 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + +/* We have checked for infinity / NaN input before, and transformed + denormalized inputs into normalized inputs. Thus, the worst case + exponent overflows are: + 1 + 1 - 0x400 == 0xc02 : maximum underflow + 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow + N.B. 0x7e and 0x7f are also values for overflow. + + If (r12 <= -54), we have an underflow to zero. 
*/ + .balign 4 +.Linf_denorm: + brlo r12,0xc0000000,.Linf + asr r6,r12,20 + mov_s r12,0 + add.f r10,r10,r6 + brgt r10,0,.Lshift_frac + beq_s .Lround_frac + add.f r10,r10,32 +.Lshift32_frac: + tst r4,r4 + mov r4,r5 + bset.ne r4,r4,1 + mov r5,r7 + mov r7,0 + brge r10,1,.Lshift_frac + breq r10,0,.Lround_frac + add.f r10,r10,32 + brgt r10,21,.Lshift32_frac + b_s .Lret0 + +.Lround_frac: + add.f 0,r4,r4 + btst.eq r5,0 + mov_s DBL0L,r5 + mov_s DBL0H,r7 + adc.eq.f DBL0L,DBL0L,0 + j_s.d [blink] + + adc.eq DBL0H,DBL0H,0 + +.Linf: xor.f DBL1H,DBL1H,DBL0H + mov_s DBL0L,0 + mov_s DBL0H,r9 + j_s.d [blink] + bset.mi DBL0H,DBL0H,31 + ENDFUNC(__muldf3) + + .balign 4 +.L7ff00000: + .long 0x7ff00000 diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/mulsf3.S b/gcc-4.9/libgcc/config/arc/ieee-754/mulsf3.S new file mode 100644 index 0000000..4bd82f4 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/mulsf3.S @@ -0,0 +1,180 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +/* XMAC schedule: directly back-to-back multiplies stall; the third + instruction after a multiply stalls unless it is also a multiply. */ +#include "arc-ieee-754.h" + +#if 0 /* DEBUG: disabled self-check wrapper -- multiplies the same operands with __mulsf3_c and __mulsf3_asm; returns when the two results are equal or both look like NaNs, otherwise calls abort. */ + .global __mulsf3 + FUNC(__mulsf3) + .balign 4 +__mulsf3: + push_s blink + push_s r1 + bl.d __mulsf3_c + push_s r0 + ld_s r1,[sp,4] + st_s r0,[sp,4] + bl.d __mulsf3_asm + pop_s r0 + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 + bne 0f + bmsk.f 0,r0,22 + bmsk.ne.f r1,r1,22 + jne_s [blink] ; both NaN -> OK +0: bl abort + ENDFUNC(__mulsf3) +#define __mulsf3 __mulsf3_asm +#endif /* DEBUG */ + + .balign 4 + .global __mulsf3 + FUNC(__mulsf3) +__mulsf3: + ld.as r9,[pcl,79]; [pcl,((.L7f800000-.+2)/4)] + bmsk r4,r1,22 + bset r2,r0,23 + asl_s r2,r2,8 + bset r3,r4,23 + mpyhu r6,r2,r3 + and r11,r0,r9 + breq r11,0,.Ldenorm_dbl0 + mpyu r7,r2,r3 + breq r11,r9,.Linf_nan_dbl0 + and r12,r1,r9 + asl.f 0,r6,8 + breq r12,0,.Ldenorm_dbl1 +.Lpast_denorm: + xor_s r0,r0,r1 +.Lpast_denorm_dbl1: + add.pl r6,r6,r6 + bclr.pl r6,r6,23 + add.pl.f r7,r7,r7 + ld.as r4,[pcl,64]; [pcl,((.L7fffffff-.+2)/4)] + add.cs r6,r6,1 + lsr.f 0,r6,1 + breq r12,r9,.Linf_nan_dbl1 + add_s r12,r12,r11 + adc.f 0,r7,r4 + add_s r12,r12, \ + -0x3f800000 + adc.f r8,r6,r12 + bic r0,r0,r4 + tst.pl r8,r9 + min r3,r8,r9 + jpnz.d [blink] + add.pnz r0,r0,r3 +; infinity or denormal number + add.ne.f r3,r3,r3 + bpnz .Linfinity + asr_s r3,r3,23+1 + bset r6,r6,23 + sub_s r3,r3,1 + neg_s r2,r3 + brhi r2,24,.Lret_r0 ; right shift > 24 -> return +-0 + lsr r2,r6,r2 + asl r9,r6,r3 + lsr.f 0,r2,1 + tst r7,r7 + add_s r0,r0,r2 + bset.ne r9,r9,0 + adc.f 0,r9,r4 + j_s.d [blink] + add.cs r0,r0,1 +.Linfinity: + j_s.d [blink] + add_s r0,r0,r9 + +.Lret_r0: j_s [blink] + + .balign 4 +.Linf_nan_dbl0: + sub_s r2,r1,1 ; inf/nan * 0 -> nan; inf * nan -> nan (use |r2| >= inf) + bic.f 0,r9,r2 + xor_s r0,r0,r1 + bclr_s r1,r1,31 + xor_s r0,r0,r1 + jne_s [blink] +.Lretnan: + j_s.d [blink] + mov r0,-1 
+.Ldenorm_dbl0_inf_nan_dbl1: + bmsk.f 0,r0,30 + beq_s .Lretnan + xor_s r0,r0,r1 +.Linf_nan_dbl1: + xor_s r1,r1,r0 + bclr_s r1,r1,31 + j_s.d [blink] + xor_s r0,r0,r1 + + .balign 4 +.Ldenorm_dbl0: + bclr_s r2,r2,31 + norm.f r4,r2 + and r12,r1,r9 + add_s r2,r2,r2 + asl r2,r2,r4 + asl r4,r4,23 + mpyhu r6,r2,r3 + breq r12,r9,.Ldenorm_dbl0_inf_nan_dbl1 + sub.ne.f r12,r12,r4 + mpyu r7,r2,r3 + bhi.d .Lpast_denorm + asl.f 0,r6,8 + xor_s r0,r0,r1 + bmsk r1,r0,30 + j_s.d [blink] + bic_l r0,r0,r1 + + .balign 4 +.Ldenorm_dbl1: + norm.f r3,r4 + xor_s r0,r0,r1 + sub_s r3,r3,7 + asl r4,r4,r3 + sub_s r3,r3,1 + asl_s r3,r3,23 + mpyhu r6,r2,r4 + sub.ne.f r11,r11,r3 + bmsk r8,r0,30 + mpyu r7,r2,r4 + bhi.d .Lpast_denorm_dbl1 + asl.f 0,r6,8 + j_s.d [blink] + bic r0,r0,r8 + + .balign 4 +.L7f800000: + .long 0x7f800000 +.L7fffffff: + .long 0x7fffffff + ENDFUNC(__mulsf3) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/orddf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/orddf2.S new file mode 100644 index 0000000..5fcdf4c --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/orddf2.S @@ -0,0 +1,63 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: DBL0, DBL1 (only the high words DBL0H / DBL1H are read) + output: c flag + clobber: r12, flags + For NaNs, bit 19 .. bit 30 must be set. */ +#if 0 /* DEBUG: disabled self-check wrapper -- runs __unorddf2_c and __orddf2_asm on the same operands and calls abort unless the asm carry flag is set exactly when the C result is nonzero. */ + .global __orddf2 + .balign 4 + FUNC(__orddf2) +__orddf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __unorddf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __orddf2_asm` ld.ab r10,[sp,4] + pop_s blink + brne.d r11,0,0f + ld.ab r11,[sp,4] + jcc [blink] + bl abort +0: jcs [blink] + bl abort + ENDFUNC(__orddf2) +#define __orddf2 __orddf2_asm +#endif /* DEBUG */ + .global __orddf2 + .balign 4 + HIDDEN_FUNC(__orddf2) +__orddf2: + bmsk r12,DBL0H,20 + add1.f r12,r12,DBL0H /* clear z; set c if NaN. */ + bmsk r12,DBL1H,20 + j_s.d [blink] + add1.cc.f r12,r12,DBL1H /* clear z; set c if NaN. */ + ENDFUNC(__orddf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/ordsf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/ordsf2.S new file mode 100644 index 0000000..de764a1 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/ordsf2.S @@ -0,0 +1,63 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" +/* inputs: r0, r1 + output: c flag + clobber: r12, flags + For NaNs, bit 22 .. bit 30 must be set. */ +#if 0 /* DEBUG: disabled self-check wrapper -- runs __unordsf2_c and __ordsf2_asm on the same operands and calls abort unless the asm carry flag is set exactly when the C result is nonzero. */ + .global __ordsf2 + .balign 4 + FUNC(__ordsf2) +__ordsf2: + st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4] + st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4] + st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1` + bl.d __unordsf2_c` push_s r0 + mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3 + ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]` + ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4] + bl.d __ordsf2_asm` ld.ab r10,[sp,4] + pop_s blink + brne.d r11,0,0f + ld.ab r11,[sp,4] + jcc [blink] + bl abort +0: jcs [blink] + bl abort + ENDFUNC(__ordsf2) +#define __ordsf2 __ordsf2_asm +#endif /* DEBUG */ + .global __ordsf2 + .balign 4 + HIDDEN_FUNC(__ordsf2) +__ordsf2: + bmsk r12,r0,23 + add1.f r12,r12,r0 /* clear z; set c if NaN. */ + bmsk r12,r1,23 + j_s.d [blink] + add1.cc.f r12,r12,r1 /* clear z; set c if NaN. 
*/ + ENDFUNC(__ordsf2) diff --git a/gcc-4.9/libgcc/config/arc/ieee-754/truncdfsf2.S b/gcc-4.9/libgcc/config/arc/ieee-754/truncdfsf2.S new file mode 100644 index 0000000..87f40e4 --- /dev/null +++ b/gcc-4.9/libgcc/config/arc/ieee-754/truncdfsf2.S @@ -0,0 +1,134 @@ +/* Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke <joern.rennecke@embecosm.com> + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "arc-ieee-754.h" + +#if 0 /* DEBUG */ + FUNC(__truncdfsf2) + .global __truncdfsf2 + .balign 4 +__truncdfsf2: + push_s blink + push_s r0 + bl.d __truncdfsf2_c + push_s r1 + mov_s r2,r0 + pop_s r1 + ld r0,[sp] + bl.d __truncdfsf2_asm + st r2,[sp] + pop_s r1 + pop_s blink + cmp r0,r1 + jeq_s [blink] + and r12,r0,r1 + bic.f 0,0x7f800000,r12 + bne 0f + bmsk.f 0,r0,22 + bmsk.ne.f r1,r1,22 + jne_s [blink] ; both NaN -> OK +0: bl abort + ENDFUNC(__truncdfsf2) +#define __truncdfsf2 __truncdfsf2_asm +#endif /* DEBUG */ + + .globa |