aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.2.1/gcc/config/arm/ieee754-df.S
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.2.1/gcc/config/arm/ieee754-df.S')
-rw-r--r--gcc-4.2.1/gcc/config/arm/ieee754-df.S1335
1 files changed, 0 insertions, 1335 deletions
diff --git a/gcc-4.2.1/gcc/config/arm/ieee754-df.S b/gcc-4.2.1/gcc/config/arm/ieee754-df.S
deleted file mode 100644
index 74d9f0d9c..000000000
--- a/gcc-4.2.1/gcc/config/arm/ieee754-df.S
+++ /dev/null
@@ -1,1335 +0,0 @@
-/* ieee754-df.S double-precision floating point support for ARM
-
- Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
- Contributed by Nicolas Pitre (nico@cam.org)
-
- This file is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 2, or (at your option) any
- later version.
-
- In addition to the permissions in the GNU General Public License, the
- Free Software Foundation gives you unlimited permission to link the
- compiled version of this file into combinations with other programs,
- and to distribute those combinations without any restriction coming
- from the use of this file. (The General Public License restrictions
- do apply in other respects; for example, they cover modification of
- the file, and distribution when not linked into a combine
- executable.)
-
- This file is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; see the file COPYING. If not, write to
- the Free Software Foundation, 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA. */
-
-/*
- * Notes:
- *
- * The goal of this code is to be as fast as possible. This is
- * not meant to be easy to understand for the casual reader.
- * For slightly simpler code please see the single precision version
- * of this file.
- *
- * Only the default rounding mode is intended for best performances.
- * Exceptions aren't supported yet, but that can be added quite easily
- * if necessary without impacting performances.
- */
-
-
-@ For FPA, float words are always big-endian.
-@ For VFP, floats words follow the memory system mode.
-#if defined(__VFP_FP__) && !defined(__ARMEB__)
-#define xl r0
-#define xh r1
-#define yl r2
-#define yh r3
-#else
-#define xh r0
-#define xl r1
-#define yh r2
-#define yl r3
-#endif
-
-
-#ifdef L_negdf2
-
-ARM_FUNC_START negdf2
-ARM_FUNC_ALIAS aeabi_dneg negdf2
-
- @ flip sign bit
- eor xh, xh, #0x80000000
- RET
-
- FUNC_END aeabi_dneg
- FUNC_END negdf2
-
-#endif
-
-#ifdef L_addsubdf3
-
-ARM_FUNC_START aeabi_drsub
-
- eor xh, xh, #0x80000000 @ flip sign bit of first arg
- b 1f
-
-ARM_FUNC_START subdf3
-ARM_FUNC_ALIAS aeabi_dsub subdf3
-
- eor yh, yh, #0x80000000 @ flip sign bit of second arg
-#if defined(__INTERWORKING_STUBS__)
- b 1f @ Skip Thumb-code prologue
-#endif
-
-ARM_FUNC_START adddf3
-ARM_FUNC_ALIAS aeabi_dadd adddf3
-
-1: stmfd sp!, {r4, r5, lr}
-
- @ Look for zeroes, equal values, INF, or NAN.
- mov r4, xh, lsl #1
- mov r5, yh, lsl #1
- teq r4, r5
- teqeq xl, yl
- orrnes ip, r4, xl
- orrnes ip, r5, yl
- mvnnes ip, r4, asr #21
- mvnnes ip, r5, asr #21
- beq LSYM(Lad_s)
-
- @ Compute exponent difference. Make largest exponent in r4,
- @ corresponding arg in xh-xl, and positive exponent difference in r5.
- mov r4, r4, lsr #21
- rsbs r5, r4, r5, lsr #21
- rsblt r5, r5, #0
- ble 1f
- add r4, r4, r5
- eor yl, xl, yl
- eor yh, xh, yh
- eor xl, yl, xl
- eor xh, yh, xh
- eor yl, xl, yl
- eor yh, xh, yh
-1:
- @ If exponent difference is too large, return largest argument
- @ already in xh-xl. We need up to 54 bit to handle proper rounding
- @ of 0x1p54 - 1.1.
- cmp r5, #54
- RETLDM "r4, r5" hi
-
- @ Convert mantissa to signed integer.
- tst xh, #0x80000000
- mov xh, xh, lsl #12
- mov ip, #0x00100000
- orr xh, ip, xh, lsr #12
- beq 1f
- rsbs xl, xl, #0
- rsc xh, xh, #0
-1:
- tst yh, #0x80000000
- mov yh, yh, lsl #12
- orr yh, ip, yh, lsr #12
- beq 1f
- rsbs yl, yl, #0
- rsc yh, yh, #0
-1:
- @ If exponent == difference, one or both args were denormalized.
- @ Since this is not common case, rescale them off line.
- teq r4, r5
- beq LSYM(Lad_d)
-LSYM(Lad_x):
-
- @ Compensate for the exponent overlapping the mantissa MSB added later
- sub r4, r4, #1
-
- @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
- rsbs lr, r5, #32
- blt 1f
- mov ip, yl, lsl lr
- adds xl, xl, yl, lsr r5
- adc xh, xh, #0
- adds xl, xl, yh, lsl lr
- adcs xh, xh, yh, asr r5
- b 2f
-1: sub r5, r5, #32
- add lr, lr, #32
- cmp yl, #1
- mov ip, yh, lsl lr
- orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later
- adds xl, xl, yh, asr r5
- adcs xh, xh, yh, asr #31
-2:
- @ We now have a result in xh-xl-ip.
- @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
- and r5, xh, #0x80000000
- bpl LSYM(Lad_p)
- rsbs ip, ip, #0
- rscs xl, xl, #0
- rsc xh, xh, #0
-
- @ Determine how to normalize the result.
-LSYM(Lad_p):
- cmp xh, #0x00100000
- bcc LSYM(Lad_a)
- cmp xh, #0x00200000
- bcc LSYM(Lad_e)
-
- @ Result needs to be shifted right.
- movs xh, xh, lsr #1
- movs xl, xl, rrx
- mov ip, ip, rrx
- add r4, r4, #1
-
- @ Make sure we did not bust our exponent.
- mov r2, r4, lsl #21
- cmn r2, #(2 << 21)
- bcs LSYM(Lad_o)
-
- @ Our result is now properly aligned into xh-xl, remaining bits in ip.
- @ Round with MSB of ip. If halfway between two numbers, round towards
- @ LSB of xl = 0.
- @ Pack final result together.
-LSYM(Lad_e):
- cmp ip, #0x80000000
- moveqs ip, xl, lsr #1
- adcs xl, xl, #0
- adc xh, xh, r4, lsl #20
- orr xh, xh, r5
- RETLDM "r4, r5"
-
- @ Result must be shifted left and exponent adjusted.
-LSYM(Lad_a):
- movs ip, ip, lsl #1
- adcs xl, xl, xl
- adc xh, xh, xh
- tst xh, #0x00100000
- sub r4, r4, #1
- bne LSYM(Lad_e)
-
- @ No rounding necessary since ip will always be 0 at this point.
-LSYM(Lad_l):
-
-#if __ARM_ARCH__ < 5
-
- teq xh, #0
- movne r3, #20
- moveq r3, #52
- moveq xh, xl
- moveq xl, #0
- mov r2, xh
- cmp r2, #(1 << 16)
- movhs r2, r2, lsr #16
- subhs r3, r3, #16
- cmp r2, #(1 << 8)
- movhs r2, r2, lsr #8
- subhs r3, r3, #8
- cmp r2, #(1 << 4)
- movhs r2, r2, lsr #4
- subhs r3, r3, #4
- cmp r2, #(1 << 2)
- subhs r3, r3, #2
- sublo r3, r3, r2, lsr #1
- sub r3, r3, r2, lsr #3
-
-#else
-
- teq xh, #0
- moveq xh, xl
- moveq xl, #0
- clz r3, xh
- addeq r3, r3, #32
- sub r3, r3, #11
-
-#endif
-
- @ determine how to shift the value.
- subs r2, r3, #32
- bge 2f
- adds r2, r2, #12
- ble 1f
-
- @ shift value left 21 to 31 bits, or actually right 11 to 1 bits
- @ since a register switch happened above.
- add ip, r2, #20
- rsb r2, r2, #12
- mov xl, xh, lsl ip
- mov xh, xh, lsr r2
- b 3f
-
- @ actually shift value left 1 to 20 bits, which might also represent
- @ 32 to 52 bits if counting the register switch that happened earlier.
-1: add r2, r2, #20
-2: rsble ip, r2, #32
- mov xh, xh, lsl r2
- orrle xh, xh, xl, lsr ip
- movle xl, xl, lsl r2
-
- @ adjust exponent accordingly.
-3: subs r4, r4, r3
- addge xh, xh, r4, lsl #20
- orrge xh, xh, r5
- RETLDM "r4, r5" ge
-
- @ Exponent too small, denormalize result.
- @ Find out proper shift value.
- mvn r4, r4
- subs r4, r4, #31
- bge 2f
- adds r4, r4, #12
- bgt 1f
-
- @ shift result right of 1 to 20 bits, sign is in r5.
- add r4, r4, #20
- rsb r2, r4, #32
- mov xl, xl, lsr r4
- orr xl, xl, xh, lsl r2
- orr xh, r5, xh, lsr r4
- RETLDM "r4, r5"
-
- @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
- @ a register switch from xh to xl.
-1: rsb r4, r4, #12
- rsb r2, r4, #32
- mov xl, xl, lsr r2
- orr xl, xl, xh, lsl r4
- mov xh, r5
- RETLDM "r4, r5"
-
- @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
- @ from xh to xl.
-2: mov xl, xh, lsr r4
- mov xh, r5
- RETLDM "r4, r5"
-
- @ Adjust exponents for denormalized arguments.
- @ Note that r4 must not remain equal to 0.
-LSYM(Lad_d):
- teq r4, #0
- eor yh, yh, #0x00100000
- eoreq xh, xh, #0x00100000
- addeq r4, r4, #1
- subne r5, r5, #1
- b LSYM(Lad_x)
-
-
-LSYM(Lad_s):
- mvns ip, r4, asr #21
- mvnnes ip, r5, asr #21
- beq LSYM(Lad_i)
-
- teq r4, r5
- teqeq xl, yl
- beq 1f
-
- @ Result is x + 0.0 = x or 0.0 + y = y.
- orrs ip, r4, xl
- moveq xh, yh
- moveq xl, yl
- RETLDM "r4, r5"
-
-1: teq xh, yh
-
- @ Result is x - x = 0.
- movne xh, #0
- movne xl, #0
- RETLDM "r4, r5" ne
-
- @ Result is x + x = 2x.
- movs ip, r4, lsr #21
- bne 2f
- movs xl, xl, lsl #1
- adcs xh, xh, xh
- orrcs xh, xh, #0x80000000
- RETLDM "r4, r5"
-2: adds r4, r4, #(2 << 21)
- addcc xh, xh, #(1 << 20)
- RETLDM "r4, r5" cc
- and r5, xh, #0x80000000
-
- @ Overflow: return INF.
-LSYM(Lad_o):
- orr xh, r5, #0x7f000000
- orr xh, xh, #0x00f00000
- mov xl, #0
- RETLDM "r4, r5"
-
- @ At least one of x or y is INF/NAN.
- @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
- @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
- @ if either is NAN: return NAN
- @ if opposite sign: return NAN
- @ otherwise return xh-xl (which is INF or -INF)
-LSYM(Lad_i):
- mvns ip, r4, asr #21
- movne xh, yh
- movne xl, yl
- mvneqs ip, r5, asr #21
- movne yh, xh
- movne yl, xl
- orrs r4, xl, xh, lsl #12
- orreqs r5, yl, yh, lsl #12
- teqeq xh, yh
- orrne xh, xh, #0x00080000 @ quiet NAN
- RETLDM "r4, r5"
-
- FUNC_END aeabi_dsub
- FUNC_END subdf3
- FUNC_END aeabi_dadd
- FUNC_END adddf3
-
-ARM_FUNC_START floatunsidf
-ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
-
- teq r0, #0
- moveq r1, #0
- RETc(eq)
- stmfd sp!, {r4, r5, lr}
- mov r4, #0x400 @ initial exponent
- add r4, r4, #(52-1 - 1)
- mov r5, #0 @ sign bit is 0
- .ifnc xl, r0
- mov xl, r0
- .endif
- mov xh, #0
- b LSYM(Lad_l)
-
- FUNC_END aeabi_ui2d
- FUNC_END floatunsidf
-
-ARM_FUNC_START floatsidf
-ARM_FUNC_ALIAS aeabi_i2d floatsidf
-
- teq r0, #0
- moveq r1, #0
- RETc(eq)
- stmfd sp!, {r4, r5, lr}
- mov r4, #0x400 @ initial exponent
- add r4, r4, #(52-1 - 1)
- ands r5, r0, #0x80000000 @ sign bit in r5
- rsbmi r0, r0, #0 @ absolute value
- .ifnc xl, r0
- mov xl, r0
- .endif
- mov xh, #0
- b LSYM(Lad_l)
-
- FUNC_END aeabi_i2d
- FUNC_END floatsidf
-
-ARM_FUNC_START extendsfdf2
-ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
-
- movs r2, r0, lsl #1 @ toss sign bit
- mov xh, r2, asr #3 @ stretch exponent
- mov xh, xh, rrx @ retrieve sign bit
- mov xl, r2, lsl #28 @ retrieve remaining bits
- andnes r3, r2, #0xff000000 @ isolate exponent
- teqne r3, #0xff000000 @ if not 0, check if INF or NAN
- eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
- RETc(ne) @ and return it.
-
- teq r2, #0 @ if actually 0
- teqne r3, #0xff000000 @ or INF or NAN
- RETc(eq) @ we are done already.
-
- @ value was denormalized. We can normalize it now.
- stmfd sp!, {r4, r5, lr}
- mov r4, #0x380 @ setup corresponding exponent
- and r5, xh, #0x80000000 @ move sign bit in r5
- bic xh, xh, #0x80000000
- b LSYM(Lad_l)
-
- FUNC_END aeabi_f2d
- FUNC_END extendsfdf2
-
-ARM_FUNC_START floatundidf
-ARM_FUNC_ALIAS aeabi_ul2d floatundidf
-
- orrs r2, r0, r1
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
- mvfeqd f0, #0.0
-#endif
- RETc(eq)
-
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
- @ For hard FPA code we want to return via the tail below so that
- @ we can return the result in f0 as well as in r0/r1 for backwards
- @ compatibility.
- adr ip, LSYM(f0_ret)
- stmfd sp!, {r4, r5, ip, lr}
-#else
- stmfd sp!, {r4, r5, lr}
-#endif
-
- mov r5, #0
- b 2f
-
-ARM_FUNC_START floatdidf
-ARM_FUNC_ALIAS aeabi_l2d floatdidf
-
- orrs r2, r0, r1
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
- mvfeqd f0, #0.0
-#endif
- RETc(eq)
-
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
- @ For hard FPA code we want to return via the tail below so that
- @ we can return the result in f0 as well as in r0/r1 for backwards
- @ compatibility.
- adr ip, LSYM(f0_ret)
- stmfd sp!, {r4, r5, ip, lr}
-#else
- stmfd sp!, {r4, r5, lr}
-#endif
-
- ands r5, ah, #0x80000000 @ sign bit in r5
- bpl 2f
- rsbs al, al, #0
- rsc ah, ah, #0
-2:
- mov r4, #0x400 @ initial exponent
- add r4, r4, #(52-1 - 1)
-
- @ FPA little-endian: must swap the word order.
- .ifnc xh, ah
- mov ip, al
- mov xh, ah
- mov xl, ip
- .endif
-
- movs ip, xh, lsr #22
- beq LSYM(Lad_p)
-
- @ The value is too big. Scale it down a bit...
- mov r2, #3
- movs ip, ip, lsr #3
- addne r2, r2, #3
- movs ip, ip, lsr #3
- addne r2, r2, #3
- add r2, r2, ip, lsr #3
-
- rsb r3, r2, #32
- mov ip, xl, lsl r3
- mov xl, xl, lsr r2
- orr xl, xl, xh, lsl r3
- mov xh, xh, lsr r2
- add r4, r4, r2
- b LSYM(Lad_p)
-
-#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-
- @ Legacy code expects the result to be returned in f0. Copy it
- @ there as well.
-LSYM(f0_ret):
- stmfd sp!, {r0, r1}
- ldfd f0, [sp], #8
- RETLDM
-
-#endif
-
- FUNC_END floatdidf
- FUNC_END aeabi_l2d
- FUNC_END floatundidf
- FUNC_END aeabi_ul2d
-
-#endif /* L_addsubdf3 */
-
-#ifdef L_muldivdf3
-
-ARM_FUNC_START muldf3
-ARM_FUNC_ALIAS aeabi_dmul muldf3
- stmfd sp!, {r4, r5, r6, lr}
-
- @ Mask out exponents, trap any zero/denormal/INF/NAN.
- mov ip, #0xff
- orr ip, ip, #0x700
- ands r4, ip, xh, lsr #20
- andnes r5, ip, yh, lsr #20
- teqne r4, ip
- teqne r5, ip
- bleq LSYM(Lml_s)
-
- @ Add exponents together
- add r4, r4, r5
-
- @ Determine final sign.
- eor r6, xh, yh
-
- @ Convert mantissa to unsigned integer.
- @ If power of two, branch to a separate path.
- bic xh, xh, ip, lsl #21
- bic yh, yh, ip, lsl #21
- orrs r5, xl, xh, lsl #12
- orrnes r5, yl, yh, lsl #12
- orr xh, xh, #0x00100000
- orr yh, yh, #0x00100000
- beq LSYM(Lml_1)
-
-#if __ARM_ARCH__ < 4
-
- @ Put sign bit in r6, which will be restored in yl later.
- and r6, r6, #0x80000000
-
- @ Well, no way to make it shorter without the umull instruction.
- stmfd sp!, {r6, r7, r8, r9, sl, fp}
- mov r7, xl, lsr #16
- mov r8, yl, lsr #16
- mov r9, xh, lsr #16
- mov sl, yh, lsr #16
- bic xl, xl, r7, lsl #16
- bic yl, yl, r8, lsl #16
- bic xh, xh, r9, lsl #16
- bic yh, yh, sl, lsl #16
- mul ip, xl, yl
- mul fp, xl, r8
- mov lr, #0
- adds ip, ip, fp, lsl #16
- adc lr, lr, fp, lsr #16
- mul fp, r7, yl
- adds ip, ip, fp, lsl #16
- adc lr, lr, fp, lsr #16
- mul fp, xl, sl
- mov r5, #0
- adds lr, lr, fp, lsl #16
- adc r5, r5, fp, lsr #16
- mul fp, r7, yh
- adds lr, lr, fp, lsl #16
- adc r5, r5, fp, lsr #16
- mul fp, xh, r8
- adds lr, lr, fp, lsl #16
- adc r5, r5, fp, lsr #16
- mul fp, r9, yl
- adds lr, lr, fp, lsl #16
- adc r5, r5, fp, lsr #16
- mul fp, xh, sl
- mul r6, r9, sl
- adds r5, r5, fp, lsl #16
- adc r6, r6, fp, lsr #16
- mul fp, r9, yh
- adds r5, r5, fp, lsl #16
- adc r6, r6, fp, lsr #16
- mul fp, xl, yh
- adds lr, lr, fp
- mul fp, r7, sl
- adcs r5, r5, fp
- mul fp, xh, yl
- adc r6, r6, #0
- adds lr, lr, fp
- mul fp, r9, r8
- adcs r5, r5, fp
- mul fp, r7, r8
- adc r6, r6, #0
- adds lr, lr, fp
- mul fp, xh, yh
- adcs r5, r5, fp
- adc r6, r6, #0
- ldmfd sp!, {yl, r7, r8, r9, sl, fp}
-
-#else
-
- @ Here is the actual multiplication.
- umull ip, lr, xl, yl
- mov r5, #0
- umlal lr, r5, xh, yl
- and yl, r6, #0x80000000
- umlal lr, r5, xl, yh
- mov r6, #0
- umlal r5, r6, xh, yh
-
-#endif
-
- @ The LSBs in ip are only significant for the final rounding.
- @ Fold them into lr.
- teq ip, #0
- orrne lr, lr, #1
-
- @ Adjust result upon the MSB position.
- sub r4, r4, #0xff
- cmp r6, #(1 << (20-11))
- sbc r4, r4, #0x300
- bcs 1f
- movs lr, lr, lsl #1
- adcs r5, r5, r5
- adc r6, r6, r6
-1:
- @ Shift to final position, add sign to result.
- orr xh, yl, r6, lsl #11
- orr xh, xh, r5, lsr #21
- mov xl, r5, lsl #11
- orr xl, xl, lr, lsr #21
- mov lr, lr, lsl #11
-
- @ Check exponent range for under/overflow.
- subs ip, r4, #(254 - 1)
- cmphi ip, #0x700
- bhi LSYM(Lml_u)
-
- @ Round the result, merge final exponent.
- cmp lr, #0x80000000
- moveqs lr, xl, lsr #1
- adcs xl, xl, #0
- adc xh, xh, r4, lsl #20
- RETLDM "r4, r5, r6"
-
- @ Multiplication by 0x1p*: let''s shortcut a lot of code.
-LSYM(Lml_1):
- and r6, r6, #0x80000000
- orr xh, r6, xh
- orr xl, xl, yl
- eor xh, xh, yh
- subs r4, r4, ip, lsr #1
- rsbgts r5, r4, ip
- orrgt xh, xh, r4, lsl #20
- RETLDM "r4, r5, r6" gt
-
- @ Under/overflow: fix things up for the code below.
- orr xh, xh, #0x00100000
- mov lr, #0
- subs r4, r4, #1
-
-LSYM(Lml_u):
- @ Overflow?
- bgt LSYM(Lml_o)
-
- @ Check if denormalized result is possible, otherwise return signed 0.
- cmn r4, #(53 + 1)
- movle xl, #0
- bicle xh, xh, #0x7fffffff
- RETLDM "r4, r5, r6" le
-
- @ Find out proper shift value.
- rsb r4, r4, #0
- subs r4, r4, #32
- bge 2f
- adds r4, r4, #12
- bgt 1f
-
- @ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
- add r4, r4, #20
- rsb r5, r4, #32
- mov r3, xl, lsl r5
- mov xl, xl, lsr r4
- orr xl, xl, xh, lsl r5
- and r2, xh, #0x80000000
- bic xh, xh, #0x80000000
- adds xl, xl, r3, lsr #31
- adc xh, r2, xh, lsr r4
- orrs lr, lr, r3, lsl #1
- biceq xl, xl, r3, lsr #31
- RETLDM "r4, r5, r6"
-
- @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
- @ a register switch from xh to xl. Then round.
-1: rsb r4, r4, #12
- rsb r5, r4, #32
- mov r3, xl, lsl r4
- mov xl, xl, lsr r5
- orr xl, xl, xh, lsl r4
- bic xh, xh, #0x7fffffff
- adds xl, xl, r3, lsr #31
- adc xh, xh, #0
- orrs lr, lr, r3, lsl #1
- biceq xl, xl, r3, lsr #31
- RETLDM "r4, r5, r6"
-
- @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
- @ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
-2: rsb r5, r4, #32
- orr lr, lr, xl, lsl r5
- mov r3, xl, lsr r4
- orr r3, r3, xh, lsl r5
- mov xl, xh, lsr r4
- bic xh, xh, #0x7fffffff
- bic xl, xl, xh, lsr r4
- add xl, xl, r3, lsr #31
- orrs lr, lr, r3, lsl #1
- biceq xl, xl, r3, lsr #31
- RETLDM "r4, r5, r6"
-
- @ One or both arguments are denormalized.
- @ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
- teq r4, #0
- bne 2f
- and r6, xh, #0x80000000
-1: movs xl, xl, lsl #1
- adc xh, xh, xh
- tst xh, #0x00100000
- subeq r4, r4, #1
- beq 1b
- orr xh, xh, r6
- teq r5, #0
- movne pc, lr
-2: and r6, yh, #0x80000000
-3: movs yl, yl, lsl #1
- adc yh, yh, yh
- tst yh, #0x00100000
- subeq r5, r5, #1
- beq 3b
- orr yh, yh, r6
- mov pc, lr
-
-LSYM(Lml_s):
- @ Isolate the INF and NAN cases away
- teq r4, ip
- and r5, ip, yh, lsr #20
- teqne r5, ip
- beq 1f
-
- @ Here, one or more arguments are either denormalized or zero.
- orrs r6, xl, xh, lsl #1
- orrnes r6, yl, yh, lsl #1
- bne LSYM(Lml_d)
-
- @ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
- eor xh, xh, yh
- bic xh, xh, #0x7fffffff
- mov xl, #0
- RETLDM "r4, r5, r6"
-
-1: @ One or both args are INF or NAN.
- orrs r6, xl, xh, lsl #1
- moveq xl, yl
- moveq xh, yh
- orrnes r6, yl, yh, lsl #1
- beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
- teq r4, ip
- bne 1f
- orrs r6, xl, xh, lsl #12
- bne LSYM(Lml_n) @ NAN * <anything> -> NAN
-1: teq r5, ip
- bne LSYM(Lml_i)
- orrs r6, yl, yh, lsl #12
- movne xl, yl
- movne xh, yh
- bne LSYM(Lml_n) @ <anything> * NAN -> NAN
-
- @ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
- eor xh, xh, yh
-
- @ Overflow: return INF (sign already in xh).
-LSYM(Lml_o):
- and xh, xh, #0x80000000
- orr xh, xh, #0x7f000000
- orr xh, xh, #0x00f00000
- mov xl, #0
- RETLDM "r4, r5, r6"
-
- @ Return a quiet NAN.
-LSYM(Lml_n):
- orr xh, xh, #0x7f000000
- orr xh, xh, #0x00f80000
- RETLDM "r4, r5, r6"
-
- FUNC_END aeabi_dmul
- FUNC_END muldf3
-
-ARM_FUNC_START divdf3
-ARM_FUNC_ALIAS aeabi_ddiv divdf3
-
- stmfd sp!, {r4, r5, r6, lr}
-
- @ Mask out exponents, trap any zero/denormal/INF/NAN.
- mov ip, #0xff
- orr ip, ip, #0x700
- ands r4, ip, xh, lsr #20
- andnes r5, ip, yh, lsr #20
- teqne r4, ip
- teqne r5, ip
- bleq LSYM(Ldv_s)
-
- @ Substract divisor exponent from dividend''s.
- sub r4, r4, r5
-
- @ Preserve final sign into lr.
- eor lr, xh, yh
-
- @ Convert mantissa to unsigned integer.
- @ Dividend -> r5-r6, divisor -> yh-yl.
- orrs r5, yl, yh, lsl #12
- mov xh, xh, lsl #12
- beq LSYM(Ldv_1)
- mov yh, yh, lsl #12
- mov r5, #0x10000000
- orr yh, r5, yh, lsr #4
- orr yh, yh, yl, lsr #24
- mov yl, yl, lsl #8
- orr r5, r5, xh, lsr #4
- orr r5, r5, xl, lsr #24
- mov r6, xl, lsl #8
-
- @ Initialize xh with final sign bit.
- and xh, lr, #0x80000000
-
- @ Ensure result will land to known bit position.
- @ Apply exponent bias accordingly.
- cmp r5, yh
- cmpeq r6, yl
- adc r4, r4, #(255 - 2)
- add r4, r4, #0x300
- bcs 1f
- movs yh, yh, lsr #1
- mov yl, yl, rrx
-1:
- @ Perform first substraction to align result to a nibble.
- subs r6, r6, yl
- sbc r5, r5, yh
- movs yh, yh, lsr #1
- mov yl, yl, rrx
- mov xl, #0x00100000
- mov ip, #0x00080000
-
- @ The actual division loop.
-1: subs lr, r6, yl
- sbcs lr, r5, yh
- subcs r6, r6, yl
- movcs r5, lr
- orrcs xl, xl, ip
- movs yh, yh, lsr #1
- mov yl, yl, rrx
- subs lr, r6, yl
- sbcs lr, r5, yh
- subcs r6, r6, yl
- movcs r5, lr
- orrcs xl, xl, ip, lsr #1
- movs yh, yh, lsr #1
- mov yl, yl, rrx
- subs lr, r6, yl
- sbcs lr, r5, yh
- subcs r6, r6, yl
- movcs r5, lr
- orrcs xl, xl, ip, lsr #2
- movs yh, yh, lsr #1
- mov yl, yl, rrx
- subs lr, r6, yl
- sbcs lr, r5, yh
- subcs r6, r6, yl
- movcs r5, lr
- orrcs xl, xl, ip, lsr #3
-
- orrs lr, r5, r6
- beq 2f
- mov r5, r5, lsl #4
- orr r5, r5, r6, lsr #28
- mov r6, r6, lsl #4
- mov yh, yh, lsl #3
- orr yh, yh, yl, lsr #29
- mov yl, yl, lsl #3
- movs ip, ip, lsr #4
- bne 1b
-
- @ We are done with a word of the result.
- @ Loop again for the low word if this pass was for the high word.
- tst xh, #0x00100000
- bne 3f
- orr xh, xh, xl
- mov xl, #0
- mov ip, #0x80000000
- b 1b
-2:
- @ Be sure result starts in the high word.
- tst xh, #0x00100000
- orreq xh, xh, xl
- moveq xl, #0
-3:
- @ Check exponent range for under/overflow.
- subs ip, r4, #(254 - 1)
- cmphi ip, #0x700
- bhi LSYM(Lml_u)
-
- @ Round the result, merge final exponent.
- subs ip, r5, yh
- subeqs ip, r6, yl
- moveqs ip, xl, lsr #1
- adcs xl, xl, #0
- adc xh, xh, r4, lsl #20
- RETLDM "r4, r5, r6"
-
- @ Division by 0x1p*: shortcut a lot of code.
-LSYM(Ldv_1):
- and lr, lr, #0x80000000
- orr xh, lr, xh, lsr #12
- adds r4, r4, ip, lsr #1
- rsbgts r5, r4, ip
- orrgt xh, xh, r4, lsl #20
- RETLDM "r4, r5, r6" gt
-
- orr xh, xh, #0x00100000
- mov lr, #0
- subs r4, r4, #1
- b LSYM(Lml_u)
-
- @ Result mightt need to be denormalized: put remainder bits
- @ in lr for rounding considerations.
-LSYM(Ldv_u):
- orr lr, r5, r6
- b LSYM(Lml_u)
-
- @ One or both arguments is either INF, NAN or zero.
-LSYM(Ldv_s):
- and r5, ip, yh, lsr #20
- teq r4, ip
- teqeq r5, ip
- beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
- teq r4, ip
- bne 1f
- orrs r4, xl, xh, lsl #12
- bne LSYM(Lml_n) @ NAN / <anything> -> NAN
- teq r5, ip
- bne LSYM(Lml_i) @ INF / <anything> -> INF
- mov xl, yl
- mov xh, yh
- b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
-1: teq r5, ip
- bne 2f
- orrs r5, yl, yh, lsl #12
- beq LSYM(Lml_z) @ <anything> / INF -> 0
- mov xl, yl
- mov xh, yh
- b LSYM(Lml_n) @ <anything> / NAN -> NAN
-2: @ If both are nonzero, we need to normalize and resume above.
- orrs r6, xl, xh, lsl #1
- orrnes r6, yl, yh, lsl #1
- bne LSYM(Lml_d)
- @ One or both arguments are 0.
- orrs r4, xl, xh, lsl #1
- bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
- orrs r5, yl, yh, lsl #1
- bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
- b LSYM(Lml_n) @ 0 / 0 -> NAN
-
- FUNC_END aeabi_ddiv
- FUNC_END divdf3
-
-#endif /* L_muldivdf3 */
-
-#ifdef L_cmpdf2
-
-@ Note: only r0 (return value) and ip are clobbered here.
-
-ARM_FUNC_START gtdf2
-ARM_FUNC_ALIAS gedf2 gtdf2
- mov ip, #-1
- b 1f
-
-ARM_FUNC_START ltdf2
-ARM_FUNC_ALIAS ledf2 ltdf2
- mov ip, #1
- b 1f
-
-ARM_FUNC_START cmpdf2
-ARM_FUNC_ALIAS nedf2 cmpdf2
-ARM_FUNC_ALIAS eqdf2 cmpdf2
- mov ip, #1 @ how should we specify unordered here?
-
-1: str ip, [sp, #-4]
-
- @ Trap any INF/NAN first.
- mov ip, xh, lsl #1
- mvns ip, ip, asr #21
- mov ip, yh, lsl #1
- mvnnes ip, ip, asr #21
- beq 3f
-
- @ Test for equality.
- @ Note that 0.0 is equal to -0.0.
-2: orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
- orreqs ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0
- teqne xh, yh @ or xh == yh
- teqeq xl, yl @ and xl == yl
- moveq r0, #0 @ then equal.
- RETc(eq)
-
- @ Clear C flag
- cmn r0, #0
-
- @ Compare sign,
- teq xh, yh
-
- @ Compare values if same sign
- cmppl xh, yh
- cmpeq xl, yl
-
- @ Result:
- movcs r0, yh, asr #31
- mvncc r0, yh, asr #31
- orr r0, r0, #1
- RET
-
- @ Look for a NAN.
-3: mov ip, xh, lsl #1
- mvns ip, ip, asr #21
- bne 4f
- orrs ip, xl, xh, lsl #12
- bne 5f @ x is NAN
-4: mov ip, yh, lsl #1
- mvns ip, ip, asr #21
- bne 2b
- orrs ip, yl, yh, lsl #12
- beq 2b @ y is not NAN
-5: ldr r0, [sp, #-4] @ unordered return code
- RET
-
- FUNC_END gedf2
- FUNC_END gtdf2
- FUNC_END ledf2
- FUNC_END ltdf2
- FUNC_END nedf2
- FUNC_END eqdf2
- FUNC_END cmpdf2
-
-ARM_FUNC_START aeabi_cdrcmple
-
- mov ip, r0
- mov r0, r2
- mov r2, ip
- mov ip, r1
- mov r1, r3
- mov r3, ip
- b 6f
-
-ARM_FUNC_START aeabi_cdcmpeq
-ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
-
- @ The status-returning routines are required to preserve all
- @ registers except ip, lr, and cpsr.
-6: stmfd sp!, {r0, lr}
- ARM_CALL cmpdf2
- @ Set the Z flag correctly, and the C flag unconditionally.
- cmp r0, #0
- @ Clear the C flag if the return value was -1, indicating
- @ that the first operand was smaller than the second.
- cmnmi r0, #0
- RETLDM "r0"
-
- FUNC_END aeabi_cdcmple
- FUNC_END aeabi_cdcmpeq
- FUNC_END aeabi_cdrcmple
-
-ARM_FUNC_START aeabi_dcmpeq
-
- str lr, [sp, #-8]!
- ARM_CALL aeabi_cdcmple
- moveq r0, #1 @ Equal to.
- movne r0, #0 @ Less than, greater than, or unordered.
- RETLDM
-
- FUNC_END aeabi_dcmpeq
-
-ARM_FUNC_START aeabi_dcmplt
-
- str lr, [sp, #-8]!
- ARM_CALL aeabi_cdcmple
- movcc r0, #1 @ Less than.
- movcs r0, #0 @ Equal to, greater than, or unordered.
- RETLDM
-
- FUNC_END aeabi_dcmplt
-
-ARM_FUNC_START aeabi_dcmple
-
- str lr, [sp, #-8]!
- ARM_CALL aeabi_cdcmple
- movls r0, #1 @ Less than or equal to.
- movhi r0, #0 @ Greater than or unordered.
- RETLDM
-
- FUNC_END aeabi_dcmple
-
-ARM_FUNC_START aeabi_dcmpge
-
- str lr, [sp, #-8]!
- ARM_CALL aeabi_cdrcmple
- movls r0, #1 @ Operand 2 is less than or equal to operand 1.
- movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
- RETLDM
-
- FUNC_END aeabi_dcmpge
-
-ARM_FUNC_START aeabi_dcmpgt
-
- str lr, [sp, #-8]!
- ARM_CALL aeabi_cdrcmple
- movcc r0, #1 @ Operand 2 is less than operand 1.
- movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
- @ or they are unordered.
- RETLDM
-
- FUNC_END aeabi_dcmpgt
-
-#endif /* L_cmpdf2 */
-
-#ifdef L_unorddf2
-
-ARM_FUNC_START unorddf2
-ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
-
- mov ip, xh, lsl #1
- mvns ip, ip, asr #21
- bne 1f
- orrs ip, xl, xh, lsl #12
- bne 3f @ x is NAN
-1: mov ip, yh, lsl #1
- mvns ip, ip, asr #21
- bne 2f
- orrs ip, yl, yh, lsl #12
- bne 3f @ y is NAN
-2: mov r0, #0 @ arguments are ordered.
- RET
-
-3: mov r0, #1 @ arguments are unordered.
- RET
-
- FUNC_END aeabi_dcmpun
- FUNC_END unorddf2
-
-#endif /* L_unorddf2 */
-
-#ifdef L_fixdfsi
-
-ARM_FUNC_START fixdfsi
-ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
-
- @ check exponent range.
- mov r2, xh, lsl #1
- adds r2, r2, #(1 << 21)
- bcs 2f @ value is INF or NAN
- bpl 1f @ value is too small
- mov r3, #(0xfffffc00 + 31)
- subs r2, r3, r2, asr #21
- bls 3f @ value is too large
-
- @ scale value
- mov r3, xh, lsl #11
- orr r3, r3, #0x80000000
- orr r3, r3, xl, lsr #21
- tst xh, #0x80000000 @ the sign bit
- mov r0, r3, lsr r2
- rsbne r0, r0, #0
- RET
-
-1: mov r0, #0
- RET
-
-2: orrs xl, xl, xh, lsl #12
- bne 4f @ x is NAN.
-3: ands r0, xh, #0x80000000 @ the sign bit
- moveq r0, #0x7fffffff @ maximum signed positive si
- RET
-
-4: mov r0, #0 @ How should we convert NAN?
- RET
-
- FUNC_END aeabi_d2iz
- FUNC_END fixdfsi
-
-#endif /* L_fixdfsi */
-
-#ifdef L_fixunsdfsi
-
-ARM_FUNC_START fixunsdfsi
-ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
-
- @ check exponent range.
- movs r2, xh, lsl #1
- bcs 1f @ value is negative
- adds r2, r2, #(1 << 21)
- bcs 2f @ value is INF or NAN
- bpl 1f @ value is too small
- mov r3, #(0xfffffc00 + 31)
- subs r2, r3, r2, asr #21
- bmi 3f @ value is too large
-
- @ scale value
- mov r3, xh, lsl #11
- orr r3, r3, #0x80000000
- orr r3, r3, xl, lsr #21
- mov r0, r3, lsr r2
- RET
-
-1: mov r0, #0
- RET
-
-2: orrs xl, xl, xh, lsl #12
- bne 4f @ value is NAN.
-3: mov r0, #0xffffffff @ maximum unsigned si
- RET
-
-4: mov r0, #0 @ How should we convert NAN?
- RET
-
- FUNC_END aeabi_d2uiz
- FUNC_END fixunsdfsi
-
-#endif /* L_fixunsdfsi */
-
-#ifdef L_truncdfsf2
-
-ARM_FUNC_START truncdfsf2
-ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
-
- @ check exponent range.
- mov r2, xh, lsl #1
- subs r3, r2, #((1023 - 127) << 21)
- subcss ip, r3, #(1 << 21)
- rsbcss ip, ip, #(254 << 21)
- bls 2f @ value is out of range
-
-1: @ shift and round mantissa
- and ip, xh, #0x80000000
- mov r2, xl, lsl #3
- orr xl, ip, xl, lsr #29
- cmp r2, #0x80000000
- adc r0, xl, r3, lsl #2
- biceq r0, r0, #1
- RET
-
-2: @ either overflow or underflow
- tst xh, #0x40000000
- bne 3f @ overflow
-
- @ check if denormalized value is possible
- adds r2, r3, #(23 << 21)
- andlt r0, xh, #0x80000000 @ too small, return signed 0.
- RETc(lt)
-
- @ denormalize value so we can resume with the code above afterwards.
- orr xh, xh, #0x00100000
- mov r2, r2, lsr #21
- rsb r2, r2, #24
- rsb ip, r2, #32
- movs r3, xl, lsl ip
- mov xl, xl, lsr r2
- orrne xl, xl, #1 @ fold r3 for rounding considerations.
- mov r3, xh, lsl #11
- mov r3, r3, lsr #11
- orr xl, xl, r3, lsl ip
- mov r3, r3, lsr r2
- mov r3, r3, lsl #1
- b 1b
-
-3: @ chech for NAN
- mvns r3, r2, asr #21
- bne 5f @ simple overflow
- orrs r3, xl, xh, lsl #12
- movne r0, #0x7f000000
- orrne r0, r0, #0x00c00000
- RETc(ne) @ return NAN
-
-5: @ return INF with sign
- and r0, xh, #0x80000000
- orr r0, r0, #0x7f000000
- orr r0, r0, #0x00800000
- RET
-
- FUNC_END aeabi_d2f
- FUNC_END truncdfsf2
-
-#endif /* L_truncdfsf2 */