diff options
Diffstat (limited to 'libc/arch-arm/bionic/memcmp.S')
-rw-r--r-- | libc/arch-arm/bionic/memcmp.S | 81 |
1 file changed, 71 insertions, 10 deletions
diff --git a/libc/arch-arm/bionic/memcmp.S b/libc/arch-arm/bionic/memcmp.S index 67dcddc1b..0fe26996c 100644 --- a/libc/arch-arm/bionic/memcmp.S +++ b/libc/arch-arm/bionic/memcmp.S @@ -43,36 +43,70 @@ * (2) The loads are scheduled in a way they won't stall */ +#if __ARM_ARCH__ >= 7 +#define __ARM_CORTEX + +#if defined(CORTEX_CACHE_LINE_32) +#define CACHE_LINE_SIZE 32 +#else +#define CACHE_LINE_SIZE 64 +#endif + +#endif /* __ARM_ARCH__ */ + + memcmp: .fnstart + +#if defined(__ARM_CORTEX) + pld [r0, #(CACHE_LINE_SIZE * 0)] + pld [r0, #(CACHE_LINE_SIZE * 1)] +#else + PLD (r0, #0) PLD (r1, #0) - +#endif /* take of the case where length is 0 or the buffers are the same */ cmp r0, r1 +#if !defined(__ARM_CORTEX) cmpne r2, #0 +#endif moveq r0, #0 bxeq lr +#if defined(__ARM_CORTEX) + pld [r1, #(CACHE_LINE_SIZE * 0)] + pld [r1, #(CACHE_LINE_SIZE * 1)] + + /* make sure we have at least 8+4 bytes, this simplify things below + * and avoid some overhead for small blocks + */ + cmp r2, #(8+4) + bmi 10f +#endif /* __ARM_CORTEX */ + + .save {r4, lr} /* save registers */ stmfd sp!, {r4, lr} - +#if !defined(__ARM_CORTEX) PLD (r0, #32) PLD (r1, #32) +#endif /* since r0 hold the result, move the first source * pointer somewhere else */ mov r4, r0 - + +#if !defined(__ARM_CORTEX) /* make sure we have at least 8+4 bytes, this simplify things below * and avoid some overhead for small blocks */ cmp r2, #(8+4) bmi 8f - +#endif /* align first pointer to word boundary * offset = -src & 3 */ @@ -109,8 +143,14 @@ memcmp: subs r2, r2, #(32 + 4) bmi 1f -0: PLD (r4, #64) +0: +#if defined(__ARM_CORTEX) + pld [r4, #(CACHE_LINE_SIZE * 2)] + pld [r1, #(CACHE_LINE_SIZE * 2)] +#else + PLD (r4, #64) PLD (r1, #64) +#endif ldr r0, [r4], #4 ldr lr, [r1, #4]! 
eors r0, r0, ip @@ -176,6 +216,21 @@ memcmp: 9: /* restore registers and return */ ldmfd sp!, {r4, lr} bx lr +#if defined(__ARM_CORTEX) +10: /* process less than 12 bytes */ + cmp r2, #0 + moveq r0, #0 + bxeq lr + mov r3, r0 +11: + ldrb r0, [r3], #1 + ldrb ip, [r1], #1 + subs r0, ip + bxne lr + subs r2, r2, #1 + bne 11b + bx lr +#endif /* __ARM_CORTEX */ .fnend @@ -198,8 +253,14 @@ memcmp: bic r1, r1, #3 ldr lr, [r1], #4 -6: PLD (r1, #64) +6: +#if defined(__ARM_CORTEX) + pld [r1, #(CACHE_LINE_SIZE * 2)] + pld [r4, #(CACHE_LINE_SIZE * 2)] +#else + PLD (r1, #64) PLD (r4, #64) +#endif mov ip, lr, lsr #16 ldr lr, [r1], #4 ldr r0, [r4], #4 @@ -240,13 +301,13 @@ memcmp: 4: /*************** offset is 1 or 3 (less optimized) ***************/ - stmfd sp!, {r5, r6, r7} + stmfd sp!, {r5, r6, r7} // r5 = rhs // r6 = lhs // r7 = scratch - mov r5, r0, lsl #3 /* r5 = right shift */ + mov r5, r0, lsl #3 /* r5 = right shift */ rsb r6, r5, #32 /* r6 = left shift */ /* align the unaligned pointer */ @@ -269,7 +330,7 @@ memcmp: bhs 6b sub r1, r1, r6, lsr #3 - ldmfd sp!, {r5, r6, r7} + ldmfd sp!, {r5, r6, r7} /* are we done? */ adds r2, r2, #8 @@ -284,5 +345,5 @@ memcmp: sub r1, r1, r6, lsr #3 sub r4, r4, #4 mov r2, #4 - ldmfd sp!, {r5, r6, r7} + ldmfd sp!, {r5, r6, r7} b 8b |