Diffstat (limited to 'libc/arch-arm/bionic/memcmp.S')
-rw-r--r--  libc/arch-arm/bionic/memcmp.S  81
1 file changed, 71 insertions, 10 deletions
diff --git a/libc/arch-arm/bionic/memcmp.S b/libc/arch-arm/bionic/memcmp.S
index 67dcddc1b..0fe26996c 100644
--- a/libc/arch-arm/bionic/memcmp.S
+++ b/libc/arch-arm/bionic/memcmp.S
@@ -43,36 +43,70 @@
* (2) The loads are scheduled in a way they won't stall
*/
+#if __ARM_ARCH__ >= 7
+#define __ARM_CORTEX
+
+#if defined(CORTEX_CACHE_LINE_32)
+#define CACHE_LINE_SIZE 32
+#else
+#define CACHE_LINE_SIZE 64
+#endif
+
+#endif /* __ARM_ARCH__ */
+
+
memcmp:
.fnstart
+
+#if defined(__ARM_CORTEX)
+ pld [r0, #(CACHE_LINE_SIZE * 0)]
+ pld [r0, #(CACHE_LINE_SIZE * 1)]
+#else
+
PLD (r0, #0)
PLD (r1, #0)
-
+#endif
 /* take care of the case where length is 0 or the buffers are the same */
cmp r0, r1
+#if !defined(__ARM_CORTEX)
cmpne r2, #0
+#endif
moveq r0, #0
bxeq lr
+#if defined(__ARM_CORTEX)
+ pld [r1, #(CACHE_LINE_SIZE * 0)]
+ pld [r1, #(CACHE_LINE_SIZE * 1)]
+
+	/* make sure we have at least 8+4 bytes, this simplifies things below
+	 * and avoids some overhead for small blocks
+ */
+ cmp r2, #(8+4)
+ bmi 10f
+#endif /* __ARM_CORTEX */
+
+
.save {r4, lr}
/* save registers */
stmfd sp!, {r4, lr}
-
+#if !defined(__ARM_CORTEX)
PLD (r0, #32)
PLD (r1, #32)
+#endif
/* since r0 hold the result, move the first source
* pointer somewhere else
*/
mov r4, r0
-
+
+#if !defined(__ARM_CORTEX)
 /* make sure we have at least 8+4 bytes, this simplifies things below
  * and avoids some overhead for small blocks
*/
cmp r2, #(8+4)
bmi 8f
-
+#endif
/* align first pointer to word boundary
* offset = -src & 3
*/
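
For readers less fluent in ARM assembly, here is a rough C rendering of what the new Cortex-specific entry code above does; the helper names (prefetch, buffers_trivially_equal) are illustrative only, and __builtin_prefetch stands in for the pld hint:

    #include <stddef.h>

    /* Build-time cache line selection, mirroring the #if block above:
     * 32-byte lines when CORTEX_CACHE_LINE_32 is defined, 64 otherwise. */
    #if defined(CORTEX_CACHE_LINE_32)
    #  define CACHE_LINE_SIZE 32
    #else
    #  define CACHE_LINE_SIZE 64
    #endif

    /* pld [rX, #off] is a prefetch-for-read hint; in C that is roughly: */
    static inline void prefetch(const void *p, size_t off)
    {
        __builtin_prefetch((const char *)p + off);
    }

    /* Entry behaviour of the Cortex path: prefetch the first two cache
     * lines of each buffer and take the trivial "same pointer" exit before
     * touching memory; short blocks then branch to a byte loop (sketched
     * after the 10:/11: hunk below), longer ones fall into the word loop. */
    static int buffers_trivially_equal(const void *a, const void *b)
    {
        prefetch(a, 0 * CACHE_LINE_SIZE);
        prefetch(a, 1 * CACHE_LINE_SIZE);
        if (a == b)
            return 1;           /* cmp r0, r1 / moveq r0, #0 / bxeq lr */
        prefetch(b, 0 * CACHE_LINE_SIZE);
        prefetch(b, 1 * CACHE_LINE_SIZE);
        return 0;
    }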
@@ -109,8 +143,14 @@ memcmp:
subs r2, r2, #(32 + 4)
bmi 1f
-0: PLD (r4, #64)
+0:
+#if defined(__ARM_CORTEX)
+ pld [r4, #(CACHE_LINE_SIZE * 2)]
+ pld [r1, #(CACHE_LINE_SIZE * 2)]
+#else
+ PLD (r4, #64)
PLD (r1, #64)
+#endif
ldr r0, [r4], #4
ldr lr, [r1, #4]!
eors r0, r0, ip
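
The 0: loop above now prefetches two full cache lines ahead of both read positions instead of a fixed 64 bytes. A minimal C sketch of that loop shape, assuming word-aligned pointers, a 64-byte line, and the 32-bytes-per-iteration unrolling implied by the subs #(32 + 4):

    #include <stdint.h>
    #include <stddef.h>

    #define CACHE_LINE_SIZE 64      /* assumption, as in the sketch above */

    /* Compare 32 bytes per iteration, prefetching two cache lines ahead
     * of the current read position on both buffers (illustrative only). */
    static int word_loop_sketch(const uint32_t *a, const uint32_t *b,
                                size_t words)
    {
        size_t i = 0;
        for (; i + 8 <= words; i += 8) {
            __builtin_prefetch((const char *)(a + i) + 2 * CACHE_LINE_SIZE);
            __builtin_prefetch((const char *)(b + i) + 2 * CACHE_LINE_SIZE);
            for (size_t j = 0; j < 8; j++)
                if (a[i + j] != b[i + j])
                    return 1;       /* mismatch: resolved at byte level */
        }
        for (; i < words; i++)      /* tail words, no further prefetch */
            if (a[i] != b[i])
                return 1;
        return 0;
    }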
@@ -176,6 +216,21 @@ memcmp:
9: /* restore registers and return */
ldmfd sp!, {r4, lr}
bx lr
+#if defined(__ARM_CORTEX)
+10: /* process less than 12 bytes */
+ cmp r2, #0
+ moveq r0, #0
+ bxeq lr
+ mov r3, r0
+11:
+ ldrb r0, [r3], #1
+ ldrb ip, [r1], #1
+ subs r0, ip
+ bxne lr
+ subs r2, r2, #1
+ bne 11b
+ bx lr
+#endif /* __ARM_CORTEX */
.fnend
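
The new 10:/11: labels are the short-block fallback taken when fewer than 12 bytes remain; they also absorb the length-zero check that the Cortex entry path no longer performs up front. A hedged C equivalent (small_block_sketch is an invented name):

    #include <stddef.h>

    /* Byte-at-a-time compare mirroring labels 10:/11: above. */
    static int small_block_sketch(const unsigned char *a,
                                  const unsigned char *b, size_t n)
    {
        if (n == 0)                 /* cmp r2, #0 / moveq r0, #0 / bxeq lr */
            return 0;
        do {
            int d = *a++ - *b++;    /* ldrb ... / ldrb ... / subs r0, ip */
            if (d != 0)
                return d;           /* bxne lr */
        } while (--n != 0);         /* subs r2, r2, #1 / bne 11b */
        return 0;
    }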
@@ -198,8 +253,14 @@ memcmp:
bic r1, r1, #3
ldr lr, [r1], #4
-6: PLD (r1, #64)
+6:
+#if defined(__ARM_CORTEX)
+ pld [r1, #(CACHE_LINE_SIZE * 2)]
+ pld [r4, #(CACHE_LINE_SIZE * 2)]
+#else
+ PLD (r1, #64)
PLD (r4, #64)
+#endif
mov ip, lr, lsr #16
ldr lr, [r1], #4
ldr r0, [r4], #4
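
The 6: loop above serves the case where the second buffer sits 2 bytes past a word boundary: each aligned word from the first buffer is compared against a word stitched from the high half of the previous unaligned-side load and the low half of the next one. Roughly, in C (little-endian, names illustrative):

    #include <stdint.h>

    /* Rebuild one 32-bit value for a source that is 2 bytes past a word
     * boundary, from two consecutive aligned loads (little-endian ARM).
     * Assembly: mov ip, lr, lsr #16 ... orr ip, ip, lr, lsl #16
     * (with lr reloaded from the next word in between). */
    static uint32_t stitch_halfword(uint32_t prev_word, uint32_t next_word)
    {
        return (prev_word >> 16) | (next_word << 16);
    }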
@@ -240,13 +301,13 @@ memcmp:
4: /*************** offset is 1 or 3 (less optimized) ***************/
- stmfd sp!, {r5, r6, r7}
+ stmfd sp!, {r5, r6, r7}
// r5 = rhs
// r6 = lhs
// r7 = scratch
- mov r5, r0, lsl #3 /* r5 = right shift */
+ mov r5, r0, lsl #3 /* r5 = right shift */
rsb r6, r5, #32 /* r6 = left shift */
/* align the unaligned pointer */
@@ -269,7 +330,7 @@ memcmp:
bhs 6b
sub r1, r1, r6, lsr #3
- ldmfd sp!, {r5, r6, r7}
+ ldmfd sp!, {r5, r6, r7}
/* are we done? */
adds r2, r2, #8
@@ -284,5 +345,5 @@ memcmp:
sub r1, r1, r6, lsr #3
sub r4, r4, #4
mov r2, #4
- ldmfd sp!, {r5, r6, r7}
+ ldmfd sp!, {r5, r6, r7}
b 8b
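
The 4: path touched by the last few (whitespace-only) hunks generalizes the same stitching to offsets of 1 or 3 bytes by computing the shift amounts at run time (r5 = 8 * offset, r6 = 32 - r5). As a hedged C sketch:

    #include <stdint.h>

    /* Stitch a 32-bit value for a source pointer that is `offset` (1..3)
     * bytes past a word boundary; little-endian byte order assumed.
     * Assembly: mov r5, r0, lsl #3 / rsb r6, r5, #32, then shift-and-or. */
    static uint32_t stitch_offset(uint32_t prev_word, uint32_t next_word,
                                  unsigned offset)
    {
        unsigned right = 8 * offset;
        unsigned left  = 32 - right;
        return (prev_word >> right) | (next_word << left);
    }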