From 18a8907c4efb82f839959cce3cec442a96d87f8e Mon Sep 17 00:00:00 2001
From: Brent DeGraaf
Date: Tue, 8 Jul 2014 16:59:13 -0400
Subject: bionic: update memmove for 32-bit atomicity

When src/dst are 32-bit aligned, the updated memmove guarantees
32-bit atomic accesses.

Change-Id: I21cb77451270d061b32e3e2d2fda22e7e373b7ff
---
 libc/arch-arm/krait/bionic/memmove.S | 94 ++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 31 deletions(-)

diff --git a/libc/arch-arm/krait/bionic/memmove.S b/libc/arch-arm/krait/bionic/memmove.S
index b7b77ce7e..24fcec28f 100644
--- a/libc/arch-arm/krait/bionic/memmove.S
+++ b/libc/arch-arm/krait/bionic/memmove.S
@@ -1,5 +1,5 @@
 /***************************************************************************
- Copyright (c) 2009-2013 The Linux Foundation. All rights reserved.
+ Copyright (c) 2009-2014 The Linux Foundation. All rights reserved.
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
  met:
@@ -84,7 +84,7 @@ _memmove_words:
         .save   {r0, lr}
         cmp     r2, #0
         it      ne
-        subsne  r12, r0, r1
+        subsne  r12, r0, r1     // Warning: do not combine these "it" blocks
         it      eq
         bxeq    lr
         // memmove only if r1 < r0 < r1+r2
@@ -94,15 +94,28 @@ _memmove_words:
         cmpge   r12, r0
         it      le
         ble     memcpy
-        cmp     r2, #63
-        ble     .Lneon_b2f_smallcopy
+        cmp     r2, #4
+        it      le
+        ble     .Lneon_b2f_smallcopy_loop
         push    {r0, lr}
         add     r0, r0, r2
         add     r1, r1, r2
+        cmp     r2, #64
+        it      ge
+        bge     .Lneon_b2f_copy_64
+        cmp     r2, #32
+        it      ge
+        bge     .Lneon_b2f_copy_32
+        cmp     r2, #8
+        it      ge
+        bge     .Lneon_b2f_copy_8
+        b       .Lneon_b2f_copy_1
+.Lneon_b2f_copy_64:
         mov     r12, r2, lsr #6
         add     r0, r0, #32
         add     r1, r1, #32
         cmp     r12, #PLDTHRESH
+        it      le
         ble     .Lneon_b2f_copy_64_loop_nopld
         sub     r12, #PLDOFFS
         sub     lr, r1, #(PLDOFFS)*PLDSIZE
@@ -116,6 +129,7 @@ _memmove_words:
         subs    r12, r12, #1
         vst1.32 {q0, q1}, [r0]!
         vst1.32 {q2, q3}, [r0]
+        it      ne
         bne     .Lneon_b2f_copy_64_loop_outer
         mov     r12, #PLDOFFS
 .Lneon_b2f_copy_64_loop_nopld:
@@ -126,12 +140,15 @@ _memmove_words:
         subs    r12, r12, #1
         vst1.32 {q8, q9}, [r0]!
         vst1.32 {q10, q11}, [r0]
+        it      ne
         bne     .Lneon_b2f_copy_64_loop_nopld
         ands    r2, r2, #0x3f
+        it      eq
         beq     .Lneon_memmove_done
         sub     r1, r1, #32
         sub     r0, r0, #32
         cmp     r2, #32
+        it      lt
         blt     .Lneon_b2f_copy_8
 .Lneon_b2f_copy_32:
         sub     r1, r1, #32
@@ -139,9 +156,11 @@ _memmove_words:
         vld1.32 {q0, q1}, [r1]
         vst1.32 {q0, q1}, [r0]
         ands    r2, r2, #0x1f
+        it      eq
         beq     .Lneon_memmove_done
 .Lneon_b2f_copy_8:
         movs    r12, r2, lsr #0x3
+        it      eq
         beq     .Lneon_b2f_copy_1
 .Lneon_b2f_copy_8_loop:
         sub     r1, r1, #8
@@ -149,39 +168,52 @@ _memmove_words:
         vld1.32 {d0}, [r1]
         subs    r12, r12, #1
         vst1.32 {d0}, [r0]
+        it      ne
         bne     .Lneon_b2f_copy_8_loop
         ands    r2, r2, #0x7
         beq     .Lneon_memmove_done
 .Lneon_b2f_copy_1:
-        sub     r1, r1, r2
-        sub     r0, r0, r2
-        ands    r12, r2, #1
-        beq     .Lneon_b2f_copy_halfword_loop
-        subs    r2, r2, #1
-        ldrb    r3, [r1, r2]
-        strb    r3, [r0, r2]
-        beq     .Lneon_memmove_done
-.Lneon_b2f_copy_halfword_loop:
-        subs    r2, r2, #2
-        ldrh    r3, [r1, r2]
-        strh    r3, [r0, r2]
-        bne     .Lneon_b2f_copy_halfword_loop
+        movs    r12, r2, lsl #29
+        itttt   mi
+        submi   r1, r1, #4
+        submi   r0, r0, #4
+        ldrmi   r3, [r1]
+        strmi   r3, [r0]
+        movs    r2, r2, lsl #31
+        itttt   cs
+        subcs   r1, r1, #2
+        subcs   r0, r0, #2
+        ldrhcs  r3, [r1]
+        strhcs  r3, [r0]
+        itttt   mi
+        submi   r1, r1, #1
+        submi   r0, r0, #1
+        ldrbmi  r12, [r1]
+        strbmi  r12, [r0]
 .Lneon_memmove_done:
         pop     {r0, pc}

-.Lneon_b2f_smallcopy:
-        ands    r12, r2, #1
-        beq     .Lneon_b2f_halfword_small_loop
-        subs    r2, r2, #1
-        ldrb    r3, [r1, r2]
-        strb    r3, [r0, r2]
-        it      eq
-        bxeq    lr
-.Lneon_b2f_halfword_small_loop:
-        subs    r2, r2, #2
-        ldrh    r3, [r1, r2]
-        strh    r3, [r0, r2]
-        bne     .Lneon_b2f_halfword_small_loop
+.Lneon_b2f_smallcopy_loop:
+        // 4 bytes or less
+        add     r1, r1, r2
+        add     r0, r0, r2
+        movs    r12, r2, lsl #29
+        itttt   mi
+        submi   r1, r1, #4
+        submi   r0, r0, #4
+        ldrmi   r3, [r1]
+        strmi   r3, [r0]
+        movs    r2, r2, lsl #31
+        itttt   cs
+        subcs   r1, r1, #2
+        subcs   r0, r0, #2
+        ldrhcs  r3, [r1]
+        strhcs  r3, [r0]
+        itttt   mi
+        submi   r1, r1, #1
+        submi   r0, r0, #1
+        ldrbmi  r12, [r1]
+        strbmi  r12, [r0]
         bx      lr
-        .cfi_endproc
+        .cfi_endproc
 END(memmove)
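Note on the new tail dispatch (editorial, not part of the commit): "movs r12, r2, lsl #29"
shifts the remaining count left by 29 bits, which puts bit 2 of the count in the N flag,
and "movs r2, r2, lsl #31" puts bit 1 in C and bit 0 in N. The predicated blocks that
follow therefore copy the 4-, 2-, and 1-byte pieces of the tail with single
ldr/ldrh/ldrb accesses instead of the old byte and halfword loops. The single ldr/str
pair for the 4-byte piece is what backs the commit message: a 32-bit-aligned word is
moved with one 32-bit access, which the architecture guarantees to be single-copy
atomic for aligned addresses, so a concurrent reader can never observe a torn word.

A minimal C sketch of the same backward tail, for illustration only (the helper name,
the parameter layout, and the direct pointer casts are assumptions; the casts rely on
the alignment precondition stated in the commit message):

/*
 * Hypothetical C rendering of the new .Lneon_b2f_copy_1 tail.
 * dst/src point one byte past the tail; n is the remaining count (0..7).
 */
#include <stdint.h>

static void b2f_tail_copy(unsigned char *dst, const unsigned char *src,
                          unsigned n)
{
    if (n & 4) {                 /* bit 2: the first "itttt mi" block */
        src -= 4; dst -= 4;
        /* single 32-bit load/store (ldr/str); atomic when 4-byte aligned */
        *(uint32_t *)dst = *(const uint32_t *)src;
    }
    if (n & 2) {                 /* bit 1: the "itttt cs" block */
        src -= 2; dst -= 2;
        *(uint16_t *)dst = *(const uint16_t *)src;   /* ldrh/strh */
    }
    if (n & 1) {                 /* bit 0: the second "itttt mi" block */
        src -= 1; dst -= 1;
        *dst = *src;             /* ldrb/strb */
    }
}

The old loops copied the same bytes, but a 4-byte tail went out as individual byte or
halfword stores, so another thread polling a 32-bit flag word could observe it
half-written; the flag-based dispatch above is what closes that window.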