From 18a8907c4efb82f839959cce3cec442a96d87f8e Mon Sep 17 00:00:00 2001
From: Brent DeGraaf
Date: Tue, 8 Jul 2014 16:59:13 -0400
Subject: bionic: update memmove for 32-bit atomicity

When src/dst are 32-bit aligned, the updated memmove guarantees
32-bit atomic accesses.

Change-Id: I21cb77451270d061b32e3e2d2fda22e7e373b7ff
---
 libc/arch-arm/krait/bionic/memmove.S | 94 ++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 31 deletions(-)

diff --git a/libc/arch-arm/krait/bionic/memmove.S b/libc/arch-arm/krait/bionic/memmove.S
index b7b77ce7e..24fcec28f 100644
--- a/libc/arch-arm/krait/bionic/memmove.S
+++ b/libc/arch-arm/krait/bionic/memmove.S
@@ -1,5 +1,5 @@
 /***************************************************************************
- Copyright (c) 2009-2013 The Linux Foundation. All rights reserved.
+ Copyright (c) 2009-2014 The Linux Foundation. All rights reserved.
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
  met:
@@ -84,7 +84,7 @@ _memmove_words:
         .save   {r0, lr}
         cmp     r2, #0
         it      ne
-        subsne  r12, r0, r1
+        subsne  r12, r0, r1     // Warning: do not combine these "it" blocks
         it      eq
         bxeq    lr
         // memmove only if r1 < r0 < r1+r2
@@ -94,15 +94,28 @@ _memmove_words:
         cmpge   r12, r0
         it      le
         ble     memcpy
-        cmp     r2, #63
-        ble     .Lneon_b2f_smallcopy
+        cmp     r2, #4
+        it      le
+        ble     .Lneon_b2f_smallcopy_loop
         push    {r0, lr}
         add     r0, r0, r2
         add     r1, r1, r2
+        cmp     r2, #64
+        it      ge
+        bge     .Lneon_b2f_copy_64
+        cmp     r2, #32
+        it      ge
+        bge     .Lneon_b2f_copy_32
+        cmp     r2, #8
+        it      ge
+        bge     .Lneon_b2f_copy_8
+        b       .Lneon_b2f_copy_1
+.Lneon_b2f_copy_64:
         mov     r12, r2, lsr #6
         add     r0, r0, #32
         add     r1, r1, #32
         cmp     r12, #PLDTHRESH
+        it      le
         ble     .Lneon_b2f_copy_64_loop_nopld
         sub     r12, #PLDOFFS
         sub     lr, r1, #(PLDOFFS)*PLDSIZE
@@ -116,6 +129,7 @@ _memmove_words:
         subs    r12, r12, #1
         vst1.32 {q0, q1}, [r0]!
         vst1.32 {q2, q3}, [r0]
+        it      ne
         bne     .Lneon_b2f_copy_64_loop_outer
         mov     r12, #PLDOFFS
 .Lneon_b2f_copy_64_loop_nopld:
@@ -126,12 +140,15 @@ _memmove_words:
         subs    r12, r12, #1
         vst1.32 {q8, q9}, [r0]!
         vst1.32 {q10, q11}, [r0]
+        it      ne
         bne     .Lneon_b2f_copy_64_loop_nopld
         ands    r2, r2, #0x3f
+        it      eq
         beq     .Lneon_memmove_done
         sub     r1, r1, #32
         sub     r0, r0, #32
         cmp     r2, #32
+        it      lt
         blt     .Lneon_b2f_copy_8
 .Lneon_b2f_copy_32:
         sub     r1, r1, #32
@@ -139,9 +156,11 @@ _memmove_words:
         vld1.32 {q0, q1}, [r1]
         vst1.32 {q0, q1}, [r0]
         ands    r2, r2, #0x1f
+        it      eq
         beq     .Lneon_memmove_done
 .Lneon_b2f_copy_8:
         movs    r12, r2, lsr #0x3
+        it      eq
         beq     .Lneon_b2f_copy_1
 .Lneon_b2f_copy_8_loop:
         sub     r1, r1, #8
@@ -149,39 +168,52 @@ _memmove_words:
         vld1.32 {d0}, [r1]
         subs    r12, r12, #1
         vst1.32 {d0}, [r0]
+        it      ne
         bne     .Lneon_b2f_copy_8_loop
         ands    r2, r2, #0x7
         beq     .Lneon_memmove_done
 .Lneon_b2f_copy_1:
-        sub     r1, r1, r2
-        sub     r0, r0, r2
-        ands    r12, r2, #1
-        beq     .Lneon_b2f_copy_halfword_loop
-        subs    r2, r2, #1
-        ldrb    r3, [r1, r2]
-        strb    r3, [r0, r2]
-        beq     .Lneon_memmove_done
-.Lneon_b2f_copy_halfword_loop:
-        subs    r2, r2, #2
-        ldrh    r3, [r1, r2]
-        strh    r3, [r0, r2]
-        bne     .Lneon_b2f_copy_halfword_loop
+        movs    r12, r2, lsl #29
+        itttt   mi
+        submi   r1, r1, #4
+        submi   r0, r0, #4
+        ldrmi   r3, [r1]
+        strmi   r3, [r0]
+        movs    r2, r2, lsl #31
+        itttt   cs
+        subcs   r1, r1, #2
+        subcs   r0, r0, #2
+        ldrhcs  r3, [r1]
+        strhcs  r3, [r0]
+        itttt   mi
+        submi   r1, r1, #1
+        submi   r0, r0, #1
+        ldrbmi  r12, [r1]
+        strbmi  r12, [r0]
 .Lneon_memmove_done:
         pop     {r0, pc}

-.Lneon_b2f_smallcopy:
-        ands    r12, r2, #1
-        beq     .Lneon_b2f_halfword_small_loop
-        subs    r2, r2, #1
-        ldrb    r3, [r1, r2]
-        strb    r3, [r0, r2]
-        it      eq
-        bxeq    lr
-.Lneon_b2f_halfword_small_loop:
-        subs    r2, r2, #2
-        ldrh    r3, [r1, r2]
-        strh    r3, [r0, r2]
-        bne     .Lneon_b2f_halfword_small_loop
+.Lneon_b2f_smallcopy_loop:
+        // 4 bytes or less
+        add     r1, r1, r2
+        add     r0, r0, r2
+        movs    r12, r2, lsl #29
+        itttt   mi
+        submi   r1, r1, #4
+        submi   r0, r0, #4
+        ldrmi   r3, [r1]
+        strmi   r3, [r0]
+        movs    r2, r2, lsl #31
+        itttt   cs
+        subcs   r1, r1, #2
+        subcs   r0, r0, #2
+        ldrhcs  r3, [r1]
+        strhcs  r3, [r0]
+        itttt   mi
+        submi   r1, r1, #1
+        submi   r0, r0, #1
+        ldrbmi  r12, [r1]
+        strbmi  r12, [r0]
         bx      lr
-        .cfi_endproc
+        .cfi_endproc
 END(memmove)
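Note on the new tail dispatch (editorial, not part of the commit): "movs r12, r2, lsl #29"
shifts the remaining count left by 29 bits, which puts bit 2 of the count in the N flag,
and "movs r2, r2, lsl #31" puts bit 1 in C and bit 0 in N. The predicated blocks that
follow therefore copy the 4-, 2-, and 1-byte pieces of the tail with single
ldr/ldrh/ldrb accesses instead of the old byte and halfword loops. The single ldr/str
pair for the 4-byte piece is what backs the commit message: a 32-bit-aligned word is
moved with one 32-bit access, which the architecture guarantees to be single-copy
atomic for aligned addresses, so a concurrent reader can never observe a torn word.

A minimal C sketch of the same backward tail, for illustration only (the helper name,
the parameter layout, and the direct pointer casts are assumptions; the casts rely on
the alignment precondition stated in the commit message):

/*
 * Hypothetical C rendering of the new .Lneon_b2f_copy_1 tail.
 * dst/src point one byte past the tail; n is the remaining count (0..7).
 */
#include <stdint.h>

static void b2f_tail_copy(unsigned char *dst, const unsigned char *src,
                          unsigned n)
{
    if (n & 4) {                 /* bit 2: the first "itttt mi" block */
        src -= 4; dst -= 4;
        /* single 32-bit load/store (ldr/str); atomic when 4-byte aligned */
        *(uint32_t *)dst = *(const uint32_t *)src;
    }
    if (n & 2) {                 /* bit 1: the "itttt cs" block */
        src -= 2; dst -= 2;
        *(uint16_t *)dst = *(const uint16_t *)src;   /* ldrh/strh */
    }
    if (n & 1) {                 /* bit 0: the second "itttt mi" block */
        src -= 1; dst -= 1;
        *dst = *src;             /* ldrb/strb */
    }
}

The old loops copied the same bytes, but a 4-byte tail went out as individual byte or
halfword stores, so another thread polling a 32-bit flag word could observe it
half-written; the flag-based dispatch above is what closes that window.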