diff options
| author | Stephen Hines <srhines@google.com> | 2014-11-13 15:45:27 -0800 |
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2014-11-18 14:34:46 -0800 |
| commit | b9675775b030b187b8528cba2d8e0e5c0a7bf8f7 (patch) | |
| tree | 2cd4f9b5c15c3fe5e97a230502a60aa65977e429 | |
| parent | 4283f579c424f07bc07c7f075398053eed3f8281 (diff) | |
| download | android_frameworks_rs-b9675775b030b187b8528cba2d8e0e5c0a7bf8f7.tar.gz android_frameworks_rs-b9675775b030b187b8528cba2d8e0e5c0a7bf8f7.tar.bz2 android_frameworks_rs-b9675775b030b187b8528cba2d8e0e5c0a7bf8f7.zip | |
Fix implementation of vload with [u]long3/4 and double3/4.
Bug: 18380209
These implementations were accidentally attempting to return <3 x i64> or
<4 x i64>, or the double equivalents. The ABI requires that this be converted
into a stack return instead, so we transform our hand-written bitcode to do
exactly that.
Change-Id: I2be489b23bf639b16d8762a11a8430f40ea5b16c
| -rw-r--r-- | driver/runtime/ll32/allocation.ll | 45 |
1 file changed, 27 insertions(+), 18 deletions(-)
diff --git a/driver/runtime/ll32/allocation.ll b/driver/runtime/ll32/allocation.ll
index d0b3932b..21d7cac5 100644
--- a/driver/runtime/ll32/allocation.ll
+++ b/driver/runtime/ll32/allocation.ll
@@ -650,17 +650,20 @@ define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %ag
 }
-define <4 x i64> @__rsAllocationVLoadXImpl_long4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
   %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
   %2 = bitcast i8* %1 to <4 x i64>*
   %3 = load <4 x i64>* %2, align 8
-  ret <4 x i64> %3
+  store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !52
+  ret void
 }
-define <3 x i64> @__rsAllocationVLoadXImpl_long3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
   %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
-  %2 = bitcast i8* %1 to <3 x i64>*
-  %3 = load <3 x i64>* %2, align 8
-  ret <3 x i64> %3
+  %2 = bitcast i8* %1 to <4 x i64>*
+  %3 = load <4 x i64>* %2, align 8
+  %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
+  store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47
+  ret void
 }
 define <2 x i64> @__rsAllocationVLoadXImpl_long2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
   %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
@@ -669,17 +672,20 @@ define <2 x i64> @__rsAllocationVLoadXImpl_long2([1 x i32] %a.coerce, i32 %x, i3
   ret <2 x i64> %3
 }
-define <4 x i64> @__rsAllocationVLoadXImpl_ulong4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
   %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
   %2 = bitcast i8* %1 to <4 x i64>*
   %3 = load <4 x i64>* %2, align 8
-  ret <4 x i64> %3
+  store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !48
+  ret void
 }
-define <3 x i64> @__rsAllocationVLoadXImpl_ulong3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
   %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
-  %2 = bitcast i8* %1 to <3 x i64>*
-  %3 = load <3 x i64>* %2, align 8
-  ret <3 x i64> %3
+  %2 = bitcast i8* %1 to <4 x i64>*
+  %3 = load <4 x i64>* %2, align 8
+  %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
+  store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !51
+  ret void
 }
 define <2 x i64> @__rsAllocationVLoadXImpl_ulong2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
   %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
@@ -821,17 +827,20 @@ define <2 x float> @__rsAllocationVLoadXImpl_float2([1 x i32] %a.coerce, i32 %x,
   ret <2 x float> %3
 }
-define <4 x double> @__rsAllocationVLoadXImpl_double4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_double4(<4 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
   %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
   %2 = bitcast i8* %1 to <4 x double>*
   %3 = load <4 x double>* %2, align 8
-  ret <4 x double> %3
+  store <4 x double> %3, <4 x double>* %agg.result, align 32, !tbaa !60
+  ret void
 }
-define <3 x double> @__rsAllocationVLoadXImpl_double3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_double3(<3 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
   %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
-  %2 = bitcast i8* %1 to <3 x double>*
-  %3 = load <3 x double>* %2, align 8
-  ret <3 x double> %3
+  %2 = bitcast i8* %1 to <4 x double>*
+  %3 = load <4 x double>* %2, align 8
+  %4 = bitcast <3 x double>* %agg.result to <4 x double>*
+  store <4 x double> %3, <4 x double>* %4, align 32, !tbaa !59
+  ret void
 }
 define <2 x double> @__rsAllocationVLoadXImpl_double2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
   %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
