summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2014-11-13 15:45:27 -0800
committerStephen Hines <srhines@google.com>2014-11-18 14:34:46 -0800
commitb9675775b030b187b8528cba2d8e0e5c0a7bf8f7 (patch)
tree2cd4f9b5c15c3fe5e97a230502a60aa65977e429
parent4283f579c424f07bc07c7f075398053eed3f8281 (diff)
downloadandroid_frameworks_rs-b9675775b030b187b8528cba2d8e0e5c0a7bf8f7.tar.gz
android_frameworks_rs-b9675775b030b187b8528cba2d8e0e5c0a7bf8f7.tar.bz2
android_frameworks_rs-b9675775b030b187b8528cba2d8e0e5c0a7bf8f7.zip
Fix implementation of vload with [u]long3/4 and double3/4.
Bug: 18380209

These implementations were accidentally attempting to return <3 x i64> or <4 x i64>, or the double equivalents. The ABI requires that this be converted into a stack return instead, so we transform our hand-written bitcode to do exactly that.

Change-Id: I2be489b23bf639b16d8762a11a8430f40ea5b16c
-rw-r--r--  driver/runtime/ll32/allocation.ll | 45
1 file changed, 27 insertions(+), 18 deletions(-)
diff --git a/driver/runtime/ll32/allocation.ll b/driver/runtime/ll32/allocation.ll
index d0b3932b..21d7cac5 100644
--- a/driver/runtime/ll32/allocation.ll
+++ b/driver/runtime/ll32/allocation.ll
@@ -650,17 +650,20 @@ define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %ag
}
-define <4 x i64> @__rsAllocationVLoadXImpl_long4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <4 x i64>*
%3 = load <4 x i64>* %2, align 8
- ret <4 x i64> %3
+ store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !52
+ ret void
}
-define <3 x i64> @__rsAllocationVLoadXImpl_long3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
- %2 = bitcast i8* %1 to <3 x i64>*
- %3 = load <3 x i64>* %2, align 8
- ret <3 x i64> %3
+ %2 = bitcast i8* %1 to <4 x i64>*
+ %3 = load <4 x i64>* %2, align 8
+ %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
+ store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47
+ ret void
}
define <2 x i64> @__rsAllocationVLoadXImpl_long2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
%1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
@@ -669,17 +672,20 @@ define <2 x i64> @__rsAllocationVLoadXImpl_long2([1 x i32] %a.coerce, i32 %x, i3
ret <2 x i64> %3
}
-define <4 x i64> @__rsAllocationVLoadXImpl_ulong4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <4 x i64>*
%3 = load <4 x i64>* %2, align 8
- ret <4 x i64> %3
+ store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !48
+ ret void
}
-define <3 x i64> @__rsAllocationVLoadXImpl_ulong3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
- %2 = bitcast i8* %1 to <3 x i64>*
- %3 = load <3 x i64>* %2, align 8
- ret <3 x i64> %3
+ %2 = bitcast i8* %1 to <4 x i64>*
+ %3 = load <4 x i64>* %2, align 8
+ %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
+ store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !51
+ ret void
}
define <2 x i64> @__rsAllocationVLoadXImpl_ulong2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
%1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
@@ -821,17 +827,20 @@ define <2 x float> @__rsAllocationVLoadXImpl_float2([1 x i32] %a.coerce, i32 %x,
ret <2 x float> %3
}
-define <4 x double> @__rsAllocationVLoadXImpl_double4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_double4(<4 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
%2 = bitcast i8* %1 to <4 x double>*
%3 = load <4 x double>* %2, align 8
- ret <4 x double> %3
+ store <4 x double> %3, <4 x double>* %agg.result, align 32, !tbaa !60
+ ret void
}
-define <3 x double> @__rsAllocationVLoadXImpl_double3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
+define void @__rsAllocationVLoadXImpl_double3(<3 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
%1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
- %2 = bitcast i8* %1 to <3 x double>*
- %3 = load <3 x double>* %2, align 8
- ret <3 x double> %3
+ %2 = bitcast i8* %1 to <4 x double>*
+ %3 = load <4 x double>* %2, align 8
+ %4 = bitcast <3 x double>* %agg.result to <4 x double>*
+ store <4 x double> %3, <4 x double>* %4, align 32, !tbaa !59
+ ret void
}
define <2 x double> @__rsAllocationVLoadXImpl_double2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
%1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2