summaryrefslogtreecommitdiffstats
path: root/cpu_ref/rsCpuScript.cpp
diff options
context:
space:
mode:
authorChris Wailes <chriswailes@google.com>2014-07-16 15:18:30 -0700
committerStephen Hines <srhines@google.com>2014-08-12 17:02:01 -0700
commit818cfa034e257c7bb48356257f5cb67334e19aa6 (patch)
tree27ad9d05d771ae01aa678d71593a7c062b2d2105 /cpu_ref/rsCpuScript.cpp
parentfb0a274983ae8bfb07aff8c292305389789d6e92 (diff)
downloadandroid_frameworks_rs-818cfa034e257c7bb48356257f5cb67334e19aa6.tar.gz
android_frameworks_rs-818cfa034e257c7bb48356257f5cb67334e19aa6.tar.bz2
android_frameworks_rs-818cfa034e257c7bb48356257f5cb67334e19aa6.zip
Collapse code paths for single- and multi-input kernels.
This patch simplifies the RenderScript driver and CPU reference implementation by removing the distinction between single- and multi-input kernels in many places. The distinction is maintained in some places due to the need to maintain backwards compatibility. This permits the deletion of some functions and struct members that are no longer needed. Several related functions were also cleaned up. Change-Id: I77e4b155cc7ca1581b05bf901c70ae53a9ff0b12
Diffstat (limited to 'cpu_ref/rsCpuScript.cpp')
-rw-r--r--cpu_ref/rsCpuScript.cpp214
1 files changed, 49 insertions, 165 deletions
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index a11fda19..05984207 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -789,144 +789,33 @@ void RsdCpuScriptImpl::populateScript(Script *script) {
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
-void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout,
+void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
const void * usr, uint32_t usrLen,
const RsScriptCall *sc,
MTLaunchStruct *mtls) {
memset(mtls, 0, sizeof(MTLaunchStruct));
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
- return;
- }
- if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
- return;
- }
-
- if (ain != NULL) {
- const Type *inType = ain->getType();
+ for (int index = inLen; --index >= 0;) {
+ const Allocation* ain = ains[index];
- mtls->fep.dimX = inType->getDimX();
- mtls->fep.dimY = inType->getDimY();
- mtls->fep.dimZ = inType->getDimZ();
-
- } else if (aout != NULL) {
- const Type *outType = aout->getType();
-
- mtls->fep.dimX = outType->getDimX();
- mtls->fep.dimY = outType->getDimY();
- mtls->fep.dimZ = outType->getDimZ();
-
- } else {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
- return;
- }
-
- if (ain != NULL && aout != NULL) {
- if (!ain->hasSameDims(aout)) {
+ // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
+ if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; dimensions of input and output allocations do not match.");
-
+ "rsForEach called with null in allocations");
return;
}
}
- if (!sc || (sc->xEnd == 0)) {
- mtls->xEnd = mtls->fep.dimX;
- } else {
- rsAssert(sc->xStart < mtls->fep.dimX);
- rsAssert(sc->xEnd <= mtls->fep.dimX);
- rsAssert(sc->xStart < sc->xEnd);
- mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
- mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
- if (mtls->xStart >= mtls->xEnd) return;
- }
-
- if (!sc || (sc->yEnd == 0)) {
- mtls->yEnd = mtls->fep.dimY;
- } else {
- rsAssert(sc->yStart < mtls->fep.dimY);
- rsAssert(sc->yEnd <= mtls->fep.dimY);
- rsAssert(sc->yStart < sc->yEnd);
- mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
- mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
- if (mtls->yStart >= mtls->yEnd) return;
- }
-
- if (!sc || (sc->zEnd == 0)) {
- mtls->zEnd = mtls->fep.dimZ;
- } else {
- rsAssert(sc->zStart < mtls->fep.dimZ);
- rsAssert(sc->zEnd <= mtls->fep.dimZ);
- rsAssert(sc->zStart < sc->zEnd);
- mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart);
- mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd);
- if (mtls->zStart >= mtls->zEnd) return;
- }
-
- mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
- mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
- mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
- mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
-
- rsAssert(!ain || (ain->getType()->getDimZ() == 0));
-
- mtls->rsc = mCtx;
- mtls->ain = ain;
- mtls->aout = aout;
- mtls->fep.usr = usr;
- mtls->fep.usrLen = usrLen;
- mtls->mSliceSize = 1;
- mtls->mSliceNum = 0;
-
- mtls->fep.ptrIn = NULL;
- mtls->fep.eStrideIn = 0;
- mtls->isThreadable = mIsThreadable;
-
- if (ain) {
- mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
- mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride;
- }
-
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
- }
-}
-
-void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen,
- Allocation * aout,
- const void * usr, uint32_t usrLen,
- const RsScriptCall *sc,
- MTLaunchStruct *mtls) {
-
- memset(mtls, 0, sizeof(MTLaunchStruct));
-
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ains != NULL) {
- for (int index = inLen; --index >= 0;) {
- const Allocation* ain = ains[index];
-
- if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
- return;
- }
- }
- }
-
if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null out allocations");
return;
}
- if (ains != NULL) {
+ if (inLen > 0) {
const Allocation *ain0 = ains[0];
const Type *inType = ain0->getType();
@@ -951,11 +840,12 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->fep.dimZ = outType->getDimZ();
} else {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null allocations");
return;
}
- if (ains != NULL && aout != NULL) {
+ if (inLen > 0 && aout != NULL) {
if (!ains[0]->hasSameDims(aout)) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
"Failed to launch kernel; dimensions of input and output allocations do not match.");
@@ -1002,7 +892,7 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
- rsAssert(!ains || (ains[0]->getType()->getDimZ() == 0));
+ rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0));
mtls->rsc = mCtx;
mtls->ains = ains;
@@ -1012,18 +902,28 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->mSliceSize = 1;
mtls->mSliceNum = 0;
- mtls->fep.ptrIns = NULL;
- mtls->fep.eStrideIn = 0;
+ mtls->fep.inPtrs = NULL;
+ mtls->fep.inStrides = NULL;
mtls->isThreadable = mIsThreadable;
- if (ains) {
- mtls->fep.ptrIns = new const uint8_t*[inLen];
- mtls->fep.inStrides = new StridePair[inLen];
+ if (inLen > 0) {
+
+ if (inLen <= RS_KERNEL_INPUT_THRESHOLD) {
+ mtls->fep.inPtrs = (const uint8_t**)mtls->inPtrsBuff;
+ mtls->fep.inStrides = mtls->inStridesBuff;
+ } else {
+ mtls->fep.heapAllocatedArrays = true;
+
+ mtls->fep.inPtrs = new const uint8_t*[inLen];
+ mtls->fep.inStrides = new StridePair[inLen];
+ }
+
+ mtls->fep.inLen = inLen;
for (int index = inLen; --index >= 0;) {
const Allocation *ain = ains[index];
- mtls->fep.ptrIns[index] =
+ mtls->fep.inPtrs[index] =
(const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr;
mtls->fep.inStrides[index].eStride =
@@ -1033,41 +933,27 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
}
}
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
+ mtls->fep.outPtr = NULL;
+ mtls->fep.outStride.eStride = 0;
+ mtls->fep.outStride.yStride = 0;
+ if (aout != NULL) {
+ mtls->fep.outPtr = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
+
+ mtls->fep.outStride.eStride = aout->getType()->getElementSizeBytes();
+ mtls->fep.outStride.yStride = aout->mHal.drvState.lod[0].stride;
}
}
void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
- const Allocation * ain,
+ const Allocation ** ains,
+ uint32_t inLen,
Allocation * aout,
const void * usr,
uint32_t usrLen,
const RsScriptCall *sc) {
MTLaunchStruct mtls;
- forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
- forEachKernelSetup(slot, &mtls);
-
- RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
- mCtx->launchThreads(ain, aout, sc, &mtls);
- mCtx->setTLS(oldTLS);
-}
-
-void RsdCpuScriptImpl::invokeForEachMulti(uint32_t slot,
- const Allocation ** ains,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) {
-
- MTLaunchStruct mtls;
forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
forEachKernelSetup(slot, &mtls);
@@ -1338,17 +1224,15 @@ Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
return NULL;
}
-void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
-{
-}
+void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {}
-void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
-{
-}
+void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {}
}