summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy McFadden <fadden@android.com>2010-04-23 16:34:52 -0700
committerAndy McFadden <fadden@android.com>2010-04-29 10:34:30 -0700
commit8911f7a2222124ba724a4a9281555b74d0e098e2 (patch)
treed14964da4f4ba33384208ca9aaaeefc42eeffca0
parent12a744d079c4ff434db2f4a9db461386a0ec9e2c (diff)
downloadandroid_dalvik-8911f7a2222124ba724a4a9281555b74d0e098e2.tar.gz
android_dalvik-8911f7a2222124ba724a4a9281555b74d0e098e2.tar.bz2
android_dalvik-8911f7a2222124ba724a4a9281555b74d0e098e2.zip
Dalvik Zip rewrite.
Change the way zip archives are handled. This is necessary to deal with very large (~1GB) APK files, for which our current approach of mapping the entire file falls over. We now do the classic scavenger hunt for the End Of Central Directory magic on a buffer of data read from the file, instead of a memory-mapped section. We use what we find to create a map that covers the Central Directory only. For most uses in the VM this is all we really need, since we just want to check file attributes vs. the optimized DEX to see if we're out of date. If the caller is interested in unpacking the file contents, we have to do an additional file read to discover the size of the Local File Header section so we can skip past it. We also now do a file-to-file extraction using read() calls instead of a buffer-to-file extraction on mmap()ed data. No difference in performance (as measured by first-boot dexopt). Since this is more of a rewrite than an update, I also took the opportunity to change buffer size variables from "long" to "size_t", and normalized return values to int (some were using bool, which is common in the VM but was mixed in the zip code). Failure messages are now all LOGW with the word "Zip" up front (didn't want to change log tag away from "dalvikvm"). Also, removed a not-quite-right check in the "map part of a file" code, and clarified that the file offset is absolute. For bug 2620103. Change-Id: I745fb15abb541376f467969ffe422222676f1e5f
-rw-r--r--dexopt/OptMain.c9
-rw-r--r--libdex/CmdUtils.c2
-rw-r--r--libdex/SysUtil.c18
-rw-r--r--libdex/SysUtil.h2
-rw-r--r--libdex/ZipArchive.c521
-rw-r--r--libdex/ZipArchive.h35
-rw-r--r--vm/JarFile.c2
7 files changed, 353 insertions, 236 deletions
diff --git a/dexopt/OptMain.c b/dexopt/OptMain.c
index 1e47009b6..edd5425dc 100644
--- a/dexopt/OptMain.c
+++ b/dexopt/OptMain.c
@@ -55,7 +55,8 @@ static int extractAndProcessZip(int zipFd, int cacheFd,
{
ZipArchive zippy;
ZipEntry zipEntry;
- long uncompLen, modWhen, crc32;
+ size_t uncompLen;
+ long modWhen, crc32;
off_t dexOffset;
int err;
int result = -1;
@@ -99,8 +100,8 @@ static int extractAndProcessZip(int zipFd, int cacheFd,
/*
* Extract some info about the zip entry.
*/
- if (!dexZipGetEntryInfo(&zippy, zipEntry, NULL, &uncompLen, NULL, NULL,
- &modWhen, &crc32))
+ if (dexZipGetEntryInfo(&zippy, zipEntry, NULL, &uncompLen, NULL, NULL,
+ &modWhen, &crc32) != 0)
{
LOGW("DexOptZ: zip archive GetEntryInfo failed on %s\n", debugFileName);
goto bail;
@@ -113,7 +114,7 @@ static int extractAndProcessZip(int zipFd, int cacheFd,
/*
* Extract the DEX data into the cache file at the current offset.
*/
- if (!dexZipExtractEntryToFile(&zippy, zipEntry, cacheFd)) {
+ if (dexZipExtractEntryToFile(&zippy, zipEntry, cacheFd) != 0) {
LOGW("DexOptZ: extraction of %s from %s failed\n",
kClassesDex, debugFileName);
goto bail;
diff --git a/libdex/CmdUtils.c b/libdex/CmdUtils.c
index 102664cd5..109d3e52a 100644
--- a/libdex/CmdUtils.c
+++ b/libdex/CmdUtils.c
@@ -70,7 +70,7 @@ UnzipToFileResult dexUnzipToFile(const char* zipFileName,
goto bail;
}
- if (!dexZipExtractEntryToFile(&archive, entry, fd)) {
+ if (dexZipExtractEntryToFile(&archive, entry, fd) != 0) {
fprintf(stderr, "Extract of '%s' from '%s' failed\n",
kFileToExtract, zipFileName);
result = kUTFRBadZip;
diff --git a/libdex/SysUtil.c b/libdex/SysUtil.c
index 7c6aaeffd..bcba489ae 100644
--- a/libdex/SysUtil.c
+++ b/libdex/SysUtil.c
@@ -266,33 +266,23 @@ int sysMapFileInShmemWritableReadOnly(int fd, MemMapping* pMap)
}
/*
- * Map part of a file (from fd's current offset) into a shared, read-only
- * memory segment.
+ * Map part of a file into a shared, read-only memory segment. The "start"
+ * offset is absolute, not relative.
*
* On success, returns 0 and fills out "pMap". On failure, returns a nonzero
* value and does not disturb "pMap".
*/
-int sysMapFileSegmentInShmem(int fd, off_t start, long length,
+int sysMapFileSegmentInShmem(int fd, off_t start, size_t length,
MemMapping* pMap)
{
#ifdef HAVE_POSIX_FILEMAP
- off_t dummy;
- size_t fileLength, actualLength;
+ size_t actualLength;
off_t actualStart;
int adjust;
void* memPtr;
assert(pMap != NULL);
- if (getFileStartAndLength(fd, &dummy, &fileLength) < 0)
- return -1;
-
- if (start + length > (long)fileLength) {
- LOGW("bad segment: st=%d len=%ld flen=%d\n",
- (int) start, length, (int) fileLength);
- return -1;
- }
-
/* adjust to be page-aligned */
adjust = start % DEFAULT_PAGE_SIZE;
actualStart = start - adjust;
diff --git a/libdex/SysUtil.h b/libdex/SysUtil.h
index b300a7b3a..95e5be25d 100644
--- a/libdex/SysUtil.h
+++ b/libdex/SysUtil.h
@@ -80,7 +80,7 @@ int sysMapFileInShmemWritableReadOnly(int fd, MemMapping* pMap);
/*
* Like sysMapFileInShmemReadOnly, but on only part of a file.
*/
-int sysMapFileSegmentInShmem(int fd, off_t start, long length,
+int sysMapFileSegmentInShmem(int fd, off_t start, size_t length,
MemMapping* pMap);
/*
diff --git a/libdex/ZipArchive.c b/libdex/ZipArchive.c
index 7c7e18e95..c935f11d4 100644
--- a/libdex/ZipArchive.c
+++ b/libdex/ZipArchive.c
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/*
* Read-only access to Zip archives, with minimal heap allocation.
*/
@@ -21,10 +22,13 @@
#include <zlib.h>
#include <stdlib.h>
+#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
+#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd
+
/*
* Zip file constants.
@@ -32,6 +36,7 @@
#define kEOCDSignature 0x06054b50
#define kEOCDLen 22
#define kEOCDNumEntries 8 // offset to #of entries in file
+#define kEOCDSize 12 // size of the central directory
#define kEOCDFileOffset 16 // offset to central directory
#define kMaxCommentLen 65535 // longest possible in ushort
@@ -72,7 +77,7 @@ static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry)
if (ent < 0 || ent >= pArchive->mHashTableSize ||
pArchive->mHashTable[ent].name == NULL)
{
- LOGW("Invalid ZipEntry %p (%ld)\n", entry, ent);
+ LOGW("Zip: invalid ZipEntry %p (%ld)\n", entry, ent);
return -1;
}
return ent;
@@ -134,156 +139,205 @@ static u4 get4LE(unsigned char const* pSrc)
}
/*
- * Parse the Zip archive, verifying its contents and initializing internal
- * data structures.
+ * Find the zip Central Directory and memory-map it.
+ *
+ * On success, returns 0 after populating fields from the EOCD area:
+ * mDirectoryOffset
+ * mDirectoryMap
+ * mNumEntries
*/
-static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap)
+static int mapCentralDirectory(int fd, const char* debugFileName,
+ ZipArchive* pArchive)
{
-#define CHECK_OFFSET(_off) { \
- if ((unsigned int) (_off) >= maxOffset) { \
- LOGE("ERROR: bad offset %u (max %d): %s\n", \
- (unsigned int) (_off), maxOffset, #_off); \
- goto bail; \
- } \
+ u1* scanBuf = NULL;
+ int result = -1;
+
+ /*
+ * Get and test file length.
+ */
+ off_t fileLength = lseek(fd, 0, SEEK_END);
+ if (fileLength < kEOCDLen) {
+ LOGV("Zip: length %ld is too small to be zip\n", (long) fileLength);
+ goto bail;
}
- bool result = false;
- const unsigned char* basePtr = (const unsigned char*)pMap->addr;
- const unsigned char* ptr;
- size_t length = pMap->length;
- unsigned int i, numEntries, cdOffset;
- unsigned int val;
/*
- * The first 4 bytes of the file will either be the local header
- * signature for the first file (kLFHSignature) or, if the archive doesn't
- * have any files in it, the end-of-central-directory signature
- * (kEOCDSignature).
+ * Perform the traditional EOCD snipe hunt.
+ *
+ * We're searching for the End of Central Directory magic number,
+ * which appears at the start of the EOCD block. It's followed by
+ * 18 bytes of EOCD stuff and up to 64KB of archive comment. We
+ * need to read the last part of the file into a buffer, dig through
+ * it to find the magic number, parse some values out, and use those
+ * to determine the extent of the CD.
+ *
+ * We start by pulling in the last part of the file.
*/
- val = get4LE(basePtr);
- if (val == kEOCDSignature) {
- LOGI("Found Zip archive, but it looks empty\n");
+ size_t readAmount = kMaxEOCDSearch;
+ if (readAmount > (size_t) fileLength)
+ readAmount = fileLength;
+ off_t searchStart = fileLength - readAmount;
+
+ scanBuf = (u1*) malloc(readAmount);
+ if (lseek(fd, searchStart, SEEK_SET) != searchStart) {
+ LOGW("Zip: seek %ld failed: %s\n", (long) searchStart, strerror(errno));
goto bail;
- } else if (val != kLFHSignature) {
- LOGV("Not a Zip archive (found 0x%08x)\n", val);
+ }
+ ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scanBuf, readAmount));
+ if (actual != (ssize_t) readAmount) {
+ LOGW("Zip: read %zd failed: %s\n", readAmount, strerror(errno));
goto bail;
}
/*
- * Find the EOCD. We'll find it immediately unless they have a file
- * comment.
+ * Scan backward for the EOCD magic. In an archive without a trailing
+ * comment, we'll find it on the first try. (We may want to consider
+ * doing an initial minimal read; if we don't find it, retry with a
+ * second read as above.)
*/
- ptr = basePtr + length - kEOCDLen;
-
- while (ptr >= basePtr) {
- if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature)
+ int i;
+ for (i = readAmount - kEOCDLen; i >= 0; i--) {
+ if (scanBuf[i] == 0x50 && get4LE(&scanBuf[i]) == kEOCDSignature) {
+ LOGV("+++ Found EOCD at buf+%d\n", i);
break;
- ptr--;
+ }
}
- if (ptr < basePtr) {
- LOGI("Could not find end-of-central-directory in Zip\n");
+ if (i < 0) {
+ LOGD("Zip: EOCD not found, %s is not zip\n", debugFileName);
goto bail;
}
+ off_t eocdOffset = searchStart + i;
+ const u1* eocdPtr = scanBuf + i;
+
+ assert(eocdOffset < fileLength);
+
/*
- * There are two interesting items in the EOCD block: the number of
- * entries in the file, and the file offset of the start of the
- * central directory.
- *
- * (There's actually a count of the #of entries in this file, and for
- * all files which comprise a spanned archive, but for our purposes
- * we're only interested in the current file. Besides, we expect the
- * two to be equivalent for our stuff.)
+ * Grab the CD offset and size, and the number of entries in the
+ * archive. Verify that they look reasonable.
*/
- numEntries = get2LE(ptr + kEOCDNumEntries);
- cdOffset = get4LE(ptr + kEOCDFileOffset);
+ u4 numEntries = get2LE(eocdPtr + kEOCDNumEntries);
+ u4 dirSize = get4LE(eocdPtr + kEOCDSize);
+ u4 dirOffset = get4LE(eocdPtr + kEOCDFileOffset);
+
+ if ((long long) dirOffset + (long long) dirSize > (long long) eocdOffset) {
+ LOGW("Zip: bad offsets (dir %ld, size %u, eocd %ld)\n",
+ (long) dirOffset, dirSize, (long) eocdOffset);
+ goto bail;
+ }
+ if (numEntries == 0) {
+ LOGW("Zip: empty archive?\n");
+ goto bail;
+ }
- /* valid offsets are [0,EOCD] */
- unsigned int maxOffset;
- maxOffset = (ptr - basePtr) +1;
+ LOGV("+++ numEntries=%d dirSize=%d dirOffset=%d\n",
+ numEntries, dirSize, dirOffset);
- LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset);
- if (numEntries == 0 || cdOffset >= length) {
- LOGW("Invalid entries=%d offset=%d (len=%zd)\n",
- numEntries, cdOffset, length);
+ /*
+ * It all looks good. Create a mapping for the CD, and set the fields
+ * in pArchive.
+ */
+ if (sysMapFileSegmentInShmem(fd, dirOffset, dirSize,
+ &pArchive->mDirectoryMap) != 0)
+ {
+ LOGW("Zip: cd map failed\n");
goto bail;
}
+ pArchive->mNumEntries = numEntries;
+ pArchive->mDirectoryOffset = dirOffset;
+
+ result = 0;
+
+bail:
+ free(scanBuf);
+ return result;
+}
+
+/*
+ * Parses the Zip archive's Central Directory. Allocates and populates the
+ * hash table.
+ *
+ * Returns 0 on success.
+ */
+static int parseZipArchive(ZipArchive* pArchive)
+{
+ int result = -1;
+ const u1* cdPtr = (const u1*)pArchive->mDirectoryMap.addr;
+ size_t cdLength = pArchive->mDirectoryMap.length;
+ int numEntries = pArchive->mNumEntries;
+
/*
* Create hash table. We have a minimum 75% load factor, possibly as
* low as 50% after we round off to a power of 2. There must be at
* least one unused entry to avoid an infinite loop during creation.
*/
- pArchive->mNumEntries = numEntries;
pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3);
pArchive->mHashTable = (ZipHashEntry*)
calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry));
/*
* Walk through the central directory, adding entries to the hash
- * table.
+ * table and verifying values.
*/
- ptr = basePtr + cdOffset;
+ const u1* ptr = cdPtr;
+ int i;
for (i = 0; i < numEntries; i++) {
- unsigned int fileNameLen, extraLen, commentLen, localHdrOffset;
- const unsigned char* localHdr;
- unsigned int hash;
-
if (get4LE(ptr) != kCDESignature) {
- LOGW("Missed a central dir sig (at %d)\n", i);
+ LOGW("Zip: missed a central dir sig (at %d)\n", i);
+ goto bail;
+ }
+ if (ptr + kCDELen > cdPtr + cdLength) {
+ LOGW("Zip: ran off the end (at %d)\n", i);
goto bail;
}
- if (ptr + kCDELen > basePtr + length) {
- LOGW("Ran off the end (at %d)\n", i);
+
+ long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
+ if (localHdrOffset >= pArchive->mDirectoryOffset) {
+ LOGW("Zip: bad LFH offset %ld at entry %d\n", localHdrOffset, i);
goto bail;
}
- localHdrOffset = get4LE(ptr + kCDELocalOffset);
- CHECK_OFFSET(localHdrOffset);
+ unsigned int fileNameLen, extraLen, commentLen, hash;
fileNameLen = get2LE(ptr + kCDENameLen);
extraLen = get2LE(ptr + kCDEExtraLen);
commentLen = get2LE(ptr + kCDECommentLen);
- //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n",
- // i, localHdrOffset, fileNameLen, extraLen, commentLen);
- //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen);
-
/* add the CDE filename to the hash table */
hash = computeHash((const char*)ptr + kCDELen, fileNameLen);
addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash);
- localHdr = basePtr + localHdrOffset;
- if (get4LE(localHdr) != kLFHSignature) {
- LOGW("Bad offset to local header: %d (at %d)\n",
- localHdrOffset, i);
+ ptr += kCDELen + fileNameLen + extraLen + commentLen;
+ if ((size_t)(ptr - cdPtr) > cdLength) {
+ LOGW("Zip: bad CD advance (%d vs %zd) at entry %d\n",
+ (int) (ptr - cdPtr), cdLength, i);
goto bail;
}
-
- ptr += kCDELen + fileNameLen + extraLen + commentLen;
- CHECK_OFFSET(ptr - basePtr);
}
+ LOGV("+++ zip good scan %d entries\n", numEntries);
- result = true;
+ result = 0;
bail:
return result;
-#undef CHECK_OFFSET
}
/*
- * Open the specified file read-only. We memory-map the entire thing and
- * parse the contents.
+ * Open the specified file read-only. We examine the contents and verify
+ * that it appears to be a valid zip file.
*
* This will be called on non-Zip files, especially during VM startup, so
* we don't want to be too noisy about certain types of failure. (Do
* we want a "quiet" flag?)
*
- * On success, we fill out the contents of "pArchive" and return 0.
+ * On success, we fill out the contents of "pArchive" and return 0. On
+ * failure we return the errno value.
*/
int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive)
{
int fd, err;
- LOGV("Opening archive '%s' %p\n", fileName, pArchive);
+ LOGV("Opening as zip '%s' %p\n", fileName, pArchive);
memset(pArchive, 0, sizeof(ZipArchive));
@@ -298,47 +352,32 @@ int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive)
}
/*
- * Prepare to access a ZipArchive in an open file descriptor.
+ * Prepare to access a ZipArchive through an open file descriptor.
+ *
+ * On success, we fill out the contents of "pArchive" and return 0.
*/
int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive)
{
- MemMapping map;
- int err;
+ int result = -1;
- map.addr = NULL;
memset(pArchive, 0, sizeof(*pArchive));
-
pArchive->mFd = fd;
- if (sysMapFileInShmemReadOnly(pArchive->mFd, &map) != 0) {
- err = -1;
- LOGW("Map of '%s' failed\n", debugFileName);
- goto bail;
- }
-
- if (map.length < kEOCDLen) {
- err = -1;
- LOGV("File '%s' too small to be zip (%zd)\n", debugFileName,map.length);
+ if (mapCentralDirectory(fd, debugFileName, pArchive) != 0)
goto bail;
- }
- if (!parseZipArchive(pArchive, &map)) {
- err = -1;
- LOGV("Parsing '%s' failed\n", debugFileName);
+ if (parseZipArchive(pArchive) != 0) {
+ LOGV("Zip: parsing '%s' failed\n", debugFileName);
goto bail;
}
/* success */
- err = 0;
- sysCopyMap(&pArchive->mMap, &map);
- map.addr = NULL;
+ result = 0;
bail:
- if (err != 0)
+ if (result != 0)
dexZipCloseArchive(pArchive);
- if (map.addr != NULL)
- sysReleaseShmem(&map);
- return err;
+ return result;
}
@@ -354,10 +393,12 @@ void dexZipCloseArchive(ZipArchive* pArchive)
if (pArchive->mFd >= 0)
close(pArchive->mFd);
- sysReleaseShmem(&pArchive->mMap);
+ sysReleaseShmem(&pArchive->mDirectoryMap);
free(pArchive->mHashTable);
+ /* ensure nobody tries to use the ZipArchive after it's closed */
+ pArchive->mDirectoryOffset = -1;
pArchive->mFd = -1;
pArchive->mNumEntries = -1;
pArchive->mHashTableSize = -1;
@@ -382,7 +423,7 @@ ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName)
memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0)
{
/* match */
- return (ZipEntry) (ent + kZipEntryAdj);
+ return (ZipEntry)(long)(ent + kZipEntryAdj);
}
ent = (ent + 1) & (hashTableSize-1);
@@ -421,27 +462,27 @@ ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx)
/*
* Get the useful fields from the zip entry.
*
- * Returns "false" if the offsets to the fields or the contents of the fields
- * appear to be bogus.
+ * Returns non-zero if the contents of the fields (particularly the data
+ * offset) appear to be bogus.
*/
-bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
- int* pMethod, long* pUncompLen, long* pCompLen, off_t* pOffset,
+int dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
+ int* pMethod, size_t* pUncompLen, size_t* pCompLen, off_t* pOffset,
long* pModWhen, long* pCrc32)
{
int ent = entryToIndex(pArchive, entry);
if (ent < 0)
- return false;
+ return -1;
/*
* Recover the start of the central directory entry from the filename
- * pointer.
+ * pointer. The filename is the first entry past the fixed-size data,
+ * so we can just subtract back from that.
*/
const unsigned char* basePtr = (const unsigned char*)
- pArchive->mMap.addr;
+ pArchive->mDirectoryMap.addr;
const unsigned char* ptr = (const unsigned char*)
pArchive->mHashTable[ent].name;
- size_t zipLength =
- pArchive->mMap.length;
+ off_t cdOffset = pArchive->mDirectoryOffset;
ptr -= kCDELen;
@@ -454,87 +495,120 @@ bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
if (pCrc32 != NULL)
*pCrc32 = get4LE(ptr + kCDECRC);
+ size_t compLen = get4LE(ptr + kCDECompLen);
+ if (pCompLen != NULL)
+ *pCompLen = compLen;
+ size_t uncompLen = get4LE(ptr + kCDEUncompLen);
+ if (pUncompLen != NULL)
+ *pUncompLen = uncompLen;
+
/*
- * We need to make sure that the lengths are not so large that somebody
- * trying to map the compressed or uncompressed data runs off the end
- * of the mapped region.
+ * If requested, determine the offset of the start of the data. All we
+ * have is the offset to the Local File Header, which is variable size,
+ * so we have to read the contents of the struct to figure out where
+ * the actual data starts.
+ *
+ * We also need to make sure that the lengths are not so large that
+ * somebody trying to map the compressed or uncompressed data runs
+ * off the end of the mapped region.
+ *
+ * Note we don't verify compLen/uncompLen if they don't request the
+ * dataOffset, because dataOffset is expensive to determine. However,
+ * if they don't have the file offset, they're not likely to be doing
+ * anything with the contents.
*/
- unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset);
- if (localHdrOffset + kLFHLen >= zipLength) {
- LOGE("ERROR: bad local hdr offset in zip\n");
- return false;
- }
- const unsigned char* localHdr = basePtr + localHdrOffset;
- off_t dataOffset = localHdrOffset + kLFHLen
- + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen);
- if ((unsigned long) dataOffset >= zipLength) {
- LOGE("ERROR: bad data offset in zip\n");
- return false;
- }
+ if (pOffset != NULL) {
+ long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
+ if (localHdrOffset + kLFHLen >= cdOffset) {
+ LOGW("Zip: bad local hdr offset in zip\n");
+ return -1;
+ }
- if (pCompLen != NULL) {
- *pCompLen = get4LE(ptr + kCDECompLen);
- if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) {
- LOGE("ERROR: bad compressed length in zip\n");
- return false;
+ u1 lfhBuf[kLFHLen];
+ if (lseek(pArchive->mFd, localHdrOffset, SEEK_SET) != localHdrOffset) {
+ LOGW("Zip: failed seeking to lfh at offset %ld\n", localHdrOffset);
+ return -1;
}
- }
- if (pUncompLen != NULL) {
- *pUncompLen = get4LE(ptr + kCDEUncompLen);
- if (*pUncompLen < 0) {
- LOGE("ERROR: negative uncompressed length in zip\n");
- return false;
+ ssize_t actual =
+ TEMP_FAILURE_RETRY(read(pArchive->mFd, lfhBuf, sizeof(lfhBuf)));
+ if (actual != sizeof(lfhBuf)) {
+ LOGW("Zip: failed reading lfh from offset %ld\n", localHdrOffset);
+ return -1;
+ }
+
+ if (get4LE(lfhBuf) != kLFHSignature) {
+ LOGW("Zip: didn't find signature at start of lfh, offset=%ld\n",
+ localHdrOffset);
+ return -1;
+ }
+
+ off_t dataOffset = localHdrOffset + kLFHLen
+ + get2LE(lfhBuf + kLFHNameLen) + get2LE(lfhBuf + kLFHExtraLen);
+ if (dataOffset >= cdOffset) {
+ LOGW("Zip: bad data offset %ld in zip\n", (long) dataOffset);
+ return -1;
+ }
+
+ /* check lengths */
+ if ((off_t)(dataOffset + compLen) > cdOffset) {
+ LOGW("Zip: bad compressed length in zip (%ld + %zd > %ld)\n",
+ (long) dataOffset, compLen, (long) cdOffset);
+ return -1;
}
+
if (method == kCompressStored &&
- (size_t)(dataOffset + *pUncompLen) >= zipLength)
+ (off_t)(dataOffset + uncompLen) > cdOffset)
{
- LOGE("ERROR: bad uncompressed length in zip\n");
- return false;
+ LOGW("Zip: bad uncompressed length in zip (%ld + %zd > %ld)\n",
+ (long) dataOffset, uncompLen, (long) cdOffset);
+ return -1;
}
- }
- if (pOffset != NULL) {
*pOffset = dataOffset;
}
- return true;
+ return 0;
}
/*
- * Uncompress "deflate" data from one buffer to an open file descriptor.
+ * Uncompress "deflate" data from the archive's file to an open file
+ * descriptor.
*/
-static bool inflateToFile(int fd, const void* inBuf, long uncompLen,
- long compLen)
+static int inflateToFile(int inFd, int outFd, size_t uncompLen, size_t compLen)
{
- bool result = false;
- const int kWriteBufSize = 32768;
- unsigned char writeBuf[kWriteBufSize];
+ int result = -1;
+ const size_t kBufSize = 32768;
+ unsigned char* readBuf = (unsigned char*) malloc(kBufSize);
+ unsigned char* writeBuf = (unsigned char*) malloc(kBufSize);
z_stream zstream;
int zerr;
+ if (readBuf == NULL || writeBuf == NULL)
+ goto bail;
+
/*
* Initialize the zlib stream struct.
*/
- memset(&zstream, 0, sizeof(zstream));
+ memset(&zstream, 0, sizeof(zstream));
zstream.zalloc = Z_NULL;
zstream.zfree = Z_NULL;
zstream.opaque = Z_NULL;
- zstream.next_in = (Bytef*)inBuf;
- zstream.avail_in = compLen;
+ zstream.next_in = NULL;
+ zstream.avail_in = 0;
zstream.next_out = (Bytef*) writeBuf;
- zstream.avail_out = sizeof(writeBuf);
+ zstream.avail_out = kBufSize;
zstream.data_type = Z_UNKNOWN;
- /*
- * Use the undocumented "negative window bits" feature to tell zlib
- * that there's no zlib header waiting for it.
- */
+ /*
+ * Use the undocumented "negative window bits" feature to tell zlib
+ * that there's no zlib header waiting for it.
+ */
zerr = inflateInit2(&zstream, -MAX_WBITS);
if (zerr != Z_OK) {
if (zerr == Z_VERSION_ERROR) {
LOGE("Installed zlib is not compatible with linked version (%s)\n",
ZLIB_VERSION);
} else {
- LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
+ LOGW("Call to inflateInit2 failed (zerr=%d)\n", zerr);
}
goto bail;
}
@@ -543,12 +617,27 @@ static bool inflateToFile(int fd, const void* inBuf, long uncompLen,
* Loop while we have more to do.
*/
do {
- /*
- * Expand data.
- */
+ /* read as much as we can */
+ if (zstream.avail_in == 0) {
+ size_t getSize = (compLen > kBufSize) ? kBufSize : compLen;
+
+ ssize_t actual = TEMP_FAILURE_RETRY(read(inFd, readBuf, getSize));
+ if (actual != (ssize_t) getSize) {
+ LOGW("Zip: inflate read failed (%d vs %zd)\n",
+ (int)actual, getSize);
+ goto z_bail;
+ }
+
+ compLen -= getSize;
+
+ zstream.next_in = readBuf;
+ zstream.avail_in = getSize;
+ }
+
+ /* uncompress the data */
zerr = inflate(&zstream, Z_NO_FLUSH);
if (zerr != Z_OK && zerr != Z_STREAM_END) {
- LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
+ LOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
zerr, zstream.next_in, zstream.avail_in,
zstream.next_out, zstream.avail_out);
goto z_bail;
@@ -556,88 +645,116 @@ static bool inflateToFile(int fd, const void* inBuf, long uncompLen,
/* write when we're full or when we're done */
if (zstream.avail_out == 0 ||
- (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf)))
+ (zerr == Z_STREAM_END && zstream.avail_out != kBufSize))
{
- long writeSize = zstream.next_out - writeBuf;
- int cc = write(fd, writeBuf, writeSize);
- if (cc != (int) writeSize) {
- if (cc < 0) {
- LOGW("write failed in inflate: %s\n", strerror(errno));
+ size_t writeSize = zstream.next_out - writeBuf;
+ ssize_t actual =
+ TEMP_FAILURE_RETRY(write(outFd, writeBuf, writeSize));
+ if (actual != (ssize_t) writeSize) {
+ if (actual < 0) {
+ LOGW("Zip: write failed in inflate: %s\n", strerror(errno));
} else {
- LOGW("partial write in inflate (%d vs %ld)\n",
- cc, writeSize);
+ LOGW("Zip: partial write in inflate (%d vs %zd)\n",
+ (int) actual, writeSize);
}
goto z_bail;
}
zstream.next_out = writeBuf;
- zstream.avail_out = sizeof(writeBuf);
+ zstream.avail_out = kBufSize;
}
} while (zerr == Z_OK);
assert(zerr == Z_STREAM_END); /* other errors should've been caught */
/* paranoia */
- if ((long) zstream.total_out != uncompLen) {
- LOGW("Size mismatch on inflated file (%ld vs %ld)\n",
+ if (zstream.total_out != uncompLen) {
+ LOGW("Zip: size mismatch on inflated file (%ld vs %zd)\n",
zstream.total_out, uncompLen);
goto z_bail;
}
- result = true;
+ result = 0;
z_bail:
inflateEnd(&zstream); /* free up any allocated structures */
bail:
+ free(readBuf);
+ free(writeBuf);
return result;
}
/*
+ * Copy bytes from input to output.
+ */
+static int copyFileToFile(int inFd, int outFd, size_t uncompLen)
+{
+ const size_t kBufSize = 32768;
+ unsigned char buf[kBufSize];
+
+ while (uncompLen != 0) {
+ size_t getSize = (uncompLen > kBufSize) ? kBufSize : uncompLen;
+
+ ssize_t actual = TEMP_FAILURE_RETRY(read(inFd, buf, getSize));
+ if (actual != (ssize_t) getSize) {
+ LOGW("Zip: copy read failed (%d vs %zd)\n", (int)actual, getSize);
+ return -1;
+ }
+
+ actual = TEMP_FAILURE_RETRY(write(outFd, buf, getSize));
+ if (actual != (ssize_t) getSize) {
+ /* could be disk out of space, so show errno in message */
+ LOGW("Zip: copy write failed (%d vs %zd): %s\n",
+ (int) actual, getSize, strerror(errno));
+ return -1;
+ }
+
+ uncompLen -= getSize;
+ }
+
+ return 0;
+}
+
+/*
* Uncompress an entry, in its entirety, to an open file descriptor.
*
* TODO: this doesn't verify the data's CRC, but probably should (especially
* for uncompressed data).
*/
-bool dexZipExtractEntryToFile(const ZipArchive* pArchive,
+int dexZipExtractEntryToFile(const ZipArchive* pArchive,
const ZipEntry entry, int fd)
{
- bool result = false;
+ int result = -1;
int ent = entryToIndex(pArchive, entry);
- if (ent < 0)
- return -1;
+ if (ent < 0) {
+ LOGW("Zip: extract can't find entry %p\n", entry);
+ goto bail;
+ }
- const unsigned char* basePtr = (const unsigned char*)pArchive->mMap.addr;
int method;
- long uncompLen, compLen;
- off_t offset;
+ size_t uncompLen, compLen;
+ off_t dataOffset;
- if (!dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen,
- &offset, NULL, NULL))
+ if (dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen,
+ &dataOffset, NULL, NULL) != 0)
{
goto bail;
}
+ if (lseek(pArchive->mFd, dataOffset, SEEK_SET) != dataOffset) {
+ LOGW("Zip: lseek to data at %ld failed\n", (long) dataOffset);
+ goto bail;
+ }
if (method == kCompressStored) {
- ssize_t actual;
-
- actual = write(fd, basePtr + offset, uncompLen);
- if (actual < 0) {
- LOGE("Write failed: %s\n", strerror(errno));
+ if (copyFileToFile(pArchive->mFd, fd, uncompLen) != 0)
goto bail;
- } else if (actual != uncompLen) {
- LOGE("Partial write during uncompress (%d of %ld)\n",
- (int) actual, uncompLen);
- goto bail;
- } else {
- LOGI("+++ successful write\n");
- }
} else {
- if (!inflateToFile(fd, basePtr+offset, uncompLen, compLen))
+ if (inflateToFile(pArchive->mFd, fd, uncompLen, compLen) != 0)
goto bail;
}
- result = true;
+ result = 0;
bail:
return result;
diff --git a/libdex/ZipArchive.h b/libdex/ZipArchive.h
index 0cd98b221..bf4edf972 100644
--- a/libdex/ZipArchive.h
+++ b/libdex/ZipArchive.h
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/*
* Read-only access to Zip archives, with minimal heap allocation.
*/
@@ -41,12 +42,15 @@ typedef struct ZipHashEntry {
/*
* Read-only Zip archive.
*
- * We want "open" and "find entry by name" to be fast operations, and we
- * want to use as little memory as possible. We memory-map the file,
- * and load a hash table with pointers to the filenames (which aren't
- * null-terminated). The other fields are at a fixed offset from the
- * filename, so we don't need to extract those (but we do need to byte-read
- * and endian-swap them every time we want them).
+ * We want "open" and "find entry by name" to be fast operations, and
+ * we want to use as little memory as possible. We memory-map the zip
+ * central directory, and load a hash table with pointers to the filenames
+ * (which aren't null-terminated). The other fields are at a fixed offset
+ * from the filename, so we don't need to extract those (but we do need
+ * to byte-read and endian-swap them every time we want them).
+ *
+ * It's possible that somebody has handed us a massive (~1GB) zip archive,
+ * so we can't expect to mmap the entire file.
*
* To speed comparisons when doing a lookup by name, we could make the mapping
* "private" (copy-on-write) and null-terminate the filenames after verifying
@@ -58,8 +62,9 @@ typedef struct ZipArchive {
/* open Zip archive */
int mFd;
- /* mapped file */
- MemMapping mMap;
+ /* mapped central directory area */
+ off_t mDirectoryOffset;
+ MemMapping mDirectoryMap;
/* number of entries in the Zip archive */
int mNumEntries;
@@ -121,9 +126,11 @@ ZipEntry dexZipFindEntry(const ZipArchive* pArchive,
/*
* Retrieve one or more of the "interesting" fields. Non-NULL pointers
* are filled in.
+ *
+ * Returns 0 on success.
*/
-bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
- int* pMethod, long* pUncompLen, long* pCompLen, off_t* pOffset,
+int dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
+ int* pMethod, size_t* pUncompLen, size_t* pCompLen, off_t* pOffset,
long* pModWhen, long* pCrc32);
/*
@@ -136,10 +143,10 @@ DEX_INLINE long dexGetZipEntryOffset(const ZipArchive* pArchive,
dexZipGetEntryInfo(pArchive, entry, NULL, NULL, NULL, &val, NULL, NULL);
return (long) val;
}
-DEX_INLINE long dexGetZipEntryUncompLen(const ZipArchive* pArchive,
+DEX_INLINE size_t dexGetZipEntryUncompLen(const ZipArchive* pArchive,
const ZipEntry entry)
{
- long val = 0;
+ size_t val = 0;
dexZipGetEntryInfo(pArchive, entry, NULL, &val, NULL, NULL, NULL, NULL);
return val;
}
@@ -160,8 +167,10 @@ DEX_INLINE long dexGetZipEntryCrc32(const ZipArchive* pArchive,
/*
* Uncompress and write an entry to a file descriptor.
+ *
+ * Returns 0 on success.
*/
-bool dexZipExtractEntryToFile(const ZipArchive* pArchive,
+int dexZipExtractEntryToFile(const ZipArchive* pArchive,
const ZipEntry entry, int fd);
/*
diff --git a/vm/JarFile.c b/vm/JarFile.c
index 0a58390a5..fbadf9c3d 100644
--- a/vm/JarFile.c
+++ b/vm/JarFile.c
@@ -283,7 +283,7 @@ tryArchive:
if (result) {
startWhen = dvmGetRelativeTimeUsec();
- result = dexZipExtractEntryToFile(&archive, entry, fd);
+ result = dexZipExtractEntryToFile(&archive, entry, fd) == 0;
extractWhen = dvmGetRelativeTimeUsec();
}
if (result) {