diff options
| author | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:28:47 -0800 |
|---|---|---|
| committer | The Android Open Source Project <initial-contribution@android.com> | 2009-03-03 19:28:47 -0800 |
| commit | f6c387128427e121477c1b32ad35cdcaa5101ba3 (patch) | |
| tree | 2aa25fa8c8c3a9caeecf98fd8ac4cd9b12717997 /libdex | |
| parent | f72d5de56a522ac3be03873bdde26f23a5eeeb3c (diff) | |
| download | android_dalvik-f6c387128427e121477c1b32ad35cdcaa5101ba3.tar.gz android_dalvik-f6c387128427e121477c1b32ad35cdcaa5101ba3.tar.bz2 android_dalvik-f6c387128427e121477c1b32ad35cdcaa5101ba3.zip | |
auto import from //depot/cupcake/@135843
Diffstat (limited to 'libdex')
| -rw-r--r-- | libdex/Android.mk | 64 | ||||
| -rw-r--r-- | libdex/CmdUtils.c | 184 | ||||
| -rw-r--r-- | libdex/CmdUtils.h | 66 | ||||
| -rw-r--r-- | libdex/DexCatch.c | 90 | ||||
| -rw-r--r-- | libdex/DexCatch.h | 162 | ||||
| -rw-r--r-- | libdex/DexClass.c | 192 | ||||
| -rw-r--r-- | libdex/DexClass.h | 162 | ||||
| -rw-r--r-- | libdex/DexDataMap.c | 141 | ||||
| -rw-r--r-- | libdex/DexDataMap.h | 73 | ||||
| -rw-r--r-- | libdex/DexFile.c | 1183 | ||||
| -rw-r--r-- | libdex/DexFile.h | 1054 | ||||
| -rw-r--r-- | libdex/DexInlines.c | 31 | ||||
| -rw-r--r-- | libdex/DexProto.c | 534 | ||||
| -rw-r--r-- | libdex/DexProto.h | 216 | ||||
| -rw-r--r-- | libdex/DexSwapVerify.c | 2851 | ||||
| -rw-r--r-- | libdex/InstrUtils.c | 1234 | ||||
| -rw-r--r-- | libdex/InstrUtils.h | 177 | ||||
| -rw-r--r-- | libdex/Leb128.c | 65 | ||||
| -rw-r--r-- | libdex/Leb128.h | 127 | ||||
| -rw-r--r-- | libdex/OpCode.h | 653 | ||||
| -rw-r--r-- | libdex/OptInvocation.c | 143 | ||||
| -rw-r--r-- | libdex/OptInvocation.h | 50 | ||||
| -rw-r--r-- | libdex/SysUtil.c | 286 | ||||
| -rw-r--r-- | libdex/SysUtil.h | 72 | ||||
| -rw-r--r-- | libdex/ZipArchive.c | 643 | ||||
| -rw-r--r-- | libdex/ZipArchive.h | 173 | ||||
| -rw-r--r-- | libdex/sha1.c | 514 | ||||
| -rw-r--r-- | libdex/sha1.h | 20 |
28 files changed, 11160 insertions, 0 deletions
diff --git a/libdex/Android.mk b/libdex/Android.mk new file mode 100644 index 000000000..df45f045a --- /dev/null +++ b/libdex/Android.mk @@ -0,0 +1,64 @@ +# Copyright (C) 2008 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +LOCAL_PATH:= $(call my-dir) + +dex_src_files := \ + CmdUtils.c \ + DexCatch.c \ + DexClass.c \ + DexDataMap.c \ + DexFile.c \ + DexInlines.c \ + DexProto.c \ + DexSwapVerify.c \ + InstrUtils.c \ + Leb128.c \ + OptInvocation.c \ + sha1.c \ + SysUtil.c \ + ZipArchive.c + +dex_include_files := \ + dalvik \ + $(JNI_H_INCLUDE) \ + external/zlib \ + external/safe-iop/include + +## +## +## Build the device version of libdex +## +## +ifneq ($(SDK_ONLY),true) # SDK_only doesn't need device version + +include $(CLEAR_VARS) +LOCAL_SRC_FILES := $(dex_src_files) +LOCAL_C_INCLUDES += $(dex_include_files) +LOCAL_MODULE := libdex +include $(BUILD_STATIC_LIBRARY) + +endif # !SDK_ONLY + + +## +## +## Build the host version of libdex +## +## +include $(CLEAR_VARS) +LOCAL_SRC_FILES := $(dex_src_files) +LOCAL_C_INCLUDES += $(dex_include_files) +LOCAL_MODULE := libdex +include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/libdex/CmdUtils.c b/libdex/CmdUtils.c new file mode 100644 index 000000000..ca1054c7f --- /dev/null +++ b/libdex/CmdUtils.c @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance 
with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Some utility functions for use with command-line utilities.
+ */
+#include "DexFile.h"
+#include "ZipArchive.h"
+#include "CmdUtils.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+
+
+/*
+ * Extract "classes.dex" from archive file.
+ *
+ * The output file is created exclusively (O_EXCL) with mode 0600, so the
+ * call fails with kUTFROutputFileProblem if "outFileName" already exists.
+ * If anything goes wrong after the output file has been created, the
+ * partially-written file is removed again.
+ *
+ * If "quiet" is set, don't report common errors (archive can't be opened,
+ * archive has no classes.dex).  Output-file and extraction failures are
+ * always reported.
+ *
+ * Returns kUTFRSuccess on success, or the UnzipToFileResult code that
+ * describes the failure.
+ */
+UnzipToFileResult dexUnzipToFile(const char* zipFileName,
+    const char* outFileName, bool quiet)
+{
+    UnzipToFileResult result = kUTFRSuccess;
+    static const char* kFileToExtract = "classes.dex";
+    ZipArchive archive;
+    ZipEntry entry;
+    bool unlinkOnFailure = false;
+    int fd = -1;
+
+    if (dexZipOpenArchive(zipFileName, &archive) != 0) {
+        if (!quiet) {
+            fprintf(stderr, "Unable to open '%s' as zip archive\n",
+                zipFileName);
+        }
+        result = kUTFRNotZip;
+        goto bail;
+    }
+
+    fd = open(outFileName, O_WRONLY | O_CREAT | O_EXCL, 0600);
+    if (fd < 0) {
+        fprintf(stderr, "Unable to create output file '%s': %s\n",
+            outFileName, strerror(errno));
+        result = kUTFROutputFileProblem;
+        goto bail;
+    }
+
+    /* from here on the output file is ours, so clean it up on failure */
+    unlinkOnFailure = true;
+
+    entry = dexZipFindEntry(&archive, kFileToExtract);
+    if (entry == NULL) {
+        if (!quiet) {
+            fprintf(stderr, "Unable to find '%s' in '%s'\n",
+                kFileToExtract, zipFileName);
+        }
+        result = kUTFRNoClassesDex;
+        goto bail;
+    }
+
+    if (!dexZipExtractEntryToFile(&archive, entry, fd)) {
+        fprintf(stderr, "Extract of '%s' from '%s' failed\n",
+            kFileToExtract, zipFileName);
+        result = kUTFRBadZip;
+        goto bail;
+    }
+
+bail:
+    if (fd >= 0)
+        close(fd);
+    if (unlinkOnFailure && result != kUTFRSuccess)
+        unlink(outFileName);
+    /*
+     * NOTE(review): this also runs when dexZipOpenArchive() failed; that
+     * presumably relies on the archive struct being left in a closeable
+     * state by a failed open -- confirm against ZipArchive.c.
+     */
+    dexZipCloseArchive(&archive);
+    return result;
+}
+
+/*
+ * Map the specified DEX file read-only (possibly after expanding it into a
+ * temp file from a Jar).  Pass in a MemMapping struct to hold the info.
+ *
+ * If "fileName" does not end in "dex" it is first treated as a Zip archive
+ * and its "classes.dex" is extracted to "tempFileName".  If "tempFileName"
+ * is NULL, a default name based on the process ID is used (in /tmp when
+ * that is writable, otherwise in /sdcard).  If the file turns out not to
+ * be a Zip archive, it is mapped directly as a DEX file instead.
+ *
+ * The temp file is removed before returning, whether or not the map
+ * succeeded.
+ *
+ * This is intended for use by tools (e.g. dexdump) that need to get a
+ * read-only copy of a DEX file that could be in a number of different states.
+ *
+ * If "quiet" is set, don't report common errors.
+ *
+ * Returns kUTFRSuccess (0) on success.  On failure the result is the code
+ * from the unzip step if that is what failed, kUTFRNotZip if the file was
+ * not a Zip archive and could not be opened/mapped as DEX either, or
+ * kUTFRGenericFailure if the open or map step failed.
+ */
+UnzipToFileResult dexOpenAndMap(const char* fileName, const char* tempFileName,
+    MemMapping* pMap, bool quiet)
+{
+    /*
+     * Start out pessimistic so that no early exit can report success by
+     * accident; only a completed mapping sets kUTFRSuccess.
+     */
+    UnzipToFileResult result = kUTFRGenericFailure;
+    int len = strlen(fileName);
+    char tempNameBuf[32];
+    bool removeTemp = false;
+    int fd = -1;
+
+    if (len < 5) {
+        if (!quiet) {
+            fprintf(stderr,
+                "ERROR: filename must end in .dex, .zip, .jar, or .apk\n");
+        }
+        result = kUTFRBadArgs;
+        goto bail;
+    }
+
+    if (strcasecmp(fileName + len -3, "dex") != 0) {
+        UnzipToFileResult unzipResult;
+
+        if (tempFileName == NULL) {
+            /*
+             * Try .zip/.jar/.apk, all of which are Zip archives with
+             * "classes.dex" inside.  We need to extract the compressed
+             * data to a temp file, the location of which varies.
+             */
+            if (access("/tmp", W_OK) == 0) {
+                snprintf(tempNameBuf, sizeof(tempNameBuf),
+                    "/tmp/dex-temp-%d", (int) getpid());
+            } else {
+                snprintf(tempNameBuf, sizeof(tempNameBuf),
+                    "/sdcard/dex-temp-%d", (int) getpid());
+            }
+
+            tempFileName = tempNameBuf;
+        }
+
+        /*
+         * Always go through tempFileName: the local buffer is only filled
+         * in when the caller did not supply a name of their own.
+         */
+        unzipResult = dexUnzipToFile(fileName, tempFileName, quiet);
+
+        if (unzipResult == kUTFRSuccess) {
+            //printf("+++ Good unzip to '%s'\n", tempFileName);
+            fileName = tempFileName;
+            removeTemp = true;
+        } else if (unzipResult == kUTFRNotZip) {
+            /* keep the code so a later open/map failure still reports it */
+            result = unzipResult;
+            if (!quiet) {
+                fprintf(stderr, "Not Zip, retrying as DEX\n");
+            }
+        } else {
+            result = unzipResult;
+            if (!quiet && result == kUTFRNoClassesDex) {
+                fprintf(stderr, "Zip has no classes.dex\n");
+            }
+            goto bail;
+        }
+    }
+
+    /*
+     * Pop open the (presumed) DEX file.
+     */
+    fd = open(fileName, O_RDONLY);
+    if (fd < 0) {
+        if (!quiet) {
+            fprintf(stderr, "ERROR: unable to open '%s': %s\n",
+                fileName, strerror(errno));
+        }
+        goto bail;
+    }
+
+    if (sysMapFileInShmem(fd, pMap) != 0) {
+        fprintf(stderr, "ERROR: Unable to map %s\n", fileName);
+        /* fd is closed exactly once, at "bail" */
+        goto bail;
+    }
+
+    /*
+     * Success!  Close the file and return with the start/length in pMap.
+     */
+    result = kUTFRSuccess;
+
+bail:
+    if (fd >= 0)
+        close(fd);
+    if (removeTemp) {
+        if (unlink(tempFileName) != 0) {
+            fprintf(stderr, "Warning: unable to remove temp '%s'\n",
+                tempFileName);
+        }
+    }
+    return result;
+}
diff --git a/libdex/CmdUtils.h b/libdex/CmdUtils.h
new file mode 100644
index 000000000..fa354a9d9
--- /dev/null
+++ b/libdex/CmdUtils.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Some utility functions for use with command-line utilities: extracting
+ * "classes.dex" from a Zip archive and mapping a DEX file into memory.
+ */
+#ifndef _LIBDEX_CMDUTILS
+#define _LIBDEX_CMDUTILS
+
+/*
+ * Encode the result of unzipping to a file.  New values are only ever
+ * appended, so existing numeric values stay stable.
+ */
+typedef enum UnzipToFileResult {
+    kUTFRSuccess = 0,
+    kUTFRBadArgs,
+    kUTFRNotZip,
+    kUTFRNoClassesDex,
+    kUTFROutputFileProblem,
+    kUTFRBadZip,
+    kUTFRGenericFailure,    /* failure not covered by a more specific code */
+} UnzipToFileResult;
+
+/*
+ * Map the specified DEX file, possibly after expanding it into a temp file
+ * from a Jar.  Pass in a MemMapping struct to hold the info.
+ *
+ * This is intended for use by tools (e.g. dexdump) that need to get a
+ * read-only copy of a DEX file that could be in a number of different states.
+ *
+ * If "tempFileName" is NULL, a default value is used.  The temp file is
+ * deleted before the function returns.
+ *
+ * If "quiet" is set, common errors are not reported.
+ *
+ * Returns kUTFRSuccess (0) on success, and a nonzero UnzipToFileResult
+ * describing the failure otherwise.
+ */
+UnzipToFileResult dexOpenAndMap(const char* fileName, const char* tempFileName,
+    MemMapping* pMap, bool quiet);
+
+/*
+ * Utility function to open a Zip archive, find "classes.dex", and extract
+ * it to a file.  The output file must not already exist.
+ *
+ * Returns kUTFRSuccess (0) on success.
+ */
+UnzipToFileResult dexUnzipToFile(const char* zipFileName,
+    const char* outFileName, bool quiet);
+
+#endif /*_LIBDEX_CMDUTILS*/
diff --git a/libdex/DexCatch.c b/libdex/DexCatch.c
new file mode 100644
index 000000000..5eae17a66
--- /dev/null
+++ b/libdex/DexCatch.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Functions for dealing with try-catch info. + */ + +#include "DexCatch.h" + +/* Get the first handler offset for the given DexCode. + * It's not 0 because the handlers list is prefixed with its size + * (in entries) as a uleb128. Returns 0 if the code has no tries. */ +u4 dexGetFirstHandlerOffset(const DexCode* pCode) { + if (pCode->triesSize == 0) { + return 0; + } + + const u1* baseData = dexGetCatchHandlerData(pCode); + const u1* data = baseData; + + readUnsignedLeb128(&data); + + return data - baseData; +} + +/* Get count of handler lists for the given DexCode. Returns 0 if the + * code has no tries. */ +u4 dexGetHandlersSize(const DexCode* pCode) { + if (pCode->triesSize == 0) { + return 0; + } + + const u1* data = dexGetCatchHandlerData(pCode); + + return readUnsignedLeb128(&data); +} + +/* Helper for dexFindCatchHandler(), which does the actual binary search + * in the tries table for the try whose range [startAddr, startAddr + + * insnCount) contains the given address. Returns that try's handlerOff, + * or -1 if there is no applicable handler. */ +int dexFindCatchHandlerOffset0(u2 triesSize, const DexTry* pTries, + u4 address) { + // Note: Signed type is important for max and min. + int min = 0; + int max = triesSize - 1; + + while (max >= min) { + int guess = (min + max) >> 1; + const DexTry* pTry = &pTries[guess]; + u4 start = pTry->startAddr; + + if (address < start) { + max = guess - 1; + continue; + } + + u4 end = start + pTry->insnCount; + + if (address >= end) { + min = guess + 1; + continue; + } + + // We have a winner! + return (int) pTry->handlerOff; + } + + // No match. + return -1; +} + +/* Get the handler offset just past the end of the one just iterated over. + * This ends the iteration if it wasn't already. 
*/ +u4 dexCatchIteratorGetEndOffset(DexCatchIterator* pIterator, + const DexCode* pCode) { + while (dexCatchIteratorNext(pIterator) != NULL) /* empty */ ; + + return (u4) (pIterator->pEncodedData - dexGetCatchHandlerData(pCode)); +} diff --git a/libdex/DexCatch.h b/libdex/DexCatch.h new file mode 100644 index 000000000..f928144aa --- /dev/null +++ b/libdex/DexCatch.h @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Functions for dealing with try-catch info. + */ + +#ifndef _LIBDEX_DEXCATCH +#define _LIBDEX_DEXCATCH + +#include "DexFile.h" +#include "Leb128.h" + +/* + * Catch handler entry, used while iterating over catch_handler_items. + */ +typedef struct DexCatchHandler { + u4 typeIdx; /* type index of the caught exception type */ + u4 address; /* handler address */ +} DexCatchHandler; + +/* Get the first handler offset for the given DexCode. + * It's not 0 because the handlers list is prefixed with its size + * (in entries) as a uleb128. */ +u4 dexGetFirstHandlerOffset(const DexCode* pCode); + +/* Get count of handler lists for the given DexCode. */ +u4 dexGetHandlersSize(const DexCode* pCode); + +/* + * Iterator over catch handler data. This structure should be treated as + * opaque. 
+ */ +typedef struct DexCatchIterator { + const u1* pEncodedData; + bool catchesAll; + u4 countRemaining; + DexCatchHandler handler; +} DexCatchIterator; + +/* Initialize a DexCatchIterator to emptiness. This mostly exists to + * squelch innocuous warnings. */ +DEX_INLINE void dexCatchIteratorClear(DexCatchIterator* pIterator) { + pIterator->pEncodedData = NULL; + pIterator->catchesAll = false; + pIterator->countRemaining = 0; + pIterator->handler.typeIdx = 0; + pIterator->handler.address = 0; +} + +/* Initialize a DexCatchIterator with a direct pointer to encoded handlers. */ +DEX_INLINE void dexCatchIteratorInitToPointer(DexCatchIterator* pIterator, + const u1* pEncodedData) +{ + s4 count = readSignedLeb128(&pEncodedData); + + if (count <= 0) { + pIterator->catchesAll = true; + count = -count; + } else { + pIterator->catchesAll = false; + } + + pIterator->pEncodedData = pEncodedData; + pIterator->countRemaining = count; +} + +/* Initialize a DexCatchIterator to a particular handler offset. */ +DEX_INLINE void dexCatchIteratorInit(DexCatchIterator* pIterator, + const DexCode* pCode, u4 offset) +{ + dexCatchIteratorInitToPointer(pIterator, + dexGetCatchHandlerData(pCode) + offset); +} + +/* Get the next item from a DexCatchIterator. Returns NULL if at end. */ +DEX_INLINE DexCatchHandler* dexCatchIteratorNext(DexCatchIterator* pIterator) { + if (pIterator->countRemaining == 0) { + if (! pIterator->catchesAll) { + return NULL; + } + + pIterator->catchesAll = false; + pIterator->handler.typeIdx = kDexNoIndex; + } else { + u4 typeIdx = readUnsignedLeb128(&pIterator->pEncodedData); + pIterator->handler.typeIdx = typeIdx; + pIterator->countRemaining--; + } + + pIterator->handler.address = readUnsignedLeb128(&pIterator->pEncodedData); + return &pIterator->handler; +} + +/* Get the handler offset just past the end of the one just iterated over. + * This ends the iteration if it wasn't already. 
*/ +u4 dexCatchIteratorGetEndOffset(DexCatchIterator* pIterator, + const DexCode* pCode); + +/* Helper for dexFindCatchHandler(). Do not call directly. */ +int dexFindCatchHandlerOffset0(u2 triesSize, const DexTry* pTries, + u4 address); + +/* Find the handler associated with a given address, if any. + * Initializes the given iterator and returns true if a match is + * found. Returns false if there is no applicable handler. */ +DEX_INLINE bool dexFindCatchHandler(DexCatchIterator *pIterator, + const DexCode* pCode, u4 address) { + u2 triesSize = pCode->triesSize; + int offset = -1; + + // Short-circuit the overwhelmingly common cases. + switch (triesSize) { + case 0: { + break; + } + case 1: { + const DexTry* tries = dexGetTries(pCode); + u4 start = tries[0].startAddr; + + if (address < start) { + break; + } + + u4 end = start + tries[0].insnCount; + + if (address >= end) { + break; + } + + offset = tries[0].handlerOff; + break; + } + default: { + offset = dexFindCatchHandlerOffset0(triesSize, dexGetTries(pCode), + address); + } + } + + if (offset < 0) { + dexCatchIteratorClear(pIterator); // This squelches warnings. + return false; + } else { + dexCatchIteratorInit(pIterator, pCode, offset); + return true; + } +} + +#endif diff --git a/libdex/DexClass.c b/libdex/DexClass.c new file mode 100644 index 000000000..126813016 --- /dev/null +++ b/libdex/DexClass.c @@ -0,0 +1,192 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Functions to deal with class definition structures in DEX files
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "DexClass.h"
+#include "Leb128.h"
+
+/* Helper for verification which reads and verifies a given number
+ * of uleb128 values, stopping at the first one that fails. Returns
+ * true if all "count" values verified. */
+static bool verifyUlebs(const u1* pData, const u1* pLimit, u4 count) {
+    bool okay = true;
+
+    while (okay && (count-- != 0)) {
+        readAndVerifyUnsignedLeb128(&pData, pLimit, &okay);
+    }
+
+    return okay;
+}
+
+/* Helper for dexReadAndVerifyClassData() which adds the size of an
+ * array of "count" elements of "elemSize" bytes to *pSize. Returns
+ * false, leaving *pSize untouched, if the sum would not fit in a
+ * size_t. "elemSize" must be nonzero. */
+static bool addArraySize(size_t* pSize, u4 count, size_t elemSize) {
+    const size_t maxSize = (size_t) -1;
+
+    if (count > (maxSize - *pSize) / elemSize) {
+        return false;
+    }
+
+    *pSize += ((size_t) count) * elemSize;
+    return true;
+}
+
+/* Read and verify the header of a class_data_item. This updates the
+ * given data pointer to point past the end of the read data and
+ * returns an "okay" flag (that is, false == failure). On failure the
+ * data pointer and the header are left untouched. */
+bool dexReadAndVerifyClassDataHeader(const u1** pData, const u1* pLimit,
+        DexClassDataHeader *pHeader) {
+    /* the header consists of exactly four uleb128 values */
+    if (! verifyUlebs(*pData, pLimit, 4)) {
+        return false;
+    }
+
+    dexReadClassDataHeader(pData, pHeader);
+    return true;
+}
+
+/* Read and verify an encoded_field. This updates the
+ * given data pointer to point past the end of the read data and
+ * returns an "okay" flag (that is, false == failure).
+ *
+ * The lastIndex value should be set to 0 before the first field in
+ * a list is read. It is updated as fields are read and used in the
+ * decode process.
+ *
+ * The verification done by this function is of the raw data format
+ * only; it does not verify that access flags or indices
+ * are valid. */
+bool dexReadAndVerifyClassDataField(const u1** pData, const u1* pLimit,
+        DexField* pField, u4* lastIndex) {
+    /* an encoded_field consists of exactly two uleb128 values */
+    if (! verifyUlebs(*pData, pLimit, 2)) {
+        return false;
+    }
+
+    dexReadClassDataField(pData, pField, lastIndex);
+    return true;
+}
+
+/* Read and verify an encoded_method. This updates the
+ * given data pointer to point past the end of the read data and
+ * returns an "okay" flag (that is, false == failure).
+ *
+ * The lastIndex value should be set to 0 before the first method in
+ * a list is read. It is updated as methods are read and used in the
+ * decode process.
+ *
+ * The verification done by this function is of the raw data format
+ * only; it does not verify that access flags, indices, or offsets
+ * are valid. */
+bool dexReadAndVerifyClassDataMethod(const u1** pData, const u1* pLimit,
+        DexMethod* pMethod, u4* lastIndex) {
+    /* an encoded_method consists of exactly three uleb128 values */
+    if (! verifyUlebs(*pData, pLimit, 3)) {
+        return false;
+    }
+
+    dexReadClassDataMethod(pData, pMethod, lastIndex);
+    return true;
+}
+
+/* Read, verify, and return an entire class_data_item. This updates
+ * the given data pointer to point past the end of the read data. This
+ * function allocates a single chunk of memory for the result, which
+ * must subsequently be free()d. This function returns NULL if there
+ * was trouble parsing the data or if memory could not be allocated.
+ * If *pData is NULL, it returns an initialized (all-zero) empty
+ * DexClassData structure.
+ *
+ * The verification done by this function is of the raw data format
+ * only; it does not verify that access flags, indices, or offsets
+ * are valid. */
+DexClassData* dexReadAndVerifyClassData(const u1** pData, const u1* pLimit) {
+    DexClassDataHeader header;
+    u4 lastIndex;
+
+    if (*pData == NULL) {
+        /* calloc() zero-fills, and a failed allocation is simply
+         * handed back to the caller as NULL instead of being written
+         * through. */
+        return calloc(1, sizeof(DexClassData));
+    }
+
+    if (! dexReadAndVerifyClassDataHeader(pData, pLimit, &header)) {
+        return NULL;
+    }
+
+    /* The four counts come straight from the file, so the total size
+     * has to be computed with overflow checks; otherwise a wrapped
+     * size would yield a too-small chunk that the loops below would
+     * write past. */
+    size_t resultSize = sizeof(DexClassData);
+
+    if (! addArraySize(&resultSize, header.staticFieldsSize,
+                    sizeof(DexField)) ||
+            ! addArraySize(&resultSize, header.instanceFieldsSize,
+                    sizeof(DexField)) ||
+            ! addArraySize(&resultSize, header.directMethodsSize,
+                    sizeof(DexMethod)) ||
+            ! addArraySize(&resultSize, header.virtualMethodsSize,
+                    sizeof(DexMethod))) {
+        return NULL;
+    }
+
+    DexClassData* result = malloc(resultSize);
+
+    if (result == NULL) {
+        return NULL;
+    }
+
+    /* the four arrays are laid out back to back after the struct */
+    u1* ptr = ((u1*) result) + sizeof(DexClassData);
+    bool okay = true;
+    u4 i;
+
+    result->header = header;
+
+    if (header.staticFieldsSize != 0) {
+        result->staticFields = (DexField*) ptr;
+        ptr += header.staticFieldsSize * sizeof(DexField);
+    } else {
+        result->staticFields = NULL;
+    }
+
+    if (header.instanceFieldsSize != 0) {
+        result->instanceFields = (DexField*) ptr;
+        ptr += header.instanceFieldsSize * sizeof(DexField);
+    } else {
+        result->instanceFields = NULL;
+    }
+
+    if (header.directMethodsSize != 0) {
+        result->directMethods = (DexMethod*) ptr;
+        ptr += header.directMethodsSize * sizeof(DexMethod);
+    } else {
+        result->directMethods = NULL;
+    }
+
+    if (header.virtualMethodsSize != 0) {
+        result->virtualMethods = (DexMethod*) ptr;
+    } else {
+        result->virtualMethods = NULL;
+    }
+
+    /* indices are delta-encoded within each list, so the running
+     * index restarts at 0 for every list */
+    lastIndex = 0;
+    for (i = 0; okay && (i < header.staticFieldsSize); i++) {
+        okay = dexReadAndVerifyClassDataField(pData, pLimit,
+                &result->staticFields[i], &lastIndex);
+    }
+
+    lastIndex = 0;
+    for (i = 0; okay && (i < header.instanceFieldsSize); i++) {
+        okay = dexReadAndVerifyClassDataField(pData, pLimit,
+                &result->instanceFields[i], &lastIndex);
+    }
+
+    lastIndex = 0;
+    for (i = 0; okay && (i < header.directMethodsSize); i++) {
+        okay = dexReadAndVerifyClassDataMethod(pData, pLimit,
+                &result->directMethods[i], &lastIndex);
+    }
+
+    lastIndex = 0;
+    for (i = 0; okay && (i < header.virtualMethodsSize); i++) {
+        okay = dexReadAndVerifyClassDataMethod(pData, pLimit,
+                &result->virtualMethods[i], &lastIndex);
+    }
+
+    if (! okay) {
+        free(result);
+        return NULL;
+    }
+
+    return result;
+}
diff --git a/libdex/DexClass.h b/libdex/DexClass.h
new file mode 100644
index 000000000..ce41e8bb3
--- /dev/null
+++ b/libdex/DexClass.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Functions to deal with class definition structures in DEX files
+ */
+
+#ifndef _LIBDEX_DEXCLASS
+#define _LIBDEX_DEXCLASS
+
+#include "DexFile.h"
+#include "Leb128.h"
+
+/* expanded form of a class_data_item header */
+typedef struct DexClassDataHeader {
+    u4 staticFieldsSize;
+    u4 instanceFieldsSize;
+    u4 directMethodsSize;
+    u4 virtualMethodsSize;
+} DexClassDataHeader;
+
+/* expanded form of encoded_field */
+typedef struct DexField {
+    u4 fieldIdx;    /* index to a field_id_item */
+    u4 accessFlags;
+} DexField;
+
+/* expanded form of encoded_method */
+typedef struct DexMethod {
+    u4 methodIdx;    /* index to a method_id_item */
+    u4 accessFlags;
+    u4 codeOff;      /* file offset to a code_item */
+} DexMethod;
+
+/* expanded form of class_data_item. Note: If a particular item is
+ * absent (e.g., no static fields), then the corresponding pointer
+ * is set to NULL. */
+typedef struct DexClassData {
+    DexClassDataHeader header;
+    DexField*          staticFields;
+    DexField*          instanceFields;
+    DexMethod*         directMethods;
+    DexMethod*         virtualMethods;
+} DexClassData;
+
+/* Read and verify the header of a class_data_item. 
This updates the + * given data pointer to point past the end of the read data and + * returns an "okay" flag (that is, false == failure). */ +bool dexReadAndVerifyClassDataHeader(const u1** pData, const u1* pLimit, + DexClassDataHeader *pHeader); + +/* Read and verify an encoded_field. This updates the + * given data pointer to point past the end of the read data and + * returns an "okay" flag (that is, false == failure). + * + * The lastIndex value should be set to 0 before the first field in + * a list is read. It is updated as fields are read and used in the + * decode process. + * + * The verification done by this function is of the raw data format + * only; it does not verify that access flags or indices + * are valid. */ +bool dexReadAndVerifyClassDataField(const u1** pData, const u1* pLimit, + DexField* pField, u4* lastIndex); + +/* Read and verify an encoded_method. This updates the + * given data pointer to point past the end of the read data and + * returns an "okay" flag (that is, false == failure). + * + * The lastIndex value should be set to 0 before the first method in + * a list is read. It is updated as fields are read and used in the + * decode process. + * + * The verification done by this function is of the raw data format + * only; it does not verify that access flags, indices, or offsets + * are valid. */ +bool dexReadAndVerifyClassDataMethod(const u1** pData, const u1* pLimit, + DexMethod* pMethod, u4* lastIndex); + +/* Read, verify, and return an entire class_data_item. This updates + * the given data pointer to point past the end of the read data. This + * function allocates a single chunk of memory for the result, which + * must subsequently be free()d. This function returns NULL if there + * was trouble parsing the data. If this function is passed NULL, it + * returns an initialized empty DexClassData structure. 
+ * + * The verification done by this function is of the raw data format + * only; it does not verify that access flags, indices, or offsets + * are valid. */ +DexClassData* dexReadAndVerifyClassData(const u1** pData, const u1* pLimit); + +/* + * Get the DexCode for a DexMethod. Returns NULL if the method has no + * code item (its codeOff is 0), as is the case for a native or abstract + * method. Otherwise codeOff is used as an offset from the DEX base address. + */ +DEX_INLINE const DexCode* dexGetCode(const DexFile* pDexFile, + const DexMethod* pDexMethod) +{ + if (pDexMethod->codeOff == 0) + return NULL; + return (const DexCode*) (pDexFile->baseAddr + pDexMethod->codeOff); +} + + +/* Read the header of a class_data_item without verification. This + * updates the given data pointer to point past the end of the read + * data. The header is four consecutive uleb128 values. */ +DEX_INLINE void dexReadClassDataHeader(const u1** pData, + DexClassDataHeader *pHeader) { + pHeader->staticFieldsSize = readUnsignedLeb128(pData); + pHeader->instanceFieldsSize = readUnsignedLeb128(pData); + pHeader->directMethodsSize = readUnsignedLeb128(pData); + pHeader->virtualMethodsSize = readUnsignedLeb128(pData); +} + +/* Read an encoded_field without verification. This updates the + * given data pointer to point past the end of the read data. + * + * The lastIndex value should be set to 0 before the first field in + * a list is read. It is updated as fields are read and used in the + * decode process: each stored index is a delta that is added to + * *lastIndex to get the actual field index. + */ +DEX_INLINE void dexReadClassDataField(const u1** pData, DexField* pField, + u4* lastIndex) { + u4 index = *lastIndex + readUnsignedLeb128(pData); + + pField->accessFlags = readUnsignedLeb128(pData); + pField->fieldIdx = index; + *lastIndex = index; +} + +/* Read an encoded_method without verification. This updates the + * given data pointer to point past the end of the read data. + * + * The lastIndex value should be set to 0 before the first method in + * a list is read. It is updated as methods are read and used in the + * decode process. 
+ */ +DEX_INLINE void dexReadClassDataMethod(const u1** pData, DexMethod* pMethod, + u4* lastIndex) { + u4 index = *lastIndex + readUnsignedLeb128(pData); + + pMethod->accessFlags = readUnsignedLeb128(pData); + pMethod->codeOff = readUnsignedLeb128(pData); + pMethod->methodIdx = index; + *lastIndex = index; +} + +#endif diff --git a/libdex/DexDataMap.c b/libdex/DexDataMap.c new file mode 100644 index 000000000..a9d429e18 --- /dev/null +++ b/libdex/DexDataMap.c @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Verification-time map of data section items + */ + +#include "DexDataMap.h" +#include <safe_iop.h> +#include <stdlib.h> + +/* + * Allocate and initialize a DexDataMap. Returns NULL on failure. + */ +DexDataMap* dexDataMapAlloc(u4 maxCount) { + /* + * Allocate a single chunk for the DexDataMap per se as well as the + * two arrays. + */ + size_t size = 0; + DexDataMap* map = NULL; + + /* + * Avoiding pulling in safe_iop for safe_iopf. + */ + if (!safe_mul(&size, maxCount, sizeof(u4) + sizeof(u2)) || + !safe_add(&size, size, sizeof(DexDataMap))) { + return NULL; + } + + map = malloc(size); + + if (map == NULL) { + return NULL; + } + + map->count = 0; + map->max = maxCount; + map->offsets = (u4*) (map + 1); + map->types = (u2*) (map->offsets + maxCount); + + return map; +} + +/* + * Free a DexDataMap. 
+ */ +void dexDataMapFree(DexDataMap* map) { + /* + * Since everything got allocated together, everything can be freed + * in one fell swoop. Also, free(NULL) is a nop (per spec), so we + * don't have to worry about an explicit test for that. + */ + free(map); +} + +/* + * Add a new element to the map. The offset must be greater than the + * all previously added offsets. + */ +void dexDataMapAdd(DexDataMap* map, u4 offset, u2 type) { + assert(map != NULL); + assert(map->count < map->max); + + if ((map->count != 0) && + (map->offsets[map->count - 1] >= offset)) { + LOGE("Out-of-order data map offset: 0x%x then 0x%x\n", + map->offsets[map->count - 1], offset); + return; + } + + map->offsets[map->count] = offset; + map->types[map->count] = type; + map->count++; +} + +/* + * Get the type associated with the given offset. This returns -1 if + * there is no entry for the given offset. + */ +int dexDataMapGet(DexDataMap* map, u4 offset) { + assert(map != NULL); + + // Note: Signed type is important for max and min. + int min = 0; + int max = map->count - 1; + u4* offsets = map->offsets; + + while (max >= min) { + int guessIdx = (min + max) >> 1; + u4 guess = offsets[guessIdx]; + + if (offset < guess) { + max = guessIdx - 1; + } else if (offset > guess) { + min = guessIdx + 1; + } else { + // We have a winner! + return map->types[guessIdx]; + } + } + + // No match. + return -1; +} + +/* + * Verify that there is an entry in the map, mapping the given offset to + * the given type. This will return true if such an entry exists and + * return false as well as log an error if not. 
+ */ +bool dexDataMapVerify(DexDataMap* map, u4 offset, u2 type) { + int found = dexDataMapGet(map, offset); + + if (found == type) { + return true; + } + + if (found < 0) { + LOGE("No data map entry found @ 0x%x; expected %x\n", + offset, type); + } else { + LOGE("Unexpected data map entry @ 0x%x: expected %x, found %x\n", + offset, type, found); + } + + return false; +} diff --git a/libdex/DexDataMap.h b/libdex/DexDataMap.h new file mode 100644 index 000000000..fa556d5aa --- /dev/null +++ b/libdex/DexDataMap.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Verification-time map of data section items + */ + +#ifndef _LIBDEX_DEXDATAMAP +#define _LIBDEX_DEXDATAMAP + +#include "DexFile.h" + +typedef struct DexDataMap { + u4 count; /* number of items currently in the map */ + u4 max; /* maximum number of items that may be held */ + u4* offsets; /* array of item offsets */ + u2* types; /* corresponding array of item types */ +} DexDataMap; + +/* + * Allocate and initialize a DexDataMap. Returns NULL on failure. + */ +DexDataMap* dexDataMapAlloc(u4 maxCount); + +/* + * Free a DexDataMap. + */ +void dexDataMapFree(DexDataMap* map); + +/* + * Add a new element to the map. The offset must be greater than the + * all previously added offsets. + */ +void dexDataMapAdd(DexDataMap* map, u4 offset, u2 type); + +/* + * Get the type associated with the given offset. 
This returns -1 if + * there is no entry for the given offset. + */ +int dexDataMapGet(DexDataMap* map, u4 offset); + +/* + * Verify that there is an entry in the map, mapping the given offset to + * the given type. This will return true if such an entry exists and + * return false as well as log an error if not. + */ +bool dexDataMapVerify(DexDataMap* map, u4 offset, u2 type); + +/* + * Like dexDataMapVerify(), but also accept a 0 offset as valid. + */ +DEX_INLINE bool dexDataMapVerify0Ok(DexDataMap* map, u4 offset, u2 type) { + if (offset == 0) { + return true; + } + + return dexDataMapVerify(map, offset, type); +} + +#endif /*_LIBDEX_DEXDATAMAP*/ diff --git a/libdex/DexFile.c b/libdex/DexFile.c new file mode 100644 index 000000000..2639d7bdb --- /dev/null +++ b/libdex/DexFile.c @@ -0,0 +1,1183 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Access the contents of a .dex file. + */ + +#include "DexFile.h" +#include "DexProto.h" +#include "Leb128.h" +#include "sha1.h" +#include "ZipArchive.h" + +#include <zlib.h> + +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <fcntl.h> +#include <errno.h> + +/* + * Verifying checksums is good, but it slows things down and causes us to + * touch every page. In the "optimized" world, it doesn't work at all, + * because we rewrite the contents. 
+ */ +static const bool kVerifyChecksum = false; +static const bool kVerifySignature = false; + + +/* Compare two '\0'-terminated modified UTF-8 strings, using Unicode + * code point values for comparison. This treats different encodings + * for the same code point as equivalent, except that only a real '\0' + * byte is considered the string terminator. The return value is as + * for strcmp(). */ +int dexUtf8Cmp(const char* s1, const char* s2) { + for (;;) { + if (*s1 == '\0') { + if (*s2 == '\0') { + return 0; + } + return -1; + } else if (*s2 == '\0') { + return 1; + } + + int utf1 = dexGetUtf16FromUtf8(&s1); + int utf2 = dexGetUtf16FromUtf8(&s2); + int diff = utf1 - utf2; + + if (diff != 0) { + return diff; + } + } +} + +/* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */ +u4 DEX_MEMBER_VALID_LOW_ASCII[4] = { + 0x00000000, // 00..1f low control characters; nothing valid + 0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-' + 0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_' + 0x07fffffe // 60..7f lowercase etc.; valid: 'a'..'z' +}; + +/* Helper for dexIsValidMemberNameUtf8(); do not call directly. */ +bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) { + /* + * It's a multibyte encoded character. Decode it and analyze. We + * accept anything that isn't (a) an improperly encoded low value, + * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high + * control character, or (e) a high space, layout, or special + * character (U+00a0, U+2000..U+200f, U+2028..U+202f, + * U+fff0..U+ffff). + */ + + u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); + + // Perform follow-up tests based on the high 8 bits. + switch (utf16 >> 8) { + case 0x00: { + // It's only valid if it's above the ISO-8859-1 high space (0xa0). + return (utf16 > 0x00a0); + } + case 0xd8: + case 0xd9: + case 0xda: + case 0xdb: { + /* + * It's a leading surrogate. Check to see that a trailing + * surrogate follows. 
+ */ + utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); + return (utf16 >= 0xdc00) && (utf16 <= 0xdfff); + } + case 0xdc: + case 0xdd: + case 0xde: + case 0xdf: { + // It's a trailing surrogate, which is not valid at this point. + return false; + } + case 0x20: + case 0xff: { + // It's in the range that has spaces, controls, and specials. + switch (utf16 & 0xfff8) { + case 0x2000: + case 0x2008: + case 0x2028: + case 0xfff0: + case 0xfff8: { + return false; + } + } + break; + } + } + + return true; +} + +/* Return whether the given string is a valid field or method name. */ +bool dexIsValidMemberName(const char* s) { + bool angleName = false; + + switch (*s) { + case '\0': { + // The empty string is not a valid name. + return false; + } + case '<': { + /* + * '<' is allowed only at the start of a name, and if present, + * means that the name must end with '>'. + */ + angleName = true; + s++; + break; + } + } + + for (;;) { + switch (*s) { + case '\0': { + return !angleName; + } + case '>': { + return angleName && s[1] == '\0'; + } + } + if (!dexIsValidMemberNameUtf8(&s)) { + return false; + } + } +} + +/* Return whether the given string is a valid type descriptor. */ +bool dexIsValidTypeDescriptor(const char* s) { + int arrayCount = 0; + + while (*s == '[') { + arrayCount++; + s++; + } + + if (arrayCount > 255) { + // Arrays may have no more than 255 dimensions. + return false; + } + + switch (*(s++)) { + case 'B': + case 'C': + case 'D': + case 'F': + case 'I': + case 'J': + case 'S': + case 'Z': { + // These are all single-character descriptors for primitive types. + return (*s == '\0'); + } + case 'V': { + // You can't have an array of void. + return (arrayCount == 0) && (*s == '\0'); + } + case 'L': { + // Break out and continue below. + break; + } + default: { + // Oddball descriptor character. + return false; + } + } + + // We just consumed the 'L' that introduces a class name. 
+ + bool slashOrFirst = true; // first character or just encountered a slash + for (;;) { + u1 c = (u1) *s; + switch (c) { + case '\0': { + // Premature end. + return false; + } + case ';': { + /* + * Make sure that this is the end of the string and that + * it doesn't end with an empty component (including the + * degenerate case of "L;"). + */ + return (s[1] == '\0') && !slashOrFirst; + } + case '/': { + if (slashOrFirst) { + // Slash at start or two slashes in a row. + return false; + } + slashOrFirst = true; + s++; + break; + } + default: { + if (!dexIsValidMemberNameUtf8(&s)) { + return false; + } + slashOrFirst = false; + break; + } + } + } +} + +/* Return whether the given string is a valid reference descriptor. This + * is true if dexIsValidTypeDescriptor() returns true and the descriptor + * is for a class or array and not a primitive type. */ +bool dexIsReferenceDescriptor(const char* s) { + if (!dexIsValidTypeDescriptor(s)) { + return false; + } + + return (s[0] == 'L') || (s[0] == '['); +} + +/* Return whether the given string is a valid class descriptor. This + * is true if dexIsValidTypeDescriptor() returns true and the descriptor + * is for a class and not an array or primitive type. */ +bool dexIsClassDescriptor(const char* s) { + if (!dexIsValidTypeDescriptor(s)) { + return false; + } + + return s[0] == 'L'; +} + +/* Return whether the given string is a valid field type descriptor. This + * is true if dexIsValidTypeDescriptor() returns true and the descriptor + * is for anything but "void". 
*/ +bool dexIsFieldDescriptor(const char* s) { + if (!dexIsValidTypeDescriptor(s)) { + return false; + } + + return s[0] != 'V'; +} + +/* Return the UTF-8 encoded string with the specified string_id index, + * also filling in the UTF-16 size (number of 16-bit code points).*/ +const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, + u4* utf16Size) { + const DexStringId* pStringId = dexGetStringId(pDexFile, idx); + const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; + + *utf16Size = readUnsignedLeb128(&ptr); + return (const char*) ptr; +} + +/* + * Format an SHA-1 digest for printing. tmpBuf must be able to hold at + * least kSHA1DigestOutputLen bytes. + */ +const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf); + +/* + * Compute a SHA-1 digest on a range of bytes. + */ +static void dexComputeSHA1Digest(const unsigned char* data, size_t length, + unsigned char digest[]) +{ + SHA1_CTX context; + SHA1Init(&context); + SHA1Update(&context, data, length); + SHA1Final(digest, &context); +} + +/* + * Format the SHA-1 digest into the buffer, which must be able to hold at + * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer, + */ +static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf) +{ + static const char hexDigit[] = "0123456789abcdef"; + char* cp; + int i; + + cp = tmpBuf; + for (i = 0; i < kSHA1DigestLen; i++) { + *cp++ = hexDigit[digest[i] >> 4]; + *cp++ = hexDigit[digest[i] & 0x0f]; + } + *cp++ = '\0'; + + assert(cp == tmpBuf + kSHA1DigestOutputLen); + + return tmpBuf; +} + +/* + * Compute a hash code on a UTF-8 string, for use with internal hash tables. + * + * This may or may not be compatible with UTF-8 hash functions used inside + * the Dalvik VM. + * + * The basic "multiply by 31 and add" approach does better on class names + * than most other things tried (e.g. adler32). 
+ */ +static u4 classDescriptorHash(const char* str) +{ + u4 hash = 1; + + while (*str != '\0') + hash = hash * 31 + *str++; + + return hash; +} + +/* + * Add an entry to the class lookup table. We hash the string and probe + * until we find an open slot. + */ +static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup, + int stringOff, int classDefOff, int* pNumProbes) +{ + const char* classDescriptor = + (const char*) (pDexFile->baseAddr + stringOff); + const DexClassDef* pClassDef = + (const DexClassDef*) (pDexFile->baseAddr + classDefOff); + u4 hash = classDescriptorHash(classDescriptor); + int mask = pLookup->numEntries-1; + int idx = hash & mask; + + /* + * Find the first empty slot. We oversized the table, so this is + * guaranteed to finish. + */ + int probes = 0; + while (pLookup->table[idx].classDescriptorOffset != 0) { + idx = (idx + 1) & mask; + probes++; + } + //if (probes > 1) + // LOGW("classLookupAdd: probes=%d\n", probes); + + pLookup->table[idx].classDescriptorHash = hash; + pLookup->table[idx].classDescriptorOffset = stringOff; + pLookup->table[idx].classDefOffset = classDefOff; + *pNumProbes = probes; +} + +/* + * Round up to the next highest power of 2. + * + * Found on http://graphics.stanford.edu/~seander/bithacks.html. + */ +u4 dexRoundUpPower2(u4 val) +{ + val--; + val |= val >> 1; + val |= val >> 2; + val |= val >> 4; + val |= val >> 8; + val |= val >> 16; + val++; + + return val; +} + +/* + * Create the class lookup hash table. + * + * Returns newly-allocated storage. + */ +DexClassLookup* dexCreateClassLookup(DexFile* pDexFile) +{ + DexClassLookup* pLookup; + int allocSize; + int i, numEntries; + int numProbes, totalProbes, maxProbes; + + numProbes = totalProbes = maxProbes = 0; + + assert(pDexFile != NULL); + + /* + * Using a factor of 3 results in far less probing than a factor of 2, + * but almost doubles the flash storage requirements for the bootstrap + * DEX files. 
The overall impact on class loading performance seems + * to be minor. We could probably get some performance improvement by + * using a secondary hash. + */ + numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2); + allocSize = offsetof(DexClassLookup, table) + + numEntries * sizeof(pLookup->table[0]); + + pLookup = (DexClassLookup*) calloc(1, allocSize); + if (pLookup == NULL) + return NULL; + pLookup->size = allocSize; + pLookup->numEntries = numEntries; + + for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) { + const DexClassDef* pClassDef; + const char* pString; + + pClassDef = dexGetClassDef(pDexFile, i); + pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx); + + classLookupAdd(pDexFile, pLookup, + (u1*)pString - pDexFile->baseAddr, + (u1*)pClassDef - pDexFile->baseAddr, &numProbes); + + if (numProbes > maxProbes) + maxProbes = numProbes; + totalProbes += numProbes; + } + + LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d" + " total=%d max=%d\n", + pDexFile->pHeader->classDefsSize, numEntries, + (100 * pDexFile->pHeader->classDefsSize) / numEntries, + allocSize, totalProbes, maxProbes); + + return pLookup; +} + + +/* + * Set up the basic raw data pointers of a DexFile. This function isn't + * meant for general use. 
+ */ +void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) { + DexHeader *pHeader = (DexHeader*) data; + + pDexFile->baseAddr = data; + pDexFile->pHeader = pHeader; + pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff); + pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff); + pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff); + pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff); + pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff); + pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff); + pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff); +} + + +/* + * Parse out an index map entry, advancing "*pData" and reducing "*pSize". + */ +static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding, + u4* pFullCount, u4* pReducedCount, const u2** pMap) +{ + const u4* wordPtr = (const u4*) *pData; + u4 size = *pSize; + u4 mapCount; + + if (expanding) { + if (size < 4) + return false; + mapCount = *pReducedCount = *wordPtr++; + *pFullCount = (u4) -1; + size -= sizeof(u4); + } else { + if (size < 8) + return false; + mapCount = *pFullCount = *wordPtr++; + *pReducedCount = *wordPtr++; + size -= sizeof(u4) * 2; + } + + u4 mapSize = mapCount * sizeof(u2); + + if (size < mapSize) + return false; + *pMap = (const u2*) wordPtr; + size -= mapSize; + + /* advance the pointer */ + const u1* ptr = (const u1*) wordPtr; + ptr += (mapSize + 3) & ~0x3; + + /* update pass-by-reference values */ + *pData = (const u1*) ptr; + *pSize = size; + + return true; +} + +/* + * Set up some pointers into the mapped data. + * + * See analysis/ReduceConstants.c for the data layout description. 
+ */ +static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size, + bool expanding) +{ + if (!parseIndexMapEntry(&data, &size, expanding, + &pDexFile->indexMap.classFullCount, + &pDexFile->indexMap.classReducedCount, + &pDexFile->indexMap.classMap)) + { + return false; + } + + if (!parseIndexMapEntry(&data, &size, expanding, + &pDexFile->indexMap.methodFullCount, + &pDexFile->indexMap.methodReducedCount, + &pDexFile->indexMap.methodMap)) + { + return false; + } + + if (!parseIndexMapEntry(&data, &size, expanding, + &pDexFile->indexMap.fieldFullCount, + &pDexFile->indexMap.fieldReducedCount, + &pDexFile->indexMap.fieldMap)) + { + return false; + } + + if (!parseIndexMapEntry(&data, &size, expanding, + &pDexFile->indexMap.stringFullCount, + &pDexFile->indexMap.stringReducedCount, + &pDexFile->indexMap.stringMap)) + { + return false; + } + + if (expanding) { + /* + * The map includes the "reduced" counts; pull the original counts + * out of the DexFile so that code has a consistent source. 
+ */ + assert(pDexFile->indexMap.classFullCount == (u4) -1); + assert(pDexFile->indexMap.methodFullCount == (u4) -1); + assert(pDexFile->indexMap.fieldFullCount == (u4) -1); + assert(pDexFile->indexMap.stringFullCount == (u4) -1); + +#if 0 // TODO: not available yet -- do later or just skip this + pDexFile->indexMap.classFullCount = + pDexFile->pHeader->typeIdsSize; + pDexFile->indexMap.methodFullCount = + pDexFile->pHeader->methodIdsSize; + pDexFile->indexMap.fieldFullCount = + pDexFile->pHeader->fieldIdsSize; + pDexFile->indexMap.stringFullCount = + pDexFile->pHeader->stringIdsSize; +#endif + } + + LOGI("Class : %u %u %u\n", + pDexFile->indexMap.classFullCount, + pDexFile->indexMap.classReducedCount, + pDexFile->indexMap.classMap[0]); + LOGI("Method: %u %u %u\n", + pDexFile->indexMap.methodFullCount, + pDexFile->indexMap.methodReducedCount, + pDexFile->indexMap.methodMap[0]); + LOGI("Field : %u %u %u\n", + pDexFile->indexMap.fieldFullCount, + pDexFile->indexMap.fieldReducedCount, + pDexFile->indexMap.fieldMap[0]); + LOGI("String: %u %u %u\n", + pDexFile->indexMap.stringFullCount, + pDexFile->indexMap.stringReducedCount, + pDexFile->indexMap.stringMap[0]); + + return true; +} + +/* + * Parse some auxillary data tables. + * + * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup + * table. Subsequent versions switched to the "chunk" format. + */ +static bool parseAuxData(const u1* data, DexFile* pDexFile) +{ + const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset); + u4 indexMapType = 0; + + /* v1.0 format? 
*/ + if (*pAux == 0) { + LOGV("+++ found OLD dex format\n"); + pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1); + return true; + } + LOGV("+++ found NEW dex format\n"); + + /* process chunks until we see the end marker */ + while (*pAux != kDexChunkEnd) { + u4 size = *(pAux+1); + u1* data = (u1*) (pAux + 2); + + switch (*pAux) { + case kDexChunkClassLookup: + pDexFile->pClassLookup = (const DexClassLookup*) data; + break; + case kDexChunkReducingIndexMap: + LOGI("+++ found reducing index map, size=%u\n", size); + if (!parseIndexMap(pDexFile, data, size, false)) { + LOGE("Failed parsing reducing index map\n"); + return false; + } + indexMapType = *pAux; + break; + case kDexChunkExpandingIndexMap: + LOGI("+++ found expanding index map, size=%u\n", size); + if (!parseIndexMap(pDexFile, data, size, true)) { + LOGE("Failed parsing expanding index map\n"); + return false; + } + indexMapType = *pAux; + break; + default: + LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n", + *pAux, + (char) ((*pAux) >> 24), (char) ((*pAux) >> 16), + (char) ((*pAux) >> 8), (char) (*pAux), + size); + break; + } + + /* + * Advance pointer, padding to 64-bit boundary. The extra "+8" is + * for the type/size header. + */ + size = (size + 8 + 7) & ~7; + pAux += size / sizeof(u4); + } + +#if 0 // TODO: propagate expected map type from the VM through the API + /* + * If we're configured to expect an index map, and we don't find one, + * reject this DEX so we'll regenerate it. Also, if we found an + * "expanding" map but we're not configured to use it, we have to fail + * because the constants aren't usable without translation. + */ + if (indexMapType != expectedIndexMapType) { + LOGW("Incompatible index map configuration: found 0x%04x, need %d\n", + indexMapType, DVM_REDUCE_CONSTANTS); + return false; + } +#endif + + return true; +} + +/* + * Parse an optimized or unoptimized .dex file sitting in memory. 
This is + * called after the byte-ordering and structure alignment has been fixed up. + * + * On success, return a newly-allocated DexFile. + */ +DexFile* dexFileParse(const u1* data, size_t length, int flags) +{ + DexFile* pDexFile = NULL; + const DexHeader* pHeader; + const u1* magic; + int result = -1; + + if (length < sizeof(DexHeader)) { + LOGE("too short to be a valid .dex\n"); + goto bail; /* bad file format */ + } + + pDexFile = (DexFile*) malloc(sizeof(DexFile)); + if (pDexFile == NULL) + goto bail; /* alloc failure */ + memset(pDexFile, 0, sizeof(DexFile)); + + /* + * Peel off the optimized header. + */ + if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) { + magic = data; + if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) { + LOGE("bad opt version (0x%02x %02x %02x %02x)\n", + magic[4], magic[5], magic[6], magic[7]); + goto bail; + } + + pDexFile->pOptHeader = (const DexOptHeader*) data; + LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n", + pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags); + + /* locate some auxillary data tables */ + if (!parseAuxData(data, pDexFile)) + goto bail; + + /* ignore the opt header and appended data from here on out */ + data += pDexFile->pOptHeader->dexOffset; + length -= pDexFile->pOptHeader->dexOffset; + if (pDexFile->pOptHeader->dexLength > length) { + LOGE("File truncated? stored len=%d, rem len=%d\n", + pDexFile->pOptHeader->dexLength, (int) length); + goto bail; + } + length = pDexFile->pOptHeader->dexLength; + } + + dexFileSetupBasicPointers(pDexFile, data); + pHeader = pDexFile->pHeader; + + magic = pHeader->magic; + if (memcmp(magic, DEX_MAGIC, 4) != 0) { + /* not expected */ + LOGE("bad magic number (0x%02x %02x %02x %02x)\n", + magic[0], magic[1], magic[2], magic[3]); + goto bail; + } + if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) { + LOGE("bad dex version (0x%02x %02x %02x %02x)\n", + magic[4], magic[5], magic[6], magic[7]); + goto bail; + } + + /* + * Verify the checksum. 
This is reasonably quick, but does require + * touching every byte in the DEX file. The checksum changes after + * byte-swapping and DEX optimization. + */ + if (flags & kDexParseVerifyChecksum) { + u4 adler = dexComputeChecksum(pHeader); + if (adler != pHeader->checksum) { + LOGE("ERROR: bad checksum (%08x vs %08x)\n", + adler, pHeader->checksum); + if (!(flags & kDexParseContinueOnError)) + goto bail; + } else { + LOGV("+++ adler32 checksum (%08x) verified\n", adler); + } + } + + /* + * Verify the SHA-1 digest. (Normally we don't want to do this -- + * the digest is used to uniquely identify a DEX file, and can't be + * computed post-optimization.) + * + * The digest will be invalid after byte swapping and DEX optimization. + */ + if (kVerifySignature) { + unsigned char sha1Digest[kSHA1DigestLen]; + const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + + kSHA1DigestLen; + + dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest); + if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) { + char tmpBuf1[kSHA1DigestOutputLen]; + char tmpBuf2[kSHA1DigestOutputLen]; + LOGE("ERROR: bad SHA1 digest (%s vs %s)\n", + dexSHA1DigestToStr(sha1Digest, tmpBuf1), + dexSHA1DigestToStr(pHeader->signature, tmpBuf2)); + if (!(flags & kDexParseContinueOnError)) + goto bail; + } else { + LOGV("+++ sha1 digest verified\n"); + } + } + + if (pHeader->fileSize != length) { + LOGE("ERROR: stored file size (%d) != expected (%d)\n", + (int) pHeader->fileSize, (int) length); + if (!(flags & kDexParseContinueOnError)) + goto bail; + } + + if (pHeader->classDefsSize == 0) { + LOGE("ERROR: DEX file has no classes in it, failing\n"); + goto bail; + } + + /* + * Success! + */ + result = 0; + +bail: + if (result != 0 && pDexFile != NULL) { + dexFileFree(pDexFile); + pDexFile = NULL; + } + return pDexFile; +} + +/* + * Free up the DexFile and any associated data structures. + * + * Note we may be called with a partially-initialized DexFile. 
+ */ +void dexFileFree(DexFile* pDexFile) +{ + if (pDexFile == NULL) + return; + + free(pDexFile); +} + +/* + * Look up a class definition entry by descriptor. + * + * "descriptor" should look like "Landroid/debug/Stuff;". + */ +const DexClassDef* dexFindClass(const DexFile* pDexFile, + const char* descriptor) +{ + const DexClassLookup* pLookup = pDexFile->pClassLookup; + u4 hash; + int idx, mask; + + hash = classDescriptorHash(descriptor); + mask = pLookup->numEntries - 1; + idx = hash & mask; + + /* + * Search until we find a matching entry or an empty slot. + */ + while (true) { + int offset; + + offset = pLookup->table[idx].classDescriptorOffset; + if (offset == 0) + return NULL; + + if (pLookup->table[idx].classDescriptorHash == hash) { + const char* str; + + str = (const char*) (pDexFile->baseAddr + offset); + if (strcmp(str, descriptor) == 0) { + return (const DexClassDef*) + (pDexFile->baseAddr + pLookup->table[idx].classDefOffset); + } + } + + idx = (idx + 1) & mask; + } +} + + +/* + * Compute the DEX file checksum for a memory-mapped DEX file. + */ +u4 dexComputeChecksum(const DexHeader* pHeader) +{ + const u1* start = (const u1*) pHeader; + + uLong adler = adler32(0L, Z_NULL, 0); + const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); + + return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum); +} + + +/* + * =========================================================================== + * Debug info + * =========================================================================== + */ + +/* + * Decode the arguments in a method signature, which looks something + * like "(ID[Ljava/lang/String;)V". + * + * Returns the type signature letter for the next argument, or ')' if + * there are no more args. Advances "pSig" to point to the character + * after the one returned. 
+ */ +static char decodeSignature(const char** pSig) +{ + const char* sig = *pSig; + + if (*sig == '(') + sig++; + + if (*sig == 'L') { + /* object ref */ + while (*++sig != ';') + ; + *pSig = sig+1; + return 'L'; + } + if (*sig == '[') { + /* array; advance past array type */ + while (*++sig == '[') + ; + if (*sig == 'L') { + while (*++sig != ';') + ; + } + *pSig = sig+1; + return '['; + } + if (*sig == '\0') + return *sig; /* don't advance further */ + + *pSig = sig+1; + return *sig; +} + +/* + * returns the length of a type string, given the start of the + * type string. Used for the case where the debug info format + * references types that are inside a method type signature. + */ +static int typeLength (const char *type) { + // Assumes any leading '(' has already been gobbled + const char *end = type; + decodeSignature(&end); + return end - type; +} + +/* + * Reads a string index as encoded for the debug info format, + * returning a string pointer or NULL as appropriate. + */ +static const char* readStringIdx(const DexFile* pDexFile, + const u1** pStream) { + u4 stringIdx = readUnsignedLeb128(pStream); + + // Remember, encoded string indicies have 1 added to them. + if (stringIdx == 0) { + return NULL; + } else { + return dexStringById(pDexFile, stringIdx - 1); + } +} + +/* + * Reads a type index as encoded for the debug info format, returning + * a string pointer for its descriptor or NULL as appropriate. + */ +static const char* readTypeIdx(const DexFile* pDexFile, + const u1** pStream) { + u4 typeIdx = readUnsignedLeb128(pStream); + + // Remember, encoded type indicies have 1 added to them. 
+ if (typeIdx == 0) { + return NULL; + } else { + return dexStringByTypeIdx(pDexFile, typeIdx - 1); + } +} + +/* access_flag value indicating that a method is static */ +#define ACC_STATIC 0x0008 + +typedef struct LocalInfo { + const char *name; + const char *descriptor; + const char *signature; + u2 startAddress; + bool live; +} LocalInfo; + +static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress, + LocalInfo *localInReg, DexDebugNewLocalCb localCb) +{ + if (localCb != NULL && localInReg[reg].live) { + localCb(cnxt, reg, localInReg[reg].startAddress, endAddress, + localInReg[reg].name, + localInReg[reg].descriptor, + localInReg[reg].signature == NULL + ? "" : localInReg[reg].signature ); + } +} + +// TODO optimize localCb == NULL case +void dexDecodeDebugInfo( + const DexFile* pDexFile, + const DexCode* pCode, + const char* classDescriptor, + u4 protoIdx, + u4 accessFlags, + DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb, + void* cnxt) +{ + const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode); + u4 line; + u4 parametersSize; + u4 address = 0; + LocalInfo localInReg[pCode->registersSize]; + u4 insnsSize = pCode->insnsSize; + DexProto proto = { pDexFile, protoIdx }; + + memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize); + + if (stream == NULL) { + goto end; + } + + line = readUnsignedLeb128(&stream); + parametersSize = readUnsignedLeb128(&stream); + + u2 argReg = pCode->registersSize - pCode->insSize; + + if ((accessFlags & ACC_STATIC) == 0) { + /* + * The code is an instance method, which means that there is + * an initial this parameter. Also, the proto list should + * contain exactly one fewer argument word than the insSize + * indicates. 
+ */ + assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1)); + localInReg[argReg].name = "this"; + localInReg[argReg].descriptor = classDescriptor; + localInReg[argReg].startAddress = 0; + localInReg[argReg].live = true; + argReg++; + } else { + assert(pCode->insSize == dexProtoComputeArgsSize(&proto)); + } + + DexParameterIterator iterator; + dexParameterIteratorInit(&iterator, &proto); + + while (parametersSize-- != 0) { + const char* descriptor = dexParameterIteratorNextDescriptor(&iterator); + const char *name; + int reg; + + if ((argReg >= pCode->registersSize) || (descriptor == NULL)) { + goto invalid_stream; + } + + name = readStringIdx(pDexFile, &stream); + reg = argReg; + + switch (descriptor[0]) { + case 'D': + case 'J': + argReg += 2; + break; + default: + argReg += 1; + break; + } + + if (name != NULL) { + localInReg[reg].name = name; + localInReg[reg].descriptor = descriptor; + localInReg[reg].signature = NULL; + localInReg[reg].startAddress = address; + localInReg[reg].live = true; + } + } + + for (;;) { + u1 opcode = *stream++; + u2 reg; + + switch (opcode) { + case DBG_END_SEQUENCE: + goto end; + + case DBG_ADVANCE_PC: + address += readUnsignedLeb128(&stream); + break; + + case DBG_ADVANCE_LINE: + line += readSignedLeb128(&stream); + break; + + case DBG_START_LOCAL: + case DBG_START_LOCAL_EXTENDED: + reg = readUnsignedLeb128(&stream); + if (reg > pCode->registersSize) goto invalid_stream; + + // Emit what was previously there, if anything + emitLocalCbIfLive (cnxt, reg, address, + localInReg, localCb); + + localInReg[reg].name = readStringIdx(pDexFile, &stream); + localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream); + if (opcode == DBG_START_LOCAL_EXTENDED) { + localInReg[reg].signature + = readStringIdx(pDexFile, &stream); + } else { + localInReg[reg].signature = NULL; + } + localInReg[reg].startAddress = address; + localInReg[reg].live = true; + break; + + case DBG_END_LOCAL: + reg = readUnsignedLeb128(&stream); + if (reg > 
pCode->registersSize) goto invalid_stream; + + emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb); + localInReg[reg].live = false; + break; + + case DBG_RESTART_LOCAL: + reg = readUnsignedLeb128(&stream); + if (reg > pCode->registersSize) goto invalid_stream; + + if (localInReg[reg].name == NULL + || localInReg[reg].descriptor == NULL) { + goto invalid_stream; + } + + /* + * If the register is live, the "restart" is superfluous, + * and we don't want to mess with the existing start address. + */ + if (!localInReg[reg].live) { + localInReg[reg].startAddress = address; + localInReg[reg].live = true; + } + break; + + case DBG_SET_PROLOGUE_END: + case DBG_SET_EPILOGUE_BEGIN: + case DBG_SET_FILE: + break; + + default: { + int adjopcode = opcode - DBG_FIRST_SPECIAL; + + address += adjopcode / DBG_LINE_RANGE; + line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE); + + if (posCb != NULL) { + int done; + done = posCb(cnxt, address, line); + + if (done) { + // early exit + goto end; + } + } + break; + } + } + } + +end: + { + int reg; + for (reg = 0; reg < pCode->registersSize; reg++) { + emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb); + } + } + return; + +invalid_stream: + IF_LOGE() { + char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto); + LOGE("Invalid debug info stream. class %s; proto %s", + classDescriptor, methodDescriptor); + free(methodDescriptor); + } +} diff --git a/libdex/DexFile.h b/libdex/DexFile.h new file mode 100644 index 000000000..d1ea5ebb2 --- /dev/null +++ b/libdex/DexFile.h @@ -0,0 +1,1054 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Access .dex (Dalvik Executable Format) files. The code here assumes that + * the DEX file has been rewritten (byte-swapped, word-aligned) and that + * the contents can be directly accessed as a collection of C arrays. Please + * see docs/dalvik/dex-format.html for a detailed description. + * + * The structure and field names were chosen to match those in the DEX spec. + * + * It's generally assumed that the DEX file will be stored in shared memory, + * obviating the need to copy code and constant pool entries into newly + * allocated storage. Maintaining local pointers to items in the shared area + * is valid and encouraged. + * + * All memory-mapped structures are 32-bit aligned unless otherwise noted. + */ +#ifndef _LIBDEX_DEXFILE +#define _LIBDEX_DEXFILE + +#include "vm/Common.h" // basic type defs, e.g. u1/u2/u4/u8, and LOG +#include "libdex/SysUtil.h" + +/* + * gcc-style inline management -- ensures we have a copy of all functions + * in the library, so code that links against us will work whether or not + * it was built with optimizations enabled. + */ +#ifndef _DEX_GEN_INLINES /* only defined by DexInlines.c */ +# define DEX_INLINE extern __inline__ +#else +# define DEX_INLINE +#endif + +/* DEX file magic number */ +#define DEX_MAGIC "dex\n" +/* version, encoded in 4 bytes of ASCII */ +#define DEX_MAGIC_VERS "035\0" + +/* same, but for optimized DEX header */ +#define DEX_OPT_MAGIC "dey\n" +#define DEX_OPT_MAGIC_VERS "035\0" + +#define DEX_DEP_MAGIC "deps" + +/* + * 160-bit SHA-1 digest. 
+ */ +enum { kSHA1DigestLen = 20, + kSHA1DigestOutputLen = kSHA1DigestLen*2 +1 }; + +/* general constants */ +enum { + kDexEndianConstant = 0x12345678, /* the endianness indicator */ + kDexNoIndex = 0xffffffff, /* not a valid index value */ +}; + +/* + * access flags and masks; the "standard" ones are all <= 0x4000 + * + * Note: There are related declarations in vm/oo/Object.h in the ClassFlags + * enum. + */ +enum { + ACC_PUBLIC = 0x00000001, // class, field, method, ic + ACC_PRIVATE = 0x00000002, // field, method, ic + ACC_PROTECTED = 0x00000004, // field, method, ic + ACC_STATIC = 0x00000008, // field, method, ic + ACC_FINAL = 0x00000010, // class, field, method, ic + ACC_SYNCHRONIZED = 0x00000020, // method (only allowed on natives) + ACC_SUPER = 0x00000020, // class (not used in Dalvik) + ACC_VOLATILE = 0x00000040, // field + ACC_BRIDGE = 0x00000040, // method (1.5) + ACC_TRANSIENT = 0x00000080, // field + ACC_VARARGS = 0x00000080, // method (1.5) + ACC_NATIVE = 0x00000100, // method + ACC_INTERFACE = 0x00000200, // class, ic + ACC_ABSTRACT = 0x00000400, // class, method, ic + ACC_STRICT = 0x00000800, // method + ACC_SYNTHETIC = 0x00001000, // field, method, ic + ACC_ANNOTATION = 0x00002000, // class, ic (1.5) + ACC_ENUM = 0x00004000, // class, field, ic (1.5) + ACC_CONSTRUCTOR = 0x00010000, // method (Dalvik only) + ACC_DECLARED_SYNCHRONIZED = + 0x00020000, // method (Dalvik only) + ACC_CLASS_MASK = + (ACC_PUBLIC | ACC_FINAL | ACC_INTERFACE | ACC_ABSTRACT + | ACC_SYNTHETIC | ACC_ANNOTATION | ACC_ENUM), + ACC_INNER_CLASS_MASK = + (ACC_CLASS_MASK | ACC_PRIVATE | ACC_PROTECTED | ACC_STATIC), + ACC_FIELD_MASK = + (ACC_PUBLIC | ACC_PRIVATE | ACC_PROTECTED | ACC_STATIC | ACC_FINAL + | ACC_VOLATILE | ACC_TRANSIENT | ACC_SYNTHETIC | ACC_ENUM), + ACC_METHOD_MASK = + (ACC_PUBLIC | ACC_PRIVATE | ACC_PROTECTED | ACC_STATIC | ACC_FINAL + | ACC_SYNCHRONIZED | ACC_BRIDGE | ACC_VARARGS | ACC_NATIVE + | ACC_ABSTRACT | ACC_STRICT | ACC_SYNTHETIC | ACC_CONSTRUCTOR + | 
ACC_DECLARED_SYNCHRONIZED), +}; + +/* annotation constants */ +enum { + kDexVisibilityBuild = 0x00, /* annotation visibility */ + kDexVisibilityRuntime = 0x01, + kDexVisibilitySystem = 0x02, + + kDexAnnotationByte = 0x00, + kDexAnnotationShort = 0x02, + kDexAnnotationChar = 0x03, + kDexAnnotationInt = 0x04, + kDexAnnotationLong = 0x06, + kDexAnnotationFloat = 0x10, + kDexAnnotationDouble = 0x11, + kDexAnnotationString = 0x17, + kDexAnnotationType = 0x18, + kDexAnnotationField = 0x19, + kDexAnnotationMethod = 0x1a, + kDexAnnotationEnum = 0x1b, + kDexAnnotationArray = 0x1c, + kDexAnnotationAnnotation = 0x1d, + kDexAnnotationNull = 0x1e, + kDexAnnotationBoolean = 0x1f, + + kDexAnnotationValueTypeMask = 0x1f, /* low 5 bits */ + kDexAnnotationValueArgShift = 5, +}; + +/* map item type codes */ +enum { + kDexTypeHeaderItem = 0x0000, + kDexTypeStringIdItem = 0x0001, + kDexTypeTypeIdItem = 0x0002, + kDexTypeProtoIdItem = 0x0003, + kDexTypeFieldIdItem = 0x0004, + kDexTypeMethodIdItem = 0x0005, + kDexTypeClassDefItem = 0x0006, + kDexTypeMapList = 0x1000, + kDexTypeTypeList = 0x1001, + kDexTypeAnnotationSetRefList = 0x1002, + kDexTypeAnnotationSetItem = 0x1003, + kDexTypeClassDataItem = 0x2000, + kDexTypeCodeItem = 0x2001, + kDexTypeStringDataItem = 0x2002, + kDexTypeDebugInfoItem = 0x2003, + kDexTypeAnnotationItem = 0x2004, + kDexTypeEncodedArrayItem = 0x2005, + kDexTypeAnnotationsDirectoryItem = 0x2006, +}; + +/* auxillary data section chunk codes */ +enum { + kDexChunkClassLookup = 0x434c4b50, /* CLKP */ + + kDexChunkReducingIndexMap = 0x5249584d, /* RIXM */ + kDexChunkExpandingIndexMap = 0x4549584d, /* EIXM */ + + kDexChunkEnd = 0x41454e44, /* AEND */ +}; + +/* debug info opcodes and constants */ +enum { + DBG_END_SEQUENCE = 0x00, + DBG_ADVANCE_PC = 0x01, + DBG_ADVANCE_LINE = 0x02, + DBG_START_LOCAL = 0x03, + DBG_START_LOCAL_EXTENDED = 0x04, + DBG_END_LOCAL = 0x05, + DBG_RESTART_LOCAL = 0x06, + DBG_SET_PROLOGUE_END = 0x07, + DBG_SET_EPILOGUE_BEGIN = 0x08, + DBG_SET_FILE = 
0x09, + DBG_FIRST_SPECIAL = 0x0a, + DBG_LINE_BASE = -4, + DBG_LINE_RANGE = 15, +}; + +/* + * Direct-mapped "header_item" struct. + */ +typedef struct DexHeader { + u1 magic[8]; /* includes version number */ + u4 checksum; /* adler32 checksum */ + u1 signature[kSHA1DigestLen]; /* SHA-1 hash */ + u4 fileSize; /* length of entire file */ + u4 headerSize; /* offset to start of next section */ + u4 endianTag; + u4 linkSize; + u4 linkOff; + u4 mapOff; + u4 stringIdsSize; + u4 stringIdsOff; + u4 typeIdsSize; + u4 typeIdsOff; + u4 protoIdsSize; + u4 protoIdsOff; + u4 fieldIdsSize; + u4 fieldIdsOff; + u4 methodIdsSize; + u4 methodIdsOff; + u4 classDefsSize; + u4 classDefsOff; + u4 dataSize; + u4 dataOff; +} DexHeader; + +/* + * Direct-mapped "map_item". + */ +typedef struct DexMapItem { + u2 type; /* type code (see kDexType* above) */ + u2 unused; + u4 size; /* count of items of the indicated type */ + u4 offset; /* file offset to the start of data */ +} DexMapItem; + +/* + * Direct-mapped "map_list". + */ +typedef struct DexMapList { + u4 size; /* #of entries in list */ + DexMapItem list[1]; /* entries */ +} DexMapList; + +/* + * Direct-mapped "string_id_item". + */ +typedef struct DexStringId { + u4 stringDataOff; /* file offset to string_data_item */ +} DexStringId; + +/* + * Direct-mapped "type_id_item". + */ +typedef struct DexTypeId { + u4 descriptorIdx; /* index into stringIds list for type descriptor */ +} DexTypeId; + +/* + * Direct-mapped "field_id_item". + */ +typedef struct DexFieldId { + u2 classIdx; /* index into typeIds list for defining class */ + u2 typeIdx; /* index into typeIds for field type */ + u4 nameIdx; /* index into stringIds for field name */ +} DexFieldId; + +/* + * Direct-mapped "method_id_item". 
+ */ +typedef struct DexMethodId { + u2 classIdx; /* index into typeIds list for defining class */ + u2 protoIdx; /* index into protoIds for method prototype */ + u4 nameIdx; /* index into stringIds for method name */ +} DexMethodId; + +/* + * Direct-mapped "proto_id_item". + */ +typedef struct DexProtoId { + u4 shortyIdx; /* index into stringIds for shorty descriptor */ + u4 returnTypeIdx; /* index into typeIds list for return type */ + u4 parametersOff; /* file offset to type_list for parameter types */ +} DexProtoId; + +/* + * Direct-mapped "class_def_item". + */ +typedef struct DexClassDef { + u4 classIdx; /* index into typeIds for this class */ + u4 accessFlags; + u4 superclassIdx; /* index into typeIds for superclass */ + u4 interfacesOff; /* file offset to DexTypeList */ + u4 sourceFileIdx; /* index into stringIds for source file name */ + u4 annotationsOff; /* file offset to annotations_directory_item */ + u4 classDataOff; /* file offset to class_data_item */ + u4 staticValuesOff; /* file offset to DexEncodedArray */ +} DexClassDef; + +/* + * Direct-mapped "type_item". + */ +typedef struct DexTypeItem { + u2 typeIdx; /* index into typeIds */ +} DexTypeItem; + +/* + * Direct-mapped "type_list". + */ +typedef struct DexTypeList { + u4 size; /* #of entries in list */ + DexTypeItem list[1]; /* entries */ +} DexTypeList; + +/* + * Direct-mapped "code_item". + * + * The "catches" table is used when throwing an exception, + * "debugInfo" is used when displaying an exception stack trace or + * debugging. An offset of zero indicates that there are no entries. 
+ */ +typedef struct DexCode { + u2 registersSize; + u2 insSize; + u2 outsSize; + u2 triesSize; + u4 debugInfoOff; /* file offset to debug info stream */ + u4 insnsSize; /* size of the insns array, in u2 units */ + u2 insns[1]; + /* followed by optional u2 padding */ + /* followed by try_item[triesSize] */ + /* followed by uleb128 handlersSize */ + /* followed by catch_handler_item[handlersSize] */ +} DexCode; + +/* + * Direct-mapped "try_item". + */ +typedef struct DexTry { + u4 startAddr; /* start address, in 16-bit code units */ + u2 insnCount; /* instruction count, in 16-bit code units */ + u2 handlerOff; /* offset in encoded handler data to handlers */ +} DexTry; + +/* + * Link table. Currently undefined. + */ +typedef struct DexLink { + u1 bleargh; +} DexLink; + + +/* + * Direct-mapped "annotations_directory_item". + */ +typedef struct DexAnnotationsDirectoryItem { + u4 classAnnotationsOff; /* offset to DexAnnotationSetItem */ + u4 fieldsSize; /* count of DexFieldAnnotationsItem */ + u4 methodsSize; /* count of DexMethodAnnotationsItem */ + u4 parametersSize; /* count of DexParameterAnnotationsItem */ + /* followed by DexFieldAnnotationsItem[fieldsSize] */ + /* followed by DexMethodAnnotationsItem[methodsSize] */ + /* followed by DexParameterAnnotationsItem[parametersSize] */ +} DexAnnotationsDirectoryItem; + +/* + * Direct-mapped "field_annotations_item". + */ +typedef struct DexFieldAnnotationsItem { + u4 fieldIdx; + u4 annotationsOff; /* offset to DexAnnotationSetItem */ +} DexFieldAnnotationsItem; + +/* + * Direct-mapped "method_annotations_item". + */ +typedef struct DexMethodAnnotationsItem { + u4 methodIdx; + u4 annotationsOff; /* offset to DexAnnotationSetItem */ +} DexMethodAnnotationsItem; + +/* + * Direct-mapped "parameter_annotations_item". + */ +typedef struct DexParameterAnnotationsItem { + u4 methodIdx; + u4 annotationsOff; /* offset to DexAnotationSetRefList */ +} DexParameterAnnotationsItem; + +/* + * Direct-mapped "annotation_set_ref_item". 
+ */ +typedef struct DexAnnotationSetRefItem { + u4 annotationsOff; /* offset to DexAnnotationSetItem */ +} DexAnnotationSetRefItem; + +/* + * Direct-mapped "annotation_set_ref_list". + */ +typedef struct DexAnnotationSetRefList { + u4 size; + DexAnnotationSetRefItem list[1]; +} DexAnnotationSetRefList; + +/* + * Direct-mapped "anotation_set_item". + */ +typedef struct DexAnnotationSetItem { + u4 size; + u4 entries[1]; /* offset to DexAnnotationItem */ +} DexAnnotationSetItem; + +/* + * Direct-mapped "annotation_item". + * + * NOTE: this structure is byte-aligned. + */ +typedef struct DexAnnotationItem { + u1 visibility; + u1 annotation[1]; /* data in encoded_annotation format */ +} DexAnnotationItem; + +/* + * Direct-mapped "encoded_array". + * + * NOTE: this structure is byte-aligned. + */ +typedef struct DexEncodedArray { + u1 array[1]; /* data in encoded_array format */ +} DexEncodedArray; + +/* + * Lookup table for classes. It provides a mapping from class name to + * class definition. Used by dexFindClass(). + * + * We calculate this at DEX optimization time and embed it in the file so we + * don't need the same hash table in every VM. This is slightly slower than + * a hash table with direct pointers to the items, but because it's shared + * there's less of a penalty for using a fairly sparse table. + */ +typedef struct DexClassLookup { + int size; // total size, including "size" + int numEntries; // size of table[]; always power of 2 + struct { + u4 classDescriptorHash; // class descriptor hash code + int classDescriptorOffset; // in bytes, from start of DEX + int classDefOffset; // in bytes, from start of DEX + } table[1]; +} DexClassLookup; + +/* + * Map constant pool indices from one form to another. Some or all of these + * may be NULL. + * + * The map values are 16-bit unsigned values. 
If the values we map to + * require a larger range, we omit the mapping for that category (which + * requires that the lookup code recognize that the data will not be + * there for all DEX files in all categories.) + */ +typedef struct DexIndexMap { + const u2* classMap; /* map, either expanding or reducing */ + u4 classFullCount; /* same as typeIdsSize */ + u4 classReducedCount; /* post-reduction count */ + const u2* methodMap; + u4 methodFullCount; + u4 methodReducedCount; + const u2* fieldMap; + u4 fieldFullCount; + u4 fieldReducedCount; + const u2* stringMap; + u4 stringFullCount; + u4 stringReducedCount; +} DexIndexMap; + +/* + * Header added by DEX optimization pass. Values are always written in + * local byte and structure padding. The first field (magic + version) + * is guaranteed to be present and directly readable for all expected + * compiler configurations; the rest is version-dependent. + * + * Try to keep this simple and fixed-size. + */ +typedef struct DexOptHeader { + u1 magic[8]; /* includes version number */ + + u4 dexOffset; /* file offset of DEX header */ + u4 dexLength; + u4 depsOffset; /* offset of optimized DEX dependency table */ + u4 depsLength; + u4 auxOffset; /* file offset of pre-calc auxillary data */ + u4 auxLength; + + u4 flags; /* some info flags */ + + u4 padding; /* induce 64-bit alignment */ +} DexOptHeader; + +#define DEX_FLAG_VERIFIED (1) /* tried to verify all classes */ +#define DEX_OPT_FLAG_BIG (1<<1) /* swapped to big-endian */ +#define DEX_OPT_FLAG_FIELDS (1<<2) /* field access optimized */ +#define DEX_OPT_FLAG_INVOCATIONS (1<<3) /* method calls optimized */ + +#define DEX_INTERFACE_CACHE_SIZE 128 /* must be power of 2 */ + +/* + * Structure representing a DEX file. + * + * Code should regard DexFile as opaque, using the API calls provided here + * to access specific structures. 
+ */ +typedef struct DexFile { + /* directly-mapped "opt" header */ + const DexOptHeader* pOptHeader; + + /* pointers to directly-mapped structs and arrays in base DEX */ + const DexHeader* pHeader; + const DexStringId* pStringIds; + const DexTypeId* pTypeIds; + const DexFieldId* pFieldIds; + const DexMethodId* pMethodIds; + const DexProtoId* pProtoIds; + const DexClassDef* pClassDefs; + const DexLink* pLinkData; + + /* mapped in "auxillary" section */ + const DexClassLookup* pClassLookup; + + /* mapped in "auxillary" section */ + DexIndexMap indexMap; + + /* points to start of DEX file data */ + const u1* baseAddr; + + /* track memory overhead for auxillary structures */ + int overhead; + + /* additional app-specific data structures associated with the DEX */ + //void* auxData; +} DexFile; + +/* + * Utility function -- rounds up to the nearest power of 2. + */ +u4 dexRoundUpPower2(u4 val); + +/* + * Parse an optimized or unoptimized .dex file sitting in memory. + * + * On success, return a newly-allocated DexFile. + */ +DexFile* dexFileParse(const u1* data, size_t length, int flags); + +/* bit values for "flags" argument to dexFileParse */ +enum { + kDexParseDefault = 0, + kDexParseVerifyChecksum = 1, + kDexParseContinueOnError = (1 << 1), +}; + +/* + * Correct the byte ordering in a memory-mapped DEX file. This is only + * required for code that opens "raw" DEX files, such as the DEX optimizer. + * + * Return 0 on success. + */ +int dexFixByteOrdering(u1* addr, int len); + +/* + * Compute DEX checksum. + */ +u4 dexComputeChecksum(const DexHeader* pHeader); + +/* + * Free a DexFile structure, along with any associated structures. + */ +void dexFileFree(DexFile* pDexFile); + +/* + * Create class lookup table. + */ +DexClassLookup* dexCreateClassLookup(DexFile* pDexFile); + +/* + * Find a class definition by descriptor. + */ +const DexClassDef* dexFindClass(const DexFile* pFile, const char* descriptor); + +/* + * Set up the basic raw data pointers of a DexFile. 
This function isn't + * meant for general use. + */ +void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data); + +/* return the DexMapList of the file, if any */ +DEX_INLINE const DexMapList* dexGetMap(const DexFile* pDexFile) { + u4 mapOff = pDexFile->pHeader->mapOff; + + if (mapOff == 0) { + return NULL; + } else { + return (const DexMapList*) (pDexFile->baseAddr + mapOff); + } +} + +/* return the const char* string data referred to by the given string_id */ +DEX_INLINE const char* dexGetStringData(const DexFile* pDexFile, + const DexStringId* pStringId) { + const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; + + // Skip the uleb128 length. + while (*(ptr++) > 0x7f) /* empty */ ; + + return (const char*) ptr; +} +/* return the StringId with the specified index */ +DEX_INLINE const DexStringId* dexGetStringId(const DexFile* pDexFile, u4 idx) { + assert(idx < pDexFile->pHeader->stringIdsSize); + return &pDexFile->pStringIds[idx]; +} +/* return the UTF-8 encoded string with the specified string_id index */ +DEX_INLINE const char* dexStringById(const DexFile* pDexFile, u4 idx) { + const DexStringId* pStringId = dexGetStringId(pDexFile, idx); + return dexGetStringData(pDexFile, pStringId); +} + +/* Return the UTF-8 encoded string with the specified string_id index, + * also filling in the UTF-16 size (number of 16-bit code points).*/ +const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, + u4* utf16Size); + +/* return the TypeId with the specified index */ +DEX_INLINE const DexTypeId* dexGetTypeId(const DexFile* pDexFile, u4 idx) { + assert(idx < pDexFile->pHeader->typeIdsSize); + return &pDexFile->pTypeIds[idx]; +} + +/* + * Get the descriptor string associated with a given type index. + * The caller should not free() the returned string. 
+ */
+DEX_INLINE const char* dexStringByTypeIdx(const DexFile* pDexFile, u4 idx) {
+    const DexTypeId* typeId = dexGetTypeId(pDexFile, idx);
+    return dexStringById(pDexFile, typeId->descriptorIdx);
+}
+
+/* return the MethodId with the specified index */
+DEX_INLINE const DexMethodId* dexGetMethodId(const DexFile* pDexFile, u4 idx) {
+    assert(idx < pDexFile->pHeader->methodIdsSize);
+    return &pDexFile->pMethodIds[idx];
+}
+
+/* return the FieldId with the specified index */
+DEX_INLINE const DexFieldId* dexGetFieldId(const DexFile* pDexFile, u4 idx) {
+    assert(idx < pDexFile->pHeader->fieldIdsSize);
+    return &pDexFile->pFieldIds[idx];
+}
+
+/* return the ProtoId with the specified index */
+DEX_INLINE const DexProtoId* dexGetProtoId(const DexFile* pDexFile, u4 idx) {
+    assert(idx < pDexFile->pHeader->protoIdsSize);
+    return &pDexFile->pProtoIds[idx];
+}
+
+/*
+ * Get the parameter list from a ProtoId. This returns NULL if the ProtoId
+ * does not have a parameter list (its parametersOff is zero).
+ */
+DEX_INLINE const DexTypeList* dexGetProtoParameters(
+    const DexFile *pDexFile, const DexProtoId* pProtoId) {
+    if (pProtoId->parametersOff == 0) {
+        return NULL;
+    }
+    return (const DexTypeList*)
+        (pDexFile->baseAddr + pProtoId->parametersOff);
+}
+
+/* return the ClassDef with the specified index */
+DEX_INLINE const DexClassDef* dexGetClassDef(const DexFile* pDexFile, u4 idx) {
+    assert(idx < pDexFile->pHeader->classDefsSize);
+    return &pDexFile->pClassDefs[idx];
+}
+
+/* get the interface list for a DexClass; NULL if interfacesOff is zero */
+DEX_INLINE const DexTypeList* dexGetInterfacesList(const DexFile* pDexFile,
+    const DexClassDef* pClassDef)
+{
+    if (pClassDef->interfacesOff == 0)
+        return NULL;
+    return (const DexTypeList*)
+        (pDexFile->baseAddr + pClassDef->interfacesOff);
+}
+/* return the Nth entry in a DexTypeList.
*/ +DEX_INLINE const DexTypeItem* dexGetTypeItem(const DexTypeList* pList, + u4 idx) +{ + assert(idx < pList->size); + return &pList->list[idx]; +} +/* return the type_idx for the Nth entry in a TypeList */ +DEX_INLINE u4 dexTypeListGetIdx(const DexTypeList* pList, u4 idx) { + const DexTypeItem* pItem = dexGetTypeItem(pList, idx); + return pItem->typeIdx; +} + +/* get the static values list for a DexClass */ +DEX_INLINE const DexEncodedArray* dexGetStaticValuesList( + const DexFile* pDexFile, const DexClassDef* pClassDef) +{ + if (pClassDef->staticValuesOff == 0) + return NULL; + return (const DexEncodedArray*) + (pDexFile->baseAddr + pClassDef->staticValuesOff); +} + +/* get the annotations directory item for a DexClass */ +DEX_INLINE const DexAnnotationsDirectoryItem* dexGetAnnotationsDirectoryItem( + const DexFile* pDexFile, const DexClassDef* pClassDef) +{ + if (pClassDef->annotationsOff == 0) + return NULL; + return (const DexAnnotationsDirectoryItem*) + (pDexFile->baseAddr + pClassDef->annotationsOff); +} + +/* get the source file string */ +DEX_INLINE const char* dexGetSourceFile( + const DexFile* pDexFile, const DexClassDef* pClassDef) +{ + if (pClassDef->sourceFileIdx == 0xffffffff) + return NULL; + return dexStringById(pDexFile, pClassDef->sourceFileIdx); +} + +/* Get the list of "tries" for the given DexCode. */ +DEX_INLINE const DexTry* dexGetTries(const DexCode* pCode) { + const u2* insnsEnd = &pCode->insns[pCode->insnsSize]; + + // Round to four bytes. + if ((((u4) insnsEnd) & 3) != 0) { + insnsEnd++; + } + + return (const DexTry*) insnsEnd; +} + +/* Get the base of the encoded data for the given DexCode. 
*/ +DEX_INLINE const u1* dexGetCatchHandlerData(const DexCode* pCode) { + const DexTry* pTries = dexGetTries(pCode); + return (const u1*) &pTries[pCode->triesSize]; +} + +DEX_INLINE const u1* dexGetDebugInfoStream(const DexFile* pDexFile, + const DexCode* pCode) +{ + if (pCode->debugInfoOff == 0) { + return NULL; + } else { + return pDexFile->baseAddr + pCode->debugInfoOff; + } +} + +/* + * Callback for "new position table entry". + * Returning non-0 causes the decoder to stop early. + */ +typedef int (*DexDebugNewPositionCb)(void *cnxt, u4 address, u4 lineNum); + +/* + * Callback for "new locals table entry". "signature" is an empty string + * if no signature is available for an entry. + */ +typedef void (*DexDebugNewLocalCb)(void *cnxt, u2 reg, u4 startAddress, + u4 endAddress, const char *name, const char *descriptor, + const char *signature); + +/* + * Decode debug info for method. + * + * posCb is called in ascending address order. + * localCb is called in order of ascending end address. 
+ */ +void dexDecodeDebugInfo( + const DexFile* pDexFile, + const DexCode* pDexCode, + const char* classDescriptor, + u4 protoIdx, + u4 accessFlags, + DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb, + void* cnxt); + +/* DexClassDef convenience - get class descriptor */ +DEX_INLINE const char* dexGetClassDescriptor(const DexFile* pDexFile, + const DexClassDef* pClassDef) +{ + return dexStringByTypeIdx(pDexFile, pClassDef->classIdx); +} + +/* DexClassDef convenience - get superclass descriptor */ +DEX_INLINE const char* dexGetSuperClassDescriptor(const DexFile* pDexFile, + const DexClassDef* pClassDef) +{ + if (pClassDef->superclassIdx == 0) + return NULL; + return dexStringByTypeIdx(pDexFile, pClassDef->superclassIdx); +} + +/* DexClassDef convenience - get class_data_item pointer */ +DEX_INLINE const u1* dexGetClassData(const DexFile* pDexFile, + const DexClassDef* pClassDef) +{ + if (pClassDef->classDataOff == 0) + return NULL; + return (const u1*) (pDexFile->baseAddr + pClassDef->classDataOff); +} + +/* Get an annotation set at a particular offset. */ +DEX_INLINE const DexAnnotationSetItem* dexGetAnnotationSetItem( + const DexFile* pDexFile, u4 offset) +{ + return (const DexAnnotationSetItem*) (pDexFile->baseAddr + offset); +} +/* get the class' annotation set */ +DEX_INLINE const DexAnnotationSetItem* dexGetClassAnnotationSet( + const DexFile* pDexFile, const DexAnnotationsDirectoryItem* pAnnoDir) +{ + if (pAnnoDir->classAnnotationsOff == 0) + return NULL; + return dexGetAnnotationSetItem(pDexFile, pAnnoDir->classAnnotationsOff); +} + +/* get the class' field annotation list */ +DEX_INLINE const DexFieldAnnotationsItem* dexGetFieldAnnotations( + const DexFile* pDexFile, const DexAnnotationsDirectoryItem* pAnnoDir) +{ + if (pAnnoDir->fieldsSize == 0) + return NULL; + + // Skip past the header to the start of the field annotations. 
+ return (const DexFieldAnnotationsItem*) &pAnnoDir[1]; +} + +/* get field annotation list size */ +DEX_INLINE int dexGetFieldAnnotationsSize(const DexFile* pDexFile, + const DexAnnotationsDirectoryItem* pAnnoDir) +{ + return pAnnoDir->fieldsSize; +} + +/* return a pointer to the field's annotation set */ +DEX_INLINE const DexAnnotationSetItem* dexGetFieldAnnotationSetItem( + const DexFile* pDexFile, const DexFieldAnnotationsItem* pItem) +{ + return dexGetAnnotationSetItem(pDexFile, pItem->annotationsOff); +} + +/* get the class' method annotation list */ +DEX_INLINE const DexMethodAnnotationsItem* dexGetMethodAnnotations( + const DexFile* pDexFile, const DexAnnotationsDirectoryItem* pAnnoDir) +{ + if (pAnnoDir->methodsSize == 0) + return NULL; + + /* + * Skip past the header and field annotations to the start of the + * method annotations. + */ + const u1* addr = (const u1*) &pAnnoDir[1]; + addr += pAnnoDir->fieldsSize * sizeof (DexFieldAnnotationsItem); + return (const DexMethodAnnotationsItem*) addr; +} + +/* get method annotation list size */ +DEX_INLINE int dexGetMethodAnnotationsSize(const DexFile* pDexFile, + const DexAnnotationsDirectoryItem* pAnnoDir) +{ + return pAnnoDir->methodsSize; +} + +/* return a pointer to the method's annotation set */ +DEX_INLINE const DexAnnotationSetItem* dexGetMethodAnnotationSetItem( + const DexFile* pDexFile, const DexMethodAnnotationsItem* pItem) +{ + return dexGetAnnotationSetItem(pDexFile, pItem->annotationsOff); +} + +/* get the class' parameter annotation list */ +DEX_INLINE const DexParameterAnnotationsItem* dexGetParameterAnnotations( + const DexFile* pDexFile, const DexAnnotationsDirectoryItem* pAnnoDir) +{ + if (pAnnoDir->parametersSize == 0) + return NULL; + + /* + * Skip past the header, field annotations, and method annotations + * to the start of the parameter annotations. 
+     */
+    const u1* addr = (const u1*) &pAnnoDir[1];
+    addr += pAnnoDir->fieldsSize * sizeof (DexFieldAnnotationsItem);
+    addr += pAnnoDir->methodsSize * sizeof (DexMethodAnnotationsItem);
+    return (const DexParameterAnnotationsItem*) addr;
+}
+
+/* get parameter annotation list size */
+DEX_INLINE int dexGetParameterAnnotationsSize(const DexFile* pDexFile,
+    const DexAnnotationsDirectoryItem* pAnnoDir)
+{
+    return pAnnoDir->parametersSize;
+}
+
+/* return the parameter annotation ref list */
+DEX_INLINE const DexAnnotationSetRefList* dexGetParameterAnnotationSetRefList(
+    const DexFile* pDexFile, const DexParameterAnnotationsItem* pItem)
+{
+    return (const DexAnnotationSetRefList*)
+        (pDexFile->baseAddr + pItem->annotationsOff);
+}
+
+/*
+ * get the number of entries in the parameter annotation ref list, or 0
+ * if the item has no list (annotationsOff is zero)
+ */
+DEX_INLINE int dexGetParameterAnnotationSetRefSize(const DexFile* pDexFile,
+    const DexParameterAnnotationsItem* pItem)
+{
+    if (pItem->annotationsOff == 0)
+        return 0;
+    return dexGetParameterAnnotationSetRefList(pDexFile, pItem)->size;
+}
+
+/* return the Nth entry from an annotation set ref list */
+DEX_INLINE const DexAnnotationSetRefItem* dexGetParameterAnnotationSetRef(
+    const DexAnnotationSetRefList* pList, u4 idx)
+{
+    assert(idx < pList->size);
+    return &pList->list[idx];
+}
+
+/* given a DexAnnotationSetRefItem, return the DexAnnotationSetItem */
+DEX_INLINE const DexAnnotationSetItem* dexGetSetRefItemItem(
+    const DexFile* pDexFile, const DexAnnotationSetRefItem* pItem)
+{
+    return dexGetAnnotationSetItem(pDexFile, pItem->annotationsOff);
+}
+
+/* return the Nth annotation offset from a DexAnnotationSetItem */
+DEX_INLINE u4 dexGetAnnotationOff(
+    const DexAnnotationSetItem* pAnnoSet, u4 idx)
+{
+    assert(idx < pAnnoSet->size);
+    return pAnnoSet->entries[idx];
+}
+
+/* return the Nth annotation item from a DexAnnotationSetItem */
+DEX_INLINE const DexAnnotationItem* dexGetAnnotationItem(
+    const DexFile* pDexFile, const DexAnnotationSetItem* pAnnoSet, u4 idx)
+{
+    return (const
DexAnnotationItem*) + (pDexFile->baseAddr + dexGetAnnotationOff(pAnnoSet, idx)); +} + + +/* + * =========================================================================== + * Utility Functions + * =========================================================================== + */ + +/* + * Retrieve the next UTF-16 character from a UTF-8 string. + * + * Advances "*pUtf8Ptr" to the start of the next character. + * + * WARNING: If a string is corrupted by dropping a '\0' in the middle + * of a 3-byte sequence, you can end up overrunning the buffer with + * reads (and possibly with the writes if the length was computed and + * cached before the damage). For performance reasons, this function + * assumes that the string being parsed is known to be valid (e.g., by + * already being verified). Most strings we process here are coming + * out of dex files or other internal translations, so the only real + * risk comes from the JNI NewStringUTF call. + */ +DEX_INLINE u2 dexGetUtf16FromUtf8(const char** pUtf8Ptr) +{ + unsigned int one, two, three; + + one = *(*pUtf8Ptr)++; + if ((one & 0x80) != 0) { + /* two- or three-byte encoding */ + two = *(*pUtf8Ptr)++; + if ((one & 0x20) != 0) { + /* three-byte encoding */ + three = *(*pUtf8Ptr)++; + return ((one & 0x0f) << 12) | + ((two & 0x3f) << 6) | + (three & 0x3f); + } else { + /* two-byte encoding */ + return ((one & 0x1f) << 6) | + (two & 0x3f); + } + } else { + /* one-byte encoding */ + return one; + } +} + +/* Compare two '\0'-terminated modified UTF-8 strings, using Unicode + * code point values for comparison. This treats different encodings + * for the same code point as equivalent, except that only a real '\0' + * byte is considered the string terminator. The return value is as + * for strcmp(). 
*/ +int dexUtf8Cmp(const char* s1, const char* s2); + + +/* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */ +extern u4 DEX_MEMBER_VALID_LOW_ASCII[4]; + +/* Helper for dexIsValidMemberNameUtf8(); do not call directly. */ +bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr); + +/* Return whether the pointed-at modified-UTF-8 encoded character is + * valid as part of a member name, updating the pointer to point past + * the consumed character. This will consume two encoded UTF-16 code + * points if the character is encoded as a surrogate pair. Also, if + * this function returns false, then the given pointer may only have + * been partially advanced. + * + * Bytes <= 0x7f are looked up in DEX_MEMBER_VALID_LOW_ASCII (32 bits per + * word); anything else is handed to dexIsValidMemberNameUtf8_0(). */ +DEX_INLINE bool dexIsValidMemberNameUtf8(const char** pUtf8Ptr) { + u1 c = (u1) **pUtf8Ptr; + if (c <= 0x7f) { + // It's low-ascii, so check the table. + u4 wordIdx = c >> 5; + u4 bitIdx = c & 0x1f; + (*pUtf8Ptr)++; + /* Unsigned literal: bitIdx can be 31, and 1 << 31 overflows int. */ + return (DEX_MEMBER_VALID_LOW_ASCII[wordIdx] & (1u << bitIdx)) != 0; + } + + /* + * It's a multibyte encoded character. Call a non-inline function + * for the heavy lifting. + */ + return dexIsValidMemberNameUtf8_0(pUtf8Ptr); +} + +/* Return whether the given string is a valid field or method name. */ +bool dexIsValidMemberName(const char* s); + +/* Return whether the given string is a valid type descriptor. */ +bool dexIsValidTypeDescriptor(const char* s); + +/* Return whether the given string is a valid reference descriptor. This + * is true if dexIsValidTypeDescriptor() returns true and the descriptor + * is for a class or array and not a primitive type. */ +bool dexIsReferenceDescriptor(const char* s); + +/* Return whether the given string is a valid class descriptor. This + * is true if dexIsValidTypeDescriptor() returns true and the descriptor + * is for a class and not an array or primitive type. */ +bool dexIsClassDescriptor(const char* s); + +/* Return whether the given string is a valid field type descriptor. 
This + * is true if dexIsValidTypeDescriptor() returns true and the descriptor + * is for anything but "void". */ +bool dexIsFieldDescriptor(const char* s); + +#endif /*_LIBDEX_DEXFILE*/ diff --git a/libdex/DexInlines.c b/libdex/DexInlines.c new file mode 100644 index 000000000..ac0262f4e --- /dev/null +++ b/libdex/DexInlines.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Generate non-inline copies of inline functions in header files. + */ + +#define _DEX_GEN_INLINES + +#include "DexFile.h" + +#include "DexCatch.h" +#include "DexClass.h" +#include "DexDataMap.h" +#include "DexProto.h" +#include "InstrUtils.h" +#include "Leb128.h" +#include "ZipArchive.h" + diff --git a/libdex/DexProto.c b/libdex/DexProto.c new file mode 100644 index 000000000..c8f1b3edb --- /dev/null +++ b/libdex/DexProto.c @@ -0,0 +1,534 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Functions for dealing with method prototypes + */ + +#include "DexProto.h" + +#include <stdlib.h> +#include <string.h> + +/* + * =========================================================================== + * String Cache + * =========================================================================== + */ + +/* + * Make sure that the given cache can hold a string of the given length, + * including the final '\0' byte. + * + * A nonzero allocatedSize means pCache->value is a heap block: it is kept + * if it is already big enough, and freed otherwise. A request that fits + * in pCache->buffer then uses that inline buffer; a larger one gets a new + * heap block. Previous contents are not preserved. + * + * NOTE(review): the malloc() result is not checked, so on allocation + * failure pCache->value is left NULL. + */ +static void dexStringCacheAlloc(DexStringCache* pCache, size_t length) { + if (pCache->allocatedSize != 0) { + if (pCache->allocatedSize >= length) { + return; + } + free((void*) pCache->value); + } + + if (length <= sizeof(pCache->buffer)) { + pCache->value = pCache->buffer; + pCache->allocatedSize = 0; + } else { + pCache->value = malloc(length); + pCache->allocatedSize = length; + } +} + +/* + * Initialize the given DexStringCache. Use this function before passing + * one into any other function. The cache starts out pointing at its + * inline buffer, which is set to the empty string. + */ +void dexStringCacheInit(DexStringCache* pCache) { + pCache->value = pCache->buffer; + pCache->allocatedSize = 0; + pCache->buffer[0] = '\0'; +} + +/* + * Release the allocated contents of the given DexStringCache, if any. + * Use this function after your last use of a DexStringCache. The cache + * is left pointing at its inline buffer. + */ +void dexStringCacheRelease(DexStringCache* pCache) { + if (pCache->allocatedSize != 0) { + free((void*) pCache->value); + pCache->value = pCache->buffer; + pCache->allocatedSize = 0; + } +} + +/* + * If the given DexStringCache doesn't already point at the given value, + * make a copy of it into the cache. This always returns a writable + * pointer to the contents (whether or not a copy had to be made). This + * function is intended to be used after making a call that at least + * sometimes doesn't populate a DexStringCache. 
+ */ +char* dexStringCacheEnsureCopy(DexStringCache* pCache, const char* value) { + if (value != pCache->value) { + /* Copy the string including its terminating '\0'. */ + size_t length = strlen(value) + 1; + dexStringCacheAlloc(pCache, length); + memcpy(pCache->value, value, length); + } + + return pCache->value; +} + +/* + * Abandon the given DexStringCache, and return a writable copy of the + * given value (reusing the string cache's allocation if possible). + * The return value must be free()d by the caller. Use this instead of + * dexStringCacheRelease() if you want the buffer to survive past the + * scope of the DexStringCache. + */ +char* dexStringCacheAbandon(DexStringCache* pCache, const char* value) { + if ((value == pCache->value) && (pCache->allocatedSize != 0)) { + /* + * The value already lives in the cache's heap block: hand that + * block to the caller and reset the cache to its inline buffer. + */ + char* result = pCache->value; + pCache->allocatedSize = 0; + pCache->value = pCache->buffer; + return result; + } else { + /* Otherwise return a fresh strdup() copy of the value. */ + return strdup(value); + } +} + + +/* + * =========================================================================== + * Method Prototypes + * =========================================================================== + */ + +/* + * Return the DexProtoId from the given DexProto. The DexProto must + * actually refer to a DexProtoId. + */ +static inline const DexProtoId* getProtoId(const DexProto* pProto) { + return dexGetProtoId(pProto->dexFile, pProto->protoIdx); +} + +/* + * Get the short-form method descriptor for the given prototype. The + * prototype must be protoIdx-based. + */ +const char* dexProtoGetShorty(const DexProto* pProto) { + const DexProtoId* protoId = getProtoId(pProto); + + return dexStringById(pProto->dexFile, protoId->shortyIdx); +} + +/* + * Get the full method descriptor for the given prototype. 
+ * + * The descriptor is assembled in pCache as '(' followed by each parameter + * type descriptor, then ')' and the return type descriptor. The returned + * pointer is pCache->value. + */ +const char* dexProtoGetMethodDescriptor(const DexProto* pProto, + DexStringCache* pCache) { + const DexFile* dexFile = pProto->dexFile; + const DexProtoId* protoId = getProtoId(pProto); + const DexTypeList* typeList = dexGetProtoParameters(dexFile, protoId); + size_t length = 3; // parens and terminating '\0' + u4 paramCount = (typeList == NULL) ? 0 : typeList->size; + u4 i; + + /* First pass: add up the lengths of all the pieces. */ + for (i = 0; i < paramCount; i++) { + u4 idx = dexTypeListGetIdx(typeList, i); + length += strlen(dexStringByTypeIdx(dexFile, idx)); + } + + length += strlen(dexStringByTypeIdx(dexFile, protoId->returnTypeIdx)); + + dexStringCacheAlloc(pCache, length); + + /* Second pass: copy the pieces into the cache's buffer. */ + char *at = (char*) pCache->value; + *(at++) = '('; + + for (i = 0; i < paramCount; i++) { + u4 idx = dexTypeListGetIdx(typeList, i); + const char* desc = dexStringByTypeIdx(dexFile, idx); + strcpy(at, desc); + at += strlen(desc); + } + + *(at++) = ')'; + + strcpy(at, dexStringByTypeIdx(dexFile, protoId->returnTypeIdx)); + return pCache->value; +} + +/* + * Get a copy of the descriptor string associated with the given prototype. + * The returned pointer must be free()ed by the caller. + */ +char* dexProtoCopyMethodDescriptor(const DexProto* pProto) { + DexStringCache cache; + + dexStringCacheInit(&cache); + /* Abandon (rather than release) the cache so the string outlives it. */ + return dexStringCacheAbandon(&cache, + dexProtoGetMethodDescriptor(pProto, &cache)); +} + +/* + * Get the parameter descriptors for the given prototype. This is the + * concatenation of all the descriptors for all the parameters, in + * order, with no other adornment. 
+ * + * The result is built in pCache and the returned pointer is pCache->value. + * A prototype with no parameters yields the empty string. + */ +const char* dexProtoGetParameterDescriptors(const DexProto* pProto, + DexStringCache* pCache) { + DexParameterIterator iterator; + size_t length = 1; /* +1 for the terminating '\0' */ + + dexParameterIteratorInit(&iterator, pProto); + + /* First pass: add up the lengths of all the parameter descriptors. */ + for (;;) { + const char* descriptor = dexParameterIteratorNextDescriptor(&iterator); + if (descriptor == NULL) { + break; + } + + length += strlen(descriptor); + } + + /* Restart the iteration for the copying pass. */ + dexParameterIteratorInit(&iterator, pProto); + + dexStringCacheAlloc(pCache, length); + char *at = (char*) pCache->value; + + /* + * Terminate the (so far empty) result up front: with no parameters + * the copy loop below never writes to the buffer, which would + * otherwise still hold whatever the cache contained before. + */ + *at = '\0'; + + for (;;) { + const char* descriptor = dexParameterIteratorNextDescriptor(&iterator); + if (descriptor == NULL) { + break; + } + + strcpy(at, descriptor); + at += strlen(descriptor); + } + + return pCache->value; +} + +/* + * Get the type descriptor for the return type of the given prototype. + */ +const char* dexProtoGetReturnType(const DexProto* pProto) { + const DexProtoId* protoId = getProtoId(pProto); + return dexStringByTypeIdx(pProto->dexFile, protoId->returnTypeIdx); +} + +/* + * Get the parameter count of the given prototype. + */ +size_t dexProtoGetParameterCount(const DexProto* pProto) { + const DexProtoId* protoId = getProtoId(pProto); + const DexTypeList* typeList = + dexGetProtoParameters(pProto->dexFile, protoId); + return (typeList == NULL) ? 0 : typeList->size; +} + +/* + * Compute the number of parameter words (u4 units) required by the + * given prototype. For example, if the method takes (int, long) and + * returns double, this would return 3 (one for the int, two for the + * long, and the return type isn't relevant). + */ +int dexProtoComputeArgsSize(const DexProto* pProto) { + const char* shorty = dexProtoGetShorty(pProto); + int count = 0; + + /* Skip the return type. 
*/ + shorty++; + + for (;;) { + switch (*(shorty++)) { + case '\0': { + return count; + } + case 'D': + case 'J': { + /* 'D' and 'J' shorty characters count as two words each. */ + count += 2; + break; + } + default: { + count++; + break; + } + } + } +} + +/* + * Common implementation for dexProtoCompare() and dexProtoCompareParameters(). + * + * Returns 0 if the prototypes compare equal, and otherwise a negative or + * positive value as for strcmp(). The return types are only compared when + * compareReturnType is true; parameters are then compared pairwise by type + * descriptor, and if all common parameters match, the prototype with fewer + * parameters sorts first. + */ +static int protoCompare(const DexProto* pProto1, const DexProto* pProto2, + bool compareReturnType) { + + if (pProto1 == pProto2) { + // Easy out. + return 0; + } else { + const DexFile* dexFile1 = pProto1->dexFile; + const DexProtoId* protoId1 = getProtoId(pProto1); + const DexTypeList* typeList1 = + dexGetProtoParameters(dexFile1, protoId1); + int paramCount1 = (typeList1 == NULL) ? 0 : typeList1->size; + + const DexFile* dexFile2 = pProto2->dexFile; + const DexProtoId* protoId2 = getProtoId(pProto2); + const DexTypeList* typeList2 = + dexGetProtoParameters(dexFile2, protoId2); + int paramCount2 = (typeList2 == NULL) ? 0 : typeList2->size; + + if (protoId1 == protoId2) { + // Another easy out. + return 0; + } + + // Compare return types. + + if (compareReturnType) { + int result = + strcmp(dexStringByTypeIdx(dexFile1, protoId1->returnTypeIdx), + dexStringByTypeIdx(dexFile2, protoId2->returnTypeIdx)); + + if (result != 0) { + return result; + } + } + + // Compare parameters. + + int minParam = (paramCount1 > paramCount2) ? paramCount2 : paramCount1; + int i; + + for (i = 0; i < minParam; i++) { + u4 idx1 = dexTypeListGetIdx(typeList1, i); + u4 idx2 = dexTypeListGetIdx(typeList2, i); + int result = + strcmp(dexStringByTypeIdx(dexFile1, idx1), + dexStringByTypeIdx(dexFile2, idx2)); + + if (result != 0) { + return result; + } + } + + if (paramCount1 < paramCount2) { + return -1; + } else if (paramCount1 > paramCount2) { + return 1; + } else { + return 0; + } + } +} + +/* + * Compare the two prototypes. The two prototypes are compared + * with the return type as the major order, then the first arguments, + * then second, etc. 
If two prototypes are identical except that one + * has extra arguments, then the prototype with fewer arguments is + * considered the earlier one in sort order (similar to strcmp()). + */ +int dexProtoCompare(const DexProto* pProto1, const DexProto* pProto2) { + return protoCompare(pProto1, pProto2, true); +} + +/* + * Compare the two prototypes. The two prototypes are compared + * with the first argument as the major order, then second, etc. If two + * prototypes are identical except that one has extra arguments, then the + * prototype with fewer arguments is considered the earlier one in sort + * order (similar to strcmp()). The return types are not compared. + */ +int dexProtoCompareParameters(const DexProto* pProto1, const DexProto* pProto2){ + return protoCompare(pProto1, pProto2, false); +} + + +/* + * Helper for dexProtoCompareToDescriptor(), which gets the return type + * descriptor from a method descriptor string. Returns NULL if the + * string contains no ')'. + */ +static const char* methodDescriptorReturnType(const char* descriptor) { + const char* result = strchr(descriptor, ')'); + + if (result == NULL) { + return NULL; + } + + // The return type is the character just past the ')'. + return result + 1; +} + +/* + * Helper for dexProtoCompareToDescriptor(), which indicates the end + * of an embedded argument type descriptor, which is also the + * beginning of the next argument type descriptor. Since this is for + * argument types, it doesn't accept 'V' as a valid type descriptor. + * Returns NULL if the text at "descriptor" is not a recognized type + * (including an 'L' type with no terminating ';'). + */ +static const char* methodDescriptorNextType(const char* descriptor) { + // Skip any array references. + + while (*descriptor == '[') { + descriptor++; + } + + switch (*descriptor) { + case 'B': case 'C': case 'D': case 'F': + case 'I': case 'J': case 'S': case 'Z': { + return descriptor + 1; + } + case 'L': { + const char* result = strchr(descriptor + 1, ';'); + if (result != NULL) { + // The type ends just past the ';'. + return result + 1; + } + } + } + + return NULL; +} + +/* + * Compare a prototype and a string method descriptor. 
The comparison + * is done as if the descriptor were converted to a prototype and compared + * with dexProtoCompare(). + * + * Returns 0 if they match, and otherwise a negative or positive value as + * for strcmp(). A malformed descriptor (no ')' or an unparseable argument + * type) can't be converted to a prototype, so it is reported as a + * mismatch (1). + */ +int dexProtoCompareToDescriptor(const DexProto* proto, + const char* descriptor) { + // First compare the return types. + + const char* descReturnType = methodDescriptorReturnType(descriptor); + + if (descReturnType == NULL) { + /* No ')' in the descriptor; don't hand NULL to strcmp(). */ + return 1; + } + + int result = strcmp(dexProtoGetReturnType(proto), descReturnType); + + if (result != 0) { + return result; + } + + // The return types match, so we have to check arguments. + + DexParameterIterator iterator; + dexParameterIteratorInit(&iterator, proto); + + // Skip the '('. + assert (*descriptor == '('); + descriptor++; + + for (;;) { + const char* protoDesc = dexParameterIteratorNextDescriptor(&iterator); + + if (*descriptor == ')') { + // It's the end of the descriptor string. + if (protoDesc == NULL) { + // It's also the end of the prototype's arguments. + return 0; + } else { + // The prototype still has more arguments. + return 1; + } + } + + if (protoDesc == NULL) { + /* + * The prototype doesn't have arguments left, but the + * descriptor string does. + */ + return -1; + } + + // Both prototype and descriptor have arguments. Compare them. + + const char* nextDesc = methodDescriptorNextType(descriptor); + + if (nextDesc == NULL) { + /* + * The descriptor's argument type is malformed. Report the + * mismatch explicitly instead of ordering pointers against + * NULL in the loop below. + */ + return 1; + } + + for (;;) { + char c1 = *(protoDesc++); + char c2 = (descriptor < nextDesc) ? *(descriptor++) : '\0'; + + if (c1 < c2) { + // This includes the case where the proto is shorter. + return -1; + } else if (c1 > c2) { + // This includes the case where the desc is shorter. + return 1; + } else if (c1 == '\0') { + // The two types are equal in length. (c2 necessarily == '\0'.) + break; + } + } + + /* + * If we made it here, the two arguments matched, and + * descriptor == nextDesc. 
+ */ + } +} + + +/* + * =========================================================================== + * Parameter Iterators + * =========================================================================== + */ + +/* + * Initialize the given DexParameterIterator to be at the start of the + * parameters of the given prototype. The parameter count is 0 when + * dexGetProtoParameters() returns NULL for the prototype. + */ +void dexParameterIteratorInit(DexParameterIterator* pIterator, + const DexProto* pProto) { + pIterator->proto = pProto; + pIterator->cursor = 0; + + pIterator->parameters = + dexGetProtoParameters(pProto->dexFile, getProtoId(pProto)); + pIterator->parameterCount = (pIterator->parameters == NULL) ? 0 + : pIterator->parameters->size; +} + +/* + * Get the type_id index for the next parameter, if any. This returns + * kDexNoIndex if the last parameter has already been consumed. The + * cursor only advances when an index is returned. + */ +u4 dexParameterIteratorNextIndex(DexParameterIterator* pIterator) { + int cursor = pIterator->cursor; + int parameterCount = pIterator->parameterCount; + + if (cursor >= parameterCount) { + // The iteration is complete. + return kDexNoIndex; + } else { + u4 idx = dexTypeListGetIdx(pIterator->parameters, cursor); + pIterator->cursor++; + return idx; + } +} + +/* + * Get the type descriptor for the next parameter, if any. This returns + * NULL if the last parameter has already been consumed. + */ +const char* dexParameterIteratorNextDescriptor( + DexParameterIterator* pIterator) { + u4 idx = dexParameterIteratorNextIndex(pIterator); + + if (idx == kDexNoIndex) { + return NULL; + } + + return dexStringByTypeIdx(pIterator->proto->dexFile, idx); +} + diff --git a/libdex/DexProto.h b/libdex/DexProto.h new file mode 100644 index 000000000..1ef577b5b --- /dev/null +++ b/libdex/DexProto.h @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Functions for dealing with method prototypes + */ + +#ifndef _LIBDEX_DEXPROTO +#define _LIBDEX_DEXPROTO + +#include "DexFile.h" + +/* + * Single-thread single-string cache. This structure holds a pointer to + * a string which is semi-automatically manipulated by some of the + * method prototype functions. Functions which use this struct + * generally return a string that is valid until the next + * time the same DexStringCache is used. + */ +typedef struct DexStringCache { + char* value; /* the latest value */ + size_t allocatedSize; /* size of the allocated buffer, if allocated */ + char buffer[120]; /* buffer used to hold small-enough results */ +} DexStringCache; + +/* + * Initialize the given DexStringCache. Use this function before passing + * one into any other function. + */ +void dexStringCacheInit(DexStringCache* pCache); + +/* + * Release the allocated contents of the given DexStringCache, if any. + * Use this function after your last use of a DexStringCache. + */ +void dexStringCacheRelease(DexStringCache* pCache); + +/* + * If the given DexStringCache doesn't already point at the given value, + * make a copy of it into the cache. This always returns a writable + * pointer to the contents (whether or not a copy had to be made). This + * function is intended to be used after making a call that at least + * sometimes doesn't populate a DexStringCache. 
+ */ +char* dexStringCacheEnsureCopy(DexStringCache* pCache, const char* value); + +/* + * Abandon the given DexStringCache, and return a writable copy of the + * given value (reusing the string cache's allocation if possible). + * The return value must be free()d by the caller. Use this instead of + * dexStringCacheRelease() if you want the buffer to survive past the + * scope of the DexStringCache. + */ +char* dexStringCacheAbandon(DexStringCache* pCache, const char* value); + +/* + * Method prototype structure, which refers to a protoIdx in a + * particular DexFile. + */ +typedef struct DexProto { + const DexFile* dexFile; /* file the idx refers to */ + u4 protoIdx; /* index into proto_ids table of dexFile */ +} DexProto; + +/* + * Set the given DexProto to refer to the prototype of the given MethodId. + */ +DEX_INLINE void dexProtoSetFromMethodId(DexProto* pProto, + const DexFile* pDexFile, const DexMethodId* pMethodId) +{ + pProto->dexFile = pDexFile; + pProto->protoIdx = pMethodId->protoIdx; +} + +/* + * Get the short-form method descriptor for the given prototype. The + * prototype must be protoIdx-based. + */ +const char* dexProtoGetShorty(const DexProto* pProto); + +/* + * Get the full method descriptor for the given prototype. The result + * is built in pCache, so (as described for DexStringCache) it is only + * valid until the next time pCache is used. + */ +const char* dexProtoGetMethodDescriptor(const DexProto* pProto, + DexStringCache* pCache); + +/* + * Get a copy of the descriptor string associated with the given prototype. + * The returned pointer must be free()ed by the caller. + */ +char* dexProtoCopyMethodDescriptor(const DexProto* pProto); + +/* + * Get the parameter descriptors for the given prototype. This is the + * concatenation of all the descriptors for all the parameters, in + * order, with no other adornment. The result is built in pCache. + */ +const char* dexProtoGetParameterDescriptors(const DexProto* pProto, + DexStringCache* pCache); + +/* + * Return the utf-8 encoded descriptor string from the proto of a MethodId. 
+ * This is dexProtoGetMethodDescriptor() applied to the method's prototype, + * with pCache holding the result. + */ +DEX_INLINE const char* dexGetDescriptorFromMethodId(const DexFile* pDexFile, + const DexMethodId* pMethodId, DexStringCache* pCache) +{ + DexProto proto; + + dexProtoSetFromMethodId(&proto, pDexFile, pMethodId); + return dexProtoGetMethodDescriptor(&proto, pCache); +} + +/* + * Get a copy of the utf-8 encoded method descriptor string from the + * proto of a MethodId. The returned pointer must be free()ed by the + * caller. + */ +DEX_INLINE char* dexCopyDescriptorFromMethodId(const DexFile* pDexFile, + const DexMethodId* pMethodId) +{ + DexProto proto; + + dexProtoSetFromMethodId(&proto, pDexFile, pMethodId); + return dexProtoCopyMethodDescriptor(&proto); +} + +/* + * Get the type descriptor for the return type of the given prototype. + */ +const char* dexProtoGetReturnType(const DexProto* pProto); + +/* + * Get the parameter count of the given prototype. + */ +size_t dexProtoGetParameterCount(const DexProto* pProto); + +/* + * Compute the number of parameter words (u4 units) required by the + * given prototype. For example, if the method takes (int, long) and + * returns double, this would return 3 (one for the int, two for the + * long, and the return type isn't relevant). + */ +int dexProtoComputeArgsSize(const DexProto* pProto); + +/* + * Compare the two prototypes. The two prototypes are compared + * with the return type as the major order, then the first arguments, + * then second, etc. If two prototypes are identical except that one + * has extra arguments, then the prototype with fewer arguments is + * considered the earlier one in sort order (similar to strcmp()). + */ +int dexProtoCompare(const DexProto* pProto1, const DexProto* pProto2); + +/* + * Compare the two prototypes. The two prototypes are compared + * with the first argument as the major order, then second, etc. If two + * prototypes are identical except that one has extra arguments, then the + * prototype with fewer arguments is considered the earlier one in sort + * order (similar to strcmp()). 
+ */ +int dexProtoCompareParameters(const DexProto* pProto1, const DexProto* pProto2); + +/* + * Compare a prototype and a string method descriptor. The comparison + * is done as if the descriptor were converted to a prototype and compared + * with dexProtoCompare(). + */ +int dexProtoCompareToDescriptor(const DexProto* proto, const char* descriptor); + +/* + * Single-thread prototype parameter iterator. This structure holds a + * pointer to a prototype and its parts, along with a cursor. + */ +typedef struct DexParameterIterator { + const DexProto* proto; /* prototype being iterated over */ + const DexTypeList* parameters; /* its parameter list; NULL if none */ + int parameterCount; /* number of parameters (0 if no list) */ + int cursor; /* index of the next parameter to return */ +} DexParameterIterator; + +/* + * Initialize the given DexParameterIterator to be at the start of the + * parameters of the given prototype. + */ +void dexParameterIteratorInit(DexParameterIterator* pIterator, + const DexProto* pProto); + +/* + * Get the type_id index for the next parameter, if any. This returns + * kDexNoIndex if the last parameter has already been consumed. + */ +u4 dexParameterIteratorNextIndex(DexParameterIterator* pIterator); + +/* + * Get the type descriptor for the next parameter, if any. This returns + * NULL if the last parameter has already been consumed. + */ +const char* dexParameterIteratorNextDescriptor( + DexParameterIterator* pIterator); + + + +#endif /*_LIBDEX_DEXPROTO*/ diff --git a/libdex/DexSwapVerify.c b/libdex/DexSwapVerify.c new file mode 100644 index 000000000..5ecda9ff0 --- /dev/null +++ b/libdex/DexSwapVerify.c @@ -0,0 +1,2851 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Byte-swapping and verification of dex files. + */ + +#include "DexFile.h" +#include "DexClass.h" +#include "DexDataMap.h" +#include "DexProto.h" +#include "Leb128.h" + +#include <safe_iop.h> +#include <zlib.h> + +#include <stdlib.h> +#include <string.h> + +#ifndef __BYTE_ORDER +# error "byte ordering not defined" +#endif + +/* + * SWAPn() yields its n-byte argument unchanged on a little-endian host, + * and with its byte order reversed otherwise. + */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define SWAP2(_value) (_value) +# define SWAP4(_value) (_value) +# define SWAP8(_value) (_value) +#else +# define SWAP2(_value) endianSwapU2((_value)) +# define SWAP4(_value) endianSwapU4((_value)) +# define SWAP8(_value) endianSwapU8((_value)) +/* Reverse the two bytes of a u2. */ +static u2 endianSwapU2(u2 value) { + return (value >> 8) | (value << 8); +} +/* Reverse the four bytes of a u4: swap the halves, then the bytes in each. */ +static u4 endianSwapU4(u4 value) { + /* ABCD --> CDAB --> DCBA */ + value = (value >> 16) | (value << 16); + return ((value & 0xff00ff00) >> 8) | ((value << 8) & 0xff00ff00); +} +/* Reverse the eight bytes of a u8 in three halving steps. */ +static u8 endianSwapU8(u8 value) { + /* ABCDEFGH --> EFGHABCD --> GHEFCDAB --> HGFEDCBA */ + value = (value >> 32) | (value << 32); + value = ((value & 0xffff0000ffff0000ULL) >> 16) | + ((value << 16) & 0xffff0000ffff0000ULL); + return ((value & 0xff00ff00ff00ff00ULL) >> 8) | + ((value << 8) & 0xff00ff00ff00ff00ULL); +} +#endif + +/* Swap an lvalue in place. Note that "_field" is evaluated twice. */ +#define SWAP_FIELD2(_field) (_field) = SWAP2(_field) +#define SWAP_FIELD4(_field) (_field) = SWAP4(_field) +#define SWAP_FIELD8(_field) (_field) = SWAP8(_field) + +/* + * Some information we pass around to help verify values. 
+ */ +typedef struct CheckState { + const DexHeader* pHeader; + const u1* fileStart; + const u1* fileEnd; // points to fileStart + fileLen + u4 fileLen; + DexDataMap* pDataMap; // set after map verification + const DexFile* pDexFile; // set after intraitem verification + const void* previousItem; // set during section iteration +} CheckState; + +/* + * Return the file offset of the given pointer. The pointer difference + * from state->fileStart is returned as a u4. + */ +static inline u4 fileOffset(const CheckState* state, const void* ptr) { + return ((const u1*) ptr) - state->fileStart; +} + +/* + * Return a pointer for the given file offset. No range check is done + * here. + */ +static inline void* filePointer(const CheckState* state, u4 offset) { + return (void*) (state->fileStart + offset); +} + +/* + * Verify that a pointer range, start inclusive to end exclusive, only + * covers bytes in the file and doesn't point beyond the end of the + * file. That is, the start must indicate a valid byte or may point at + * the byte just past the end of the file (but no further), and the + * end must be no less than the start and must also not point beyond + * the byte just past the end of the file. + * + * Returns true if the range is acceptable. Otherwise logs a warning + * that includes "label" and the two file offsets, and returns false. + */ +static inline bool checkPtrRange(const CheckState* state, + const void* start, const void* end, const char* label) { + const void* fileStart = state->fileStart; + const void* fileEnd = state->fileEnd; + if ((start < fileStart) || (start > fileEnd) + || (end < start) || (end > fileEnd)) { + LOGW("Bad offset range for %s: 0x%x..0x%x\n", label, + fileOffset(state, start), fileOffset(state, end)); + return false; + } + return true; +} + +/* + * Verify that a range of offsets, start inclusive to end exclusive, + * are all valid. That is, the start must indicate a valid byte or may + * point at the byte just past the end of the file (but no further), + * and the end must be no less than the start and must also not point + * beyond the byte just past the end of the file. + * + * Assumes "const CheckState* state". 
+ * On failure this returns 0 from the enclosing function. + */ +#define CHECK_OFFSET_RANGE(_start, _end) { \ + const u1* _startPtr = filePointer(state, (_start)); \ + const u1* _endPtr = filePointer(state, (_end)); \ + if (!checkPtrRange(state, _startPtr, _endPtr, \ + #_start ".." #_end)) { \ + return 0; \ + } \ + } + +/* + * Verify that a pointer range, start inclusive to end exclusive, only + * covers bytes in the file and doesn't point beyond the end of the + * file. That is, the start must indicate a valid byte or may point at + * the byte just past the end of the file (but no further), and the + * end must be no less than the start and must also not point beyond + * the byte just past the end of the file. + * + * Assumes "const CheckState* state". + * On failure this returns 0 from the enclosing function. + */ +#define CHECK_PTR_RANGE(_start, _end) { \ + if (!checkPtrRange(state, (_start), (_end), #_start ".." #_end)) { \ + return 0; \ + } \ + } + +/* + * Make sure a list of items fits entirely within the file. + * + * Assumes "const CheckState* state" and "typeof(_count) == typeof(_elemSize)" + * If the type sizes or signs are mismatched, this will return 0. + * + * NOTE(review): _end is computed from the unchecked product before + * safe_mul() is consulted; it is only passed to checkPtrRange() when + * safe_mul() succeeds. + */ +#define CHECK_LIST_SIZE(_ptr, _count, _elemSize) { \ + const u1* _start = (const u1*) (_ptr); \ + const u1* _end = _start + ((_count) * (_elemSize)); \ + if (!safe_mul(NULL, (_count), (_elemSize)) || \ + !checkPtrRange(state, _start, _end, #_ptr)) { \ + return 0; \ + } \ + } + +/* + * Swap a field that is known to hold an absolute DEX file offset. Note: + * This does not check to see that the swapped offset points within the + * mapped file, since that should be handled (with even more rigor) by + * the cross-verification phase. + * + * Assumes "const CheckState* state". + */ +#define SWAP_OFFSET4(_field) { \ + SWAP_FIELD4((_field)); \ + } + +/* + * Verify that an index falls in a valid range. 
+ * An index is valid if it is strictly less than the limit. On failure + * this logs a warning and returns 0 from the enclosing function. + */ +#define CHECK_INDEX(_field, _limit) { \ + if ((_field) >= (_limit)) { \ + LOGW("Bad index: %s(%u) >= %s(%u)\n", \ + #_field, (u4)(_field), #_limit, (u4)(_limit)); \ + return 0; \ + } \ + } + +/* + * Swap an index, and verify that it falls in a valid range. + */ +#define SWAP_INDEX2(_field, _limit) { \ + SWAP_FIELD2((_field)); \ + CHECK_INDEX((_field), (_limit)); \ + } + +/* + * Verify that an index falls in a valid range or is kDexNoIndex. + * On failure this logs a warning and returns 0 from the enclosing + * function. + */ +#define CHECK_INDEX_OR_NOINDEX(_field, _limit) { \ + if ((_field) != kDexNoIndex && (_field) >= (_limit)) { \ + LOGW("Bad index: %s(%u) >= %s(%u)\n", \ + #_field, (u4)(_field), #_limit, (u4)(_limit)); \ + return 0; \ + } \ + } + +/* + * Swap an index, and verify that it falls in a valid range. + */ +#define SWAP_INDEX4(_field, _limit) { \ + SWAP_FIELD4((_field)); \ + CHECK_INDEX((_field), (_limit)); \ + } + +/* + * Swap an index, and verify that it falls in a valid range or is + * kDexNoIndex. + */ +#define SWAP_INDEX4_OR_NOINDEX(_field, _limit) { \ + SWAP_FIELD4((_field)); \ + CHECK_INDEX_OR_NOINDEX((_field), (_limit)); \ + } + +/* Verify the definer of a given field_idx: true if the field_id's + * classIdx equals definingClass. */ +static bool verifyFieldDefiner(const CheckState* state, u4 definingClass, + u4 fieldIdx) { + const DexFieldId* field = dexGetFieldId(state->pDexFile, fieldIdx); + return field->classIdx == definingClass; +} + +/* Verify the definer of a given method_idx: true if the method_id's + * classIdx equals definingClass. */ +static bool verifyMethodDefiner(const CheckState* state, u4 definingClass, + u4 methodIdx) { + const DexMethodId* meth = dexGetMethodId(state->pDexFile, methodIdx); + return meth->classIdx == definingClass; +} + +/* + * Swap the header_item. 
+ */ +static bool swapDexHeader(const CheckState* state, DexHeader* pHeader) +{ + CHECK_PTR_RANGE(pHeader, pHeader + 1); + + // magic is ok + SWAP_FIELD4(pHeader->checksum); + // signature is ok + SWAP_FIELD4(pHeader->fileSize); + SWAP_FIELD4(pHeader->headerSize); + SWAP_FIELD4(pHeader->endianTag); + SWAP_FIELD4(pHeader->linkSize); + SWAP_OFFSET4(pHeader->linkOff); + SWAP_OFFSET4(pHeader->mapOff); + SWAP_FIELD4(pHeader->stringIdsSize); + SWAP_OFFSET4(pHeader->stringIdsOff); + SWAP_FIELD4(pHeader->typeIdsSize); + SWAP_OFFSET4(pHeader->typeIdsOff); + SWAP_FIELD4(pHeader->fieldIdsSize); + SWAP_OFFSET4(pHeader->fieldIdsOff); + SWAP_FIELD4(pHeader->methodIdsSize); + SWAP_OFFSET4(pHeader->methodIdsOff); + SWAP_FIELD4(pHeader->protoIdsSize); + SWAP_OFFSET4(pHeader->protoIdsOff); + SWAP_FIELD4(pHeader->classDefsSize); + SWAP_OFFSET4(pHeader->classDefsOff); + SWAP_FIELD4(pHeader->dataSize); + SWAP_OFFSET4(pHeader->dataOff); + + if (pHeader->endianTag != kDexEndianConstant) { + LOGE("Unexpected endian_tag: 0x%x\n", pHeader->endianTag); + return false; + } + + // Assign variables so the diagnostic is prettier. (Hooray for macros.) + u4 linkOff = pHeader->linkOff; + u4 linkEnd = linkOff + pHeader->linkSize; + u4 dataOff = pHeader->dataOff; + u4 dataEnd = dataOff + pHeader->dataSize; + CHECK_OFFSET_RANGE(linkOff, linkEnd); + CHECK_OFFSET_RANGE(dataOff, dataEnd); + + /* + * Note: The offsets and ranges of the other header items end up getting + * checked during the first iteration over the map. + */ + + return true; +} + +/* Check the header section for sanity. 
*/ +static bool checkHeaderSection(const CheckState* state, u4 sectionOffset, + u4 sectionCount, u4* endOffset) { + if (sectionCount != 1) { + LOGE("Multiple header items\n"); + return false; + } + + if (sectionOffset != 0) { + LOGE("Header at 0x%x; not at start of file\n", sectionOffset); + return false; + } + + const DexHeader* pHeader = filePointer(state, 0); + *endOffset = pHeader->headerSize; + return true; +} + +/* + * Helper for swapMap(), which turns a map type constant into a small + * one-bit-on integer, suitable for use in an int-sized bit set. + */ +static u4 mapTypeToBitMask(int mapType) { + switch (mapType) { + case kDexTypeHeaderItem: return 1 << 0; + case kDexTypeStringIdItem: return 1 << 1; + case kDexTypeTypeIdItem: return 1 << 2; + case kDexTypeProtoIdItem: return 1 << 3; + case kDexTypeFieldIdItem: return 1 << 4; + case kDexTypeMethodIdItem: return 1 << 5; + case kDexTypeClassDefItem: return 1 << 6; + case kDexTypeMapList: return 1 << 7; + case kDexTypeTypeList: return 1 << 8; + case kDexTypeAnnotationSetRefList: return 1 << 9; + case kDexTypeAnnotationSetItem: return 1 << 10; + case kDexTypeClassDataItem: return 1 << 11; + case kDexTypeCodeItem: return 1 << 12; + case kDexTypeStringDataItem: return 1 << 13; + case kDexTypeDebugInfoItem: return 1 << 14; + case kDexTypeAnnotationItem: return 1 << 15; + case kDexTypeEncodedArrayItem: return 1 << 16; + case kDexTypeAnnotationsDirectoryItem: return 1 << 17; + default: { + LOGE("Unknown map item type %04x\n", mapType); + return 0; + } + } +} + +/* + * Helper for swapMap(), which indicates if an item type should appear + * in the data section. 
+ */ +static bool isDataSectionType(int mapType) { + switch (mapType) { + case kDexTypeHeaderItem: + case kDexTypeStringIdItem: + case kDexTypeTypeIdItem: + case kDexTypeProtoIdItem: + case kDexTypeFieldIdItem: + case kDexTypeMethodIdItem: + case kDexTypeClassDefItem: { + return false; + } + } + + return true; +} + +/* + * Swap the map_list and verify what we can about it. Also, if verification + * passes, allocate the state's DexDataMap. + */ +static bool swapMap(CheckState* state, DexMapList* pMap) +{ + DexMapItem* item = pMap->list; + u4 count = pMap->size; + u4 dataItemCount = 0; // Total count of items in the data section. + u4 dataItemsLeft = state->pHeader->dataSize; // See use below. + u4 usedBits = 0; // Bit set: one bit per section + bool first = true; + u4 lastOffset = 0; + + CHECK_LIST_SIZE(item, count, sizeof(DexMapItem)); + + while (count--) { + SWAP_FIELD2(item->type); + SWAP_FIELD2(item->unused); + SWAP_FIELD4(item->size); + SWAP_OFFSET4(item->offset); + + if (first) { + first = false; + } else if (lastOffset >= item->offset) { + LOGE("Out-of-order map item: 0x%x then 0x%x\n", + lastOffset, item->offset); + return false; + } + + if (item->offset >= state->pHeader->fileSize) { + LOGE("Map item after end of file: %x, size 0x%x\n", + item->offset, state->pHeader->fileSize); + return false; + } + + if (isDataSectionType(item->type)) { + u4 count = item->size; + + /* + * This sanity check on the data section items ensures that + * there are no more items than the number of bytes in + * the data section. 
+ */ + if (count > dataItemsLeft) { + LOGE("Unrealistically many items in the data section: " + "at least %d\n", dataItemCount + count); + return false; + } + + dataItemsLeft -= count; + dataItemCount += count; + } + + u4 bit = mapTypeToBitMask(item->type); + + if (bit == 0) { + return false; + } + + if ((usedBits & bit) != 0) { + LOGE("Duplicate map section of type 0x%x\n", item->type); + return false; + } + + usedBits |= bit; + lastOffset = item->offset; + item++; + } + + if ((usedBits & mapTypeToBitMask(kDexTypeHeaderItem)) == 0) { + LOGE("Map is missing header entry\n"); + return false; + } + + if ((usedBits & mapTypeToBitMask(kDexTypeMapList)) == 0) { + LOGE("Map is missing map_list entry\n"); + return false; + } + + if (((usedBits & mapTypeToBitMask(kDexTypeStringIdItem)) == 0) + && ((state->pHeader->stringIdsOff != 0) + || (state->pHeader->stringIdsSize != 0))) { + LOGE("Map is missing string_ids entry\n"); + return false; + } + + if (((usedBits & mapTypeToBitMask(kDexTypeTypeIdItem)) == 0) + && ((state->pHeader->typeIdsOff != 0) + || (state->pHeader->typeIdsSize != 0))) { + LOGE("Map is missing type_ids entry\n"); + return false; + } + + if (((usedBits & mapTypeToBitMask(kDexTypeProtoIdItem)) == 0) + && ((state->pHeader->protoIdsOff != 0) + || (state->pHeader->protoIdsSize != 0))) { + LOGE("Map is missing proto_ids entry\n"); + return false; + } + + if (((usedBits & mapTypeToBitMask(kDexTypeFieldIdItem)) == 0) + && ((state->pHeader->fieldIdsOff != 0) + || (state->pHeader->fieldIdsSize != 0))) { + LOGE("Map is missing field_ids entry\n"); + return false; + } + + if (((usedBits & mapTypeToBitMask(kDexTypeMethodIdItem)) == 0) + && ((state->pHeader->methodIdsOff != 0) + || (state->pHeader->methodIdsSize != 0))) { + LOGE("Map is missing method_ids entry\n"); + return false; + } + + if (((usedBits & mapTypeToBitMask(kDexTypeClassDefItem)) == 0) + && ((state->pHeader->classDefsOff != 0) + || (state->pHeader->classDefsSize != 0))) { + LOGE("Map is missing 
class_defs entry\n"); + return false; + } + + state->pDataMap = dexDataMapAlloc(dataItemCount); + if (state->pDataMap == NULL) { + LOGE("Unable to allocate data map (size 0x%x)\n", dataItemCount); + return false; + } + + return true; +} + +/* Check the map section for sanity. */ +static bool checkMapSection(const CheckState* state, u4 sectionOffset, + u4 sectionCount, u4* endOffset) { + if (sectionCount != 1) { + LOGE("Multiple map list items"); + return false; + } + + if (sectionOffset != state->pHeader->mapOff) { + LOGE("Map not at header-defined offset: 0x%x, expected 0x%x\n", + sectionOffset, state->pHeader->mapOff); + return false; + } + + const DexMapList* pMap = filePointer(state, sectionOffset); + + *endOffset = + sectionOffset + sizeof(u4) + (pMap->size * sizeof(DexMapItem)); + return true; +} + +/* Perform byte-swapping and intra-item verification on string_id_item. */ +static void* swapStringIdItem(const CheckState* state, void* ptr) { + DexStringId* item = ptr; + + CHECK_PTR_RANGE(item, item + 1); + SWAP_OFFSET4(item->stringDataOff); + + return item + 1; +} + +/* Perform cross-item verification of string_id_item. */ +static void* crossVerifyStringIdItem(const CheckState* state, void* ptr) { + const DexStringId* item = ptr; + + if (!dexDataMapVerify(state->pDataMap, + item->stringDataOff, kDexTypeStringDataItem)) { + return NULL; + } + + const DexStringId* item0 = state->previousItem; + if (item0 != NULL) { + // Check ordering. + const char* s0 = dexGetStringData(state->pDexFile, item0); + const char* s1 = dexGetStringData(state->pDexFile, item); + if (dexUtf8Cmp(s0, s1) >= 0) { + LOGE("Out-of-order string_ids: '%s' then '%s'\n", s0, s1); + return NULL; + } + } + + return (void*) (item + 1); +} + +/* Perform byte-swapping and intra-item verification on type_id_item. 
*/ +static void* swapTypeIdItem(const CheckState* state, void* ptr) { + DexTypeId* item = ptr; + + CHECK_PTR_RANGE(item, item + 1); + SWAP_INDEX4(item->descriptorIdx, state->pHeader->stringIdsSize); + + return item + 1; +} + +/* Perform cross-item verification of type_id_item. */ +static void* crossVerifyTypeIdItem(const CheckState* state, void* ptr) { + const DexTypeId* item = ptr; + const char* descriptor = + dexStringById(state->pDexFile, item->descriptorIdx); + + if (!dexIsValidTypeDescriptor(descriptor)) { + LOGE("Invalid type descriptor: '%s'\n", descriptor); + return NULL; + } + + const DexTypeId* item0 = state->previousItem; + if (item0 != NULL) { + // Check ordering. This relies on string_ids being in order. + if (item0->descriptorIdx >= item->descriptorIdx) { + LOGE("Out-of-order type_ids: 0x%x then 0x%x\n", + item0->descriptorIdx, item->descriptorIdx); + return NULL; + } + } + + return (void*) (item + 1); +} + +/* Perform byte-swapping and intra-item verification on proto_id_item. */ +static void* swapProtoIdItem(const CheckState* state, void* ptr) { + DexProtoId* item = ptr; + + CHECK_PTR_RANGE(item, item + 1); + SWAP_INDEX4(item->shortyIdx, state->pHeader->stringIdsSize); + SWAP_INDEX4(item->returnTypeIdx, state->pHeader->typeIdsSize); + SWAP_OFFSET4(item->parametersOff); + + return item + 1; +} + +/* Helper for crossVerifyProtoIdItem(), which checks a shorty character + * to see if it is compatible with a type descriptor. Returns true if + * so, false if not. */ +static bool shortyDescMatch(char shorty, const char* descriptor, bool + isReturnType) { + switch (shorty) { + case 'V': { + if (!isReturnType) { + LOGE("Invalid use of void\n"); + return false; + } + // Fall through. + } + case 'B': + case 'C': + case 'D': + case 'F': + case 'I': + case 'J': + case 'S': + case 'Z': { + if ((descriptor[0] != shorty) || (descriptor[1] != '\0')) { + LOGE("Shorty vs. 
primitive type mismatch: '%c', '%s'\n", + shorty, descriptor); + return false; + } + break; + } + case 'L': { + if ((descriptor[0] != 'L') && (descriptor[0] != '[')) { + LOGE("Shorty vs. type mismatch: '%c', '%s'\n", + shorty, descriptor); + return false; + } + break; + } + default: { + LOGE("Bogus shorty: '%c'\n", shorty); + return false; + } + } + + return true; +} + +/* Perform cross-item verification of proto_id_item. */ +static void* crossVerifyProtoIdItem(const CheckState* state, void* ptr) { + const DexProtoId* item = ptr; + const char* shorty = + dexStringById(state->pDexFile, item->shortyIdx); + + if (!dexDataMapVerify0Ok(state->pDataMap, + item->parametersOff, kDexTypeTypeList)) { + return NULL; + } + + if (!shortyDescMatch(*shorty, + dexStringByTypeIdx(state->pDexFile, item->returnTypeIdx), + true)) { + return NULL; + } + + u4 protoIdx = item - state->pDexFile->pProtoIds; + DexProto proto = { state->pDexFile, protoIdx }; + DexParameterIterator iterator; + + dexParameterIteratorInit(&iterator, &proto); + shorty++; // Skip the return type. + + for (;;) { + const char *desc = dexParameterIteratorNextDescriptor(&iterator); + + if (desc == NULL) { + break; + } + + if (*shorty == '\0') { + LOGE("Shorty is too short\n"); + return NULL; + } + + if (!shortyDescMatch(*shorty, desc, false)) { + return NULL; + } + + shorty++; + } + + if (*shorty != '\0') { + LOGE("Shorty is too long\n"); + return NULL; + } + + const DexProtoId* item0 = state->previousItem; + if (item0 != NULL) { + // Check ordering. This relies on type_ids being in order. 
+ if (item0->returnTypeIdx > item->returnTypeIdx) { + LOGE("Out-of-order proto_id return types\n"); + return NULL; + } else if (item0->returnTypeIdx == item->returnTypeIdx) { + bool badOrder = false; + DexProto proto0 = { state->pDexFile, protoIdx - 1 }; + DexParameterIterator iterator0; + + dexParameterIteratorInit(&iterator, &proto); + dexParameterIteratorInit(&iterator0, &proto0); + + for (;;) { + u4 idx0 = dexParameterIteratorNextIndex(&iterator0); + u4 idx1 = dexParameterIteratorNextIndex(&iterator); + + if (idx1 == kDexNoIndex) { + badOrder = true; + break; + } + + if (idx0 == kDexNoIndex) { + break; + } + + if (idx0 < idx1) { + break; + } else if (idx0 > idx1) { + badOrder = true; + break; + } + } + + if (badOrder) { + LOGE("Out-of-order proto_id arguments\n"); + return NULL; + } + } + } + + return (void*) (item + 1); +} + +/* Perform byte-swapping and intra-item verification on field_id_item. */ +static void* swapFieldIdItem(const CheckState* state, void* ptr) { + DexFieldId* item = ptr; + + CHECK_PTR_RANGE(item, item + 1); + SWAP_INDEX2(item->classIdx, state->pHeader->typeIdsSize); + SWAP_INDEX2(item->typeIdx, state->pHeader->typeIdsSize); + SWAP_INDEX4(item->nameIdx, state->pHeader->stringIdsSize); + + return item + 1; +} + +/* Perform cross-item verification of field_id_item. 
*/ +static void* crossVerifyFieldIdItem(const CheckState* state, void* ptr) { + const DexFieldId* item = ptr; + const char* s; + + s = dexStringByTypeIdx(state->pDexFile, item->classIdx); + if (!dexIsClassDescriptor(s)) { + LOGE("Invalid descriptor for class_idx: '%s'\n", s); + return NULL; + } + + s = dexStringByTypeIdx(state->pDexFile, item->typeIdx); + if (!dexIsFieldDescriptor(s)) { + LOGE("Invalid descriptor for type_idx: '%s'\n", s); + return NULL; + } + + s = dexStringById(state->pDexFile, item->nameIdx); + if (!dexIsValidMemberName(s)) { + LOGE("Invalid name: '%s'\n", s); + return NULL; + } + + const DexFieldId* item0 = state->previousItem; + if (item0 != NULL) { + // Check ordering. This relies on the other sections being in order. + bool done = false; + bool bogus = false; + + if (item0->classIdx > item->classIdx) { + bogus = true; + done = true; + } else if (item0->classIdx < item->classIdx) { + done = true; + } + + if (!done) { + if (item0->nameIdx > item->nameIdx) { + bogus = true; + done = true; + } else if (item0->nameIdx < item->nameIdx) { + done = true; + } + } + + if (!done) { + if (item0->typeIdx >= item->typeIdx) { + bogus = true; + } + } + + if (bogus) { + LOGE("Out-of-order field_ids\n"); + return NULL; + } + } + + return (void*) (item + 1); +} + +/* Perform byte-swapping and intra-item verification on method_id_item. */ +static void* swapMethodIdItem(const CheckState* state, void* ptr) { + DexMethodId* item = ptr; + + CHECK_PTR_RANGE(item, item + 1); + SWAP_INDEX2(item->classIdx, state->pHeader->typeIdsSize); + SWAP_INDEX2(item->protoIdx, state->pHeader->protoIdsSize); + SWAP_INDEX4(item->nameIdx, state->pHeader->stringIdsSize); + + return item + 1; +} + +/* Perform cross-item verification of method_id_item. 
*/ +static void* crossVerifyMethodIdItem(const CheckState* state, void* ptr) { + const DexMethodId* item = ptr; + const char* s; + + s = dexStringByTypeIdx(state->pDexFile, item->classIdx); + if (!dexIsReferenceDescriptor(s)) { + LOGE("Invalid descriptor for class_idx: '%s'\n", s); + return NULL; + } + + s = dexStringById(state->pDexFile, item->nameIdx); + if (!dexIsValidMemberName(s)) { + LOGE("Invalid name: '%s'\n", s); + return NULL; + } + + const DexMethodId* item0 = state->previousItem; + if (item0 != NULL) { + // Check ordering. This relies on the other sections being in order. + bool done = false; + bool bogus = false; + + if (item0->classIdx > item->classIdx) { + bogus = true; + done = true; + } else if (item0->classIdx < item->classIdx) { + done = true; + } + + if (!done) { + if (item0->nameIdx > item->nameIdx) { + bogus = true; + done = true; + } else if (item0->nameIdx < item->nameIdx) { + done = true; + } + } + + if (!done) { + if (item0->protoIdx >= item->protoIdx) { + bogus = true; + } + } + + if (bogus) { + LOGE("Out-of-order method_ids\n"); + return NULL; + } + } + + return (void*) (item + 1); +} + +/* Perform byte-swapping and intra-item verification on class_def_item. 
*/ +static void* swapClassDefItem(const CheckState* state, void* ptr) { + DexClassDef* item = ptr; + + CHECK_PTR_RANGE(item, item + 1); + SWAP_INDEX4(item->classIdx, state->pHeader->typeIdsSize); + SWAP_FIELD4(item->accessFlags); + SWAP_INDEX4_OR_NOINDEX(item->superclassIdx, state->pHeader->typeIdsSize); + SWAP_OFFSET4(item->interfacesOff); + SWAP_INDEX4_OR_NOINDEX(item->sourceFileIdx, state->pHeader->stringIdsSize); + SWAP_OFFSET4(item->annotationsOff); + SWAP_OFFSET4(item->classDataOff); + + return item + 1; +} + +/* defined below */ +static u4 findFirstClassDataDefiner(const CheckState* state, + DexClassData* classData); +static u4 findFirstAnnotationsDirectoryDefiner(const CheckState* state, + const DexAnnotationsDirectoryItem* dir); + +/* Helper for crossVerifyClassDefItem(), which checks a class_data_item to + * make sure all its references are to a given class. */ +static bool verifyClassDataIsForDef(const CheckState* state, u4 offset, + u4 definerIdx) { + if (offset == 0) { + return true; + } + + const u1* data = filePointer(state, offset); + DexClassData* classData = dexReadAndVerifyClassData(&data, NULL); + + if (classData == NULL) { + // Shouldn't happen, but bail here just in case. + return false; + } + + /* + * The class_data_item verification ensures that + * it consistently refers to the same definer, so all we need to + * do is check the first one. + */ + u4 dataDefiner = findFirstClassDataDefiner(state, classData); + bool result = (dataDefiner == definerIdx) || (dataDefiner == kDexNoIndex); + + free(classData); + return result; +} + +/* Helper for crossVerifyClassDefItem(), which checks an + * annotations_directory_item to make sure all its references are to a + * given class. 
*/ +static bool verifyAnnotationsDirectoryIsForDef(const CheckState* state, + u4 offset, u4 definerIdx) { + if (offset == 0) { + return true; + } + + const DexAnnotationsDirectoryItem* dir = filePointer(state, offset); + u4 annoDefiner = findFirstAnnotationsDirectoryDefiner(state, dir); + + return (annoDefiner == definerIdx) || (annoDefiner == kDexNoIndex); +} + +/* Perform cross-item verification of class_def_item. */ +static void* crossVerifyClassDefItem(const CheckState* state, void* ptr) { + const DexClassDef* item = ptr; + const char* descriptor = + dexStringByTypeIdx(state->pDexFile, item->classIdx); + + if (!dexIsClassDescriptor(descriptor)) { + LOGE("Invalid class: '%s'\n", descriptor); + return NULL; + } + + bool okay = + dexDataMapVerify0Ok(state->pDataMap, + item->interfacesOff, kDexTypeTypeList) + && dexDataMapVerify0Ok(state->pDataMap, + item->annotationsOff, kDexTypeAnnotationsDirectoryItem) + && dexDataMapVerify0Ok(state->pDataMap, + item->classDataOff, kDexTypeClassDataItem) + && dexDataMapVerify0Ok(state->pDataMap, + item->staticValuesOff, kDexTypeEncodedArrayItem); + + if (!okay) { + return NULL; + } + + if (item->superclassIdx != kDexNoIndex) { + descriptor = dexStringByTypeIdx(state->pDexFile, item->superclassIdx); + if (!dexIsClassDescriptor(descriptor)) { + LOGE("Invalid superclass: '%s'\n", descriptor); + return NULL; + } + } + + const DexTypeList* interfaces = + dexGetInterfacesList(state->pDexFile, item); + if (interfaces != NULL) { + u4 size = interfaces->size; + u4 i; + + /* + * Ensure that all interfaces refer to classes (not arrays or + * primitives). + */ + for (i = 0; i < size; i++) { + descriptor = dexStringByTypeIdx(state->pDexFile, + dexTypeListGetIdx(interfaces, i)); + if (!dexIsClassDescriptor(descriptor)) { + LOGE("Invalid interface: '%s'\n", descriptor); + return NULL; + } + } + + /* + * Ensure that there are no duplicates. 
This is an O(N^2) test, + * but in practice the number of interfaces implemented by any + * given class is low. I will buy a milkshake for the + * first person to show me a realistic case for which this test + * would be unacceptably slow. + */ + for (i = 1; i < size; i++) { + u4 idx1 = dexTypeListGetIdx(interfaces, i); + u4 j; + for (j = 0; j < i; j++) { + u4 idx2 = dexTypeListGetIdx(interfaces, j); + if (idx1 == idx2) { + LOGE("Duplicate interface: '%s'\n", + dexStringByTypeIdx(state->pDexFile, idx1)); + return NULL; + } + } + } + } + + if (!verifyClassDataIsForDef(state, item->classDataOff, item->classIdx)) { + LOGE("Invalid class_data_item\n"); + return NULL; + } + + if (!verifyAnnotationsDirectoryIsForDef(state, item->annotationsOff, + item->classIdx)) { + LOGE("Invalid annotations_directory_item\n"); + return NULL; + } + + return (void*) (item + 1); +} + +/* Helper for swapAnnotationsDirectoryItem(), which performs + * byte-swapping and intra-item verification on an + * annotation_directory_item's field elements. */ +static u1* swapFieldAnnotations(const CheckState* state, u4 count, u1* addr) { + DexFieldAnnotationsItem* item = (DexFieldAnnotationsItem*) addr; + bool first = true; + u4 lastIdx = 0; + + CHECK_LIST_SIZE(item, count, sizeof(DexFieldAnnotationsItem)); + + while (count--) { + SWAP_INDEX4(item->fieldIdx, state->pHeader->fieldIdsSize); + SWAP_OFFSET4(item->annotationsOff); + + if (first) { + first = false; + } else if (lastIdx >= item->fieldIdx) { + LOGE("Out-of-order field_idx: 0x%x then 0x%x\n", lastIdx, + item->fieldIdx); + return NULL; + } + + lastIdx = item->fieldIdx; + item++; + } + + return (u1*) item; +} + +/* Helper for swapAnnotationsDirectoryItem(), which performs + * byte-swapping and intra-item verification on an + * annotation_directory_item's method elements. 
*/ +static u1* swapMethodAnnotations(const CheckState* state, u4 count, u1* addr) { + DexMethodAnnotationsItem* item = (DexMethodAnnotationsItem*) addr; + bool first = true; + u4 lastIdx = 0; + + CHECK_LIST_SIZE(item, count, sizeof(DexMethodAnnotationsItem)); + + while (count--) { + SWAP_INDEX4(item->methodIdx, state->pHeader->methodIdsSize); + SWAP_OFFSET4(item->annotationsOff); + + if (first) { + first = false; + } else if (lastIdx >= item->methodIdx) { + LOGE("Out-of-order method_idx: 0x%x then 0x%x\n", lastIdx, + item->methodIdx); + return NULL; + } + + lastIdx = item->methodIdx; + item++; + } + + return (u1*) item; +} + +/* Helper for swapAnnotationsDirectoryItem(), which performs + * byte-swapping and intra-item verification on an + * annotation_directory_item's parameter elements. */ +static u1* swapParameterAnnotations(const CheckState* state, u4 count, + u1* addr) { + DexParameterAnnotationsItem* item = (DexParameterAnnotationsItem*) addr; + bool first = true; + u4 lastIdx = 0; + + CHECK_LIST_SIZE(item, count, sizeof(DexParameterAnnotationsItem)); + + while (count--) { + SWAP_INDEX4(item->methodIdx, state->pHeader->methodIdsSize); + SWAP_OFFSET4(item->annotationsOff); + + if (first) { + first = false; + } else if (lastIdx >= item->methodIdx) { + LOGE("Out-of-order method_idx: 0x%x then 0x%x\n", lastIdx, + item->methodIdx); + return NULL; + } + + lastIdx = item->methodIdx; + item++; + } + + return (u1*) item; +} + +/* Perform byte-swapping and intra-item verification on + * annotations_directory_item. 
*/ +static void* swapAnnotationsDirectoryItem(const CheckState* state, void* ptr) { + DexAnnotationsDirectoryItem* item = ptr; + + CHECK_PTR_RANGE(item, item + 1); + SWAP_OFFSET4(item->classAnnotationsOff); + SWAP_FIELD4(item->fieldsSize); + SWAP_FIELD4(item->methodsSize); + SWAP_FIELD4(item->parametersSize); + + u1* addr = (u1*) (item + 1); + + if (item->fieldsSize != 0) { + addr = swapFieldAnnotations(state, item->fieldsSize, addr); + if (addr == NULL) { + return NULL; + } + } + + if (item->methodsSize != 0) { + addr = swapMethodAnnotations(state, item->methodsSize, addr); + if (addr == NULL) { + return NULL; + } + } + + if (item->parametersSize != 0) { + addr = swapParameterAnnotations(state, item->parametersSize, addr); + if (addr == NULL) { + return NULL; + } + } + + return addr; +} + +/* Helper for crossVerifyAnnotationsDirectoryItem(), which checks the + * field elements. */ +static const u1* crossVerifyFieldAnnotations(const CheckState* state, u4 count, + const u1* addr, u4 definingClass) { + const DexFieldAnnotationsItem* item = (DexFieldAnnotationsItem*) addr; + + while (count--) { + if (!verifyFieldDefiner(state, definingClass, item->fieldIdx)) { + return NULL; + } + if (!dexDataMapVerify(state->pDataMap, item->annotationsOff, + kDexTypeAnnotationSetItem)) { + return NULL; + } + item++; + } + + return (const u1*) item; +} + +/* Helper for crossVerifyAnnotationsDirectoryItem(), which checks the + * method elements. 
*/ +static const u1* crossVerifyMethodAnnotations(const CheckState* state, + u4 count, const u1* addr, u4 definingClass) { + const DexMethodAnnotationsItem* item = (DexMethodAnnotationsItem*) addr; + + while (count--) { + if (!verifyMethodDefiner(state, definingClass, item->methodIdx)) { + return NULL; + } + if (!dexDataMapVerify(state->pDataMap, item->annotationsOff, + kDexTypeAnnotationSetItem)) { + return NULL; + } + item++; + } + + return (const u1*) item; +} + +/* Helper for crossVerifyAnnotationsDirectoryItem(), which checks the + * parameter elements. */ +static const u1* crossVerifyParameterAnnotations(const CheckState* state, + u4 count, const u1* addr, u4 definingClass) { + const DexParameterAnnotationsItem* item = + (DexParameterAnnotationsItem*) addr; + + while (count--) { + if (!verifyMethodDefiner(state, definingClass, item->methodIdx)) { + return NULL; + } + if (!dexDataMapVerify(state->pDataMap, item->annotationsOff, + kDexTypeAnnotationSetRefList)) { + return NULL; + } + item++; + } + + return (const u1*) item; +} + +/* Helper for crossVerifyClassDefItem() and + * crossVerifyAnnotationsDirectoryItem(), which finds the type_idx of + * the definer of the first item in the data. 
*/ +static u4 findFirstAnnotationsDirectoryDefiner(const CheckState* state, + const DexAnnotationsDirectoryItem* dir) { + if (dir->fieldsSize != 0) { + const DexFieldAnnotationsItem* fields = + dexGetFieldAnnotations(state->pDexFile, dir); + const DexFieldId* field = + dexGetFieldId(state->pDexFile, fields[0].fieldIdx); + return field->classIdx; + } + + if (dir->methodsSize != 0) { + const DexMethodAnnotationsItem* methods = + dexGetMethodAnnotations(state->pDexFile, dir); + const DexMethodId* method = + dexGetMethodId(state->pDexFile, methods[0].methodIdx); + return method->classIdx; + } + + if (dir->parametersSize != 0) { + const DexParameterAnnotationsItem* parameters = + dexGetParameterAnnotations(state->pDexFile, dir); + const DexMethodId* method = + dexGetMethodId(state->pDexFile, parameters[0].methodIdx); + return method->classIdx; + } + + return kDexNoIndex; +} + +/* Perform cross-item verification of annotations_directory_item. */ +static void* crossVerifyAnnotationsDirectoryItem(const CheckState* state, + void* ptr) { + const DexAnnotationsDirectoryItem* item = ptr; + u4 definingClass = findFirstAnnotationsDirectoryDefiner(state, item); + + if (!dexDataMapVerify0Ok(state->pDataMap, + item->classAnnotationsOff, kDexTypeAnnotationSetItem)) { + return NULL; + } + + const u1* addr = (const u1*) (item + 1); + + if (item->fieldsSize != 0) { + addr = crossVerifyFieldAnnotations(state, item->fieldsSize, addr, + definingClass); + if (addr == NULL) { + return NULL; + } + } + + if (item->methodsSize != 0) { + addr = crossVerifyMethodAnnotations(state, item->methodsSize, addr, + definingClass); + if (addr == NULL) { + return NULL; + } + } + + if (item->parametersSize != 0) { + addr = crossVerifyParameterAnnotations(state, item->parametersSize, + addr, definingClass); + if (addr == NULL) { + return NULL; + } + } + + return (void*) addr; +} + +/* Perform byte-swapping and intra-item verification on type_list. 
*/ +static void* swapTypeList(const CheckState* state, void* ptr) +{ + DexTypeList* pTypeList = ptr; + DexTypeItem* pType; + u4 count; + + CHECK_PTR_RANGE(pTypeList, pTypeList + 1); + SWAP_FIELD4(pTypeList->size); + count = pTypeList->size; + pType = pTypeList->list; + CHECK_LIST_SIZE(pType, count, sizeof(DexTypeItem)); + + while (count--) { + SWAP_INDEX2(pType->typeIdx, state->pHeader->typeIdsSize); + pType++; + } + + return pType; +} + +/* Perform byte-swapping and intra-item verification on + * annotation_set_ref_list. */ +static void* swapAnnotationSetRefList(const CheckState* state, void* ptr) { + DexAnnotationSetRefList* list = ptr; + DexAnnotationSetRefItem* item; + u4 count; + + CHECK_PTR_RANGE(list, list + 1); + SWAP_FIELD4(list->size); + count = list->size; + item = list->list; + CHECK_LIST_SIZE(item, count, sizeof(DexAnnotationSetRefItem)); + + while (count--) { + SWAP_OFFSET4(item->annotationsOff); + item++; + } + + return item; +} + +/* Perform cross-item verification of annotation_set_ref_list. */ +static void* crossVerifyAnnotationSetRefList(const CheckState* state, + void* ptr) { + const DexAnnotationSetRefList* list = ptr; + const DexAnnotationSetRefItem* item = list->list; + int count = list->size; + + while (count--) { + if (!dexDataMapVerify0Ok(state->pDataMap, + item->annotationsOff, kDexTypeAnnotationSetItem)) { + return NULL; + } + item++; + } + + return (void*) item; +} + +/* Perform byte-swapping and intra-item verification on + * annotation_set_item. */ +static void* swapAnnotationSetItem(const CheckState* state, void* ptr) { + DexAnnotationSetItem* set = ptr; + u4* item; + u4 count; + + CHECK_PTR_RANGE(set, set + 1); + SWAP_FIELD4(set->size); + count = set->size; + item = set->entries; + CHECK_LIST_SIZE(item, count, sizeof(u4)); + + while (count--) { + SWAP_OFFSET4(*item); + item++; + } + + return item; +} + +/* Helper for crossVerifyAnnotationSetItem(), which extracts the type_idx + * out of an annotation_item. 
*/ +static u4 annotationItemTypeIdx(const DexAnnotationItem* item) { + const u1* data = item->annotation; + return readUnsignedLeb128(&data); +} + +/* Perform cross-item verification of annotation_set_item. */ +static void* crossVerifyAnnotationSetItem(const CheckState* state, void* ptr) { + const DexAnnotationSetItem* set = ptr; + int count = set->size; + u4 lastIdx = 0; + bool first = true; + int i; + + for (i = 0; i < count; i++) { + if (!dexDataMapVerify0Ok(state->pDataMap, + dexGetAnnotationOff(set, i), kDexTypeAnnotationItem)) { + return NULL; + } + + const DexAnnotationItem* annotation = + dexGetAnnotationItem(state->pDexFile, set, i); + u4 idx = annotationItemTypeIdx(annotation); + + if (first) { + first = false; + } else if (lastIdx >= idx) { + LOGE("Out-of-order entry types: 0x%x then 0x%x\n", + lastIdx, idx); + return NULL; + } + + lastIdx = idx; + } + + return (void*) (set->entries + count); +} + +/* Helper for verifyClassDataItem(), which checks a list of fields. */ +static bool verifyFields(const CheckState* state, u4 size, + DexField* fields, bool expectStatic) { + u4 i; + + for (i = 0; i < size; i++) { + DexField* field = &fields[i]; + u4 accessFlags = field->accessFlags; + bool isStatic = (accessFlags & ACC_STATIC) != 0; + + CHECK_INDEX(field->fieldIdx, state->pHeader->fieldIdsSize); + + if (isStatic != expectStatic) { + LOGE("Field in wrong list @ %d\n", i); + return false; + } + + if ((accessFlags & ~ACC_FIELD_MASK) != 0) { + LOGE("Bogus field access flags %x @ %d\n", accessFlags, i); + return false; + } + } + + return true; +} + +/* Helper for verifyClassDataItem(), which checks a list of methods. 
*/ +static bool verifyMethods(const CheckState* state, u4 size, + DexMethod* methods, bool expectDirect) { + u4 i; + + for (i = 0; i < size; i++) { + DexMethod* method = &methods[i]; + + CHECK_INDEX(method->methodIdx, state->pHeader->methodIdsSize); + + u4 accessFlags = method->accessFlags; + bool isDirect = + (accessFlags & (ACC_STATIC | ACC_PRIVATE | ACC_CONSTRUCTOR)) != 0; + bool expectCode = (accessFlags & (ACC_NATIVE | ACC_ABSTRACT)) == 0; + bool isSynchronized = (accessFlags & ACC_SYNCHRONIZED) != 0; + bool allowSynchronized = (accessFlags & ACC_NATIVE) != 0; + + if (isDirect != expectDirect) { + LOGE("Method in wrong list @ %d\n", i); + return false; + } + + if (((accessFlags & ~ACC_METHOD_MASK) != 0) + || (isSynchronized && !allowSynchronized)) { + LOGE("Bogus method access flags %x @ %d\n", accessFlags, i); + return false; + } + + if (expectCode) { + if (method->codeOff == 0) { + LOGE("Unexpected zero code_off for access_flags %x\n", + accessFlags); + return false; + } + } else if (method->codeOff != 0) { + LOGE("Unexpected non-zero code_off 0x%x for access_flags %x\n", + method->codeOff, accessFlags); + return false; + } + } + + return true; +} + +/* Helper for verifyClassDataItem(), which does most of the work. 
*/ +static bool verifyClassDataItem0(const CheckState* state, + DexClassData* classData) { + bool okay; + + okay = verifyFields(state, classData->header.staticFieldsSize, + classData->staticFields, true); + + if (!okay) { + LOGE("Trouble with static fields\n"); + return false; + } + + verifyFields(state, classData->header.instanceFieldsSize, + classData->instanceFields, false); + + if (!okay) { + LOGE("Trouble with instance fields\n"); + return false; + } + + okay = verifyMethods(state, classData->header.directMethodsSize, + classData->directMethods, true); + + if (!okay) { + LOGE("Trouble with direct methods\n"); + return false; + } + + okay = verifyMethods(state, classData->header.virtualMethodsSize, + classData->virtualMethods, false); + + if (!okay) { + LOGE("Trouble with virtual methods\n"); + return false; + } + + return true; +} + +/* Perform intra-item verification on class_data_item. */ +static void* intraVerifyClassDataItem(const CheckState* state, void* ptr) { + const u1* data = ptr; + DexClassData* classData = dexReadAndVerifyClassData(&data, state->fileEnd); + + if (classData == NULL) { + LOGE("Unable to parse class_data_item\n"); + return NULL; + } + + bool okay = verifyClassDataItem0(state, classData); + + free(classData); + + if (!okay) { + return NULL; + } + + return (void*) data; +} + +/* Helper for crossVerifyClassDefItem() and + * crossVerifyClassDataItem(), which finds the type_idx of the definer + * of the first item in the data. 
*/
static u4 findFirstClassDataDefiner(const CheckState* state,
        DexClassData* classData) {
    const DexFile* pDexFile = state->pDexFile;

    /*
     * The lists are consulted in the order static fields, instance fields,
     * direct methods, virtual methods; the first non-empty one supplies
     * the answer.
     */
    if (classData->header.staticFieldsSize != 0) {
        return dexGetFieldId(pDexFile,
                classData->staticFields[0].fieldIdx)->classIdx;
    }

    if (classData->header.instanceFieldsSize != 0) {
        return dexGetFieldId(pDexFile,
                classData->instanceFields[0].fieldIdx)->classIdx;
    }

    if (classData->header.directMethodsSize != 0) {
        return dexGetMethodId(pDexFile,
                classData->directMethods[0].methodIdx)->classIdx;
    }

    if (classData->header.virtualMethodsSize != 0) {
        return dexGetMethodId(pDexFile,
                classData->virtualMethods[0].methodIdx)->classIdx;
    }

    /* All four lists are empty, so there is no definer to report. */
    return kDexNoIndex;
}

/* Perform cross-item verification of class_data_item.
*/
static void* crossVerifyClassDataItem(const CheckState* state, void* ptr) {
    const u1* data = ptr;
    DexClassData* classData = dexReadAndVerifyClassData(&data, state->fileEnd);

    /*
     * The parser can hand back NULL (intraVerifyClassDataItem() guards
     * against exactly that), so the result must not be dereferenced
     * unchecked here either.
     */
    if (classData == NULL) {
        LOGE("Unable to parse class_data_item\n");
        return NULL;
    }

    /*
     * Every member is required to have the same definer as the first
     * member found; the lists are walked from the back to the front.
     */
    u4 definingClass = findFirstClassDataDefiner(state, classData);
    bool okay = true;
    u4 i;

    for (i = classData->header.staticFieldsSize; okay && (i > 0); /*i*/) {
        i--;
        const DexField* field = &classData->staticFields[i];
        okay = verifyFieldDefiner(state, definingClass, field->fieldIdx);
    }

    for (i = classData->header.instanceFieldsSize; okay && (i > 0); /*i*/) {
        i--;
        const DexField* field = &classData->instanceFields[i];
        okay = verifyFieldDefiner(state, definingClass, field->fieldIdx);
    }

    /* Methods additionally need their code_off to be a code_item. */
    for (i = classData->header.directMethodsSize; okay && (i > 0); /*i*/) {
        i--;
        const DexMethod* meth = &classData->directMethods[i];
        okay = dexDataMapVerify0Ok(state->pDataMap, meth->codeOff,
                kDexTypeCodeItem)
            && verifyMethodDefiner(state, definingClass, meth->methodIdx);
    }

    for (i = classData->header.virtualMethodsSize; okay && (i > 0); /*i*/) {
        i--;
        const DexMethod* meth = &classData->virtualMethods[i];
        okay = dexDataMapVerify0Ok(state->pDataMap, meth->codeOff,
                kDexTypeCodeItem)
            && verifyMethodDefiner(state, definingClass, meth->methodIdx);
    }

    free(classData);

    if (!okay) {
        return NULL;
    }

    return (void*) data;
}

/* Helper for swapCodeItem(), which fills an array with all the valid
 * handlerOff values for catch handlers and also verifies the handler
 * contents.
*/
static u4 setHandlerOffsAndVerify(const CheckState* state,
        DexCode* code, u4 firstOffset, u4 handlersSize, u4* handlerOffs) {
    const u1* const fileEnd = state->fileEnd;
    const u1* const handlersBase = dexGetCatchHandlerData(code);
    u4 offset = firstOffset;
    bool okay = true;
    u4 i;

    /*
     * Each pass handles one handler: its start offset (relative to
     * handlersBase) is recorded, then its contents are walked. A return
     * value of 0 reports failure; otherwise the offset just past the last
     * handler is returned.
     */
    for (i = 0; i < handlersSize; i++) {
        const u1* cursor = handlersBase + offset;
        int size = readAndVerifySignedLeb128(&cursor, fileEnd, &okay);

        if (!okay) {
            LOGE("Bogus size\n");
            return 0;
        }

        if ((size < -65536) || (size > 65536)) {
            LOGE("Invalid size: %d\n", size);
            return 0;
        }

        /* A non-positive size means a catch-all address follows the pairs. */
        bool catchAll = (size <= 0);
        int pairsLeft = catchAll ? -size : size;

        handlerOffs[i] = offset;

        /* Each pair is a type index followed by a handler address. */
        for (; pairsLeft > 0; pairsLeft--) {
            u4 typeIdx =
                readAndVerifyUnsignedLeb128(&cursor, fileEnd, &okay);

            if (!okay) {
                LOGE("Bogus type_idx");
                return 0;
            }

            CHECK_INDEX(typeIdx, state->pHeader->typeIdsSize);

            u4 addr = readAndVerifyUnsignedLeb128(&cursor, fileEnd, &okay);

            if (!okay) {
                LOGE("Bogus addr");
                return 0;
            }

            if (addr >= code->insnsSize) {
                LOGE("Invalid addr: 0x%x", addr);
                return 0;
            }
        }

        if (catchAll) {
            u4 addr = readAndVerifyUnsignedLeb128(&cursor, fileEnd, &okay);

            if (!okay) {
                LOGE("Bogus catch_all_addr");
                return 0;
            }

            if (addr >= code->insnsSize) {
                LOGE("Invalid catch_all_addr: 0x%x", addr);
                return 0;
            }
        }

        offset = cursor - handlersBase;
    }

    return offset;
}

/* Helper for swapCodeItem(), which does all the try-catch related
 * swapping and verification.
*/ +static void* swapTriesAndCatches(const CheckState* state, DexCode* code) { + const u1* encodedHandlers = dexGetCatchHandlerData(code); + const u1* encodedPtr = encodedHandlers; + bool okay = true; + u4 handlersSize = + readAndVerifyUnsignedLeb128(&encodedPtr, state->fileEnd, &okay); + + if (!okay) { + LOGE("Bogus handlers_size\n"); + return NULL; + } + + if ((handlersSize == 0) || (handlersSize >= 65536)) { + LOGE("Invalid handlers_size: %d\n", handlersSize); + return NULL; + } + + u4 handlerOffs[handlersSize]; // list of valid handlerOff values + u4 endOffset = setHandlerOffsAndVerify(state, code, + encodedPtr - encodedHandlers, + handlersSize, handlerOffs); + + if (endOffset == 0) { + return NULL; + } + + DexTry* tries = (DexTry*) dexGetTries(code); + u4 count = code->triesSize; + u4 lastEnd = 0; + + CHECK_LIST_SIZE(tries, count, sizeof(DexTry)); + + while (count--) { + u4 i; + + SWAP_FIELD4(tries->startAddr); + SWAP_FIELD2(tries->insnCount); + SWAP_FIELD2(tries->handlerOff); + + if (tries->startAddr < lastEnd) { + LOGE("Out-of-order try\n"); + return NULL; + } + + if (tries->startAddr >= code->insnsSize) { + LOGE("Invalid start_addr: 0x%x\n", tries->startAddr); + return NULL; + } + + for (i = 0; i < handlersSize; i++) { + if (tries->handlerOff == handlerOffs[i]) { + break; + } + } + + if (i == handlersSize) { + LOGE("Bogus handler offset: 0x%x\n", tries->handlerOff); + return NULL; + } + + lastEnd = tries->startAddr + tries->insnCount; + + if (lastEnd > code->insnsSize) { + LOGE("Invalid insn_count: 0x%x (end addr 0x%x)\n", + tries->insnCount, lastEnd); + return NULL; + } + + tries++; + } + + return (u1*) encodedHandlers + endOffset; +} + +/* Perform byte-swapping and intra-item verification on code_item. 
*/ +static void* swapCodeItem(const CheckState* state, void* ptr) { + DexCode* item = ptr; + u2* insns; + u4 count; + + CHECK_PTR_RANGE(item, item + 1); + SWAP_FIELD2(item->registersSize); + SWAP_FIELD2(item->insSize); + SWAP_FIELD2(item->outsSize); + SWAP_FIELD2(item->triesSize); + SWAP_OFFSET4(item->debugInfoOff); + SWAP_FIELD4(item->insnsSize); + + count = item->insnsSize; + insns = item->insns; + CHECK_LIST_SIZE(insns, count, sizeof(u2)); + + while (count--) { + *insns = SWAP2(*insns); + insns++; + } + + if (item->triesSize == 0) { + ptr = insns; + } else { + if ((((u4) insns) & 3) != 0) { + // Four-byte alignment for the tries. Verify the spacer is a 0. + if (*insns != 0) { + LOGE("Non-zero padding: 0x%x\n", (u4) *insns); + return NULL; + } + } + + ptr = swapTriesAndCatches(state, item); + } + + return ptr; +} + +/* Perform intra-item verification on string_data_item. */ +static void* intraVerifyStringDataItem(const CheckState* state, void* ptr) { + const u1* fileEnd = state->fileEnd; + const u1* data = ptr; + bool okay = true; + u4 utf16Size = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + u4 i; + + if (!okay) { + LOGE("Bogus utf16_size\n"); + return NULL; + } + + for (i = 0; i < utf16Size; i++) { + if (data >= fileEnd) { + LOGE("String data would go beyond end-of-file\n"); + return NULL; + } + + u1 byte1 = *(data++); + + // Switch on the high four bits. + switch (byte1 >> 4) { + case 0x00: { + // Special case of bit pattern 0xxx. + if (byte1 == 0) { + LOGE("String shorter than indicated utf16_size 0x%x\n", + utf16Size); + return NULL; + } + break; + } + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: { + // Bit pattern 0xxx. No need for any extra bytes or checks. + break; + } + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0f: { + /* + * Bit pattern 10xx or 1111, which are illegal start bytes. + * Note: 1111 is valid for normal UTF-8, but not the + * modified UTF-8 used here. 
+ */ + LOGE("Illegal start byte 0x%x\n", byte1); + return NULL; + } + case 0x0e: { + // Bit pattern 1110, so there are two additional bytes. + u1 byte2 = *(data++); + if ((byte2 & 0xc0) != 0x80) { + LOGE("Illegal continuation byte 0x%x\n", byte2); + return NULL; + } + u1 byte3 = *(data++); + if ((byte3 & 0xc0) != 0x80) { + LOGE("Illegal continuation byte 0x%x\n", byte3); + return NULL; + } + u2 value = ((byte1 & 0x0f) << 12) | ((byte2 & 0x3f) << 6) + | (byte3 & 0x3f); + if (value < 0x800) { + LOGE("Illegal representation for value %x\n", value); + return NULL; + } + break; + } + case 0x0c: + case 0x0d: { + // Bit pattern 110x, so there is one additional byte. + u1 byte2 = *(data++); + if ((byte2 & 0xc0) != 0x80) { + LOGE("Illegal continuation byte 0x%x\n", byte2); + return NULL; + } + u2 value = ((byte1 & 0x1f) << 6) | (byte2 & 0x3f); + if ((value != 0) && (value < 0x80)) { + LOGE("Illegal representation for value %x\n", value); + return NULL; + } + break; + } + } + } + + if (*(data++) != '\0') { + LOGE("String longer than indicated utf16_size 0x%x\n", utf16Size); + return NULL; + } + + return (void*) data; +} + +/* Perform intra-item verification on debug_info_item. 
*/ +static void* intraVerifyDebugInfoItem(const CheckState* state, void* ptr) { + const u1* fileEnd = state->fileEnd; + const u1* data = ptr; + bool okay = true; + u4 i; + + readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + + if (!okay) { + LOGE("Bogus line_start\n"); + return NULL; + } + + u4 parametersSize = + readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + + if (!okay) { + LOGE("Bogus parameters_size\n"); + return NULL; + } + + if (parametersSize > 65536) { + LOGE("Invalid parameters_size: 0x%x\n", parametersSize); + return NULL; + } + + for (i = 0; i < parametersSize; i++) { + u4 parameterName = + readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + + if (!okay) { + LOGE("Bogus parameter_name\n"); + return NULL; + } + + if (parameterName != 0) { + parameterName--; + CHECK_INDEX(parameterName, state->pHeader->stringIdsSize); + } + } + + bool done = false; + while (!done) { + u1 opcode = *(data++); + + switch (opcode) { + case DBG_END_SEQUENCE: { + done = true; + break; + } + case DBG_ADVANCE_PC: { + readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + break; + } + case DBG_ADVANCE_LINE: { + readAndVerifySignedLeb128(&data, fileEnd, &okay); + break; + } + case DBG_START_LOCAL: { + u4 idx; + u4 regNum = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + if (!okay) break; + if (regNum >= 65536) { + okay = false; + break; + } + idx = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + if (!okay) break; + if (idx != 0) { + idx--; + CHECK_INDEX(idx, state->pHeader->stringIdsSize); + } + idx = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + if (!okay) break; + if (idx != 0) { + idx--; + CHECK_INDEX(idx, state->pHeader->stringIdsSize); + } + break; + } + case DBG_END_LOCAL: + case DBG_RESTART_LOCAL: { + u4 regNum = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + if (!okay) break; + if (regNum >= 65536) { + okay = false; + break; + } + break; + } + case DBG_START_LOCAL_EXTENDED: { + u4 idx; + u4 regNum = readAndVerifyUnsignedLeb128(&data, 
fileEnd, &okay); + if (!okay) break; + if (regNum >= 65536) { + okay = false; + break; + } + idx = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + if (!okay) break; + if (idx != 0) { + idx--; + CHECK_INDEX(idx, state->pHeader->stringIdsSize); + } + idx = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + if (!okay) break; + if (idx != 0) { + idx--; + CHECK_INDEX(idx, state->pHeader->stringIdsSize); + } + idx = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + if (!okay) break; + if (idx != 0) { + idx--; + CHECK_INDEX(idx, state->pHeader->stringIdsSize); + } + break; + } + case DBG_SET_FILE: { + u4 idx = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + if (!okay) break; + if (idx != 0) { + idx--; + CHECK_INDEX(idx, state->pHeader->stringIdsSize); + } + break; + } + default: { + // No arguments to parse for anything else. + } + } + + if (!okay) { + LOGE("Bogus syntax for opcode %02x\n", opcode); + return NULL; + } + } + + return (void*) data; +} + +/* defined below */ +static const u1* verifyEncodedValue(const CheckState* state, const u1* data, + bool crossVerify); +static const u1* verifyEncodedAnnotation(const CheckState* state, + const u1* data, bool crossVerify); + +/* Helper for verifyEncodedValue(), which reads a 1- to 4- byte unsigned + * little endian value. */ +static u4 readUnsignedLittleEndian(const CheckState* state, const u1** pData, + u4 size) { + const u1* data = *pData; + u4 result = 0; + u4 i; + + CHECK_PTR_RANGE(data, data + size); + + for (i = 0; i < size; i++) { + result |= ((u4) *(data++)) << (i * 8); + } + + *pData = data; + return result; +} + +/* Helper for *VerifyAnnotationItem() and *VerifyEncodedArrayItem(), which + * verifies an encoded_array. 
*/ +static const u1* verifyEncodedArray(const CheckState* state, + const u1* data, bool crossVerify) { + bool okay = true; + u4 size = readAndVerifyUnsignedLeb128(&data, state->fileEnd, &okay); + + if (!okay) { + LOGE("Bogus encoded_array size\n"); + return NULL; + } + + while (size--) { + data = verifyEncodedValue(state, data, crossVerify); + if (data == NULL) { + return NULL; + } + } + + return data; +} + +/* Helper for *VerifyAnnotationItem() and *VerifyEncodedArrayItem(), which + * verifies an encoded_value. */ +static const u1* verifyEncodedValue(const CheckState* state, + const u1* data, bool crossVerify) { + CHECK_PTR_RANGE(data, data + 1); + + u1 headerByte = *(data++); + u4 valueType = headerByte & kDexAnnotationValueTypeMask; + u4 valueArg = headerByte >> kDexAnnotationValueArgShift; + + switch (valueType) { + case kDexAnnotationByte: { + if (valueArg != 0) { + LOGE("Bogus byte size 0x%x\n", valueArg); + return NULL; + } + data++; + break; + } + case kDexAnnotationShort: + case kDexAnnotationChar: { + if (valueArg > 1) { + LOGE("Bogus char/short size 0x%x\n", valueArg); + return NULL; + } + data += valueArg + 1; + break; + } + case kDexAnnotationInt: + case kDexAnnotationFloat: { + if (valueArg > 3) { + LOGE("Bogus int/float size 0x%x\n", valueArg); + return NULL; + } + data += valueArg + 1; + break; + } + case kDexAnnotationLong: + case kDexAnnotationDouble: { + data += valueArg + 1; + break; + } + case kDexAnnotationString: { + if (valueArg > 3) { + LOGE("Bogus string size 0x%x\n", valueArg); + return NULL; + } + u4 idx = readUnsignedLittleEndian(state, &data, valueArg + 1); + CHECK_INDEX(idx, state->pHeader->stringIdsSize); + break; + } + case kDexAnnotationType: { + if (valueArg > 3) { + LOGE("Bogus type size 0x%x\n", valueArg); + return NULL; + } + u4 idx = readUnsignedLittleEndian(state, &data, valueArg + 1); + CHECK_INDEX(idx, state->pHeader->typeIdsSize); + break; + } + case kDexAnnotationField: + case kDexAnnotationEnum: { + if (valueArg > 3) { + 
LOGE("Bogus field/enum size 0x%x\n", valueArg); + return NULL; + } + u4 idx = readUnsignedLittleEndian(state, &data, valueArg + 1); + CHECK_INDEX(idx, state->pHeader->fieldIdsSize); + break; + } + case kDexAnnotationMethod: { + if (valueArg > 3) { + LOGE("Bogus method size 0x%x\n", valueArg); + return NULL; + } + u4 idx = readUnsignedLittleEndian(state, &data, valueArg + 1); + CHECK_INDEX(idx, state->pHeader->methodIdsSize); + break; + } + case kDexAnnotationArray: { + if (valueArg != 0) { + LOGE("Bogus array value_arg 0x%x\n", valueArg); + return NULL; + } + data = verifyEncodedArray(state, data, crossVerify); + break; + } + case kDexAnnotationAnnotation: { + if (valueArg != 0) { + LOGE("Bogus annotation value_arg 0x%x\n", valueArg); + return NULL; + } + data = verifyEncodedAnnotation(state, data, crossVerify); + break; + } + case kDexAnnotationNull: { + if (valueArg != 0) { + LOGE("Bogus null value_arg 0x%x\n", valueArg); + return NULL; + } + // Nothing else to do for this type. + break; + } + case kDexAnnotationBoolean: { + if (valueArg > 1) { + LOGE("Bogus boolean value_arg 0x%x\n", valueArg); + return NULL; + } + // Nothing else to do for this type. + break; + } + default: { + LOGE("Bogus value_type 0x%x\n", valueType); + return NULL; + } + } + + return data; +} + +/* Helper for *VerifyAnnotationItem() and *VerifyEncodedArrayItem(), which + * verifies an encoded_annotation. 
*/ +static const u1* verifyEncodedAnnotation(const CheckState* state, + const u1* data, bool crossVerify) { + const u1* fileEnd = state->fileEnd; + bool okay = true; + u4 idx = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + + if (!okay) { + LOGE("Bogus encoded_annotation type_idx\n"); + return NULL; + } + + CHECK_INDEX(idx, state->pHeader->typeIdsSize); + + if (crossVerify) { + const char* descriptor = dexStringByTypeIdx(state->pDexFile, idx); + if (!dexIsClassDescriptor(descriptor)) { + LOGE("Bogus annotation type: '%s'\n", descriptor); + return NULL; + } + } + + u4 size = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + u4 lastIdx = 0; + bool first = true; + + if (!okay) { + LOGE("Bogus encoded_annotation size\n"); + return NULL; + } + + while (size--) { + idx = readAndVerifyUnsignedLeb128(&data, fileEnd, &okay); + + if (!okay) { + LOGE("Bogus encoded_annotation name_idx\n"); + return NULL; + } + + CHECK_INDEX(idx, state->pHeader->stringIdsSize); + + if (crossVerify) { + const char* name = dexStringById(state->pDexFile, idx); + if (!dexIsValidMemberName(name)) { + LOGE("Bogus annotation member name: '%s'\n", name); + return NULL; + } + } + + if (first) { + first = false; + } else if (lastIdx >= idx) { + LOGE("Out-of-order encoded_annotation name_idx: 0x%x then 0x%x\n", + lastIdx, idx); + return NULL; + } + + data = verifyEncodedValue(state, data, crossVerify); + lastIdx = idx; + + if (data == NULL) { + return NULL; + } + } + + return data; +} + +/* Perform intra-item verification on encoded_array_item. */ +static void* intraVerifyEncodedArrayItem(const CheckState* state, void* ptr) { + return (void*) verifyEncodedArray(state, (const u1*) ptr, false); +} + +/* Perform intra-item verification on annotation_item. 
*/ +static void* intraVerifyAnnotationItem(const CheckState* state, void* ptr) { + const u1* data = ptr; + + CHECK_PTR_RANGE(data, data + 1); + + switch (*(data++)) { + case kDexVisibilityBuild: + case kDexVisibilityRuntime: + case kDexVisibilitySystem: { + break; + } + default: { + LOGE("Bogus annotation visibility: 0x%x\n", *data); + return NULL; + } + } + + return (void*) verifyEncodedAnnotation(state, data, false); +} + +/* Perform cross-item verification on annotation_item. */ +static void* crossVerifyAnnotationItem(const CheckState* state, void* ptr) { + const u1* data = ptr; + + // Skip the visibility byte. + data++; + + return (void*) verifyEncodedAnnotation(state, data, true); +} + + + + +/* + * Function to visit an individual top-level item type. + */ +typedef void* ItemVisitorFunction(const CheckState* state, void* ptr); + +/* + * Iterate over all the items in a section, optionally updating the + * data map (done if mapType is passed as non-negative). The section + * must consist of concatenated items of the same type. 
+ */ +static bool iterateSectionWithOptionalUpdate(CheckState* state, + u4 offset, u4 count, ItemVisitorFunction* func, u4 alignment, + u4* nextOffset, int mapType) { + u4 alignmentMask = alignment - 1; + u4 i; + + state->previousItem = NULL; + + for (i = 0; i < count; i++) { + u4 newOffset = (offset + alignmentMask) & ~alignmentMask; + u1* ptr = filePointer(state, newOffset); + + if (offset < newOffset) { + ptr = filePointer(state, offset); + if (offset < newOffset) { + CHECK_OFFSET_RANGE(offset, newOffset); + while (offset < newOffset) { + if (*ptr != '\0') { + LOGE("Non-zero padding 0x%02x @ %x\n", *ptr, offset); + return false; + } + ptr++; + offset++; + } + } + } + + u1* newPtr = (u1*) func(state, ptr); + newOffset = fileOffset(state, newPtr); + + if (newPtr == NULL) { + LOGE("Trouble with item %d @ offset 0x%x\n", i, offset); + return false; + } + + if (newOffset > state->fileLen) { + LOGE("Item %d @ offset 0x%x ends out of bounds\n", i, offset); + return false; + } + + if (mapType >= 0) { + dexDataMapAdd(state->pDataMap, offset, mapType); + } + + state->previousItem = ptr; + offset = newOffset; + } + + if (nextOffset != NULL) { + *nextOffset = offset; + } + + return true; +} + +/* + * Iterate over all the items in a section. The section must consist of + * concatenated items of the same type. This variant will not update the data + * map. + */ +static bool iterateSection(CheckState* state, u4 offset, u4 count, + ItemVisitorFunction* func, u4 alignment, u4* nextOffset) { + return iterateSectionWithOptionalUpdate(state, offset, count, func, + alignment, nextOffset, -1); +} + +/* + * Like iterateSection(), but also check that the offset and count match + * a given pair of expected values. 
+ */ +static bool checkBoundsAndIterateSection(CheckState* state, + u4 offset, u4 count, u4 expectedOffset, u4 expectedCount, + ItemVisitorFunction* func, u4 alignment, u4* nextOffset) { + if (offset != expectedOffset) { + LOGE("Bogus offset for section: got 0x%x; expected 0x%x\n", + offset, expectedOffset); + return false; + } + + if (count != expectedCount) { + LOGE("Bogus size for section: got 0x%x; expected 0x%x\n", + count, expectedCount); + return false; + } + + return iterateSection(state, offset, count, func, alignment, nextOffset); +} + +/* + * Like iterateSection(), but also update the data section map and + * check that all the items fall within the data section. + */ +static bool iterateDataSection(CheckState* state, u4 offset, u4 count, + ItemVisitorFunction* func, u4 alignment, u4* nextOffset, int mapType) { + u4 dataStart = state->pHeader->dataOff; + u4 dataEnd = dataStart + state->pHeader->dataSize; + + assert(nextOffset != NULL); + + if ((offset < dataStart) || (offset >= dataEnd)) { + LOGE("Bogus offset for data subsection: 0x%x\n", offset); + return false; + } + + if (!iterateSectionWithOptionalUpdate(state, offset, count, func, + alignment, nextOffset, mapType)) { + return false; + } + + if (*nextOffset > dataEnd) { + LOGE("Out-of-bounds end of data subsection: 0x%x\n", *nextOffset); + return false; + } + + return true; +} + +/* + * Byte-swap all items in the given map except the header and the map + * itself, both of which should have already gotten swapped. This also + * does all possible intra-item verification, that is, verification + * that doesn't need to assume the sanctity of the contents of *other* + * items. The intra-item limitation is because at the time an item is + * asked to verify itself, it can't assume that the items it refers to + * have been byte-swapped and verified. 
+ */ +static bool swapEverythingButHeaderAndMap(CheckState* state, + DexMapList* pMap) { + const DexMapItem* item = pMap->list; + u4 lastOffset = 0; + u4 count = pMap->size; + bool okay = true; + + while (okay && count--) { + u4 sectionOffset = item->offset; + u4 sectionCount = item->size; + u2 type = item->type; + + if (lastOffset < sectionOffset) { + CHECK_OFFSET_RANGE(lastOffset, sectionOffset); + const u1* ptr = filePointer(state, lastOffset); + while (lastOffset < sectionOffset) { + if (*ptr != '\0') { + LOGE("Non-zero padding 0x%02x before section start @ %x\n", + *ptr, lastOffset); + okay = false; + break; + } + ptr++; + lastOffset++; + } + } else if (lastOffset > sectionOffset) { + LOGE("Section overlap or out-of-order map: %x, %x\n", + lastOffset, sectionOffset); + okay = false; + } + + if (!okay) { + break; + } + + switch (type) { + case kDexTypeHeaderItem: { + /* + * The header got swapped very early on, but do some + * additional sanity checking here. + */ + okay = checkHeaderSection(state, sectionOffset, sectionCount, + &lastOffset); + break; + } + case kDexTypeStringIdItem: { + okay = checkBoundsAndIterateSection(state, sectionOffset, + sectionCount, state->pHeader->stringIdsOff, + state->pHeader->stringIdsSize, swapStringIdItem, + sizeof(u4), &lastOffset); + break; + } + case kDexTypeTypeIdItem: { + okay = checkBoundsAndIterateSection(state, sectionOffset, + sectionCount, state->pHeader->typeIdsOff, + state->pHeader->typeIdsSize, swapTypeIdItem, + sizeof(u4), &lastOffset); + break; + } + case kDexTypeProtoIdItem: { + okay = checkBoundsAndIterateSection(state, sectionOffset, + sectionCount, state->pHeader->protoIdsOff, + state->pHeader->protoIdsSize, swapProtoIdItem, + sizeof(u4), &lastOffset); + break; + } + case kDexTypeFieldIdItem: { + okay = checkBoundsAndIterateSection(state, sectionOffset, + sectionCount, state->pHeader->fieldIdsOff, + state->pHeader->fieldIdsSize, swapFieldIdItem, + sizeof(u4), &lastOffset); + break; + } + case 
kDexTypeMethodIdItem: { + okay = checkBoundsAndIterateSection(state, sectionOffset, + sectionCount, state->pHeader->methodIdsOff, + state->pHeader->methodIdsSize, swapMethodIdItem, + sizeof(u4), &lastOffset); + break; + } + case kDexTypeClassDefItem: { + okay = checkBoundsAndIterateSection(state, sectionOffset, + sectionCount, state->pHeader->classDefsOff, + state->pHeader->classDefsSize, swapClassDefItem, + sizeof(u4), &lastOffset); + break; + } + case kDexTypeMapList: { + /* + * The map section was swapped early on, but do some + * additional sanity checking here. + */ + okay = checkMapSection(state, sectionOffset, sectionCount, + &lastOffset); + break; + } + case kDexTypeTypeList: { + okay = iterateDataSection(state, sectionOffset, sectionCount, + swapTypeList, sizeof(u4), &lastOffset, type); + break; + } + case kDexTypeAnnotationSetRefList: { + okay = iterateDataSection(state, sectionOffset, sectionCount, + swapAnnotationSetRefList, sizeof(u4), &lastOffset, + type); + break; + } + case kDexTypeAnnotationSetItem: { + okay = iterateDataSection(state, sectionOffset, sectionCount, + swapAnnotationSetItem, sizeof(u4), &lastOffset, type); + break; + } + case kDexTypeClassDataItem: { + okay = iterateDataSection(state, sectionOffset, sectionCount, + intraVerifyClassDataItem, sizeof(u1), &lastOffset, + type); + break; + } + case kDexTypeCodeItem: { + okay = iterateDataSection(state, sectionOffset, sectionCount, + swapCodeItem, sizeof(u4), &lastOffset, type); + break; + } + case kDexTypeStringDataItem: { + okay = iterateDataSection(state, sectionOffset, sectionCount, + intraVerifyStringDataItem, sizeof(u1), &lastOffset, + type); + break; + } + case kDexTypeDebugInfoItem: { + okay = iterateDataSection(state, sectionOffset, sectionCount, + intraVerifyDebugInfoItem, sizeof(u1), &lastOffset, + type); + break; + } + case kDexTypeAnnotationItem: { + okay = iterateDataSection(state, sectionOffset, sectionCount, + intraVerifyAnnotationItem, sizeof(u1), &lastOffset, + type); + 
break; + } + case kDexTypeEncodedArrayItem: { + okay = iterateDataSection(state, sectionOffset, sectionCount, + intraVerifyEncodedArrayItem, sizeof(u1), &lastOffset, + type); + break; + } + case kDexTypeAnnotationsDirectoryItem: { + okay = iterateDataSection(state, sectionOffset, sectionCount, + swapAnnotationsDirectoryItem, sizeof(u4), &lastOffset, + type); + break; + } + default: { + LOGE("Unknown map item type %04x\n", type); + return false; + } + } + + if (!okay) { + LOGE("Swap of section type %04x failed\n", type); + } + + item++; + } + + return okay; +} + +/* + * Perform cross-item verification on everything that needs it. This + * pass is only called after all items are byte-swapped and + * intra-verified (checked for internal consistency). + */ +static bool crossVerifyEverything(CheckState* state, DexMapList* pMap) +{ + const DexMapItem* item = pMap->list; + u4 count = pMap->size; + bool okay = true; + + while (okay && count--) { + u4 sectionOffset = item->offset; + u4 sectionCount = item->size; + + switch (item->type) { + case kDexTypeHeaderItem: + case kDexTypeMapList: + case kDexTypeTypeList: + case kDexTypeCodeItem: + case kDexTypeStringDataItem: + case kDexTypeDebugInfoItem: + case kDexTypeAnnotationItem: + case kDexTypeEncodedArrayItem: { + // There is no need for cross-item verification for these. 
+ break; + } + case kDexTypeStringIdItem: { + okay = iterateSection(state, sectionOffset, sectionCount, + crossVerifyStringIdItem, sizeof(u4), NULL); + break; + } + case kDexTypeTypeIdItem: { + okay = iterateSection(state, sectionOffset, sectionCount, + crossVerifyTypeIdItem, sizeof(u4), NULL); + break; + } + case kDexTypeProtoIdItem: { + okay = iterateSection(state, sectionOffset, sectionCount, + crossVerifyProtoIdItem, sizeof(u4), NULL); + break; + } + case kDexTypeFieldIdItem: { + okay = iterateSection(state, sectionOffset, sectionCount, + crossVerifyFieldIdItem, sizeof(u4), NULL); + break; + } + case kDexTypeMethodIdItem: { + okay = iterateSection(state, sectionOffset, sectionCount, + crossVerifyMethodIdItem, sizeof(u4), NULL); + break; + } + case kDexTypeClassDefItem: { + okay = iterateSection(state, sectionOffset, sectionCount, + crossVerifyClassDefItem, sizeof(u4), NULL); + break; + } + case kDexTypeAnnotationSetRefList: { + okay = iterateSection(state, sectionOffset, sectionCount, + crossVerifyAnnotationSetRefList, sizeof(u4), NULL); + break; + } + case kDexTypeAnnotationSetItem: { + okay = iterateSection(state, sectionOffset, sectionCount, + crossVerifyAnnotationSetItem, sizeof(u4), NULL); + break; + } + case kDexTypeClassDataItem: { + okay = iterateSection(state, sectionOffset, sectionCount, + crossVerifyClassDataItem, sizeof(u1), NULL); + break; + } + case kDexTypeAnnotationsDirectoryItem: { + okay = iterateSection(state, sectionOffset, sectionCount, + crossVerifyAnnotationsDirectoryItem, sizeof(u4), NULL); + break; + } + default: { + LOGE("Unknown map item type %04x\n", item->type); + return false; + } + } + + if (!okay) { + LOGE("Cross-item verify of section type %04x failed\n", + item->type); + } + + item++; + } + + return okay; +} + +/* + * Fix the byte ordering of all fields in the DEX file, and do structural + * verification. + * + * While we're at it, make sure that the file offsets all refer to locations + * within the file. 
+ * + * Returns 0 on success, nonzero on failure. + */ +int dexFixByteOrdering(u1* addr, int len) +{ + DexHeader* pHeader; + CheckState state; + bool okay = true; + + memset(&state, 0, sizeof(state)); + LOGV("+++ swapping and verifying\n"); + + /* + * Start by verifying the magic number. The caller verified that "len" + * says we have at least a header's worth of data. + */ + pHeader = (DexHeader*) addr; + if (memcmp(pHeader->magic, DEX_MAGIC, 4) != 0) { + /* really shouldn't be here -- this is weird */ + LOGE("ERROR: Can't byte swap: bad magic number " + "(0x%02x %02x %02x %02x)\n", + pHeader->magic[0], pHeader->magic[1], + pHeader->magic[2], pHeader->magic[3]); + okay = false; + } + + if (okay && memcmp(pHeader->magic+4, DEX_MAGIC_VERS, 4) != 0) { + /* older or newer version we don't know how to read */ + LOGE("ERROR: Can't byte swap: bad dex version " + "(0x%02x %02x %02x %02x)\n", + pHeader->magic[4], pHeader->magic[5], + pHeader->magic[6], pHeader->magic[7]); + okay = false; + } + + if (okay) { + int expectedLen = (int) SWAP4(pHeader->fileSize); + if (len < expectedLen) { + LOGE("ERROR: Bad length: expected %d, got %d\n", expectedLen, len); + okay = false; + } else if (len != expectedLen) { + LOGW("WARNING: Odd length: expected %d, got %d\n", expectedLen, + len); + // keep going + } + } + + if (okay) { + /* + * Compute the adler32 checksum and compare it to what's stored in + * the file. This isn't free, but chances are good that we just + * unpacked this from a jar file and have all of the pages sitting + * in memory, so it's pretty quick. + * + * This might be a big-endian system, so we need to do this before + * we byte-swap the header. 
+ */ + uLong adler = adler32(0L, Z_NULL, 0); + const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); + u4 storedFileSize = SWAP4(pHeader->fileSize); + u4 expectedChecksum = SWAP4(pHeader->checksum); + + adler = adler32(adler, ((const u1*) pHeader) + nonSum, + storedFileSize - nonSum); + + if (adler != expectedChecksum) { + LOGE("ERROR: bad checksum (%08lx, expected %08x)\n", + adler, expectedChecksum); + okay = false; + } + } + + if (okay) { + state.fileStart = addr; + state.fileEnd = addr + len; + state.fileLen = len; + state.pDexFile = NULL; + state.pDataMap = NULL; + state.previousItem = NULL; + + /* + * Swap the header and check the contents. + */ + okay = swapDexHeader(&state, pHeader); + } + + if (okay) { + state.pHeader = pHeader; + + if (pHeader->headerSize < sizeof(DexHeader)) { + LOGE("ERROR: Small header size %d, struct %d\n", + pHeader->headerSize, (int) sizeof(DexHeader)); + okay = false; + } else if (pHeader->headerSize > sizeof(DexHeader)) { + LOGW("WARNING: Large header size %d, struct %d\n", + pHeader->headerSize, (int) sizeof(DexHeader)); + // keep going? + } + } + + if (okay) { + /* + * Look for the map. Swap it and then use it to find and swap + * everything else. 
+ */ + if (pHeader->mapOff != 0) { + DexFile dexFile; + DexMapList* pDexMap = (DexMapList*) (addr + pHeader->mapOff); + + okay = okay && swapMap(&state, pDexMap); + okay = okay && swapEverythingButHeaderAndMap(&state, pDexMap); + + dexFileSetupBasicPointers(&dexFile, addr); + state.pDexFile = &dexFile; + + okay = okay && crossVerifyEverything(&state, pDexMap); + } else { + LOGE("ERROR: No map found; impossible to byte-swap and verify"); + okay = false; + } + } + + if (!okay) { + LOGE("ERROR: Byte swap + verify failed\n"); + } + + if (state.pDataMap != NULL) { + dexDataMapFree(state.pDataMap); + } + + return !okay; // 0 == success +} diff --git a/libdex/InstrUtils.c b/libdex/InstrUtils.c new file mode 100644 index 000000000..b0718f386 --- /dev/null +++ b/libdex/InstrUtils.c @@ -0,0 +1,1234 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Dalvik instruction utility functions. + */ +#include "InstrUtils.h" + +#include <stdlib.h> + + +/* + * Generate a table that holds the width of all instructions. + * + * Standard instructions have positive values, optimizer instructions + * have negative values, unimplemented instructions have a width of zero. + * + * I'm doing it with a giant switch statement because it's easier to + * maintain and update than a static table with 256 unadorned integers, + * and if we're missing a case gcc emits a "warning: enumeration value not + * handled" message. 
+ * + * (To save space in the binary we could generate a static table with a + * command-line utility.) + */ +InstructionWidth* dexCreateInstrWidthTable(void) +{ + InstructionWidth* instrWidth; + int i; + + instrWidth = malloc(sizeof(InstructionWidth) * kNumDalvikInstructions); + if (instrWidth == NULL) + return NULL; + + for (i = 0; i < kNumDalvikInstructions; i++) { + OpCode opc = (OpCode) i; + int width = 0; + + switch (opc) { + case OP_NOP: /* switch-statement data is a special case of NOP */ + case OP_MOVE: + case OP_MOVE_WIDE: + case OP_MOVE_OBJECT: + case OP_MOVE_RESULT: + case OP_MOVE_RESULT_WIDE: + case OP_MOVE_RESULT_OBJECT: + case OP_MOVE_EXCEPTION: + case OP_RETURN_VOID: + case OP_RETURN: + case OP_RETURN_WIDE: + case OP_RETURN_OBJECT: + case OP_CONST_4: + case OP_MONITOR_ENTER: + case OP_MONITOR_EXIT: + case OP_ARRAY_LENGTH: + case OP_THROW: + case OP_GOTO: + case OP_NEG_INT: + case OP_NOT_INT: + case OP_NEG_LONG: + case OP_NOT_LONG: + case OP_NEG_FLOAT: + case OP_NEG_DOUBLE: + case OP_INT_TO_LONG: + case OP_INT_TO_FLOAT: + case OP_INT_TO_DOUBLE: + case OP_LONG_TO_INT: + case OP_LONG_TO_FLOAT: + case OP_LONG_TO_DOUBLE: + case OP_FLOAT_TO_INT: + case OP_FLOAT_TO_LONG: + case OP_FLOAT_TO_DOUBLE: + case OP_DOUBLE_TO_INT: + case OP_DOUBLE_TO_LONG: + case OP_DOUBLE_TO_FLOAT: + case OP_INT_TO_BYTE: + case OP_INT_TO_CHAR: + case OP_INT_TO_SHORT: + case OP_ADD_INT_2ADDR: + case OP_SUB_INT_2ADDR: + case OP_MUL_INT_2ADDR: + case OP_DIV_INT_2ADDR: + case OP_REM_INT_2ADDR: + case OP_AND_INT_2ADDR: + case OP_OR_INT_2ADDR: + case OP_XOR_INT_2ADDR: + case OP_SHL_INT_2ADDR: + case OP_SHR_INT_2ADDR: + case OP_USHR_INT_2ADDR: + case OP_ADD_LONG_2ADDR: + case OP_SUB_LONG_2ADDR: + case OP_MUL_LONG_2ADDR: + case OP_DIV_LONG_2ADDR: + case OP_REM_LONG_2ADDR: + case OP_AND_LONG_2ADDR: + case OP_OR_LONG_2ADDR: + case OP_XOR_LONG_2ADDR: + case OP_SHL_LONG_2ADDR: + case OP_SHR_LONG_2ADDR: + case OP_USHR_LONG_2ADDR: + case OP_ADD_FLOAT_2ADDR: + case OP_SUB_FLOAT_2ADDR: + case 
OP_MUL_FLOAT_2ADDR: + case OP_DIV_FLOAT_2ADDR: + case OP_REM_FLOAT_2ADDR: + case OP_ADD_DOUBLE_2ADDR: + case OP_SUB_DOUBLE_2ADDR: + case OP_MUL_DOUBLE_2ADDR: + case OP_DIV_DOUBLE_2ADDR: + case OP_REM_DOUBLE_2ADDR: + width = 1; + break; + + case OP_MOVE_FROM16: + case OP_MOVE_WIDE_FROM16: + case OP_MOVE_OBJECT_FROM16: + case OP_CONST_16: + case OP_CONST_HIGH16: + case OP_CONST_WIDE_16: + case OP_CONST_WIDE_HIGH16: + case OP_CONST_STRING: + case OP_CONST_CLASS: + case OP_CHECK_CAST: + case OP_INSTANCE_OF: + case OP_NEW_INSTANCE: + case OP_NEW_ARRAY: + case OP_CMPL_FLOAT: + case OP_CMPG_FLOAT: + case OP_CMPL_DOUBLE: + case OP_CMPG_DOUBLE: + case OP_CMP_LONG: + case OP_GOTO_16: + case OP_IF_EQ: + case OP_IF_NE: + case OP_IF_LT: + case OP_IF_GE: + case OP_IF_GT: + case OP_IF_LE: + case OP_IF_EQZ: + case OP_IF_NEZ: + case OP_IF_LTZ: + case OP_IF_GEZ: + case OP_IF_GTZ: + case OP_IF_LEZ: + case OP_AGET: + case OP_AGET_WIDE: + case OP_AGET_OBJECT: + case OP_AGET_BOOLEAN: + case OP_AGET_BYTE: + case OP_AGET_CHAR: + case OP_AGET_SHORT: + case OP_APUT: + case OP_APUT_WIDE: + case OP_APUT_OBJECT: + case OP_APUT_BOOLEAN: + case OP_APUT_BYTE: + case OP_APUT_CHAR: + case OP_APUT_SHORT: + case OP_IGET: + case OP_IGET_WIDE: + case OP_IGET_OBJECT: + case OP_IGET_BOOLEAN: + case OP_IGET_BYTE: + case OP_IGET_CHAR: + case OP_IGET_SHORT: + case OP_IPUT: + case OP_IPUT_WIDE: + case OP_IPUT_OBJECT: + case OP_IPUT_BOOLEAN: + case OP_IPUT_BYTE: + case OP_IPUT_CHAR: + case OP_IPUT_SHORT: + case OP_SGET: + case OP_SGET_WIDE: + case OP_SGET_OBJECT: + case OP_SGET_BOOLEAN: + case OP_SGET_BYTE: + case OP_SGET_CHAR: + case OP_SGET_SHORT: + case OP_SPUT: + case OP_SPUT_WIDE: + case OP_SPUT_OBJECT: + case OP_SPUT_BOOLEAN: + case OP_SPUT_BYTE: + case OP_SPUT_CHAR: + case OP_SPUT_SHORT: + case OP_ADD_INT: + case OP_SUB_INT: + case OP_MUL_INT: + case OP_DIV_INT: + case OP_REM_INT: + case OP_AND_INT: + case OP_OR_INT: + case OP_XOR_INT: + case OP_SHL_INT: + case OP_SHR_INT: + case OP_USHR_INT: + case 
OP_ADD_LONG: + case OP_SUB_LONG: + case OP_MUL_LONG: + case OP_DIV_LONG: + case OP_REM_LONG: + case OP_AND_LONG: + case OP_OR_LONG: + case OP_XOR_LONG: + case OP_SHL_LONG: + case OP_SHR_LONG: + case OP_USHR_LONG: + case OP_ADD_FLOAT: + case OP_SUB_FLOAT: + case OP_MUL_FLOAT: + case OP_DIV_FLOAT: + case OP_REM_FLOAT: + case OP_ADD_DOUBLE: + case OP_SUB_DOUBLE: + case OP_MUL_DOUBLE: + case OP_DIV_DOUBLE: + case OP_REM_DOUBLE: + case OP_ADD_INT_LIT16: + case OP_RSUB_INT: + case OP_MUL_INT_LIT16: + case OP_DIV_INT_LIT16: + case OP_REM_INT_LIT16: + case OP_AND_INT_LIT16: + case OP_OR_INT_LIT16: + case OP_XOR_INT_LIT16: + case OP_ADD_INT_LIT8: + case OP_RSUB_INT_LIT8: + case OP_MUL_INT_LIT8: + case OP_DIV_INT_LIT8: + case OP_REM_INT_LIT8: + case OP_AND_INT_LIT8: + case OP_OR_INT_LIT8: + case OP_XOR_INT_LIT8: + case OP_SHL_INT_LIT8: + case OP_SHR_INT_LIT8: + case OP_USHR_INT_LIT8: + width = 2; + break; + + case OP_MOVE_16: + case OP_MOVE_WIDE_16: + case OP_MOVE_OBJECT_16: + case OP_CONST: + case OP_CONST_WIDE_32: + case OP_CONST_STRING_JUMBO: + case OP_GOTO_32: + case OP_FILLED_NEW_ARRAY: + case OP_FILLED_NEW_ARRAY_RANGE: + case OP_FILL_ARRAY_DATA: + case OP_PACKED_SWITCH: + case OP_SPARSE_SWITCH: + case OP_INVOKE_VIRTUAL: + case OP_INVOKE_SUPER: + case OP_INVOKE_DIRECT: + case OP_INVOKE_STATIC: + case OP_INVOKE_INTERFACE: + case OP_INVOKE_VIRTUAL_RANGE: + case OP_INVOKE_SUPER_RANGE: + case OP_INVOKE_DIRECT_RANGE: + case OP_INVOKE_STATIC_RANGE: + case OP_INVOKE_INTERFACE_RANGE: + width = 3; + break; + + case OP_CONST_WIDE: + width = 5; + break; + + /* + * Optimized instructions. We return negative size values for these + * to distinguish them. 
+ */ + case OP_IGET_QUICK: + case OP_IGET_WIDE_QUICK: + case OP_IGET_OBJECT_QUICK: + case OP_IPUT_QUICK: + case OP_IPUT_WIDE_QUICK: + case OP_IPUT_OBJECT_QUICK: + width = -2; + break; + case OP_INVOKE_VIRTUAL_QUICK: + case OP_INVOKE_VIRTUAL_QUICK_RANGE: + case OP_INVOKE_SUPER_QUICK: + case OP_INVOKE_SUPER_QUICK_RANGE: + case OP_EXECUTE_INLINE: + case OP_INVOKE_DIRECT_EMPTY: + width = -3; + break; + + /* these should never appear */ + case OP_UNUSED_3E: + case OP_UNUSED_3F: + case OP_UNUSED_40: + case OP_UNUSED_41: + case OP_UNUSED_42: + case OP_UNUSED_43: + case OP_UNUSED_73: + case OP_UNUSED_79: + case OP_UNUSED_7A: + case OP_UNUSED_E3: + case OP_UNUSED_E4: + case OP_UNUSED_E5: + case OP_UNUSED_E6: + case OP_UNUSED_E7: + case OP_UNUSED_E8: + case OP_UNUSED_E9: + case OP_UNUSED_EA: + case OP_UNUSED_EB: + case OP_UNUSED_EC: + case OP_UNUSED_ED: + case OP_UNUSED_EF: + case OP_UNUSED_F1: + case OP_UNUSED_FC: + case OP_UNUSED_FD: + case OP_UNUSED_FE: + case OP_UNUSED_FF: + assert(width == 0); + break; + + /* + * DO NOT add a "default" clause here. Without it the compiler will + * complain if an instruction is missing (which is desirable). + */ + } + + instrWidth[opc] = width; + } + + return instrWidth; +} + +/* + * Generate a table that holds instruction flags. 
+ */ +InstructionFlags* dexCreateInstrFlagsTable(void) +{ + InstructionFlags* instrFlags; + int i; + + instrFlags = malloc(sizeof(InstructionFlags) * kNumDalvikInstructions); + if (instrFlags == NULL) + return NULL; + + for (i = 0; i < kNumDalvikInstructions; i++) { + OpCode opc = (OpCode) i; + InstructionFlags flags = 0; + + switch (opc) { + /* these don't affect the PC and can't cause an exception */ + case OP_NOP: + case OP_MOVE: + case OP_MOVE_FROM16: + case OP_MOVE_16: + case OP_MOVE_WIDE: + case OP_MOVE_WIDE_FROM16: + case OP_MOVE_WIDE_16: + case OP_MOVE_OBJECT: + case OP_MOVE_OBJECT_FROM16: + case OP_MOVE_OBJECT_16: + case OP_MOVE_RESULT: + case OP_MOVE_RESULT_WIDE: + case OP_MOVE_RESULT_OBJECT: + case OP_MOVE_EXCEPTION: + case OP_CONST_4: + case OP_CONST_16: + case OP_CONST: + case OP_CONST_HIGH16: + case OP_CONST_WIDE_16: + case OP_CONST_WIDE_32: + case OP_CONST_WIDE: + case OP_CONST_WIDE_HIGH16: + case OP_FILL_ARRAY_DATA: + case OP_CMPL_FLOAT: + case OP_CMPG_FLOAT: + case OP_CMPL_DOUBLE: + case OP_CMPG_DOUBLE: + case OP_CMP_LONG: + case OP_NEG_INT: + case OP_NOT_INT: + case OP_NEG_LONG: + case OP_NOT_LONG: + case OP_NEG_FLOAT: + case OP_NEG_DOUBLE: + case OP_INT_TO_LONG: + case OP_INT_TO_FLOAT: + case OP_INT_TO_DOUBLE: + case OP_LONG_TO_INT: + case OP_LONG_TO_FLOAT: + case OP_LONG_TO_DOUBLE: + case OP_FLOAT_TO_INT: + case OP_FLOAT_TO_LONG: + case OP_FLOAT_TO_DOUBLE: + case OP_DOUBLE_TO_INT: + case OP_DOUBLE_TO_LONG: + case OP_DOUBLE_TO_FLOAT: + case OP_INT_TO_BYTE: + case OP_INT_TO_CHAR: + case OP_INT_TO_SHORT: + case OP_ADD_INT: + case OP_SUB_INT: + case OP_MUL_INT: + case OP_AND_INT: + case OP_OR_INT: + case OP_XOR_INT: + case OP_SHL_INT: + case OP_SHR_INT: + case OP_USHR_INT: + case OP_ADD_LONG: + case OP_SUB_LONG: + case OP_MUL_LONG: + case OP_AND_LONG: + case OP_OR_LONG: + case OP_XOR_LONG: + case OP_SHL_LONG: + case OP_SHR_LONG: + case OP_USHR_LONG: + case OP_ADD_FLOAT: + case OP_SUB_FLOAT: + case OP_MUL_FLOAT: + case OP_DIV_FLOAT: + case 
OP_REM_FLOAT: + case OP_ADD_DOUBLE: + case OP_SUB_DOUBLE: + case OP_MUL_DOUBLE: + case OP_DIV_DOUBLE: // div by zero just returns NaN + case OP_REM_DOUBLE: + case OP_ADD_INT_2ADDR: + case OP_SUB_INT_2ADDR: + case OP_MUL_INT_2ADDR: + case OP_AND_INT_2ADDR: + case OP_OR_INT_2ADDR: + case OP_XOR_INT_2ADDR: + case OP_SHL_INT_2ADDR: + case OP_SHR_INT_2ADDR: + case OP_USHR_INT_2ADDR: + case OP_ADD_LONG_2ADDR: + case OP_SUB_LONG_2ADDR: + case OP_MUL_LONG_2ADDR: + case OP_AND_LONG_2ADDR: + case OP_OR_LONG_2ADDR: + case OP_XOR_LONG_2ADDR: + case OP_SHL_LONG_2ADDR: + case OP_SHR_LONG_2ADDR: + case OP_USHR_LONG_2ADDR: + case OP_ADD_FLOAT_2ADDR: + case OP_SUB_FLOAT_2ADDR: + case OP_MUL_FLOAT_2ADDR: + case OP_DIV_FLOAT_2ADDR: + case OP_REM_FLOAT_2ADDR: + case OP_ADD_DOUBLE_2ADDR: + case OP_SUB_DOUBLE_2ADDR: + case OP_MUL_DOUBLE_2ADDR: + case OP_DIV_DOUBLE_2ADDR: + case OP_REM_DOUBLE_2ADDR: + case OP_ADD_INT_LIT16: + case OP_RSUB_INT: + case OP_MUL_INT_LIT16: + case OP_AND_INT_LIT16: + case OP_OR_INT_LIT16: + case OP_XOR_INT_LIT16: + case OP_ADD_INT_LIT8: + case OP_RSUB_INT_LIT8: + case OP_MUL_INT_LIT8: + case OP_AND_INT_LIT8: + case OP_OR_INT_LIT8: + case OP_XOR_INT_LIT8: + case OP_SHL_INT_LIT8: + case OP_SHR_INT_LIT8: + case OP_USHR_INT_LIT8: + flags = kInstrCanContinue; + break; + + /* these don't affect the PC, but can cause exceptions */ + case OP_CONST_STRING: + case OP_CONST_STRING_JUMBO: + case OP_CONST_CLASS: + case OP_MONITOR_ENTER: + case OP_MONITOR_EXIT: + case OP_CHECK_CAST: + case OP_INSTANCE_OF: + case OP_ARRAY_LENGTH: + case OP_NEW_INSTANCE: + case OP_NEW_ARRAY: + case OP_FILLED_NEW_ARRAY: + case OP_FILLED_NEW_ARRAY_RANGE: + case OP_AGET: + case OP_AGET_BOOLEAN: + case OP_AGET_BYTE: + case OP_AGET_CHAR: + case OP_AGET_SHORT: + case OP_AGET_WIDE: + case OP_AGET_OBJECT: + case OP_APUT: + case OP_APUT_BOOLEAN: + case OP_APUT_BYTE: + case OP_APUT_CHAR: + case OP_APUT_SHORT: + case OP_APUT_WIDE: + case OP_APUT_OBJECT: + case OP_IGET: + case OP_IGET_BOOLEAN: + case 
OP_IGET_BYTE: + case OP_IGET_CHAR: + case OP_IGET_SHORT: + case OP_IGET_WIDE: + case OP_IGET_OBJECT: + case OP_IPUT: + case OP_IPUT_BOOLEAN: + case OP_IPUT_BYTE: + case OP_IPUT_CHAR: + case OP_IPUT_SHORT: + case OP_IPUT_WIDE: + case OP_IPUT_OBJECT: + case OP_SGET: + case OP_SGET_BOOLEAN: + case OP_SGET_BYTE: + case OP_SGET_CHAR: + case OP_SGET_SHORT: + case OP_SGET_WIDE: + case OP_SGET_OBJECT: + case OP_SPUT: + case OP_SPUT_BOOLEAN: + case OP_SPUT_BYTE: + case OP_SPUT_CHAR: + case OP_SPUT_SHORT: + case OP_SPUT_WIDE: + case OP_SPUT_OBJECT: + case OP_INVOKE_VIRTUAL: + case OP_INVOKE_VIRTUAL_RANGE: + case OP_INVOKE_SUPER: + case OP_INVOKE_SUPER_RANGE: + case OP_INVOKE_DIRECT: + case OP_INVOKE_DIRECT_RANGE: + case OP_INVOKE_STATIC: + case OP_INVOKE_STATIC_RANGE: + case OP_INVOKE_INTERFACE: + case OP_INVOKE_INTERFACE_RANGE: + case OP_DIV_INT: + case OP_REM_INT: + case OP_DIV_LONG: + case OP_REM_LONG: + case OP_DIV_INT_2ADDR: + case OP_REM_INT_2ADDR: + case OP_DIV_LONG_2ADDR: + case OP_REM_LONG_2ADDR: + case OP_DIV_INT_LIT16: + case OP_REM_INT_LIT16: + case OP_DIV_INT_LIT8: + case OP_REM_INT_LIT8: + flags = kInstrCanContinue | kInstrCanThrow; + break; + + case OP_RETURN_VOID: + case OP_RETURN: + case OP_RETURN_WIDE: + case OP_RETURN_OBJECT: + flags = kInstrCanReturn; + break; + + case OP_THROW: + flags = kInstrCanThrow; + break; + + /* unconditional branches */ + case OP_GOTO: + case OP_GOTO_16: + case OP_GOTO_32: + flags = kInstrCanBranch; + break; + + /* conditional branches */ + case OP_IF_EQ: + case OP_IF_NE: + case OP_IF_LT: + case OP_IF_GE: + case OP_IF_GT: + case OP_IF_LE: + case OP_IF_EQZ: + case OP_IF_NEZ: + case OP_IF_LTZ: + case OP_IF_GEZ: + case OP_IF_GTZ: + case OP_IF_LEZ: + flags = kInstrCanBranch | kInstrCanContinue; + break; + + /* switch statements; if value not in switch, it continues */ + case OP_PACKED_SWITCH: + case OP_SPARSE_SWITCH: + flags = kInstrCanSwitch | kInstrCanContinue; + break; + + /* optimizer-generated instructions */ + case 
OP_EXECUTE_INLINE: + flags = kInstrCanContinue; + break; + case OP_IGET_QUICK: + case OP_IGET_WIDE_QUICK: + case OP_IGET_OBJECT_QUICK: + case OP_IPUT_QUICK: + case OP_IPUT_WIDE_QUICK: + case OP_IPUT_OBJECT_QUICK: + case OP_INVOKE_VIRTUAL_QUICK: + case OP_INVOKE_VIRTUAL_QUICK_RANGE: + case OP_INVOKE_SUPER_QUICK: + case OP_INVOKE_SUPER_QUICK_RANGE: + case OP_INVOKE_DIRECT_EMPTY: + flags = kInstrCanContinue | kInstrCanThrow; + break; + + /* these should never appear */ + case OP_UNUSED_3E: + case OP_UNUSED_3F: + case OP_UNUSED_40: + case OP_UNUSED_41: + case OP_UNUSED_42: + case OP_UNUSED_43: + case OP_UNUSED_73: + case OP_UNUSED_79: + case OP_UNUSED_7A: + case OP_UNUSED_E3: + case OP_UNUSED_E4: + case OP_UNUSED_E5: + case OP_UNUSED_E6: + case OP_UNUSED_E7: + case OP_UNUSED_E8: + case OP_UNUSED_E9: + case OP_UNUSED_EA: + case OP_UNUSED_EB: + case OP_UNUSED_EC: + case OP_UNUSED_ED: + case OP_UNUSED_EF: + case OP_UNUSED_F1: + case OP_UNUSED_FC: + case OP_UNUSED_FD: + case OP_UNUSED_FE: + case OP_UNUSED_FF: + break; + + /* + * DO NOT add a "default" clause here. Without it the compiler will + * complain if an instruction is missing (which is desirable). + */ + } + + instrFlags[opc] = flags; + } + + return instrFlags; +} + +/* + * Allocate and populate a 256-element array with instruction formats. + * Used in conjunction with dexDecodeInstruction. 
+ */ +InstructionFormat* dexCreateInstrFormatTable(void) +{ + InstructionFormat* instFmt; + int i; + + instFmt = malloc(sizeof(InstructionFormat) * kNumDalvikInstructions); + if (instFmt == NULL) + return NULL; + + for (i = 0; i < kNumDalvikInstructions; i++) { + OpCode opc = (OpCode) i; + InstructionFormat fmt = kFmtUnknown; + + switch (opc) { + case OP_GOTO: + fmt = kFmt10t; + break; + case OP_NOP: + case OP_RETURN_VOID: + fmt = kFmt10x; + break; + case OP_CONST_4: + fmt = kFmt11n; + break; + case OP_CONST_HIGH16: + case OP_CONST_WIDE_HIGH16: + fmt = kFmt21h; + break; + case OP_MOVE_RESULT: + case OP_MOVE_RESULT_WIDE: + case OP_MOVE_RESULT_OBJECT: + case OP_MOVE_EXCEPTION: + case OP_RETURN: + case OP_RETURN_WIDE: + case OP_RETURN_OBJECT: + case OP_MONITOR_ENTER: + case OP_MONITOR_EXIT: + case OP_THROW: + fmt = kFmt11x; + break; + case OP_MOVE: + case OP_MOVE_WIDE: + case OP_MOVE_OBJECT: + case OP_ARRAY_LENGTH: + case OP_NEG_INT: + case OP_NOT_INT: + case OP_NEG_LONG: + case OP_NOT_LONG: + case OP_NEG_FLOAT: + case OP_NEG_DOUBLE: + case OP_INT_TO_LONG: + case OP_INT_TO_FLOAT: + case OP_INT_TO_DOUBLE: + case OP_LONG_TO_INT: + case OP_LONG_TO_FLOAT: + case OP_LONG_TO_DOUBLE: + case OP_FLOAT_TO_INT: + case OP_FLOAT_TO_LONG: + case OP_FLOAT_TO_DOUBLE: + case OP_DOUBLE_TO_INT: + case OP_DOUBLE_TO_LONG: + case OP_DOUBLE_TO_FLOAT: + case OP_INT_TO_BYTE: + case OP_INT_TO_CHAR: + case OP_INT_TO_SHORT: + case OP_ADD_INT_2ADDR: + case OP_SUB_INT_2ADDR: + case OP_MUL_INT_2ADDR: + case OP_DIV_INT_2ADDR: + case OP_REM_INT_2ADDR: + case OP_AND_INT_2ADDR: + case OP_OR_INT_2ADDR: + case OP_XOR_INT_2ADDR: + case OP_SHL_INT_2ADDR: + case OP_SHR_INT_2ADDR: + case OP_USHR_INT_2ADDR: + case OP_ADD_LONG_2ADDR: + case OP_SUB_LONG_2ADDR: + case OP_MUL_LONG_2ADDR: + case OP_DIV_LONG_2ADDR: + case OP_REM_LONG_2ADDR: + case OP_AND_LONG_2ADDR: + case OP_OR_LONG_2ADDR: + case OP_XOR_LONG_2ADDR: + case OP_SHL_LONG_2ADDR: + case OP_SHR_LONG_2ADDR: + case OP_USHR_LONG_2ADDR: + case 
OP_ADD_FLOAT_2ADDR: + case OP_SUB_FLOAT_2ADDR: + case OP_MUL_FLOAT_2ADDR: + case OP_DIV_FLOAT_2ADDR: + case OP_REM_FLOAT_2ADDR: + case OP_ADD_DOUBLE_2ADDR: + case OP_SUB_DOUBLE_2ADDR: + case OP_MUL_DOUBLE_2ADDR: + case OP_DIV_DOUBLE_2ADDR: + case OP_REM_DOUBLE_2ADDR: + fmt = kFmt12x; + break; + case OP_GOTO_16: + fmt = kFmt20t; + break; + case OP_GOTO_32: + fmt = kFmt30t; + break; + case OP_CONST_STRING: + case OP_CONST_CLASS: + case OP_CHECK_CAST: + case OP_NEW_INSTANCE: + case OP_SGET: + case OP_SGET_WIDE: + case OP_SGET_OBJECT: + case OP_SGET_BOOLEAN: + case OP_SGET_BYTE: + case OP_SGET_CHAR: + case OP_SGET_SHORT: + case OP_SPUT: + case OP_SPUT_WIDE: + case OP_SPUT_OBJECT: + case OP_SPUT_BOOLEAN: + case OP_SPUT_BYTE: + case OP_SPUT_CHAR: + case OP_SPUT_SHORT: + fmt = kFmt21c; + break; + case OP_CONST_16: + case OP_CONST_WIDE_16: + fmt = kFmt21s; + break; + case OP_IF_EQZ: + case OP_IF_NEZ: + case OP_IF_LTZ: + case OP_IF_GEZ: + case OP_IF_GTZ: + case OP_IF_LEZ: + fmt = kFmt21t; + break; + case OP_FILL_ARRAY_DATA: + case OP_PACKED_SWITCH: + case OP_SPARSE_SWITCH: + fmt = kFmt31t; + break; + case OP_ADD_INT_LIT8: + case OP_RSUB_INT_LIT8: + case OP_MUL_INT_LIT8: + case OP_DIV_INT_LIT8: + case OP_REM_INT_LIT8: + case OP_AND_INT_LIT8: + case OP_OR_INT_LIT8: + case OP_XOR_INT_LIT8: + case OP_SHL_INT_LIT8: + case OP_SHR_INT_LIT8: + case OP_USHR_INT_LIT8: + fmt = kFmt22b; + break; + case OP_INSTANCE_OF: + case OP_NEW_ARRAY: + case OP_IGET: + case OP_IGET_WIDE: + case OP_IGET_OBJECT: + case OP_IGET_BOOLEAN: + case OP_IGET_BYTE: + case OP_IGET_CHAR: + case OP_IGET_SHORT: + case OP_IPUT: + case OP_IPUT_WIDE: + case OP_IPUT_OBJECT: + case OP_IPUT_BOOLEAN: + case OP_IPUT_BYTE: + case OP_IPUT_CHAR: + case OP_IPUT_SHORT: + fmt = kFmt22c; + break; + case OP_ADD_INT_LIT16: + case OP_RSUB_INT: + case OP_MUL_INT_LIT16: + case OP_DIV_INT_LIT16: + case OP_REM_INT_LIT16: + case OP_AND_INT_LIT16: + case OP_OR_INT_LIT16: + case OP_XOR_INT_LIT16: + fmt = kFmt22s; + break; + case 
OP_IF_EQ: + case OP_IF_NE: + case OP_IF_LT: + case OP_IF_GE: + case OP_IF_GT: + case OP_IF_LE: + fmt = kFmt22t; + break; + case OP_MOVE_FROM16: + case OP_MOVE_WIDE_FROM16: + case OP_MOVE_OBJECT_FROM16: + fmt = kFmt22x; + break; + case OP_CMPL_FLOAT: + case OP_CMPG_FLOAT: + case OP_CMPL_DOUBLE: + case OP_CMPG_DOUBLE: + case OP_CMP_LONG: + case OP_AGET: + case OP_AGET_WIDE: + case OP_AGET_OBJECT: + case OP_AGET_BOOLEAN: + case OP_AGET_BYTE: + case OP_AGET_CHAR: + case OP_AGET_SHORT: + case OP_APUT: + case OP_APUT_WIDE: + case OP_APUT_OBJECT: + case OP_APUT_BOOLEAN: + case OP_APUT_BYTE: + case OP_APUT_CHAR: + case OP_APUT_SHORT: + case OP_ADD_INT: + case OP_SUB_INT: + case OP_MUL_INT: + case OP_DIV_INT: + case OP_REM_INT: + case OP_AND_INT: + case OP_OR_INT: + case OP_XOR_INT: + case OP_SHL_INT: + case OP_SHR_INT: + case OP_USHR_INT: + case OP_ADD_LONG: + case OP_SUB_LONG: + case OP_MUL_LONG: + case OP_DIV_LONG: + case OP_REM_LONG: + case OP_AND_LONG: + case OP_OR_LONG: + case OP_XOR_LONG: + case OP_SHL_LONG: + case OP_SHR_LONG: + case OP_USHR_LONG: + case OP_ADD_FLOAT: + case OP_SUB_FLOAT: + case OP_MUL_FLOAT: + case OP_DIV_FLOAT: + case OP_REM_FLOAT: + case OP_ADD_DOUBLE: + case OP_SUB_DOUBLE: + case OP_MUL_DOUBLE: + case OP_DIV_DOUBLE: + case OP_REM_DOUBLE: + fmt = kFmt23x; + break; + case OP_CONST: + case OP_CONST_WIDE_32: + fmt = kFmt31i; + break; + case OP_CONST_STRING_JUMBO: + fmt = kFmt31c; + break; + case OP_MOVE_16: + case OP_MOVE_WIDE_16: + case OP_MOVE_OBJECT_16: + fmt = kFmt32x; + break; + case OP_FILLED_NEW_ARRAY: + case OP_INVOKE_VIRTUAL: + case OP_INVOKE_SUPER: + case OP_INVOKE_DIRECT: + case OP_INVOKE_STATIC: + case OP_INVOKE_INTERFACE: + fmt = kFmt35c; + break; + case OP_FILLED_NEW_ARRAY_RANGE: + case OP_INVOKE_VIRTUAL_RANGE: + case OP_INVOKE_SUPER_RANGE: + case OP_INVOKE_DIRECT_RANGE: + case OP_INVOKE_STATIC_RANGE: + case OP_INVOKE_INTERFACE_RANGE: + fmt = kFmt3rc; + break; + case OP_CONST_WIDE: + fmt = kFmt51l; + break; + + /* + * Optimized 
instructions. + */ + case OP_IGET_QUICK: + case OP_IGET_WIDE_QUICK: + case OP_IGET_OBJECT_QUICK: + case OP_IPUT_QUICK: + case OP_IPUT_WIDE_QUICK: + case OP_IPUT_OBJECT_QUICK: + fmt = kFmt22cs; + break; + case OP_INVOKE_VIRTUAL_QUICK: + case OP_INVOKE_SUPER_QUICK: + fmt = kFmt35ms; + break; + case OP_INVOKE_VIRTUAL_QUICK_RANGE: + case OP_INVOKE_SUPER_QUICK_RANGE: + fmt = kFmt3rms; + break; + case OP_EXECUTE_INLINE: + fmt = kFmt3inline; + break; + case OP_INVOKE_DIRECT_EMPTY: + fmt = kFmt35c; + break; + + /* these should never appear */ + case OP_UNUSED_3E: + case OP_UNUSED_3F: + case OP_UNUSED_40: + case OP_UNUSED_41: + case OP_UNUSED_42: + case OP_UNUSED_43: + case OP_UNUSED_73: + case OP_UNUSED_79: + case OP_UNUSED_7A: + case OP_UNUSED_E3: + case OP_UNUSED_E4: + case OP_UNUSED_E5: + case OP_UNUSED_E6: + case OP_UNUSED_E7: + case OP_UNUSED_E8: + case OP_UNUSED_E9: + case OP_UNUSED_EA: + case OP_UNUSED_EB: + case OP_UNUSED_EC: + case OP_UNUSED_ED: + case OP_UNUSED_EF: + case OP_UNUSED_F1: + case OP_UNUSED_FC: + case OP_UNUSED_FD: + case OP_UNUSED_FE: + case OP_UNUSED_FF: + fmt = kFmtUnknown; + break; + + /* + * DO NOT add a "default" clause here. Without it the compiler will + * complain if an instruction is missing (which is desirable). + */ + } + + instFmt[opc] = fmt; + } + + return instFmt; +} + +/* + * Copied from InterpCore.h. Used for instruction decoding. + */ +#define FETCH(_offset) (insns[(_offset)]) +#define INST_INST(_inst) ((_inst) & 0xff) +#define INST_A(_inst) (((u2)(_inst) >> 8) & 0x0f) +#define INST_B(_inst) ((u2)(_inst) >> 12) +#define INST_AA(_inst) ((_inst) >> 8) + +/* + * Decode the instruction pointed to by "insns". + * + * Fills out the pieces of "pDec" that are affected by the current + * instruction. Does not touch anything else. 
+ */ +void dexDecodeInstruction(const InstructionFormat* fmts, const u2* insns, + DecodedInstruction* pDec) +{ + u2 inst = *insns; + + pDec->opCode = (OpCode) INST_INST(inst); + + switch (dexGetInstrFormat(fmts, pDec->opCode)) { + case kFmt10x: // op + /* nothing to do; copy the AA bits out for the verifier */ + pDec->vA = INST_AA(inst); + break; + case kFmt12x: // op vA, vB + pDec->vA = INST_A(inst); + pDec->vB = INST_B(inst); + break; + case kFmt11n: // op vA, #+B + pDec->vA = INST_A(inst); + pDec->vB = (s4) (INST_B(inst) << 28) >> 28; // sign extend 4-bit value + break; + case kFmt11x: // op vAA + pDec->vA = INST_AA(inst); + break; + case kFmt10t: // op +AA + pDec->vA = (s1) INST_AA(inst); // sign-extend 8-bit value + break; + case kFmt20t: // op +AAAA + pDec->vA = (s2) FETCH(1); // sign-extend 16-bit value + break; + case kFmt21c: // op vAA, thing@BBBB + case kFmt22x: // op vAA, vBBBB + pDec->vA = INST_AA(inst); + pDec->vB = FETCH(1); + break; + case kFmt21s: // op vAA, #+BBBB + case kFmt21t: // op vAA, +BBBB + pDec->vA = INST_AA(inst); + pDec->vB = (s2) FETCH(1); // sign-extend 16-bit value + break; + case kFmt21h: // op vAA, #+BBBB0000[00000000] + pDec->vA = INST_AA(inst); + /* + * The value should be treated as right-zero-extended, but we don't + * actually do that here. Among other things, we don't know if it's + * the top bits of a 32- or 64-bit value. 
+ */ + pDec->vB = FETCH(1); + break; + case kFmt23x: // op vAA, vBB, vCC + pDec->vA = INST_AA(inst); + pDec->vB = FETCH(1) & 0xff; + pDec->vC = FETCH(1) >> 8; + break; + case kFmt22b: // op vAA, vBB, #+CC + pDec->vA = INST_AA(inst); + pDec->vB = FETCH(1) & 0xff; + pDec->vC = (s1) (FETCH(1) >> 8); // sign-extend 8-bit value + break; + case kFmt22s: // op vA, vB, #+CCCC + case kFmt22t: // op vA, vB, +CCCC + pDec->vA = INST_A(inst); + pDec->vB = INST_B(inst); + pDec->vC = (s2) FETCH(1); // sign-extend 16-bit value + break; + case kFmt22c: // op vA, vB, thing@CCCC + case kFmt22cs: // [opt] op vA, vB, field offset CCCC + pDec->vA = INST_A(inst); + pDec->vB = INST_B(inst); + pDec->vC = FETCH(1); + break; + case kFmt30t: // op +AAAAAAAA + pDec->vA = FETCH(1) | ((u4) FETCH(2) << 16); // signed 32-bit value + break; + case kFmt31t: // op vAA, +BBBBBBBB + case kFmt31c: // op vAA, thing@BBBBBBBB + pDec->vA = INST_AA(inst); + pDec->vB = FETCH(1) | ((u4) FETCH(2) << 16); // 32-bit value + break; + case kFmt32x: // op vAAAA, vBBBB + pDec->vA = FETCH(1); + pDec->vB = FETCH(2); + break; + case kFmt31i: // op vAA, #+BBBBBBBB + pDec->vA = INST_AA(inst); + pDec->vB = FETCH(1) | ((u4) FETCH(2) << 16); + break; + case kFmt35c: // op vB, {vD..vG,vA}, thing@CCCC + case kFmt35ms: // [opt] invoke-virtual+super + { + /* + * The lettering changes that came about when we went from 4 args + * to 5 made the "range" versions of the calls different from + * the non-range versions. We have the choice between decoding + * them the way the spec shows and having lots of conditionals + * in the verifier, or mapping the values onto their original + * registers and leaving the verifier intact. + * + * Current plan is to leave the verifier alone. We can fix it + * later if it's architecturally unbearable. + * + * Bottom line: method constant is always in vB. 
+ */ + u2 regList; + int i, count; + + pDec->vA = INST_B(inst); + pDec->vB = FETCH(1); + regList = FETCH(2); + + if (pDec->vA > 5) { + LOGW("Invalid arg count in 35c/35ms (%d)\n", pDec->vA); + goto bail; + } + count = pDec->vA; + if (count == 5) { + /* 5th arg comes from A field in instruction */ + pDec->arg[4] = INST_A(inst); + count--; + } + for (i = 0; i < count; i++) { + pDec->arg[i] = regList & 0x0f; + regList >>= 4; + } + /* copy arg[0] to vC; we don't have vD/vE/vF, so ignore those */ + if (pDec->vA > 0) + pDec->vC = pDec->arg[0]; + } + break; + case kFmt3inline: // [opt] inline invoke + { + u2 regList; + int i; + + pDec->vA = INST_B(inst); + pDec->vB = FETCH(1); + regList = FETCH(2); + + if (pDec->vA > 4) { + LOGW("Invalid arg count in 3inline (%d)\n", pDec->vA); + goto bail; + } + for (i = 0; i < (int) pDec->vA; i++) { + pDec->arg[i] = regList & 0x0f; + regList >>= 4; + } + /* copy arg[0] to vC; we don't have vD/vE/vF, so ignore those */ + if (pDec->vA > 0) + pDec->vC = pDec->arg[0]; + } + break; + case kFmt35fs: // [opt] invoke-interface + assert(false); // TODO + break; + case kFmt3rc: // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB + case kFmt3rms: // [opt] invoke-virtual+super/range + pDec->vA = INST_AA(inst); + pDec->vB = FETCH(1); + pDec->vC = FETCH(2); + break; + case kFmt3rfs: // [opt] invoke-interface/range + assert(false); // TODO + break; + case kFmt51l: // op vAA, #+BBBBBBBBBBBBBBBB + pDec->vA = INST_AA(inst); + pDec->vB_wide = FETCH(1); + pDec->vB_wide |= (u8)FETCH(2) << 16; + pDec->vB_wide |= (u8)FETCH(3) << 32; + pDec->vB_wide |= (u8)FETCH(4) << 48; + break; + default: + LOGW("Can't decode unexpected format %d (op=%d)\n", + dexGetInstrFormat(fmts, pDec->opCode), pDec->opCode); + assert(false); + break; + } + +bail: + ; +} + +/* + * Return the width of the specified instruction, or 0 if not defined. Also + * works for special OP_NOP entries, including switch statement data tables + * and array data. 
+ */ +int dexGetInstrOrTableWidthAbs(const InstructionWidth* widths, const u2* insns) +{ + int width; + + if (*insns == kPackedSwitchSignature) { + width = 4 + insns[1] * 2; + } else if (*insns == kSparseSwitchSignature) { + width = 2 + insns[1] * 4; + } else if (*insns == kArrayDataSignature) { + u2 elemWidth = insns[1]; + u4 len = insns[2] | (((u4)insns[3]) << 16); + width = 4 + (elemWidth * len + 1) / 2; + } else { + width = dexGetInstrWidthAbs(widths, INST_INST(insns[0])); + } + return width; +} diff --git a/libdex/InstrUtils.h b/libdex/InstrUtils.h new file mode 100644 index 000000000..9d8e5c37a --- /dev/null +++ b/libdex/InstrUtils.h @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Dalvik instruction utility functions. + */ +#ifndef _LIBDEX_INSTRUTILS +#define _LIBDEX_INSTRUTILS + +#include "DexFile.h" +#include "OpCode.h" + +/* + * Dalvik-defined instruction formats. + * + * (This defines InstructionFormat as an unsigned char to reduce the size + * of the table. This isn't necessary with some compilers, which use an + * integer width appropriate for the number of enum values.) 
/*
 * Dalvik-defined instruction formats.
 *
 * InstructionFormat is a plain unsigned char rather than the enum type
 * itself, which keeps each entry of a format table to a single byte.
 *
 * If you add or delete a format, you have to change some or all of:
 *  - this enum
 *  - the switch inside dexDecodeInstruction() in InstrUtils.c
 *  - the switch inside dumpInstruction() in DexDump.c
 *
 * Entries tagged "[opt]" are the optimized variants.
 */
typedef unsigned char InstructionFormat;
enum InstructionFormat {
    kFmtUnknown = 0,
    kFmt10x,        // op
    kFmt12x,        // op vA, vB
    kFmt11n,        // op vA, #+B
    kFmt11x,        // op vAA
    kFmt10t,        // op +AA
    kFmt20t,        // op +AAAA
    kFmt22x,        // op vAA, vBBBB
    kFmt21t,        // op vAA, +BBBB
    kFmt21s,        // op vAA, #+BBBB
    kFmt21h,        // op vAA, #+BBBB00000[00000000]
    kFmt21c,        // op vAA, thing@BBBB
    kFmt23x,        // op vAA, vBB, vCC
    kFmt22b,        // op vAA, vBB, #+CC
    kFmt22t,        // op vA, vB, +CCCC
    kFmt22s,        // op vA, vB, #+CCCC
    kFmt22c,        // op vA, vB, thing@CCCC
    kFmt22cs,       // [opt] op vA, vB, field offset CCCC
    kFmt32x,        // op vAAAA, vBBBB
    kFmt30t,        // op +AAAAAAAA
    kFmt31t,        // op vAA, +BBBBBBBB
    kFmt31i,        // op vAA, #+BBBBBBBB
    kFmt31c,        // op vAA, thing@BBBBBBBB
    kFmt35c,        // op {vC, vD, vE, vF, vG}, thing@BBBB (B: count, A: vG)
    kFmt35ms,       // [opt] invoke-virtual+super
    kFmt35fs,       // [opt] invoke-interface
    kFmt3rc,        // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
    kFmt3rms,       // [opt] invoke-virtual+super/range
    kFmt3rfs,       // [opt] invoke-interface/range
    kFmt3inline,    // [opt] inline invoke
    kFmt51l,        // op vAA, #+BBBBBBBBBBBBBBBB
};

/*
 * Holds the contents of a decoded instruction.
 *
 * Which fields are meaningful depends on the instruction format; the
 * decoder only fills in the ones the format defines.
 */
typedef struct DecodedInstruction {
    u4      vA;
    u4      vB;
    u8      vB_wide;        /* for kFmt51l */
    u4      vC;
    u4      arg[5];         /* vC/D/E/F/G in invoke or filled-new-array */
    OpCode  opCode;
} DecodedInstruction;

/*
 * Instruction width, a value in the range -3 to 5.
 *
 * Signed so that a width table entry can also be negative; see
 * dexGetInstrWidth() / dexGetInstrWidthAbs() for how the sign is used.
 */
typedef signed char InstructionWidth;
+ */ +typedef unsigned char InstructionFlags; +enum InstructionFlags { + kInstrCanBranch = 1, // conditional or unconditional branch + kInstrCanContinue = 1 << 1, // flow can continue to next statement + kInstrCanSwitch = 1 << 2, // switch statement + kInstrCanThrow = 1 << 3, // could cause an exception to be thrown + kInstrCanReturn = 1 << 4, // returns, no additional statements +}; + + +/* + * Allocate and populate a 256-element array with instruction widths. A + * width of zero means the entry does not exist. + */ +InstructionWidth* dexCreateInstrWidthTable(void); + +/* + * Returns the width of the specified instruction, or 0 if not defined. + * Optimized instructions use negative values. + */ +DEX_INLINE int dexGetInstrWidth(const InstructionWidth* widths, OpCode opCode) +{ + // assert(/*opCode >= 0 &&*/ opCode < kNumDalvikInstructions); + return widths[opCode]; +} + +/* + * Return the width of the specified instruction, or 0 if not defined. + */ +DEX_INLINE int dexGetInstrWidthAbs(const InstructionWidth* widths,OpCode opCode) +{ + //assert(/*opCode >= 0 &&*/ opCode < kNumDalvikInstructions); + + int val = dexGetInstrWidth(widths, opCode); + if (val < 0) + val = -val; + /* XXX - the no-compare trick may be a cycle slower on ARM */ + return val; +} + +/* + * Return the width of the specified instruction, or 0 if not defined. Also + * works for special OP_NOP entries, including switch statement data tables + * and array data. + */ +int dexGetInstrOrTableWidthAbs(const InstructionWidth* widths, const u2* insns); + + +/* + * Allocate and populate a 256-element array with instruction flags. + */ +InstructionFlags* dexCreateInstrFlagsTable(void); + +/* + * Returns the flags for the specified opcode. + */ +DEX_INLINE int dexGetInstrFlags(const InstructionFlags* flags, OpCode opCode) +{ + //assert(/*opCode >= 0 &&*/ opCode < kNumDalvikInstructions); + return flags[opCode]; +} + + +/* + * Allocate and populate a 256-element array with instruction formats. 
+ */ +InstructionFormat* dexCreateInstrFormatTable(void); + +/* + * Return the instruction format for the specified opcode. + */ +DEX_INLINE InstructionFormat dexGetInstrFormat(const InstructionFormat* fmts, + OpCode opCode) +{ + //assert(/*opCode >= 0 &&*/ opCode < kNumDalvikInstructions); + return fmts[opCode]; +} + +/* + * Decode the instruction pointed to by "insns". + */ +void dexDecodeInstruction(const InstructionFormat* fmts, const u2* insns, + DecodedInstruction* pDec); + +#endif /*_LIBDEX_INSTRUTILS*/ diff --git a/libdex/Leb128.c b/libdex/Leb128.c new file mode 100644 index 000000000..ed09e19aa --- /dev/null +++ b/libdex/Leb128.c @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Functions for interpreting LEB128 (little endian base 128) values + */ + +#include "Leb128.h" + +/* + * Reads an unsigned LEB128 value, updating the given pointer to point + * just past the end of the read value and also indicating whether the + * value was syntactically valid. The only syntactically *invalid* + * values are ones that are five bytes long where the final byte has + * any but the low-order four bits set. Additionally, if the limit is + * passed as non-NULL and bytes would need to be read past the limit, + * then the read is considered invalid. 
+ */ +int readAndVerifyUnsignedLeb128(const u1** pStream, const u1* limit, + bool* okay) { + const u1* ptr = *pStream; + int result = readUnsignedLeb128(pStream); + + if (((limit != NULL) && (*pStream > limit)) + || (((*pStream - ptr) == 5) && (ptr[4] > 0x0f))) { + *okay = false; + } + + return result; +} + +/* + * Reads a signed LEB128 value, updating the given pointer to point + * just past the end of the read value and also indicating whether the + * value was syntactically valid. The only syntactically *invalid* + * values are ones that are five bytes long where the final byte has + * any but the low-order four bits set. Additionally, if the limit is + * passed as non-NULL and bytes would need to be read past the limit, + * then the read is considered invalid. + */ +int readAndVerifySignedLeb128(const u1** pStream, const u1* limit, + bool* okay) { + const u1* ptr = *pStream; + int result = readSignedLeb128(pStream); + + if (((limit != NULL) && (*pStream > limit)) + || (((*pStream - ptr) == 5) && (ptr[4] > 0x0f))) { + *okay = false; + } + + return result; +} diff --git a/libdex/Leb128.h b/libdex/Leb128.h new file mode 100644 index 000000000..215ae3025 --- /dev/null +++ b/libdex/Leb128.h @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Functions for interpreting LEB128 (little endian base 128) values + */ + +#ifndef _LIBDEX_LEB128 +#define _LIBDEX_LEB128 + +#include "DexFile.h" + +/* + * Reads an unsigned LEB128 value, updating the given pointer to point + * just past the end of the read value. This function tolerates + * non-zero high-order bits in the fifth encoded byte. + */ +DEX_INLINE int readUnsignedLeb128(const u1** pStream) { + const u1* ptr = *pStream; + int result = *(ptr++); + + if (result > 0x7f) { + int cur = *(ptr++); + result = (result & 0x7f) | ((cur & 0x7f) << 7); + if (cur > 0x7f) { + cur = *(ptr++); + result |= (cur & 0x7f) << 14; + if (cur > 0x7f) { + cur = *(ptr++); + result |= (cur & 0x7f) << 21; + if (cur > 0x7f) { + /* + * Note: We don't check to see if cur is out of + * range here, meaning we tolerate garbage in the + * high four-order bits. + */ + cur = *(ptr++); + result |= cur << 28; + } + } + } + } + + *pStream = ptr; + return result; +} + +/* + * Reads a signed LEB128 value, updating the given pointer to point + * just past the end of the read value. This function tolerates + * non-zero high-order bits in the fifth encoded byte. + */ +DEX_INLINE int readSignedLeb128(const u1** pStream) { + const u1* ptr = *pStream; + int result = *(ptr++); + + if (result <= 0x7f) { + result = (result << 25) >> 25; + } else { + int cur = *(ptr++); + result = (result & 0x7f) | ((cur & 0x7f) << 7); + if (cur <= 0x7f) { + result = (result << 18) >> 18; + } else { + cur = *(ptr++); + result |= (cur & 0x7f) << 14; + if (cur <= 0x7f) { + result = (result << 11) >> 11; + } else { + cur = *(ptr++); + result |= (cur & 0x7f) << 21; + if (cur <= 0x7f) { + result = (result << 4) >> 4; + } else { + /* + * Note: We don't check to see if cur is out of + * range here, meaning we tolerate garbage in the + * high four-order bits. 
+ */ + cur = *(ptr++); + result |= cur << 28; + } + } + } + } + + *pStream = ptr; + return result; +} + +/* + * Reads an unsigned LEB128 value, updating the given pointer to point + * just past the end of the read value and also indicating whether the + * value was syntactically valid. The only syntactically *invalid* + * values are ones that are five bytes long where the final byte has + * any but the low-order four bits set. Additionally, if the limit is + * passed as non-NULL and bytes would need to be read past the limit, + * then the read is considered invalid. + */ +int readAndVerifyUnsignedLeb128(const u1** pStream, const u1* limit, + bool* okay); + +/* + * Reads a signed LEB128 value, updating the given pointer to point + * just past the end of the read value and also indicating whether the + * value was syntactically valid. The only syntactically *invalid* + * values are ones that are five bytes long where the final byte has + * any but the low-order four bits set. Additionally, if the limit is + * passed as non-NULL and bytes would need to be read past the limit, + * then the read is considered invalid. + */ +int readAndVerifySignedLeb128(const u1** pStream, const u1* limit, bool* okay); + +#endif diff --git a/libdex/OpCode.h b/libdex/OpCode.h new file mode 100644 index 000000000..d38947290 --- /dev/null +++ b/libdex/OpCode.h @@ -0,0 +1,653 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Dalvik opcode enumeration. + */ +#ifndef _LIBDEX_OPCODE +#define _LIBDEX_OPCODE + +/* + * If you add, delete, or renumber instructions, you need to change things + * in various places. Renumbering really only affects the "unused" opcodes, + * which are given explicit enumeration values to make it easier to find + * the places in the code that need to be updated when making changes -- + * if you replace "OP_UNUSED_2D" and neglect to update a switch statement, + * the compiler will complain about an unknown value. + * + * Opcode definitions and attributes: + * - update the OpCode enum below + * - update the "goto table" definition macro, DEFINE_GOTO_TABLE(), below + * - update the instruction info table generators and (if you changed an + * instruction format) instruction decoder in InstrUtils.c + * - update the instruction format list in InstrUtils.h, if necessary + * - update the parallel definitions in the class dalvik.bytecode.Opcodes + * + * Interpreter: + * - implement/update the instruction in C in mterp/c/... + * - verify new code by running with "dalvik.vm.execution-mode = + * int:portable" or "-Xint:portable" + * - implement/update the instruction in ARM in mterp/armv5/... + * - verify by enabling ARM handler for that instruction in mterp config + * and running int:fast as above + * - repeat for other platforms (x86, ...) 
+ * (see notes in mterp/ReadMe.txt for rebuilding instructions) + * + * Verifier / optimizer: + * - update some stuff in analysis/DexOptimize.c, analysis/DexVerify.c, + * and/or analysis/CodeVerify.c as needed + * - verify by running with verifier enabled (it's on by default) + * + * Tools: + * - update the OpCodeNames table in dexdump/OpCodeNames.c + * - update dexdump/DexDump.c if an instruction format has changed + * + * Note: The Dalvik VM tests (in the tests subdirectory) provide a convenient + * way to test most of the above without doing any rebuilds. In particular, + * test 003-omnibus-opcodes will exercise most of the opcodes. + */ + +/* + * Dalvik opcode list. + */ +typedef enum OpCode { + OP_NOP = 0x00, + + OP_MOVE = 0x01, + OP_MOVE_FROM16 = 0x02, + OP_MOVE_16 = 0x03, + OP_MOVE_WIDE = 0x04, + OP_MOVE_WIDE_FROM16 = 0x05, + OP_MOVE_WIDE_16 = 0x06, + OP_MOVE_OBJECT = 0x07, + OP_MOVE_OBJECT_FROM16 = 0x08, + OP_MOVE_OBJECT_16 = 0x09, + + OP_MOVE_RESULT = 0x0a, + OP_MOVE_RESULT_WIDE = 0x0b, + OP_MOVE_RESULT_OBJECT = 0x0c, + OP_MOVE_EXCEPTION = 0x0d, + + OP_RETURN_VOID = 0x0e, + OP_RETURN = 0x0f, + OP_RETURN_WIDE = 0x10, + OP_RETURN_OBJECT = 0x11, + + OP_CONST_4 = 0x12, + OP_CONST_16 = 0x13, + OP_CONST = 0x14, + OP_CONST_HIGH16 = 0x15, + OP_CONST_WIDE_16 = 0x16, + OP_CONST_WIDE_32 = 0x17, + OP_CONST_WIDE = 0x18, + OP_CONST_WIDE_HIGH16 = 0x19, + OP_CONST_STRING = 0x1a, + OP_CONST_STRING_JUMBO = 0x1b, + OP_CONST_CLASS = 0x1c, + + OP_MONITOR_ENTER = 0x1d, + OP_MONITOR_EXIT = 0x1e, + + OP_CHECK_CAST = 0x1f, + OP_INSTANCE_OF = 0x20, + + OP_ARRAY_LENGTH = 0x21, + + OP_NEW_INSTANCE = 0x22, + OP_NEW_ARRAY = 0x23, + + OP_FILLED_NEW_ARRAY = 0x24, + OP_FILLED_NEW_ARRAY_RANGE = 0x25, + OP_FILL_ARRAY_DATA = 0x26, + + OP_THROW = 0x27, + OP_GOTO = 0x28, + OP_GOTO_16 = 0x29, + OP_GOTO_32 = 0x2a, + OP_PACKED_SWITCH = 0x2b, + OP_SPARSE_SWITCH = 0x2c, + + OP_CMPL_FLOAT = 0x2d, + OP_CMPG_FLOAT = 0x2e, + OP_CMPL_DOUBLE = 0x2f, + OP_CMPG_DOUBLE = 0x30, + OP_CMP_LONG = 0x31, + + 
OP_IF_EQ = 0x32, + OP_IF_NE = 0x33, + OP_IF_LT = 0x34, + OP_IF_GE = 0x35, + OP_IF_GT = 0x36, + OP_IF_LE = 0x37, + OP_IF_EQZ = 0x38, + OP_IF_NEZ = 0x39, + OP_IF_LTZ = 0x3a, + OP_IF_GEZ = 0x3b, + OP_IF_GTZ = 0x3c, + OP_IF_LEZ = 0x3d, + + OP_UNUSED_3E = 0x3e, + OP_UNUSED_3F = 0x3f, + OP_UNUSED_40 = 0x40, + OP_UNUSED_41 = 0x41, + OP_UNUSED_42 = 0x42, + OP_UNUSED_43 = 0x43, + + OP_AGET = 0x44, + OP_AGET_WIDE = 0x45, + OP_AGET_OBJECT = 0x46, + OP_AGET_BOOLEAN = 0x47, + OP_AGET_BYTE = 0x48, + OP_AGET_CHAR = 0x49, + OP_AGET_SHORT = 0x4a, + OP_APUT = 0x4b, + OP_APUT_WIDE = 0x4c, + OP_APUT_OBJECT = 0x4d, + OP_APUT_BOOLEAN = 0x4e, + OP_APUT_BYTE = 0x4f, + OP_APUT_CHAR = 0x50, + OP_APUT_SHORT = 0x51, + + OP_IGET = 0x52, + OP_IGET_WIDE = 0x53, + OP_IGET_OBJECT = 0x54, + OP_IGET_BOOLEAN = 0x55, + OP_IGET_BYTE = 0x56, + OP_IGET_CHAR = 0x57, + OP_IGET_SHORT = 0x58, + OP_IPUT = 0x59, + OP_IPUT_WIDE = 0x5a, + OP_IPUT_OBJECT = 0x5b, + OP_IPUT_BOOLEAN = 0x5c, + OP_IPUT_BYTE = 0x5d, + OP_IPUT_CHAR = 0x5e, + OP_IPUT_SHORT = 0x5f, + + OP_SGET = 0x60, + OP_SGET_WIDE = 0x61, + OP_SGET_OBJECT = 0x62, + OP_SGET_BOOLEAN = 0x63, + OP_SGET_BYTE = 0x64, + OP_SGET_CHAR = 0x65, + OP_SGET_SHORT = 0x66, + OP_SPUT = 0x67, + OP_SPUT_WIDE = 0x68, + OP_SPUT_OBJECT = 0x69, + OP_SPUT_BOOLEAN = 0x6a, + OP_SPUT_BYTE = 0x6b, + OP_SPUT_CHAR = 0x6c, + OP_SPUT_SHORT = 0x6d, + + OP_INVOKE_VIRTUAL = 0x6e, + OP_INVOKE_SUPER = 0x6f, + OP_INVOKE_DIRECT = 0x70, + OP_INVOKE_STATIC = 0x71, + OP_INVOKE_INTERFACE = 0x72, + + OP_UNUSED_73 = 0x73, + + OP_INVOKE_VIRTUAL_RANGE = 0x74, + OP_INVOKE_SUPER_RANGE = 0x75, + OP_INVOKE_DIRECT_RANGE = 0x76, + OP_INVOKE_STATIC_RANGE = 0x77, + OP_INVOKE_INTERFACE_RANGE = 0x78, + + OP_UNUSED_79 = 0x79, + OP_UNUSED_7A = 0x7a, + + OP_NEG_INT = 0x7b, + OP_NOT_INT = 0x7c, + OP_NEG_LONG = 0x7d, + OP_NOT_LONG = 0x7e, + OP_NEG_FLOAT = 0x7f, + OP_NEG_DOUBLE = 0x80, + OP_INT_TO_LONG = 0x81, + OP_INT_TO_FLOAT = 0x82, + OP_INT_TO_DOUBLE = 0x83, + OP_LONG_TO_INT = 0x84, + OP_LONG_TO_FLOAT = 0x85, + 
OP_LONG_TO_DOUBLE = 0x86, + OP_FLOAT_TO_INT = 0x87, + OP_FLOAT_TO_LONG = 0x88, + OP_FLOAT_TO_DOUBLE = 0x89, + OP_DOUBLE_TO_INT = 0x8a, + OP_DOUBLE_TO_LONG = 0x8b, + OP_DOUBLE_TO_FLOAT = 0x8c, + OP_INT_TO_BYTE = 0x8d, + OP_INT_TO_CHAR = 0x8e, + OP_INT_TO_SHORT = 0x8f, + + OP_ADD_INT = 0x90, + OP_SUB_INT = 0x91, + OP_MUL_INT = 0x92, + OP_DIV_INT = 0x93, + OP_REM_INT = 0x94, + OP_AND_INT = 0x95, + OP_OR_INT = 0x96, + OP_XOR_INT = 0x97, + OP_SHL_INT = 0x98, + OP_SHR_INT = 0x99, + OP_USHR_INT = 0x9a, + + OP_ADD_LONG = 0x9b, + OP_SUB_LONG = 0x9c, + OP_MUL_LONG = 0x9d, + OP_DIV_LONG = 0x9e, + OP_REM_LONG = 0x9f, + OP_AND_LONG = 0xa0, + OP_OR_LONG = 0xa1, + OP_XOR_LONG = 0xa2, + OP_SHL_LONG = 0xa3, + OP_SHR_LONG = 0xa4, + OP_USHR_LONG = 0xa5, + + OP_ADD_FLOAT = 0xa6, + OP_SUB_FLOAT = 0xa7, + OP_MUL_FLOAT = 0xa8, + OP_DIV_FLOAT = 0xa9, + OP_REM_FLOAT = 0xaa, + OP_ADD_DOUBLE = 0xab, + OP_SUB_DOUBLE = 0xac, + OP_MUL_DOUBLE = 0xad, + OP_DIV_DOUBLE = 0xae, + OP_REM_DOUBLE = 0xaf, + + OP_ADD_INT_2ADDR = 0xb0, + OP_SUB_INT_2ADDR = 0xb1, + OP_MUL_INT_2ADDR = 0xb2, + OP_DIV_INT_2ADDR = 0xb3, + OP_REM_INT_2ADDR = 0xb4, + OP_AND_INT_2ADDR = 0xb5, + OP_OR_INT_2ADDR = 0xb6, + OP_XOR_INT_2ADDR = 0xb7, + OP_SHL_INT_2ADDR = 0xb8, + OP_SHR_INT_2ADDR = 0xb9, + OP_USHR_INT_2ADDR = 0xba, + + OP_ADD_LONG_2ADDR = 0xbb, + OP_SUB_LONG_2ADDR = 0xbc, + OP_MUL_LONG_2ADDR = 0xbd, + OP_DIV_LONG_2ADDR = 0xbe, + OP_REM_LONG_2ADDR = 0xbf, + OP_AND_LONG_2ADDR = 0xc0, + OP_OR_LONG_2ADDR = 0xc1, + OP_XOR_LONG_2ADDR = 0xc2, + OP_SHL_LONG_2ADDR = 0xc3, + OP_SHR_LONG_2ADDR = 0xc4, + OP_USHR_LONG_2ADDR = 0xc5, + + OP_ADD_FLOAT_2ADDR = 0xc6, + OP_SUB_FLOAT_2ADDR = 0xc7, + OP_MUL_FLOAT_2ADDR = 0xc8, + OP_DIV_FLOAT_2ADDR = 0xc9, + OP_REM_FLOAT_2ADDR = 0xca, + OP_ADD_DOUBLE_2ADDR = 0xcb, + OP_SUB_DOUBLE_2ADDR = 0xcc, + OP_MUL_DOUBLE_2ADDR = 0xcd, + OP_DIV_DOUBLE_2ADDR = 0xce, + OP_REM_DOUBLE_2ADDR = 0xcf, + + OP_ADD_INT_LIT16 = 0xd0, + OP_RSUB_INT = 0xd1, /* no _LIT16 suffix for this */ + OP_MUL_INT_LIT16 = 0xd2, + 
OP_DIV_INT_LIT16 = 0xd3, + OP_REM_INT_LIT16 = 0xd4, + OP_AND_INT_LIT16 = 0xd5, + OP_OR_INT_LIT16 = 0xd6, + OP_XOR_INT_LIT16 = 0xd7, + + OP_ADD_INT_LIT8 = 0xd8, + OP_RSUB_INT_LIT8 = 0xd9, + OP_MUL_INT_LIT8 = 0xda, + OP_DIV_INT_LIT8 = 0xdb, + OP_REM_INT_LIT8 = 0xdc, + OP_AND_INT_LIT8 = 0xdd, + OP_OR_INT_LIT8 = 0xde, + OP_XOR_INT_LIT8 = 0xdf, + OP_SHL_INT_LIT8 = 0xe0, + OP_SHR_INT_LIT8 = 0xe1, + OP_USHR_INT_LIT8 = 0xe2, + + OP_UNUSED_E3 = 0xe3, + OP_UNUSED_E4 = 0xe4, + OP_UNUSED_E5 = 0xe5, + OP_UNUSED_E6 = 0xe6, + OP_UNUSED_E7 = 0xe7, + OP_UNUSED_E8 = 0xe8, + OP_UNUSED_E9 = 0xe9, + OP_UNUSED_EA = 0xea, + OP_UNUSED_EB = 0xeb, + OP_UNUSED_EC = 0xec, + OP_UNUSED_ED = 0xed, + + /* optimizer output -- these are never generated by "dx" */ + OP_EXECUTE_INLINE = 0xee, + OP_UNUSED_EF = 0xef, /* OP_EXECUTE_INLINE_RANGE? */ + + OP_INVOKE_DIRECT_EMPTY = 0xf0, + OP_UNUSED_F1 = 0xf1, /* OP_INVOKE_DIRECT_EMPTY_RANGE? */ + OP_IGET_QUICK = 0xf2, + OP_IGET_WIDE_QUICK = 0xf3, + OP_IGET_OBJECT_QUICK = 0xf4, + OP_IPUT_QUICK = 0xf5, + OP_IPUT_WIDE_QUICK = 0xf6, + OP_IPUT_OBJECT_QUICK = 0xf7, + + OP_INVOKE_VIRTUAL_QUICK = 0xf8, + OP_INVOKE_VIRTUAL_QUICK_RANGE = 0xf9, + OP_INVOKE_SUPER_QUICK = 0xfa, + OP_INVOKE_SUPER_QUICK_RANGE = 0xfb, + OP_UNUSED_FC = 0xfc, /* OP_INVOKE_DIRECT_QUICK? */ + OP_UNUSED_FD = 0xfd, /* OP_INVOKE_DIRECT_QUICK_RANGE? */ + OP_UNUSED_FE = 0xfe, /* OP_INVOKE_INTERFACE_QUICK? */ + OP_UNUSED_FF = 0xff, /* OP_INVOKE_INTERFACE_QUICK_RANGE*/ +} OpCode; + +#define kNumDalvikInstructions 256 + +/* + * Switch-statement signatures are a "NOP" followed by a code. (A true NOP + * is 0x0000.) + */ +#define kPackedSwitchSignature 0x0100 +#define kSparseSwitchSignature 0x0200 +#define kArrayDataSignature 0x0300 + +/* + * Macro used to generate computed goto tables for the C interpreter. + * + * The labels here must match up with the labels in the interpreter + * implementation. 
There is no direct connection between these and the + * numeric definitions above, but if the two get out of sync strange things + * will happen. + */ +#define DEFINE_GOTO_TABLE(_name) \ + static const void* _name[kNumDalvikInstructions] = { \ + /* 00..0f */ \ + H(OP_NOP), \ + H(OP_MOVE), \ + H(OP_MOVE_FROM16), \ + H(OP_MOVE_16), \ + H(OP_MOVE_WIDE), \ + H(OP_MOVE_WIDE_FROM16), \ + H(OP_MOVE_WIDE_16), \ + H(OP_MOVE_OBJECT), \ + H(OP_MOVE_OBJECT_FROM16), \ + H(OP_MOVE_OBJECT_16), \ + H(OP_MOVE_RESULT), \ + H(OP_MOVE_RESULT_WIDE), \ + H(OP_MOVE_RESULT_OBJECT), \ + H(OP_MOVE_EXCEPTION), \ + H(OP_RETURN_VOID), \ + H(OP_RETURN), \ + /* 10..1f */ \ + H(OP_RETURN_WIDE), \ + H(OP_RETURN_OBJECT), \ + H(OP_CONST_4), \ + H(OP_CONST_16), \ + H(OP_CONST), \ + H(OP_CONST_HIGH16), \ + H(OP_CONST_WIDE_16), \ + H(OP_CONST_WIDE_32), \ + H(OP_CONST_WIDE), \ + H(OP_CONST_WIDE_HIGH16), \ + H(OP_CONST_STRING), \ + H(OP_CONST_STRING_JUMBO), \ + H(OP_CONST_CLASS), \ + H(OP_MONITOR_ENTER), \ + H(OP_MONITOR_EXIT), \ + H(OP_CHECK_CAST), \ + /* 20..2f */ \ + H(OP_INSTANCE_OF), \ + H(OP_ARRAY_LENGTH), \ + H(OP_NEW_INSTANCE), \ + H(OP_NEW_ARRAY), \ + H(OP_FILLED_NEW_ARRAY), \ + H(OP_FILLED_NEW_ARRAY_RANGE), \ + H(OP_FILL_ARRAY_DATA), \ + H(OP_THROW), \ + H(OP_GOTO), \ + H(OP_GOTO_16), \ + H(OP_GOTO_32), \ + H(OP_PACKED_SWITCH), \ + H(OP_SPARSE_SWITCH), \ + H(OP_CMPL_FLOAT), \ + H(OP_CMPG_FLOAT), \ + H(OP_CMPL_DOUBLE), \ + /* 30..3f */ \ + H(OP_CMPG_DOUBLE), \ + H(OP_CMP_LONG), \ + H(OP_IF_EQ), \ + H(OP_IF_NE), \ + H(OP_IF_LT), \ + H(OP_IF_GE), \ + H(OP_IF_GT), \ + H(OP_IF_LE), \ + H(OP_IF_EQZ), \ + H(OP_IF_NEZ), \ + H(OP_IF_LTZ), \ + H(OP_IF_GEZ), \ + H(OP_IF_GTZ), \ + H(OP_IF_LEZ), \ + H(OP_UNUSED_3E), \ + H(OP_UNUSED_3F), \ + /* 40..4f */ \ + H(OP_UNUSED_40), \ + H(OP_UNUSED_41), \ + H(OP_UNUSED_42), \ + H(OP_UNUSED_43), \ + H(OP_AGET), \ + H(OP_AGET_WIDE), \ + H(OP_AGET_OBJECT), \ + H(OP_AGET_BOOLEAN), \ + H(OP_AGET_BYTE), \ + H(OP_AGET_CHAR), \ + H(OP_AGET_SHORT), \ + H(OP_APUT), \ + 
H(OP_APUT_WIDE), \ + H(OP_APUT_OBJECT), \ + H(OP_APUT_BOOLEAN), \ + H(OP_APUT_BYTE), \ + /* 50..5f */ \ + H(OP_APUT_CHAR), \ + H(OP_APUT_SHORT), \ + H(OP_IGET), \ + H(OP_IGET_WIDE), \ + H(OP_IGET_OBJECT), \ + H(OP_IGET_BOOLEAN), \ + H(OP_IGET_BYTE), \ + H(OP_IGET_CHAR), \ + H(OP_IGET_SHORT), \ + H(OP_IPUT), \ + H(OP_IPUT_WIDE), \ + H(OP_IPUT_OBJECT), \ + H(OP_IPUT_BOOLEAN), \ + H(OP_IPUT_BYTE), \ + H(OP_IPUT_CHAR), \ + H(OP_IPUT_SHORT), \ + /* 60..6f */ \ + H(OP_SGET), \ + H(OP_SGET_WIDE), \ + H(OP_SGET_OBJECT), \ + H(OP_SGET_BOOLEAN), \ + H(OP_SGET_BYTE), \ + H(OP_SGET_CHAR), \ + H(OP_SGET_SHORT), \ + H(OP_SPUT), \ + H(OP_SPUT_WIDE), \ + H(OP_SPUT_OBJECT), \ + H(OP_SPUT_BOOLEAN), \ + H(OP_SPUT_BYTE), \ + H(OP_SPUT_CHAR), \ + H(OP_SPUT_SHORT), \ + H(OP_INVOKE_VIRTUAL), \ + H(OP_INVOKE_SUPER), \ + /* 70..7f */ \ + H(OP_INVOKE_DIRECT), \ + H(OP_INVOKE_STATIC), \ + H(OP_INVOKE_INTERFACE), \ + H(OP_UNUSED_73), \ + H(OP_INVOKE_VIRTUAL_RANGE), \ + H(OP_INVOKE_SUPER_RANGE), \ + H(OP_INVOKE_DIRECT_RANGE), \ + H(OP_INVOKE_STATIC_RANGE), \ + H(OP_INVOKE_INTERFACE_RANGE), \ + H(OP_UNUSED_79), \ + H(OP_UNUSED_7A), \ + H(OP_NEG_INT), \ + H(OP_NOT_INT), \ + H(OP_NEG_LONG), \ + H(OP_NOT_LONG), \ + H(OP_NEG_FLOAT), \ + /* 80..8f */ \ + H(OP_NEG_DOUBLE), \ + H(OP_INT_TO_LONG), \ + H(OP_INT_TO_FLOAT), \ + H(OP_INT_TO_DOUBLE), \ + H(OP_LONG_TO_INT), \ + H(OP_LONG_TO_FLOAT), \ + H(OP_LONG_TO_DOUBLE), \ + H(OP_FLOAT_TO_INT), \ + H(OP_FLOAT_TO_LONG), \ + H(OP_FLOAT_TO_DOUBLE), \ + H(OP_DOUBLE_TO_INT), \ + H(OP_DOUBLE_TO_LONG), \ + H(OP_DOUBLE_TO_FLOAT), \ + H(OP_INT_TO_BYTE), \ + H(OP_INT_TO_CHAR), \ + H(OP_INT_TO_SHORT), \ + /* 90..9f */ \ + H(OP_ADD_INT), \ + H(OP_SUB_INT), \ + H(OP_MUL_INT), \ + H(OP_DIV_INT), \ + H(OP_REM_INT), \ + H(OP_AND_INT), \ + H(OP_OR_INT), \ + H(OP_XOR_INT), \ + H(OP_SHL_INT), \ + H(OP_SHR_INT), \ + H(OP_USHR_INT), \ + H(OP_ADD_LONG), \ + H(OP_SUB_LONG), \ + H(OP_MUL_LONG), \ + H(OP_DIV_LONG), \ + H(OP_REM_LONG), \ + /* a0..af */ \ + H(OP_AND_LONG), \ + 
H(OP_OR_LONG), \ + H(OP_XOR_LONG), \ + H(OP_SHL_LONG), \ + H(OP_SHR_LONG), \ + H(OP_USHR_LONG), \ + H(OP_ADD_FLOAT), \ + H(OP_SUB_FLOAT), \ + H(OP_MUL_FLOAT), \ + H(OP_DIV_FLOAT), \ + H(OP_REM_FLOAT), \ + H(OP_ADD_DOUBLE), \ + H(OP_SUB_DOUBLE), \ + H(OP_MUL_DOUBLE), \ + H(OP_DIV_DOUBLE), \ + H(OP_REM_DOUBLE), \ + /* b0..bf */ \ + H(OP_ADD_INT_2ADDR), \ + H(OP_SUB_INT_2ADDR), \ + H(OP_MUL_INT_2ADDR), \ + H(OP_DIV_INT_2ADDR), \ + H(OP_REM_INT_2ADDR), \ + H(OP_AND_INT_2ADDR), \ + H(OP_OR_INT_2ADDR), \ + H(OP_XOR_INT_2ADDR), \ + H(OP_SHL_INT_2ADDR), \ + H(OP_SHR_INT_2ADDR), \ + H(OP_USHR_INT_2ADDR), \ + H(OP_ADD_LONG_2ADDR), \ + H(OP_SUB_LONG_2ADDR), \ + H(OP_MUL_LONG_2ADDR), \ + H(OP_DIV_LONG_2ADDR), \ + H(OP_REM_LONG_2ADDR), \ + /* c0..cf */ \ + H(OP_AND_LONG_2ADDR), \ + H(OP_OR_LONG_2ADDR), \ + H(OP_XOR_LONG_2ADDR), \ + H(OP_SHL_LONG_2ADDR), \ + H(OP_SHR_LONG_2ADDR), \ + H(OP_USHR_LONG_2ADDR), \ + H(OP_ADD_FLOAT_2ADDR), \ + H(OP_SUB_FLOAT_2ADDR), \ + H(OP_MUL_FLOAT_2ADDR), \ + H(OP_DIV_FLOAT_2ADDR), \ + H(OP_REM_FLOAT_2ADDR), \ + H(OP_ADD_DOUBLE_2ADDR), \ + H(OP_SUB_DOUBLE_2ADDR), \ + H(OP_MUL_DOUBLE_2ADDR), \ + H(OP_DIV_DOUBLE_2ADDR), \ + H(OP_REM_DOUBLE_2ADDR), \ + /* d0..df */ \ + H(OP_ADD_INT_LIT16), \ + H(OP_RSUB_INT), \ + H(OP_MUL_INT_LIT16), \ + H(OP_DIV_INT_LIT16), \ + H(OP_REM_INT_LIT16), \ + H(OP_AND_INT_LIT16), \ + H(OP_OR_INT_LIT16), \ + H(OP_XOR_INT_LIT16), \ + H(OP_ADD_INT_LIT8), \ + H(OP_RSUB_INT_LIT8), \ + H(OP_MUL_INT_LIT8), \ + H(OP_DIV_INT_LIT8), \ + H(OP_REM_INT_LIT8), \ + H(OP_AND_INT_LIT8), \ + H(OP_OR_INT_LIT8), \ + H(OP_XOR_INT_LIT8), \ + /* e0..ef */ \ + H(OP_SHL_INT_LIT8), \ + H(OP_SHR_INT_LIT8), \ + H(OP_USHR_INT_LIT8), \ + H(OP_UNUSED_E3), \ + H(OP_UNUSED_E4), \ + H(OP_UNUSED_E5), \ + H(OP_UNUSED_E6), \ + H(OP_UNUSED_E7), \ + H(OP_UNUSED_E8), \ + H(OP_UNUSED_E9), \ + H(OP_UNUSED_EA), \ + H(OP_UNUSED_EB), \ + H(OP_UNUSED_EC), \ + H(OP_UNUSED_ED), \ + H(OP_EXECUTE_INLINE), \ + H(OP_UNUSED_EF), \ + /* f0..ff */ \ + H(OP_INVOKE_DIRECT_EMPTY), 
\ + H(OP_UNUSED_F1), \ + H(OP_IGET_QUICK), \ + H(OP_IGET_WIDE_QUICK), \ + H(OP_IGET_OBJECT_QUICK), \ + H(OP_IPUT_QUICK), \ + H(OP_IPUT_WIDE_QUICK), \ + H(OP_IPUT_OBJECT_QUICK), \ + H(OP_INVOKE_VIRTUAL_QUICK), \ + H(OP_INVOKE_VIRTUAL_QUICK_RANGE), \ + H(OP_INVOKE_SUPER_QUICK), \ + H(OP_INVOKE_SUPER_QUICK_RANGE), \ + H(OP_UNUSED_FC), \ + H(OP_UNUSED_FD), \ + H(OP_UNUSED_FE), \ + H(OP_UNUSED_FF), \ + }; + +#endif /*_LIBDEX_OPCODE*/ diff --git a/libdex/OptInvocation.c b/libdex/OptInvocation.c new file mode 100644 index 000000000..8ce918b97 --- /dev/null +++ b/libdex/OptInvocation.c @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Utility functions for managing an invocation of "dexopt". + */ +#include "vm/DalvikVersion.h" + +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/file.h> +#include <errno.h> + +#include "OptInvocation.h" +#include "DexFile.h" + +static const char* kClassesDex = "classes.dex"; + + +/* + * Given the filename of a .jar or .dex file, construct the DEX file cache + * name. + * + * For a Jar, "subFileName" is the name of the entry (usually "classes.dex"). + * For a DEX, it may be NULL. + * + * Returns a newly-allocated string, or NULL on failure. 
/*
 * Append "src" to the NUL-terminated string already held in "dst", whose
 * total capacity (terminator included) is "dstSize" bytes.
 *
 * Returns 0 on success, or -1 (leaving "dst" unchanged) if the result
 * would not fit.
 */
static int appendToPath(char* dst, size_t dstSize, const char* src)
{
    const size_t used = strlen(dst);
    const size_t extra = strlen(src);

    if (extra >= dstSize - used)
        return -1;
    memcpy(dst + used, src, extra + 1);
    return 0;
}

/*
 * Given the filename of a .jar or .dex file, construct the DEX file cache
 * name.
 *
 * For a Jar, "subFileName" is the name of the entry (usually "classes.dex").
 * For a DEX, it may be NULL.
 *
 * The result has the form
 *   <dataRoot>/dalvik-cache/<absolute path with '/' turned into '@'>
 * where <dataRoot> is $ANDROID_DATA, or "/data" if that is not set, and
 * the leading '/' of the absolute path is kept as the separator.
 *
 * Returns a newly-allocated string (release it with free()), or NULL on
 * failure: the current directory could not be determined, the name does
 * not fit in the internal buffers, or the allocation failed.
 *
 * (Earlier code used strncat() with the buffer size as the limit; that
 * argument only bounds the number of bytes appended, so long paths could
 * overrun the stack buffers.  All appends here are checked instead.)
 */
char* dexOptGenerateCacheFileName(const char* fileName, const char* subFileName)
{
    static const char kDexCachePath[] = "dalvik-cache";
    char absoluteFile[512];
    char nameBuf[512];
    const char* dataRoot;
    char* cp;
    int len;

    /*
     * Get the absolute path of the Jar or DEX file.
     */
    absoluteFile[0] = '\0';
    if (fileName[0] != '/') {
        /*
         * Generate the absolute path.  This doesn't do everything it
         * should, e.g. if filename is "./out/whatever" it doesn't crunch
         * the leading "./" out, but it'll do.
         */
        if (getcwd(absoluteFile, sizeof(absoluteFile)) == NULL) {
            LOGE("Can't get CWD while opening jar file\n");
            return NULL;
        }
        if (appendToPath(absoluteFile, sizeof(absoluteFile), "/") != 0)
            goto tooLong;
    }
    if (appendToPath(absoluteFile, sizeof(absoluteFile), fileName) != 0)
        goto tooLong;

    /*
     * Append the name of the Jar file entry, if any.  This is not currently
     * required, but will be if we start putting more than one DEX file
     * in a Jar.
     */
    if (subFileName != NULL) {
        if (appendToPath(absoluteFile, sizeof(absoluteFile), "/") != 0 ||
            appendToPath(absoluteFile, sizeof(absoluteFile), subFileName) != 0)
        {
            goto tooLong;
        }
    }

    /*
     * Turn the path into a flat filename by replacing any slashes after
     * the first one with '@' characters.
     */
    if (absoluteFile[0] != '\0') {
        for (cp = absoluteFile + 1; *cp != '\0'; cp++) {
            if (*cp == '/')
                *cp = '@';
        }
    }

    /*
     * Build the cache directory name and tack the flattened file name on
     * to get the actual cache file path.
     */
    dataRoot = getenv("ANDROID_DATA");
    if (dataRoot == NULL)
        dataRoot = "/data";
    len = snprintf(nameBuf, sizeof(nameBuf), "%s/%s%s",
            dataRoot, kDexCachePath, absoluteFile);
    if (len < 0 || (size_t) len >= sizeof(nameBuf))
        goto tooLong;

    /* never hand a NULL pointer to "%s" */
    LOGV("Cache file for '%s' '%s' is '%s'\n", fileName,
        (subFileName != NULL) ? subFileName : "(null)", nameBuf);
    return strdup(nameBuf);

tooLong:
    LOGE("Cache file name for '%s' is too long\n", fileName);
    return NULL;
}
On return, it will + * be positioned just past the header, and the place where the DEX data + * should go. + * + * Returns 0 on success, errno on failure. + */ +int dexOptCreateEmptyHeader(int fd) +{ + DexOptHeader optHdr; + ssize_t actual; + + assert(lseek(fd, 0, SEEK_CUR) == 0); + + /* + * The data is only expected to be readable on the current system, so + * we just write the structure. We do need the file offset to be 64-bit + * aligned to fulfill a DEX requirement. + */ + assert((sizeof(optHdr) & 0x07) == 0); + memset(&optHdr, 0xff, sizeof(optHdr)); + optHdr.dexOffset = sizeof(optHdr); + actual = write(fd, &optHdr, sizeof(optHdr)); + if (actual != sizeof(optHdr)) { + int err = errno ? errno : -1; + LOGE("opt header write failed: %s", strerror(errno)); + return errno; + } + + return 0; +} + diff --git a/libdex/OptInvocation.h b/libdex/OptInvocation.h new file mode 100644 index 000000000..d9708caed --- /dev/null +++ b/libdex/OptInvocation.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Utility functions related to "dexopt". + */ +#ifndef _LIBDEX_OPTINVOCATION +#define _LIBDEX_OPTINVOCATION + +#include <stdint.h> +#include <unistd.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Utility routines, used by the VM. 
*/
char* dexOptGenerateCacheFileName(const char* fileName,
    const char* subFileName);
int dexOptCreateEmptyHeader(int fd);

/*
 * some flags that get passed through to "dexopt" command
 *
 * Each flag occupies its own bit, so several can be combined with
 * bitwise OR.
 */
#define DEXOPT_OPT_ENABLED       (1)
#define DEXOPT_OPT_ALL           (1 << 1)
#define DEXOPT_VERIFY_ENABLED    (1 << 2)
#define DEXOPT_VERIFY_ALL        (1 << 3)
#define DEXOPT_IS_BOOTSTRAP      (1 << 4)
#define DEXOPT_GEN_REGISTER_MAP  (1 << 5)


#ifdef __cplusplus
};
#endif

#endif /*_LIBDEX_OPTINVOCATION*/
diff --git a/libdex/SysUtil.c b/libdex/SysUtil.c new file mode 100644 index 000000000..530ac2eeb --- /dev/null +++ b/libdex/SysUtil.c @@ -0,0 +1,286 @@
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * System utilities.
 */
#include "DexFile.h"
#include "SysUtil.h"

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>

#ifdef HAVE_POSIX_FILEMAP
#include <sys/mman.h>
#endif

#include <limits.h>
#include <errno.h>

/*
 * Having trouble finding a portable way to get this.  sysconf(_SC_PAGE_SIZE)
 * seems appropriate, but we don't have that on the device.  Some systems
 * have getpagesize(2), though the linux man page has some odd cautions.
 *
 * This value is a fixed assumption, not a value queried from the system.
 */
#define DEFAULT_PAGE_SIZE   4096


/*
 * Create an anonymous shared memory segment large enough to hold "length"
 * bytes.  The actual segment may be larger because mmap() operates on
 * page boundaries (usually 4K).
+ */ +static void* sysCreateAnonShmem(size_t length) +{ +#ifdef HAVE_POSIX_FILEMAP + void* ptr; + + ptr = mmap(NULL, length, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + LOGW("mmap(%d, RW, SHARED|ANON) failed: %s\n", (int) length, + strerror(errno)); + return NULL; + } + + return ptr; +#else + LOGE("sysCreateAnonShmem not implemented.\n"); + return NULL; +#endif +} + +static int getFileStartAndLength(int fd, off_t *start_, size_t *length_) +{ + off_t start, end; + size_t length; + + assert(start_ != NULL); + assert(length_ != NULL); + + start = lseek(fd, 0L, SEEK_CUR); + end = lseek(fd, 0L, SEEK_END); + (void) lseek(fd, start, SEEK_SET); + + if (start == (off_t) -1 || end == (off_t) -1) { + LOGE("could not determine length of file\n"); + return -1; + } + + length = end - start; + if (length == 0) { + LOGE("file is empty\n"); + return -1; + } + + *start_ = start; + *length_ = length; + + return 0; +} + +/* + * Pull the contents of a file into an new shared memory segment. We grab + * everything from fd's current offset on. + * + * We need to know the length ahead of time so we can allocate a segment + * of sufficient size. + */ +int sysLoadFileInShmem(int fd, MemMapping* pMap) +{ +#ifdef HAVE_POSIX_FILEMAP + off_t start; + size_t length, actual; + void* memPtr; + + assert(pMap != NULL); + + if (getFileStartAndLength(fd, &start, &length) < 0) + return -1; + + memPtr = sysCreateAnonShmem(length); + if (memPtr == NULL) + return -1; + + actual = read(fd, memPtr, length); + if (actual != length) { + LOGE("only read %d of %d bytes\n", (int) actual, (int) length); + sysReleaseShmem(pMap); + return -1; + } + + pMap->baseAddr = pMap->addr = memPtr; + pMap->baseLength = pMap->length = length; + + return 0; +#else + LOGE("sysLoadFileInShmem not implemented.\n"); + return -1; +#endif +} + +/* + * Map a file (from fd's current offset) into a shared, read-only memory + * segment. The file offset must be a multiple of the page size. 
+ * + * On success, returns 0 and fills out "pMap". On failure, returns a nonzero + * value and does not disturb "pMap". + */ +int sysMapFileInShmem(int fd, MemMapping* pMap) +{ +#ifdef HAVE_POSIX_FILEMAP + off_t start; + size_t length; + void* memPtr; + + assert(pMap != NULL); + + if (getFileStartAndLength(fd, &start, &length) < 0) + return -1; + + memPtr = mmap(NULL, length, PROT_READ, MAP_FILE | MAP_SHARED, fd, start); + if (memPtr == MAP_FAILED) { + LOGW("mmap(%d, R, FILE|SHARED, %d, %d) failed: %s\n", (int) length, + fd, (int) start, strerror(errno)); + return -1; + } + + pMap->baseAddr = pMap->addr = memPtr; + pMap->baseLength = pMap->length = length; + + return 0; +#else + /* No MMAP, just fake it by copying the bits. + For Win32 we could use MapViewOfFile if really necessary + (see libs/utils/FileMap.cpp). + */ + off_t start; + size_t length; + void* memPtr; + + assert(pMap != NULL); + + if (getFileStartAndLength(fd, &start, &length) < 0) + return -1; + + memPtr = malloc(length); + if (read(fd, memPtr, length) < 0) { + LOGW("read(fd=%d, start=%d, length=%d) failed: %s\n", (int) length, + fd, (int) start, strerror(errno)); + return -1; + } + + pMap->baseAddr = pMap->addr = memPtr; + pMap->baseLength = pMap->length = length; + + return 0; +#endif +} + +/* + * Map part of a file (from fd's current offset) into a shared, read-only + * memory segment. + * + * On success, returns 0 and fills out "pMap". On failure, returns a nonzero + * value and does not disturb "pMap". 
+ */ +int sysMapFileSegmentInShmem(int fd, off_t start, long length, + MemMapping* pMap) +{ +#ifdef HAVE_POSIX_FILEMAP + off_t dummy; + size_t fileLength, actualLength; + off_t actualStart; + int adjust; + void* memPtr; + + assert(pMap != NULL); + + if (getFileStartAndLength(fd, &dummy, &fileLength) < 0) + return -1; + + if (start + length > (long)fileLength) { + LOGW("bad segment: st=%d len=%ld flen=%d\n", + (int) start, length, (int) fileLength); + return -1; + } + + /* adjust to be page-aligned */ + adjust = start % DEFAULT_PAGE_SIZE; + actualStart = start - adjust; + actualLength = length + adjust; + + memPtr = mmap(NULL, actualLength, PROT_READ, MAP_FILE | MAP_SHARED, + fd, actualStart); + if (memPtr == MAP_FAILED) { + LOGW("mmap(%d, R, FILE|SHARED, %d, %d) failed: %s\n", + (int) actualLength, fd, (int) actualStart, strerror(errno)); + return -1; + } + + pMap->baseAddr = memPtr; + pMap->baseLength = actualLength; + pMap->addr = (char*)memPtr + adjust; + pMap->length = length; + + LOGVV("mmap seg (st=%d ln=%d): bp=%p bl=%d ad=%p ln=%d\n", + (int) start, (int) length, + pMap->baseAddr, (int) pMap->baseLength, + pMap->addr, (int) pMap->length); + + return 0; +#else + LOGE("sysMapFileSegmentInShmem not implemented.\n"); + return -1; +#endif +} + +/* + * Release a memory mapping. + */ +void sysReleaseShmem(MemMapping* pMap) +{ +#ifdef HAVE_POSIX_FILEMAP + if (pMap->baseAddr == NULL && pMap->baseLength == 0) + return; + + if (munmap(pMap->baseAddr, pMap->baseLength) < 0) { + LOGW("munmap(%p, %d) failed: %s\n", + pMap->baseAddr, (int)pMap->baseLength, strerror(errno)); + } else { + LOGV("munmap(%p, %d) succeeded\n", pMap->baseAddr, pMap->baseLength); + pMap->baseAddr = NULL; + pMap->baseLength = 0; + } +#else + /* Free the bits allocated by sysMapFileInShmem. */ + if (pMap->baseAddr != NULL) { + free(pMap->baseAddr); + pMap->baseAddr = NULL; + } + pMap->baseLength = 0; +#endif +} + +/* + * Make a copy of a MemMapping. 
+ */ +void sysCopyMap(MemMapping* dst, const MemMapping* src) +{ + memcpy(dst, src, sizeof(MemMapping)); +} + diff --git a/libdex/SysUtil.h b/libdex/SysUtil.h new file mode 100644 index 000000000..8d85efa8a --- /dev/null +++ b/libdex/SysUtil.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * System utilities. + */ +#ifndef _LIBDEX_SYSUTIL +#define _LIBDEX_SYSUTIL + +#include <sys/types.h> + +/* + * Use this to keep track of mapped segments. + */ +typedef struct MemMapping { + void* addr; /* start of data */ + size_t length; /* length of data */ + + void* baseAddr; /* page-aligned base address */ + size_t baseLength; /* length of mapping */ +} MemMapping; + +/* + * Copy a map. + */ +void sysCopyMap(MemMapping* dst, const MemMapping* src); + +/* + * Load a file into a new shared memory segment. All data from the current + * offset to the end of the file is pulled in. + * + * The segment is read-write, allowing VM fixups. (It should be modified + * to support .gz/.zip compressed data.) + * + * On success, "pMap" is filled in, and zero is returned. + */ +int sysLoadFileInShmem(int fd, MemMapping* pMap); + +/* + * Map a file (from fd's current offset) into a shared, + * read-only memory segment. + * + * On success, "pMap" is filled in, and zero is returned. + */ +int sysMapFileInShmem(int fd, MemMapping* pMap); + +/* + * Like sysMapFileInShmem, but on only part of a file. 
*/
int sysMapFileSegmentInShmem(int fd, off_t start, long length,
    MemMapping* pMap);

/*
 * Release the pages associated with a shared memory segment.
 *
 * This does not free "pMap"; it just releases the memory.
 */
void sysReleaseShmem(MemMapping* pMap);

#endif /*_LIBDEX_SYSUTIL*/
diff --git a/libdex/ZipArchive.c b/libdex/ZipArchive.c new file mode 100644 index 000000000..a75a85b84 --- /dev/null +++ b/libdex/ZipArchive.c @@ -0,0 +1,643 @@
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Read-only access to Zip archives, with minimal heap allocation.
 */
#include "ZipArchive.h"

#include <zlib.h>

#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>


/*
 * Zip file constants.
+ */ +#define kEOCDSignature 0x06054b50 +#define kEOCDLen 22 +#define kEOCDNumEntries 8 // offset to #of entries in file +#define kEOCDFileOffset 16 // offset to central directory + +#define kMaxCommentLen 65535 // longest possible in ushort +#define kMaxEOCDSearch (kMaxCommentLen + kEOCDLen) + +#define kLFHSignature 0x04034b50 +#define kLFHLen 30 // excluding variable-len fields +#define kLFHNameLen 26 // offset to filename length +#define kLFHExtraLen 28 // offset to extra length + +#define kCDESignature 0x02014b50 +#define kCDELen 46 // excluding variable-len fields +#define kCDEMethod 10 // offset to compression method +#define kCDEModWhen 12 // offset to modification timestamp +#define kCDECRC 16 // offset to entry CRC +#define kCDECompLen 20 // offset to compressed length +#define kCDEUncompLen 24 // offset to uncompressed length +#define kCDENameLen 28 // offset to filename length +#define kCDEExtraLen 30 // offset to extra length +#define kCDECommentLen 32 // offset to comment length +#define kCDELocalOffset 42 // offset to local hdr + +/* + * The values we return for ZipEntry use 0 as an invalid value, so we + * want to adjust the hash table index by a fixed amount. Using a large + * value helps insure that people don't mix & match arguments, e.g. with + * entry indices. + */ +#define kZipEntryAdj 10000 + +/* + * Convert a ZipEntry to a hash table index, verifying that it's in a + * valid range. + */ +static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry) +{ + long ent = ((long) entry) - kZipEntryAdj; + if (ent < 0 || ent >= pArchive->mHashTableSize || + pArchive->mHashTable[ent].name == NULL) + { + LOGW("Invalid ZipEntry %p (%ld)\n", entry, ent); + return -1; + } + return ent; +} + +/* + * Simple string hash function for non-null-terminated strings. 
+ */ +static unsigned int computeHash(const char* str, int len) +{ + unsigned int hash = 0; + + while (len--) + hash = hash * 31 + *str++; + + return hash; +} + +/* + * Add a new entry to the hash table. + */ +static void addToHash(ZipArchive* pArchive, const char* str, int strLen, + unsigned int hash) +{ + const int hashTableSize = pArchive->mHashTableSize; + int ent = hash & (hashTableSize - 1); + + /* + * We over-allocated the table, so we're guaranteed to find an empty slot. + */ + while (pArchive->mHashTable[ent].name != NULL) + ent = (ent + 1) & (hashTableSize-1); + + pArchive->mHashTable[ent].name = str; + pArchive->mHashTable[ent].nameLen = strLen; +} + +/* + * Get 2 little-endian bytes. + */ +static u2 get2LE(unsigned char const* pSrc) +{ + return pSrc[0] | (pSrc[1] << 8); +} + +/* + * Get 4 little-endian bytes. + */ +static u4 get4LE(unsigned char const* pSrc) +{ + u4 result; + + result = pSrc[0]; + result |= pSrc[1] << 8; + result |= pSrc[2] << 16; + result |= pSrc[3] << 24; + + return result; +} + +/* + * Parse the Zip archive, verifying its contents and initializing internal + * data structures. + */ +static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap) +{ +#define CHECK_OFFSET(_off) { \ + if ((unsigned int) (_off) >= maxOffset) { \ + LOGE("ERROR: bad offset %u (max %d): %s\n", \ + (unsigned int) (_off), maxOffset, #_off); \ + goto bail; \ + } \ + } + bool result = false; + const unsigned char* basePtr = (const unsigned char*)pMap->addr; + const unsigned char* ptr; + size_t length = pMap->length; + unsigned int i, numEntries, cdOffset; + unsigned int val; + + /* + * The first 4 bytes of the file will either be the local header + * signature for the first file (kLFHSignature) or, if the archive doesn't + * have any files in it, the end-of-central-directory signature + * (kEOCDSignature). 
+ */ + val = get4LE(basePtr); + if (val == kEOCDSignature) { + LOGI("Found Zip archive, but it looks empty\n"); + goto bail; + } else if (val != kLFHSignature) { + LOGV("Not a Zip archive (found 0x%08x)\n", val); + goto bail; + } + + /* + * Find the EOCD. We'll find it immediately unless they have a file + * comment. + */ + ptr = basePtr + length - kEOCDLen; + + while (ptr >= basePtr) { + if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature) + break; + ptr--; + } + if (ptr < basePtr) { + LOGI("Could not find end-of-central-directory in Zip\n"); + goto bail; + } + + /* + * There are two interesting items in the EOCD block: the number of + * entries in the file, and the file offset of the start of the + * central directory. + * + * (There's actually a count of the #of entries in this file, and for + * all files which comprise a spanned archive, but for our purposes + * we're only interested in the current file. Besides, we expect the + * two to be equivalent for our stuff.) + */ + numEntries = get2LE(ptr + kEOCDNumEntries); + cdOffset = get4LE(ptr + kEOCDFileOffset); + + /* valid offsets are [0,EOCD] */ + unsigned int maxOffset; + maxOffset = (ptr - basePtr) +1; + + LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset); + if (numEntries == 0 || cdOffset >= length) { + LOGW("Invalid entries=%d offset=%d (len=%zd)\n", + numEntries, cdOffset, length); + goto bail; + } + + /* + * Create hash table. We have a minimum 75% load factor, possibly as + * low as 50% after we round off to a power of 2. There must be at + * least one unused entry to avoid an infinite loop during creation. + */ + pArchive->mNumEntries = numEntries; + pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3); + pArchive->mHashTable = (ZipHashEntry*) + calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry)); + + /* + * Walk through the central directory, adding entries to the hash + * table. 
+ */ + ptr = basePtr + cdOffset; + for (i = 0; i < numEntries; i++) { + unsigned int fileNameLen, extraLen, commentLen, localHdrOffset; + const unsigned char* localHdr; + unsigned int hash; + + if (get4LE(ptr) != kCDESignature) { + LOGW("Missed a central dir sig (at %d)\n", i); + goto bail; + } + if (ptr + kCDELen > basePtr + length) { + LOGW("Ran off the end (at %d)\n", i); + goto bail; + } + + localHdrOffset = get4LE(ptr + kCDELocalOffset); + CHECK_OFFSET(localHdrOffset); + fileNameLen = get2LE(ptr + kCDENameLen); + extraLen = get2LE(ptr + kCDEExtraLen); + commentLen = get2LE(ptr + kCDECommentLen); + + //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n", + // i, localHdrOffset, fileNameLen, extraLen, commentLen); + //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen); + + /* add the CDE filename to the hash table */ + hash = computeHash((const char*)ptr + kCDELen, fileNameLen); + addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash); + + localHdr = basePtr + localHdrOffset; + if (get4LE(localHdr) != kLFHSignature) { + LOGW("Bad offset to local header: %d (at %d)\n", + localHdrOffset, i); + goto bail; + } + + ptr += kCDELen + fileNameLen + extraLen + commentLen; + CHECK_OFFSET(ptr - basePtr); + } + + result = true; + +bail: + return result; +#undef CHECK_OFFSET +} + +/* + * Open the specified file read-only. We memory-map the entire thing and + * parse the contents. + * + * This will be called on non-Zip files, especially during VM startup, so + * we don't want to be too noisy about certain types of failure. (Do + * we want a "quiet" flag?) + * + * On success, we fill out the contents of "pArchive" and return 0. + */ +int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive) +{ + int fd, err; + + LOGV("Opening archive '%s' %p\n", fileName, pArchive); + + fd = open(fileName, O_RDONLY, 0); + if (fd < 0) { + err = errno ? 
errno : -1; + LOGV("Unable to open '%s': %s\n", fileName, strerror(err)); + return err; + } + + return dexZipPrepArchive(fd, fileName, pArchive); +} + +/* + * Prepare to access a ZipArchive in an open file descriptor. + */ +int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive) +{ + MemMapping map; + int err; + + map.addr = NULL; + memset(pArchive, 0, sizeof(*pArchive)); + + pArchive->mFd = fd; + + if (sysMapFileInShmem(pArchive->mFd, &map) != 0) { + err = -1; + LOGW("Map of '%s' failed\n", debugFileName); + goto bail; + } + + if (map.length < kEOCDLen) { + err = -1; + LOGV("File '%s' too small to be zip (%zd)\n", debugFileName,map.length); + goto bail; + } + + if (!parseZipArchive(pArchive, &map)) { + err = -1; + LOGV("Parsing '%s' failed\n", debugFileName); + goto bail; + } + + /* success */ + err = 0; + sysCopyMap(&pArchive->mMap, &map); + map.addr = NULL; + +bail: + if (err != 0) + dexZipCloseArchive(pArchive); + if (map.addr != NULL) + sysReleaseShmem(&map); + return err; +} + + +/* + * Close a ZipArchive, closing the file and freeing the contents. + * + * NOTE: the ZipArchive may not have been fully created. + */ +void dexZipCloseArchive(ZipArchive* pArchive) +{ + LOGV("Closing archive %p\n", pArchive); + + if (pArchive->mFd >= 0) + close(pArchive->mFd); + + sysReleaseShmem(&pArchive->mMap); + + free(pArchive->mHashTable); + + pArchive->mFd = -1; + pArchive->mNumEntries = -1; + pArchive->mHashTableSize = -1; + pArchive->mHashTable = NULL; +} + + +/* + * Find a matching entry. + * + * Returns 0 if not found. 
+ */ +ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName) +{ + int nameLen = strlen(entryName); + unsigned int hash = computeHash(entryName, nameLen); + const int hashTableSize = pArchive->mHashTableSize; + int ent = hash & (hashTableSize-1); + + while (pArchive->mHashTable[ent].name != NULL) { + if (pArchive->mHashTable[ent].nameLen == nameLen && + memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0) + { + /* match */ + return (ZipEntry) (ent + kZipEntryAdj); + } + + ent = (ent + 1) & (hashTableSize-1); + } + + return NULL; +} + +#if 0 +/* + * Find the Nth entry. + * + * This currently involves walking through the sparse hash table, counting + * non-empty entries. If we need to speed this up we can either allocate + * a parallel lookup table or (perhaps better) provide an iterator interface. + */ +ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx) +{ + if (idx < 0 || idx >= pArchive->mNumEntries) { + LOGW("Invalid index %d\n", idx); + return NULL; + } + + int ent; + for (ent = 0; ent < pArchive->mHashTableSize; ent++) { + if (pArchive->mHashTable[ent].name != NULL) { + if (idx-- == 0) + return (ZipEntry) (ent + kZipEntryAdj); + } + } + + return NULL; +} +#endif + +/* + * Get the useful fields from the zip entry. + * + * Returns "false" if the offsets to the fields or the contents of the fields + * appear to be bogus. + */ +bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry, + int* pMethod, long* pUncompLen, long* pCompLen, off_t* pOffset, + long* pModWhen, long* pCrc32) +{ + int ent = entryToIndex(pArchive, entry); + if (ent < 0) + return false; + + /* + * Recover the start of the central directory entry from the filename + * pointer. 
+ */ + const unsigned char* basePtr = (const unsigned char*) + pArchive->mMap.addr; + const unsigned char* ptr = (const unsigned char*) + pArchive->mHashTable[ent].name; + size_t zipLength = + pArchive->mMap.length; + + ptr -= kCDELen; + + int method = get2LE(ptr + kCDEMethod); + if (pMethod != NULL) + *pMethod = method; + + if (pModWhen != NULL) + *pModWhen = get4LE(ptr + kCDEModWhen); + if (pCrc32 != NULL) + *pCrc32 = get4LE(ptr + kCDECRC); + + /* + * We need to make sure that the lengths are not so large that somebody + * trying to map the compressed or uncompressed data runs off the end + * of the mapped region. + */ + unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset); + if (localHdrOffset + kLFHLen >= zipLength) { + LOGE("ERROR: bad local hdr offset in zip\n"); + return false; + } + const unsigned char* localHdr = basePtr + localHdrOffset; + off_t dataOffset = localHdrOffset + kLFHLen + + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen); + if ((unsigned long) dataOffset >= zipLength) { + LOGE("ERROR: bad data offset in zip\n"); + return false; + } + + if (pCompLen != NULL) { + *pCompLen = get4LE(ptr + kCDECompLen); + if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) { + LOGE("ERROR: bad compressed length in zip\n"); + return false; + } + } + if (pUncompLen != NULL) { + *pUncompLen = get4LE(ptr + kCDEUncompLen); + if (*pUncompLen < 0) { + LOGE("ERROR: negative uncompressed length in zip\n"); + return false; + } + if (method == kCompressStored && + (size_t)(dataOffset + *pUncompLen) >= zipLength) + { + LOGE("ERROR: bad uncompressed length in zip\n"); + return false; + } + } + + if (pOffset != NULL) { + *pOffset = dataOffset; + } + return true; +} + +/* + * Uncompress "deflate" data from one buffer to an open file descriptor. 
+ */ +static bool inflateToFile(int fd, const void* inBuf, long uncompLen, + long compLen) +{ + bool result = false; + const int kWriteBufSize = 32768; + unsigned char writeBuf[kWriteBufSize]; + z_stream zstream; + int zerr; + + /* + * Initialize the zlib stream struct. + */ + memset(&zstream, 0, sizeof(zstream)); + zstream.zalloc = Z_NULL; + zstream.zfree = Z_NULL; + zstream.opaque = Z_NULL; + zstream.next_in = (Bytef*)inBuf; + zstream.avail_in = compLen; + zstream.next_out = (Bytef*) writeBuf; + zstream.avail_out = sizeof(writeBuf); + zstream.data_type = Z_UNKNOWN; + + /* + * Use the undocumented "negative window bits" feature to tell zlib + * that there's no zlib header waiting for it. + */ + zerr = inflateInit2(&zstream, -MAX_WBITS); + if (zerr != Z_OK) { + if (zerr == Z_VERSION_ERROR) { + LOGE("Installed zlib is not compatible with linked version (%s)\n", + ZLIB_VERSION); + } else { + LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); + } + goto bail; + } + + /* + * Loop while we have more to do. + */ + do { + /* + * Expand data. 
+ */ + zerr = inflate(&zstream, Z_NO_FLUSH); + if (zerr != Z_OK && zerr != Z_STREAM_END) { + LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n", + zerr, zstream.next_in, zstream.avail_in, + zstream.next_out, zstream.avail_out); + goto z_bail; + } + + /* write when we're full or when we're done */ + if (zstream.avail_out == 0 || + (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf))) + { + long writeSize = zstream.next_out - writeBuf; + int cc = write(fd, writeBuf, writeSize); + if (cc != (int) writeSize) { + if (cc < 0) { + LOGW("write failed in inflate: %s\n", strerror(errno)); + } else { + LOGW("partial write in inflate (%d vs %ld)\n", + cc, writeSize); + } + goto z_bail; + } + + zstream.next_out = writeBuf; + zstream.avail_out = sizeof(writeBuf); + } + } while (zerr == Z_OK); + + assert(zerr == Z_STREAM_END); /* other errors should've been caught */ + + /* paranoia */ + if ((long) zstream.total_out != uncompLen) { + LOGW("Size mismatch on inflated file (%ld vs %ld)\n", + zstream.total_out, uncompLen); + goto z_bail; + } + + result = true; + +z_bail: + inflateEnd(&zstream); /* free up any allocated structures */ + +bail: + return result; +} + +/* + * Uncompress an entry, in its entirety, to an open file descriptor. + * + * TODO: this doesn't verify the data's CRC, but probably should (especially + * for uncompressed data). 
+ */ +bool dexZipExtractEntryToFile(const ZipArchive* pArchive, + const ZipEntry entry, int fd) +{ + bool result = false; + int ent = entryToIndex(pArchive, entry); + if (ent < 0) + return -1; + + const unsigned char* basePtr = (const unsigned char*)pArchive->mMap.addr; + int method; + long uncompLen, compLen; + off_t offset; + + if (!dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen, + &offset, NULL, NULL)) + { + goto bail; + } + + if (method == kCompressStored) { + ssize_t actual; + + actual = write(fd, basePtr + offset, uncompLen); + if (actual < 0) { + LOGE("Write failed: %s\n", strerror(errno)); + goto bail; + } else if (actual != uncompLen) { + LOGE("Partial write during uncompress (%d of %ld)\n", + (int) actual, uncompLen); + goto bail; + } else { + LOGI("+++ successful write\n"); + } + } else { + if (!inflateToFile(fd, basePtr+offset, uncompLen, compLen)) + goto bail; + } + + result = true; + +bail: + return result; +} + diff --git a/libdex/ZipArchive.h b/libdex/ZipArchive.h new file mode 100644 index 000000000..0cd98b221 --- /dev/null +++ b/libdex/ZipArchive.h @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Read-only access to Zip archives, with minimal heap allocation. 
*/
#ifndef _LIBDEX_ZIPARCHIVE
#define _LIBDEX_ZIPARCHIVE

#include "SysUtil.h"
#include "DexFile.h"        // need DEX_INLINE


/*
 * Trivial typedef to ensure that ZipEntry is not treated as a simple
 * integer.  We use NULL to indicate an invalid value.
 */
typedef void* ZipEntry;

/*
 * One entry in the hash table.
 *
 * A slot whose "name" is NULL is unused.
 */
typedef struct ZipHashEntry {
    const char*     name;       /* filename; not null-terminated */
    unsigned short  nameLen;    /* length of "name", in bytes */
    //unsigned int    hash;
} ZipHashEntry;

/*
 * Read-only Zip archive.
 *
 * We want "open" and "find entry by name" to be fast operations, and we
 * want to use as little memory as possible.  We memory-map the file,
 * and load a hash table with pointers to the filenames (which aren't
 * null-terminated).  The other fields are at a fixed offset from the
 * filename, so we don't need to extract those (but we do need to byte-read
 * and endian-swap them every time we want them).
 *
 * To speed comparisons when doing a lookup by name, we could make the mapping
 * "private" (copy-on-write) and null-terminate the filenames after verifying
 * the record structure.  However, this requires a private mapping of
 * every page that the Central Directory touches.  Easier to tuck a copy
 * of the string length into the hash table entry.
 */
typedef struct ZipArchive {
    /* open Zip archive */
    int         mFd;

    /* mapped file */
    MemMapping  mMap;

    /* number of entries in the Zip archive */
    int         mNumEntries;

    /*
     * We know how many entries are in the Zip archive, so we can have a
     * fixed-size hash table.  We probe on collisions.
     */
    int         mHashTableSize;
    ZipHashEntry* mHashTable;
} ZipArchive;

/* Zip compression methods we support */
enum {
    kCompressStored     = 0,        // no compression
    kCompressDeflated   = 8,        // standard deflate
};


/*
 * Open a Zip archive.
 *
 * On success, returns 0 and populates "pArchive".  Returns nonzero errno
 * value on failure.
*/
int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive);

/*
 * Like dexZipOpenArchive, but takes a file descriptor open for reading
 * at the start of the file.  The descriptor must be mappable (this does
 * not allow access to a stream).
 *
 * "debugFileName" will appear in error messages, but is not otherwise used.
 *
 * The descriptor is stored in "pArchive" and is closed by
 * dexZipCloseArchive().
 */
int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive);

/*
 * Close archive, releasing resources associated with it.
 *
 * Depending on the implementation this could unmap pages used by classes
 * stored in a Jar.  This should only be done after unloading classes.
 */
void dexZipCloseArchive(ZipArchive* pArchive);

/*
 * Return the archive's file descriptor.
 */
DEX_INLINE int dexZipGetArchiveFd(const ZipArchive* pArchive) {
    return pArchive->mFd;
}

/*
 * Find an entry in the Zip archive, by name.  Returns NULL if the entry
 * was not found.
 */
ZipEntry dexZipFindEntry(const ZipArchive* pArchive,
    const char* entryName);

/*
 * Retrieve one or more of the "interesting" fields.  Non-NULL pointers
 * are filled in.
 */
bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
    int* pMethod, long* pUncompLen, long* pCompLen, off_t* pOffset,
    long* pModWhen, long* pCrc32);

/*
 * Simple accessors.
+ */ +DEX_INLINE long dexGetZipEntryOffset(const ZipArchive* pArchive, + const ZipEntry entry) +{ + off_t val = 0; + dexZipGetEntryInfo(pArchive, entry, NULL, NULL, NULL, &val, NULL, NULL); + return (long) val; +} +DEX_INLINE long dexGetZipEntryUncompLen(const ZipArchive* pArchive, + const ZipEntry entry) +{ + long val = 0; + dexZipGetEntryInfo(pArchive, entry, NULL, &val, NULL, NULL, NULL, NULL); + return val; +} +DEX_INLINE long dexGetZipEntryModTime(const ZipArchive* pArchive, + const ZipEntry entry) +{ + long val = 0; + dexZipGetEntryInfo(pArchive, entry, NULL, NULL, NULL, NULL, &val, NULL); + return val; +} +DEX_INLINE long dexGetZipEntryCrc32(const ZipArchive* pArchive, + const ZipEntry entry) +{ + long val = 0; + dexZipGetEntryInfo(pArchive, entry, NULL, NULL, NULL, NULL, NULL, &val); + return val; +} + +/* + * Uncompress and write an entry to a file descriptor. + */ +bool dexZipExtractEntryToFile(const ZipArchive* pArchive, + const ZipEntry entry, int fd); + +/* + * Utility function to compute a CRC-32. + */ +u4 dexInitCrc32(void); +u4 dexComputeCrc32(u4 crc, const void* buf, size_t len); + +#endif /*_LIBDEX_ZIPARCHIVE*/ diff --git a/libdex/sha1.c b/libdex/sha1.c new file mode 100644 index 000000000..62892488a --- /dev/null +++ b/libdex/sha1.c @@ -0,0 +1,514 @@ +/* + * Tweaked in various ways for Google/Android: + * - Changed from .cpp to .c. + * - Made argument to SHA1Update a const pointer, and enabled + * SHA1HANDSOFF. This incurs a speed penalty but prevents us from + * trashing the input. + * - Include <endian.h> to get endian info. + * - Split a small piece into a header file. + */ + +/* +sha1sum: inspired by md5sum. + +SHA-1 in C +By Steve Reid <steve@edmweb.com> +100% Public Domain + +----------------- +Modified 7/98 +By James H. 
Brown <jbrown@burgoyne.com>
Still 100% Public Domain

Corrected a problem which generated improper hash values on 16
bit machines
Routine SHA1Update changed from
    void SHA1Update(SHA1_CTX* context, unsigned char* data,
      unsigned int len)
to
    void SHA1Update(SHA1_CTX* context, unsigned char* data,
      unsigned long len)

The 'len' parameter was declared an int which works fine on 32
bit machines. However, on 16 bit machines an int is too small
for the shifts being done against it. This caused the hash
function to generate incorrect values if len was greater than
8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update().

Since the file IO in main() reads 16K at a time, any file 8K or
larger would be guaranteed to generate the wrong hash (e.g.
Test Vector #3, a million "a"s).

I also changed the declaration of variables i & j in SHA1Update
to unsigned long from unsigned int for the same reason.

These changes should make no difference to any 32 bit
implementations since an int and a long are the same size in
those environments.

--
I also corrected a few compiler warnings generated by Borland
C.
1. Added #include <process.h> for exit() prototype
2. Removed unused variable 'j' in SHA1Final
3. Changed exit(0) to return(0) at end of main.

ALL changes I made can be located by searching for comments
containing 'JHB'

-----------------
Modified 13 August 2000
By Michael Paul Johnson <mpj@cryptography.org>
Still 100% Public Domain

Changed command line syntax, added feature to automatically
check files against their previous SHA-1 check values, kind of
like md5sum does. Added functions hexval, verifyfile,
and sha1file. Rewrote main().
+----------------- + +Test Vectors (from FIPS PUB 180-1) +"abc" + A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D +"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" + 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 +A million repetitions of "a" + 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F +*/ + +#define SHA1HANDSOFF /*Copies data before messing with it.*/ + +/*#define CMDLINE * include main() and file processing */ + +#include "sha1.h" + +#include <stdio.h> +#include <string.h> +#ifdef __BORLANDC__ +#include <dir.h> +#include <dos.h> +#include <process.h> /* prototype for exit() - JHB + needed for Win32, but chokes Linux - MPJ */ +#define X_LITTLE_ENDIAN /* This should be #define'd if true.*/ +#else +# include <unistd.h> +# include <stdlib.h> +//# include <endian.h> + +#include "DexFile.h" // want common byte ordering def + +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define X_LITTLE_ENDIAN +# endif +#endif +#include <ctype.h> + +#define LINESIZE 2048 + +static void SHA1Transform(unsigned long state[5], + const unsigned char buffer[64]); + +#define rol(value,bits) \ + (((value)<<(bits))|((value)>>(32-(bits)))) + +/* blk0() and blk() perform the initial expand. 
*/ +/* I got the idea of expanding during the round function from + SSLeay */ +#ifdef X_LITTLE_ENDIAN +#define blk0(i) (block->l[i]=(rol(block->l[i],24)&0xFF00FF00) \ + |(rol(block->l[i],8)&0x00FF00FF)) +#else +#define blk0(i) block->l[i] +#endif +#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \ + ^block->l[(i+2)&15]^block->l[i&15],1)) + +/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ +#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30); +#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30); +#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); + + +/* Hash a single 512-bit block. This is the core of the algorithm. */ + +static void SHA1Transform(unsigned long state[5], + const unsigned char buffer[64]) +{ +unsigned long a, b, c, d, e; +typedef union { + unsigned char c[64]; + unsigned long l[16]; +} CHAR64LONG16; +CHAR64LONG16* block; +#ifdef SHA1HANDSOFF +static unsigned char workspace[64]; + block = (CHAR64LONG16*)workspace; + memcpy(block, buffer, 64); +#else + block = (CHAR64LONG16*)buffer; +#endif + /* Copy context->state[] to working vars */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + /* 4 rounds of 20 operations each. Loop unrolled. 
*/ + R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); + R0(c,d,e,a,b, 3); R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); + R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); R0(c,d,e,a,b, 8); + R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); + R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); + R0(a,b,c,d,e,15); R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); + R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); R2(a,b,c,d,e,20); + R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); + R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); + R2(d,e,a,b,c,27); R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); + R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); R2(d,e,a,b,c,32); + R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); + R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); + R2(b,c,d,e,a,39); R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); + R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); R3(b,c,d,e,a,44); + R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); + R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); + R3(e,a,b,c,d,51); R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); + R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); R3(e,a,b,c,d,56); + R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); + R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); + R4(c,d,e,a,b,63); R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); + R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); R4(c,d,e,a,b,68); + R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); + R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); + R4(a,b,c,d,e,75); R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); + R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); + + /* Add the working vars back into context.state[] */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + /* Wipe variables */ +/* a = b = c = d = e = 0; Nice try, but the compiler +optimizes this out, anyway, and it produces an annoying +warning. 
*/ +} + + +/* SHA1Init - Initialize new context */ + +void SHA1Init(SHA1_CTX* context) +{ + /* SHA1 initialization constants */ + context->state[0] = 0x67452301; + context->state[1] = 0xEFCDAB89; + context->state[2] = 0x98BADCFE; + context->state[3] = 0x10325476; + context->state[4] = 0xC3D2E1F0; + context->count[0] = context->count[1] = 0; +} + + +/* Run your data through this. */ + +void SHA1Update(SHA1_CTX* context, const unsigned char* data, + unsigned long len) /* JHB */ +{ + unsigned long i, j; /* JHB */ + + j = (context->count[0] >> 3) & 63; + if ((context->count[0] += len << 3) < (len << 3)) + context->count[1]++; + context->count[1] += (len >> 29); + if ((j + len) > 63) + { + memcpy(&context->buffer[j], data, (i = 64-j)); + SHA1Transform(context->state, context->buffer); + for ( ; i + 63 < len; i += 64) { + SHA1Transform(context->state, &data[i]); + } + j = 0; + } + else + i = 0; + memcpy(&context->buffer[j], &data[i], len - i); +} + + +/* Add padding and return the message digest. */ + +void SHA1Final(unsigned char digest[HASHSIZE], SHA1_CTX* +context) +{ +unsigned long i; /* JHB */ +unsigned char finalcount[8]; + + for (i = 0; i < 8; i++) + { + finalcount[i] = (unsigned char)((context->count[(i>=4? 
+ 0:1)]>>((3-(i&3))*8))&255); + /* Endian independent */ + } + SHA1Update(context, (unsigned char *)"\200", 1); + while ((context->count[0] & 504) != 448) { + SHA1Update(context, (unsigned char *)"\0", 1); + } + SHA1Update(context, finalcount, 8); + /* Should cause a SHA1Transform() */ + for (i = 0; i < HASHSIZE; i++) { + digest[i] = (unsigned char) + ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); + } + /* Wipe variables */ + memset(context->buffer, 0, 64); + memset(context->state, 0, HASHSIZE); + memset(context->count, 0, 8); + memset(&finalcount, 0, 8); +#ifdef SHA1HANDSOFF + /* make SHA1Transform overwrite it's own static vars */ + SHA1Transform(context->state, context->buffer); +#endif +} + + + +#ifdef CMDLINE + +/* sha1file computes the SHA-1 hash of the named file and puts + it in the 20-byte array digest. If fname is NULL, stdin is + assumed. +*/ +void sha1file(char *fname, unsigned char* digest) +{ + int bytesread; + SHA1_CTX context; + unsigned char buffer[16384]; + FILE* f; + + if (fname) + { + f = fopen(fname, "rb"); + if (!f) + { + fprintf(stderr, "Can't open %s\n", fname); + memset(digest, 0, HASHSIZE); + return; + } + } + else + { + f = stdin; + } + SHA1Init(&context); + while (!feof(f)) + { + bytesread = fread(buffer, 1, 16384, f); + SHA1Update(&context, buffer, bytesread); + } + SHA1Final(digest, &context); + if (fname) + fclose(f); +} + +/* Convert ASCII hexidecimal digit to 4-bit value. */ +unsigned char hexval(char c) +{ + unsigned char h; + + c = toupper(c); + if (c >= 'A') + h = c - 'A' + 10; + else + h = c - '0'; + return h; +} + +/* Verify a file created with sha1sum by redirecting output + to a file. 
*/ +int verifyfile(char *fname) +{ + int j, k; + int found = 0; + unsigned char digest[HASHSIZE]; + unsigned char expected_digest[HASHSIZE]; + FILE *checkfile; + char checkline[LINESIZE]; + char *s; + unsigned char err; + + checkfile = fopen(fname, "rt"); + if (!checkfile) + { + fprintf(stderr, "Can't open %s\n", fname); + return(0); + } + do + { + s = fgets(checkline, LINESIZE, checkfile); + if (s) + { + if ((strlen(checkline)>26)&& + 1 /*(!strncmp(checkline,"SHA1=", 5))*/) + { + /* Overwrite newline. */ + checkline[strlen(checkline)-1]=0; + found = 1; + + /* Read expected check value. */ + for (k=0, j=5; k < HASHSIZE; k++) + { + expected_digest[k]=hexval(checkline[j++]); + expected_digest[k]=(expected_digest[k]<<4) + +hexval(checkline[j++]); + } + + /* Compute fingerprints */ + s = checkline+46; + sha1file(s, digest); + + /* Compare fingerprints */ + err = 0; + for (k=0; k<HASHSIZE; k++) + err |= digest[k]- + expected_digest[k]; + if (err) + { + fprintf(stderr, "FAILED: %s\n" + " EXPECTED: ", s); + for (k=0; k<HASHSIZE; k++) + fprintf(stderr, "%02X", + expected_digest[k]); + fprintf(stderr,"\n FOUND: "); + for (k=0; k<HASHSIZE; k++) + fprintf(stderr, "%02X", digest[k]); + fprintf(stderr, "\n"); + } + else + { + printf("OK: %s\n", s); + } + } + } + } while (s); + return found; +} + + + +void syntax(char *progname) +{ + printf("\nsyntax:\n" + "%s [-c|-h][-q] file name[s]\n" + " -c = check files against previous check values\n" + " -g = generate SHA-1 check values (default action)\n" + " -h = display this help\n" + "For example,\n" + "sha1sum test.txt > check.txt\n" + "generates check value for test.txt in check.txt, and\n" + "sha1sum -c check.txt\n" + "checks test.txt against the check value in check.txt\n", + progname); + exit(1); +} + + +/**********************************************************/ + +int main(int argc, char** argv) +{ + int i, j, k; + int check = 0; + int found = 0; + unsigned char digest[HASHSIZE]; + unsigned char expected_digest[HASHSIZE]; + 
FILE *checkfile; + char checkline[LINESIZE]; + char *s; +#ifdef __BORLANDC__ + struct ffblk f; + int done; + char path[MAXPATH]; + char drive[MAXDRIVE]; + char dir[MAXDIR]; + char name[MAXFILE]; + char ext[MAXEXT]; +#endif + unsigned char err; + + for (i = 1; i < argc; i++) + { + if (argv[i][0] == '-') + { + switch (argv[i][1]) + { + case 'c': + case 'C': + check = 1; + break; + case 'g': + case 'G': + check = 0; + break; + default: + syntax(argv[0]); + } + } + } + + for (i=1; i<argc; i++) + { + if (argv[i][0] != '-') + { +#ifdef __BORLANDC__ + fnsplit(argv[i], drive, dir, name, ext); + done = findfirst(argv[i], &f, FA_RDONLY | + FA_HIDDEN|FA_SYSTEM|FA_ARCH); + while (!done) + { + sprintf(path, "%s%s%s", drive, dir, f.ff_name); + s = path; +#else + s = argv[i]; +#endif + + if (check) + { /* Check fingerprint file. */ + found |= verifyfile(s); + } + else + { /* Generate fingerprints & write to + stdout. */ + sha1file(s, digest); + //printf("SHA1="); + for (j=0; j<HASHSIZE; j++) + printf("%02x", digest[j]); + printf(" %s\n", s); + found = 1; + } + +#ifdef __BORLANDC__ + done = findnext(&f); + } +#endif + + } + } + if (!found) + { + if (check) + { + fprintf(stderr, + "No SHA1 lines found in %s\n", + argv[i]); + } + else + { + fprintf(stderr, "No files checked.\n"); + syntax(argv[0]); + } + } + return(0); /* JHB */ +} + +#endif /*CMDLINE*/ diff --git a/libdex/sha1.h b/libdex/sha1.h new file mode 100644 index 000000000..65cf667cd --- /dev/null +++ b/libdex/sha1.h @@ -0,0 +1,20 @@ +/* + * See "sha1.c" for author info. + */ +#ifndef _DALVIK_SHA1 +#define _DALVIK_SHA1 + +typedef struct { + unsigned long state[5]; + unsigned long count[2]; + unsigned char buffer[64]; +} SHA1_CTX; + +#define HASHSIZE 20 + +void SHA1Init(SHA1_CTX* context); +void SHA1Update(SHA1_CTX* context, const unsigned char* data, + unsigned long len); +void SHA1Final(unsigned char digest[HASHSIZE], SHA1_CTX* context); + +#endif /*_DALVIK_SHA1*/ |
