From 111221644c5b7b1d4f426d02786aeebf1addc8f6 Mon Sep 17 00:00:00 2001 From: Dan Bornstein Date: Wed, 1 Dec 2010 12:30:21 -0800 Subject: Add more "extended opcode" structure to libdex. Although we don't yet generate any of the extended opcodes, this change makes it a bit easier to add them. In particular, we now differentiate between the raw opcode in a code unit and an associated "packed opcode number." The packed opcode space is densely populated in the range 0x000-0x1ff (though there will still be a few unused slots), whereas the raw opcode values are sparse throughout the range 0x0000-0xffff. The OpCode enum is redefined/clarified to have packed, not sparse, opcode values. Change-Id: Ie3208a258648fbf044d344646f66c49ad24c31b2 --- dexdump/DexDump.c | 13 ++++++++++--- libdex/DexInlines.c | 2 ++ libdex/InstrUtils.c | 6 +++--- libdex/OpCode.h | 37 ++++++++++++++++++++++++++++++++++--- opcode-gen/opcode-gen | 11 +++++++++++ vm/compiler/Frontend.c | 2 +- 6 files changed, 61 insertions(+), 10 deletions(-) diff --git a/dexdump/DexDump.c b/dexdump/DexDump.c index 3950a81d0..158ac0c0e 100644 --- a/dexdump/DexDump.c +++ b/dexdump/DexDump.c @@ -1070,10 +1070,17 @@ void dumpBytecodes(DexFile* pDexFile, const DexMethod* pDexMethod) insnIdx = 0; while (insnIdx < (int) pCode->insnsSize) { int insnWidth; - OpCode opCode; DecodedInstruction decInsn; u2 instr; + /* + * Note: This code parallels the function + * dexGetInstrOrTableWidth() in InstrUtils.c, but this version + * can deal with data in either endianness. + * + * TODO: Figure out if this really matters, and possibly change + * this to just use dexGetInstrOrTableWidth(). 
+ */ instr = get2LE((const u1*)insns); if (instr == kPackedSwitchSignature) { insnWidth = 4 + get2LE((const u1*)(insns+1)) * 2; @@ -1083,10 +1090,10 @@ void dumpBytecodes(DexFile* pDexFile, const DexMethod* pDexMethod) int width = get2LE((const u1*)(insns+1)); int size = get2LE((const u1*)(insns+2)) | (get2LE((const u1*)(insns+3))<<16); - // The plus 1 is to round up for odd size and width + // The plus 1 is to round up for odd size and width. insnWidth = 4 + ((size * width) + 1) / 2; } else { - opCode = instr & 0xff; + OpCode opCode = dexOpCodeFromCodeUnit(instr); insnWidth = dexGetInstrWidth(opCode); if (insnWidth == 0) { fprintf(stderr, diff --git a/libdex/DexInlines.c b/libdex/DexInlines.c index 6b3aed8b8..3bb07cca5 100644 --- a/libdex/DexInlines.c +++ b/libdex/DexInlines.c @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + /* * Generate non-inline copies of inline functions in header files. */ @@ -27,4 +28,5 @@ #include "DexProto.h" #include "InstrUtils.h" #include "Leb128.h" +#include "OpCode.h" #include "ZipArchive.h" diff --git a/libdex/InstrUtils.c b/libdex/InstrUtils.c index c194bc3b4..d0dea5f61 100644 --- a/libdex/InstrUtils.c +++ b/libdex/InstrUtils.c @@ -472,7 +472,6 @@ InstructionInfoTables gDexOpcodeInfo = { */ #define FETCH(_offset) (insns[(_offset)]) #define FETCH_u4(_offset) (fetch_u4_impl((_offset), insns)) -#define INST_INST(_inst) ((_inst) & 0xff) #define INST_A(_inst) (((u2)(_inst) >> 8) & 0x0f) #define INST_B(_inst) ((u2)(_inst) >> 12) #define INST_AA(_inst) ((_inst) >> 8) @@ -491,7 +490,7 @@ static inline u4 fetch_u4_impl(u4 offset, const u2* insns) { void dexDecodeInstruction(const u2* insns, DecodedInstruction* pDec) { u2 inst = *insns; - OpCode opCode = (OpCode) INST_INST(inst); + OpCode opCode = dexOpCodeFromCodeUnit(inst); InstructionFormat format = dexGetInstrFormat(opCode); pDec->opCode = opCode; @@ -705,9 +704,10 @@ size_t dexGetInstrOrTableWidth(const u2* 
insns) } else if (*insns == kArrayDataSignature) { u2 elemWidth = insns[1]; u4 len = insns[2] | (((u4)insns[3]) << 16); + // The plus 1 is to round up for odd size and width. width = 4 + (elemWidth * len + 1) / 2; } else { - width = dexGetInstrWidth(INST_INST(insns[0])); + width = dexGetInstrWidth(dexOpCodeFromCodeUnit(insns[0])); } return width; } diff --git a/libdex/OpCode.h b/libdex/OpCode.h index 0d0078334..c5c89b8cc 100644 --- a/libdex/OpCode.h +++ b/libdex/OpCode.h @@ -28,7 +28,14 @@ #ifndef _LIBDEX_OPCODE #define _LIBDEX_OPCODE -/* the highest opcode value of a valid Dalvik opcode, plus one */ +#include "DexFile.h" + +/* + * the highest possible packed opcode value of a valid Dalvik opcode, plus one + * + * TODO: Change this once the rest of the code is prepared to deal with + * extended opcodes. + */ #define kNumDalvikInstructions 256 /* @@ -41,8 +48,9 @@ /* * Enumeration of all Dalvik opcodes, where the enumeration value - * associated with each is the corresponding opcode number as noted in - * the Dalvik bytecode spec. + * associated with each is the corresponding packed opcode number. + * This is different than the opcode value from the Dalvik bytecode + * spec for opcode values >= 0xff; see dexOpCodeFromCodeUnit() below. * * A note about the "breakpoint" opcode. This instruction is special, * in that it should never be seen by anything but the debug @@ -578,4 +586,27 @@ typedef enum OpCode { /* END(libdex-goto-table) */ \ }; +/* + * Return the OpCode for a given raw opcode code unit (which may + * include data payload). The packed index is a zero-based index which + * can be used to point into various opcode-related tables. The Dalvik + * opcode space is inherently sparse, in that the opcode unit is 16 + * bits wide, but for most opcodes, eight of those bits are for data. + */ +DEX_INLINE OpCode dexOpCodeFromCodeUnit(u2 codeUnit) { + /* + * This will want to become table-driven should the opcode layout + * get more complicated. 
+ * + * Note: This has to match the corresponding code in opcode-gen, so + * that data tables get generated in a consistent way. + */ + int lowByte = codeUnit & 0xff; + if (lowByte != 0xff) { + return (OpCode) lowByte; + } else { + return (OpCode) ((codeUnit >> 8) | 0x100); + } +} + #endif /*_LIBDEX_OPCODE*/ diff --git a/opcode-gen/opcode-gen b/opcode-gen/opcode-gen index 8a99134ad..64011ce83 100755 --- a/opcode-gen/opcode-gen +++ b/opcode-gen/opcode-gen @@ -480,6 +480,17 @@ function flagsToC(f, parts, result, i) { return result; } +# Given a packed opcode, returns the raw (unpacked) opcode value. +function unpackOpcode(idx) { + # Note: This must be the inverse of dexOpCodeFromCodeUnit() in + # libdex/OpCode.h. Awk has no "<<" or bitwise "|", so use arithmetic. + if (idx <= 255) { + return idx; + } else { + return ((idx - 256) * 256) + 255; + } +} + # Returns true if the given opcode (by index) is an "optimized" opcode. function isOptimized(idx, parts, f) { # locals: parts, f diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c index c2ee4964a..ad046764a 100644 --- a/vm/compiler/Frontend.c +++ b/vm/compiler/Frontend.c @@ -27,7 +27,7 @@ static inline int parseInsn(const u2 *codePtr, DecodedInstruction *decInsn, bool printMe) { u2 instr = *codePtr; - OpCode opcode = instr & 0xff; + OpCode opcode = dexOpCodeFromCodeUnit(instr); int insnWidth; // Don't parse instruction data -- cgit v1.2.3