From 111221644c5b7b1d4f426d02786aeebf1addc8f6 Mon Sep 17 00:00:00 2001
From: Dan Bornstein <danfuzz@android.com>
Date: Wed, 1 Dec 2010 12:30:21 -0800
Subject: Add more "extended opcode" structure to libdex.

Although we don't yet generate any of the extended opcodes, this
change makes it a bit easier to add them.

In particular, we now differentiate between the raw opcode in a code
unit and an associated "packed opcode number." The packed opcode space
is densely populated in the range 0x000-0x1ff (though there will still
be a few unused slots), whereas the raw opcode values are sparse
throughout the range 0x0000-0xffff.

The OpCode enum is redefined/clarified to have packed, not sparse,
opcode values.

Change-Id: Ie3208a258648fbf044d344646f66c49ad24c31b2
---
 dexdump/DexDump.c      | 13 ++++++++++---
 libdex/DexInlines.c    |  2 ++
 libdex/InstrUtils.c    |  6 +++---
 libdex/OpCode.h        | 37 ++++++++++++++++++++++++++++++++++---
 opcode-gen/opcode-gen  | 11 +++++++++++
 vm/compiler/Frontend.c |  2 +-
 6 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/dexdump/DexDump.c b/dexdump/DexDump.c
index 3950a81d0..158ac0c0e 100644
--- a/dexdump/DexDump.c
+++ b/dexdump/DexDump.c
@@ -1070,10 +1070,17 @@ void dumpBytecodes(DexFile* pDexFile, const DexMethod* pDexMethod)
     insnIdx = 0;
     while (insnIdx < (int) pCode->insnsSize) {
         int insnWidth;
-        OpCode opCode;
         DecodedInstruction decInsn;
         u2 instr;
 
+        /*
+         * Note: This code parallels the function
+         * dexGetInstrOrTableWidth() in InstrUtils.c, but this version
+         * can deal with data in either endianness.
+         *
+         * TODO: Figure out if this really matters, and possibly change
+         * this to just use dexGetInstrOrTableWidth().
+         */
         instr = get2LE((const u1*)insns);
         if (instr == kPackedSwitchSignature) {
             insnWidth = 4 + get2LE((const u1*)(insns+1)) * 2;
@@ -1083,10 +1090,10 @@ void dumpBytecodes(DexFile* pDexFile, const DexMethod* pDexMethod)
             int width = get2LE((const u1*)(insns+1));
             int size = get2LE((const u1*)(insns+2)) |
                        (get2LE((const u1*)(insns+3))<<16);
-            // The plus 1 is to round up for odd size and width
+            // The plus 1 is to round up for odd size and width.
             insnWidth = 4 + ((size * width) + 1) / 2;
         } else {
-            opCode = instr & 0xff;
+            OpCode opCode = dexOpCodeFromCodeUnit(instr);
             insnWidth = dexGetInstrWidth(opCode);
             if (insnWidth == 0) {
                 fprintf(stderr,
diff --git a/libdex/DexInlines.c b/libdex/DexInlines.c
index 6b3aed8b8..3bb07cca5 100644
--- a/libdex/DexInlines.c
+++ b/libdex/DexInlines.c
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /*
  * Generate non-inline copies of inline functions in header files.
  */
@@ -27,4 +28,5 @@
 #include "DexProto.h"
 #include "InstrUtils.h"
 #include "Leb128.h"
+#include "OpCode.h"
 #include "ZipArchive.h"
diff --git a/libdex/InstrUtils.c b/libdex/InstrUtils.c
index c194bc3b4..d0dea5f61 100644
--- a/libdex/InstrUtils.c
+++ b/libdex/InstrUtils.c
@@ -472,7 +472,6 @@ InstructionInfoTables gDexOpcodeInfo = {
  */
 #define FETCH(_offset)      (insns[(_offset)])
 #define FETCH_u4(_offset)   (fetch_u4_impl((_offset), insns))
-#define INST_INST(_inst)    ((_inst) & 0xff)
 #define INST_A(_inst)       (((u2)(_inst) >> 8) & 0x0f)
 #define INST_B(_inst)       ((u2)(_inst) >> 12)
 #define INST_AA(_inst)      ((_inst) >> 8)
@@ -491,7 +490,7 @@ static inline u4 fetch_u4_impl(u4 offset, const u2* insns) {
 void dexDecodeInstruction(const u2* insns, DecodedInstruction* pDec)
 {
     u2 inst = *insns;
-    OpCode opCode = (OpCode) INST_INST(inst);
+    OpCode opCode = dexOpCodeFromCodeUnit(inst);
     InstructionFormat format = dexGetInstrFormat(opCode);
 
     pDec->opCode = opCode;
@@ -705,9 +704,10 @@ size_t dexGetInstrOrTableWidth(const u2* insns)
     } else if (*insns == kArrayDataSignature) {
         u2 elemWidth = insns[1];
         u4 len = insns[2] | (((u4)insns[3]) << 16);
+        // The plus 1 is to round up for odd size and width.
         width = 4 + (elemWidth * len + 1) / 2;
     } else {
-        width = dexGetInstrWidth(INST_INST(insns[0]));
+        width = dexGetInstrWidth(dexOpCodeFromCodeUnit(insns[0]));
     }
     return width;
 }
diff --git a/libdex/OpCode.h b/libdex/OpCode.h
index 0d0078334..c5c89b8cc 100644
--- a/libdex/OpCode.h
+++ b/libdex/OpCode.h
@@ -28,7 +28,14 @@
 #ifndef _LIBDEX_OPCODE
 #define _LIBDEX_OPCODE
 
-/* the highest opcode value of a valid Dalvik opcode, plus one */
+#include "DexFile.h"
+
+/*
+ * the highest possible packed opcode value of a valid Dalvik opcode, plus one
+ *
+ * TODO: Change this once the rest of the code is prepared to deal with
+ * extended opcodes.
+ */
 #define kNumDalvikInstructions 256
 
 /*
@@ -41,8 +48,9 @@
 
 /*
  * Enumeration of all Dalvik opcodes, where the enumeration value
- * associated with each is the corresponding opcode number as noted in
- * the Dalvik bytecode spec.
+ * associated with each is the corresponding packed opcode number.
+ * This is different than the opcode value from the Dalvik bytecode
+ * spec for opcode values >= 0xff; see dexOpCodeFromCodeUnit() below.
  *
  * A note about the "breakpoint" opcode. This instruction is special,
  * in that it should never be seen by anything but the debug
@@ -578,4 +586,27 @@ typedef enum OpCode {
         /* END(libdex-goto-table) */                                          \
     };
 
+/*
+ * Return the OpCode for a given raw opcode code unit (which may
+ * include data payload). The packed index is a zero-based index which
+ * can be used to point into various opcode-related tables. The Dalvik
+ * opcode space is inherently sparse, in that the opcode unit is 16
+ * bits wide, but for most opcodes, eight of those bits are for data.
+ */
+DEX_INLINE OpCode dexOpCodeFromCodeUnit(u2 codeUnit) {
+    /*
+     * A low byte other than 0xff is itself the packed opcode. A low
+     * byte of 0xff selects the high byte instead, offset by 0x100 so
+     * that it lands in the packed range 0x100-0x1ff. Should the
+     * layout grow more complicated, make this table-driven.
+     *
+     * Note: opcode-gen contains the corresponding code; the two must
+     * agree so that data tables get generated in a consistent way.
+     */
+    const int primary = codeUnit & 0xff;
+    const int extended = (codeUnit >> 8) | 0x100;
+
+    return (OpCode) ((primary != 0xff) ? primary : extended);
+}
+
 #endif /*_LIBDEX_OPCODE*/
diff --git a/opcode-gen/opcode-gen b/opcode-gen/opcode-gen
index 8a99134ad..64011ce83 100755
--- a/opcode-gen/opcode-gen
+++ b/opcode-gen/opcode-gen
@@ -480,6 +480,17 @@ function flagsToC(f, parts, result, i) {
     return result;
 }
 
+# Given a packed opcode, returns the raw (unpacked) opcode value.
+function unpackOpcode(idx) {
+    # Note: This must be the inverse of the corresponding code in
+    # libdex/OpCode.h: packed (256 + n) unpacks to raw (n * 256 + 255).
+    if (idx <= 255) {
+        return idx;
+    } else {
+        return (idx - 256) * 256 + 255;
+    }
+}
+
 # Returns true if the given opcode (by index) is an "optimized" opcode.
 function isOptimized(idx, parts, f) {
     # locals: parts, f
diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c
index c2ee4964a..ad046764a 100644
--- a/vm/compiler/Frontend.c
+++ b/vm/compiler/Frontend.c
@@ -27,7 +27,7 @@ static inline int parseInsn(const u2 *codePtr, DecodedInstruction *decInsn,
                             bool printMe)
 {
     u2 instr = *codePtr;
-    OpCode opcode = instr & 0xff;
+    OpCode opcode = dexOpCodeFromCodeUnit(instr);
     int insnWidth;
 
     // Don't parse instruction data
-- 
cgit v1.2.3