1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
|
/*
* Copyright (C) 2008 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Dalvik instruction utility functions.
*/
#ifndef _LIBDEX_INSTRUTILS
#define _LIBDEX_INSTRUTILS
#include "DexFile.h"
#include "OpCode.h"
/*
 * Dalvik-defined instruction formats.
 *
 * InstructionFormat is typedef'd to unsigned char instead of to the enum
 * type so that a table of formats takes one byte per entry.  (Some
 * compilers already choose an integer width that suits the number of
 * enum values, in which case the typedef is not strictly needed.)
 *
 * Enumerator values are assigned implicitly, counting up from
 * kFmtUnknown == 0, so the order of the list below is significant.
 *
 * Adding or deleting a format means changing some or all of:
 *  - this enum
 *  - the switch inside dexDecodeInstruction() in InstrUtils.c
 *  - the switch inside dumpInstruction() in DexDump.c
 *  - the switch inside dvmCompilerMIR2LIR() in CodegenDriver.c
 */
typedef unsigned char InstructionFormat;
enum InstructionFormat {
    kFmtUnknown = 0,
    kFmt10x,        /* op */
    kFmt12x,        /* op vA, vB */
    kFmt11n,        /* op vA, #+B */
    kFmt11x,        /* op vAA */
    kFmt10t,        /* op +AA */
    kFmt20bc,       /* [opt] op AA, thing@BBBB */
    kFmt20t,        /* op +AAAA */
    kFmt22x,        /* op vAA, vBBBB */
    kFmt21t,        /* op vAA, +BBBB */
    kFmt21s,        /* op vAA, #+BBBB */
    kFmt21h,        /* op vAA, #+BBBB00000[00000000] */
    kFmt21c,        /* op vAA, thing@BBBB */
    kFmt23x,        /* op vAA, vBB, vCC */
    kFmt22b,        /* op vAA, vBB, #+CC */
    kFmt22t,        /* op vA, vB, +CCCC */
    kFmt22s,        /* op vA, vB, #+CCCC */
    kFmt22c,        /* op vA, vB, thing@CCCC */
    kFmt22cs,       /* [opt] op vA, vB, field offset CCCC */
    kFmt30t,        /* op +AAAAAAAA */
    kFmt32x,        /* op vAAAA, vBBBB */
    kFmt31i,        /* op vAA, #+BBBBBBBB */
    kFmt31t,        /* op vAA, +BBBBBBBB */
    kFmt31c,        /* op vAA, string@BBBBBBBB */
    kFmt35c,        /* op {vC,vD,vE,vF,vG}, thing@BBBB */
    kFmt35ms,       /* [opt] invoke-virtual+super */
    kFmt3rc,        /* op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB */
    kFmt3rms,       /* [opt] invoke-virtual+super/range */
    kFmt51l,        /* op vAA, #+BBBBBBBBBBBBBBBB */
    kFmt35mi,       /* [opt] inline invoke */
    kFmt3rmi,       /* [opt] inline invoke/range */
    kFmt33x,        /* exop vAA, vBB, vCCCC */
    kFmt32s,        /* exop vAA, vBB, #+CCCC */
    kFmt41c,        /* exop vAAAA, thing@BBBBBBBB */
    kFmt52c,        /* exop vAAAA, vBBBB, thing@CCCCCCCC */
    kFmt5rc,        /* exop {vCCCC .. v(CCCC+AAAA-1)}, thing@BBBBBBBB */
};
/*
 * The kinds of indexed reference an instruction can carry, for formats
 * that include such a reference (e.g., 21c and 35c).
 *
 * As with InstructionFormat, the type used for storage is an unsigned
 * char rather than the enum itself.  Values are assigned implicitly,
 * counting up from kIndexUnknown == 0.
 */
typedef unsigned char InstructionIndexType;
enum InstructionIndexType {
    kIndexUnknown = 0,
    kIndexNone,             /* has no index */
    kIndexVaries,           /* "It depends." Used for throw-verification-error */
    kIndexTypeRef,          /* type reference index */
    kIndexStringRef,        /* string reference index */
    kIndexMethodRef,        /* method reference index */
    kIndexFieldRef,         /* field reference index */
    kIndexInlineMethod,     /* inline method index (for inline linked methods) */
    kIndexVtableOffset,     /* vtable offset (for static linked methods) */
    kIndexFieldOffset       /* field offset (for static linked fields) */
};
/*
 * Holds the contents of a decoded instruction.
 *
 * This is the type of the "pDec" argument of dexDecodeInstruction(),
 * which presumably fills it in.  The vA/vB/vC field names follow the
 * A/B/C operand letters used in the InstructionFormat comments.
 *
 * NOTE(review): which fields are meaningful for a given format is decided
 * by the decoder in InstrUtils.c and cannot be determined from this
 * header; confirm there before relying on a field.
 */
typedef struct DecodedInstruction {
/* "A" operand */
u4 vA;
/* "B" operand (see vB_wide for the 64-bit case) */
u4 vB;
u8 vB_wide; /* for kFmt51l */
/* "C" operand */
u4 vC;
u4 arg[5]; /* vC/D/E/F/G in invoke or filled-new-array */
/* opcode of the decoded instruction */
OpCode opCode;
/* kind of indexed reference carried, for formats that include one */
InstructionIndexType indexType;
} DecodedInstruction;
/*
 * Instruction width, a value in the range 0 to 5.
 *
 * Stored in an unsigned char to keep the per-opcode width table small.
 * In that table a width of zero marks an entry that does not exist.
 *
 * NOTE(review): the unit is presumably 16-bit code units (instruction
 * streams are passed around as "const u2*") -- confirm in InstrUtils.c.
 */
typedef unsigned char InstructionWidth;
/*
 * Per-instruction flags.  The verifier and the JIT consult these to work
 * out where control can flow to next.
 *
 * Every flag is a distinct single bit so that flags can be OR'd together;
 * the complete set is expected to fit in 8 bits, which is why the storage
 * type is an unsigned char.
 */
typedef unsigned char InstructionFlags;
enum InstructionFlags {
    kInstrCanBranch     = 0x01,     /* conditional or unconditional branch */
    kInstrCanContinue   = 0x02,     /* flow can continue to next statement */
    kInstrCanSwitch     = 0x04,     /* switch statement */
    kInstrCanThrow      = 0x08,     /* could cause an exception to be thrown */
    kInstrCanReturn     = 0x10,     /* returns, no additional statements */
    kInstrInvoke        = 0x20,     /* a flavor of invoke */
};
/*
 * Struct that includes a pointer to each of the instruction information
 * tables.
 *
 * dexCreateInstructionInfoTables() stores the table references here and
 * dexFreeInstructionInfoTables() releases them; dexDecodeInstruction()
 * takes a const pointer to this struct.
 *
 * NOTE(review): each table is presumably indexed by opcode, like the
 * arrays taken by the dexGetInstr*() accessors -- confirm in InstrUtils.c.
 */
typedef struct InstructionInfoTables {
/* instruction format for each entry */
InstructionFormat* formats;
/* kind of indexed reference for each entry */
InstructionIndexType* indexTypes;
/* control-flow flags for each entry */
InstructionFlags* flags;
/* instruction width for each entry */
InstructionWidth* widths;
} InstructionInfoTables;
/*
 * Allocate and populate a 256-element array with instruction widths. A
 * width of zero means the entry does not exist.
 *
 * Returns a pointer to the new table, whose element type matches what
 * dexGetInstrWidth() and dexGetInstrOrTableWidth() accept.
 *
 * NOTE(review): the value returned on allocation failure (presumably
 * NULL) and the way the table is meant to be released are not visible in
 * this header.  "256" is assumed to equal kNumDalvikInstructions; confirm
 * both against InstrUtils.c.
 */
InstructionWidth* dexCreateInstrWidthTable(void);
/*
 * Look up the width of the instruction "opCode" in the table "widths".
 *
 * Returns the table entry widened to size_t; an entry of 0 means the
 * instruction is not defined.
 */
DEX_INLINE size_t dexGetInstrWidth(const InstructionWidth* widths,
    OpCode opCode)
{
    /*
     * No range check is done here: opCode is used directly as the index
     * and is expected to be less than kNumDalvikInstructions.
     */
    const InstructionWidth width = widths[opCode];

    return width;
}
/*
 * Return the width of the specified instruction, or 0 if not defined. Also
 * works for special OP_NOP entries, including switch statement data tables
 * and array data.
 *
 * "widths" is a width table with the element type produced by
 * dexCreateInstrWidthTable(); "insns" points at the instruction (or data
 * table) to be measured.
 *
 * NOTE(review): how much of the stream past "insns" may be read for the
 * table cases is not visible here -- check InstrUtils.c before calling
 * this on unverified code.
 */
size_t dexGetInstrOrTableWidth(const InstructionWidth* widths,
const u2* insns);
/*
 * Allocate and populate a 256-element array with instruction flags.
 *
 * Returns a pointer to the new table, whose element type matches what
 * dexGetInstrFlags() accepts.  Entries hold InstructionFlags bit values.
 *
 * NOTE(review): the value returned on allocation failure (presumably
 * NULL) and the way the table is meant to be released are not visible in
 * this header.  "256" is assumed to equal kNumDalvikInstructions; confirm
 * both against InstrUtils.c.
 */
InstructionFlags* dexCreateInstrFlagsTable(void);
/*
 * Fetch the flags recorded for "opCode" in the table "flags".
 *
 * The one-byte table entry is returned as an int, which is the type
 * dexIsGoto() takes.
 */
DEX_INLINE int dexGetInstrFlags(const InstructionFlags* flags, OpCode opCode)
{
    /*
     * No range check is done here: opCode is used directly as the index
     * and is expected to be less than kNumDalvikInstructions.
     */
    const InstructionFlags opFlags = flags[opCode];

    return opFlags;
}
/*
 * Decide whether "flags" describe a goto, i.e. an unconditional branch.
 *
 * That is the case when kInstrCanBranch is set and kInstrCanContinue is
 * clear.  All other flag bits are ignored.
 */
DEX_INLINE bool dexIsGoto(int flags)
{
    const bool canBranch = (flags & kInstrCanBranch) != 0;
    const bool canContinue = (flags & kInstrCanContinue) != 0;

    /* branches, and never falls through to the next instruction */
    return canBranch && !canContinue;
}
/*
 * Allocate and populate a 256-element array with instruction formats.
 *
 * Returns a pointer to the new table, whose element type matches what
 * dexGetInstrFormat() accepts.  Entries hold InstructionFormat values.
 *
 * NOTE(review): the value returned on allocation failure (presumably
 * NULL) and the way the table is meant to be released are not visible in
 * this header.  "256" is assumed to equal kNumDalvikInstructions; confirm
 * both against InstrUtils.c.
 */
InstructionFormat* dexCreateInstrFormatTable(void);
/*
 * Look up the instruction format of "opCode" in the table "fmts".
 */
DEX_INLINE InstructionFormat dexGetInstrFormat(const InstructionFormat* fmts,
    OpCode opCode)
{
    /*
     * No range check is done here: opCode is used directly as the index
     * and is expected to be less than kNumDalvikInstructions.
     */
    const InstructionFormat format = fmts[opCode];

    return format;
}
/*
 * Allocate and populate an array with index types for all instructions.
 * Used in conjunction with dexDecodeInstruction.
 *
 * Returns a pointer to the new table, whose element type matches what
 * dexGetInstrIndexType() accepts.  Entries hold InstructionIndexType
 * values.
 *
 * NOTE(review): the array length, the value returned on allocation
 * failure (presumably NULL) and the way the table is meant to be released
 * are not visible in this header; confirm against InstrUtils.c.
 */
InstructionIndexType* dexCreateInstrIndexTypeTable(void);
/*
 * Look up the instruction index type of "opCode" in the table "types".
 */
DEX_INLINE InstructionIndexType dexGetInstrIndexType(
    const InstructionIndexType* types, OpCode opCode)
{
    /*
     * No range check is done here: opCode is used directly as the index
     * and is expected to be less than kNumDalvikInstructions.
     */
    const InstructionIndexType indexType = types[opCode];

    return indexType;
}
/*
 * Construct all of the instruction info tables, storing references to
 * them into the given struct. This returns 0 on success or non-zero on
 * failure. If this fails, then no net allocation will have occurred.
 *
 * "info" is the struct that receives the table pointers.  Tables built
 * here are released with dexFreeInstructionInfoTables().
 *
 * NOTE(review): the contents of "info" after a failed call are not
 * specified in this header; do not read its pointers in that case.
 */
int dexCreateInstructionInfoTables(InstructionInfoTables* info);
/*
 * Free up the tables referred to by the given instruction info struct.
 *
 * This is the counterpart of dexCreateInstructionInfoTables().
 *
 * NOTE(review): whether the pointers inside "info" are reset afterwards,
 * and whether "info" itself is freed, is not visible in this header;
 * presumably only the tables are released.  Confirm in InstrUtils.c
 * before reusing the struct.
 */
void dexFreeInstructionInfoTables(InstructionInfoTables* info);
/*
 * Decode the instruction pointed to by "insns".
 *
 * "info" supplies the instruction info tables used for decoding, "insns"
 * points at the instruction, and "pDec" is the DecodedInstruction that
 * presumably receives the result.
 *
 * The function returns nothing, so a failure to decode cannot be reported
 * through the return value.  NOTE(review): how unknown opcodes or formats
 * are represented in "pDec" is not visible here -- confirm in InstrUtils.c.
 */
void dexDecodeInstruction(const InstructionInfoTables* info, const u2* insns,
DecodedInstruction* pDec);
#endif /*_LIBDEX_INSTRUTILS*/
|