/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
#define ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
#include "dex/compiler_internals.h"
namespace art {
/*
* Runtime register conventions. We consider x86, x86-64 and x32 (x86-64 with 32-bit pointers),
* although we currently only target x86. The ABIs have different conventions and we hope to have a
* single convention to simplify code generation. Changing something that is callee save and making
* it caller save places a burden on up-calls to save/restore the callee save register; however,
* there are few registers that are callee save in the ABI. Changing something that is caller save
* and making it callee save places a burden on down-calls to save/restore the callee save register.
* For these reasons we aim to match the native conventions for caller and callee save. The first 4
* registers can be used for byte operations; for this reason they are preferred as temporary
* scratch registers.
*
* General Purpose Register:
* Native: x86         | x86-64 / x32      | ART
* r0/eax: caller save | caller save       | caller, Method*, scratch, return value
* r1/ecx: caller save | caller save, arg4 | caller, arg1, scratch
* r2/edx: caller save | caller save, arg3 | caller, arg2, scratch, high half of long return
* r3/ebx: callEE save | callEE save       | callER, arg3, scratch
* r4/esp: stack pointer
* r5/ebp: callee save | callee save       | callee, available for dalvik register promotion
* r6/esi: callEE save | callER save, arg2 | callee, available for dalvik register promotion
* r7/edi: callEE save | callER save, arg1 | callee, available for dalvik register promotion
* --- x86-64/x32 registers
* Native: x86-64 / x32  | ART
* r8: caller save, arg5 | caller, scratch
* r9: caller save, arg6 | caller, scratch
* r10: caller save      | caller, scratch
* r11: caller save      | caller, scratch
* r12: callee save      | callee, available for dalvik register promotion
* r13: callee save      | callee, available for dalvik register promotion
* r14: callee save      | callee, available for dalvik register promotion
* r15: callee save      | callee, available for dalvik register promotion
*
* There is no rSELF; instead, on x86 the fs: segment has Thread::Current() as its base address,
* whereas on x86-64/x32 gs: holds it.
*
* For floating point we don't support CPUs without SSE2 (i.e. we require the Pentium 4 or newer):
* Native: x86       | x86-64 / x32      | ART
* XMM0: caller save | caller save, arg1 | caller, float/double return value (except for native x86 code)
* XMM1: caller save | caller save, arg2 | caller, scratch
* XMM2: caller save | caller save, arg3 | caller, scratch
* XMM3: caller save | caller save, arg4 | caller, scratch
* XMM4: caller save | caller save, arg5 | caller, scratch
* XMM5: caller save | caller save, arg6 | caller, scratch
* XMM6: caller save | caller save, arg7 | caller, scratch
* XMM7: caller save | caller save, arg8 | caller, scratch
* --- x86-64/x32 registers
* XMM8 .. 15: caller save
*
* X87 is a necessary evil outside of ART code:
* ST0: x86 float/double native return value, caller save
* ST1 .. ST7: caller save
*
* Stack frame diagram (stack grows down, higher addresses at top):
*
* +------------------------+
* | IN[ins-1]              |  {Note: resides in caller's frame}
* |       .                |
* | IN[0]                  |
* | caller's Method*       |
* +========================+  {Note: start of callee's frame}
* | return address         |  {pushed by call}
* | spill region           |  {variable sized}
* +------------------------+
* | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long}
* +------------------------+
* | V[locals-1]            |
* | V[locals-2]            |
* |       .                |
* |       .                |
* | V[1]                   |
* | V[0]                   |
* +------------------------+
* |  0 to 3 words padding  |
* +------------------------+
* | OUT[outs-1]            |
* | OUT[outs-2]            |
* |       .                |
* | OUT[0]                 |
* | cur_method*            | <<== sp w/ 16-byte alignment
* +========================+
*/
// Offset to distinguish FP regs.
#define X86_FP_REG_OFFSET 32
// Offset to distinguish DP FP regs.
#define X86_FP_DOUBLE (X86_FP_REG_OFFSET + 16)
// Offset to distinguish the extra regs.
#define X86_EXTRA_REG_OFFSET (X86_FP_DOUBLE + 16)
// Reg types.
#define X86_REGTYPE(x) (x & (X86_FP_REG_OFFSET | X86_FP_DOUBLE))
#define X86_FPREG(x) ((x & X86_FP_REG_OFFSET) == X86_FP_REG_OFFSET)
#define X86_EXTRAREG(x) ((x & X86_EXTRA_REG_OFFSET) == X86_EXTRA_REG_OFFSET)
#define X86_DOUBLEREG(x) ((x & X86_FP_DOUBLE) == X86_FP_DOUBLE)
#define X86_SINGLEREG(x) (X86_FPREG(x) && !X86_DOUBLEREG(x))
/*
* Note: the low register of a floating point pair is sufficient to
* create the name of a double, but we require both names to be passed to
* allow asserts to verify that the pair is consecutive if significant
* rework is done in this area. Also, it is a good reminder in the calling
* code that reg locations always describe doubles as a pair of singles.
*/
#define X86_S2D(x, y) ((x) | X86_FP_DOUBLE)
/* Mask to strip off fp flags */
#define X86_FP_REG_MASK 0xF
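// Worked example of the encoding macros above (illustrative only, not part of the scheme itself):
// with X86_FP_REG_OFFSET = 32 and X86_FP_DOUBLE = 48, the single fr3 (defined below as
// 3 + X86_FP_REG_OFFSET = 35) satisfies X86_FPREG and X86_SINGLEREG but not X86_DOUBLEREG.
// X86_S2D(fr3, fr4) ors in X86_FP_DOUBLE to give 51, which X86_DOUBLEREG recognizes, and masking
// with X86_FP_REG_MASK (51 & 0xF = 3) recovers the low single of the pair.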
// RegisterLocation templates for return values (rAX, rAX/rDX or XMM0).
// location, wide, defined, const, fp, core, ref, high_word, home, low_reg, high_reg, s_reg_low
#define X86_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rAX, INVALID_REG, INVALID_SREG, INVALID_SREG}
#define X86_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rAX, rDX, INVALID_SREG, INVALID_SREG}
#define X86_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, fr0, INVALID_REG, INVALID_SREG, INVALID_SREG}
#define X86_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, fr0, fr1, INVALID_SREG, INVALID_SREG}
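// Note (restating the conventions above): a wide (long) return value is described as the rAX/rDX
// pair with rDX holding the high half, and float/double return values come back in XMM0; the
// double template lists the fr0/fr1 single pair because, as noted above, reg locations always
// describe doubles as a pair of singles.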
enum X86ResourceEncodingPos {
kX86GPReg0 = 0,
kX86RegSP = 4,
kX86FPReg0 = 16, // xmm0 .. xmm7/xmm15.
kX86FPRegEnd = 32,
kX86RegEnd = kX86FPRegEnd,
};
#define ENCODE_X86_REG_LIST(N) (static_cast<uint64_t>(N))
#define ENCODE_X86_REG_SP (1ULL << kX86RegSP)
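// Illustrative only: these presumably feed per-instruction use/def resource masks. For example, an
// instruction that pushes to or pops from the stack would include ENCODE_X86_REG_SP in its mask,
// and ENCODE_X86_REG_LIST(1ULL << kX86FPReg0) would mark xmm0 as used or defined.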
enum X86NativeRegisterPool {
r0 = 0,
rAX = r0,
r1 = 1,
rCX = r1,
r2 = 2,
rDX = r2,
r3 = 3,
rBX = r3,
r4sp = 4,
rX86_SP = r4sp,
r4sib_no_index = r4sp,
r5 = 5,
rBP = r5,
r5sib_no_base = r5,
r6 = 6,
rSI = r6,
r7 = 7,
rDI = r7,
#ifndef TARGET_REX_SUPPORT
rRET = 8, // fake return address register for core spill mask.
#else
r8 = 8,
r9 = 9,
r10 = 10,
r11 = 11,
r12 = 12,
r13 = 13,
r14 = 14,
r15 = 15,
rRET = 16, // fake return address register for core spill mask.
#endif
fr0 = 0 + X86_FP_REG_OFFSET,
fr1 = 1 + X86_FP_REG_OFFSET,
fr2 = 2 + X86_FP_REG_OFFSET,
fr3 = 3 + X86_FP_REG_OFFSET,
fr4 = 4 + X86_FP_REG_OFFSET,
fr5 = 5 + X86_FP_REG_OFFSET,
fr6 = 6 + X86_FP_REG_OFFSET,
fr7 = 7 + X86_FP_REG_OFFSET,
fr8 = 8 + X86_FP_REG_OFFSET,
fr9 = 9 + X86_FP_REG_OFFSET,
fr10 = 10 + X86_FP_REG_OFFSET,
fr11 = 11 + X86_FP_REG_OFFSET,
fr12 = 12 + X86_FP_REG_OFFSET,
fr13 = 13 + X86_FP_REG_OFFSET,
fr14 = 14 + X86_FP_REG_OFFSET,
fr15 = 15 + X86_FP_REG_OFFSET,
};
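// Note on the r4sib_no_index / r5sib_no_base aliases above (background x86 encoding fact, not
// something defined by this file): in a SIB byte an index field of 4 (ESP's number) means "no
// index register", and a base field of 5 with mod == 00 means "no base, 32-bit displacement only",
// which is why those two register numbers double as the "no index"/"no base" markers.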
#define rX86_ARG0 rAX
#define rX86_ARG1 rCX
#define rX86_ARG2 rDX
#define rX86_ARG3 rBX
#define rX86_FARG0 rAX
#define rX86_FARG1 rCX
#define rX86_FARG2 rDX
#define rX86_FARG3 rBX
#define rX86_RET0 rAX
#define rX86_RET1 rDX
#define rX86_INVOKE_TGT rAX
#define rX86_LR INVALID_REG
#define rX86_SUSPEND INVALID_REG
#define rX86_SELF INVALID_REG
#define rX86_COUNT rCX
#define rX86_PC INVALID_REG
/*
* The following enum defines the list of supported X86 instructions by the
* assembler. Their corresponding EncodingMap positions will be defined in
* Assemble.cc.
*/
enum X86OpCode {
kX86First = 0,
kX8632BitData = kX86First, // data [31..0].
kX86Bkpt,
kX86Nop,
// Define groups of binary operations
// MR - Memory Register - opcode [base + disp], reg
// - lir operands - 0: base, 1: disp, 2: reg
// AR - Array Register - opcode [base + index * scale + disp], reg
// - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
// TR - Thread Register - opcode fs:[disp], reg - where fs: is equal to Thread::Current()
// - lir operands - 0: disp, 1: reg
// RR - Register Register - opcode reg1, reg2
// - lir operands - 0: reg1, 1: reg2
// RM - Register Memory - opcode reg, [base + disp]
// - lir operands - 0: reg, 1: base, 2: disp
// RA - Register Array - opcode reg, [base + index * scale + disp]
// - lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
// RT - Register Thread - opcode reg, fs:[disp] - where fs: is equal to Thread::Current()
// - lir operands - 0: reg, 1: disp
// RI - Register Immediate - opcode reg, #immediate
// - lir operands - 0: reg, 1: immediate
// MI - Memory Immediate - opcode [base + disp], #immediate
// - lir operands - 0: base, 1: disp, 2: immediate
// AI - Array Immediate - opcode [base + index * scale + disp], #immediate
// - lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
// TI - Thread Immediate - opcode fs:[disp], imm - where fs: is equal to Thread::Current()
// - lir operands - 0: disp, 1: imm
#define BinaryOpCode(opcode) \
opcode ## 8MR, opcode ## 8AR, opcode ## 8TR, \
opcode ## 8RR, opcode ## 8RM, opcode ## 8RA, opcode ## 8RT, \
opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, opcode ## 8TI, \
opcode ## 16MR, opcode ## 16AR, opcode ## 16TR, \
opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \
opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \
opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \
opcode ## 32MR, opcode ## 32AR, opcode ## 32TR, \
opcode ## 32RR, opcode ## 32RM, opcode ## 32RA, opcode ## 32RT, \
opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
opcode ## 32RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8
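// For example (expansion spelled out for readability, not an addition to the instruction set):
// BinaryOpCode(kX86Add) produces kX86Add8MR, kX86Add8AR, ..., kX86Add32AI8, kX86Add32TI8 - one
// enumerator per operand-size/addressing-form combination listed in the comment above.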
BinaryOpCode(kX86Add),
BinaryOpCode(kX86Or),
BinaryOpCode(kX86Adc),
BinaryOpCode(kX86Sbb),
BinaryOpCode(kX86And),
BinaryOpCode(kX86Sub),
BinaryOpCode(kX86Xor),
BinaryOpCode(kX86Cmp),
#undef BinaryOpCode
kX86Imul16RRI, kX86Imul16RMI, kX86Imul16RAI,
kX86Imul32RRI, kX86Imul32RMI, kX86Imul32RAI,
kX86Imul32RRI8, kX86Imul32RMI8, kX86Imul32RAI8,
kX86Mov8MR, kX86Mov8AR, kX86Mov8TR,
kX86Mov8RR, kX86Mov8RM, kX86Mov8RA, kX86Mov8RT,
kX86Mov8RI, kX86Mov8MI, kX86Mov8AI, kX86Mov8TI,
kX86Mov16MR, kX86Mov16AR, kX86Mov16TR,
kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT,
kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI,
kX86Mov32MR, kX86Mov32AR, kX86Mov32TR,
kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT,
kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI,
kX86Lea32RA,
// RC - Register CL - opcode reg, CL
// - lir operands - 0: reg, 1: CL
// MC - Memory CL - opcode [base + disp], CL
// - lir operands - 0: base, 1: disp, 2: CL
// AC - Array CL - opcode [base + index * scale + disp], CL
// - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: CL
#define BinaryShiftOpCode(opcode) \
opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, \
opcode ## 8RC, opcode ## 8MC, opcode ## 8AC, \
opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, \
opcode ## 16RC, opcode ## 16MC, opcode ## 16AC, \
opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, \
opcode ## 32RC, opcode ## 32MC, opcode ## 32AC
BinaryShiftOpCode(kX86Rol),
BinaryShiftOpCode(kX86Ror),
BinaryShiftOpCode(kX86Rcl),
BinaryShiftOpCode(kX86Rcr),
BinaryShiftOpCode(kX86Sal),
BinaryShiftOpCode(kX86Shr),
BinaryShiftOpCode(kX86Sar),
#undef BinaryShiftOpCode
kX86Cmc,
#define UnaryOpcode(opcode, reg, mem, array) \
opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array
UnaryOpcode(kX86Test, RI, MI, AI),
kX86Test32RR,
UnaryOpcode(kX86Not, R, M, A),
UnaryOpcode(kX86Neg, R, M, A),
UnaryOpcode(kX86Mul, DaR, DaM, DaA),
UnaryOpcode(kX86Imul, DaR, DaM, DaA),
UnaryOpcode(kX86Divmod, DaR, DaM, DaA),
UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
#undef UnaryOpcode
#define Binary0fOpCode(opcode) \
opcode ## RR, opcode ## RM, opcode ## RA
Binary0fOpCode(kX86Movsd),
kX86MovsdMR,
kX86MovsdAR,
Binary0fOpCode(kX86Movss),
kX86MovssMR,
kX86MovssAR,
Binary0fOpCode(kX86Cvtsi2sd), // int to double
Binary0fOpCode(kX86Cvtsi2ss), // int to float
Binary0fOpCode(kX86Cvttsd2si), // truncating double to int
Binary0fOpCode(kX86Cvttss2si), // truncating float to int
Binary0fOpCode(kX86Cvtsd2si), // rounding double to int
Binary0fOpCode(kX86Cvtss2si), // rounding float to int
Binary0fOpCode(kX86Ucomisd), // unordered double compare
Binary0fOpCode(kX86Ucomiss), // unordered float compare
Binary0fOpCode(kX86Comisd), // double compare
Binary0fOpCode(kX86Comiss), // float compare
Binary0fOpCode(kX86Orps), // or of floating point registers
Binary0fOpCode(kX86Xorps), // xor of floating point registers
Binary0fOpCode(kX86Addsd), // double add
Binary0fOpCode(kX86Addss), // float add
Binary0fOpCode(kX86Mulsd), // double multiply
Binary0fOpCode(kX86Mulss), // float multiply
Binary0fOpCode(kX86Cvtsd2ss), // double to float
Binary0fOpCode(kX86Cvtss2sd), // float to double
Binary0fOpCode(kX86Subsd), // double subtract
Binary0fOpCode(kX86Subss), // float subtract
Binary0fOpCode(kX86Divsd), // double divide
Binary0fOpCode(kX86Divss), // float divide
kX86PsrlqRI, // right shift of floating point registers
kX86PsllqRI, // left shift of floating point registers
Binary0fOpCode(kX86Movdxr), // move into xmm from gpr
kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR, // move into reg from xmm
kX86Set8R, kX86Set8M, kX86Set8A, // set byte depending on condition operand
kX86Mfence, // memory barrier
Binary0fOpCode(kX86Imul16), // 16bit multiply
Binary0fOpCode(kX86Imul32), // 32bit multiply
kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR, // compare and exchange
kX86LockCmpxchgRR, kX86LockCmpxchgMR, kX86LockCmpxchgAR, // locked compare and exchange
Binary0fOpCode(kX86Movzx8), // zero-extend 8-bit value
Binary0fOpCode(kX86Movzx16), // zero-extend 16-bit value
Binary0fOpCode(kX86Movsx8), // sign-extend 8-bit value
Binary0fOpCode(kX86Movsx16), // sign-extend 16-bit value
#undef Binary0fOpCode
kX86Jcc8, kX86Jcc32, // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned
kX86Jmp8, kX86Jmp32, // jmp rel8/32; lir operands - 0: rel, target assigned
kX86JmpR, // jmp reg; lir operands - 0: reg
kX86CallR, // call reg; lir operands - 0: reg
kX86CallM, // call [base + disp]; lir operands - 0: base, 1: disp
kX86CallA, // call [base + index * scale + disp]
// lir operands - 0: base, 1: index, 2: scale, 3: disp
kX86CallT, // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
kX86Ret, // ret; no lir operands
kX86StartOfMethod, // call 0; pop reg; sub reg, # - generate start of method into reg
// lir operands - 0: reg
kX86PcRelLoadRA, // mov reg, [base + index * scale + PC relative displacement]
// lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
kX86PcRelAdr, // mov reg, PC relative displacement; lir operands - 0: reg, 1: table
kX86Last
};
/* Instruction assembly field_loc kind */
enum X86EncodingKind {
kData, // Special case for raw data.
kNop, // Special case for variable length nop.
kNullary, // Opcode that takes no arguments.
kReg, kMem, kArray, // R, M and A instruction kinds.
kMemReg, kArrayReg, kThreadReg, // MR, AR and TR instruction kinds.
kRegReg, kRegMem, kRegArray, kRegThread, // RR, RM, RA and RT instruction kinds.
kRegRegStore, // RR following the store modrm reg-reg encoding rather than the load.
kRegImm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds.
kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds.
kMovRegImm, // Shorter form move RI.
kShiftRegImm, kShiftMemImm, kShiftArrayImm, // Shift opcode with immediate.
kShiftRegCl, kShiftMemCl, kShiftArrayCl, // Shift opcode with register CL.
kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds.
kRegCond, kMemCond, kArrayCond, // R, M, A instruction kinds followed by a condition.
kJmp, kJcc, kCall, // Branch instruction kinds.
kPcRel, // Operation with displacement that is PC relative.
kMacro, // An instruction composing multiple others.
kUnimplemented // Encoding used when an instruction isn't yet implemented.
};
/* Struct used to define the EncodingMap positions for each X86 opcode */
struct X86EncodingMap {
X86OpCode opcode; // e.g. kX86Add32RI
X86EncodingKind kind; // Used to discriminate how the skeleton fields below are interpreted.
uint64_t flags;
struct {
uint8_t prefix1; // non-zero => a prefix byte
uint8_t prefix2; // non-zero => a second prefix byte
uint8_t opcode; // 1 byte opcode
uint8_t extra_opcode1; // possible extra opcode byte
uint8_t extra_opcode2; // possible second extra opcode byte
// 3-bit opcode that gets encoded in the register bits of the modrm byte; its use is determined
// by the encoding kind.
uint8_t modrm_opcode;
uint8_t ax_opcode; // non-zero => shorter encoding for AX as a destination
uint8_t immediate_bytes; // number of bytes of immediate
} skeleton;
const char *name;
const char* fmt;
};
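// Illustrative sketch only (the authoritative table lives in Assemble.cc; the exact flag names and
// format strings used there may differ): an entry for a kRegReg arithmetic op could look roughly
// like
//   { kX86Add32RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE0 | REG_USE1 | SETS_CCODES,
//     { 0, 0, 0x03, 0, 0, 0, 0, 0 }, "Add32RR", "!0r,!1r" },
// i.e. no prefix bytes, the single 0x03 (ADD r32, r/m32) opcode byte, no extra opcode bytes, no
// modrm /digit, no shorter AX form and no immediate bytes.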
// FIXME: mem barrier type - what do we do for x86?
#define kSY 0
#define kST 0
// Offsets of high and low halves of a 64bit value.
#define LOWORD_OFFSET 0
#define HIWORD_OFFSET 4
// Segment override instruction prefix used for quick TLS access to Thread::Current().
#define THREAD_PREFIX 0x64
#define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
#define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))
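// Example: IS_SIMM8(127) and IS_SIMM8(-128) are true while IS_SIMM8(128) is false; values that fit
// are candidates for the shorter sign-extended 8-bit immediate forms (the *RI8/*MI8/*AI8 opcodes
// above) instead of a full 32-bit immediate.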
extern X86EncodingMap EncodingMap[kX86Last];
extern X86ConditionCode X86ConditionEncoding(ConditionCode cond);
} // namespace art
#endif // ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_