Diffstat (limited to 'vm/compiler/codegen')
23 files changed, 13425 insertions, 0 deletions
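This patch ports the Dalvik JIT code generator to MIPS; the small ARM hunk below adds the same dvmCompilerCacheClear() hook that the new MIPS backend implements. Both backends wipe reclaimed code-cache space with a byte pattern that decodes to an invalid instruction (0x00 on ARM, 0x66 on MIPS) so that a stray branch into stale translations traps instead of executing leftover code. A minimal sketch of the idea, outside the patch and using a hypothetical helper name:

#include <stddef.h>
#include <string.h>

/*
 * Illustrative sketch only -- not part of the patch.  The patch exposes the
 * same idea per target as dvmCompilerCacheClear(char *start, size_t size);
 * clearStaleTranslations() and its invalidOp parameter are hypothetical.
 */
static void clearStaleTranslations(char *start, size_t size, unsigned char invalidOp)
{
    /* Fill the reclaimed region with a byte that decodes to an invalid
     * instruction (the patch uses 0x00 on ARM and 0x66 on MIPS). */
    memset(start, invalidOp, size);

    /* A real backend would then flush the D-cache and invalidate the
     * I-cache over [start, start + size) -- see dvmCompilerCacheFlush(). */
}

The only property the per-target fill value needs is that it must not decode to a harmless instruction on that ISA.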
diff --git a/vm/compiler/codegen/arm/ArchUtility.cpp b/vm/compiler/codegen/arm/ArchUtility.cpp index 0bbb87588..a1cb7417c 100644 --- a/vm/compiler/codegen/arm/ArchUtility.cpp +++ b/vm/compiler/codegen/arm/ArchUtility.cpp @@ -425,3 +425,10 @@ int dvmCompilerCacheFlush(long start, long end, long flags) { return cacheflush(start, end, flags); } + +/* Target-specific cache clearing */ +void dvmCompilerCacheClear(char *start, size_t size) +{ + /* 0 is an invalid opcode for arm. */ + memset(start, 0, size); +} diff --git a/vm/compiler/codegen/mips/ArchUtility.cpp b/vm/compiler/codegen/mips/ArchUtility.cpp new file mode 100644 index 000000000..df7d00897 --- /dev/null +++ b/vm/compiler/codegen/mips/ArchUtility.cpp @@ -0,0 +1,356 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../../CompilerInternals.h" +#include "libdex/DexOpcodes.h" +#include "MipsLIR.h" + +/* For dumping instructions */ +#define MIPS_REG_COUNT 32 +static const char *mipsRegName[MIPS_REG_COUNT] = { + "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" +}; + +/* + * Interpret a format string and build a string no longer than size + * See format key in Assemble.c. 
+ */ +static void buildInsnString(const char *fmt, MipsLIR *lir, char* buf, + unsigned char *baseAddr, int size) +{ + int i; + char *bufEnd = &buf[size-1]; + const char *fmtEnd = &fmt[strlen(fmt)]; + char tbuf[256]; + char nc; + while (fmt < fmtEnd) { + int operand; + if (*fmt == '!') { + fmt++; + assert(fmt < fmtEnd); + nc = *fmt++; + if (nc=='!') { + strcpy(tbuf, "!"); + } else { + assert(fmt < fmtEnd); + assert((unsigned)(nc-'0') < 4); + operand = lir->operands[nc-'0']; + switch(*fmt++) { + case 'b': + strcpy(tbuf,"0000"); + for (i=3; i>= 0; i--) { + tbuf[i] += operand & 1; + operand >>= 1; + } + break; + case 's': + sprintf(tbuf,"$f%d",operand & FP_REG_MASK); + break; + case 'S': + assert(((operand & FP_REG_MASK) & 1) == 0); + sprintf(tbuf,"$f%d",operand & FP_REG_MASK); + break; + case 'h': + sprintf(tbuf,"%04x", operand); + break; + case 'M': + case 'd': + sprintf(tbuf,"%d", operand); + break; + case 'D': + sprintf(tbuf,"%d", operand+1); + break; + case 'E': + sprintf(tbuf,"%d", operand*4); + break; + case 'F': + sprintf(tbuf,"%d", operand*2); + break; + case 'c': + switch (operand) { + case kMipsCondEq: + strcpy(tbuf, "eq"); + break; + case kMipsCondNe: + strcpy(tbuf, "ne"); + break; + case kMipsCondLt: + strcpy(tbuf, "lt"); + break; + case kMipsCondGe: + strcpy(tbuf, "ge"); + break; + case kMipsCondGt: + strcpy(tbuf, "gt"); + break; + case kMipsCondLe: + strcpy(tbuf, "le"); + break; + case kMipsCondCs: + strcpy(tbuf, "cs"); + break; + case kMipsCondMi: + strcpy(tbuf, "mi"); + break; + default: + strcpy(tbuf, ""); + break; + } + break; + case 't': + sprintf(tbuf,"0x%08x (L%p)", + (int) baseAddr + lir->generic.offset + 4 + + (operand << 2), + lir->generic.target); + break; + case 'T': + sprintf(tbuf,"0x%08x", + (int) (operand << 2)); + break; + case 'u': { + int offset_1 = lir->operands[0]; + int offset_2 = NEXT_LIR(lir)->operands[0]; + intptr_t target = + ((((intptr_t) baseAddr + lir->generic.offset + 4) & + ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) & + 0xfffffffc; + sprintf(tbuf, "%p", (void *) target); + break; + } + + /* Nothing to print for BLX_2 */ + case 'v': + strcpy(tbuf, "see above"); + break; + case 'r': + assert(operand >= 0 && operand < MIPS_REG_COUNT); + strcpy(tbuf, mipsRegName[operand]); + break; + default: + strcpy(tbuf,"DecodeError"); + break; + } + if (buf+strlen(tbuf) <= bufEnd) { + strcpy(buf, tbuf); + buf += strlen(tbuf); + } else { + break; + } + } + } else { + *buf++ = *fmt++; + } + if (buf == bufEnd) + break; + } + *buf = 0; +} + +void dvmDumpResourceMask(LIR *lir, u8 mask, const char *prefix) +{ + char buf[256]; + buf[0] = 0; + MipsLIR *mipsLIR = (MipsLIR *) lir; + + if (mask == ENCODE_ALL) { + strcpy(buf, "all"); + } else { + char num[8]; + int i; + + for (i = 0; i < kRegEnd; i++) { + if (mask & (1ULL << i)) { + sprintf(num, "%d ", i); + strcat(buf, num); + } + } + + if (mask & ENCODE_CCODE) { + strcat(buf, "cc "); + } + if (mask & ENCODE_FP_STATUS) { + strcat(buf, "fpcc "); + } + /* Memory bits */ + if (mipsLIR && (mask & ENCODE_DALVIK_REG)) { + sprintf(buf + strlen(buf), "dr%d%s", mipsLIR->aliasInfo & 0xffff, + (mipsLIR->aliasInfo & 0x80000000) ? 
"(+1)" : ""); + } + if (mask & ENCODE_LITERAL) { + strcat(buf, "lit "); + } + + if (mask & ENCODE_HEAP_REF) { + strcat(buf, "heap "); + } + if (mask & ENCODE_MUST_NOT_ALIAS) { + strcat(buf, "noalias "); + } + } + if (buf[0]) { + LOGD("%s: %s", prefix, buf); + } +} + +/* + * Debugging macros + */ +#define DUMP_RESOURCE_MASK(X) +#define DUMP_SSA_REP(X) + +/* Pretty-print a LIR instruction */ +void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr) +{ + MipsLIR *lir = (MipsLIR *) arg; + char buf[256]; + char opName[256]; + int offset = lir->generic.offset; + int dest = lir->operands[0]; + const bool dumpNop = false; + + /* Handle pseudo-ops individually, and all regular insns as a group */ + switch(lir->opcode) { + case kMipsChainingCellBottom: + LOGD("-------- end of chaining cells (0x%04x)", offset); + break; + case kMipsPseudoBarrier: + LOGD("-------- BARRIER"); + break; + case kMipsPseudoExtended: + /* intentional fallthrough */ + case kMipsPseudoSSARep: + DUMP_SSA_REP(LOGD("-------- %s", (char *) dest)); + break; + case kMipsPseudoChainingCellBackwardBranch: + LOGD("L%p:", lir); + LOGD("-------- chaining cell (backward branch): 0x%04x", dest); + break; + case kMipsPseudoChainingCellNormal: + LOGD("L%p:", lir); + LOGD("-------- chaining cell (normal): 0x%04x", dest); + break; + case kMipsPseudoChainingCellHot: + LOGD("L%p:", lir); + LOGD("-------- chaining cell (hot): 0x%04x", dest); + break; + case kMipsPseudoChainingCellInvokePredicted: + LOGD("L%p:", lir); + LOGD("-------- chaining cell (predicted): %s%s", + dest ? ((Method *) dest)->clazz->descriptor : "", + dest ? ((Method *) dest)->name : "N/A"); + break; + case kMipsPseudoChainingCellInvokeSingleton: + LOGD("L%p:", lir); + LOGD("-------- chaining cell (invoke singleton): %s%s/%p", + ((Method *)dest)->clazz->descriptor, + ((Method *)dest)->name, + ((Method *)dest)->insns); + break; + case kMipsPseudoEntryBlock: + LOGD("-------- entry offset: 0x%04x", dest); + break; + case kMipsPseudoDalvikByteCodeBoundary: + LOGD("-------- dalvik offset: 0x%04x @ %s", dest, + (char *) lir->operands[1]); + break; + case kMipsPseudoExitBlock: + LOGD("-------- exit offset: 0x%04x", dest); + break; + case kMipsPseudoPseudoAlign4: + LOGD("%p (%04x): .align4", baseAddr + offset, offset); + break; + case kMipsPseudoPCReconstructionCell: + LOGD("L%p:", lir); + LOGD("-------- reconstruct dalvik PC : 0x%04x @ +0x%04x", dest, + lir->operands[1]); + break; + case kMipsPseudoPCReconstructionBlockLabel: + /* Do nothing */ + break; + case kMipsPseudoEHBlockLabel: + LOGD("Exception_Handling:"); + break; + case kMipsPseudoTargetLabel: + case kMipsPseudoNormalBlockLabel: + LOGD("L%p:", lir); + break; + default: + if (lir->flags.isNop && !dumpNop) { + break; + } + buildInsnString(EncodingMap[lir->opcode].name, lir, opName, + baseAddr, 256); + buildInsnString(EncodingMap[lir->opcode].fmt, lir, buf, baseAddr, + 256); + LOGD("%p (%04x): %08x %-9s%s%s", + baseAddr + offset, offset, *(u4 *)(baseAddr + offset), opName, buf, + lir->flags.isNop ? 
"(nop)" : ""); + break; + } + + if (lir->useMask && (!lir->flags.isNop || dumpNop)) { + DUMP_RESOURCE_MASK(dvmDumpResourceMask((LIR *) lir, + lir->useMask, "use")); + } + if (lir->defMask && (!lir->flags.isNop || dumpNop)) { + DUMP_RESOURCE_MASK(dvmDumpResourceMask((LIR *) lir, + lir->defMask, "def")); + } +} + +/* Dump instructions and constant pool contents */ +void dvmCompilerCodegenDump(CompilationUnit *cUnit) +{ + LOGD("Dumping LIR insns"); + LIR *lirInsn; + MipsLIR *mipsLIR; + + LOGD("installed code is at %p", cUnit->baseAddr); + LOGD("total size is %d bytes", cUnit->totalSize); + for (lirInsn = cUnit->firstLIRInsn; lirInsn; lirInsn = lirInsn->next) { + dvmDumpLIRInsn(lirInsn, (unsigned char *) cUnit->baseAddr); + } + for (lirInsn = cUnit->classPointerList; lirInsn; lirInsn = lirInsn->next) { + mipsLIR = (MipsLIR *) lirInsn; + LOGD("%p (%04x): .class (%s)", + (char*)cUnit->baseAddr + mipsLIR->generic.offset, + mipsLIR->generic.offset, + ((CallsiteInfo *) mipsLIR->operands[0])->classDescriptor); + } + for (lirInsn = cUnit->literalList; lirInsn; lirInsn = lirInsn->next) { + mipsLIR = (MipsLIR *) lirInsn; + LOGD("%p (%04x): .word (%#x)", + (char*)cUnit->baseAddr + mipsLIR->generic.offset, + mipsLIR->generic.offset, + mipsLIR->operands[0]); + } +} + +/* Target-specific cache flushing */ +int dvmCompilerCacheFlush(long start, long end, long flags) +{ + return cacheflush(start, end, flags); +} + +/* Target-specific cache clearing */ +void dvmCompilerCacheClear(char *start, size_t size) +{ + /* 0x66 is an invalid opcode for mips. */ + memset(start, 0x66, size); +} diff --git a/vm/compiler/codegen/mips/Assemble.cpp b/vm/compiler/codegen/mips/Assemble.cpp new file mode 100644 index 000000000..a97857d12 --- /dev/null +++ b/vm/compiler/codegen/mips/Assemble.cpp @@ -0,0 +1,2324 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Dalvik.h" +#include "libdex/DexOpcodes.h" + +#include "../../CompilerInternals.h" +#include "MipsLIR.h" +#include "Codegen.h" +#include <unistd.h> /* for cacheflush */ +#include <sys/mman.h> /* for protection change */ + +#define MAX_ASSEMBLER_RETRIES 10 + +/* + * opcode: MipsOpCode enum + * skeleton: pre-designated bit-pattern for this opcode + * k0: key to applying ds/de + * ds: dest start bit position + * de: dest end bit position + * k1: key to applying s1s/s1e + * s1s: src1 start bit position + * s1e: src1 end bit position + * k2: key to applying s2s/s2e + * s2s: src2 start bit position + * s2e: src2 end bit position + * operands: number of operands (for sanity check purposes) + * name: mnemonic name + * fmt: for pretty-printing + */ +#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \ + k3, k3s, k3e, flags, name, fmt, size) \ + {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \ + {k3, k3s, k3e}}, opcode, flags, name, fmt, size} + +/* Instruction dump string format keys: !pf, where "!" 
is the start + * of the key, "p" is which numeric operand to use and "f" is the + * print format. + * + * [p]ositions: + * 0 -> operands[0] (dest) + * 1 -> operands[1] (src1) + * 2 -> operands[2] (src2) + * 3 -> operands[3] (extra) + * + * [f]ormats: + * h -> 4-digit hex + * d -> decimal + * E -> decimal*4 + * F -> decimal*2 + * c -> branch condition (beq, bne, etc.) + * t -> pc-relative target + * T -> pc-region target + * u -> 1st half of bl[x] target + * v -> 2nd half ob bl[x] target + * R -> register list + * s -> single precision floating point register + * S -> double precision floating point register + * m -> Thumb2 modified immediate + * n -> complimented Thumb2 modified immediate + * M -> Thumb2 16-bit zero-extended immediate + * b -> 4-digit binary + * + * [!] escape. To insert "!", use "!!" + */ +/* NOTE: must be kept in sync with enum MipsOpcode from MipsLIR.h */ +MipsEncodingMap EncodingMap[kMipsLast] = { + ENCODING_MAP(kMips32BitData, 0x00000000, + kFmtBitBlt, 31, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP, + "data", "0x!0h(!0d)", 2), + ENCODING_MAP(kMipsAddiu, 0x24000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "addiu", "!0r,!1r,0x!2h(!2d)", 2), + ENCODING_MAP(kMipsAddu, 0x00000021, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "addu", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsAnd, 0x00000024, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "and", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsAndi, 0x30000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "andi", "!0r,!1r,0x!2h(!2d)", 2), + ENCODING_MAP(kMipsB, 0x10000000, + kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH, + "b", "!0t", 2), + ENCODING_MAP(kMipsBal, 0x04110000, + kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH | REG_DEF_LR, + "bal", "!0t", 2), + ENCODING_MAP(kMipsBeq, 0x10000000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_USE01, + "beq", "!0r,!1r,!2t", 2), + ENCODING_MAP(kMipsBeqz, 0x10000000, /* same as beq above with t = $zero */ + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0, + "beqz", "!0r,!1t", 2), + ENCODING_MAP(kMipsBgez, 0x04010000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0, + "bgez", "!0r,!1t", 2), + ENCODING_MAP(kMipsBgtz, 0x1C000000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0, + "bgtz", "!0r,!1t", 2), + ENCODING_MAP(kMipsBlez, 0x18000000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0, + "blez", "!0r,!1t", 2), + ENCODING_MAP(kMipsBltz, 0x04000000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0, + "bltz", "!0r,!1t", 2), + ENCODING_MAP(kMipsBnez, 0x14000000, /* same as bne below with t = $zero */ + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0, + "bnez", "!0r,!1t", 2), + 
ENCODING_MAP(kMipsBne, 0x14000000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_USE01, + "bne", "!0r,!1r,!2t", 2), + ENCODING_MAP(kMipsDiv, 0x0000001a, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtBitBlt, 25, 21, + kFmtBitBlt, 20, 16, IS_QUAD_OP | REG_DEF01 | REG_USE23, + "div", "!2r,!3r", 2), +#if __mips_isa_rev>=2 + ENCODING_MAP(kMipsExt, 0x7c000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 10, 6, + kFmtBitBlt, 15, 11, IS_QUAD_OP | REG_DEF0 | REG_USE1, + "ext", "!0r,!1r,!2d,!3D", 2), +#endif + ENCODING_MAP(kMipsJal, 0x0c000000, + kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR, + "jal", "!0T(!0E)", 2), + ENCODING_MAP(kMipsJalr, 0x00000009, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF0_USE1, + "jalr", "!0r,!1r", 2), + ENCODING_MAP(kMipsJr, 0x00000008, + kFmtBitBlt, 25, 21, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0, + "jr", "!0r", 2), + ENCODING_MAP(kMipsLahi, 0x3C000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "lahi/lui", "!0r,0x!1h(!1d)", 2), + ENCODING_MAP(kMipsLalo, 0x34000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "lalo/ori", "!0r,!1r,0x!2h(!2d)", 2), + ENCODING_MAP(kMipsLui, 0x3C000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "lui", "!0r,0x!1h(!1d)", 2), + ENCODING_MAP(kMipsLb, 0x80000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lb", "!0r,!1d(!2r)", 2), + ENCODING_MAP(kMipsLbu, 0x90000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lbu", "!0r,!1d(!2r)", 2), + ENCODING_MAP(kMipsLh, 0x84000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lh", "!0r,!1d(!2r)", 2), + ENCODING_MAP(kMipsLhu, 0x94000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lhu", "!0r,!1d(!2r)", 2), + ENCODING_MAP(kMipsLw, 0x8C000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lw", "!0r,!1d(!2r)", 2), + ENCODING_MAP(kMipsMfhi, 0x00000010, + kFmtBitBlt, 15, 11, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mfhi", "!0r", 2), + ENCODING_MAP(kMipsMflo, 0x00000012, + kFmtBitBlt, 15, 11, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mflo", "!0r", 2), + ENCODING_MAP(kMipsMove, 0x00000025, /* or using zero reg */ + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "move", "!0r,!1r", 2), + ENCODING_MAP(kMipsMovz, 0x0000000a, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "movz", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsMul, 0x70000002, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "mul", "!0r,!1r,!2r", 2), + 
ENCODING_MAP(kMipsNop, 0x00000000, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND, + "nop", "", 2), + ENCODING_MAP(kMipsNor, 0x00000027, /* used for "not" too */ + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "nor", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsOr, 0x00000025, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "or", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsOri, 0x34000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "ori", "!0r,!1r,0x!2h(!2d)", 2), + ENCODING_MAP(kMipsPref, 0xCC000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE2, + "pref", "!0d,!1d(!2r)", 2), + ENCODING_MAP(kMipsSb, 0xA0000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE, + "sb", "!0r,!1d(!2r)", 2), +#if __mips_isa_rev>=2 + ENCODING_MAP(kMipsSeb, 0x7c000420, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "seb", "!0r,!1r", 2), + ENCODING_MAP(kMipsSeh, 0x7c000620, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "seh", "!0r,!1r", 2), +#endif + ENCODING_MAP(kMipsSh, 0xA4000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE, + "sh", "!0r,!1d(!2r)", 2), + ENCODING_MAP(kMipsSll, 0x00000000, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "sll", "!0r,!1r,0x!2h(!2d)", 2), + ENCODING_MAP(kMipsSllv, 0x00000004, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sllv", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsSlt, 0x0000002a, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "slt", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsSlti, 0x28000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "slti", "!0r,!1r,0x!2h(!2d)", 2), + ENCODING_MAP(kMipsSltu, 0x0000002b, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sltu", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsSra, 0x00000003, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "sra", "!0r,!1r,0x!2h(!2d)", 2), + ENCODING_MAP(kMipsSrav, 0x00000007, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "srav", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsSrl, 0x00000002, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "srl", "!0r,!1r,0x!2h(!2d)", 2), + ENCODING_MAP(kMipsSrlv, 0x00000006, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "srlv", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsSubu, 0x00000023, /* used for "neg" too */ + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "subu", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsSw, 
0xAC000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE, + "sw", "!0r,!1d(!2r)", 2), + ENCODING_MAP(kMipsXor, 0x00000026, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "xor", "!0r,!1r,!2r", 2), + ENCODING_MAP(kMipsXori, 0x38000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "xori", "!0r,!1r,0x!2h(!2d)", 2), +#ifdef __mips_hard_float + ENCODING_MAP(kMipsFadds, 0x46000000, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "add.s", "!0s,!1s,!2s", 2), + ENCODING_MAP(kMipsFsubs, 0x46000001, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sub.s", "!0s,!1s,!2s", 2), + ENCODING_MAP(kMipsFmuls, 0x46000002, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "mul.s", "!0s,!1s,!2s", 2), + ENCODING_MAP(kMipsFdivs, 0x46000003, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "div.s", "!0s,!1s,!2s", 2), + ENCODING_MAP(kMipsFaddd, 0x46200000, + kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "add.d", "!0S,!1S,!2S", 2), + ENCODING_MAP(kMipsFsubd, 0x46200001, + kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sub.d", "!0S,!1S,!2S", 2), + ENCODING_MAP(kMipsFmuld, 0x46200002, + kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "mul.d", "!0S,!1S,!2S", 2), + ENCODING_MAP(kMipsFdivd, 0x46200003, + kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "div.d", "!0S,!1S,!2S", 2), + ENCODING_MAP(kMipsFcvtsd, 0x46200020, + kFmtSfp, 10, 6, kFmtDfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.s.d", "!0s,!1S", 2), + ENCODING_MAP(kMipsFcvtsw, 0x46800020, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.s.w", "!0s,!1s", 2), + ENCODING_MAP(kMipsFcvtds, 0x46000021, + kFmtDfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.d.s", "!0S,!1s", 2), + ENCODING_MAP(kMipsFcvtdw, 0x46800021, + kFmtDfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.d.w", "!0S,!1s", 2), + ENCODING_MAP(kMipsFcvtws, 0x46000024, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.w.s", "!0s,!1s", 2), + ENCODING_MAP(kMipsFcvtwd, 0x46200024, + kFmtSfp, 10, 6, kFmtDfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.w.d", "!0s,!1S", 2), + ENCODING_MAP(kMipsFmovs, 0x46000006, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov.s", "!0s,!1s", 2), + ENCODING_MAP(kMipsFmovd, 0x46200006, + kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov.d", "!0S,!1S", 2), + ENCODING_MAP(kMipsFlwc1, 0xC4000000, + kFmtSfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lwc1", "!0s,!1d(!2r)", 2), + 
ENCODING_MAP(kMipsFldc1, 0xD4000000, + kFmtDfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "ldc1", "!0S,!1d(!2r)", 2), + ENCODING_MAP(kMipsFswc1, 0xE4000000, + kFmtSfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE, + "swc1", "!0s,!1d(!2r)", 2), + ENCODING_MAP(kMipsFsdc1, 0xF4000000, + kFmtDfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE, + "sdc1", "!0S,!1d(!2r)", 2), + ENCODING_MAP(kMipsMfc1, 0x44000000, + kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mfc1", "!0r,!1s", 2), + ENCODING_MAP(kMipsMtc1, 0x44800000, + kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1, + "mtc1", "!0r,!1s", 2), +#endif + ENCODING_MAP(kMipsUndefined, 0x64000000, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND, + "undefined", "", 2), +}; + +/* Track the number of times that the code cache is patched */ +#if defined(WITH_JIT_TUNING) +#define UPDATE_CODE_CACHE_PATCHES() (gDvmJit.codeCachePatches++) +#else +#define UPDATE_CODE_CACHE_PATCHES() +#endif + +/* Write the numbers in the constant and class pool to the output stream */ +static void installLiteralPools(CompilationUnit *cUnit) +{ + int *dataPtr = (int *) ((char *) cUnit->baseAddr + cUnit->dataOffset); + /* Install number of class pointer literals */ + *dataPtr++ = cUnit->numClassPointers; + MipsLIR *dataLIR = (MipsLIR *) cUnit->classPointerList; + while (dataLIR) { + /* + * Install the callsiteinfo pointers into the cells for now. They will + * be converted into real pointers in dvmJitInstallClassObjectPointers. + */ + *dataPtr++ = dataLIR->operands[0]; + dataLIR = NEXT_LIR(dataLIR); + } + dataLIR = (MipsLIR *) cUnit->literalList; + while (dataLIR) { + *dataPtr++ = dataLIR->operands[0]; + dataLIR = NEXT_LIR(dataLIR); + } +} + +/* + * Assemble the LIR into binary instruction format. Note that we may + * discover that pc-relative displacements may not fit the selected + * instruction. In those cases we will try to substitute a new code + * sequence or request that the trace be shortened and retried. 
+ */ +static AssemblerStatus assembleInstructions(CompilationUnit *cUnit, + intptr_t startAddr) +{ + int *bufferAddr = (int *) cUnit->codeBuffer; + MipsLIR *lir; + + for (lir = (MipsLIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) { + if (lir->opcode < 0) { + continue; + } + + + if (lir->flags.isNop) { + continue; + } + + if (lir->opcode == kMipsB || lir->opcode == kMipsBal) { + MipsLIR *targetLIR = (MipsLIR *) lir->generic.target; + intptr_t pc = lir->generic.offset + 4; + intptr_t target = targetLIR->generic.offset; + int delta = target - pc; + if (delta & 0x3) { + LOGE("PC-rel distance is not multiple of 4: %d", delta); + dvmAbort(); + } + if (delta > 131068 || delta < -131069) { + LOGE("Unconditional branch distance out of range: %d", delta); + dvmAbort(); + } + lir->operands[0] = delta >> 2; + } else if (lir->opcode >= kMipsBeqz && lir->opcode <= kMipsBnez) { + MipsLIR *targetLIR = (MipsLIR *) lir->generic.target; + intptr_t pc = lir->generic.offset + 4; + intptr_t target = targetLIR->generic.offset; + int delta = target - pc; + if (delta & 0x3) { + LOGE("PC-rel distance is not multiple of 4: %d", delta); + dvmAbort(); + } + if (delta > 131068 || delta < -131069) { + LOGE("Conditional branch distance out of range: %d", delta); + dvmAbort(); + } + lir->operands[1] = delta >> 2; + } else if (lir->opcode == kMipsBeq || lir->opcode == kMipsBne) { + MipsLIR *targetLIR = (MipsLIR *) lir->generic.target; + intptr_t pc = lir->generic.offset + 4; + intptr_t target = targetLIR->generic.offset; + int delta = target - pc; + if (delta & 0x3) { + LOGE("PC-rel distance is not multiple of 4: %d", delta); + dvmAbort(); + } + if (delta > 131068 || delta < -131069) { + LOGE("Conditional branch distance out of range: %d", delta); + dvmAbort(); + } + lir->operands[2] = delta >> 2; + } else if (lir->opcode == kMipsJal) { + intptr_t curPC = (startAddr + lir->generic.offset + 4) & ~3; + intptr_t target = lir->operands[0]; + /* ensure PC-region branch can be used */ + assert((curPC & 0xF0000000) == (target & 0xF0000000)); + if (target & 0x3) { + LOGE("Jump target is not multiple of 4: %d", target); + dvmAbort(); + } + lir->operands[0] = target >> 2; + } else if (lir->opcode == kMipsLahi) { /* load address hi (via lui) */ + MipsLIR *targetLIR = (MipsLIR *) lir->generic.target; + intptr_t target = startAddr + targetLIR->generic.offset; + lir->operands[1] = target >> 16; + } else if (lir->opcode == kMipsLalo) { /* load address lo (via ori) */ + MipsLIR *targetLIR = (MipsLIR *) lir->generic.target; + intptr_t target = startAddr + targetLIR->generic.offset; + lir->operands[2] = lir->operands[2] + target; + } + + + MipsEncodingMap *encoder = &EncodingMap[lir->opcode]; + u4 bits = encoder->skeleton; + int i; + for (i = 0; i < 4; i++) { + u4 operand; + u4 value; + operand = lir->operands[i]; + switch(encoder->fieldLoc[i].kind) { + case kFmtUnused: + break; + case kFmtBitBlt: + if (encoder->fieldLoc[i].start == 0 && encoder->fieldLoc[i].end == 31) { + value = operand; + } else { + value = (operand << encoder->fieldLoc[i].start) & + ((1 << (encoder->fieldLoc[i].end + 1)) - 1); + } + bits |= value; + break; + case kFmtDfp: { + assert(DOUBLEREG(operand)); + assert((operand & 0x1) == 0); + value = ((operand & FP_REG_MASK) << encoder->fieldLoc[i].start) & + ((1 << (encoder->fieldLoc[i].end + 1)) - 1); + bits |= value; + break; + } + case kFmtSfp: + assert(SINGLEREG(operand)); + value = ((operand & FP_REG_MASK) << encoder->fieldLoc[i].start) & + ((1 << (encoder->fieldLoc[i].end + 1)) - 1); + bits |= value; + break; + 
default: + assert(0); + } + } + assert(encoder->size == 2); + *bufferAddr++ = bits; + } + return kSuccess; +} + +static int assignLiteralOffsetCommon(LIR *lir, int offset) +{ + for (;lir != NULL; lir = lir->next) { + lir->offset = offset; + offset += 4; + } + return offset; +} + +/* Determine the offset of each literal field */ +static int assignLiteralOffset(CompilationUnit *cUnit, int offset) +{ + /* Reserved for the size field of class pointer pool */ + offset += 4; + offset = assignLiteralOffsetCommon(cUnit->classPointerList, offset); + offset = assignLiteralOffsetCommon(cUnit->literalList, offset); + return offset; +} + +/* + * Translation layout in the code cache. Note that the codeAddress pointer + * in JitTable will point directly to the code body (field codeAddress). The + * chain cell offset codeAddress - 4, and the address of the trace profile + * counter is at codeAddress - 8. + * + * +----------------------------+ + * | Trace Profile Counter addr | -> 4 bytes (PROF_COUNTER_ADDR_SIZE) + * +----------------------------+ + * +--| Offset to chain cell counts| -> 4 bytes (CHAIN_CELL_OFFSET_SIZE) + * | +----------------------------+ + * | | Trace profile code | <- entry point when profiling + * | . - - - - - - - . + * | | Code body | <- entry point when not profiling + * | . . + * | | | + * | +----------------------------+ + * | | Chaining Cells | -> 16/20 bytes, 4 byte aligned + * | . . + * | . . + * | | | + * | +----------------------------+ + * | | Gap for large switch stmt | -> # cases >= MAX_CHAINED_SWITCH_CASES + * | +----------------------------+ + * +->| Chaining cell counts | -> 8 bytes, chain cell counts by type + * +----------------------------+ + * | Trace description | -> variable sized + * . . + * | | + * +----------------------------+ + * | # Class pointer pool size | -> 4 bytes + * +----------------------------+ + * | Class pointer pool | -> 4-byte aligned, variable size + * . . + * . . + * | | + * +----------------------------+ + * | Literal pool | -> 4-byte aligned, variable size + * . . + * . . + * | | + * +----------------------------+ + * + */ + +#define PROF_COUNTER_ADDR_SIZE 4 +#define CHAIN_CELL_OFFSET_SIZE 4 + +/* + * Utility functions to navigate various parts in a trace. If we change the + * layout/offset in the future, we just modify these functions and we don't need + * to propagate the changes to all the use cases. 
+ */ +static inline char *getTraceBase(const JitEntry *p) +{ + return (char*)p->codeAddress - + (PROF_COUNTER_ADDR_SIZE + CHAIN_CELL_OFFSET_SIZE); +} + +/* Handy function to retrieve the profile count */ +static inline JitTraceCounter_t getProfileCount(const JitEntry *entry) +{ + if (entry->dPC == 0 || entry->codeAddress == 0 || + entry->codeAddress == dvmCompilerGetInterpretTemplate()) + return 0; + + JitTraceCounter_t **p = (JitTraceCounter_t **) getTraceBase(entry); + + return **p; +} + +/* Handy function to reset the profile count */ +static inline void resetProfileCount(const JitEntry *entry) +{ + if (entry->dPC == 0 || entry->codeAddress == 0 || + entry->codeAddress == dvmCompilerGetInterpretTemplate()) + return; + + JitTraceCounter_t **p = (JitTraceCounter_t **) getTraceBase(entry); + + **p = 0; +} + +/* Get the pointer of the chain cell count */ +static inline ChainCellCounts* getChainCellCountsPointer(const char *base) +{ + /* 4 is the size of the profile count */ + u4 *chainCellOffsetP = (u4 *) (base + PROF_COUNTER_ADDR_SIZE); + u4 chainCellOffset = *chainCellOffsetP; + return (ChainCellCounts *) ((char *) chainCellOffsetP + chainCellOffset); +} + +/* Get the size of all chaining cells */ +static inline u4 getChainCellSize(const ChainCellCounts* pChainCellCounts) +{ + int cellSize = 0; + int i; + + /* Get total count of chain cells */ + for (i = 0; i < kChainingCellGap; i++) { + if (i != kChainingCellInvokePredicted) { + cellSize += pChainCellCounts->u.count[i] * + (CHAIN_CELL_NORMAL_SIZE >> 2); + } else { + cellSize += pChainCellCounts->u.count[i] * + (CHAIN_CELL_PREDICTED_SIZE >> 2); + } + } + return cellSize; +} + +/* Get the starting pointer of the trace description section */ +static JitTraceDescription* getTraceDescriptionPointer(const char *base) +{ + ChainCellCounts* pCellCounts = getChainCellCountsPointer(base); + return (JitTraceDescription*) ((char*)pCellCounts + sizeof(*pCellCounts)); +} + +/* Get the size of a trace description */ +static int getTraceDescriptionSize(const JitTraceDescription *desc) +{ + int runCount; + /* Trace end is always of non-meta type (ie isCode == true) */ + for (runCount = 0; ; runCount++) { + if (desc->trace[runCount].isCode && + desc->trace[runCount].info.frag.runEnd) + break; + } + return sizeof(JitTraceDescription) + ((runCount+1) * sizeof(JitTraceRun)); +} + +#if defined(SIGNATURE_BREAKPOINT) +/* Inspect the assembled instruction stream to find potential matches */ +static void matchSignatureBreakpoint(const CompilationUnit *cUnit, + unsigned int size) +{ + unsigned int i, j; + u4 *ptr = (u4 *) cUnit->codeBuffer; + + for (i = 0; i < size - gDvmJit.signatureBreakpointSize + 1; i++) { + if (ptr[i] == gDvmJit.signatureBreakpoint[0]) { + for (j = 1; j < gDvmJit.signatureBreakpointSize; j++) { + if (ptr[i+j] != gDvmJit.signatureBreakpoint[j]) { + break; + } + } + if (j == gDvmJit.signatureBreakpointSize) { + LOGD("Signature match starting from offset %#x (%d words)", + i*4, gDvmJit.signatureBreakpointSize); + int descSize = getTraceDescriptionSize(cUnit->traceDesc); + JitTraceDescription *newCopy = + (JitTraceDescription *) malloc(descSize); + memcpy(newCopy, cUnit->traceDesc, descSize); + dvmCompilerWorkEnqueue(NULL, kWorkOrderTraceDebug, newCopy); + break; + } + } + } +} +#endif + +/* + * Go over each instruction in the list and calculate the offset from the top + * before sending them off to the assembler. If out-of-range branch distance is + * seen rearrange the instructions a bit to correct it. 
+ */ +void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info) +{ + MipsLIR *mipsLIR; + int offset = 0; + int i; + ChainCellCounts chainCellCounts; + int descSize = (cUnit->jitMode == kJitMethod) ? + 0 : getTraceDescriptionSize(cUnit->traceDesc); + int chainingCellGap = 0; + + info->instructionSet = cUnit->instructionSet; + + /* Beginning offset needs to allow space for chain cell offset */ + for (mipsLIR = (MipsLIR *) cUnit->firstLIRInsn; + mipsLIR; + mipsLIR = NEXT_LIR(mipsLIR)) { + mipsLIR->generic.offset = offset; + if (mipsLIR->opcode >= 0 && !mipsLIR->flags.isNop) { + mipsLIR->flags.size = EncodingMap[mipsLIR->opcode].size * 2; + offset += mipsLIR->flags.size; + } + /* Pseudo opcodes don't consume space */ + } + + /* Const values have to be word aligned */ + offset = (offset + 3) & ~3; + + u4 chainCellOffset = offset; + MipsLIR *chainCellOffsetLIR = NULL; + + if (cUnit->jitMode != kJitMethod) { + /* + * Get the gap (# of u4) between the offset of chaining cell count and + * the bottom of real chaining cells. If the translation has chaining + * cells, the gap is guaranteed to be multiples of 4. + */ + chainingCellGap = (offset - cUnit->chainingCellBottom->offset) >> 2; + + /* Add space for chain cell counts & trace description */ + chainCellOffsetLIR = (MipsLIR *) cUnit->chainCellOffsetLIR; + assert(chainCellOffsetLIR); + assert(chainCellOffset < 0x10000); + assert(chainCellOffsetLIR->opcode == kMips32BitData && + chainCellOffsetLIR->operands[0] == CHAIN_CELL_OFFSET_TAG); + + /* + * Adjust the CHAIN_CELL_OFFSET_TAG LIR's offset to remove the + * space occupied by the pointer to the trace profiling counter. + */ + chainCellOffsetLIR->operands[0] = chainCellOffset - 4; + + offset += sizeof(chainCellCounts) + descSize; + + assert((offset & 0x3) == 0); /* Should still be word aligned */ + } + + /* Set up offsets for literals */ + cUnit->dataOffset = offset; + + /* + * Assign each class pointer/constant an offset from the beginning of the + * compilation unit. + */ + offset = assignLiteralOffset(cUnit, offset); + + cUnit->totalSize = offset; + + if (gDvmJit.codeCacheByteUsed + cUnit->totalSize > gDvmJit.codeCacheSize) { + gDvmJit.codeCacheFull = true; + info->discardResult = true; + return; + } + + /* Allocate enough space for the code block */ + cUnit->codeBuffer = (unsigned char *)dvmCompilerNew(chainCellOffset, true); + if (cUnit->codeBuffer == NULL) { + LOGE("Code buffer allocation failure"); + info->discardResult = true; + return; + } + + /* + * Attempt to assemble the trace. Note that assembleInstructions + * may rewrite the code sequence and request a retry. 
+ */ + cUnit->assemblerStatus = assembleInstructions(cUnit, + (intptr_t) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed); + + switch(cUnit->assemblerStatus) { + case kSuccess: + break; + case kRetryAll: + if (cUnit->assemblerRetries < MAX_ASSEMBLER_RETRIES) { + if (cUnit->jitMode != kJitMethod) { + /* Restore pristine chain cell marker on retry */ + chainCellOffsetLIR->operands[0] = CHAIN_CELL_OFFSET_TAG; + } + return; + } + /* Too many retries - reset and try cutting the trace in half */ + cUnit->assemblerRetries = 0; + cUnit->assemblerStatus = kRetryHalve; + return; + case kRetryHalve: + return; + default: + LOGE("Unexpected assembler status: %d", cUnit->assemblerStatus); + dvmAbort(); + } + +#if defined(SIGNATURE_BREAKPOINT) + if (info->discardResult == false && gDvmJit.signatureBreakpoint != NULL && + chainCellOffset/4 >= gDvmJit.signatureBreakpointSize) { + matchSignatureBreakpoint(cUnit, chainCellOffset/4); + } +#endif + + /* Don't go all the way if the goal is just to get the verbose output */ + if (info->discardResult) return; + + /* + * The cache might disappear - acquire lock and check version + * Continue holding lock until translation cache update is complete. + * These actions are required here in the compiler thread because + * it is unaffected by suspend requests and doesn't know if a + * translation cache flush is in progress. + */ + dvmLockMutex(&gDvmJit.compilerLock); + if (info->cacheVersion != gDvmJit.cacheVersion) { + /* Cache changed - discard current translation */ + info->discardResult = true; + info->codeAddress = NULL; + dvmUnlockMutex(&gDvmJit.compilerLock); + return; + } + + cUnit->baseAddr = (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed; + gDvmJit.codeCacheByteUsed += offset; + + UNPROTECT_CODE_CACHE(cUnit->baseAddr, offset); + + /* Install the code block */ + memcpy((char*)cUnit->baseAddr, cUnit->codeBuffer, chainCellOffset); + gDvmJit.numCompilations++; + + if (cUnit->jitMode != kJitMethod) { + /* Install the chaining cell counts */ + for (i=0; i< kChainingCellGap; i++) { + chainCellCounts.u.count[i] = cUnit->numChainingCells[i]; + } + + /* Set the gap number in the chaining cell count structure */ + chainCellCounts.u.count[kChainingCellGap] = chainingCellGap; + + memcpy((char*)cUnit->baseAddr + chainCellOffset, &chainCellCounts, + sizeof(chainCellCounts)); + + /* Install the trace description */ + memcpy((char*) cUnit->baseAddr + chainCellOffset + + sizeof(chainCellCounts), + cUnit->traceDesc, descSize); + } + + /* Write the literals directly into the code cache */ + installLiteralPools(cUnit); + + /* Flush dcache and invalidate the icache to maintain coherence */ + dvmCompilerCacheFlush((long)cUnit->baseAddr, + (long)((char *) cUnit->baseAddr + offset), 0); + + UPDATE_CODE_CACHE_PATCHES(); + + PROTECT_CODE_CACHE(cUnit->baseAddr, offset); + + /* Translation cache update complete - release lock */ + dvmUnlockMutex(&gDvmJit.compilerLock); + + /* Record code entry point and instruction set */ + info->codeAddress = (char*)cUnit->baseAddr + cUnit->headerSize; + /* transfer the size of the profiling code */ + info->profileCodeSize = cUnit->profileCodeSize; +} + +/* + * Returns the skeleton bit pattern associated with an opcode. All + * variable fields are zeroed. + */ +static u4 getSkeleton(MipsOpCode op) +{ + return EncodingMap[op].skeleton; +} + +static u4 assembleChainingBranch(int branchOffset, bool thumbTarget) +{ + return getSkeleton(kMipsJal) | ((branchOffset & 0x0FFFFFFF) >> 2); +} + +/* + * Perform translation chain operation. 
+ * For MIPS, we'll use a JAL instruction to generate an + * unconditional chaining branch of up to 256M. The JAL + * instruction also has a restriction that the jump target + * must be in the same 256M page as the JAL instruction's + * delay slot address. + * If the target is out of JAL's range, don't chain. + * If one or more threads is suspended, don't chain. + */ +void* dvmJitChain(void* tgtAddr, u4* branchAddr) +{ + u4 newInst; + + /* + * Only chain translations when there is no urge to ask all threads to + * suspend themselves via the interpreter. + */ + if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) && + (gDvmJit.codeCacheFull == false) && + ((((int) tgtAddr) & 0xF0000000) == (((int) branchAddr+4) & 0xF0000000))) { + gDvmJit.translationChains++; + + COMPILER_TRACE_CHAINING( + LOGD("Jit Runtime: chaining 0x%x to 0x%x", + (int) branchAddr, (int) tgtAddr & -2)); + + newInst = assembleChainingBranch((int) tgtAddr & -2, 0); + + UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr)); + + *branchAddr = newInst; + dvmCompilerCacheFlush((long)branchAddr, (long)branchAddr + 4, 0); + UPDATE_CODE_CACHE_PATCHES(); + + PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr)); + + gDvmJit.hasNewChain = true; + } + + return tgtAddr; +} + +#if !defined(WITH_SELF_VERIFICATION) +/* + * Attempt to enqueue a work order to patch an inline cache for a predicted + * chaining cell for virtual/interface calls. + */ +static void inlineCachePatchEnqueue(PredictedChainingCell *cellAddr, + PredictedChainingCell *newContent) +{ + /* + * Make sure only one thread gets here since updating the cell (ie fast + * path and queueing the request (ie the queued path) have to be done + * in an atomic fashion. + */ + dvmLockMutex(&gDvmJit.compilerICPatchLock); + + /* Fast path for uninitialized chaining cell */ + if (cellAddr->clazz == NULL && + cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) { + + UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + + cellAddr->method = newContent->method; + cellAddr->branch = newContent->branch; + + /* + * The update order matters - make sure clazz is updated last since it + * will bring the uninitialized chaining cell to life. + */ + android_atomic_release_store((int32_t)newContent->clazz, + (volatile int32_t *)(void*) &cellAddr->clazz); + dvmCompilerCacheFlush((long) cellAddr, (long) (cellAddr+1), 0); + UPDATE_CODE_CACHE_PATCHES(); + + PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + +#if defined(WITH_JIT_TUNING) + gDvmJit.icPatchInit++; +#endif + /* Check if this is a frequently missed clazz */ + } else if (cellAddr->stagedClazz != newContent->clazz) { + /* Not proven to be frequent yet - build up the filter cache */ + UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + + cellAddr->stagedClazz = newContent->clazz; + + UPDATE_CODE_CACHE_PATCHES(); + PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + +#if defined(WITH_JIT_TUNING) + gDvmJit.icPatchRejected++; +#endif + /* + * Different classes but same method implementation - it is safe to just + * patch the class value without the need to stop the world. + */ + } else if (cellAddr->method == newContent->method) { + UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + + cellAddr->clazz = newContent->clazz; + /* No need to flush the cache here since the branch is not patched */ + UPDATE_CODE_CACHE_PATCHES(); + + PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + +#if defined(WITH_JIT_TUNING) + gDvmJit.icPatchLockFree++; +#endif + /* + * Cannot patch the chaining cell inline - queue it until the next safe + * point. 
+ */ + } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) { + int index = gDvmJit.compilerICPatchIndex++; + const ClassObject *clazz = newContent->clazz; + + gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr; + gDvmJit.compilerICPatchQueue[index].cellContent = *newContent; + gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor; + gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader; + /* For verification purpose only */ + gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber; +#if defined(WITH_JIT_TUNING) + gDvmJit.icPatchQueued++; +#endif + } else { + /* Queue is full - just drop this patch request */ +#if defined(WITH_JIT_TUNING) + gDvmJit.icPatchDropped++; +#endif + } + + dvmUnlockMutex(&gDvmJit.compilerICPatchLock); +} +#endif + +/* + * This method is called from the invoke templates for virtual and interface + * methods to speculatively setup a chain to the callee. The templates are + * written in assembly and have setup method, cell, and clazz at r0, r2, and + * r3 respectively, so there is a unused argument in the list. Upon return one + * of the following three results may happen: + * 1) Chain is not setup because the callee is native. Reset the rechain + * count to a big number so that it will take a long time before the next + * rechain attempt to happen. + * 2) Chain is not setup because the callee has not been created yet. Reset + * the rechain count to a small number and retry in the near future. + * 3) Ask all other threads to stop before patching this chaining cell. + * This is required because another thread may have passed the class check + * but hasn't reached the chaining cell yet to follow the chain. If we + * patch the content before halting the other thread, there could be a + * small window for race conditions to happen that it may follow the new + * but wrong chain to invoke a different method. + */ +const Method *dvmJitToPatchPredictedChain(const Method *method, + Thread *self, + PredictedChainingCell *cell, + const ClassObject *clazz) +{ + int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN; +#if defined(WITH_SELF_VERIFICATION) + newRechainCount = PREDICTED_CHAIN_COUNTER_AVOID; + goto done; +#else + PredictedChainingCell newCell; + int baseAddr, tgtAddr; + if (dvmIsNativeMethod(method)) { + UNPROTECT_CODE_CACHE(cell, sizeof(*cell)); + + /* + * Put a non-zero/bogus value in the clazz field so that it won't + * trigger immediate patching and will continue to fail to match with + * a real clazz pointer. + */ + cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ; + + UPDATE_CODE_CACHE_PATCHES(); + PROTECT_CODE_CACHE(cell, sizeof(*cell)); + goto done; + } + + tgtAddr = (int) dvmJitGetTraceAddr(method->insns); + baseAddr = (int) cell + 4; // PC is cur_addr + 4 + + if ((baseAddr & 0xF0000000) != (tgtAddr & 0xF0000000)) { + COMPILER_TRACE_CHAINING( + LOGD("Jit Runtime: predicted chain %p to distant target %s ignored", + cell, method->name)); + goto done; + } + + /* + * Compilation not made yet for the callee. Reset the counter to a small + * value and come back to check soon. 
+ */ + if ((tgtAddr == 0) || + ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) { + COMPILER_TRACE_CHAINING( + LOGD("Jit Runtime: predicted chain %p to method %s%s delayed", + cell, method->clazz->descriptor, method->name)); + goto done; + } + + if (cell->clazz == NULL) { + newRechainCount = self->icRechainCount; + } + + newCell.branch = assembleChainingBranch(tgtAddr, true); + newCell.delay_slot = getSkeleton(kMipsNop); + newCell.clazz = clazz; + newCell.method = method; + newCell.stagedClazz = NULL; + + /* + * Enter the work order to the queue and the chaining cell will be patched + * the next time a safe point is entered. + * + * If the enqueuing fails reset the rechain count to a normal value so that + * it won't get indefinitely delayed. + */ + inlineCachePatchEnqueue(cell, &newCell); +#endif +done: + self->icRechainCount = newRechainCount; + return method; +} + +/* + * Patch the inline cache content based on the content passed from the work + * order. + */ +void dvmCompilerPatchInlineCache(void) +{ + int i; + PredictedChainingCell *minAddr, *maxAddr; + + /* Nothing to be done */ + if (gDvmJit.compilerICPatchIndex == 0) return; + + /* + * Since all threads are already stopped we don't really need to acquire + * the lock. But race condition can be easily introduced in the future w/o + * paying attention so we still acquire the lock here. + */ + dvmLockMutex(&gDvmJit.compilerICPatchLock); + + UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); + + //LOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex); + + /* Initialize the min/max address range */ + minAddr = (PredictedChainingCell *) + ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize); + maxAddr = (PredictedChainingCell *) gDvmJit.codeCache; + + for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) { + ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i]; + PredictedChainingCell *cellAddr = workOrder->cellAddr; + PredictedChainingCell *cellContent = &workOrder->cellContent; + ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor, + workOrder->classLoader); + + assert(clazz->serialNumber == workOrder->serialNumber); + + /* Use the newly resolved clazz pointer */ + cellContent->clazz = clazz; + + COMPILER_TRACE_CHAINING( + LOGD("Jit Runtime: predicted chain %p from %s to %s (%s) " + "patched", + cellAddr, + cellAddr->clazz->descriptor, + cellContent->clazz->descriptor, + cellContent->method->name)); + + /* Patch the chaining cell */ + *cellAddr = *cellContent; + minAddr = (cellAddr < minAddr) ? cellAddr : minAddr; + maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr; + } + + /* Then synchronize the I/D cache */ + dvmCompilerCacheFlush((long) minAddr, (long) (maxAddr+1), 0); + UPDATE_CODE_CACHE_PATCHES(); + + PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); + + gDvmJit.compilerICPatchIndex = 0; + dvmUnlockMutex(&gDvmJit.compilerICPatchLock); +} + +/* + * Unchain a trace given the starting address of the translation + * in the code cache. Refer to the diagram in dvmCompilerAssembleLIR. + * Returns the address following the last cell unchained. Note that + * the incoming codeAddr is a thumb code address, and therefore has + * the low bit set. 
+ */ +static u4* unchainSingle(JitEntry *trace) +{ + const char *base = getTraceBase(trace); + ChainCellCounts *pChainCellCounts = getChainCellCountsPointer(base); + int cellSize = getChainCellSize(pChainCellCounts); + u4* pChainCells; + int i,j; + PredictedChainingCell *predChainCell; + + if (cellSize == 0) + return (u4 *) pChainCellCounts; + + /* Locate the beginning of the chain cell region */ + pChainCells = ((u4 *) pChainCellCounts) - cellSize - + pChainCellCounts->u.count[kChainingCellGap]; + + /* The cells are sorted in order - walk through them and reset */ + for (i = 0; i < kChainingCellGap; i++) { + int elemSize = CHAIN_CELL_NORMAL_SIZE >> 2; /* In 32-bit words */ + if (i == kChainingCellInvokePredicted) { + elemSize = CHAIN_CELL_PREDICTED_SIZE >> 2; + } + + for (j = 0; j < pChainCellCounts->u.count[i]; j++) { + int targetOffset; + switch(i) { + case kChainingCellNormal: + targetOffset = offsetof(Thread, + jitToInterpEntries.dvmJitToInterpNormal); + break; + case kChainingCellHot: + case kChainingCellInvokeSingleton: + targetOffset = offsetof(Thread, + jitToInterpEntries.dvmJitToInterpTraceSelect); + break; + case kChainingCellInvokePredicted: + targetOffset = 0; + predChainCell = (PredictedChainingCell *) pChainCells; + /* + * There could be a race on another mutator thread to use + * this particular predicted cell and the check has passed + * the clazz comparison. So we cannot safely wipe the + * method and branch but it is safe to clear the clazz, + * which serves as the key. + */ + predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT; + break; +#if defined(WITH_SELF_VERIFICATION) + case kChainingCellBackwardBranch: + targetOffset = offsetof(Thread, + jitToInterpEntries.dvmJitToInterpBackwardBranch); + break; +#else + case kChainingCellBackwardBranch: + targetOffset = offsetof(Thread, + jitToInterpEntries.dvmJitToInterpNormal); + break; +#endif + default: + targetOffset = 0; // make gcc happy + LOGE("Unexpected chaining type: %d", i); + dvmAbort(); // dvmAbort OK here - can't safely recover + } + COMPILER_TRACE_CHAINING( + LOGD("Jit Runtime: unchaining %#x", (int)pChainCells)); + /* + * Code sequence for a chaining cell is: + * lw a0, offset(rSELF) + * jalr ra, a0 + */ + if (i != kChainingCellInvokePredicted) { + *pChainCells = getSkeleton(kMipsLw) | (r_A0 << 16) | + targetOffset | (rSELF << 21); + *(pChainCells+1) = getSkeleton(kMipsJalr) | (r_RA << 11) | + (r_A0 << 21); + } + pChainCells += elemSize; /* Advance by a fixed number of words */ + } + } + return pChainCells; +} + +/* Unchain all translation in the cache. 
*/ +void dvmJitUnchainAll() +{ + u4* lowAddress = NULL; + u4* highAddress = NULL; + unsigned int i; + if (gDvmJit.pJitEntryTable != NULL) { + COMPILER_TRACE_CHAINING(LOGD("Jit Runtime: unchaining all")); + dvmLockMutex(&gDvmJit.tableLock); + + UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); + + for (i = 0; i < gDvmJit.jitTableSize; i++) { + if (gDvmJit.pJitEntryTable[i].dPC && + !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry && + gDvmJit.pJitEntryTable[i].codeAddress && + (gDvmJit.pJitEntryTable[i].codeAddress != + dvmCompilerGetInterpretTemplate())) { + u4* lastAddress; + lastAddress = unchainSingle(&gDvmJit.pJitEntryTable[i]); + if (lowAddress == NULL || + (u4*)gDvmJit.pJitEntryTable[i].codeAddress < lowAddress) + lowAddress = (u4*)gDvmJit.pJitEntryTable[i].codeAddress; + if (lastAddress > highAddress) + highAddress = lastAddress; + } + } + + if (lowAddress && highAddress) + dvmCompilerCacheFlush((long)lowAddress, (long)highAddress, 0); + + UPDATE_CODE_CACHE_PATCHES(); + + PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); + + dvmUnlockMutex(&gDvmJit.tableLock); + gDvmJit.translationChains = 0; + } + gDvmJit.hasNewChain = false; +} + +typedef struct jitProfileAddrToLine { + u4 lineNum; + u4 bytecodeOffset; +} jitProfileAddrToLine; + + +/* Callback function to track the bytecode offset/line number relationiship */ +static int addrToLineCb (void *cnxt, u4 bytecodeOffset, u4 lineNum) +{ + jitProfileAddrToLine *addrToLine = (jitProfileAddrToLine *) cnxt; + + /* Best match so far for this offset */ + if (addrToLine->bytecodeOffset >= bytecodeOffset) { + addrToLine->lineNum = lineNum; + } + return 0; +} + +/* Dumps profile info for a single trace */ +static int dumpTraceProfile(JitEntry *p, bool silent, bool reset, + unsigned long sum) +{ + int idx; + + if (p->codeAddress == NULL) { + if (!silent) + LOGD("TRACEPROFILE NULL"); + return 0; + } + if (p->codeAddress == dvmCompilerGetInterpretTemplate()) { + if (!silent) + LOGD("TRACEPROFILE INTERPRET_ONLY"); + return 0; + } + + JitTraceCounter_t count = getProfileCount(p); + if (reset) { + resetProfileCount(p); + } + if (silent) { + return count; + } + JitTraceDescription *desc = getTraceDescriptionPointer(getTraceBase(p)); + const Method *method = desc->method; + char *methodDesc = dexProtoCopyMethodDescriptor(&method->prototype); + jitProfileAddrToLine addrToLine = {0, desc->trace[0].info.frag.startOffset}; + + /* + * We may end up decoding the debug information for the same method + * multiple times, but the tradeoff is we don't need to allocate extra + * space to store the addr/line mapping. Since this is a debugging feature + * and done infrequently so the slower but simpler mechanism should work + * just fine. + */ + dexDecodeDebugInfo(method->clazz->pDvmDex->pDexFile, + dvmGetMethodCode(method), + method->clazz->descriptor, + method->prototype.protoIdx, + method->accessFlags, + addrToLineCb, NULL, &addrToLine); + + LOGD("TRACEPROFILE 0x%08x % 10d %5.2f%% [%#x(+%d), %d] %s%s;%s", + (int) getTraceBase(p), + count, + ((float ) count) / sum * 100.0, + desc->trace[0].info.frag.startOffset, + desc->trace[0].info.frag.numInsts, + addrToLine.lineNum, + method->clazz->descriptor, method->name, methodDesc); + free(methodDesc); + + /* Find the last fragment (ie runEnd is set) */ + for (idx = 0; + desc->trace[idx].isCode && !desc->trace[idx].info.frag.runEnd; + idx++) { + } + + /* + * runEnd must comes with a JitCodeDesc frag. If isCode is false it must + * be a meta info field (only used by callsite info for now). 
+ */ + if (!desc->trace[idx].isCode) { + const Method *method = (const Method *) + desc->trace[idx+JIT_TRACE_CUR_METHOD-1].info.meta; + char *methodDesc = dexProtoCopyMethodDescriptor(&method->prototype); + /* Print the callee info in the trace */ + LOGD(" -> %s%s;%s", method->clazz->descriptor, method->name, + methodDesc); + } + + return count; +} + +/* Create a copy of the trace descriptor of an existing compilation */ +JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc, + const JitEntry *knownEntry) +{ + const JitEntry *jitEntry = knownEntry ? knownEntry + : dvmJitFindEntry(pc, false); + if ((jitEntry == NULL) || (jitEntry->codeAddress == 0)) + return NULL; + + JitTraceDescription *desc = + getTraceDescriptionPointer(getTraceBase(jitEntry)); + + /* Now make a copy and return */ + int descSize = getTraceDescriptionSize(desc); + JitTraceDescription *newCopy = (JitTraceDescription *) malloc(descSize); + memcpy(newCopy, desc, descSize); + return newCopy; +} + +/* qsort callback function */ +static int sortTraceProfileCount(const void *entry1, const void *entry2) +{ + const JitEntry *jitEntry1 = (const JitEntry *)entry1; + const JitEntry *jitEntry2 = (const JitEntry *)entry2; + + JitTraceCounter_t count1 = getProfileCount(jitEntry1); + JitTraceCounter_t count2 = getProfileCount(jitEntry2); + return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1); +} + +/* Sort the trace profile counts and dump them */ +void dvmCompilerSortAndPrintTraceProfiles() +{ + JitEntry *sortedEntries; + int numTraces = 0; + unsigned long sum = 0; + unsigned int i; + + /* Make sure that the table is not changing */ + dvmLockMutex(&gDvmJit.tableLock); + + /* Sort the entries by descending order */ + sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize); + if (sortedEntries == NULL) + goto done; + memcpy(sortedEntries, gDvmJit.pJitEntryTable, + sizeof(JitEntry) * gDvmJit.jitTableSize); + qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry), + sortTraceProfileCount); + + /* Analyze the sorted entries */ + for (i=0; i < gDvmJit.jitTableSize; i++) { + if (sortedEntries[i].dPC != 0) { + sum += dumpTraceProfile(&sortedEntries[i], + true /* silent */, + false /* reset */, + 0); + numTraces++; + } + } + if (numTraces == 0) + numTraces = 1; + if (sum == 0) { + sum = 1; + } + + LOGD("JIT: Average execution count -> %d",(int)(sum / numTraces)); + + /* Dump the sorted entries. The count of each trace will be reset to 0. 
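+ *
+ * The percentages are computed against the sum gathered in the silent pass
+ * above, and sortTraceProfileCount() returns -1 when its first argument has
+ * the larger count, so the hottest traces are dumped first. Afterwards up to
+ * ten of the hottest traces are re-queued as kWorkOrderTraceDebug work
+ * orders so the compiler thread can dump their code.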
*/ + for (i=0; i < gDvmJit.jitTableSize; i++) { + if (sortedEntries[i].dPC != 0) { + dumpTraceProfile(&sortedEntries[i], + false /* silent */, + true /* reset */, + sum); + } + } + + for (i=0; i < gDvmJit.jitTableSize && i < 10; i++) { + /* Stip interpreter stubs */ + if (sortedEntries[i].codeAddress == dvmCompilerGetInterpretTemplate()) { + continue; + } + JitTraceDescription* desc = + dvmCopyTraceDescriptor(NULL, &sortedEntries[i]); + if (desc) { + dvmCompilerWorkEnqueue(sortedEntries[i].dPC, + kWorkOrderTraceDebug, desc); + } + } + + free(sortedEntries); +done: + dvmUnlockMutex(&gDvmJit.tableLock); + return; +} + +static void findClassPointersSingleTrace(char *base, void (*callback)(void *)) +{ + unsigned int chainTypeIdx, chainIdx; + ChainCellCounts *pChainCellCounts = getChainCellCountsPointer(base); + int cellSize = getChainCellSize(pChainCellCounts); + /* Scan the chaining cells */ + if (cellSize) { + /* Locate the beginning of the chain cell region */ + u4 *pChainCells = ((u4 *) pChainCellCounts) - cellSize - + pChainCellCounts->u.count[kChainingCellGap]; + /* The cells are sorted in order - walk through them */ + for (chainTypeIdx = 0; chainTypeIdx < kChainingCellGap; + chainTypeIdx++) { + if (chainTypeIdx != kChainingCellInvokePredicted) { + /* In 32-bit words */ + pChainCells += (CHAIN_CELL_NORMAL_SIZE >> 2) * + pChainCellCounts->u.count[chainTypeIdx]; + continue; + } + for (chainIdx = 0; + chainIdx < pChainCellCounts->u.count[chainTypeIdx]; + chainIdx++) { + PredictedChainingCell *cell = + (PredictedChainingCell *) pChainCells; + /* + * Report the cell if it contains a sane class + * pointer. + */ + if (cell->clazz != NULL && + cell->clazz != + (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ) { + callback(&cell->clazz); + } + pChainCells += CHAIN_CELL_PREDICTED_SIZE >> 2; + } + } + } + + /* Scan the class pointer pool */ + JitTraceDescription *desc = getTraceDescriptionPointer(base); + int descSize = getTraceDescriptionSize(desc); + int *classPointerP = (int *) ((char *) desc + descSize); + int numClassPointers = *classPointerP++; + for (; numClassPointers; numClassPointers--, classPointerP++) { + callback(classPointerP); + } +} + +/* + * Scan class pointers in each translation and pass its address to the callback + * function. Currently such a pointers can be found in the pointer pool and the + * clazz field in the predicted chaining cells. + */ +void dvmJitScanAllClassPointers(void (*callback)(void *)) +{ + UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); + + /* Handle the inflight compilation first */ + if (gDvmJit.inflightBaseAddr) + findClassPointersSingleTrace((char *) gDvmJit.inflightBaseAddr, + callback); + + if (gDvmJit.pJitEntryTable != NULL) { + unsigned int traceIdx; + dvmLockMutex(&gDvmJit.tableLock); + for (traceIdx = 0; traceIdx < gDvmJit.jitTableSize; traceIdx++) { + const JitEntry *entry = &gDvmJit.pJitEntryTable[traceIdx]; + if (entry->dPC && + !entry->u.info.isMethodEntry && + entry->codeAddress && + (entry->codeAddress != dvmCompilerGetInterpretTemplate())) { + char *base = getTraceBase(entry); + findClassPointersSingleTrace(base, callback); + } + } + dvmUnlockMutex(&gDvmJit.tableLock); + } + UPDATE_CODE_CACHE_PATCHES(); + + PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); +} + +/* + * Provide the final touch on the class object pointer pool to install the + * actual pointers. The thread has to be in the running state. 
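+ *
+ * The pool lives right behind the trace description in the code cache:
+ *
+ *   [JitTraceDescription][word: count][slot 0][slot 1] ... [slot count-1]
+ *
+ * Each slot initially holds the CallsiteInfo pointer recorded when the pool
+ * was emitted and is overwritten below with the resolved ClassObject pointer.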
+ */ +void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress) +{ + char *base = codeAddress - cUnit->headerSize; + + /* Scan the class pointer pool */ + JitTraceDescription *desc = getTraceDescriptionPointer(base); + int descSize = getTraceDescriptionSize(desc); + intptr_t *classPointerP = (int *) ((char *) desc + descSize); + int numClassPointers = *(int *)classPointerP++; + intptr_t *startClassPointerP = classPointerP; + + /* + * Change the thread state to VM_RUNNING so that GC won't be happening + * when the assembler looks up the class pointers. May suspend the current + * thread if there is a pending request before the state is actually + * changed to RUNNING. + */ + dvmChangeStatus(gDvmJit.compilerThread, THREAD_RUNNING); + + /* + * Unprotecting the code cache will need to acquire the code cache + * protection lock first. Doing so after the state change may increase the + * time spent in the RUNNING state (which may delay the next GC request + * should there be contention on codeCacheProtectionLock). In practice + * this is probably not going to happen often since a GC is just served. + * More importantly, acquiring the lock before the state change will + * cause deadlock (b/4192964). + */ + UNPROTECT_CODE_CACHE(startClassPointerP, + numClassPointers * sizeof(intptr_t)); +#if defined(WITH_JIT_TUNING) + u8 startTime = dvmGetRelativeTimeUsec(); +#endif + for (;numClassPointers; numClassPointers--) { + CallsiteInfo *callsiteInfo = (CallsiteInfo *) *classPointerP; + ClassObject *clazz = dvmFindClassNoInit( + callsiteInfo->classDescriptor, callsiteInfo->classLoader); + assert(!strcmp(clazz->descriptor, callsiteInfo->classDescriptor)); + *classPointerP++ = (intptr_t) clazz; + } + + /* + * Register the base address so that if GC kicks in after the thread state + * has been changed to VMWAIT and before the compiled code is registered + * in the JIT table, its content can be patched if class objects are + * moved. + */ + gDvmJit.inflightBaseAddr = base; + +#if defined(WITH_JIT_TUNING) + u8 blockTime = dvmGetRelativeTimeUsec() - startTime; + gDvmJit.compilerThreadBlockGCTime += blockTime; + if (blockTime > gDvmJit.maxCompilerThreadBlockGCTime) + gDvmJit.maxCompilerThreadBlockGCTime = blockTime; + gDvmJit.numCompilerThreadBlockGC++; +#endif + UPDATE_CODE_CACHE_PATCHES(); + + PROTECT_CODE_CACHE(startClassPointerP, numClassPointers * sizeof(intptr_t)); + + /* Change the thread state back to VMWAIT */ + dvmChangeStatus(gDvmJit.compilerThread, THREAD_VMWAIT); +} + +#if defined(WITH_SELF_VERIFICATION) +/* + * The following are used to keep compiled loads and stores from modifying + * memory during self verification mode. + * + * Stores do not modify memory. Instead, the address and value pair are stored + * into heapSpace. Addresses within heapSpace are unique. For accesses smaller + * than a word, the word containing the address is loaded first before being + * updated. + * + * Loads check heapSpace first and return data from there if an entry exists. + * Otherwise, data is loaded from memory as usual. 
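+ *
+ * For instance, a byte store of 0x5A to address 0x1003 is recorded with
+ * maskedAddr = 0x1000 and alignment = 3: the word at 0x1000 is copied into a
+ * fresh heapSpace entry, the byte at offset 3 of that copy is set to 0x5A,
+ * and real memory is left untouched. A later byte load from 0x1003 finds the
+ * entry for 0x1000 and returns 0x5A from the shadow copy.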
+ */ + +/* Used to specify sizes of memory operations */ +enum { + kSVByte, + kSVSignedByte, + kSVHalfword, + kSVSignedHalfword, + kSVWord, + kSVDoubleword, + kSVVariable, +}; + +/* Load the value of a decoded register from the stack */ +static int selfVerificationMemRegLoad(int* sp, int reg) +{ +assert(0); /* MIPSTODO retarg func */ + return *(sp + reg); +} + +/* Load the value of a decoded doubleword register from the stack */ +static s8 selfVerificationMemRegLoadDouble(int* sp, int reg) +{ +assert(0); /* MIPSTODO retarg func */ + return *((s8*)(sp + reg)); +} + +/* Store the value of a decoded register out to the stack */ +static void selfVerificationMemRegStore(int* sp, int data, int reg) +{ +assert(0); /* MIPSTODO retarg func */ + *(sp + reg) = data; +} + +/* Store the value of a decoded doubleword register out to the stack */ +static void selfVerificationMemRegStoreDouble(int* sp, s8 data, int reg) +{ +assert(0); /* MIPSTODO retarg func */ + *((s8*)(sp + reg)) = data; +} + +/* + * Load the specified size of data from the specified address, checking + * heapSpace first if Self Verification mode wrote to it previously, and + * falling back to actual memory otherwise. + */ +static int selfVerificationLoad(int addr, int size) +{ +assert(0); /* MIPSTODO retarg func */ + Thread *self = dvmThreadSelf(); + ShadowSpace *shadowSpace = self->shadowSpace; + ShadowHeap *heapSpacePtr; + + int data; + int maskedAddr = addr & 0xFFFFFFFC; + int alignment = addr & 0x3; + + for (heapSpacePtr = shadowSpace->heapSpace; + heapSpacePtr != shadowSpace->heapSpaceTail; heapSpacePtr++) { + if (heapSpacePtr->addr == maskedAddr) { + addr = ((unsigned int) &(heapSpacePtr->data)) | alignment; + break; + } + } + + switch (size) { + case kSVByte: + data = *((u1*) addr); + break; + case kSVSignedByte: + data = *((s1*) addr); + break; + case kSVHalfword: + data = *((u2*) addr); + break; + case kSVSignedHalfword: + data = *((s2*) addr); + break; + case kSVWord: + data = *((u4*) addr); + break; + default: + LOGE("*** ERROR: BAD SIZE IN selfVerificationLoad: %d", size); + data = 0; + dvmAbort(); + } + + //LOGD("*** HEAP LOAD: Addr: %#x Data: %#x Size: %d", addr, data, size); + return data; +} + +/* Like selfVerificationLoad, but specifically for doublewords */ +static s8 selfVerificationLoadDoubleword(int addr) +{ +assert(0); /* MIPSTODO retarg func */ + Thread *self = dvmThreadSelf(); + ShadowSpace* shadowSpace = self->shadowSpace; + ShadowHeap* heapSpacePtr; + + int addr2 = addr+4; + unsigned int data = *((unsigned int*) addr); + unsigned int data2 = *((unsigned int*) addr2); + + for (heapSpacePtr = shadowSpace->heapSpace; + heapSpacePtr != shadowSpace->heapSpaceTail; heapSpacePtr++) { + if (heapSpacePtr->addr == addr) { + data = heapSpacePtr->data; + } else if (heapSpacePtr->addr == addr2) { + data2 = heapSpacePtr->data; + } + } + + //LOGD("*** HEAP LOAD DOUBLEWORD: Addr: %#x Data: %#x Data2: %#x", + // addr, data, data2); + return (((s8) data2) << 32) | data; +} + +/* + * Handles a store of a specified size of data to a specified address. + * This gets logged as an addr/data pair in heapSpace instead of modifying + * memory. Addresses in heapSpace are unique, and accesses smaller than a + * word pull the entire word from memory first before updating. 
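+ *
+ * Doublewords are kept as two independent word entries keyed by addr and
+ * addr+4; the doubleword load patches each half from the shadow list while
+ * the doubleword store updates or appends one entry per half.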
+ */ +static void selfVerificationStore(int addr, int data, int size) +{ +assert(0); /* MIPSTODO retarg func */ + Thread *self = dvmThreadSelf(); + ShadowSpace *shadowSpace = self->shadowSpace; + ShadowHeap *heapSpacePtr; + + int maskedAddr = addr & 0xFFFFFFFC; + int alignment = addr & 0x3; + + //LOGD("*** HEAP STORE: Addr: %#x Data: %#x Size: %d", addr, data, size); + + for (heapSpacePtr = shadowSpace->heapSpace; + heapSpacePtr != shadowSpace->heapSpaceTail; heapSpacePtr++) { + if (heapSpacePtr->addr == maskedAddr) break; + } + + if (heapSpacePtr == shadowSpace->heapSpaceTail) { + heapSpacePtr->addr = maskedAddr; + heapSpacePtr->data = *((unsigned int*) maskedAddr); + shadowSpace->heapSpaceTail++; + } + + addr = ((unsigned int) &(heapSpacePtr->data)) | alignment; + switch (size) { + case kSVByte: + *((u1*) addr) = data; + break; + case kSVSignedByte: + *((s1*) addr) = data; + break; + case kSVHalfword: + *((u2*) addr) = data; + break; + case kSVSignedHalfword: + *((s2*) addr) = data; + break; + case kSVWord: + *((u4*) addr) = data; + break; + default: + LOGE("*** ERROR: BAD SIZE IN selfVerificationSave: %d", size); + dvmAbort(); + } +} + +/* Like selfVerificationStore, but specifically for doublewords */ +static void selfVerificationStoreDoubleword(int addr, s8 double_data) +{ +assert(0); /* MIPSTODO retarg func */ + Thread *self = dvmThreadSelf(); + ShadowSpace *shadowSpace = self->shadowSpace; + ShadowHeap *heapSpacePtr; + + int addr2 = addr+4; + int data = double_data; + int data2 = double_data >> 32; + bool store1 = false, store2 = false; + + //LOGD("*** HEAP STORE DOUBLEWORD: Addr: %#x Data: %#x, Data2: %#x", + // addr, data, data2); + + for (heapSpacePtr = shadowSpace->heapSpace; + heapSpacePtr != shadowSpace->heapSpaceTail; heapSpacePtr++) { + if (heapSpacePtr->addr == addr) { + heapSpacePtr->data = data; + store1 = true; + } else if (heapSpacePtr->addr == addr2) { + heapSpacePtr->data = data2; + store2 = true; + } + } + + if (!store1) { + shadowSpace->heapSpaceTail->addr = addr; + shadowSpace->heapSpaceTail->data = data; + shadowSpace->heapSpaceTail++; + } + if (!store2) { + shadowSpace->heapSpaceTail->addr = addr2; + shadowSpace->heapSpaceTail->data = data2; + shadowSpace->heapSpaceTail++; + } +} + +/* + * Decodes the memory instruction at the address specified in the link + * register. All registers (r0-r12,lr) and fp registers (d0-d15) are stored + * consecutively on the stack beginning at the specified stack pointer. + * Calls the proper Self Verification handler for the memory instruction and + * updates the link register to point past the decoded memory instruction. 
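+ *
+ * In that save area r0-r12 occupy slots 0-12 and LR sits in slot 13, with
+ * the VFP registers following from slot 14 (which is why the vector cases
+ * below add 14 to rt). The saved LR is advanced by 2 for a 16-bit Thumb
+ * encoding or by 4 for a 32-bit Thumb2 encoding before returning.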
+ */ +void dvmSelfVerificationMemOpDecode(int lr, int* sp) +{ +assert(0); /* MIPSTODO retarg func */ + enum { + kMemOpLdrPcRel = 0x09, // ldr(3) [01001] rd[10..8] imm_8[7..0] + kMemOpRRR = 0x0A, // Full opcode is 7 bits + kMemOp2Single = 0x0A, // Used for Vstrs and Vldrs + kMemOpRRR2 = 0x0B, // Full opcode is 7 bits + kMemOp2Double = 0x0B, // Used for Vstrd and Vldrd + kMemOpStrRRI5 = 0x0C, // str(1) [01100] imm_5[10..6] rn[5..3] rd[2..0] + kMemOpLdrRRI5 = 0x0D, // ldr(1) [01101] imm_5[10..6] rn[5..3] rd[2..0] + kMemOpStrbRRI5 = 0x0E, // strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0] + kMemOpLdrbRRI5 = 0x0F, // ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0] + kMemOpStrhRRI5 = 0x10, // strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0] + kMemOpLdrhRRI5 = 0x11, // ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0] + kMemOpLdrSpRel = 0x13, // ldr(4) [10011] rd[10..8] imm_8[7..0] + kMemOpStmia = 0x18, // stmia [11000] rn[10..8] reglist [7..0] + kMemOpLdmia = 0x19, // ldmia [11001] rn[10..8] reglist [7..0] + kMemOpStrRRR = 0x28, // str(2) [0101000] rm[8..6] rn[5..3] rd[2..0] + kMemOpStrhRRR = 0x29, // strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0] + kMemOpStrbRRR = 0x2A, // strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0] + kMemOpLdrsbRRR = 0x2B, // ldrsb [0101011] rm[8..6] rn[5..3] rd[2..0] + kMemOpLdrRRR = 0x2C, // ldr(2) [0101100] rm[8..6] rn[5..3] rd[2..0] + kMemOpLdrhRRR = 0x2D, // ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0] + kMemOpLdrbRRR = 0x2E, // ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0] + kMemOpLdrshRRR = 0x2F, // ldrsh [0101111] rm[8..6] rn[5..3] rd[2..0] + kMemOp2Stmia = 0xE88, // stmia [111010001000[ rn[19..16] mask[15..0] + kMemOp2Ldmia = 0xE89, // ldmia [111010001001[ rn[19..16] mask[15..0] + kMemOp2Stmia2 = 0xE8A, // stmia [111010001010[ rn[19..16] mask[15..0] + kMemOp2Ldmia2 = 0xE8B, // ldmia [111010001011[ rn[19..16] mask[15..0] + kMemOp2Vstr = 0xED8, // Used for Vstrs and Vstrd + kMemOp2Vldr = 0xED9, // Used for Vldrs and Vldrd + kMemOp2Vstr2 = 0xEDC, // Used for Vstrs and Vstrd + kMemOp2Vldr2 = 0xEDD, // Used for Vstrs and Vstrd + kMemOp2StrbRRR = 0xF80, /* str rt,[rn,rm,LSL #imm] [111110000000] + rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0] */ + kMemOp2LdrbRRR = 0xF81, /* ldrb rt,[rn,rm,LSL #imm] [111110000001] + rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0] */ + kMemOp2StrhRRR = 0xF82, /* str rt,[rn,rm,LSL #imm] [111110000010] + rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0] */ + kMemOp2LdrhRRR = 0xF83, /* ldrh rt,[rn,rm,LSL #imm] [111110000011] + rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0] */ + kMemOp2StrRRR = 0xF84, /* str rt,[rn,rm,LSL #imm] [111110000100] + rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0] */ + kMemOp2LdrRRR = 0xF85, /* ldr rt,[rn,rm,LSL #imm] [111110000101] + rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0] */ + kMemOp2StrbRRI12 = 0xF88, /* strb rt,[rn,#imm12] [111110001000] + rt[15..12] rn[19..16] imm12[11..0] */ + kMemOp2LdrbRRI12 = 0xF89, /* ldrb rt,[rn,#imm12] [111110001001] + rt[15..12] rn[19..16] imm12[11..0] */ + kMemOp2StrhRRI12 = 0xF8A, /* strh rt,[rn,#imm12] [111110001010] + rt[15..12] rn[19..16] imm12[11..0] */ + kMemOp2LdrhRRI12 = 0xF8B, /* ldrh rt,[rn,#imm12] [111110001011] + rt[15..12] rn[19..16] imm12[11..0] */ + kMemOp2StrRRI12 = 0xF8C, /* str(Imm,T3) rd,[rn,#imm12] [111110001100] + rn[19..16] rt[15..12] imm12[11..0] */ + kMemOp2LdrRRI12 = 0xF8D, /* ldr(Imm,T3) rd,[rn,#imm12] [111110001101] + rn[19..16] rt[15..12] imm12[11..0] */ + kMemOp2LdrsbRRR = 0xF91, /* ldrsb rt,[rn,rm,LSL #imm] [111110010001] + rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0] */ 
+ kMemOp2LdrshRRR = 0xF93, /* ldrsh rt,[rn,rm,LSL #imm] [111110010011] + rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0] */ + kMemOp2LdrsbRRI12 = 0xF99, /* ldrsb rt,[rn,#imm12] [111110011001] + rt[15..12] rn[19..16] imm12[11..0] */ + kMemOp2LdrshRRI12 = 0xF9B, /* ldrsh rt,[rn,#imm12] [111110011011] + rt[15..12] rn[19..16] imm12[11..0] */ + kMemOp2 = 0xE000, // top 3 bits set indicates Thumb2 + }; + + int addr, offset, data; + long long double_data; + int size = kSVWord; + bool store = false; + unsigned int *lr_masked = (unsigned int *) (lr & 0xFFFFFFFE); + unsigned int insn = *lr_masked; + + int old_lr; + old_lr = selfVerificationMemRegLoad(sp, 13); + + if ((insn & kMemOp2) == kMemOp2) { + insn = (insn << 16) | (insn >> 16); + //LOGD("*** THUMB2 - Addr: %#x Insn: %#x", lr, insn); + + int opcode12 = (insn >> 20) & 0xFFF; + int opcode6 = (insn >> 6) & 0x3F; + int opcode4 = (insn >> 8) & 0xF; + int imm2 = (insn >> 4) & 0x3; + int imm8 = insn & 0xFF; + int imm12 = insn & 0xFFF; + int rd = (insn >> 12) & 0xF; + int rm = insn & 0xF; + int rn = (insn >> 16) & 0xF; + int rt = (insn >> 12) & 0xF; + bool wBack = true; + + // Update the link register + selfVerificationMemRegStore(sp, old_lr+4, 13); + + // Determine whether the mem op is a store or load + switch (opcode12) { + case kMemOp2Stmia: + case kMemOp2Stmia2: + case kMemOp2Vstr: + case kMemOp2Vstr2: + case kMemOp2StrbRRR: + case kMemOp2StrhRRR: + case kMemOp2StrRRR: + case kMemOp2StrbRRI12: + case kMemOp2StrhRRI12: + case kMemOp2StrRRI12: + store = true; + } + + // Determine the size of the mem access + switch (opcode12) { + case kMemOp2StrbRRR: + case kMemOp2LdrbRRR: + case kMemOp2StrbRRI12: + case kMemOp2LdrbRRI12: + size = kSVByte; + break; + case kMemOp2LdrsbRRR: + case kMemOp2LdrsbRRI12: + size = kSVSignedByte; + break; + case kMemOp2StrhRRR: + case kMemOp2LdrhRRR: + case kMemOp2StrhRRI12: + case kMemOp2LdrhRRI12: + size = kSVHalfword; + break; + case kMemOp2LdrshRRR: + case kMemOp2LdrshRRI12: + size = kSVSignedHalfword; + break; + case kMemOp2Vstr: + case kMemOp2Vstr2: + case kMemOp2Vldr: + case kMemOp2Vldr2: + if (opcode4 == kMemOp2Double) size = kSVDoubleword; + break; + case kMemOp2Stmia: + case kMemOp2Ldmia: + case kMemOp2Stmia2: + case kMemOp2Ldmia2: + size = kSVVariable; + break; + } + + // Load the value of the address + addr = selfVerificationMemRegLoad(sp, rn); + + // Figure out the offset + switch (opcode12) { + case kMemOp2Vstr: + case kMemOp2Vstr2: + case kMemOp2Vldr: + case kMemOp2Vldr2: + offset = imm8 << 2; + if (opcode4 == kMemOp2Single) { + rt = rd << 1; + if (insn & 0x400000) rt |= 0x1; + } else if (opcode4 == kMemOp2Double) { + if (insn & 0x400000) rt |= 0x10; + rt = rt << 1; + } else { + LOGE("*** ERROR: UNRECOGNIZED VECTOR MEM OP: %x", opcode4); + dvmAbort(); + } + rt += 14; + break; + case kMemOp2StrbRRR: + case kMemOp2LdrbRRR: + case kMemOp2StrhRRR: + case kMemOp2LdrhRRR: + case kMemOp2StrRRR: + case kMemOp2LdrRRR: + case kMemOp2LdrsbRRR: + case kMemOp2LdrshRRR: + offset = selfVerificationMemRegLoad(sp, rm) << imm2; + break; + case kMemOp2StrbRRI12: + case kMemOp2LdrbRRI12: + case kMemOp2StrhRRI12: + case kMemOp2LdrhRRI12: + case kMemOp2StrRRI12: + case kMemOp2LdrRRI12: + case kMemOp2LdrsbRRI12: + case kMemOp2LdrshRRI12: + offset = imm12; + break; + case kMemOp2Stmia: + case kMemOp2Ldmia: + wBack = false; + case kMemOp2Stmia2: + case kMemOp2Ldmia2: + offset = 0; + break; + default: + LOGE("*** ERROR: UNRECOGNIZED THUMB2 MEM OP: %x", opcode12); + offset = 0; + dvmAbort(); + } + + // Handle the decoded mem op 
accordingly + if (store) { + if (size == kSVVariable) { + LOGD("*** THUMB2 STMIA CURRENTLY UNUSED (AND UNTESTED)"); + int i; + int regList = insn & 0xFFFF; + for (i = 0; i < 16; i++) { + if (regList & 0x1) { + data = selfVerificationMemRegLoad(sp, i); + selfVerificationStore(addr, data, kSVWord); + addr += 4; + } + regList = regList >> 1; + } + if (wBack) selfVerificationMemRegStore(sp, addr, rn); + } else if (size == kSVDoubleword) { + double_data = selfVerificationMemRegLoadDouble(sp, rt); + selfVerificationStoreDoubleword(addr+offset, double_data); + } else { + data = selfVerificationMemRegLoad(sp, rt); + selfVerificationStore(addr+offset, data, size); + } + } else { + if (size == kSVVariable) { + LOGD("*** THUMB2 LDMIA CURRENTLY UNUSED (AND UNTESTED)"); + int i; + int regList = insn & 0xFFFF; + for (i = 0; i < 16; i++) { + if (regList & 0x1) { + data = selfVerificationLoad(addr, kSVWord); + selfVerificationMemRegStore(sp, data, i); + addr += 4; + } + regList = regList >> 1; + } + if (wBack) selfVerificationMemRegStore(sp, addr, rn); + } else if (size == kSVDoubleword) { + double_data = selfVerificationLoadDoubleword(addr+offset); + selfVerificationMemRegStoreDouble(sp, double_data, rt); + } else { + data = selfVerificationLoad(addr+offset, size); + selfVerificationMemRegStore(sp, data, rt); + } + } + } else { + //LOGD("*** THUMB - Addr: %#x Insn: %#x", lr, insn); + + // Update the link register + selfVerificationMemRegStore(sp, old_lr+2, 13); + + int opcode5 = (insn >> 11) & 0x1F; + int opcode7 = (insn >> 9) & 0x7F; + int imm = (insn >> 6) & 0x1F; + int rd = (insn >> 8) & 0x7; + int rm = (insn >> 6) & 0x7; + int rn = (insn >> 3) & 0x7; + int rt = insn & 0x7; + + // Determine whether the mem op is a store or load + switch (opcode5) { + case kMemOpRRR: + switch (opcode7) { + case kMemOpStrRRR: + case kMemOpStrhRRR: + case kMemOpStrbRRR: + store = true; + } + break; + case kMemOpStrRRI5: + case kMemOpStrbRRI5: + case kMemOpStrhRRI5: + case kMemOpStmia: + store = true; + } + + // Determine the size of the mem access + switch (opcode5) { + case kMemOpRRR: + case kMemOpRRR2: + switch (opcode7) { + case kMemOpStrbRRR: + case kMemOpLdrbRRR: + size = kSVByte; + break; + case kMemOpLdrsbRRR: + size = kSVSignedByte; + break; + case kMemOpStrhRRR: + case kMemOpLdrhRRR: + size = kSVHalfword; + break; + case kMemOpLdrshRRR: + size = kSVSignedHalfword; + break; + } + break; + case kMemOpStrbRRI5: + case kMemOpLdrbRRI5: + size = kSVByte; + break; + case kMemOpStrhRRI5: + case kMemOpLdrhRRI5: + size = kSVHalfword; + break; + case kMemOpStmia: + case kMemOpLdmia: + size = kSVVariable; + break; + } + + // Load the value of the address + if (opcode5 == kMemOpLdrPcRel) + addr = selfVerificationMemRegLoad(sp, 4); + else if (opcode5 == kMemOpStmia || opcode5 == kMemOpLdmia) + addr = selfVerificationMemRegLoad(sp, rd); + else + addr = selfVerificationMemRegLoad(sp, rn); + + // Figure out the offset + switch (opcode5) { + case kMemOpLdrPcRel: + offset = (insn & 0xFF) << 2; + rt = rd; + break; + case kMemOpRRR: + case kMemOpRRR2: + offset = selfVerificationMemRegLoad(sp, rm); + break; + case kMemOpStrRRI5: + case kMemOpLdrRRI5: + offset = imm << 2; + break; + case kMemOpStrhRRI5: + case kMemOpLdrhRRI5: + offset = imm << 1; + break; + case kMemOpStrbRRI5: + case kMemOpLdrbRRI5: + offset = imm; + break; + case kMemOpStmia: + case kMemOpLdmia: + offset = 0; + break; + default: + LOGE("*** ERROR: UNRECOGNIZED THUMB MEM OP: %x", opcode5); + offset = 0; + dvmAbort(); + } + + // Handle the decoded mem op accordingly + 
if (store) { + if (size == kSVVariable) { + int i; + int regList = insn & 0xFF; + for (i = 0; i < 8; i++) { + if (regList & 0x1) { + data = selfVerificationMemRegLoad(sp, i); + selfVerificationStore(addr, data, kSVWord); + addr += 4; + } + regList = regList >> 1; + } + selfVerificationMemRegStore(sp, addr, rd); + } else { + data = selfVerificationMemRegLoad(sp, rt); + selfVerificationStore(addr+offset, data, size); + } + } else { + if (size == kSVVariable) { + bool wBack = true; + int i; + int regList = insn & 0xFF; + for (i = 0; i < 8; i++) { + if (regList & 0x1) { + if (i == rd) wBack = false; + data = selfVerificationLoad(addr, kSVWord); + selfVerificationMemRegStore(sp, data, i); + addr += 4; + } + regList = regList >> 1; + } + if (wBack) selfVerificationMemRegStore(sp, addr, rd); + } else { + data = selfVerificationLoad(addr+offset, size); + selfVerificationMemRegStore(sp, data, rt); + } + } + } +} +#endif diff --git a/vm/compiler/codegen/mips/CalloutHelper.h b/vm/compiler/codegen/mips/CalloutHelper.h new file mode 100644 index 000000000..6e2343d98 --- /dev/null +++ b/vm/compiler/codegen/mips/CalloutHelper.h @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DALVIK_VM_COMPILER_CODEGEN_MIPS_CALLOUT_HELPER_H_ +#define DALVIK_VM_COMPILER_CODEGEN_MIPS_CALLOUT_HELPER_H_ + +#include "Dalvik.h" + +/* + * Declare/comment prototypes of all native callout functions invoked by the + * JIT'ed code here and use the LOAD_FUNC_ADDR macro to load the address into + * a register. In this way we have a centralized place to find out all native + * helper functions and we can grep for LOAD_FUNC_ADDR to find out all the + * callsites. 
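+ *
+ * A typical callout emitted by the MIPS code generator looks roughly like:
+ *
+ *     LOAD_FUNC_ADDR(cUnit, r_T9, (int)fmodf);              // materialize address
+ *     opReg(cUnit, kOpBlx, r_T9);                           // call through $t9
+ *     newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); // restore $gp
+ *     dvmCompilerClobberCallRegs(cUnit);                    // caller-saves are dead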
+ */ + +/* Load a statically compiled function address as a constant */ +#define LOAD_FUNC_ADDR(cUnit, reg, addr) loadConstant(cUnit, reg, addr) + +/* Conversions */ +extern "C" float __floatsisf(int op1); // OP_INT_TO_FLOAT +extern "C" int __fixsfsi(float op1); // OP_FLOAT_TO_INT +extern "C" float __truncdfsf2(double op1); // OP_DOUBLE_TO_FLOAT +extern "C" double __extendsfdf2(float op1); // OP_FLOAT_TO_DOUBLE +extern "C" double __floatsidf(int op1); // OP_INT_TO_DOUBLE +extern "C" int __fixdfsi(double op1); // OP_DOUBLE_TO_INT +extern "C" float __floatdisf(long long op1); // OP_LONG_TO_FLOAT +extern "C" double __floatdidf(long long op1); // OP_LONG_TO_DOUBLE +extern "C" long long __fixsfdi(float op1); // OP_FLOAT_TO_LONG +extern "C" long long __fixdfdi(double op1); // OP_DOUBLE_TO_LONG + +/* Single-precision FP arithmetics */ +extern "C" float __addsf3(float a, float b); // OP_ADD_FLOAT[_2ADDR] +extern "C" float __subsf3(float a, float b); // OP_SUB_FLOAT[_2ADDR] +extern "C" float __divsf3(float a, float b); // OP_DIV_FLOAT[_2ADDR] +extern "C" float __mulsf3(float a, float b); // OP_MUL_FLOAT[_2ADDR] +extern "C" float fmodf(float a, float b); // OP_REM_FLOAT[_2ADDR] + +/* Double-precision FP arithmetics */ +extern "C" double __adddf3(double a, double b); // OP_ADD_DOUBLE[_2ADDR] +extern "C" double __subdf3(double a, double b); // OP_SUB_DOUBLE[_2ADDR] +extern "C" double __divdf3(double a, double b); // OP_DIV_DOUBLE[_2ADDR] +extern "C" double __muldf3(double a, double b); // OP_MUL_DOUBLE[_2ADDR] +extern "C" double fmod(double a, double b); // OP_REM_DOUBLE[_2ADDR] + +/* Long long arithmetics - OP_REM_LONG[_2ADDR] & OP_DIV_LONG[_2ADDR] */ +extern "C" long long __divdi3(long long op1, long long op2); +extern "C" long long __moddi3(long long op1, long long op2); + +/* Originally declared in Sync.h */ +bool dvmUnlockObject(struct Thread* self, struct Object* obj); //OP_MONITOR_EXIT + +/* Originally declared in oo/TypeCheck.h */ +bool dvmCanPutArrayElement(const ClassObject* elemClass, // OP_APUT_OBJECT + const ClassObject* arrayClass); +int dvmInstanceofNonTrivial(const ClassObject* instance, // OP_CHECK_CAST && + const ClassObject* clazz); // OP_INSTANCE_OF + +/* Originally declared in oo/Array.h */ +ArrayObject* dvmAllocArrayByClass(ClassObject* arrayClass, // OP_NEW_ARRAY + size_t length, int allocFlags); + +/* Originally declared in interp/InterpDefs.h */ +bool dvmInterpHandleFillArrayData(ArrayObject* arrayObject,// OP_FILL_ARRAY_DATA + const u2* arrayData); + +/* Originally declared in compiler/codegen/mips/Assemble.c */ +const Method *dvmJitToPatchPredictedChain(const Method *method, + Thread *self, + PredictedChainingCell *cell, + const ClassObject *clazz); + +/* + * Switch dispatch offset calculation for OP_PACKED_SWITCH & OP_SPARSE_SWITCH + * Used in CodegenDriver.c + * static s8 findPackedSwitchIndex(const u2* switchData, int testVal, int pc); + * static s8 findSparseSwitchIndex(const u2* switchData, int testVal, int pc); + */ + +/* + * Resolve interface callsites - OP_INVOKE_INTERFACE & OP_INVOKE_INTERFACE_RANGE + * + * Originally declared in mterp/common/FindInterface.h and only comment it here + * due to the INLINE attribute. 
+ * + * INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass, + * u4 methodIdx, const Method* method, DvmDex* methodClassDex) + */ + +/* Originally declared in alloc/Alloc.h */ +Object* dvmAllocObject(ClassObject* clazz, int flags); // OP_NEW_INSTANCE + +/* + * Functions declared in gDvmInlineOpsTable[] are used for + * OP_EXECUTE_INLINE & OP_EXECUTE_INLINE_RANGE. + */ +extern "C" double sqrt(double x); // INLINE_MATH_SQRT + +/* + * The following functions are invoked through the compiler templates (declared + * in compiler/template/armv5te/footer.S: + * + * __aeabi_cdcmple // CMPG_DOUBLE + * __aeabi_cfcmple // CMPG_FLOAT + * dvmLockObject // MONITOR_ENTER + */ + +#endif // DALVIK_VM_COMPILER_CODEGEN_MIPS_CALLOUT_HELPER_H_ diff --git a/vm/compiler/codegen/mips/Codegen.h b/vm/compiler/codegen/mips/Codegen.h new file mode 100644 index 000000000..107fa8654 --- /dev/null +++ b/vm/compiler/codegen/mips/Codegen.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains register alloction support and is intended to be + * included by: + * + * Codegen-$(TARGET_ARCH_VARIANT).c + * + */ + +#include "compiler/CompilerIR.h" +#include "CalloutHelper.h" + +#if defined(_CODEGEN_C) +/* + * loadConstant() sometimes needs to add a small imm to a pre-existing constant + */ +static MipsLIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1, + int value); +static MipsLIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1, + int rSrc2); + +/* Forward-declare the portable versions due to circular dependency */ +static bool genArithOpFloatPortable(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2); + +static bool genArithOpDoublePortable(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2); + +static bool genConversionPortable(CompilationUnit *cUnit, MIR *mir); + +static void genMonitorPortable(CompilationUnit *cUnit, MIR *mir); + +static void genInterpSingleStep(CompilationUnit *cUnit, MIR *mir); + + +#endif + +#if defined(WITH_SELF_VERIFICATION) +/* Self Verification memory instruction decoder */ +extern "C" void dvmSelfVerificationMemOpDecode(int lr, int* sp); +#endif + +/* + * Architecture-dependent register allocation routines implemented in + * Mips/Ralloc.c + */ +extern int dvmCompilerAllocTypedTempPair(CompilationUnit *cUnit, + bool fpHint, int regClass); + +extern int dvmCompilerAllocTypedTemp(CompilationUnit *cUnit, bool fpHint, + int regClass); + +extern MipsLIR* dvmCompilerRegCopyNoInsert(CompilationUnit *cUnit, int rDest, + int rSrc); + +extern MipsLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc); + +extern void dvmCompilerRegCopyWide(CompilationUnit *cUnit, int destLo, + int destHi, int srcLo, int srcHi); + +extern void dvmCompilerSetupResourceMasks(MipsLIR *lir); + +extern void dvmCompilerFlushRegImpl(CompilationUnit *cUnit, int rBase, + int 
displacement, int rSrc, OpSize size); + +extern void dvmCompilerFlushRegWideImpl(CompilationUnit *cUnit, int rBase, + int displacement, int rSrcLo, + int rSrcHi); diff --git a/vm/compiler/codegen/mips/CodegenCommon.cpp b/vm/compiler/codegen/mips/CodegenCommon.cpp new file mode 100644 index 000000000..287e8c17b --- /dev/null +++ b/vm/compiler/codegen/mips/CodegenCommon.cpp @@ -0,0 +1,437 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains codegen and support common to all supported + * Mips variants. It is included by: + * + * Codegen-$(TARGET_ARCH_VARIANT).c + * + * which combines this common code with specific support found in the + * applicable directory below this one. + */ + +#include "compiler/Loop.h" + +/* Array holding the entry offset of each template relative to the first one */ +static intptr_t templateEntryOffsets[TEMPLATE_LAST_MARK]; + +/* Track exercised opcodes */ +static int opcodeCoverage[256]; + +static void setMemRefType(MipsLIR *lir, bool isLoad, int memType) +{ + /* MIPSTODO simplify setMemRefType() */ + u8 *maskPtr; + u8 mask = ENCODE_MEM;; + assert(EncodingMap[lir->opcode].flags & (IS_LOAD | IS_STORE)); + + if (isLoad) { + maskPtr = &lir->useMask; + } else { + maskPtr = &lir->defMask; + } + /* Clear out the memref flags */ + *maskPtr &= ~mask; + /* ..and then add back the one we need */ + switch(memType) { + case kLiteral: + assert(isLoad); + *maskPtr |= ENCODE_LITERAL; + break; + case kDalvikReg: + *maskPtr |= ENCODE_DALVIK_REG; + break; + case kHeapRef: + *maskPtr |= ENCODE_HEAP_REF; + break; + case kMustNotAlias: + /* Currently only loads can be marked as kMustNotAlias */ + assert(!(EncodingMap[lir->opcode].flags & IS_STORE)); + *maskPtr |= ENCODE_MUST_NOT_ALIAS; + break; + default: + LOGE("Jit: invalid memref kind - %d", memType); + assert(0); // Bail if debug build, set worst-case in the field + *maskPtr |= ENCODE_ALL; + } +} + +/* + * Mark load/store instructions that access Dalvik registers through rFP + + * offset. + */ +static void annotateDalvikRegAccess(MipsLIR *lir, int regId, bool isLoad) +{ + /* MIPSTODO simplify annotateDalvikRegAccess() */ + setMemRefType(lir, isLoad, kDalvikReg); + + /* + * Store the Dalvik register id in aliasInfo. Mark he MSB if it is a 64-bit + * access. 
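+ *
+ * (e.g. a 64-bit access to Dalvik register id 5 is recorded as
+ *  aliasInfo == 0x80000005, while a 32-bit access stores just the id)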
+ */ + lir->aliasInfo = regId; + if (DOUBLEREG(lir->operands[0])) { + lir->aliasInfo |= 0x80000000; + } +} + +/* + * Decode the register id + */ +static inline u8 getRegMaskCommon(int reg) +{ + u8 seed; + int shift; + int regId = reg & 0x1f; + + /* + * Each double register is equal to a pair of single-precision FP registers + */ + if (!DOUBLEREG(reg)) { + seed = 1; + } else { + assert((regId & 1) == 0); /* double registers must be even */ + seed = 3; + } + + if (FPREG(reg)) { + assert(regId < 16); /* only 16 fp regs */ + shift = kFPReg0; + } else if (EXTRAREG(reg)) { + assert(regId < 3); /* only 3 extra regs */ + shift = kFPRegEnd; + } else { + shift = 0; + } + + /* Expand the double register id into single offset */ + shift += regId; + return (seed << shift); +} + +/* External version of getRegMaskCommon */ +u8 dvmGetRegResourceMask(int reg) +{ + return getRegMaskCommon(reg); +} + +/* + * Mark the corresponding bit(s). + */ +static inline void setupRegMask(u8 *mask, int reg) +{ + *mask |= getRegMaskCommon(reg); +} + +/* + * Set up the proper fields in the resource mask + */ +static void setupResourceMasks(MipsLIR *lir) +{ + /* MIPSTODO simplify setupResourceMasks() */ + int opcode = lir->opcode; + int flags; + + if (opcode <= 0) { + lir->useMask = lir->defMask = 0; + return; + } + + flags = EncodingMap[lir->opcode].flags; + + /* Set up the mask for resources that are updated */ + if (flags & (IS_LOAD | IS_STORE)) { + /* Default to heap - will catch specialized classes later */ + setMemRefType(lir, flags & IS_LOAD, kHeapRef); + } + + /* + * Conservatively assume the branch here will call out a function that in + * turn will trash everything. + */ + if (flags & IS_BRANCH) { + lir->defMask = lir->useMask = ENCODE_ALL; + return; + } + + if (flags & REG_DEF0) { + setupRegMask(&lir->defMask, lir->operands[0]); + } + + if (flags & REG_DEF1) { + setupRegMask(&lir->defMask, lir->operands[1]); + } + + if (flags & REG_DEF_SP) { + lir->defMask |= ENCODE_REG_SP; + } + + if (flags & REG_DEF_LR) { + lir->defMask |= ENCODE_REG_LR; + } + + if (flags & REG_DEF_LIST0) { + lir->defMask |= ENCODE_REG_LIST(lir->operands[0]); + } + + if (flags & REG_DEF_LIST1) { + lir->defMask |= ENCODE_REG_LIST(lir->operands[1]); + } + + if (flags & SETS_CCODES) { + lir->defMask |= ENCODE_CCODE; + } + + /* Conservatively treat the IT block */ + if (flags & IS_IT) { + lir->defMask = ENCODE_ALL; + } + + if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) { + int i; + + for (i = 0; i < 4; i++) { + if (flags & (1 << (kRegUse0 + i))) { + setupRegMask(&lir->useMask, lir->operands[i]); + } + } + } + + if (flags & REG_USE_PC) { + lir->useMask |= ENCODE_REG_PC; + } + + if (flags & REG_USE_SP) { + lir->useMask |= ENCODE_REG_SP; + } + + if (flags & REG_USE_LIST0) { + lir->useMask |= ENCODE_REG_LIST(lir->operands[0]); + } + + if (flags & REG_USE_LIST1) { + lir->useMask |= ENCODE_REG_LIST(lir->operands[1]); + } + + if (flags & USES_CCODES) { + lir->useMask |= ENCODE_CCODE; + } +} + +/* + * Set up the accurate resource mask for branch instructions + */ +static void relaxBranchMasks(MipsLIR *lir) +{ + int flags = EncodingMap[lir->opcode].flags; + + /* Make sure only branch instructions are passed here */ + assert(flags & IS_BRANCH); + + lir->defMask |= ENCODE_REG_PC; + lir->useMask |= ENCODE_REG_PC; + + + if (flags & REG_DEF_LR) { + lir->defMask |= ENCODE_REG_LR; + } + + if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) { + int i; + + for (i = 0; i < 4; i++) { + if (flags & (1 << (kRegUse0 + i))) { + setupRegMask(&lir->useMask, 
lir->operands[i]); + } + } + } + + if (flags & USES_CCODES) { + lir->useMask |= ENCODE_CCODE; + } +} + +/* + * The following are building blocks to construct low-level IRs with 0 - 4 + * operands. + */ +static MipsLIR *newLIR0(CompilationUnit *cUnit, MipsOpCode opcode) +{ + MipsLIR *insn = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + assert(isPseudoOpCode(opcode) || (EncodingMap[opcode].flags & NO_OPERAND)); + insn->opcode = opcode; + setupResourceMasks(insn); + dvmCompilerAppendLIR(cUnit, (LIR *) insn); + return insn; +} + +static MipsLIR *newLIR1(CompilationUnit *cUnit, MipsOpCode opcode, + int dest) +{ + MipsLIR *insn = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + assert(isPseudoOpCode(opcode) || (EncodingMap[opcode].flags & IS_UNARY_OP)); + insn->opcode = opcode; + insn->operands[0] = dest; + setupResourceMasks(insn); + dvmCompilerAppendLIR(cUnit, (LIR *) insn); + return insn; +} + +static MipsLIR *newLIR2(CompilationUnit *cUnit, MipsOpCode opcode, + int dest, int src1) +{ + MipsLIR *insn = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + assert(isPseudoOpCode(opcode) || + (EncodingMap[opcode].flags & IS_BINARY_OP)); + insn->opcode = opcode; + insn->operands[0] = dest; + insn->operands[1] = src1; + setupResourceMasks(insn); + dvmCompilerAppendLIR(cUnit, (LIR *) insn); + return insn; +} + +static MipsLIR *newLIR3(CompilationUnit *cUnit, MipsOpCode opcode, + int dest, int src1, int src2) +{ + MipsLIR *insn = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + if (!(EncodingMap[opcode].flags & IS_TERTIARY_OP)) { + LOGE("Bad LIR3: %s[%d]",EncodingMap[opcode].name,opcode); + } + assert(isPseudoOpCode(opcode) || + (EncodingMap[opcode].flags & IS_TERTIARY_OP)); + insn->opcode = opcode; + insn->operands[0] = dest; + insn->operands[1] = src1; + insn->operands[2] = src2; + setupResourceMasks(insn); + dvmCompilerAppendLIR(cUnit, (LIR *) insn); + return insn; +} + +static MipsLIR *newLIR4(CompilationUnit *cUnit, MipsOpCode opcode, + int dest, int src1, int src2, int info) +{ + MipsLIR *insn = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + assert(isPseudoOpCode(opcode) || + (EncodingMap[opcode].flags & IS_QUAD_OP)); + insn->opcode = opcode; + insn->operands[0] = dest; + insn->operands[1] = src1; + insn->operands[2] = src2; + insn->operands[3] = info; + setupResourceMasks(insn); + dvmCompilerAppendLIR(cUnit, (LIR *) insn); + return insn; +} + +/* + * If the next instruction is a move-result or move-result-long, + * return the target Dalvik sReg[s] and convert the next to a + * nop. Otherwise, return INVALID_SREG. Used to optimize method inlining. + */ +static RegLocation inlinedTarget(CompilationUnit *cUnit, MIR *mir, + bool fpHint) +{ + if (mir->next && + ((mir->next->dalvikInsn.opcode == OP_MOVE_RESULT) || + (mir->next->dalvikInsn.opcode == OP_MOVE_RESULT_OBJECT))) { + mir->next->dalvikInsn.opcode = OP_NOP; + return dvmCompilerGetDest(cUnit, mir->next, 0); + } else { + RegLocation res = LOC_DALVIK_RETURN_VAL; + res.fp = fpHint; + return res; + } +} + +/* + * The following are building blocks to insert constants into the pool or + * instruction streams. 
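+ *
+ * addWordData() below either chains the word onto the supplied literal list
+ * (to be emitted together with the trace) or, when no list is given, drops
+ * it straight into the instruction stream as a kMips32BitData pseudo-op.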
+ */ + +/* Add a 32-bit constant either in the constant pool or mixed with code */ +static MipsLIR *addWordData(CompilationUnit *cUnit, LIR **constantListP, + int value) +{ + /* Add the constant to the literal pool */ + if (constantListP) { + MipsLIR *newValue = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + newValue->operands[0] = value; + newValue->generic.next = *constantListP; + *constantListP = (LIR *) newValue; + return newValue; + } else { + /* Add the constant in the middle of code stream */ + newLIR1(cUnit, kMips32BitData, value); + } + return NULL; +} + +static RegLocation inlinedTargetWide(CompilationUnit *cUnit, MIR *mir, + bool fpHint) +{ + if (mir->next && + (mir->next->dalvikInsn.opcode == OP_MOVE_RESULT_WIDE)) { + mir->next->dalvikInsn.opcode = OP_NOP; + return dvmCompilerGetDestWide(cUnit, mir->next, 0, 1); + } else { + RegLocation res = LOC_DALVIK_RETURN_VAL_WIDE; + res.fp = fpHint; + return res; + } +} + + +/* + * Generate an kMipsPseudoBarrier marker to indicate the boundary of special + * blocks. + */ +static void genBarrier(CompilationUnit *cUnit) +{ + MipsLIR *barrier = newLIR0(cUnit, kMipsPseudoBarrier); + /* Mark all resources as being clobbered */ + barrier->defMask = -1; +} + +/* Create the PC reconstruction slot if not already done */ +extern MipsLIR *genCheckCommon(CompilationUnit *cUnit, int dOffset, + MipsLIR *branch, + MipsLIR *pcrLabel) +{ + /* Forget all def info (because we might rollback here. Bug #2367397 */ + dvmCompilerResetDefTracking(cUnit); + + /* Set up the place holder to reconstruct this Dalvik PC */ + if (pcrLabel == NULL) { + int dPC = (int) (cUnit->method->insns + dOffset); + pcrLabel = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + pcrLabel->opcode = kMipsPseudoPCReconstructionCell; + pcrLabel->operands[0] = dPC; + pcrLabel->operands[1] = dOffset; + /* Insert the place holder to the growable list */ + dvmInsertGrowableList(&cUnit->pcReconstructionList, + (intptr_t) pcrLabel); + } + /* Branch to the PC reconstruction code */ + branch->generic.target = (LIR *) pcrLabel; + + /* Clear the conservative flags for branches that punt to the interpreter */ + relaxBranchMasks(branch); + + return pcrLabel; +} diff --git a/vm/compiler/codegen/mips/CodegenDriver.cpp b/vm/compiler/codegen/mips/CodegenDriver.cpp new file mode 100644 index 000000000..6ef2ce4d5 --- /dev/null +++ b/vm/compiler/codegen/mips/CodegenDriver.cpp @@ -0,0 +1,4938 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains codegen and support common to all supported + * Mips variants. It is included by: + * + * Codegen-$(TARGET_ARCH_VARIANT).c + * + * which combines this common code with specific support found in the + * applicable directory below this one. + */ + +/* + * Mark garbage collection card. Skip if the value we're storing is null. 
+ */ +static void markCard(CompilationUnit *cUnit, int valReg, int tgtAddrReg) +{ + int regCardBase = dvmCompilerAllocTemp(cUnit); + int regCardNo = dvmCompilerAllocTemp(cUnit); + MipsLIR *branchOver = opCompareBranch(cUnit, kMipsBeq, valReg, r_ZERO); + loadWordDisp(cUnit, rSELF, offsetof(Thread, cardTable), + regCardBase); + opRegRegImm(cUnit, kOpLsr, regCardNo, tgtAddrReg, GC_CARD_SHIFT); + storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0, + kUnsignedByte); + MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel); + target->defMask = ENCODE_ALL; + branchOver->generic.target = (LIR *)target; + dvmCompilerFreeTemp(cUnit, regCardBase); + dvmCompilerFreeTemp(cUnit, regCardNo); +} + +static bool genConversionCall(CompilationUnit *cUnit, MIR *mir, void *funct, + int srcSize, int tgtSize) +{ + /* + * Don't optimize the register usage since it calls out to template + * functions + */ + RegLocation rlSrc; + RegLocation rlDest; + int srcReg = 0; + int srcRegHi = 0; + dvmCompilerFlushAllRegs(cUnit); /* Send everything to home location */ + + if (srcSize == kWord) { + srcReg = r_A0; + } else if (srcSize == kSingle) { +#ifdef __mips_hard_float + srcReg = r_F12; +#else + srcReg = r_A0; +#endif + } else if (srcSize == kLong) { + srcReg = r_ARG0; + srcRegHi = r_ARG1; + } else if (srcSize == kDouble) { +#ifdef __mips_hard_float + srcReg = r_FARG0; + srcRegHi = r_FARG1; +#else + srcReg = r_ARG0; + srcRegHi = r_ARG1; +#endif + } + else { + assert(0); + } + + if (srcSize == kWord || srcSize == kSingle) { + rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + loadValueDirectFixed(cUnit, rlSrc, srcReg); + } else { + rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + loadValueDirectWideFixed(cUnit, rlSrc, srcReg, srcRegHi); + } + LOAD_FUNC_ADDR(cUnit, r_T9, (int)funct); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + dvmCompilerClobberCallRegs(cUnit); + if (tgtSize == kWord || tgtSize == kSingle) { + RegLocation rlResult; + rlDest = dvmCompilerGetDest(cUnit, mir, 0); +#ifdef __mips_hard_float + if (tgtSize == kSingle) + rlResult = dvmCompilerGetReturnAlt(cUnit); + else + rlResult = dvmCompilerGetReturn(cUnit); +#else + rlResult = dvmCompilerGetReturn(cUnit); +#endif + storeValue(cUnit, rlDest, rlResult); + } else { + RegLocation rlResult; + rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); +#ifdef __mips_hard_float + if (tgtSize == kDouble) + rlResult = dvmCompilerGetReturnWideAlt(cUnit); + else + rlResult = dvmCompilerGetReturnWide(cUnit); +#else + rlResult = dvmCompilerGetReturnWide(cUnit); +#endif + storeValueWide(cUnit, rlDest, rlResult); + } + return false; +} + + +static bool genArithOpFloatPortable(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2) +{ + RegLocation rlResult; + void* funct; + + switch (mir->dalvikInsn.opcode) { + case OP_ADD_FLOAT_2ADDR: + case OP_ADD_FLOAT: + funct = (void*) __addsf3; + break; + case OP_SUB_FLOAT_2ADDR: + case OP_SUB_FLOAT: + funct = (void*) __subsf3; + break; + case OP_DIV_FLOAT_2ADDR: + case OP_DIV_FLOAT: + funct = (void*) __divsf3; + break; + case OP_MUL_FLOAT_2ADDR: + case OP_MUL_FLOAT: + funct = (void*) __mulsf3; + break; + case OP_REM_FLOAT_2ADDR: + case OP_REM_FLOAT: + funct = (void*) fmodf; + break; + case OP_NEG_FLOAT: { + genNegFloat(cUnit, rlDest, rlSrc1); + return false; + } + default: + return true; + } + + dvmCompilerFlushAllRegs(cUnit); /* Send everything to home location */ +#ifdef __mips_hard_float + loadValueDirectFixed(cUnit, rlSrc1, r_F12); + 
loadValueDirectFixed(cUnit, rlSrc2, r_F14); +#else + loadValueDirectFixed(cUnit, rlSrc1, r_A0); + loadValueDirectFixed(cUnit, rlSrc2, r_A1); +#endif + LOAD_FUNC_ADDR(cUnit, r_T9, (int)funct); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + dvmCompilerClobberCallRegs(cUnit); +#ifdef __mips_hard_float + rlResult = dvmCompilerGetReturnAlt(cUnit); +#else + rlResult = dvmCompilerGetReturn(cUnit); +#endif + storeValue(cUnit, rlDest, rlResult); + return false; +} + +static bool genArithOpDoublePortable(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2) +{ + RegLocation rlResult; + void* funct; + + switch (mir->dalvikInsn.opcode) { + case OP_ADD_DOUBLE_2ADDR: + case OP_ADD_DOUBLE: + funct = (void*) __adddf3; + break; + case OP_SUB_DOUBLE_2ADDR: + case OP_SUB_DOUBLE: + funct = (void*) __subdf3; + break; + case OP_DIV_DOUBLE_2ADDR: + case OP_DIV_DOUBLE: + funct = (void*) __divsf3; + break; + case OP_MUL_DOUBLE_2ADDR: + case OP_MUL_DOUBLE: + funct = (void*) __muldf3; + break; + case OP_REM_DOUBLE_2ADDR: + case OP_REM_DOUBLE: + funct = (void*) (double (*)(double, double)) fmod; + break; + case OP_NEG_DOUBLE: { + genNegDouble(cUnit, rlDest, rlSrc1); + return false; + } + default: + return true; + } + dvmCompilerFlushAllRegs(cUnit); /* Send everything to home location */ + LOAD_FUNC_ADDR(cUnit, r_T9, (int)funct); +#ifdef __mips_hard_float + loadValueDirectWideFixed(cUnit, rlSrc1, r_F12, r_F13); + loadValueDirectWideFixed(cUnit, rlSrc2, r_F14, r_F15); +#else + loadValueDirectWideFixed(cUnit, rlSrc1, r_ARG0, r_ARG1); + loadValueDirectWideFixed(cUnit, rlSrc2, r_ARG2, r_ARG3); +#endif + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + dvmCompilerClobberCallRegs(cUnit); +#ifdef __mips_hard_float + rlResult = dvmCompilerGetReturnWideAlt(cUnit); +#else + rlResult = dvmCompilerGetReturnWide(cUnit); +#endif + storeValueWide(cUnit, rlDest, rlResult); +#if defined(WITH_SELF_VERIFICATION) + cUnit->usesLinkRegister = true; +#endif + return false; +} + +static bool genConversionPortable(CompilationUnit *cUnit, MIR *mir) +{ + Opcode opcode = mir->dalvikInsn.opcode; + + switch (opcode) { + case OP_INT_TO_FLOAT: + return genConversionCall(cUnit, mir, (void*)__floatsisf, kWord, kSingle); + case OP_FLOAT_TO_INT: + return genConversionCall(cUnit, mir, (void*)__fixsfsi, kSingle, kWord); + case OP_DOUBLE_TO_FLOAT: + return genConversionCall(cUnit, mir, (void*)__truncdfsf2, kDouble, kSingle); + case OP_FLOAT_TO_DOUBLE: + return genConversionCall(cUnit, mir, (void*)__extendsfdf2, kSingle, kDouble); + case OP_INT_TO_DOUBLE: + return genConversionCall(cUnit, mir, (void*)__floatsidf, kWord, kDouble); + case OP_DOUBLE_TO_INT: + return genConversionCall(cUnit, mir, (void*)__fixdfsi, kDouble, kWord); + case OP_FLOAT_TO_LONG: + return genConversionCall(cUnit, mir, (void*)__fixsfdi, kSingle, kLong); + case OP_LONG_TO_FLOAT: + return genConversionCall(cUnit, mir, (void*)__floatdisf, kLong, kSingle); + case OP_DOUBLE_TO_LONG: + return genConversionCall(cUnit, mir, (void*)__fixdfdi, kDouble, kLong); + case OP_LONG_TO_DOUBLE: + return genConversionCall(cUnit, mir, (void*)__floatdidf, kLong, kDouble); + default: + return true; + } + return false; +} + +#if defined(WITH_SELF_VERIFICATION) +static void selfVerificationBranchInsert(LIR *currentLIR, Mipsopcode opcode, + int dest, int src1) +{ +assert(0); /* MIPSTODO port selfVerificationBranchInsert() */ + MipsLIR *insn = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); 
+ insn->opcode = opcode; + insn->operands[0] = dest; + insn->operands[1] = src1; + setupResourceMasks(insn); + dvmCompilerInsertLIRBefore(currentLIR, (LIR *) insn); +} + +/* + * Example where r14 (LR) is preserved around a heap access under + * self-verification mode in Thumb2: + * + * D/dalvikvm( 1538): 0x59414c5e (0026): ldr r14, [r15pc, #220] <-hoisted + * D/dalvikvm( 1538): 0x59414c62 (002a): mla r4, r0, r8, r4 + * D/dalvikvm( 1538): 0x59414c66 (002e): adds r3, r4, r3 + * D/dalvikvm( 1538): 0x59414c6a (0032): push <r5, r14> ---+ + * D/dalvikvm( 1538): 0x59414c6c (0034): blx_1 0x5940f494 | + * D/dalvikvm( 1538): 0x59414c6e (0036): blx_2 see above <-MEM_OP_DECODE + * D/dalvikvm( 1538): 0x59414c70 (0038): ldr r10, [r9, #0] | + * D/dalvikvm( 1538): 0x59414c74 (003c): pop <r5, r14> ---+ + * D/dalvikvm( 1538): 0x59414c78 (0040): mov r11, r10 + * D/dalvikvm( 1538): 0x59414c7a (0042): asr r12, r11, #31 + * D/dalvikvm( 1538): 0x59414c7e (0046): movs r0, r2 + * D/dalvikvm( 1538): 0x59414c80 (0048): movs r1, r3 + * D/dalvikvm( 1538): 0x59414c82 (004a): str r2, [r5, #16] + * D/dalvikvm( 1538): 0x59414c84 (004c): mov r2, r11 + * D/dalvikvm( 1538): 0x59414c86 (004e): str r3, [r5, #20] + * D/dalvikvm( 1538): 0x59414c88 (0050): mov r3, r12 + * D/dalvikvm( 1538): 0x59414c8a (0052): str r11, [r5, #24] + * D/dalvikvm( 1538): 0x59414c8e (0056): str r12, [r5, #28] + * D/dalvikvm( 1538): 0x59414c92 (005a): blx r14 <-use of LR + * + */ +static void selfVerificationBranchInsertPass(CompilationUnit *cUnit) +{ +assert(0); /* MIPSTODO port selfVerificationBranchInsertPass() */ + MipsLIR *thisLIR; + Templateopcode opcode = TEMPLATE_MEM_OP_DECODE; + + for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn; + thisLIR != (MipsLIR *) cUnit->lastLIRInsn; + thisLIR = NEXT_LIR(thisLIR)) { + if (!thisLIR->flags.isNop && thisLIR->flags.insertWrapper) { + /* + * Push r5(FP) and r14(LR) onto stack. We need to make sure that + * SP is 8-byte aligned, and we use r5 as a temp to restore LR + * for Thumb-only target since LR cannot be directly accessed in + * Thumb mode. Another reason to choose r5 here is it is the Dalvik + * frame pointer and cannot be the target of the emulated heap + * load. 
+ */ + if (cUnit->usesLinkRegister) { + genSelfVerificationPreBranch(cUnit, thisLIR); + } + + /* Branch to mem op decode template */ + selfVerificationBranchInsert((LIR *) thisLIR, kThumbBlx1, + (int) gDvmJit.codeCache + templateEntryOffsets[opcode], + (int) gDvmJit.codeCache + templateEntryOffsets[opcode]); + selfVerificationBranchInsert((LIR *) thisLIR, kThumbBlx2, + (int) gDvmJit.codeCache + templateEntryOffsets[opcode], + (int) gDvmJit.codeCache + templateEntryOffsets[opcode]); + + /* Restore LR */ + if (cUnit->usesLinkRegister) { + genSelfVerificationPostBranch(cUnit, thisLIR); + } + } + } +} +#endif + +/* Generate conditional branch instructions */ +static MipsLIR *genConditionalBranchMips(CompilationUnit *cUnit, + MipsOpCode opc, int rs, int rt, + MipsLIR *target) +{ + MipsLIR *branch = opCompareBranch(cUnit, opc, rs, rt); + branch->generic.target = (LIR *) target; + return branch; +} + +/* Generate a unconditional branch to go to the interpreter */ +static inline MipsLIR *genTrap(CompilationUnit *cUnit, int dOffset, + MipsLIR *pcrLabel) +{ + MipsLIR *branch = opNone(cUnit, kOpUncondBr); + return genCheckCommon(cUnit, dOffset, branch, pcrLabel); +} + +/* Load a wide field from an object instance */ +static void genIGetWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset) +{ + RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + RegLocation rlResult; + rlObj = loadValue(cUnit, rlObj, kCoreReg); + int regPtr = dvmCompilerAllocTemp(cUnit); + + assert(rlDest.wide); + + genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset, + NULL);/* null object? */ + opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + + HEAP_ACCESS_SHADOW(true); + loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); + storeValueWide(cUnit, rlDest, rlResult); +} + +/* Store a wide field to an object instance */ +static void genIPutWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset) +{ + RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 2); + rlObj = loadValue(cUnit, rlObj, kCoreReg); + int regPtr; + rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg); + genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset, + NULL);/* null object? */ + regPtr = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset); + + HEAP_ACCESS_SHADOW(true); + storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); +} + +/* + * Load a field from an object instance + * + */ +static void genIGet(CompilationUnit *cUnit, MIR *mir, OpSize size, + int fieldOffset, bool isVolatile) +{ + RegLocation rlResult; + RegisterClass regClass = dvmCompilerRegClassBySize(size); + RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlDest = dvmCompilerGetDest(cUnit, mir, 0); + rlObj = loadValue(cUnit, rlObj, kCoreReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true); + genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset, + NULL);/* null object? 
*/ + + HEAP_ACCESS_SHADOW(true); + loadBaseDisp(cUnit, mir, rlObj.lowReg, fieldOffset, rlResult.lowReg, + size, rlObj.sRegLow); + HEAP_ACCESS_SHADOW(false); + if (isVolatile) { + dvmCompilerGenMemBarrier(cUnit, 0); + } + + storeValue(cUnit, rlDest, rlResult); +} + +/* + * Store a field to an object instance + * + */ +static void genIPut(CompilationUnit *cUnit, MIR *mir, OpSize size, + int fieldOffset, bool isObject, bool isVolatile) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 1); + rlObj = loadValue(cUnit, rlObj, kCoreReg); + rlSrc = loadValue(cUnit, rlSrc, regClass); + genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset, + NULL);/* null object? */ + + if (isVolatile) { + dvmCompilerGenMemBarrier(cUnit, 0); + } + HEAP_ACCESS_SHADOW(true); + storeBaseDisp(cUnit, rlObj.lowReg, fieldOffset, rlSrc.lowReg, size); + HEAP_ACCESS_SHADOW(false); + if (isVolatile) { + dvmCompilerGenMemBarrier(cUnit, 0); + } + if (isObject) { + /* NOTE: marking card based on object head */ + markCard(cUnit, rlSrc.lowReg, rlObj.lowReg); + } +} + + +/* + * Generate array load + */ +static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlDest, int scale) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + RegLocation rlResult; + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIndex = loadValue(cUnit, rlIndex, kCoreReg); + int regPtr; + + /* null object? */ + MipsLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, + rlArray.lowReg, mir->offset, NULL); + } + + regPtr = dvmCompilerAllocTemp(cUnit); + + assert(IS_SIMM16(dataOffset)); + if (scale) { + opRegRegImm(cUnit, kOpLsl, regPtr, rlIndex.lowReg, scale); + } + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + int regLen = dvmCompilerAllocTemp(cUnit); + /* Get len */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen); + genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, + pcrLabel); + dvmCompilerFreeTemp(cUnit, regLen); + } + + if (scale) { + opRegReg(cUnit, kOpAdd, regPtr, rlArray.lowReg); + } else { + opRegRegReg(cUnit, kOpAdd, regPtr, rlArray.lowReg, rlIndex.lowReg); + } + + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true); + if ((size == kLong) || (size == kDouble)) { + HEAP_ACCESS_SHADOW(true); + loadBaseDispWide(cUnit, mir, regPtr, dataOffset, rlResult.lowReg, + rlResult.highReg, INVALID_SREG); + HEAP_ACCESS_SHADOW(false); + dvmCompilerFreeTemp(cUnit, regPtr); + storeValueWide(cUnit, rlDest, rlResult); + } else { + HEAP_ACCESS_SHADOW(true); + loadBaseDisp(cUnit, mir, regPtr, dataOffset, rlResult.lowReg, + size, INVALID_SREG); + HEAP_ACCESS_SHADOW(false); + dvmCompilerFreeTemp(cUnit, regPtr); + storeValue(cUnit, rlDest, rlResult); + } +} + +/* + * Generate array store + * + */ +static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlSrc, int scale) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + + int regPtr; + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIndex = loadValue(cUnit, rlIndex, kCoreReg); + + 
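+    /*
+     * Illustrative note (a sketch only; no additional code is generated
+     * here): the element address computed below works out to
+     *
+     *     &array->contents + (index << scale)
+     *
+     * e.g. for an int[] store (scale == 2) element i is written at
+     * arrayReg + dataOffset + (i << 2), with dataOffset ==
+     * OFFSETOF_MEMBER(ArrayObject, contents).
+     */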
if (dvmCompilerIsTemp(cUnit, rlArray.lowReg)) { + dvmCompilerClobber(cUnit, rlArray.lowReg); + regPtr = rlArray.lowReg; + } else { + regPtr = dvmCompilerAllocTemp(cUnit); + genRegCopy(cUnit, regPtr, rlArray.lowReg); + } + + /* null object? */ + MipsLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, + mir->offset, NULL); + } + + assert(IS_SIMM16(dataOffset)); + int tReg = dvmCompilerAllocTemp(cUnit); + if (scale) { + opRegRegImm(cUnit, kOpLsl, tReg, rlIndex.lowReg, scale); + } + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + int regLen = dvmCompilerAllocTemp(cUnit); + //NOTE: max live temps(4) here. + /* Get len */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen); + genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, + pcrLabel); + dvmCompilerFreeTemp(cUnit, regLen); + } + + if (scale) { + opRegReg(cUnit, kOpAdd, tReg, rlArray.lowReg); + } else { + opRegRegReg(cUnit, kOpAdd, tReg, rlArray.lowReg, rlIndex.lowReg); + } + + /* at this point, tReg points to array, 2 live temps */ + if ((size == kLong) || (size == kDouble)) { + rlSrc = loadValueWide(cUnit, rlSrc, regClass); + HEAP_ACCESS_SHADOW(true); + storeBaseDispWide(cUnit, tReg, dataOffset, rlSrc.lowReg, rlSrc.highReg) + HEAP_ACCESS_SHADOW(false); + dvmCompilerFreeTemp(cUnit, tReg); + dvmCompilerFreeTemp(cUnit, regPtr); + } else { + rlSrc = loadValue(cUnit, rlSrc, regClass); + HEAP_ACCESS_SHADOW(true); + storeBaseDisp(cUnit, tReg, dataOffset, rlSrc.lowReg, size); + dvmCompilerFreeTemp(cUnit, tReg); + HEAP_ACCESS_SHADOW(false); + } +} + +/* + * Generate array object store + * Must use explicit register allocation here because of + * call-out to dvmCanPutArrayElement + */ +static void genArrayObjectPut(CompilationUnit *cUnit, MIR *mir, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlSrc, int scale) +{ + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + + int regLen = r_A0; + int regPtr = r_S0; /* Preserved across call */ + int regArray = r_A1; + int regIndex = r_S4; /* Preserved across call */ + + dvmCompilerFlushAllRegs(cUnit); + // moved lock for r_S0 and r_S4 here from below since genBoundsCheck + // allocates a temporary that can result in clobbering either of them + dvmCompilerLockTemp(cUnit, regPtr); // r_S0 + dvmCompilerLockTemp(cUnit, regIndex); // r_S4 + + loadValueDirectFixed(cUnit, rlArray, regArray); + loadValueDirectFixed(cUnit, rlIndex, regIndex); + + /* null object? */ + MipsLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, regArray, + mir->offset, NULL); + } + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + /* Get len */ + loadWordDisp(cUnit, regArray, lenOffset, regLen); + /* regPtr -> array data */ + opRegRegImm(cUnit, kOpAdd, regPtr, regArray, dataOffset); + genBoundsCheck(cUnit, regIndex, regLen, mir->offset, + pcrLabel); + } else { + /* regPtr -> array data */ + opRegRegImm(cUnit, kOpAdd, regPtr, regArray, dataOffset); + } + + /* Get object to store */ + loadValueDirectFixed(cUnit, rlSrc, r_A0); + LOAD_FUNC_ADDR(cUnit, r_T9, (int)dvmCanPutArrayElement); + + /* Are we storing null? 
If so, avoid check */ + MipsLIR *branchOver = opCompareBranch(cUnit, kMipsBeqz, r_A0, -1); + + /* Make sure the types are compatible */ + loadWordDisp(cUnit, regArray, offsetof(Object, clazz), r_A1); + loadWordDisp(cUnit, r_A0, offsetof(Object, clazz), r_A0); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + dvmCompilerClobberCallRegs(cUnit); + + /* + * Using fixed registers here, and counting on r_S0 and r_S4 being + * preserved across the above call. Tell the register allocation + * utilities about the regs we are using directly + */ + dvmCompilerLockTemp(cUnit, r_A0); + dvmCompilerLockTemp(cUnit, r_A1); + + /* Bad? - roll back and re-execute if so */ + genRegImmCheck(cUnit, kMipsCondEq, r_V0, 0, mir->offset, pcrLabel); + + /* Resume here - must reload element & array, regPtr & index preserved */ + loadValueDirectFixed(cUnit, rlSrc, r_A0); + loadValueDirectFixed(cUnit, rlArray, r_A1); + + MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel); + target->defMask = ENCODE_ALL; + branchOver->generic.target = (LIR *) target; + + HEAP_ACCESS_SHADOW(true); + storeBaseIndexed(cUnit, regPtr, regIndex, r_A0, + scale, kWord); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); + dvmCompilerFreeTemp(cUnit, regIndex); + + /* NOTE: marking card here based on object head */ + markCard(cUnit, r_A0, r_A1); +} + +static bool genShiftOpLong(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlShift) +{ + /* + * Don't mess with the regsiters here as there is a particular calling + * convention to the out-of-line handler. + */ + RegLocation rlResult; + + loadValueDirectWideFixed(cUnit, rlSrc1, r_ARG0, r_ARG1); + loadValueDirect(cUnit, rlShift, r_A2); + switch( mir->dalvikInsn.opcode) { + case OP_SHL_LONG: + case OP_SHL_LONG_2ADDR: + genDispatchToHandler(cUnit, TEMPLATE_SHL_LONG); + break; + case OP_SHR_LONG: + case OP_SHR_LONG_2ADDR: + genDispatchToHandler(cUnit, TEMPLATE_SHR_LONG); + break; + case OP_USHR_LONG: + case OP_USHR_LONG_2ADDR: + genDispatchToHandler(cUnit, TEMPLATE_USHR_LONG); + break; + default: + return true; + } + rlResult = dvmCompilerGetReturnWide(cUnit); + storeValueWide(cUnit, rlDest, rlResult); + return false; +} + +static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2) +{ + RegLocation rlResult; + OpKind firstOp = kOpBkpt; + OpKind secondOp = kOpBkpt; + bool callOut = false; + void *callTgt; + + switch (mir->dalvikInsn.opcode) { + case OP_NOT_LONG: + rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + opRegReg(cUnit, kOpMvn, rlResult.lowReg, rlSrc2.lowReg); + opRegReg(cUnit, kOpMvn, rlResult.highReg, rlSrc2.highReg); + storeValueWide(cUnit, rlDest, rlResult); + return false; + break; + case OP_ADD_LONG: + case OP_ADD_LONG_2ADDR: + firstOp = kOpAdd; + secondOp = kOpAdc; + break; + case OP_SUB_LONG: + case OP_SUB_LONG_2ADDR: + firstOp = kOpSub; + secondOp = kOpSbc; + break; + case OP_MUL_LONG: + case OP_MUL_LONG_2ADDR: + genMulLong(cUnit, rlDest, rlSrc1, rlSrc2); + return false; + case OP_DIV_LONG: + case OP_DIV_LONG_2ADDR: + callOut = true; + callTgt = (void*)__divdi3; + break; + case OP_REM_LONG: + case OP_REM_LONG_2ADDR: + callOut = true; + callTgt = (void*)__moddi3; + break; + case OP_AND_LONG_2ADDR: + case OP_AND_LONG: + firstOp = kOpAnd; + secondOp = kOpAnd; + break; + case OP_OR_LONG: + case OP_OR_LONG_2ADDR: + firstOp = kOpOr; + secondOp = kOpOr; + break; 
+ case OP_XOR_LONG: + case OP_XOR_LONG_2ADDR: + firstOp = kOpXor; + secondOp = kOpXor; + break; + case OP_NEG_LONG: { + int tReg = dvmCompilerAllocTemp(cUnit); + rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + newLIR3(cUnit, kMipsSubu, rlResult.lowReg, r_ZERO, rlSrc2.lowReg); + newLIR3(cUnit, kMipsSubu, tReg, r_ZERO, rlSrc2.highReg); + newLIR3(cUnit, kMipsSltu, rlResult.highReg, r_ZERO, rlResult.lowReg); + newLIR3(cUnit, kMipsSubu, rlResult.highReg, tReg, rlResult.highReg); + dvmCompilerFreeTemp(cUnit, tReg); + storeValueWide(cUnit, rlDest, rlResult); + return false; + break; + } + default: + LOGE("Invalid long arith op"); + dvmCompilerAbort(cUnit); + } + if (!callOut) { + genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2); + } else { + dvmCompilerFlushAllRegs(cUnit); /* Send everything to home location */ + loadValueDirectWideFixed(cUnit, rlSrc1, r_ARG0, r_ARG1); + LOAD_FUNC_ADDR(cUnit, r_T9, (int) callTgt); + loadValueDirectWideFixed(cUnit, rlSrc2, r_ARG2, r_ARG3); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + dvmCompilerClobberCallRegs(cUnit); + rlResult = dvmCompilerGetReturnWide(cUnit); + storeValueWide(cUnit, rlDest, rlResult); +#if defined(WITH_SELF_VERIFICATION) + cUnit->usesLinkRegister = true; +#endif + } + return false; +} + +static bool genArithOpInt(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2) +{ + OpKind op = kOpBkpt; + bool checkZero = false; + bool unary = false; + RegLocation rlResult; + bool shiftOp = false; + int isDivRem = false; + MipsOpCode opc; + int divReg; + + switch (mir->dalvikInsn.opcode) { + case OP_NEG_INT: + op = kOpNeg; + unary = true; + break; + case OP_NOT_INT: + op = kOpMvn; + unary = true; + break; + case OP_ADD_INT: + case OP_ADD_INT_2ADDR: + op = kOpAdd; + break; + case OP_SUB_INT: + case OP_SUB_INT_2ADDR: + op = kOpSub; + break; + case OP_MUL_INT: + case OP_MUL_INT_2ADDR: + op = kOpMul; + break; + case OP_DIV_INT: + case OP_DIV_INT_2ADDR: + isDivRem = true; + checkZero = true; + opc = kMipsMflo; + divReg = r_LO; + break; + case OP_REM_INT: + case OP_REM_INT_2ADDR: + isDivRem = true; + checkZero = true; + opc = kMipsMfhi; + divReg = r_HI; + break; + case OP_AND_INT: + case OP_AND_INT_2ADDR: + op = kOpAnd; + break; + case OP_OR_INT: + case OP_OR_INT_2ADDR: + op = kOpOr; + break; + case OP_XOR_INT: + case OP_XOR_INT_2ADDR: + op = kOpXor; + break; + case OP_SHL_INT: + case OP_SHL_INT_2ADDR: + shiftOp = true; + op = kOpLsl; + break; + case OP_SHR_INT: + case OP_SHR_INT_2ADDR: + shiftOp = true; + op = kOpAsr; + break; + case OP_USHR_INT: + case OP_USHR_INT_2ADDR: + shiftOp = true; + op = kOpLsr; + break; + default: + LOGE("Invalid word arith op: %#x(%d)", + mir->dalvikInsn.opcode, mir->dalvikInsn.opcode); + dvmCompilerAbort(cUnit); + } + + rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg); + if (unary) { + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + opRegReg(cUnit, op, rlResult.lowReg, + rlSrc1.lowReg); + } else if (isDivRem) { + rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg); + if (checkZero) { + genNullCheck(cUnit, rlSrc2.sRegLow, rlSrc2.lowReg, mir->offset, NULL); + } + newLIR4(cUnit, kMipsDiv, r_HI, r_LO, rlSrc1.lowReg, rlSrc2.lowReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + newLIR2(cUnit, opc, rlResult.lowReg, divReg); + } else { + rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg); + if (shiftOp) { + int tReg = dvmCompilerAllocTemp(cUnit); + 
opRegRegImm(cUnit, kOpAnd, tReg, rlSrc2.lowReg, 31); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + opRegRegReg(cUnit, op, rlResult.lowReg, + rlSrc1.lowReg, tReg); + dvmCompilerFreeTemp(cUnit, tReg); + } else { + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + opRegRegReg(cUnit, op, rlResult.lowReg, + rlSrc1.lowReg, rlSrc2.lowReg); + } + } + storeValue(cUnit, rlDest, rlResult); + + return false; +} + +static bool genArithOp(CompilationUnit *cUnit, MIR *mir) +{ + Opcode opcode = mir->dalvikInsn.opcode; + RegLocation rlDest; + RegLocation rlSrc1; + RegLocation rlSrc2; + /* Deduce sizes of operands */ + if (mir->ssaRep->numUses == 2) { + rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 0); + rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 1); + } else if (mir->ssaRep->numUses == 3) { + rlSrc1 = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 2); + } else { + rlSrc1 = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + rlSrc2 = dvmCompilerGetSrcWide(cUnit, mir, 2, 3); + assert(mir->ssaRep->numUses == 4); + } + if (mir->ssaRep->numDefs == 1) { + rlDest = dvmCompilerGetDest(cUnit, mir, 0); + } else { + assert(mir->ssaRep->numDefs == 2); + rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + } + + if ((opcode >= OP_ADD_LONG_2ADDR) && (opcode <= OP_XOR_LONG_2ADDR)) { + return genArithOpLong(cUnit,mir, rlDest, rlSrc1, rlSrc2); + } + if ((opcode >= OP_ADD_LONG) && (opcode <= OP_XOR_LONG)) { + return genArithOpLong(cUnit,mir, rlDest, rlSrc1, rlSrc2); + } + if ((opcode >= OP_SHL_LONG_2ADDR) && (opcode <= OP_USHR_LONG_2ADDR)) { + return genShiftOpLong(cUnit,mir, rlDest, rlSrc1, rlSrc2); + } + if ((opcode >= OP_SHL_LONG) && (opcode <= OP_USHR_LONG)) { + return genShiftOpLong(cUnit,mir, rlDest, rlSrc1, rlSrc2); + } + if ((opcode >= OP_ADD_INT_2ADDR) && (opcode <= OP_USHR_INT_2ADDR)) { + return genArithOpInt(cUnit,mir, rlDest, rlSrc1, rlSrc2); + } + if ((opcode >= OP_ADD_INT) && (opcode <= OP_USHR_INT)) { + return genArithOpInt(cUnit,mir, rlDest, rlSrc1, rlSrc2); + } + if ((opcode >= OP_ADD_FLOAT_2ADDR) && (opcode <= OP_REM_FLOAT_2ADDR)) { + return genArithOpFloat(cUnit,mir, rlDest, rlSrc1, rlSrc2); + } + if ((opcode >= OP_ADD_FLOAT) && (opcode <= OP_REM_FLOAT)) { + return genArithOpFloat(cUnit, mir, rlDest, rlSrc1, rlSrc2); + } + if ((opcode >= OP_ADD_DOUBLE_2ADDR) && (opcode <= OP_REM_DOUBLE_2ADDR)) { + return genArithOpDouble(cUnit,mir, rlDest, rlSrc1, rlSrc2); + } + if ((opcode >= OP_ADD_DOUBLE) && (opcode <= OP_REM_DOUBLE)) { + return genArithOpDouble(cUnit,mir, rlDest, rlSrc1, rlSrc2); + } + return true; +} + +/* Generate unconditional branch instructions */ +static MipsLIR *genUnconditionalBranch(CompilationUnit *cUnit, MipsLIR *target) +{ + MipsLIR *branch = opNone(cUnit, kOpUncondBr); + branch->generic.target = (LIR *) target; + return branch; +} + +/* Perform the actual operation for OP_RETURN_* */ +void genReturnCommon(CompilationUnit *cUnit, MIR *mir) +{ + genDispatchToHandler(cUnit, gDvmJit.methodTraceSupport ? 
+ TEMPLATE_RETURN_PROF : TEMPLATE_RETURN); +#if defined(WITH_JIT_TUNING) + gDvmJit.returnOp++; +#endif + int dPC = (int) (cUnit->method->insns + mir->offset); + /* Insert branch, but defer setting of target */ + MipsLIR *branch = genUnconditionalBranch(cUnit, NULL); + /* Set up the place holder to reconstruct this Dalvik PC */ + MipsLIR *pcrLabel = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + pcrLabel->opcode = kMipsPseudoPCReconstructionCell; + pcrLabel->operands[0] = dPC; + pcrLabel->operands[1] = mir->offset; + /* Insert the place holder to the growable list */ + dvmInsertGrowableList(&cUnit->pcReconstructionList, (intptr_t) pcrLabel); + /* Branch to the PC reconstruction code */ + branch->generic.target = (LIR *) pcrLabel; +} + +static void genProcessArgsNoRange(CompilationUnit *cUnit, MIR *mir, + DecodedInstruction *dInsn, + MipsLIR **pcrLabel) +{ + unsigned int i; + unsigned int regMask = 0; + RegLocation rlArg; + int numDone = 0; + + /* + * Load arguments to r_A0..r_T0. Note that these registers may contain + * live values, so we clobber them immediately after loading to prevent + * them from being used as sources for subsequent loads. + */ + dvmCompilerLockAllTemps(cUnit); + for (i = 0; i < dInsn->vA; i++) { + regMask |= 1 << i; + rlArg = dvmCompilerGetSrc(cUnit, mir, numDone++); + loadValueDirectFixed(cUnit, rlArg, i+r_A0); /* r_A0 thru r_T0 */ + } + if (regMask) { + /* Up to 5 args are pushed on top of FP - sizeofStackSaveArea */ + opRegRegImm(cUnit, kOpSub, r_S4, rFP, + sizeof(StackSaveArea) + (dInsn->vA << 2)); + /* generate null check */ + if (pcrLabel) { + *pcrLabel = genNullCheck(cUnit, dvmCompilerSSASrc(mir, 0), r_A0, + mir->offset, NULL); + } + storeMultiple(cUnit, r_S4, regMask); + } +} + +static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir, + DecodedInstruction *dInsn, + MipsLIR **pcrLabel) +{ + int srcOffset = dInsn->vC << 2; + int numArgs = dInsn->vA; + int regMask; + + /* + * Note: here, all promoted registers will have been flushed + * back to the Dalvik base locations, so register usage restrictins + * are lifted. All parms loaded from original Dalvik register + * region - even though some might conceivably have valid copies + * cached in a preserved register. + */ + dvmCompilerLockAllTemps(cUnit); + + /* + * r4PC : &rFP[vC] + * r_S4: &newFP[0] + */ + opRegRegImm(cUnit, kOpAdd, r4PC, rFP, srcOffset); + /* load [r_A0 up to r_A3)] */ + regMask = (1 << ((numArgs < 4) ? numArgs : 4)) - 1; + /* + * Protect the loadMultiple instruction from being reordered with other + * Dalvik stack accesses. + * + * This code is also shared by the invoke jumbo instructions, and this + * does not need to be done if the invoke jumbo has no arguments. + */ + if (numArgs != 0) loadMultiple(cUnit, r4PC, regMask); + + opRegRegImm(cUnit, kOpSub, r_S4, rFP, + sizeof(StackSaveArea) + (numArgs << 2)); + /* generate null check */ + if (pcrLabel) { + *pcrLabel = genNullCheck(cUnit, dvmCompilerSSASrc(mir, 0), r_A0, + mir->offset, NULL); + } + + /* + * Handle remaining 4n arguments: + * store previously loaded 4 values and load the next 4 values + */ + if (numArgs >= 8) { + MipsLIR *loopLabel = NULL; + /* + * r_A0 contains "this" and it will be used later, so push it to the stack + * first. Pushing r_S1 (rFP) is just for stack alignment purposes. 
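+         *
+         * Rough sketch for a large argument count (say numArgs == 10):
+         * the first four arguments were already loaded into a0-a3 above;
+         * the block below stores that batch to the new frame and loads
+         * the next four (using an explicit loop only when numArgs > 11);
+         * the remaining numArgs % 4 arguments are then copied by the
+         * modulo-4 epilogue further down with a register mask that skips
+         * a0, so the "this" argument is preserved.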
+ */ + + newLIR2(cUnit, kMipsMove, r_T0, r_A0); + newLIR2(cUnit, kMipsMove, r_T1, r_S1); + + /* No need to generate the loop structure if numArgs <= 11 */ + if (numArgs > 11) { + loadConstant(cUnit, rFP, ((numArgs - 4) >> 2) << 2); + loopLabel = newLIR0(cUnit, kMipsPseudoTargetLabel); + loopLabel->defMask = ENCODE_ALL; + } + storeMultiple(cUnit, r_S4, regMask); + /* + * Protect the loadMultiple instruction from being reordered with other + * Dalvik stack accesses. + */ + loadMultiple(cUnit, r4PC, regMask); + /* No need to generate the loop structure if numArgs <= 11 */ + if (numArgs > 11) { + opRegImm(cUnit, kOpSub, rFP, 4); + genConditionalBranchMips(cUnit, kMipsBne, rFP, r_ZERO, loopLabel); + } + } + + /* Save the last batch of loaded values */ + if (numArgs != 0) storeMultiple(cUnit, r_S4, regMask); + + /* Generate the loop epilogue - don't use r_A0 */ + if ((numArgs > 4) && (numArgs % 4)) { + regMask = ((1 << (numArgs & 0x3)) - 1) << 1; + /* + * Protect the loadMultiple instruction from being reordered with other + * Dalvik stack accesses. + */ + loadMultiple(cUnit, r4PC, regMask); + } + if (numArgs >= 8) { + newLIR2(cUnit, kMipsMove, r_A0, r_T0); + newLIR2(cUnit, kMipsMove, r_S1, r_T1); + } + + /* Save the modulo 4 arguments */ + if ((numArgs > 4) && (numArgs % 4)) { + storeMultiple(cUnit, r_S4, regMask); + } +} + +/* + * Generate code to setup the call stack then jump to the chaining cell if it + * is not a native method. + */ +static void genInvokeSingletonCommon(CompilationUnit *cUnit, MIR *mir, + BasicBlock *bb, MipsLIR *labelList, + MipsLIR *pcrLabel, + const Method *calleeMethod) +{ + /* + * Note: all Dalvik register state should be flushed to + * memory by the point, so register usage restrictions no + * longer apply. All temp & preserved registers may be used. + */ + dvmCompilerLockAllTemps(cUnit); + MipsLIR *retChainingCell = &labelList[bb->fallThrough->id]; + + /* r_A1 = &retChainingCell */ + dvmCompilerLockTemp(cUnit, r_A1); + MipsLIR *addrRetChain = newLIR2(cUnit, kMipsLahi, r_A1, 0); + addrRetChain->generic.target = (LIR *) retChainingCell; + addrRetChain = newLIR3(cUnit, kMipsLalo, r_A1, r_A1, 0); + addrRetChain->generic.target = (LIR *) retChainingCell; + + /* r4PC = dalvikCallsite */ + loadConstant(cUnit, r4PC, + (int) (cUnit->method->insns + mir->offset)); + /* + * r_A0 = calleeMethod (loaded upon calling genInvokeSingletonCommon) + * r_A1 = &ChainingCell + * r4PC = callsiteDPC + */ + if (dvmIsNativeMethod(calleeMethod)) { + genDispatchToHandler(cUnit, gDvmJit.methodTraceSupport ? + TEMPLATE_INVOKE_METHOD_NATIVE_PROF : + TEMPLATE_INVOKE_METHOD_NATIVE); +#if defined(WITH_JIT_TUNING) + gDvmJit.invokeNative++; +#endif + } else { + genDispatchToHandler(cUnit, gDvmJit.methodTraceSupport ? + TEMPLATE_INVOKE_METHOD_CHAIN_PROF : + TEMPLATE_INVOKE_METHOD_CHAIN); +#if defined(WITH_JIT_TUNING) + gDvmJit.invokeMonomorphic++; +#endif + /* Branch to the chaining cell */ + genUnconditionalBranch(cUnit, &labelList[bb->taken->id]); + } + /* Handle exceptions using the interpreter */ + genTrap(cUnit, mir->offset, pcrLabel); +} + +/* + * Generate code to check the validity of a predicted chain and take actions + * based on the result. 
+ * + * 0x2f1304c4 : lui s0,0x2d22(11554) # s0 <- dalvikPC + * 0x2f1304c8 : ori s0,s0,0x2d22848c(757236876) + * 0x2f1304cc : lahi/lui a1,0x2f13(12051) # a1 <- &retChainingCell + * 0x2f1304d0 : lalo/ori a1,a1,0x2f13055c(789775708) + * 0x2f1304d4 : lahi/lui a2,0x2f13(12051) # a2 <- &predictedChainingCell + * 0x2f1304d8 : lalo/ori a2,a2,0x2f13056c(789775724) + * 0x2f1304dc : jal 0x2f12d1ec(789762540) # call TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN + * 0x2f1304e0 : nop + * 0x2f1304e4 : b 0x2f13056c (L0x11ec10) # off to the predicted chain + * 0x2f1304e8 : nop + * 0x2f1304ec : b 0x2f13054c (L0x11fc80) # punt to the interpreter + * 0x2f1304f0 : lui a0,0x2d22(11554) + * 0x2f1304f4 : lw a0,156(s4) # a0 <- this->class->vtable[methodIdx] + * 0x2f1304f8 : bgtz a1,0x2f13051c (L0x11fa40) # if >0 don't rechain + * 0x2f1304fc : nop + * 0x2f130500 : lui t9,0x2aba(10938) + * 0x2f130504 : ori t9,t9,0x2abae3f8(716891128) + * 0x2f130508 : move a1,s2 + * 0x2f13050c : jalr ra,t9 # call dvmJitToPatchPredictedChain + * 0x2f130510 : nop + * 0x2f130514 : lw gp,84(sp) + * 0x2f130518 : move a0,v0 + * 0x2f13051c : lahi/lui a1,0x2f13(12051) # a1 <- &retChainingCell + * 0x2f130520 : lalo/ori a1,a1,0x2f13055c(789775708) + * 0x2f130524 : jal 0x2f12d0c4(789762244) # call TEMPLATE_INVOKE_METHOD_NO_OPT + * 0x2f130528 : nop + */ +static void genInvokeVirtualCommon(CompilationUnit *cUnit, MIR *mir, + int methodIndex, + MipsLIR *retChainingCell, + MipsLIR *predChainingCell, + MipsLIR *pcrLabel) +{ + /* + * Note: all Dalvik register state should be flushed to + * memory by the point, so register usage restrictions no + * longer apply. Lock temps to prevent them from being + * allocated by utility routines. + */ + dvmCompilerLockAllTemps(cUnit); + + /* + * For verbose printing, store the method pointer in operands[1] first as + * operands[0] will be clobbered in dvmCompilerMIR2LIR. + */ + predChainingCell->operands[1] = (int) mir->meta.callsiteInfo->method; + + /* "this" is already left in r_A0 by genProcessArgs* */ + + /* r4PC = dalvikCallsite */ + loadConstant(cUnit, r4PC, + (int) (cUnit->method->insns + mir->offset)); + + /* r_A1 = &retChainingCell */ + MipsLIR *addrRetChain = newLIR2(cUnit, kMipsLahi, r_A1, 0); + addrRetChain->generic.target = (LIR *) retChainingCell; + addrRetChain = newLIR3(cUnit, kMipsLalo, r_A1, r_A1, 0); + addrRetChain->generic.target = (LIR *) retChainingCell; + + /* r_A2 = &predictedChainingCell */ + MipsLIR *predictedChainingCell = newLIR2(cUnit, kMipsLahi, r_A2, 0); + predictedChainingCell->generic.target = (LIR *) predChainingCell; + predictedChainingCell = newLIR3(cUnit, kMipsLalo, r_A2, r_A2, 0); + predictedChainingCell->generic.target = (LIR *) predChainingCell; + + genDispatchToHandler(cUnit, gDvmJit.methodTraceSupport ? + TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF : + TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN); + + /* return through ra - jump to the chaining cell */ + genUnconditionalBranch(cUnit, predChainingCell); + + /* + * null-check on "this" may have been eliminated, but we still need a PC- + * reconstruction label for stack overflow bailout. 
+ */ + if (pcrLabel == NULL) { + int dPC = (int) (cUnit->method->insns + mir->offset); + pcrLabel = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + pcrLabel->opcode = kMipsPseudoPCReconstructionCell; + pcrLabel->operands[0] = dPC; + pcrLabel->operands[1] = mir->offset; + /* Insert the place holder to the growable list */ + dvmInsertGrowableList(&cUnit->pcReconstructionList, + (intptr_t) pcrLabel); + } + + /* return through ra+8 - punt to the interpreter */ + genUnconditionalBranch(cUnit, pcrLabel); + + /* + * return through ra+16 - fully resolve the callee method. + * r_A1 <- count + * r_A2 <- &predictedChainCell + * r_A3 <- this->class + * r4 <- dPC + * r_S4 <- this->class->vtable + */ + + /* r_A0 <- calleeMethod */ + loadWordDisp(cUnit, r_S4, methodIndex * 4, r_A0); + + /* Check if rechain limit is reached */ + MipsLIR *bypassRechaining = opCompareBranch(cUnit, kMipsBgtz, r_A1, -1); + + LOAD_FUNC_ADDR(cUnit, r_T9, (int) dvmJitToPatchPredictedChain); + + genRegCopy(cUnit, r_A1, rSELF); + + /* + * r_A0 = calleeMethod + * r_A2 = &predictedChainingCell + * r_A3 = class + * + * &returnChainingCell has been loaded into r_A1 but is not needed + * when patching the chaining cell and will be clobbered upon + * returning so it will be reconstructed again. + */ + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + newLIR2(cUnit, kMipsMove, r_A0, r_V0); + + /* r_A1 = &retChainingCell */ + addrRetChain = newLIR2(cUnit, kMipsLahi, r_A1, 0); + addrRetChain->generic.target = (LIR *) retChainingCell; + bypassRechaining->generic.target = (LIR *) addrRetChain; + addrRetChain = newLIR3(cUnit, kMipsLalo, r_A1, r_A1, 0); + addrRetChain->generic.target = (LIR *) retChainingCell; + + /* + * r_A0 = calleeMethod, + * r_A1 = &ChainingCell, + * r4PC = callsiteDPC, + */ + genDispatchToHandler(cUnit, gDvmJit.methodTraceSupport ? + TEMPLATE_INVOKE_METHOD_NO_OPT_PROF : + TEMPLATE_INVOKE_METHOD_NO_OPT); +#if defined(WITH_JIT_TUNING) + gDvmJit.invokePolymorphic++; +#endif + /* Handle exceptions using the interpreter */ + genTrap(cUnit, mir->offset, pcrLabel); +} + +/* "this" pointer is already in r0 */ +static void genInvokeVirtualWholeMethod(CompilationUnit *cUnit, + MIR *mir, + void *calleeAddr, + MipsLIR *retChainingCell) +{ + CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo; + dvmCompilerLockAllTemps(cUnit); + + loadClassPointer(cUnit, r_A1, (int) callsiteInfo); + + loadWordDisp(cUnit, r_A0, offsetof(Object, clazz), r_A2); + /* + * Set the misPredBranchOver target so that it will be generated when the + * code for the non-optimized invoke is generated. 
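+     *
+     * As an illustrative sketch (assuming the class recorded in the
+     * callsite info is the predicted receiver class), the devirtualized
+     * invoke emitted here amounts to:
+     *
+     *     a1 <- predicted clazz from callsiteInfo
+     *     a2 <- this->clazz
+     *     bne  a1, a2, slowPath     # misprediction -> non-optimized invoke
+     *     a0 <- dPC of the callsite
+     *     jal  calleeAddr           # call the compiled callee directly
+     *     b    retChainingCell
+     *   slowPath: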
+ */
+    /* Branch to the slow path if classes are not equal */
+    MipsLIR *classCheck = opCompareBranch(cUnit, kMipsBne, r_A1, r_A2);
+
+    /* a0 = the Dalvik PC of the callsite */
+    loadConstant(cUnit, r_A0, (int) (cUnit->method->insns + mir->offset));
+
+    newLIR1(cUnit, kMipsJal, (int) calleeAddr);
+    genUnconditionalBranch(cUnit, retChainingCell);
+
+    /* Target of slow path */
+    MipsLIR *slowPathLabel = newLIR0(cUnit, kMipsPseudoTargetLabel);
+
+    slowPathLabel->defMask = ENCODE_ALL;
+    classCheck->generic.target = (LIR *) slowPathLabel;
+
+    // FIXME
+    cUnit->printMe = true;
+}
+
+static void genInvokeSingletonWholeMethod(CompilationUnit *cUnit,
+                                          MIR *mir,
+                                          void *calleeAddr,
+                                          MipsLIR *retChainingCell)
+{
+    /* a0 = the Dalvik PC of the callsite */
+    loadConstant(cUnit, r_A0, (int) (cUnit->method->insns + mir->offset));
+
+    newLIR1(cUnit, kMipsJal, (int) calleeAddr);
+    genUnconditionalBranch(cUnit, retChainingCell);
+
+    // FIXME
+    cUnit->printMe = true;
+}
+
+/* Generate a branch to go back to the interpreter */
+static void genPuntToInterp(CompilationUnit *cUnit, unsigned int offset)
+{
+    /* a0 = dalvik pc */
+    dvmCompilerFlushAllRegs(cUnit);
+    loadConstant(cUnit, r_A0, (int) (cUnit->method->insns + offset));
+#if 0 /* MIPSTODO temporary workaround for unaligned access on sigma hardware
+        this can be removed when we're not punting to genInterpSingleStep
+        for opcodes that haven't been activated yet */
+    loadWordDisp(cUnit, r_A0, offsetof(Object, clazz), r_A3);
+#endif
+    loadWordDisp(cUnit, rSELF, offsetof(Thread,
+                 jitToInterpEntries.dvmJitToInterpPunt), r_A1);
+
+    opReg(cUnit, kOpBlx, r_A1);
+}
+
+/*
+ * Attempt to single step one instruction using the interpreter and return
+ * to the compiled code for the next Dalvik instruction
+ */
+static void genInterpSingleStep(CompilationUnit *cUnit, MIR *mir)
+{
+    int flags = dexGetFlagsFromOpcode(mir->dalvikInsn.opcode);
+    int flagsToCheck = kInstrCanBranch | kInstrCanSwitch | kInstrCanReturn;
+
+    // Single stepping is considered a loop mode breaker
+    if (cUnit->jitMode == kJitLoop) {
+        cUnit->quitLoopMode = true;
+        return;
+    }
+
+    // If already optimized out, just ignore
+    if (mir->dalvikInsn.opcode == OP_NOP)
+        return;
+
+    // Ugly, but necessary. Flush all Dalvik regs so Interp can find them
+    dvmCompilerFlushAllRegs(cUnit);
+
+    if ((mir->next == NULL) || (flags & flagsToCheck)) {
+        genPuntToInterp(cUnit, mir->offset);
+        return;
+    }
+    int entryAddr = offsetof(Thread,
+                             jitToInterpEntries.dvmJitToInterpSingleStep);
+    loadWordDisp(cUnit, rSELF, entryAddr, r_A2);
+    /* a0 = dalvik pc */
+    loadConstant(cUnit, r_A0, (int) (cUnit->method->insns + mir->offset));
+    /* a1 = dalvik pc of following instruction */
+    loadConstant(cUnit, r_A1, (int) (cUnit->method->insns + mir->next->offset));
+    opReg(cUnit, kOpBlx, r_A2);
+}
+
+/*
+ * To prevent a thread in a monitor wait from blocking the Jit from
+ * resetting the code cache, heavyweight monitor lock operations will not
+ * be allowed to return to an existing translation.  Instead, we will
+ * handle them by branching to a handler, which will in turn call the
+ * runtime lock routine and then branch directly back to the
+ * interpreter main loop.  Given the high cost of the heavyweight
+ * lock operation, this additional cost should be slight (especially when
+ * considering that we expect the vast majority of lock operations to
+ * use the fast-path thin lock bypass).
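+ *
+ * As a rough sketch (not the actual generated template), the monitor-exit
+ * path emitted below amounts to:
+ *
+ *     a0 <- self, a1 <- object        # set up args, null-check object
+ *     call dvmUnlockObject(self, object)
+ *     bne  v0, zero, done             # non-zero result means success
+ *     a0 <- dPC following the monitor-exit
+ *     dispatch to TEMPLATE_THROW_EXCEPTION_COMMON
+ *   done: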
+ */ +static void genMonitorPortable(CompilationUnit *cUnit, MIR *mir) +{ + bool isEnter = (mir->dalvikInsn.opcode == OP_MONITOR_ENTER); + genExportPC(cUnit, mir); + dvmCompilerFlushAllRegs(cUnit); /* Send everything to home location */ + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + loadValueDirectFixed(cUnit, rlSrc, r_A1); + genRegCopy(cUnit, r_A0, rSELF); + genNullCheck(cUnit, rlSrc.sRegLow, r_A1, mir->offset, NULL); + if (isEnter) { + /* Get dPC of next insn */ + loadConstant(cUnit, r4PC, (int)(cUnit->method->insns + mir->offset + + dexGetWidthFromOpcode(OP_MONITOR_ENTER))); + genDispatchToHandler(cUnit, TEMPLATE_MONITOR_ENTER); + } else { + LOAD_FUNC_ADDR(cUnit, r_T9, (int)dvmUnlockObject); + /* Do the call */ + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + /* Did we throw? */ + MipsLIR *branchOver = opCompareBranch(cUnit, kMipsBne, r_V0, r_ZERO); + loadConstant(cUnit, r_A0, + (int) (cUnit->method->insns + mir->offset + + dexGetWidthFromOpcode(OP_MONITOR_EXIT))); + genDispatchToHandler(cUnit, TEMPLATE_THROW_EXCEPTION_COMMON); + MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel); + target->defMask = ENCODE_ALL; + branchOver->generic.target = (LIR *) target; + dvmCompilerClobberCallRegs(cUnit); + } +} +/*#endif*/ + +/* + * Fetch *self->info.breakFlags. If the breakFlags are non-zero, + * punt to the interpreter. + */ +static void genSuspendPoll(CompilationUnit *cUnit, MIR *mir) +{ + int rTemp = dvmCompilerAllocTemp(cUnit); + MipsLIR *ld; + ld = loadBaseDisp(cUnit, NULL, rSELF, + offsetof(Thread, interpBreak.ctl.breakFlags), + rTemp, kUnsignedByte, INVALID_SREG); + setMemRefType(ld, true /* isLoad */, kMustNotAlias); + genRegImmCheck(cUnit, kMipsCondNe, rTemp, 0, mir->offset, NULL); +} + +/* + * The following are the first-level codegen routines that analyze the format + * of each bytecode then either dispatch special purpose codegen routines + * or produce corresponding Thumb instructions directly. + */ + +static bool handleFmt10t_Fmt20t_Fmt30t(CompilationUnit *cUnit, MIR *mir, + BasicBlock *bb, MipsLIR *labelList) +{ + /* backward branch? */ + bool backwardBranch = (bb->taken->startOffset <= mir->offset); + + if (backwardBranch && + (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) { + genSuspendPoll(cUnit, mir); + } + + int numPredecessors = dvmCountSetBits(bb->taken->predecessors); + /* + * Things could be hoisted out of the taken block into the predecessor, so + * make sure it is dominated by the predecessor. 
+ */ + if (numPredecessors == 1 && bb->taken->visited == false && + bb->taken->blockType == kDalvikByteCode) { + cUnit->nextCodegenBlock = bb->taken; + } else { + /* For OP_GOTO, OP_GOTO_16, and OP_GOTO_32 */ + genUnconditionalBranch(cUnit, &labelList[bb->taken->id]); + } + return false; +} + +static bool handleFmt10x(CompilationUnit *cUnit, MIR *mir) +{ + Opcode dalvikOpcode = mir->dalvikInsn.opcode; + if ((dalvikOpcode >= OP_UNUSED_3E) && (dalvikOpcode <= OP_UNUSED_43)) { + LOGE("Codegen: got unused opcode %#x",dalvikOpcode); + return true; + } + switch (dalvikOpcode) { + case OP_RETURN_VOID_BARRIER: + dvmCompilerGenMemBarrier(cUnit, 0); + // Intentional fallthrough + case OP_RETURN_VOID: + genReturnCommon(cUnit,mir); + break; + case OP_UNUSED_73: + case OP_UNUSED_79: + case OP_UNUSED_7A: + case OP_DISPATCH_FF: + LOGE("Codegen: got unused opcode %#x",dalvikOpcode); + return true; + case OP_NOP: + break; + default: + return true; + } + return false; +} + +static bool handleFmt11n_Fmt31i(CompilationUnit *cUnit, MIR *mir) +{ + RegLocation rlDest; + RegLocation rlResult; + if (mir->ssaRep->numDefs == 2) { + rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + } else { + rlDest = dvmCompilerGetDest(cUnit, mir, 0); + } + + switch (mir->dalvikInsn.opcode) { + case OP_CONST: + case OP_CONST_4: { + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB); + storeValue(cUnit, rlDest, rlResult); + break; + } + case OP_CONST_WIDE_32: { + //TUNING: single routine to load constant pair for support doubles + //TUNING: load 0/-1 separately to avoid load dependency + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB); + opRegRegImm(cUnit, kOpAsr, rlResult.highReg, + rlResult.lowReg, 31); + storeValueWide(cUnit, rlDest, rlResult); + break; + } + default: + return true; + } + return false; +} + +static bool handleFmt21h(CompilationUnit *cUnit, MIR *mir) +{ + RegLocation rlDest; + RegLocation rlResult; + if (mir->ssaRep->numDefs == 2) { + rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + } else { + rlDest = dvmCompilerGetDest(cUnit, mir, 0); + } + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + + switch (mir->dalvikInsn.opcode) { + case OP_CONST_HIGH16: { + loadConstantNoClobber(cUnit, rlResult.lowReg, + mir->dalvikInsn.vB << 16); + storeValue(cUnit, rlDest, rlResult); + break; + } + case OP_CONST_WIDE_HIGH16: { + loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, + 0, mir->dalvikInsn.vB << 16); + storeValueWide(cUnit, rlDest, rlResult); + break; + } + default: + return true; + } + return false; +} + +static bool handleFmt20bc_Fmt40sc(CompilationUnit *cUnit, MIR *mir) +{ + /* For OP_THROW_VERIFICATION_ERROR & OP_THROW_VERIFICATION_ERROR_JUMBO */ + genInterpSingleStep(cUnit, mir); + return false; +} + +static bool handleFmt21c_Fmt31c_Fmt41c(CompilationUnit *cUnit, MIR *mir) +{ + RegLocation rlResult; + RegLocation rlDest; + RegLocation rlSrc; + + switch (mir->dalvikInsn.opcode) { + case OP_CONST_STRING_JUMBO: + case OP_CONST_STRING: { + void *strPtr = (void*) + (cUnit->method->clazz->pDvmDex->pResStrings[mir->dalvikInsn.vB]); + + if (strPtr == NULL) { + BAIL_LOOP_COMPILATION(); + LOGE("Unexpected null string"); + dvmAbort(); + } + + rlDest = dvmCompilerGetDest(cUnit, mir, 0); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + loadConstantNoClobber(cUnit, rlResult.lowReg, (int) strPtr ); + storeValue(cUnit, rlDest, 
rlResult); + break; + } + case OP_CONST_CLASS: + case OP_CONST_CLASS_JUMBO: { + void *classPtr = (void*) + (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]); + + if (classPtr == NULL) { + BAIL_LOOP_COMPILATION(); + LOGE("Unexpected null class"); + dvmAbort(); + } + + rlDest = dvmCompilerGetDest(cUnit, mir, 0); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + loadConstantNoClobber(cUnit, rlResult.lowReg, (int) classPtr ); + storeValue(cUnit, rlDest, rlResult); + break; + } + case OP_SGET: + case OP_SGET_VOLATILE: + case OP_SGET_VOLATILE_JUMBO: + case OP_SGET_JUMBO: + case OP_SGET_OBJECT: + case OP_SGET_OBJECT_VOLATILE: + case OP_SGET_OBJECT_VOLATILE_JUMBO: + case OP_SGET_OBJECT_JUMBO: + case OP_SGET_BOOLEAN: + case OP_SGET_BOOLEAN_JUMBO: + case OP_SGET_CHAR: + case OP_SGET_CHAR_JUMBO: + case OP_SGET_BYTE: + case OP_SGET_BYTE_JUMBO: + case OP_SGET_SHORT: + case OP_SGET_SHORT_JUMBO: { + int valOffset = OFFSETOF_MEMBER(StaticField, value); + int tReg = dvmCompilerAllocTemp(cUnit); + bool isVolatile; + const Method *method = (mir->OptimizationFlags & MIR_CALLEE) ? + mir->meta.calleeMethod : cUnit->method; + void *fieldPtr = (void*) + (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]); + + if (fieldPtr == NULL) { + BAIL_LOOP_COMPILATION(); + LOGE("Unexpected null static field"); + dvmAbort(); + } + + /* + * On SMP systems, Dalvik opcodes found to be referencing + * volatile fields are rewritten to their _VOLATILE variant. + * However, this does not happen on non-SMP systems. The JIT + * still needs to know about volatility to avoid unsafe + * optimizations so we determine volatility based on either + * the opcode or the field access flags. + */ +#if ANDROID_SMP != 0 + Opcode opcode = mir->dalvikInsn.opcode; + isVolatile = (opcode == OP_SGET_VOLATILE) || + (opcode == OP_SGET_VOLATILE_JUMBO) || + (opcode == OP_SGET_OBJECT_VOLATILE) || + (opcode == OP_SGET_OBJECT_VOLATILE_JUMBO); + assert(isVolatile == dvmIsVolatileField((Field *) fieldPtr)); +#else + isVolatile = dvmIsVolatileField((Field *) fieldPtr); +#endif + + rlDest = dvmCompilerGetDest(cUnit, mir, 0); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + loadConstant(cUnit, tReg, (int) fieldPtr + valOffset); + + if (isVolatile) { + dvmCompilerGenMemBarrier(cUnit, 0); + } + HEAP_ACCESS_SHADOW(true); + loadWordDisp(cUnit, tReg, 0, rlResult.lowReg); + HEAP_ACCESS_SHADOW(false); + + storeValue(cUnit, rlDest, rlResult); + break; + } + case OP_SGET_WIDE: + case OP_SGET_WIDE_JUMBO: { + int valOffset = OFFSETOF_MEMBER(StaticField, value); + const Method *method = (mir->OptimizationFlags & MIR_CALLEE) ? 
+ mir->meta.calleeMethod : cUnit->method; + void *fieldPtr = (void*) + (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]); + + if (fieldPtr == NULL) { + BAIL_LOOP_COMPILATION(); + LOGE("Unexpected null static field"); + dvmAbort(); + } + + int tReg = dvmCompilerAllocTemp(cUnit); + rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + loadConstant(cUnit, tReg, (int) fieldPtr + valOffset); + + HEAP_ACCESS_SHADOW(true); + loadPair(cUnit, tReg, rlResult.lowReg, rlResult.highReg); + HEAP_ACCESS_SHADOW(false); + + storeValueWide(cUnit, rlDest, rlResult); + break; + } + case OP_SPUT: + case OP_SPUT_VOLATILE: + case OP_SPUT_VOLATILE_JUMBO: + case OP_SPUT_JUMBO: + case OP_SPUT_OBJECT: + case OP_SPUT_OBJECT_VOLATILE: + case OP_SPUT_OBJECT_VOLATILE_JUMBO: + case OP_SPUT_OBJECT_JUMBO: + case OP_SPUT_BOOLEAN: + case OP_SPUT_BOOLEAN_JUMBO: + case OP_SPUT_CHAR: + case OP_SPUT_CHAR_JUMBO: + case OP_SPUT_BYTE: + case OP_SPUT_BYTE_JUMBO: + case OP_SPUT_SHORT: + case OP_SPUT_SHORT_JUMBO: { + int valOffset = OFFSETOF_MEMBER(StaticField, value); + int tReg = dvmCompilerAllocTemp(cUnit); + int objHead = 0; + bool isVolatile; + bool isSputObject; + const Method *method = (mir->OptimizationFlags & MIR_CALLEE) ? + mir->meta.calleeMethod : cUnit->method; + void *fieldPtr = (void*) + (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]); + Opcode opcode = mir->dalvikInsn.opcode; + + if (fieldPtr == NULL) { + BAIL_LOOP_COMPILATION(); + LOGE("Unexpected null static field"); + dvmAbort(); + } + +#if ANDROID_SMP != 0 + isVolatile = (opcode == OP_SPUT_VOLATILE) || + (opcode == OP_SPUT_VOLATILE_JUMBO) || + (opcode == OP_SPUT_OBJECT_VOLATILE) || + (opcode == OP_SPUT_OBJECT_VOLATILE_JUMBO); + assert(isVolatile == dvmIsVolatileField((Field *) fieldPtr)); +#else + isVolatile = dvmIsVolatileField((Field *) fieldPtr); +#endif + + isSputObject = (opcode == OP_SPUT_OBJECT) || + (opcode == OP_SPUT_OBJECT_JUMBO) || + (opcode == OP_SPUT_OBJECT_VOLATILE) || + (opcode == OP_SPUT_OBJECT_VOLATILE_JUMBO); + + rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + rlSrc = loadValue(cUnit, rlSrc, kAnyReg); + loadConstant(cUnit, tReg, (int) fieldPtr); + if (isSputObject) { + objHead = dvmCompilerAllocTemp(cUnit); + loadWordDisp(cUnit, tReg, OFFSETOF_MEMBER(Field, clazz), objHead); + } + if (isVolatile) { + dvmCompilerGenMemBarrier(cUnit, 0); + } + HEAP_ACCESS_SHADOW(true); + storeWordDisp(cUnit, tReg, valOffset ,rlSrc.lowReg); + dvmCompilerFreeTemp(cUnit, tReg); + HEAP_ACCESS_SHADOW(false); + if (isVolatile) { + dvmCompilerGenMemBarrier(cUnit, 0); + } + if (isSputObject) { + /* NOTE: marking card based sfield->clazz */ + markCard(cUnit, rlSrc.lowReg, objHead); + dvmCompilerFreeTemp(cUnit, objHead); + } + + break; + } + case OP_SPUT_WIDE: + case OP_SPUT_WIDE_JUMBO: { + int tReg = dvmCompilerAllocTemp(cUnit); + int valOffset = OFFSETOF_MEMBER(StaticField, value); + const Method *method = (mir->OptimizationFlags & MIR_CALLEE) ? 
+                mir->meta.calleeMethod : cUnit->method;
+            void *fieldPtr = (void*)
+                (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
+
+            if (fieldPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
+                LOGE("Unexpected null static field");
+                dvmAbort();
+            }
+
+            rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1);
+            rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
+            loadConstant(cUnit, tReg, (int) fieldPtr + valOffset);
+
+            HEAP_ACCESS_SHADOW(true);
+            storePair(cUnit, tReg, rlSrc.lowReg, rlSrc.highReg);
+            HEAP_ACCESS_SHADOW(false);
+            break;
+        }
+        case OP_NEW_INSTANCE:
+        case OP_NEW_INSTANCE_JUMBO: {
+            /*
+             * Obey the calling convention and don't mess with the register
+             * usage.
+             */
+            ClassObject *classPtr = (ClassObject *)
+                (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
+
+            if (classPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
+                LOGE("Unexpected null class");
+                dvmAbort();
+            }
+
+            /*
+             * If it is going to throw, it should not make it into the trace
+             * to begin with.  However, Alloc might throw, so we need to
+             * genExportPC()
+             */
+            assert((classPtr->accessFlags & (ACC_INTERFACE|ACC_ABSTRACT)) == 0);
+            dvmCompilerFlushAllRegs(cUnit); /* Everything to home location */
+            genExportPC(cUnit, mir);
+            LOAD_FUNC_ADDR(cUnit, r_T9, (int)dvmAllocObject);
+            loadConstant(cUnit, r_A0, (int) classPtr);
+            loadConstant(cUnit, r_A1, ALLOC_DONT_TRACK);
+            opReg(cUnit, kOpBlx, r_T9);
+            newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP);
+            dvmCompilerClobberCallRegs(cUnit);
+            /* generate a branch over if allocation is successful */
+            MipsLIR *branchOver = opCompareBranch(cUnit, kMipsBne, r_V0, r_ZERO);
+
+            /*
+             * OOM exception needs to be thrown here and cannot re-execute
+             */
+            loadConstant(cUnit, r_A0,
+                         (int) (cUnit->method->insns + mir->offset));
+            genDispatchToHandler(cUnit, TEMPLATE_THROW_EXCEPTION_COMMON);
+            /* noreturn */
+
+            MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel);
+            target->defMask = ENCODE_ALL;
+            branchOver->generic.target = (LIR *) target;
+            rlDest = dvmCompilerGetDest(cUnit, mir, 0);
+            rlResult = dvmCompilerGetReturn(cUnit);
+            storeValue(cUnit, rlDest, rlResult);
+            break;
+        }
+        case OP_CHECK_CAST:
+        case OP_CHECK_CAST_JUMBO: {
+            /*
+             * Obey the calling convention and don't mess with the register
+             * usage.
+             */
+            ClassObject *classPtr =
+                (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
+            /*
+             * Note: It is possible that classPtr is NULL at this point,
+             * even though this instruction has been successfully interpreted.
+             * If the previous interpretation had a null source, the
+             * interpreter would not have bothered to resolve the clazz.
+             * Bail out to the interpreter in this case, and log it
+             * so that we can tell if it happens frequently.
+             */
+            if (classPtr == NULL) {
+                BAIL_LOOP_COMPILATION();
+                LOGVV("null clazz in OP_CHECK_CAST, single-stepping");
+                genInterpSingleStep(cUnit, mir);
+                return false;
+            }
+            dvmCompilerFlushAllRegs(cUnit); /* Everything to home location */
+            loadConstant(cUnit, r_A1, (int) classPtr );
+            rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
+            rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+            MipsLIR *branch1 = opCompareBranch(cUnit, kMipsBeqz, rlSrc.lowReg, -1);
+            /*
+             * rlSrc.lowReg now contains object->clazz.  Note that
+             * it could have been allocated r_A0, but we're okay so long
+             * as we don't do anything destructive until r_A0 is loaded
+             * with clazz.
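+             *
+             * Sketch of the flow emitted for the check: a null source
+             * register passes trivially (branch1); a clazz equal to the
+             * resolved classPtr also passes (branch2); otherwise
+             * dvmInstanceofNonTrivial(obj->clazz, classPtr) is called and
+             * a zero result punts to the interpreter, which performs the
+             * actual throw.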
+ */ + /* r_A0 now contains object->clazz */ + loadWordDisp(cUnit, rlSrc.lowReg, offsetof(Object, clazz), r_A0); + LOAD_FUNC_ADDR(cUnit, r_T9, (int)dvmInstanceofNonTrivial); + MipsLIR *branch2 = opCompareBranch(cUnit, kMipsBeq, r_A0, r_A1); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + dvmCompilerClobberCallRegs(cUnit); + /* + * If null, check cast failed - punt to the interpreter. Because + * interpreter will be the one throwing, we don't need to + * genExportPC() here. + */ + genRegCopy(cUnit, r_A0, r_V0); + genZeroCheck(cUnit, r_V0, mir->offset, NULL); + /* check cast passed - branch target here */ + MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel); + target->defMask = ENCODE_ALL; + branch1->generic.target = (LIR *)target; + branch2->generic.target = (LIR *)target; + break; + } + case OP_SGET_WIDE_VOLATILE: + case OP_SGET_WIDE_VOLATILE_JUMBO: + case OP_SPUT_WIDE_VOLATILE: + case OP_SPUT_WIDE_VOLATILE_JUMBO: + genInterpSingleStep(cUnit, mir); + break; + default: + return true; + } + return false; +} + +static bool handleFmt11x(CompilationUnit *cUnit, MIR *mir) +{ + Opcode dalvikOpcode = mir->dalvikInsn.opcode; + RegLocation rlResult; + switch (dalvikOpcode) { + case OP_MOVE_EXCEPTION: { + int exOffset = offsetof(Thread, exception); + int resetReg = dvmCompilerAllocTemp(cUnit); + RegLocation rlDest = dvmCompilerGetDest(cUnit, mir, 0); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + loadWordDisp(cUnit, rSELF, exOffset, rlResult.lowReg); + loadConstant(cUnit, resetReg, 0); + storeWordDisp(cUnit, rSELF, exOffset, resetReg); + storeValue(cUnit, rlDest, rlResult); + break; + } + case OP_MOVE_RESULT: + case OP_MOVE_RESULT_OBJECT: { + /* An inlined move result is effectively no-op */ + if (mir->OptimizationFlags & MIR_INLINED) + break; + RegLocation rlDest = dvmCompilerGetDest(cUnit, mir, 0); + RegLocation rlSrc = LOC_DALVIK_RETURN_VAL; + rlSrc.fp = rlDest.fp; + storeValue(cUnit, rlDest, rlSrc); + break; + } + case OP_MOVE_RESULT_WIDE: { + /* An inlined move result is effectively no-op */ + if (mir->OptimizationFlags & MIR_INLINED) + break; + RegLocation rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + RegLocation rlSrc = LOC_DALVIK_RETURN_VAL_WIDE; + rlSrc.fp = rlDest.fp; + storeValueWide(cUnit, rlDest, rlSrc); + break; + } + case OP_RETURN_WIDE: { + RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + RegLocation rlDest = LOC_DALVIK_RETURN_VAL_WIDE; + rlDest.fp = rlSrc.fp; + storeValueWide(cUnit, rlDest, rlSrc); + genReturnCommon(cUnit,mir); + break; + } + case OP_RETURN: + case OP_RETURN_OBJECT: { + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlDest = LOC_DALVIK_RETURN_VAL; + rlDest.fp = rlSrc.fp; + storeValue(cUnit, rlDest, rlSrc); + genReturnCommon(cUnit, mir); + break; + } + case OP_MONITOR_EXIT: + case OP_MONITOR_ENTER: + genMonitor(cUnit, mir); + break; + case OP_THROW: + genInterpSingleStep(cUnit, mir); + break; + default: + return true; + } + return false; +} + +static bool handleFmt12x(CompilationUnit *cUnit, MIR *mir) +{ + Opcode opcode = mir->dalvikInsn.opcode; + RegLocation rlDest; + RegLocation rlSrc; + RegLocation rlResult; + + if ( (opcode >= OP_ADD_INT_2ADDR) && (opcode <= OP_REM_DOUBLE_2ADDR)) { + return genArithOp( cUnit, mir ); + } + + if (mir->ssaRep->numUses == 2) + rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + else + rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + if (mir->ssaRep->numDefs == 2) + rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + else + rlDest = 
dvmCompilerGetDest(cUnit, mir, 0); + + switch (opcode) { + case OP_DOUBLE_TO_INT: + case OP_INT_TO_FLOAT: + case OP_FLOAT_TO_INT: + case OP_DOUBLE_TO_FLOAT: + case OP_FLOAT_TO_DOUBLE: + case OP_INT_TO_DOUBLE: + case OP_FLOAT_TO_LONG: + case OP_LONG_TO_FLOAT: + case OP_DOUBLE_TO_LONG: + case OP_LONG_TO_DOUBLE: + return genConversion(cUnit, mir); + case OP_NEG_INT: + case OP_NOT_INT: + return genArithOpInt(cUnit, mir, rlDest, rlSrc, rlSrc); + case OP_NEG_LONG: + case OP_NOT_LONG: + return genArithOpLong(cUnit, mir, rlDest, rlSrc, rlSrc); + case OP_NEG_FLOAT: + return genArithOpFloat(cUnit, mir, rlDest, rlSrc, rlSrc); + case OP_NEG_DOUBLE: + return genArithOpDouble(cUnit, mir, rlDest, rlSrc, rlSrc); + case OP_MOVE_WIDE: + storeValueWide(cUnit, rlDest, rlSrc); + break; + case OP_INT_TO_LONG: + rlSrc = dvmCompilerUpdateLoc(cUnit, rlSrc); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + //TUNING: shouldn't loadValueDirect already check for phys reg? + if (rlSrc.location == kLocPhysReg) { + genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg); + } else { + loadValueDirect(cUnit, rlSrc, rlResult.lowReg); + } + opRegRegImm(cUnit, kOpAsr, rlResult.highReg, + rlResult.lowReg, 31); + storeValueWide(cUnit, rlDest, rlResult); + break; + case OP_LONG_TO_INT: + rlSrc = dvmCompilerUpdateLocWide(cUnit, rlSrc); + rlSrc = dvmCompilerWideToNarrow(cUnit, rlSrc); + // Intentional fallthrough + case OP_MOVE: + case OP_MOVE_OBJECT: + storeValue(cUnit, rlDest, rlSrc); + break; + case OP_INT_TO_BYTE: + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + opRegReg(cUnit, kOp2Byte, rlResult.lowReg, rlSrc.lowReg); + storeValue(cUnit, rlDest, rlResult); + break; + case OP_INT_TO_SHORT: + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + opRegReg(cUnit, kOp2Short, rlResult.lowReg, rlSrc.lowReg); + storeValue(cUnit, rlDest, rlResult); + break; + case OP_INT_TO_CHAR: + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + opRegReg(cUnit, kOp2Char, rlResult.lowReg, rlSrc.lowReg); + storeValue(cUnit, rlDest, rlResult); + break; + case OP_ARRAY_LENGTH: { + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + genNullCheck(cUnit, rlSrc.sRegLow, rlSrc.lowReg, + mir->offset, NULL); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + loadWordDisp(cUnit, rlSrc.lowReg, lenOffset, + rlResult.lowReg); + storeValue(cUnit, rlDest, rlResult); + break; + } + default: + return true; + } + return false; +} + +static bool handleFmt21s(CompilationUnit *cUnit, MIR *mir) +{ + Opcode dalvikOpcode = mir->dalvikInsn.opcode; + RegLocation rlDest; + RegLocation rlResult; + int BBBB = mir->dalvikInsn.vB; + if (dalvikOpcode == OP_CONST_WIDE_16) { + rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + loadConstantNoClobber(cUnit, rlResult.lowReg, BBBB); + //TUNING: do high separately to avoid load dependency + opRegRegImm(cUnit, kOpAsr, rlResult.highReg, rlResult.lowReg, 31); + storeValueWide(cUnit, rlDest, rlResult); + } else if (dalvikOpcode == OP_CONST_16) { + rlDest = dvmCompilerGetDest(cUnit, mir, 0); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + loadConstantNoClobber(cUnit, rlResult.lowReg, BBBB); + storeValue(cUnit, rlDest, rlResult); + } else + return true; + return false; +} + +/* Compare agaist zero */ +static bool 
handleFmt21t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, + MipsLIR *labelList) +{ + Opcode dalvikOpcode = mir->dalvikInsn.opcode; + MipsOpCode opc = kMipsNop; + int rt = -1; + /* backward branch? */ + bool backwardBranch = (bb->taken->startOffset <= mir->offset); + + if (backwardBranch && + (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) { + genSuspendPoll(cUnit, mir); + } + + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + + switch (dalvikOpcode) { + case OP_IF_EQZ: + opc = kMipsBeqz; + break; + case OP_IF_NEZ: + opc = kMipsBne; + rt = r_ZERO; + break; + case OP_IF_LTZ: + opc = kMipsBltz; + break; + case OP_IF_GEZ: + opc = kMipsBgez; + break; + case OP_IF_GTZ: + opc = kMipsBgtz; + break; + case OP_IF_LEZ: + opc = kMipsBlez; + break; + default: + LOGE("Unexpected opcode (%d) for Fmt21t", dalvikOpcode); + dvmCompilerAbort(cUnit); + } + genConditionalBranchMips(cUnit, opc, rlSrc.lowReg, rt, &labelList[bb->taken->id]); + /* This mostly likely will be optimized away in a later phase */ + genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]); + return false; +} + +static bool isPowerOfTwo(int x) +{ + return (x & (x - 1)) == 0; +} + +// Returns true if no more than two bits are set in 'x'. +static bool isPopCountLE2(unsigned int x) +{ + x &= x - 1; + return (x & (x - 1)) == 0; +} + +// Returns the index of the lowest set bit in 'x'. +static int lowestSetBit(unsigned int x) { + int bit_posn = 0; + while ((x & 0xf) == 0) { + bit_posn += 4; + x >>= 4; + } + while ((x & 1) == 0) { + bit_posn++; + x >>= 1; + } + return bit_posn; +} + +// Returns true if it added instructions to 'cUnit' to divide 'rlSrc' by 'lit' +// and store the result in 'rlDest'. +static bool handleEasyDivide(CompilationUnit *cUnit, Opcode dalvikOpcode, + RegLocation rlSrc, RegLocation rlDest, int lit) +{ + if (lit < 2 || !isPowerOfTwo(lit)) { + return false; + } + int k = lowestSetBit(lit); + if (k >= 30) { + // Avoid special cases. + return false; + } + bool div = (dalvikOpcode == OP_DIV_INT_LIT8 || dalvikOpcode == OP_DIV_INT_LIT16); + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + if (div) { + int tReg = dvmCompilerAllocTemp(cUnit); + if (lit == 2) { + // Division by 2 is by far the most common division by constant. 
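+            // The lsr/add pair below biases a negative dividend before the
+            // arithmetic shift so the result rounds toward zero (Java
+            // semantics).  Worked example (illustrative): src = -7, k = 1:
+            //   tReg   = (unsigned)src >> 31 = 1
+            //   tReg   = src + 1             = -6
+            //   result = -6 >> 1 (asr)       = -3   (a plain asr gives -4)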
+ opRegRegImm(cUnit, kOpLsr, tReg, rlSrc.lowReg, 32 - k); + opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg); + opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k); + } else { + opRegRegImm(cUnit, kOpAsr, tReg, rlSrc.lowReg, 31); + opRegRegImm(cUnit, kOpLsr, tReg, tReg, 32 - k); + opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg); + opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k); + } + } else { + int cReg = dvmCompilerAllocTemp(cUnit); + loadConstant(cUnit, cReg, lit - 1); + int tReg1 = dvmCompilerAllocTemp(cUnit); + int tReg2 = dvmCompilerAllocTemp(cUnit); + if (lit == 2) { + opRegRegImm(cUnit, kOpLsr, tReg1, rlSrc.lowReg, 32 - k); + opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg); + opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg); + opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1); + } else { + opRegRegImm(cUnit, kOpAsr, tReg1, rlSrc.lowReg, 31); + opRegRegImm(cUnit, kOpLsr, tReg1, tReg1, 32 - k); + opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg); + opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg); + opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1); + } + } + storeValue(cUnit, rlDest, rlResult); + return true; +} + +// Returns true if it added instructions to 'cUnit' to multiply 'rlSrc' by 'lit' +// and store the result in 'rlDest'. +static bool handleEasyMultiply(CompilationUnit *cUnit, + RegLocation rlSrc, RegLocation rlDest, int lit) +{ + // Can we simplify this multiplication? + bool powerOfTwo = false; + bool popCountLE2 = false; + bool powerOfTwoMinusOne = false; + if (lit < 2) { + // Avoid special cases. + return false; + } else if (isPowerOfTwo(lit)) { + powerOfTwo = true; + } else if (isPopCountLE2(lit)) { + popCountLE2 = true; + } else if (isPowerOfTwo(lit + 1)) { + powerOfTwoMinusOne = true; + } else { + return false; + } + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + if (powerOfTwo) { + // Shift. + opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlSrc.lowReg, + lowestSetBit(lit)); + } else if (popCountLE2) { + // Shift and add and shift. + int firstBit = lowestSetBit(lit); + int secondBit = lowestSetBit(lit ^ (1 << firstBit)); + genMultiplyByTwoBitMultiplier(cUnit, rlSrc, rlResult, lit, + firstBit, secondBit); + } else { + // Reverse subtract: (src << (shift + 1)) - src. 
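+        // lit + 1 is a power of two here, so the product is a single
+        // shift-and-subtract: x * (2^n - 1) = (x << n) - x, e.g. x * 7
+        // becomes (x << 3) - x.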
+ assert(powerOfTwoMinusOne); + // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1) + int tReg = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1)); + opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg); + } + storeValue(cUnit, rlDest, rlResult); + return true; +} + +static bool handleFmt22b_Fmt22s(CompilationUnit *cUnit, MIR *mir) +{ + Opcode dalvikOpcode = mir->dalvikInsn.opcode; + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlDest = dvmCompilerGetDest(cUnit, mir, 0); + RegLocation rlResult; + int lit = mir->dalvikInsn.vC; + OpKind op = (OpKind)0; /* Make gcc happy */ + int shiftOp = false; + + switch (dalvikOpcode) { + case OP_RSUB_INT_LIT8: + case OP_RSUB_INT: { + int tReg; + //TUNING: add support for use of Arm rsub op + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + tReg = dvmCompilerAllocTemp(cUnit); + loadConstant(cUnit, tReg, lit); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + opRegRegReg(cUnit, kOpSub, rlResult.lowReg, + tReg, rlSrc.lowReg); + storeValue(cUnit, rlDest, rlResult); + return false; + break; + } + + case OP_ADD_INT_LIT8: + case OP_ADD_INT_LIT16: + op = kOpAdd; + break; + case OP_MUL_INT_LIT8: + case OP_MUL_INT_LIT16: { + if (handleEasyMultiply(cUnit, rlSrc, rlDest, lit)) { + return false; + } + op = kOpMul; + break; + } + case OP_AND_INT_LIT8: + case OP_AND_INT_LIT16: + op = kOpAnd; + break; + case OP_OR_INT_LIT8: + case OP_OR_INT_LIT16: + op = kOpOr; + break; + case OP_XOR_INT_LIT8: + case OP_XOR_INT_LIT16: + op = kOpXor; + break; + case OP_SHL_INT_LIT8: + lit &= 31; + shiftOp = true; + op = kOpLsl; + break; + case OP_SHR_INT_LIT8: + lit &= 31; + shiftOp = true; + op = kOpAsr; + break; + case OP_USHR_INT_LIT8: + lit &= 31; + shiftOp = true; + op = kOpLsr; + break; + + case OP_DIV_INT_LIT8: + case OP_DIV_INT_LIT16: + case OP_REM_INT_LIT8: + case OP_REM_INT_LIT16: { + if (lit == 0) { + /* Let the interpreter deal with div by 0 */ + genInterpSingleStep(cUnit, mir); + return false; + } + if (handleEasyDivide(cUnit, dalvikOpcode, rlSrc, rlDest, lit)) { + return false; + } + + MipsOpCode opc; + int divReg; + + if ((dalvikOpcode == OP_DIV_INT_LIT8) || + (dalvikOpcode == OP_DIV_INT_LIT16)) { + opc = kMipsMflo; + divReg = r_LO; + } else { + opc = kMipsMfhi; + divReg = r_HI; + } + + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + int tReg = dvmCompilerAllocTemp(cUnit); + newLIR3(cUnit, kMipsAddiu, tReg, r_ZERO, lit); + newLIR4(cUnit, kMipsDiv, r_HI, r_LO, rlSrc.lowReg, tReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + newLIR2(cUnit, opc, rlResult.lowReg, divReg); + dvmCompilerFreeTemp(cUnit, tReg); + storeValue(cUnit, rlDest, rlResult); + return false; + break; + } + default: + return true; + } + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + // Avoid shifts by literal 0 - no support in Thumb. Change to copy + if (shiftOp && (lit == 0)) { + genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg); + } else { + opRegRegImm(cUnit, op, rlResult.lowReg, rlSrc.lowReg, lit); + } + storeValue(cUnit, rlDest, rlResult); + return false; +} + +static bool handleFmt22c_Fmt52c(CompilationUnit *cUnit, MIR *mir) +{ + Opcode dalvikOpcode = mir->dalvikInsn.opcode; + int fieldOffset = -1; + bool isVolatile = false; + switch (dalvikOpcode) { + /* + * Wide volatiles currently handled via single step. + * Add them here if generating in-line code. 
+ * case OP_IGET_WIDE_VOLATILE: + * case OP_IGET_WIDE_VOLATILE_JUMBO: + * case OP_IPUT_WIDE_VOLATILE: + * case OP_IPUT_WIDE_VOLATILE_JUMBO: + */ + case OP_IGET_VOLATILE: + case OP_IGET_VOLATILE_JUMBO: + case OP_IGET_OBJECT_VOLATILE: + case OP_IGET_OBJECT_VOLATILE_JUMBO: + case OP_IPUT_VOLATILE: + case OP_IPUT_VOLATILE_JUMBO: + case OP_IPUT_OBJECT_VOLATILE: + case OP_IPUT_OBJECT_VOLATILE_JUMBO: +#if ANDROID_SMP != 0 + isVolatile = true; + // NOTE: intentional fallthrough +#endif + case OP_IGET: + case OP_IGET_JUMBO: + case OP_IGET_WIDE: + case OP_IGET_WIDE_JUMBO: + case OP_IGET_OBJECT: + case OP_IGET_OBJECT_JUMBO: + case OP_IGET_BOOLEAN: + case OP_IGET_BOOLEAN_JUMBO: + case OP_IGET_BYTE: + case OP_IGET_BYTE_JUMBO: + case OP_IGET_CHAR: + case OP_IGET_CHAR_JUMBO: + case OP_IGET_SHORT: + case OP_IGET_SHORT_JUMBO: + case OP_IPUT: + case OP_IPUT_JUMBO: + case OP_IPUT_WIDE: + case OP_IPUT_WIDE_JUMBO: + case OP_IPUT_OBJECT: + case OP_IPUT_OBJECT_JUMBO: + case OP_IPUT_BOOLEAN: + case OP_IPUT_BOOLEAN_JUMBO: + case OP_IPUT_BYTE: + case OP_IPUT_BYTE_JUMBO: + case OP_IPUT_CHAR: + case OP_IPUT_CHAR_JUMBO: + case OP_IPUT_SHORT: + case OP_IPUT_SHORT_JUMBO: { + const Method *method = (mir->OptimizationFlags & MIR_CALLEE) ? + mir->meta.calleeMethod : cUnit->method; + Field *fieldPtr = + method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC]; + + if (fieldPtr == NULL) { + BAIL_LOOP_COMPILATION(); + LOGE("Unexpected null instance field"); + dvmAbort(); + } +#if ANDROID_SMP != 0 + assert(isVolatile == dvmIsVolatileField((Field *) fieldPtr)); +#else + isVolatile = dvmIsVolatileField((Field *) fieldPtr); +#endif + fieldOffset = ((InstField *)fieldPtr)->byteOffset; + break; + } + default: + break; + } + + switch (dalvikOpcode) { + case OP_NEW_ARRAY: + case OP_NEW_ARRAY_JUMBO: { +#if 0 /* 080 triggers assert in Interp.c:1290 for out of memory exception. + i think the assert is in error and should be disabled. With + asserts disabled, 080 passes. 
*/ +genInterpSingleStep(cUnit, mir); +return false; +#endif + // Generates a call - use explicit registers + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlDest = dvmCompilerGetDest(cUnit, mir, 0); + RegLocation rlResult; + void *classPtr = (void*) + (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]); + + if (classPtr == NULL) { + BAIL_LOOP_COMPILATION(); + LOGE("Unexpected null class"); + dvmAbort(); + } + + dvmCompilerFlushAllRegs(cUnit); /* Everything to home location */ + genExportPC(cUnit, mir); + loadValueDirectFixed(cUnit, rlSrc, r_A1); /* Len */ + loadConstant(cUnit, r_A0, (int) classPtr ); + LOAD_FUNC_ADDR(cUnit, r_T9, (int)dvmAllocArrayByClass); + /* + * "len < 0": bail to the interpreter to re-execute the + * instruction + */ + genRegImmCheck(cUnit, kMipsCondMi, r_A1, 0, mir->offset, NULL); + loadConstant(cUnit, r_A2, ALLOC_DONT_TRACK); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + dvmCompilerClobberCallRegs(cUnit); + /* generate a branch over if allocation is successful */ + MipsLIR *branchOver = opCompareBranch(cUnit, kMipsBne, r_V0, r_ZERO); + /* + * OOM exception needs to be thrown here and cannot re-execute + */ + loadConstant(cUnit, r_A0, + (int) (cUnit->method->insns + mir->offset)); + genDispatchToHandler(cUnit, TEMPLATE_THROW_EXCEPTION_COMMON); + /* noreturn */ + + MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel); + target->defMask = ENCODE_ALL; + branchOver->generic.target = (LIR *) target; + rlResult = dvmCompilerGetReturn(cUnit); + storeValue(cUnit, rlDest, rlResult); + break; + } + case OP_INSTANCE_OF: + case OP_INSTANCE_OF_JUMBO: { + // May generate a call - use explicit registers + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlDest = dvmCompilerGetDest(cUnit, mir, 0); + RegLocation rlResult; + ClassObject *classPtr = + (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]); + /* + * Note: It is possible that classPtr is NULL at this point, + * even though this instruction has been successfully interpreted. + * If the previous interpretation had a null source, the + * interpreter would not have bothered to resolve the clazz. + * Bail out to the interpreter in this case, and log it + * so that we can tell if it happens frequently. 
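+     *
+     * When classPtr has been resolved, the code below inlines the common
+     * cases: a null reference already leaves 0 ("false") in r_V0 for the
+     * store, and an exact match of this->clazz against classPtr keeps the
+     * preloaded 1 ("true"); only a class mismatch falls back to calling
+     * dvmInstanceofNonTrivial.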
+ */ + if (classPtr == NULL) { + BAIL_LOOP_COMPILATION(); + LOGD("null clazz in OP_INSTANCE_OF, single-stepping"); + genInterpSingleStep(cUnit, mir); + break; + } + dvmCompilerFlushAllRegs(cUnit); /* Everything to home location */ + loadValueDirectFixed(cUnit, rlSrc, r_V0); /* Ref */ + loadConstant(cUnit, r_A2, (int) classPtr ); + /* When taken r_V0 has NULL which can be used for store directly */ + MipsLIR *branch1 = opCompareBranch(cUnit, kMipsBeqz, r_V0, -1); + /* r_A1 now contains object->clazz */ + loadWordDisp(cUnit, r_V0, offsetof(Object, clazz), r_A1); + /* r_A1 now contains object->clazz */ + LOAD_FUNC_ADDR(cUnit, r_T9, (int)dvmInstanceofNonTrivial); + loadConstant(cUnit, r_V0, 1); /* Assume true */ + MipsLIR *branch2 = opCompareBranch(cUnit, kMipsBeq, r_A1, r_A2); + genRegCopy(cUnit, r_A0, r_A1); + genRegCopy(cUnit, r_A1, r_A2); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + dvmCompilerClobberCallRegs(cUnit); + /* branch target here */ + MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel); + target->defMask = ENCODE_ALL; + rlResult = dvmCompilerGetReturn(cUnit); + storeValue(cUnit, rlDest, rlResult); + branch1->generic.target = (LIR *)target; + branch2->generic.target = (LIR *)target; + break; + } + case OP_IGET_WIDE: + case OP_IGET_WIDE_JUMBO: + genIGetWide(cUnit, mir, fieldOffset); + break; + case OP_IGET_VOLATILE: + case OP_IGET_VOLATILE_JUMBO: + case OP_IGET_OBJECT_VOLATILE: + case OP_IGET_OBJECT_VOLATILE_JUMBO: + case OP_IGET: + case OP_IGET_JUMBO: + case OP_IGET_OBJECT: + case OP_IGET_OBJECT_JUMBO: + case OP_IGET_BOOLEAN: + case OP_IGET_BOOLEAN_JUMBO: + case OP_IGET_BYTE: + case OP_IGET_BYTE_JUMBO: + case OP_IGET_CHAR: + case OP_IGET_CHAR_JUMBO: + case OP_IGET_SHORT: + case OP_IGET_SHORT_JUMBO: + genIGet(cUnit, mir, kWord, fieldOffset, isVolatile); + break; + case OP_IPUT_WIDE: + case OP_IPUT_WIDE_JUMBO: + genIPutWide(cUnit, mir, fieldOffset); + break; + case OP_IPUT_VOLATILE: + case OP_IPUT_VOLATILE_JUMBO: + case OP_IPUT: + case OP_IPUT_JUMBO: + case OP_IPUT_BOOLEAN: + case OP_IPUT_BOOLEAN_JUMBO: + case OP_IPUT_BYTE: + case OP_IPUT_BYTE_JUMBO: + case OP_IPUT_CHAR: + case OP_IPUT_CHAR_JUMBO: + case OP_IPUT_SHORT: + case OP_IPUT_SHORT_JUMBO: + genIPut(cUnit, mir, kWord, fieldOffset, false, isVolatile); + break; + case OP_IPUT_OBJECT_VOLATILE: + case OP_IPUT_OBJECT_VOLATILE_JUMBO: + case OP_IPUT_OBJECT: + case OP_IPUT_OBJECT_JUMBO: + genIPut(cUnit, mir, kWord, fieldOffset, true, isVolatile); + break; + case OP_IGET_WIDE_VOLATILE: + case OP_IGET_WIDE_VOLATILE_JUMBO: + case OP_IPUT_WIDE_VOLATILE: + case OP_IPUT_WIDE_VOLATILE_JUMBO: + genInterpSingleStep(cUnit, mir); + break; + default: + return true; + } + return false; +} + +static bool handleFmt22cs(CompilationUnit *cUnit, MIR *mir) +{ + Opcode dalvikOpcode = mir->dalvikInsn.opcode; + int fieldOffset = mir->dalvikInsn.vC; + switch (dalvikOpcode) { + case OP_IGET_QUICK: + case OP_IGET_OBJECT_QUICK: + genIGet(cUnit, mir, kWord, fieldOffset, false); + break; + case OP_IPUT_QUICK: + genIPut(cUnit, mir, kWord, fieldOffset, false, false); + break; + case OP_IPUT_OBJECT_QUICK: + genIPut(cUnit, mir, kWord, fieldOffset, true, false); + break; + case OP_IGET_WIDE_QUICK: + genIGetWide(cUnit, mir, fieldOffset); + break; + case OP_IPUT_WIDE_QUICK: + genIPutWide(cUnit, mir, fieldOffset); + break; + default: + return true; + } + return false; + +} + +/* Compare against zero */ +static bool handleFmt22t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, + MipsLIR *labelList) +{ + Opcode 
dalvikOpcode = mir->dalvikInsn.opcode; + MipsConditionCode cond; + MipsOpCode opc = kMipsNop; + MipsLIR * test = NULL; + /* backward branch? */ + bool backwardBranch = (bb->taken->startOffset <= mir->offset); + + if (backwardBranch && + (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) { + genSuspendPoll(cUnit, mir); + } + + RegLocation rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 1); + rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg); + rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg); + int reg1 = rlSrc1.lowReg; + int reg2 = rlSrc2.lowReg; + int tReg; + + switch (dalvikOpcode) { + case OP_IF_EQ: + opc = kMipsBeq; + break; + case OP_IF_NE: + opc = kMipsBne; + break; + case OP_IF_LT: + opc = kMipsBne; + tReg = dvmCompilerAllocTemp(cUnit); + test = newLIR3(cUnit, kMipsSlt, tReg, reg1, reg2); + reg1 = tReg; + reg2 = r_ZERO; + break; + case OP_IF_LE: + opc = kMipsBeqz; + tReg = dvmCompilerAllocTemp(cUnit); + test = newLIR3(cUnit, kMipsSlt, tReg, reg2, reg1); + reg1 = tReg; + reg2 = -1; + break; + case OP_IF_GT: + opc = kMipsBne; + tReg = dvmCompilerAllocTemp(cUnit); + test = newLIR3(cUnit, kMipsSlt, tReg, reg2, reg1); + reg1 = tReg; + reg2 = r_ZERO; + break; + case OP_IF_GE: + opc = kMipsBeqz; + tReg = dvmCompilerAllocTemp(cUnit); + test = newLIR3(cUnit, kMipsSlt, tReg, reg1, reg2); + reg1 = tReg; + reg2 = -1; + break; + default: + cond = (MipsConditionCode)0; + LOGE("Unexpected opcode (%d) for Fmt22t", dalvikOpcode); + dvmCompilerAbort(cUnit); + } + + genConditionalBranchMips(cUnit, opc, reg1, reg2, &labelList[bb->taken->id]); + /* This mostly likely will be optimized away in a later phase */ + genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]); + return false; +} + +static bool handleFmt22x_Fmt32x(CompilationUnit *cUnit, MIR *mir) +{ + Opcode opcode = mir->dalvikInsn.opcode; + + switch (opcode) { + case OP_MOVE_16: + case OP_MOVE_OBJECT_16: + case OP_MOVE_FROM16: + case OP_MOVE_OBJECT_FROM16: { + storeValue(cUnit, dvmCompilerGetDest(cUnit, mir, 0), + dvmCompilerGetSrc(cUnit, mir, 0)); + break; + } + case OP_MOVE_WIDE_16: + case OP_MOVE_WIDE_FROM16: { + storeValueWide(cUnit, dvmCompilerGetDestWide(cUnit, mir, 0, 1), + dvmCompilerGetSrcWide(cUnit, mir, 0, 1)); + break; + } + default: + return true; + } + return false; +} + +static bool handleFmt23x(CompilationUnit *cUnit, MIR *mir) +{ + Opcode opcode = mir->dalvikInsn.opcode; + RegLocation rlSrc1; + RegLocation rlSrc2; + RegLocation rlDest; + + if ((opcode >= OP_ADD_INT) && (opcode <= OP_REM_DOUBLE)) { + return genArithOp( cUnit, mir ); + } + + /* APUTs have 3 sources and no targets */ + if (mir->ssaRep->numDefs == 0) { + if (mir->ssaRep->numUses == 3) { + rlDest = dvmCompilerGetSrc(cUnit, mir, 0); + rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 1); + rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 2); + } else { + assert(mir->ssaRep->numUses == 4); + rlDest = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 2); + rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 3); + } + } else { + /* Two sources and 1 dest. 
Deduce the operand sizes */ + if (mir->ssaRep->numUses == 4) { + rlSrc1 = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + rlSrc2 = dvmCompilerGetSrcWide(cUnit, mir, 2, 3); + } else { + assert(mir->ssaRep->numUses == 2); + rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 0); + rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 1); + } + if (mir->ssaRep->numDefs == 2) { + rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + } else { + assert(mir->ssaRep->numDefs == 1); + rlDest = dvmCompilerGetDest(cUnit, mir, 0); + } + } + + switch (opcode) { + case OP_CMPL_FLOAT: + case OP_CMPG_FLOAT: + case OP_CMPL_DOUBLE: + case OP_CMPG_DOUBLE: + return genCmpFP(cUnit, mir, rlDest, rlSrc1, rlSrc2); + case OP_CMP_LONG: + genCmpLong(cUnit, mir, rlDest, rlSrc1, rlSrc2); + break; + case OP_AGET_WIDE: + genArrayGet(cUnit, mir, kLong, rlSrc1, rlSrc2, rlDest, 3); + break; + case OP_AGET: + case OP_AGET_OBJECT: + genArrayGet(cUnit, mir, kWord, rlSrc1, rlSrc2, rlDest, 2); + break; + case OP_AGET_BOOLEAN: + genArrayGet(cUnit, mir, kUnsignedByte, rlSrc1, rlSrc2, rlDest, 0); + break; + case OP_AGET_BYTE: + genArrayGet(cUnit, mir, kSignedByte, rlSrc1, rlSrc2, rlDest, 0); + break; + case OP_AGET_CHAR: + genArrayGet(cUnit, mir, kUnsignedHalf, rlSrc1, rlSrc2, rlDest, 1); + break; + case OP_AGET_SHORT: + genArrayGet(cUnit, mir, kSignedHalf, rlSrc1, rlSrc2, rlDest, 1); + break; + case OP_APUT_WIDE: + genArrayPut(cUnit, mir, kLong, rlSrc1, rlSrc2, rlDest, 3); + break; + case OP_APUT: + genArrayPut(cUnit, mir, kWord, rlSrc1, rlSrc2, rlDest, 2); + break; + case OP_APUT_OBJECT: + genArrayObjectPut(cUnit, mir, rlSrc1, rlSrc2, rlDest, 2); + break; + case OP_APUT_SHORT: + case OP_APUT_CHAR: + genArrayPut(cUnit, mir, kUnsignedHalf, rlSrc1, rlSrc2, rlDest, 1); + break; + case OP_APUT_BYTE: + case OP_APUT_BOOLEAN: + genArrayPut(cUnit, mir, kUnsignedByte, rlSrc1, rlSrc2, rlDest, 0); + break; + default: + return true; + } + return false; +} + +/* + * Find the matching case. + * + * return values: + * r_RESULT0 (low 32-bit): pc of the chaining cell corresponding to the resolved case, + * including default which is placed at MIN(size, MAX_CHAINED_SWITCH_CASES). + * r_RESULT1 (high 32-bit): the branch offset of the matching case (only for indexes + * above MAX_CHAINED_SWITCH_CASES). + * + * Instructions around the call are: + * + * jalr &findPackedSwitchIndex + * nop + * lw gp, 84(sp) | + * addu | 20 bytes for these 5 instructions + * move | (NOTE: if this sequence is shortened or lengthened, then + * jr | the 20 byte offset added below in 3 places must be changed + * nop | accordingly.) + * chaining cell for case 0 [16 bytes] + * chaining cell for case 1 [16 bytes] + * : + * chaining cell for case MIN(size, MAX_CHAINED_SWITCH_CASES)-1 [16 bytes] + * chaining cell for case default [16 bytes] + * noChain exit + */ +static s8 findPackedSwitchIndex(const u2* switchData, int testVal) +{ + int size; + int firstKey; + const int *entries; + int index; + int jumpIndex; + int caseDPCOffset = 0; + + /* + * Packed switch data format: + * ushort ident = 0x0100 magic value + * ushort size number of entries in the table + * int first_key first (and lowest) switch case value + * int targets[size] branch targets, relative to switch opcode + * + * Total size is (4+size*2) 16-bit code units. + */ + size = switchData[1]; + assert(size > 0); + + firstKey = switchData[2]; + firstKey |= switchData[3] << 16; + + + /* The entries are guaranteed to be aligned on a 32-bit boundary; + * we can treat them as a native int array. 
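+     *
+     * For example, with first_key = 10 and size = 3, testVal = 11 yields
+     * index 1, so the low word of the result steers execution to chaining
+     * cell 1 (1 * CHAIN_CELL_NORMAL_SIZE + 20 bytes past the call site).
+     * Out-of-range values select the default cell, and indexes at or above
+     * MAX_CHAINED_SWITCH_CASES take the non-chaining exit with the Dalvik
+     * branch offset returned in the high word.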
+ */ + entries = (const int*) &switchData[4]; + assert(((u4)entries & 0x3) == 0); + + index = testVal - firstKey; + + /* Jump to the default cell */ + if (index < 0 || index >= size) { + jumpIndex = MIN(size, MAX_CHAINED_SWITCH_CASES); + /* Jump to the non-chaining exit point */ + } else if (index >= MAX_CHAINED_SWITCH_CASES) { + jumpIndex = MAX_CHAINED_SWITCH_CASES + 1; +#ifdef HAVE_LITTLE_ENDIAN + caseDPCOffset = entries[index]; +#else + caseDPCOffset = (unsigned int)entries[index] >> 16 | entries[index] << 16; +#endif + /* Jump to the inline chaining cell */ + } else { + jumpIndex = index; + } + + return (((s8) caseDPCOffset) << 32) | (u8) (jumpIndex * CHAIN_CELL_NORMAL_SIZE + 20); +} + +/* See comments for findPackedSwitchIndex */ +static s8 findSparseSwitchIndex(const u2* switchData, int testVal) +{ + int size; + const int *keys; + const int *entries; + /* In Thumb mode pc is 4 ahead of the "mov r2, pc" instruction */ + int i; + + /* + * Sparse switch data format: + * ushort ident = 0x0200 magic value + * ushort size number of entries in the table; > 0 + * int keys[size] keys, sorted low-to-high; 32-bit aligned + * int targets[size] branch targets, relative to switch opcode + * + * Total size is (2+size*4) 16-bit code units. + */ + + size = switchData[1]; + assert(size > 0); + + /* The keys are guaranteed to be aligned on a 32-bit boundary; + * we can treat them as a native int array. + */ + keys = (const int*) &switchData[2]; + assert(((u4)keys & 0x3) == 0); + + /* The entries are guaranteed to be aligned on a 32-bit boundary; + * we can treat them as a native int array. + */ + entries = keys + size; + assert(((u4)entries & 0x3) == 0); + + /* + * Run through the list of keys, which are guaranteed to + * be sorted low-to-high. + * + * Most tables have 3-4 entries. Few have more than 10. A binary + * search here is probably not useful. + */ + for (i = 0; i < size; i++) { +#ifdef HAVE_LITTLE_ENDIAN + int k = keys[i]; + if (k == testVal) { + /* MAX_CHAINED_SWITCH_CASES + 1 is the start of the overflow case */ + int jumpIndex = (i < MAX_CHAINED_SWITCH_CASES) ? + i : MAX_CHAINED_SWITCH_CASES + 1; + return (((s8) entries[i]) << 32) | (u8) (jumpIndex * CHAIN_CELL_NORMAL_SIZE + 20); +#else + int k = (unsigned int)keys[i] >> 16 | keys[i] << 16; + if (k == testVal) { + /* MAX_CHAINED_SWITCH_CASES + 1 is the start of the overflow case */ + int jumpIndex = (i < MAX_CHAINED_SWITCH_CASES) ? 
+ i : MAX_CHAINED_SWITCH_CASES + 1; + int temp = (unsigned int)entries[i] >> 16 | entries[i] << 16; + return (((s8) temp) << 32) | (u8) (jumpIndex * CHAIN_CELL_NORMAL_SIZE + 20); +#endif + } else if (k > testVal) { + break; + } + } + return MIN(size, MAX_CHAINED_SWITCH_CASES) * CHAIN_CELL_NORMAL_SIZE + 20; +} + +static bool handleFmt31t(CompilationUnit *cUnit, MIR *mir) +{ + Opcode dalvikOpcode = mir->dalvikInsn.opcode; + switch (dalvikOpcode) { + case OP_FILL_ARRAY_DATA: { + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + // Making a call - use explicit registers + dvmCompilerFlushAllRegs(cUnit); /* Everything to home location */ + genExportPC(cUnit, mir); + loadValueDirectFixed(cUnit, rlSrc, r_A0); + LOAD_FUNC_ADDR(cUnit, r_T9, (int)dvmInterpHandleFillArrayData); + loadConstant(cUnit, r_A1, + (int) (cUnit->method->insns + mir->offset + mir->dalvikInsn.vB)); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + dvmCompilerClobberCallRegs(cUnit); + /* generate a branch over if successful */ + MipsLIR *branchOver = opCompareBranch(cUnit, kMipsBne, r_V0, r_ZERO); + loadConstant(cUnit, r_A0, + (int) (cUnit->method->insns + mir->offset)); + genDispatchToHandler(cUnit, TEMPLATE_THROW_EXCEPTION_COMMON); + MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel); + target->defMask = ENCODE_ALL; + branchOver->generic.target = (LIR *) target; + break; + } + /* + * Compute the goto target of up to + * MIN(switchSize, MAX_CHAINED_SWITCH_CASES) + 1 chaining cells. + * See the comment before findPackedSwitchIndex for the code layout. + */ + case OP_PACKED_SWITCH: + case OP_SPARSE_SWITCH: { + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + dvmCompilerFlushAllRegs(cUnit); /* Everything to home location */ + loadValueDirectFixed(cUnit, rlSrc, r_A1); + dvmCompilerLockAllTemps(cUnit); + + if (dalvikOpcode == OP_PACKED_SWITCH) { + LOAD_FUNC_ADDR(cUnit, r_T9, (int)findPackedSwitchIndex); + } else { + LOAD_FUNC_ADDR(cUnit, r_T9, (int)findSparseSwitchIndex); + } + /* r_A0 <- Addr of the switch data */ + loadConstant(cUnit, r_A0, + (int) (cUnit->method->insns + mir->offset + mir->dalvikInsn.vB)); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + dvmCompilerClobberCallRegs(cUnit); + /* pc <- computed goto target using value in RA */ + newLIR3(cUnit, kMipsAddu, r_A0, r_RA, r_RESULT0); + newLIR2(cUnit, kMipsMove, r_A1, r_RESULT1); + newLIR1(cUnit, kMipsJr, r_A0); + newLIR0(cUnit, kMipsNop); /* for maintaining 20 byte offset */ + break; + } + default: + return true; + } + return false; +} + +/* + * See the example of predicted inlining listed before the + * genValidationForPredictedInline function. The function here takes care the + * branch over at 0x4858de78 and the misprediction target at 0x4858de7a. 
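+ *
+ * The code below first emits an unconditional branch that the correctly
+ * predicted path takes around the slow invoke sequence (bypassing the
+ * inlined move-result block if there is one), then resets the register
+ * and null-check tracking state and emits the label that the
+ * misPredBranchOver verification branch is patched to target.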
+ */ +static void genLandingPadForMispredictedCallee(CompilationUnit *cUnit, MIR *mir, + BasicBlock *bb, + MipsLIR *labelList) +{ + BasicBlock *fallThrough = bb->fallThrough; + + /* Bypass the move-result block if there is one */ + if (fallThrough->firstMIRInsn) { + assert(fallThrough->firstMIRInsn->OptimizationFlags & MIR_INLINED_PRED); + fallThrough = fallThrough->fallThrough; + } + /* Generate a branch over if the predicted inlining is correct */ + genUnconditionalBranch(cUnit, &labelList[fallThrough->id]); + + /* Reset the register state */ + dvmCompilerResetRegPool(cUnit); + dvmCompilerClobberAllRegs(cUnit); + dvmCompilerResetNullCheck(cUnit); + + /* Target for the slow invoke path */ + MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel); + target->defMask = ENCODE_ALL; + /* Hook up the target to the verification branch */ + mir->meta.callsiteInfo->misPredBranchOver->target = (LIR *) target; +} + +static bool handleFmt35c_3rc_5rc(CompilationUnit *cUnit, MIR *mir, + BasicBlock *bb, MipsLIR *labelList) +{ + MipsLIR *retChainingCell = NULL; + MipsLIR *pcrLabel = NULL; + + /* An invoke with the MIR_INLINED is effectively a no-op */ + if (mir->OptimizationFlags & MIR_INLINED) + return false; + + if (bb->fallThrough != NULL) + retChainingCell = &labelList[bb->fallThrough->id]; + + DecodedInstruction *dInsn = &mir->dalvikInsn; + switch (mir->dalvikInsn.opcode) { + /* + * calleeMethod = this->clazz->vtable[ + * method->clazz->pDvmDex->pResMethods[BBBB]->methodIndex + * ] + */ + case OP_INVOKE_VIRTUAL: + case OP_INVOKE_VIRTUAL_RANGE: + case OP_INVOKE_VIRTUAL_JUMBO: { + MipsLIR *predChainingCell = &labelList[bb->taken->id]; + int methodIndex = + cUnit->method->clazz->pDvmDex->pResMethods[dInsn->vB]-> + methodIndex; + + /* + * If the invoke has non-null misPredBranchOver, we need to generate + * the non-inlined version of the invoke here to handle the + * mispredicted case. 
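+         *
+         * The actual dispatch is emitted by genInvokeVirtualCommon, which
+         * receives methodIndex (taken from the resolved method above)
+         * together with the return and predicted chaining cells for this
+         * call site.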
+ */ + if (mir->meta.callsiteInfo->misPredBranchOver) { + genLandingPadForMispredictedCallee(cUnit, mir, bb, labelList); + } + + if (mir->dalvikInsn.opcode == OP_INVOKE_VIRTUAL) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + genInvokeVirtualCommon(cUnit, mir, methodIndex, + retChainingCell, + predChainingCell, + pcrLabel); + break; + } + /* + * calleeMethod = method->clazz->super->vtable[method->clazz->pDvmDex + * ->pResMethods[BBBB]->methodIndex] + */ + case OP_INVOKE_SUPER: + case OP_INVOKE_SUPER_RANGE: + case OP_INVOKE_SUPER_JUMBO: { + /* Grab the method ptr directly from what the interpreter sees */ + const Method *calleeMethod = mir->meta.callsiteInfo->method; + assert(calleeMethod == cUnit->method->clazz->super->vtable[ + cUnit->method->clazz->pDvmDex-> + pResMethods[dInsn->vB]->methodIndex]); + + if (mir->dalvikInsn.opcode == OP_INVOKE_SUPER) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + if (mir->OptimizationFlags & MIR_INVOKE_METHOD_JIT) { + const Method *calleeMethod = mir->meta.callsiteInfo->method; + void *calleeAddr = dvmJitGetMethodAddr(calleeMethod->insns); + assert(calleeAddr); + genInvokeSingletonWholeMethod(cUnit, mir, calleeAddr, + retChainingCell); + } else { + /* r_A0 = calleeMethod */ + loadConstant(cUnit, r_A0, (int) calleeMethod); + + genInvokeSingletonCommon(cUnit, mir, bb, labelList, pcrLabel, + calleeMethod); + } + break; + } + /* calleeMethod = method->clazz->pDvmDex->pResMethods[BBBB] */ + case OP_INVOKE_DIRECT: + case OP_INVOKE_DIRECT_RANGE: + case OP_INVOKE_DIRECT_JUMBO: { + /* Grab the method ptr directly from what the interpreter sees */ + const Method *calleeMethod = mir->meta.callsiteInfo->method; + assert(calleeMethod == + cUnit->method->clazz->pDvmDex->pResMethods[dInsn->vB]); + + if (mir->dalvikInsn.opcode == OP_INVOKE_DIRECT) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + /* r_A0 = calleeMethod */ + loadConstant(cUnit, r_A0, (int) calleeMethod); + + genInvokeSingletonCommon(cUnit, mir, bb, labelList, pcrLabel, + calleeMethod); + break; + } + /* calleeMethod = method->clazz->pDvmDex->pResMethods[BBBB] */ + case OP_INVOKE_STATIC: + case OP_INVOKE_STATIC_RANGE: + case OP_INVOKE_STATIC_JUMBO: { + /* Grab the method ptr directly from what the interpreter sees */ + const Method *calleeMethod = mir->meta.callsiteInfo->method; + assert(calleeMethod == + cUnit->method->clazz->pDvmDex->pResMethods[dInsn->vB]); + + if (mir->dalvikInsn.opcode == OP_INVOKE_STATIC) + genProcessArgsNoRange(cUnit, mir, dInsn, + NULL /* no null check */); + else + genProcessArgsRange(cUnit, mir, dInsn, + NULL /* no null check */); + + if (mir->OptimizationFlags & MIR_INVOKE_METHOD_JIT) { + const Method *calleeMethod = mir->meta.callsiteInfo->method; + void *calleeAddr = dvmJitGetMethodAddr(calleeMethod->insns); + assert(calleeAddr); + genInvokeSingletonWholeMethod(cUnit, mir, calleeAddr, + retChainingCell); + } else { + /* r_A0 = calleeMethod */ + loadConstant(cUnit, r_A0, (int) calleeMethod); + + genInvokeSingletonCommon(cUnit, mir, bb, labelList, pcrLabel, + calleeMethod); + } + break; + } + + /* + * calleeMethod = dvmFindInterfaceMethodInCache(this->clazz, + * BBBB, method, method->clazz->pDvmDex) + * + * The following is an example of generated code for + * "invoke-interface v0" + * + * -------- dalvik offset: 0x000f @ invoke-interface (PI) v2 + * 0x2f140c54 : lw 
a0,8(s1) # genProcessArgsNoRange + * 0x2f140c58 : addiu s4,s1,0xffffffe8(-24) + * 0x2f140c5c : beqz a0,0x2f140d5c (L0x11f864) + * 0x2f140c60 : pref 1,0(s4) + * -------- BARRIER + * 0x2f140c64 : sw a0,0(s4) + * 0x2f140c68 : addiu s4,s4,0x0004(4) + * -------- BARRIER + * 0x2f140c6c : lui s0,0x2d23(11555) # dalvikPC + * 0x2f140c70 : ori s0,s0,0x2d2365a6(757294502) + * 0x2f140c74 : lahi/lui a1,0x2f14(12052) # a1 <- &retChainingCell + * 0x2f140c78 : lalo/ori a1,a1,0x2f140d38(789843256) + * 0x2f140c7c : lahi/lui a2,0x2f14(12052) # a2 <- &predictedChainingCell + * 0x2f140c80 : lalo/ori a2,a2,0x2f140d80(789843328) + * 0x2f140c84 : jal 0x2f1311ec(789778924) # call TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN + * 0x2f140c88 : nop + * 0x2f140c8c : b 0x2f140d80 (L0x11efc0) # off to the predicted chain + * 0x2f140c90 : nop + * 0x2f140c94 : b 0x2f140d60 (L0x12457c) # punt to the interpreter + * 0x2f140c98 : lui a0,0x2d23(11555) + * 0x2f140c9c : move s5,a1 # prepare for dvmFindInterfaceMethodInCache + * 0x2f140ca0 : move s6,a2 + * 0x2f140ca4 : move s7,a3 + * 0x2f140ca8 : move a0,a3 + * 0x2f140cac : ori a1,zero,0x2b42(11074) + * 0x2f140cb0 : lui a2,0x2c92(11410) + * 0x2f140cb4 : ori a2,a2,0x2c92adf8(747810296) + * 0x2f140cb8 : lui a3,0x0009(9) + * 0x2f140cbc : ori a3,a3,0x924b8(599224) + * 0x2f140cc0 : lui t9,0x2ab2(10930) + * 0x2f140cc4 : ori t9,t9,0x2ab2a48c(716350604) + * 0x2f140cc8 : jalr ra,t9 # call dvmFindInterfaceMethodInCache + * 0x2f140ccc : nop + * 0x2f140cd0 : lw gp,84(sp) + * 0x2f140cd4 : move a0,v0 + * 0x2f140cd8 : bne v0,zero,0x2f140cf0 (L0x120064) + * 0x2f140cdc : nop + * 0x2f140ce0 : lui a0,0x2d23(11555) # a0 <- dalvikPC + * 0x2f140ce4 : ori a0,a0,0x2d2365a6(757294502) + * 0x2f140ce8 : jal 0x2f131720(789780256) # call TEMPLATE_THROW_EXCEPTION_COMMON + * 0x2f140cec : nop + * 0x2f140cf0 : move a1,s5 # a1 <- &retChainingCell + * 0x2f140cf4 : bgtz s5,0x2f140d20 (L0x120324) # >0? 
don't rechain + * 0x2f140cf8 : nop + * 0x2f140cfc : lui t9,0x2aba(10938) # prepare for dvmJitToPatchPredictedChain + * 0x2f140d00 : ori t9,t9,0x2abae3c4(716891076) + * 0x2f140d04 : move a1,s2 + * 0x2f140d08 : move a2,s6 + * 0x2f140d0c : move a3,s7 + * 0x2f140d10 : jalr ra,t9 # call dvmJitToPatchPredictedChain + * 0x2f140d14 : nop + * 0x2f140d18 : lw gp,84(sp) + * 0x2f140d1c : move a0,v0 + * 0x2f140d20 : lahi/lui a1,0x2f14(12052) + * 0x2f140d24 : lalo/ori a1,a1,0x2f140d38(789843256) # a1 <- &retChainingCell + * 0x2f140d28 : jal 0x2f1310c4(789778628) # call TEMPLATE_INVOKE_METHOD_NO_OPT + * 0x2f140d2c : nop + * 0x2f140d30 : b 0x2f140d60 (L0x12457c) + * 0x2f140d34 : lui a0,0x2d23(11555) + * 0x2f140d38 : .align4 + * -------- dalvik offset: 0x0012 @ move-result (PI) v1, (#0), (#0) + * 0x2f140d38 : lw a2,16(s2) + * 0x2f140d3c : sw a2,4(s1) + * 0x2f140d40 : b 0x2f140d74 (L0x1246fc) + * 0x2f140d44 : lw a0,116(s2) + * 0x2f140d48 : undefined + * -------- reconstruct dalvik PC : 0x2d2365a6 @ +0x000f + * 0x2f140d4c : lui a0,0x2d23(11555) + * 0x2f140d50 : ori a0,a0,0x2d2365a6(757294502) + * 0x2f140d54 : b 0x2f140d68 (L0x12463c) + * 0x2f140d58 : lw a1,108(s2) + * -------- reconstruct dalvik PC : 0x2d2365a6 @ +0x000f + * 0x2f140d5c : lui a0,0x2d23(11555) + * 0x2f140d60 : ori a0,a0,0x2d2365a6(757294502) + * Exception_Handling: + * 0x2f140d64 : lw a1,108(s2) + * 0x2f140d68 : jalr ra,a1 + * 0x2f140d6c : nop + * 0x2f140d70 : .align4 + * -------- chaining cell (hot): 0x0013 + * 0x2f140d70 : lw a0,116(s2) + * 0x2f140d74 : jalr ra,a0 + * 0x2f140d78 : nop + * 0x2f140d7c : data 0x2d2365ae(757294510) + * 0x2f140d80 : .align4 + * -------- chaining cell (predicted): N/A + * 0x2f140d80 : data 0xe7fe(59390) + * 0x2f140d84 : data 0x0000(0) + * 0x2f140d88 : data 0x0000(0) + * 0x2f140d8c : data 0x0000(0) + * 0x2f140d90 : data 0x0000(0) + * -------- end of chaining cells (0x0190) + */ + case OP_INVOKE_INTERFACE: + case OP_INVOKE_INTERFACE_RANGE: + case OP_INVOKE_INTERFACE_JUMBO: { + MipsLIR *predChainingCell = &labelList[bb->taken->id]; + + /* + * If the invoke has non-null misPredBranchOver, we need to generate + * the non-inlined version of the invoke here to handle the + * mispredicted case. + */ + if (mir->meta.callsiteInfo->misPredBranchOver) { + genLandingPadForMispredictedCallee(cUnit, mir, bb, labelList); + } + + if (mir->dalvikInsn.opcode == OP_INVOKE_INTERFACE) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + /* "this" is already left in r_A0 by genProcessArgs* */ + + /* r4PC = dalvikCallsite */ + loadConstant(cUnit, r4PC, + (int) (cUnit->method->insns + mir->offset)); + + /* r_A1 = &retChainingCell */ + MipsLIR *addrRetChain = newLIR2(cUnit, kMipsLahi, r_A1, 0); + addrRetChain->generic.target = (LIR *) retChainingCell; + addrRetChain = newLIR3(cUnit, kMipsLalo, r_A1, r_A1, 0); + addrRetChain->generic.target = (LIR *) retChainingCell; + + + /* r_A2 = &predictedChainingCell */ + MipsLIR *predictedChainingCell = newLIR2(cUnit, kMipsLahi, r_A2, 0); + predictedChainingCell->generic.target = (LIR *) predChainingCell; + predictedChainingCell = newLIR3(cUnit, kMipsLalo, r_A2, r_A2, 0); + predictedChainingCell->generic.target = (LIR *) predChainingCell; + + genDispatchToHandler(cUnit, gDvmJit.methodTraceSupport ? 
+ TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF : + TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN); + + /* return through ra - jump to the chaining cell */ + genUnconditionalBranch(cUnit, predChainingCell); + + /* + * null-check on "this" may have been eliminated, but we still need + * a PC-reconstruction label for stack overflow bailout. + */ + if (pcrLabel == NULL) { + int dPC = (int) (cUnit->method->insns + mir->offset); + pcrLabel = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + pcrLabel->opcode = kMipsPseudoPCReconstructionCell; + pcrLabel->operands[0] = dPC; + pcrLabel->operands[1] = mir->offset; + /* Insert the place holder to the growable list */ + dvmInsertGrowableList(&cUnit->pcReconstructionList, + (intptr_t) pcrLabel); + } + + /* return through ra+8 - punt to the interpreter */ + genUnconditionalBranch(cUnit, pcrLabel); + + /* + * return through ra+16 - fully resolve the callee method. + * r_A1 <- count + * r_A2 <- &predictedChainCell + * r_A3 <- this->class + * r4 <- dPC + * r_S4 <- this->class->vtable + */ + + /* Save count, &predictedChainCell, and class to high regs first */ + genRegCopy(cUnit, r_S5, r_A1); + genRegCopy(cUnit, r_S6, r_A2); + genRegCopy(cUnit, r_S7, r_A3); + + /* r_A0 now contains this->clazz */ + genRegCopy(cUnit, r_A0, r_A3); + + /* r_A1 = BBBB */ + loadConstant(cUnit, r_A1, dInsn->vB); + + /* r_A2 = method (caller) */ + loadConstant(cUnit, r_A2, (int) cUnit->method); + + /* r_A3 = pDvmDex */ + loadConstant(cUnit, r_A3, (int) cUnit->method->clazz->pDvmDex); + + LOAD_FUNC_ADDR(cUnit, r_T9, + (intptr_t) dvmFindInterfaceMethodInCache); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + /* r_V0 = calleeMethod (returned from dvmFindInterfaceMethodInCache */ + genRegCopy(cUnit, r_A0, r_V0); + + dvmCompilerClobberCallRegs(cUnit); + /* generate a branch over if the interface method is resolved */ + MipsLIR *branchOver = opCompareBranch(cUnit, kMipsBne, r_V0, r_ZERO); + /* + * calleeMethod == NULL -> throw + */ + loadConstant(cUnit, r_A0, + (int) (cUnit->method->insns + mir->offset)); + genDispatchToHandler(cUnit, TEMPLATE_THROW_EXCEPTION_COMMON); + /* noreturn */ + + MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel); + target->defMask = ENCODE_ALL; + branchOver->generic.target = (LIR *) target; + + genRegCopy(cUnit, r_A1, r_S5); + + /* Check if rechain limit is reached */ + MipsLIR *bypassRechaining = opCompareBranch(cUnit, kMipsBgtz, r_S5, -1); + + LOAD_FUNC_ADDR(cUnit, r_T9, (int) dvmJitToPatchPredictedChain); + + genRegCopy(cUnit, r_A1, rSELF); + genRegCopy(cUnit, r_A2, r_S6); + genRegCopy(cUnit, r_A3, r_S7); + + /* + * r_A0 = calleeMethod + * r_A2 = &predictedChainingCell + * r_A3 = class + * + * &returnChainingCell has been loaded into r_A1 but is not needed + * when patching the chaining cell and will be clobbered upon + * returning so it will be reconstructed again. + */ + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + genRegCopy(cUnit, r_A0, r_V0); + + /* r_A1 = &retChainingCell */ + addrRetChain = newLIR2(cUnit, kMipsLahi, r_A1, 0); + addrRetChain->generic.target = (LIR *) retChainingCell; + bypassRechaining->generic.target = (LIR *) addrRetChain; + addrRetChain = newLIR3(cUnit, kMipsLalo, r_A1, r_A1, 0); + addrRetChain->generic.target = (LIR *) retChainingCell; + + + /* + * r_A0 = this, r_A1 = calleeMethod, + * r_A1 = &ChainingCell, + * r4PC = callsiteDPC, + */ + genDispatchToHandler(cUnit, gDvmJit.methodTraceSupport ? 
+ TEMPLATE_INVOKE_METHOD_NO_OPT_PROF : + TEMPLATE_INVOKE_METHOD_NO_OPT); + +#if defined(WITH_JIT_TUNING) + gDvmJit.invokePolymorphic++; +#endif + /* Handle exceptions using the interpreter */ + genTrap(cUnit, mir->offset, pcrLabel); + break; + } + case OP_INVOKE_OBJECT_INIT_JUMBO: + case OP_INVOKE_OBJECT_INIT_RANGE: + case OP_FILLED_NEW_ARRAY: + case OP_FILLED_NEW_ARRAY_RANGE: + case OP_FILLED_NEW_ARRAY_JUMBO: { + /* Just let the interpreter deal with these */ + genInterpSingleStep(cUnit, mir); + break; + } + default: + return true; + } + return false; +} + +static bool handleFmt35ms_3rms(CompilationUnit *cUnit, MIR *mir, + BasicBlock *bb, MipsLIR *labelList) +{ + MipsLIR *pcrLabel = NULL; + + /* An invoke with the MIR_INLINED is effectively a no-op */ + if (mir->OptimizationFlags & MIR_INLINED) + return false; + + DecodedInstruction *dInsn = &mir->dalvikInsn; + switch (mir->dalvikInsn.opcode) { + /* calleeMethod = this->clazz->vtable[BBBB] */ + case OP_INVOKE_VIRTUAL_QUICK_RANGE: + case OP_INVOKE_VIRTUAL_QUICK: { + int methodIndex = dInsn->vB; + MipsLIR *retChainingCell = &labelList[bb->fallThrough->id]; + MipsLIR *predChainingCell = &labelList[bb->taken->id]; + + /* + * If the invoke has non-null misPredBranchOver, we need to generate + * the non-inlined version of the invoke here to handle the + * mispredicted case. + */ + if (mir->meta.callsiteInfo->misPredBranchOver) { + genLandingPadForMispredictedCallee(cUnit, mir, bb, labelList); + } + + if (mir->dalvikInsn.opcode == OP_INVOKE_VIRTUAL_QUICK) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + if (mir->OptimizationFlags & MIR_INVOKE_METHOD_JIT) { + const Method *calleeMethod = mir->meta.callsiteInfo->method; + void *calleeAddr = dvmJitGetMethodAddr(calleeMethod->insns); + assert(calleeAddr); + genInvokeVirtualWholeMethod(cUnit, mir, calleeAddr, + retChainingCell); + } + + genInvokeVirtualCommon(cUnit, mir, methodIndex, + retChainingCell, + predChainingCell, + pcrLabel); + break; + } + /* calleeMethod = method->clazz->super->vtable[BBBB] */ + case OP_INVOKE_SUPER_QUICK: + case OP_INVOKE_SUPER_QUICK_RANGE: { + /* Grab the method ptr directly from what the interpreter sees */ + const Method *calleeMethod = mir->meta.callsiteInfo->method; + assert(calleeMethod == + cUnit->method->clazz->super->vtable[dInsn->vB]); + + if (mir->dalvikInsn.opcode == OP_INVOKE_SUPER_QUICK) + genProcessArgsNoRange(cUnit, mir, dInsn, &pcrLabel); + else + genProcessArgsRange(cUnit, mir, dInsn, &pcrLabel); + + /* r_A0 = calleeMethod */ + loadConstant(cUnit, r_A0, (int) calleeMethod); + + genInvokeSingletonCommon(cUnit, mir, bb, labelList, pcrLabel, + calleeMethod); + break; + } + default: + return true; + } + return false; +} + +/* + * This operation is complex enough that we'll do it partly inline + * and partly with a handler. NOTE: the handler uses hardcoded + * values for string object offsets and must be revisitied if the + * layout changes. 
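+ *
+ * Only the null checks on the two String operands are emitted inline
+ * below; the character comparison itself is done by the
+ * TEMPLATE_STRING_COMPARETO handler, whose result is stored to the
+ * inlined-invoke target via dvmCompilerGetReturn.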
+ */ +static bool genInlinedCompareTo(CompilationUnit *cUnit, MIR *mir) +{ +#if defined(USE_GLOBAL_STRING_DEFS) + return handleExecuteInlineC(cUnit, mir); +#else + MipsLIR *rollback; + RegLocation rlThis = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlComp = dvmCompilerGetSrc(cUnit, mir, 1); + + loadValueDirectFixed(cUnit, rlThis, r_A0); + loadValueDirectFixed(cUnit, rlComp, r_A1); + /* Test objects for NULL */ + rollback = genNullCheck(cUnit, rlThis.sRegLow, r_A0, mir->offset, NULL); + genNullCheck(cUnit, rlComp.sRegLow, r_A1, mir->offset, rollback); + /* + * TUNING: we could check for object pointer equality before invoking + * handler. Unclear whether the gain would be worth the added code size + * expansion. + */ + genDispatchToHandler(cUnit, TEMPLATE_STRING_COMPARETO); + storeValue(cUnit, inlinedTarget(cUnit, mir, false), + dvmCompilerGetReturn(cUnit)); + return false; +#endif +} + +static bool genInlinedFastIndexOf(CompilationUnit *cUnit, MIR *mir) +{ +#if defined(USE_GLOBAL_STRING_DEFS) + return handleExecuteInlineC(cUnit, mir); +#else + RegLocation rlThis = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlChar = dvmCompilerGetSrc(cUnit, mir, 1); + + loadValueDirectFixed(cUnit, rlThis, r_A0); + loadValueDirectFixed(cUnit, rlChar, r_A1); + + RegLocation rlStart = dvmCompilerGetSrc(cUnit, mir, 2); + loadValueDirectFixed(cUnit, rlStart, r_A2); + + /* Test objects for NULL */ + genNullCheck(cUnit, rlThis.sRegLow, r_A0, mir->offset, NULL); + genDispatchToHandler(cUnit, TEMPLATE_STRING_INDEXOF); + storeValue(cUnit, inlinedTarget(cUnit, mir, false), + dvmCompilerGetReturn(cUnit)); + return false; +#endif +} + +// Generates an inlined String.isEmpty or String.length. +static bool genInlinedStringIsEmptyOrLength(CompilationUnit *cUnit, MIR *mir, + bool isEmpty) +{ + // dst = src.length(); + RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlDest = inlinedTarget(cUnit, mir, false); + rlObj = loadValue(cUnit, rlObj, kCoreReg); + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset, NULL); + loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count, + rlResult.lowReg); + if (isEmpty) { + // dst = (dst == 0); + int tReg = dvmCompilerAllocTemp(cUnit); + newLIR3(cUnit, kMipsSltu, tReg, r_ZERO, rlResult.lowReg); + opRegRegImm(cUnit, kOpXor, rlResult.lowReg, tReg, 1); + } + storeValue(cUnit, rlDest, rlResult); + return false; +} + +static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir) +{ + return genInlinedStringIsEmptyOrLength(cUnit, mir, false); +} + +static bool genInlinedStringIsEmpty(CompilationUnit *cUnit, MIR *mir) +{ + return genInlinedStringIsEmptyOrLength(cUnit, mir, true); +} + +static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir) +{ + int contents = OFFSETOF_MEMBER(ArrayObject, contents); + RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlIdx = dvmCompilerGetSrc(cUnit, mir, 1); + RegLocation rlDest = inlinedTarget(cUnit, mir, false); + RegLocation rlResult; + rlObj = loadValue(cUnit, rlObj, kCoreReg); + rlIdx = loadValue(cUnit, rlIdx, kCoreReg); + int regMax = dvmCompilerAllocTemp(cUnit); + int regOff = dvmCompilerAllocTemp(cUnit); + int regPtr = dvmCompilerAllocTemp(cUnit); + MipsLIR *pcrLabel = genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, + mir->offset, NULL); + loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count, regMax); + loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_offset, regOff); + 
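+    /*
+     * charAt(idx) is value[offset + idx]: regMax (count) feeds the bounds
+     * check, regOff (offset) is added to the index, and regPtr (the value
+     * array, advanced past the ArrayObject header) is the base of the
+     * halfword indexed load below.
+     */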
loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_value, regPtr); + genBoundsCheck(cUnit, rlIdx.lowReg, regMax, mir->offset, pcrLabel); + dvmCompilerFreeTemp(cUnit, regMax); + opRegImm(cUnit, kOpAdd, regPtr, contents); + opRegReg(cUnit, kOpAdd, regOff, rlIdx.lowReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + loadBaseIndexed(cUnit, regPtr, regOff, rlResult.lowReg, 1, kUnsignedHalf); + storeValue(cUnit, rlDest, rlResult); + return false; +} + +static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir) +{ + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + RegLocation rlDest = inlinedTarget(cUnit, mir, false); + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + int signReg = dvmCompilerAllocTemp(cUnit); + /* + * abs(x) = y<=x>>31, (x+y)^y. + * Thumb2's IT block also yields 3 instructions, but imposes + * scheduling constraints. + */ + opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.lowReg, 31); + opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg); + opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg); + storeValue(cUnit, rlDest, rlResult); + return false; +} + +static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir) +{ + RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + RegLocation rlDest = inlinedTargetWide(cUnit, mir, false); + rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg); + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + int signReg = dvmCompilerAllocTemp(cUnit); + int tReg = dvmCompilerAllocTemp(cUnit); + /* + * abs(x) = y<=x>>31, (x+y)^y. + * Thumb2 IT block allows slightly shorter sequence, + * but introduces a scheduling barrier. Stick with this + * mechanism for now. + */ + opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.highReg, 31); + opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg); + newLIR3(cUnit, kMipsSltu, tReg, rlResult.lowReg, signReg); + opRegRegReg(cUnit, kOpAdd, rlResult.highReg, rlSrc.highReg, signReg); + opRegRegReg(cUnit, kOpAdd, rlResult.highReg, rlResult.highReg, tReg); + opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg); + opRegReg(cUnit, kOpXor, rlResult.highReg, signReg); + dvmCompilerFreeTemp(cUnit, signReg); + dvmCompilerFreeTemp(cUnit, tReg); + storeValueWide(cUnit, rlDest, rlResult); + return false; +} + +static bool genInlinedIntFloatConversion(CompilationUnit *cUnit, MIR *mir) +{ + // Just move from source to destination... + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlDest = inlinedTarget(cUnit, mir, false); + storeValue(cUnit, rlDest, rlSrc); + return false; +} + +static bool genInlinedLongDoubleConversion(CompilationUnit *cUnit, MIR *mir) +{ + // Just move from source to destination... + RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + RegLocation rlDest = inlinedTargetWide(cUnit, mir, false); + storeValueWide(cUnit, rlDest, rlSrc); + return false; +} +/* + * JITs a call to a C function. + * TODO: use this for faster native method invocation for simple native + * methods (http://b/3069458). 
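+ *
+ * Up to four Dalvik arguments are loaded into a0-a3, a pointer to
+ * self->interpSave.retval is stored at 16(sp) (the fifth-argument slot in
+ * the o32 ABI) for the callee, and a zero return value is treated as a
+ * pending exception and routed to TEMPLATE_THROW_EXCEPTION_COMMON.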
+ */ +static bool handleExecuteInlineC(CompilationUnit *cUnit, MIR *mir) +{ + DecodedInstruction *dInsn = &mir->dalvikInsn; + int operation = dInsn->vB; + unsigned int i; + const InlineOperation* inLineTable = dvmGetInlineOpsTable(); + uintptr_t fn = (int) inLineTable[operation].func; + if (fn == 0) { + dvmCompilerAbort(cUnit); + } + dvmCompilerFlushAllRegs(cUnit); /* Everything to home location */ + dvmCompilerClobberCallRegs(cUnit); + dvmCompilerClobber(cUnit, r4PC); + dvmCompilerClobber(cUnit, rINST); + int offset = offsetof(Thread, interpSave.retval); + opRegRegImm(cUnit, kOpAdd, r4PC, rSELF, offset); + newLIR3(cUnit, kMipsSw, r4PC, 16, r_SP); /* sp has plenty of space */ + genExportPC(cUnit, mir); + assert(dInsn->vA <= 4); + for (i=0; i < dInsn->vA; i++) { + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, i), i+r_A0); + } + LOAD_FUNC_ADDR(cUnit, r_T9, fn); + opReg(cUnit, kOpBlx, r_T9); + newLIR3(cUnit, kMipsLw, r_GP, STACK_OFFSET_GP, r_SP); + /* NULL? */ + MipsLIR *branchOver = opCompareBranch(cUnit, kMipsBne, r_V0, r_ZERO); + loadConstant(cUnit, r_A0, (int) (cUnit->method->insns + mir->offset)); + genDispatchToHandler(cUnit, TEMPLATE_THROW_EXCEPTION_COMMON); + MipsLIR *target = newLIR0(cUnit, kMipsPseudoTargetLabel); + target->defMask = ENCODE_ALL; + branchOver->generic.target = (LIR *) target; + return false; +} + +/* + * NOTE: Handles both range and non-range versions (arguments + * have already been normalized by this point). + */ +static bool handleExecuteInline(CompilationUnit *cUnit, MIR *mir) +{ + DecodedInstruction *dInsn = &mir->dalvikInsn; + assert(dInsn->opcode == OP_EXECUTE_INLINE_RANGE || + dInsn->opcode == OP_EXECUTE_INLINE); + switch (dInsn->vB) { + case INLINE_EMPTYINLINEMETHOD: + return false; /* Nop */ + + /* These ones we potentially JIT inline. */ + case INLINE_STRING_LENGTH: + return genInlinedStringLength(cUnit, mir); + case INLINE_STRING_IS_EMPTY: + return genInlinedStringIsEmpty(cUnit, mir); + case INLINE_MATH_ABS_INT: + return genInlinedAbsInt(cUnit, mir); + case INLINE_MATH_ABS_LONG: + return genInlinedAbsLong(cUnit, mir); + case INLINE_MATH_MIN_INT: + return genInlinedMinMaxInt(cUnit, mir, true); + case INLINE_MATH_MAX_INT: + return genInlinedMinMaxInt(cUnit, mir, false); + case INLINE_STRING_CHARAT: + return genInlinedStringCharAt(cUnit, mir); + case INLINE_MATH_SQRT: + return genInlineSqrt(cUnit, mir); + case INLINE_MATH_ABS_FLOAT: + return genInlinedAbsFloat(cUnit, mir); + case INLINE_MATH_ABS_DOUBLE: + return genInlinedAbsDouble(cUnit, mir); + case INLINE_STRING_COMPARETO: + return genInlinedCompareTo(cUnit, mir); + case INLINE_STRING_FASTINDEXOF_II: + return genInlinedFastIndexOf(cUnit, mir); + case INLINE_FLOAT_TO_RAW_INT_BITS: + case INLINE_INT_BITS_TO_FLOAT: + return genInlinedIntFloatConversion(cUnit, mir); + case INLINE_DOUBLE_TO_RAW_LONG_BITS: + case INLINE_LONG_BITS_TO_DOUBLE: + return genInlinedLongDoubleConversion(cUnit, mir); + + /* + * These ones we just JIT a call to a C function for. + * TODO: special-case these in the other "invoke" call paths. + */ + case INLINE_STRING_EQUALS: + case INLINE_MATH_COS: + case INLINE_MATH_SIN: + case INLINE_FLOAT_TO_INT_BITS: + case INLINE_DOUBLE_TO_LONG_BITS: + return handleExecuteInlineC(cUnit, mir); + } + dvmCompilerAbort(cUnit); + return false; // Not reachable; keeps compiler happy. 
+} + +static bool handleFmt51l(CompilationUnit *cUnit, MIR *mir) +{ + //TUNING: We're using core regs here - not optimal when target is a double + RegLocation rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + loadConstantNoClobber(cUnit, rlResult.lowReg, + mir->dalvikInsn.vB_wide & 0xFFFFFFFFUL); + loadConstantNoClobber(cUnit, rlResult.highReg, + (mir->dalvikInsn.vB_wide>>32) & 0xFFFFFFFFUL); + storeValueWide(cUnit, rlDest, rlResult); + return false; +} + +/* + * The following are special processing routines that handle transfer of + * controls between compiled code and the interpreter. Certain VM states like + * Dalvik PC and special-purpose registers are reconstructed here. + */ + +/* Chaining cell for code that may need warmup. */ +static void handleNormalChainingCell(CompilationUnit *cUnit, + unsigned int offset) +{ + newLIR3(cUnit, kMipsLw, r_A0, + offsetof(Thread, jitToInterpEntries.dvmJitToInterpNormal), + rSELF); + newLIR2(cUnit, kMipsJalr, r_RA, r_A0); + addWordData(cUnit, NULL, (int) (cUnit->method->insns + offset)); +} + +/* + * Chaining cell for instructions that immediately following already translated + * code. + */ +static void handleHotChainingCell(CompilationUnit *cUnit, + unsigned int offset) +{ + newLIR3(cUnit, kMipsLw, r_A0, + offsetof(Thread, jitToInterpEntries.dvmJitToInterpTraceSelect), + rSELF); + newLIR2(cUnit, kMipsJalr, r_RA, r_A0); + addWordData(cUnit, NULL, (int) (cUnit->method->insns + offset)); +} + +/* Chaining cell for branches that branch back into the same basic block */ +static void handleBackwardBranchChainingCell(CompilationUnit *cUnit, + unsigned int offset) +{ + /* + * Use raw instruction constructors to guarantee that the generated + * instructions fit the predefined cell size. + */ +#if defined(WITH_SELF_VERIFICATION) + newLIR3(cUnit, kMipsLw, r_A0, + offsetof(Thread, jitToInterpEntries.dvmJitToInterpBackwardBranch), + rSELF); +#else + newLIR3(cUnit, kMipsLw, r_A0, + offsetof(Thread, jitToInterpEntries.dvmJitToInterpNormal), + rSELF); +#endif + newLIR2(cUnit, kMipsJalr, r_RA, r_A0); + addWordData(cUnit, NULL, (int) (cUnit->method->insns + offset)); +} + +/* Chaining cell for monomorphic method invocations. */ +static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit, + const Method *callee) +{ + newLIR3(cUnit, kMipsLw, r_A0, + offsetof(Thread, jitToInterpEntries.dvmJitToInterpTraceSelect), + rSELF); + newLIR2(cUnit, kMipsJalr, r_RA, r_A0); + addWordData(cUnit, NULL, (int) (callee->insns)); +} + +/* Chaining cell for monomorphic method invocations. */ +static void handleInvokePredictedChainingCell(CompilationUnit *cUnit) +{ + /* Should not be executed in the initial state */ + addWordData(cUnit, NULL, PREDICTED_CHAIN_BX_PAIR_INIT); + /* branch delay slot nop */ + addWordData(cUnit, NULL, PREDICTED_CHAIN_DELAY_SLOT_INIT); + /* To be filled: class */ + addWordData(cUnit, NULL, PREDICTED_CHAIN_CLAZZ_INIT); + /* To be filled: method */ + addWordData(cUnit, NULL, PREDICTED_CHAIN_METHOD_INIT); + /* + * Rechain count. The initial value of 0 here will trigger chaining upon + * the first invocation of this callsite. 
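+     *
+     * The cell is therefore five words: the branch and delay-slot pair
+     * (whose initial values are never meant to be executed), the cached
+     * class and method pointers, and this counter, all rewritten when the
+     * cell is patched via dvmJitToPatchPredictedChain.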
+ */ + addWordData(cUnit, NULL, PREDICTED_CHAIN_COUNTER_INIT); +} + +/* Load the Dalvik PC into a0 and jump to the specified target */ +static void handlePCReconstruction(CompilationUnit *cUnit, + MipsLIR *targetLabel) +{ + MipsLIR **pcrLabel = + (MipsLIR **) cUnit->pcReconstructionList.elemList; + int numElems = cUnit->pcReconstructionList.numUsed; + int i; + + /* + * We should never reach here through fall-through code, so insert + * a bomb to signal troubles immediately. + */ + if (numElems) { + newLIR0(cUnit, kMipsUndefined); + } + + for (i = 0; i < numElems; i++) { + dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]); + /* a0 = dalvik PC */ + loadConstant(cUnit, r_A0, pcrLabel[i]->operands[0]); + genUnconditionalBranch(cUnit, targetLabel); + } +} + +static const char *extendedMIROpNames[kMirOpLast - kMirOpFirst] = { + "kMirOpPhi", + "kMirOpNullNRangeUpCheck", + "kMirOpNullNRangeDownCheck", + "kMirOpLowerBound", + "kMirOpPunt", + "kMirOpCheckInlinePrediction", +}; + +/* + * vA = arrayReg; + * vB = idxReg; + * vC = endConditionReg; + * arg[0] = maxC + * arg[1] = minC + * arg[2] = loopBranchConditionCode + */ +static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) +{ + /* + * NOTE: these synthesized blocks don't have ssa names assigned + * for Dalvik registers. However, because they dominate the following + * blocks we can simply use the Dalvik name w/ subscript 0 as the + * ssa name. + */ + DecodedInstruction *dInsn = &mir->dalvikInsn; + const int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + const int maxC = dInsn->arg[0]; + int regLength; + RegLocation rlArray = cUnit->regLocation[mir->dalvikInsn.vA]; + RegLocation rlIdxEnd = cUnit->regLocation[mir->dalvikInsn.vC]; + + /* regArray <- arrayRef */ + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIdxEnd = loadValue(cUnit, rlIdxEnd, kCoreReg); + genRegImmCheck(cUnit, kMipsCondEq, rlArray.lowReg, 0, 0, + (MipsLIR *) cUnit->loopAnalysis->branchToPCR); + + /* regLength <- len(arrayRef) */ + regLength = dvmCompilerAllocTemp(cUnit); + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLength); + + int delta = maxC; + /* + * If the loop end condition is ">=" instead of ">", then the largest value + * of the index is "endCondition - 1". 
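+     *
+     * The hoisted check below then punts to the interpreter unless
+     * idxEnd + delta is strictly less than the array length, so the trace
+     * can rely on every index touched by the loop being in bounds.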
+ */ + if (dInsn->arg[2] == OP_IF_GE) { + delta--; + } + + if (delta) { + int tReg = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpAdd, tReg, rlIdxEnd.lowReg, delta); + rlIdxEnd.lowReg = tReg; + dvmCompilerFreeTemp(cUnit, tReg); + } + /* Punt if "regIdxEnd < len(Array)" is false */ + genRegRegCheck(cUnit, kMipsCondGe, rlIdxEnd.lowReg, regLength, 0, + (MipsLIR *) cUnit->loopAnalysis->branchToPCR); +} + +/* + * vA = arrayReg; + * vB = idxReg; + * vC = endConditionReg; + * arg[0] = maxC + * arg[1] = minC + * arg[2] = loopBranchConditionCode + */ +static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir) +{ + DecodedInstruction *dInsn = &mir->dalvikInsn; + const int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + const int regLength = dvmCompilerAllocTemp(cUnit); + const int maxC = dInsn->arg[0]; + RegLocation rlArray = cUnit->regLocation[mir->dalvikInsn.vA]; + RegLocation rlIdxInit = cUnit->regLocation[mir->dalvikInsn.vB]; + + /* regArray <- arrayRef */ + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIdxInit = loadValue(cUnit, rlIdxInit, kCoreReg); + genRegImmCheck(cUnit, kMipsCondEq, rlArray.lowReg, 0, 0, + (MipsLIR *) cUnit->loopAnalysis->branchToPCR); + + /* regLength <- len(arrayRef) */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLength); + + if (maxC) { + int tReg = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpAdd, tReg, rlIdxInit.lowReg, maxC); + rlIdxInit.lowReg = tReg; + dvmCompilerFreeTemp(cUnit, tReg); + } + + /* Punt if "regIdxInit < len(Array)" is false */ + genRegRegCheck(cUnit, kMipsCondGe, rlIdxInit.lowReg, regLength, 0, + (MipsLIR *) cUnit->loopAnalysis->branchToPCR); +} + +/* + * vA = idxReg; + * vB = minC; + */ +static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir) +{ + DecodedInstruction *dInsn = &mir->dalvikInsn; + const int minC = dInsn->vB; + RegLocation rlIdx = cUnit->regLocation[mir->dalvikInsn.vA]; + + /* regIdx <- initial index value */ + rlIdx = loadValue(cUnit, rlIdx, kCoreReg); + + /* Punt if "regIdxInit + minC >= 0" is false */ + genRegImmCheck(cUnit, kMipsCondLt, rlIdx.lowReg, -minC, 0, + (MipsLIR *) cUnit->loopAnalysis->branchToPCR); +} + +/* + * vC = this + * + * A predicted inlining target looks like the following, where instructions + * between 0x2f130d24 and 0x2f130d40 are checking if the predicted class + * matches "this", and the verificaion code is generated by this routine. + * + * (C) means the instruction is inlined from the callee, and (PI) means the + * instruction is the predicted inlined invoke, whose corresponding + * instructions are still generated to handle the mispredicted case. 
+ * + * D/dalvikvm( 2377): -------- kMirOpCheckInlinePrediction + * D/dalvikvm( 2377): 0x2f130d24 (0020): lw v0,16(s1) + * D/dalvikvm( 2377): 0x2f130d28 (0024): lui v1,0x0011(17) + * D/dalvikvm( 2377): 0x2f130d2c (0028): ori v1,v1,0x11e418(1172504) + * D/dalvikvm( 2377): 0x2f130d30 (002c): beqz v0,0x2f130df0 (L0x11f1f0) + * D/dalvikvm( 2377): 0x2f130d34 (0030): pref 0,0(v0) + * D/dalvikvm( 2377): 0x2f130d38 (0034): lw a0,0(v0) + * D/dalvikvm( 2377): 0x2f130d3c (0038): bne v1,a0,0x2f130d54 (L0x11f518) + * D/dalvikvm( 2377): 0x2f130d40 (003c): pref 0,8(v0) + * D/dalvikvm( 2377): -------- dalvik offset: 0x000a @ +iget-object-quick (C) v3, v4, (#8) + * D/dalvikvm( 2377): 0x2f130d44 (0040): lw a1,8(v0) + * D/dalvikvm( 2377): -------- dalvik offset: 0x000a @ +invoke-virtual-quick (PI) v4 + * D/dalvikvm( 2377): 0x2f130d48 (0044): sw a1,12(s1) + * D/dalvikvm( 2377): 0x2f130d4c (0048): b 0x2f130e18 (L0x120150) + * D/dalvikvm( 2377): 0x2f130d50 (004c): lw a0,116(s2) + * D/dalvikvm( 2377): L0x11f518: + * D/dalvikvm( 2377): 0x2f130d54 (0050): lw a0,16(s1) + * D/dalvikvm( 2377): 0x2f130d58 (0054): addiu s4,s1,0xffffffe8(-24) + * D/dalvikvm( 2377): 0x2f130d5c (0058): beqz a0,0x2f130e00 (L0x11f618) + * D/dalvikvm( 2377): 0x2f130d60 (005c): pref 1,0(s4) + * D/dalvikvm( 2377): -------- BARRIER + * D/dalvikvm( 2377): 0x2f130d64 (0060): sw a0,0(s4) + * D/dalvikvm( 2377): 0x2f130d68 (0064): addiu s4,s4,0x0004(4) + * D/dalvikvm( 2377): -------- BARRIER + * D/dalvikvm( 2377): 0x2f130d6c (0068): lui s0,0x2d22(11554) + * D/dalvikvm( 2377): 0x2f130d70 (006c): ori s0,s0,0x2d228464(757236836) + * D/dalvikvm( 2377): 0x2f130d74 (0070): lahi/lui a1,0x2f13(12051) + * D/dalvikvm( 2377): 0x2f130d78 (0074): lalo/ori a1,a1,0x2f130ddc(789777884) + * D/dalvikvm( 2377): 0x2f130d7c (0078): lahi/lui a2,0x2f13(12051) + * D/dalvikvm( 2377): 0x2f130d80 (007c): lalo/ori a2,a2,0x2f130e24(789777956) + * D/dalvikvm( 2377): 0x2f130d84 (0080): jal 0x2f12d1ec(789762540) + * D/dalvikvm( 2377): 0x2f130d88 (0084): nop + * D/dalvikvm( 2377): 0x2f130d8c (0088): b 0x2f130e24 (L0x11ed6c) + * D/dalvikvm( 2377): 0x2f130d90 (008c): nop + * D/dalvikvm( 2377): 0x2f130d94 (0090): b 0x2f130e04 (L0x11ffd0) + * D/dalvikvm( 2377): 0x2f130d98 (0094): lui a0,0x2d22(11554) + * D/dalvikvm( 2377): 0x2f130d9c (0098): lw a0,44(s4) + * D/dalvikvm( 2377): 0x2f130da0 (009c): bgtz a1,0x2f130dc4 (L0x11fb98) + * D/dalvikvm( 2377): 0x2f130da4 (00a0): nop + * D/dalvikvm( 2377): 0x2f130da8 (00a4): lui t9,0x2aba(10938) + * D/dalvikvm( 2377): 0x2f130dac (00a8): ori t9,t9,0x2abae3f8(716891128) + * D/dalvikvm( 2377): 0x2f130db0 (00ac): move a1,s2 + * D/dalvikvm( 2377): 0x2f130db4 (00b0): jalr ra,t9 + * D/dalvikvm( 2377): 0x2f130db8 (00b4): nop + * D/dalvikvm( 2377): 0x2f130dbc (00b8): lw gp,84(sp) + * D/dalvikvm( 2377): 0x2f130dc0 (00bc): move a0,v0 + * D/dalvikvm( 2377): 0x2f130dc4 (00c0): lahi/lui a1,0x2f13(12051) + * D/dalvikvm( 2377): 0x2f130dc8 (00c4): lalo/ori a1,a1,0x2f130ddc(789777884) + * D/dalvikvm( 2377): 0x2f130dcc (00c8): jal 0x2f12d0c4(789762244) + * D/dalvikvm( 2377): 0x2f130dd0 (00cc): nop + * D/dalvikvm( 2377): 0x2f130dd4 (00d0): b 0x2f130e04 (L0x11ffd0) + * D/dalvikvm( 2377): 0x2f130dd8 (00d4): lui a0,0x2d22(11554) + * D/dalvikvm( 2377): 0x2f130ddc (00d8): .align4 + * D/dalvikvm( 2377): L0x11ed2c: + * D/dalvikvm( 2377): -------- dalvik offset: 0x000d @ move-result-object (PI) v3, (#0), (#0) + * D/dalvikvm( 2377): 0x2f130ddc (00d8): lw a2,16(s2) + * D/dalvikvm( 2377): 0x2f130de0 (00dc): sw a2,12(s1) + * D/dalvikvm( 2377): 0x2f130de4 (00e0): b 0x2f130e18 
(L0x120150) + * D/dalvikvm( 2377): 0x2f130de8 (00e4): lw a0,116(s2) + * D/dalvikvm( 2377): 0x2f130dec (00e8): undefined + * D/dalvikvm( 2377): L0x11f1f0: + * D/dalvikvm( 2377): -------- reconstruct dalvik PC : 0x2d228464 @ +0x000a + * D/dalvikvm( 2377): 0x2f130df0 (00ec): lui a0,0x2d22(11554) + * D/dalvikvm( 2377): 0x2f130df4 (00f0): ori a0,a0,0x2d228464(757236836) + * D/dalvikvm( 2377): 0x2f130df8 (00f4): b 0x2f130e0c (L0x120090) + * D/dalvikvm( 2377): 0x2f130dfc (00f8): lw a1,108(s2) + * D/dalvikvm( 2377): L0x11f618: + * D/dalvikvm( 2377): -------- reconstruct dalvik PC : 0x2d228464 @ +0x000a + * D/dalvikvm( 2377): 0x2f130e00 (00fc): lui a0,0x2d22(11554) + * D/dalvikvm( 2377): 0x2f130e04 (0100): ori a0,a0,0x2d228464(757236836) + * D/dalvikvm( 2377): Exception_Handling: + * D/dalvikvm( 2377): 0x2f130e08 (0104): lw a1,108(s2) + * D/dalvikvm( 2377): 0x2f130e0c (0108): jalr ra,a1 + * D/dalvikvm( 2377): 0x2f130e10 (010c): nop + * D/dalvikvm( 2377): 0x2f130e14 (0110): .align4 + * D/dalvikvm( 2377): L0x11edac: + * D/dalvikvm( 2377): -------- chaining cell (hot): 0x000e + * D/dalvikvm( 2377): 0x2f130e14 (0110): lw a0,116(s2) + * D/dalvikvm( 2377): 0x2f130e18 (0114): jalr ra,a0 + * D/dalvikvm( 2377): 0x2f130e1c (0118): nop + * D/dalvikvm( 2377): 0x2f130e20 (011c): data 0x2d22846c(757236844) + * D/dalvikvm( 2377): 0x2f130e24 (0120): .align4 + * D/dalvikvm( 2377): L0x11ed6c: + * D/dalvikvm( 2377): -------- chaining cell (predicted) + * D/dalvikvm( 2377): 0x2f130e24 (0120): data 0xe7fe(59390) + * D/dalvikvm( 2377): 0x2f130e28 (0124): data 0x0000(0) + * D/dalvikvm( 2377): 0x2f130e2c (0128): data 0x0000(0) + * D/dalvikvm( 2377): 0x2f130e30 (012c): data 0x0000(0) + * D/dalvikvm( 2377): 0x2f130e34 (0130): data 0x0000(0) + */ +static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir) +{ + CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo; + RegLocation rlThis = cUnit->regLocation[mir->dalvikInsn.vC]; + + rlThis = loadValue(cUnit, rlThis, kCoreReg); + int regPredictedClass = dvmCompilerAllocTemp(cUnit); + loadClassPointer(cUnit, regPredictedClass, (int) callsiteInfo); + genNullCheck(cUnit, rlThis.sRegLow, rlThis.lowReg, mir->offset, + NULL);/* null object? */ + int regActualClass = dvmCompilerAllocTemp(cUnit); + loadWordDisp(cUnit, rlThis.lowReg, offsetof(Object, clazz), regActualClass); +// opRegReg(cUnit, kOpCmp, regPredictedClass, regActualClass); + /* + * Set the misPredBranchOver target so that it will be generated when the + * code for the non-optimized invoke is generated. 
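+     * (The kMipsBne emitted below falls through into the inlined callee when
+     * the predicted and actual class pointers match, and branches to that
+     * out-of-line invoke path when they do not.)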
+ */ + callsiteInfo->misPredBranchOver = (LIR *) opCompareBranch(cUnit, kMipsBne, regPredictedClass, regActualClass); +} + +/* Extended MIR instructions like PHI */ +static void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir) +{ + int opOffset = mir->dalvikInsn.opcode - kMirOpFirst; + char *msg = (char *)dvmCompilerNew(strlen(extendedMIROpNames[opOffset]) + 1, + false); + strcpy(msg, extendedMIROpNames[opOffset]); + newLIR1(cUnit, kMipsPseudoExtended, (int) msg); + + switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) { + case kMirOpPhi: { + char *ssaString = dvmCompilerGetSSAString(cUnit, mir->ssaRep); + newLIR1(cUnit, kMipsPseudoSSARep, (int) ssaString); + break; + } + case kMirOpNullNRangeUpCheck: { + genHoistedChecksForCountUpLoop(cUnit, mir); + break; + } + case kMirOpNullNRangeDownCheck: { + genHoistedChecksForCountDownLoop(cUnit, mir); + break; + } + case kMirOpLowerBound: { + genHoistedLowerBoundCheck(cUnit, mir); + break; + } + case kMirOpPunt: { + genUnconditionalBranch(cUnit, + (MipsLIR *) cUnit->loopAnalysis->branchToPCR); + break; + } + case kMirOpCheckInlinePrediction: { + genValidationForPredictedInline(cUnit, mir); + break; + } + default: + break; + } +} + +/* + * Create a PC-reconstruction cell for the starting offset of this trace. + * Since the PCR cell is placed near the end of the compiled code which is + * usually out of range for a conditional branch, we put two branches (one + * branch over to the loop body and one layover branch to the actual PCR) at the + * end of the entry block. + */ +static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry, + MipsLIR *bodyLabel) +{ + /* Set up the place holder to reconstruct this Dalvik PC */ + MipsLIR *pcrLabel = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + pcrLabel->opcode = kMipsPseudoPCReconstructionCell; + pcrLabel->operands[0] = + (int) (cUnit->method->insns + entry->startOffset); + pcrLabel->operands[1] = entry->startOffset; + /* Insert the place holder to the growable list */ + dvmInsertGrowableList(&cUnit->pcReconstructionList, (intptr_t) pcrLabel); + + /* + * Next, create two branches - one branch over to the loop body and the + * other branch to the PCR cell to punt. + */ + MipsLIR *branchToBody = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + branchToBody->opcode = kMipsB; + branchToBody->generic.target = (LIR *) bodyLabel; + setupResourceMasks(branchToBody); + cUnit->loopAnalysis->branchToBody = (LIR *) branchToBody; + + MipsLIR *branchToPCR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + branchToPCR->opcode = kMipsB; + branchToPCR->generic.target = (LIR *) pcrLabel; + setupResourceMasks(branchToPCR); + cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR; +} + +#if defined(WITH_SELF_VERIFICATION) +static bool selfVerificationPuntOps(MIR *mir) +{ +assert(0); /* MIPSTODO port selfVerificationPuntOps() */ + DecodedInstruction *decInsn = &mir->dalvikInsn; + + /* + * All opcodes that can throw exceptions and use the + * TEMPLATE_THROW_EXCEPTION_COMMON template should be excluded in the trace + * under self-verification mode. 
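+     * Returning true routes the opcode to genInterpSingleStep() in
+     * dvmCompilerMIR2LIR() below instead of compiling it into the trace.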
+ */ + switch (decInsn->opcode) { + case OP_MONITOR_ENTER: + case OP_MONITOR_EXIT: + case OP_NEW_INSTANCE: + case OP_NEW_INSTANCE_JUMBO: + case OP_NEW_ARRAY: + case OP_NEW_ARRAY_JUMBO: + case OP_CHECK_CAST: + case OP_CHECK_CAST_JUMBO: + case OP_MOVE_EXCEPTION: + case OP_FILL_ARRAY_DATA: + case OP_EXECUTE_INLINE: + case OP_EXECUTE_INLINE_RANGE: + return true; + default: + return false; + } +} +#endif + +void dvmCompilerMIR2LIR(CompilationUnit *cUnit) +{ + /* Used to hold the labels of each block */ + MipsLIR *labelList = + (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR) * cUnit->numBlocks, true); + MipsLIR *headLIR = NULL; + GrowableList chainingListByType[kChainingCellGap]; + int i; + + /* + * Initialize various types chaining lists. + */ + for (i = 0; i < kChainingCellGap; i++) { + dvmInitGrowableList(&chainingListByType[i], 2); + } + + /* Clear the visited flag for each block */ + dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag, + kAllNodes, false /* isIterative */); + + GrowableListIterator iterator; + dvmGrowableListIteratorInit(&cUnit->blockList, &iterator); + + /* Traces start with a profiling entry point. Generate it here */ + cUnit->profileCodeSize = genTraceProfileEntry(cUnit); + + /* Handle the content in each basic block */ + for (i = 0; ; i++) { + MIR *mir; + BasicBlock *bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator); + if (bb == NULL) break; + if (bb->visited == true) continue; + + labelList[i].operands[0] = bb->startOffset; + + if (bb->blockType >= kChainingCellGap) { + if (bb->isFallThroughFromInvoke == true) { + /* Align this block first since it is a return chaining cell */ + newLIR0(cUnit, kMipsPseudoPseudoAlign4); + } + /* + * Append the label pseudo LIR first. Chaining cells will be handled + * separately afterwards. + */ + dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]); + } + + if (bb->blockType == kEntryBlock) { + labelList[i].opcode = kMipsPseudoEntryBlock; + if (bb->firstMIRInsn == NULL) { + continue; + } else { + setupLoopEntryBlock(cUnit, bb, + &labelList[bb->fallThrough->id]); + } + } else if (bb->blockType == kExitBlock) { + labelList[i].opcode = kMipsPseudoExitBlock; + goto gen_fallthrough; + } else if (bb->blockType == kDalvikByteCode) { + if (bb->hidden == true) continue; + labelList[i].opcode = kMipsPseudoNormalBlockLabel; + /* Reset the register state */ + dvmCompilerResetRegPool(cUnit); + dvmCompilerClobberAllRegs(cUnit); + dvmCompilerResetNullCheck(cUnit); + } else { + switch (bb->blockType) { + case kChainingCellNormal: + labelList[i].opcode = kMipsPseudoChainingCellNormal; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[kChainingCellNormal], i); + break; + case kChainingCellInvokeSingleton: + labelList[i].opcode = + kMipsPseudoChainingCellInvokeSingleton; + labelList[i].operands[0] = + (int) bb->containingMethod; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[kChainingCellInvokeSingleton], i); + break; + case kChainingCellInvokePredicted: + labelList[i].opcode = + kMipsPseudoChainingCellInvokePredicted; + /* + * Move the cached method pointer from operand 1 to 0. + * Operand 0 was clobbered earlier in this routine to store + * the block starting offset, which is not applicable to + * predicted chaining cell. 
+ */ + labelList[i].operands[0] = labelList[i].operands[1]; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[kChainingCellInvokePredicted], i); + break; + case kChainingCellHot: + labelList[i].opcode = + kMipsPseudoChainingCellHot; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[kChainingCellHot], i); + break; + case kPCReconstruction: + /* Make sure exception handling block is next */ + labelList[i].opcode = + kMipsPseudoPCReconstructionBlockLabel; + handlePCReconstruction(cUnit, + &labelList[cUnit->puntBlock->id]); + break; + case kExceptionHandling: + labelList[i].opcode = kMipsPseudoEHBlockLabel; + if (cUnit->pcReconstructionList.numUsed) { + loadWordDisp(cUnit, rSELF, offsetof(Thread, + jitToInterpEntries.dvmJitToInterpPunt), + r_A1); + opReg(cUnit, kOpBlx, r_A1); + } + break; + case kChainingCellBackwardBranch: + labelList[i].opcode = + kMipsPseudoChainingCellBackwardBranch; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[kChainingCellBackwardBranch], + i); + break; + default: + break; + } + continue; + } + + /* + * Try to build a longer optimization unit. Currently if the previous + * block ends with a goto, we continue adding instructions and don't + * reset the register allocation pool. + */ + for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) { + bb = nextBB; + bb->visited = true; + cUnit->nextCodegenBlock = NULL; + + for (mir = bb->firstMIRInsn; mir; mir = mir->next) { + + dvmCompilerResetRegPool(cUnit); + if (gDvmJit.disableOpt & (1 << kTrackLiveTemps)) { + dvmCompilerClobberAllRegs(cUnit); + } + + if (gDvmJit.disableOpt & (1 << kSuppressLoads)) { + dvmCompilerResetDefTracking(cUnit); + } + + if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) { + handleExtendedMIR(cUnit, mir); + continue; + } + + Opcode dalvikOpcode = mir->dalvikInsn.opcode; + InstructionFormat dalvikFormat = + dexGetFormatFromOpcode(dalvikOpcode); + const char *note; + if (mir->OptimizationFlags & MIR_INLINED) { + note = " (I)"; + } else if (mir->OptimizationFlags & MIR_INLINED_PRED) { + note = " (PI)"; + } else if (mir->OptimizationFlags & MIR_CALLEE) { + note = " (C)"; + } else { + note = NULL; + } + + MipsLIR *boundaryLIR = + newLIR2(cUnit, kMipsPseudoDalvikByteCodeBoundary, + mir->offset, + (int) dvmCompilerGetDalvikDisassembly(&mir->dalvikInsn, + note)); + if (mir->ssaRep) { + char *ssaString = dvmCompilerGetSSAString(cUnit, mir->ssaRep); + newLIR1(cUnit, kMipsPseudoSSARep, (int) ssaString); + } + + /* Remember the first LIR for this block */ + if (headLIR == NULL) { + headLIR = boundaryLIR; + /* Set the first boundaryLIR as a scheduling barrier */ + headLIR->defMask = ENCODE_ALL; + } + + bool notHandled; + /* + * Debugging: screen the opcode first to see if it is in the + * do[-not]-compile list + */ + bool singleStepMe = SINGLE_STEP_OP(dalvikOpcode); +#if defined(WITH_SELF_VERIFICATION) + if (singleStepMe == false) { + singleStepMe = selfVerificationPuntOps(mir); + } +#endif + if (singleStepMe || cUnit->allSingleStep) { + notHandled = false; + genInterpSingleStep(cUnit, mir); + } else { + opcodeCoverage[dalvikOpcode]++; + switch (dalvikFormat) { + case kFmt10t: + case kFmt20t: + case kFmt30t: + notHandled = handleFmt10t_Fmt20t_Fmt30t(cUnit, + mir, bb, labelList); + break; + case kFmt10x: + notHandled = handleFmt10x(cUnit, mir); + break; + case kFmt11n: + case kFmt31i: + notHandled = handleFmt11n_Fmt31i(cUnit, mir); + break; + case kFmt11x: + notHandled = handleFmt11x(cUnit, mir); 
+ break; + case kFmt12x: + notHandled = handleFmt12x(cUnit, mir); + break; + case kFmt20bc: + case kFmt40sc: + notHandled = handleFmt20bc_Fmt40sc(cUnit, mir); + break; + case kFmt21c: + case kFmt31c: + case kFmt41c: + notHandled = handleFmt21c_Fmt31c_Fmt41c(cUnit, mir); + break; + case kFmt21h: + notHandled = handleFmt21h(cUnit, mir); + break; + case kFmt21s: + notHandled = handleFmt21s(cUnit, mir); + break; + case kFmt21t: + notHandled = handleFmt21t(cUnit, mir, bb, + labelList); + break; + case kFmt22b: + case kFmt22s: + notHandled = handleFmt22b_Fmt22s(cUnit, mir); + break; + case kFmt22c: + case kFmt52c: + notHandled = handleFmt22c_Fmt52c(cUnit, mir); + break; + case kFmt22cs: + notHandled = handleFmt22cs(cUnit, mir); + break; + case kFmt22t: + notHandled = handleFmt22t(cUnit, mir, bb, + labelList); + break; + case kFmt22x: + case kFmt32x: + notHandled = handleFmt22x_Fmt32x(cUnit, mir); + break; + case kFmt23x: + notHandled = handleFmt23x(cUnit, mir); + break; + case kFmt31t: + notHandled = handleFmt31t(cUnit, mir); + break; + case kFmt3rc: + case kFmt35c: + case kFmt5rc: + notHandled = handleFmt35c_3rc_5rc(cUnit, mir, bb, + labelList); + break; + case kFmt3rms: + case kFmt35ms: + notHandled = handleFmt35ms_3rms(cUnit, mir,bb, + labelList); + break; + case kFmt35mi: + case kFmt3rmi: + notHandled = handleExecuteInline(cUnit, mir); + break; + case kFmt51l: + notHandled = handleFmt51l(cUnit, mir); + break; + default: + notHandled = true; + break; + } + } + if (notHandled) { + LOGE("%#06x: Opcode %#x (%s) / Fmt %d not handled", + mir->offset, + dalvikOpcode, dexGetOpcodeName(dalvikOpcode), + dalvikFormat); + dvmCompilerAbort(cUnit); + break; + } + } + } + + if (bb->blockType == kEntryBlock) { + dvmCompilerAppendLIR(cUnit, + (LIR *) cUnit->loopAnalysis->branchToBody); + dvmCompilerAppendLIR(cUnit, + (LIR *) cUnit->loopAnalysis->branchToPCR); + } + + if (headLIR) { + /* + * Eliminate redundant loads/stores and delay stores into later + * slots + */ + dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR, + cUnit->lastLIRInsn); + /* Reset headLIR which is also the optimization boundary */ + headLIR = NULL; + } + +gen_fallthrough: + /* + * Check if the block is terminated due to trace length constraint - + * insert an unconditional branch to the chaining cell. 
+ */ + if (bb->needFallThroughBranch) { + genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]); + } + } + + /* Handle the chaining cells in predefined order */ + for (i = 0; i < kChainingCellGap; i++) { + size_t j; + int *blockIdList = (int *) chainingListByType[i].elemList; + + cUnit->numChainingCells[i] = chainingListByType[i].numUsed; + + /* No chaining cells of this type */ + if (cUnit->numChainingCells[i] == 0) + continue; + + /* Record the first LIR for a new type of chaining cell */ + cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]]; + + for (j = 0; j < chainingListByType[i].numUsed; j++) { + int blockId = blockIdList[j]; + BasicBlock *chainingBlock = + (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, + blockId); + + /* Align this chaining cell first */ + newLIR0(cUnit, kMipsPseudoPseudoAlign4); + + /* Insert the pseudo chaining instruction */ + dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]); + + + switch (chainingBlock->blockType) { + case kChainingCellNormal: + handleNormalChainingCell(cUnit, chainingBlock->startOffset); + break; + case kChainingCellInvokeSingleton: + handleInvokeSingletonChainingCell(cUnit, + chainingBlock->containingMethod); + break; + case kChainingCellInvokePredicted: + handleInvokePredictedChainingCell(cUnit); + break; + case kChainingCellHot: + handleHotChainingCell(cUnit, chainingBlock->startOffset); + break; + case kChainingCellBackwardBranch: + handleBackwardBranchChainingCell(cUnit, + chainingBlock->startOffset); + break; + default: + LOGE("Bad blocktype %d", chainingBlock->blockType); + dvmCompilerAbort(cUnit); + } + } + } + + /* Mark the bottom of chaining cells */ + cUnit->chainingCellBottom = (LIR *) newLIR0(cUnit, kMipsChainingCellBottom); + + /* + * Generate the branch to the dvmJitToInterpNoChain entry point at the end + * of all chaining cells for the overflow cases. + */ + if (cUnit->switchOverflowPad) { + loadConstant(cUnit, r_A0, (int) cUnit->switchOverflowPad); + loadWordDisp(cUnit, rSELF, offsetof(Thread, + jitToInterpEntries.dvmJitToInterpNoChain), r_A2); + opRegReg(cUnit, kOpAdd, r_A1, r_A1); + opRegRegReg(cUnit, kOpAdd, r4PC, r_A0, r_A1); +#if defined(WITH_JIT_TUNING) + loadConstant(cUnit, r_A0, kSwitchOverflow); +#endif + opReg(cUnit, kOpBlx, r_A2); + } + + dvmCompilerApplyGlobalOptimizations(cUnit); + +#if defined(WITH_SELF_VERIFICATION) + selfVerificationBranchInsertPass(cUnit); +#endif +} + +/* + * Accept the work and start compiling. Returns true if compilation + * is attempted. 
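+ * Note that the return value only reports whether a compile was attempted;
+ * a failed attempt is signalled by clearing work->result.codeAddress.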
+ */ +bool dvmCompilerDoWork(CompilerWorkOrder *work) +{ + JitTraceDescription *desc; + bool isCompile; + bool success = true; + + if (gDvmJit.codeCacheFull) { + return false; + } + + switch (work->kind) { + case kWorkOrderTrace: + isCompile = true; + /* Start compilation with maximally allowed trace length */ + desc = (JitTraceDescription *)work->info; + success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result, + work->bailPtr, 0 /* no hints */); + break; + case kWorkOrderTraceDebug: { + bool oldPrintMe = gDvmJit.printMe; + gDvmJit.printMe = true; + isCompile = true; + /* Start compilation with maximally allowed trace length */ + desc = (JitTraceDescription *)work->info; + success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result, + work->bailPtr, 0 /* no hints */); + gDvmJit.printMe = oldPrintMe; + break; + } + case kWorkOrderProfileMode: + dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info); + isCompile = false; + break; + default: + isCompile = false; + LOGE("Jit: unknown work order type"); + assert(0); // Bail if debug build, discard otherwise + } + if (!success) + work->result.codeAddress = NULL; + return isCompile; +} + +/* Architectural-specific debugging helpers go here */ +void dvmCompilerArchDump(void) +{ + /* Print compiled opcode in this VM instance */ + int i, start, streak; + char buf[1024]; + + streak = i = 0; + buf[0] = 0; + while (opcodeCoverage[i] == 0 && i < 256) { + i++; + } + if (i == 256) { + return; + } + for (start = i++, streak = 1; i < 256; i++) { + if (opcodeCoverage[i]) { + streak++; + } else { + if (streak == 1) { + sprintf(buf+strlen(buf), "%x,", start); + } else { + sprintf(buf+strlen(buf), "%x-%x,", start, start + streak - 1); + } + streak = 0; + while (opcodeCoverage[i] == 0 && i < 256) { + i++; + } + if (i < 256) { + streak = 1; + start = i; + } + } + } + if (streak) { + if (streak == 1) { + sprintf(buf+strlen(buf), "%x", start); + } else { + sprintf(buf+strlen(buf), "%x-%x", start, start + streak - 1); + } + } + if (strlen(buf)) { + LOGD("dalvik.vm.jit.op = %s", buf); + } +} + +/* Common initialization routine for an architecture family */ +bool dvmCompilerArchInit() +{ + int i; + + for (i = 0; i < kMipsLast; i++) { + if (EncodingMap[i].opcode != i) { + LOGE("Encoding order for %s is wrong: expecting %d, seeing %d", + EncodingMap[i].name, i, EncodingMap[i].opcode); + dvmAbort(); // OK to dvmAbort - build error + } + } + + return dvmCompilerArchVariantInit(); +} + +void *dvmCompilerGetInterpretTemplate() +{ + return (void*) ((int)gDvmJit.codeCache + + templateEntryOffsets[TEMPLATE_INTERPRET]); +} + +JitInstructionSetType dvmCompilerGetInterpretTemplateSet() +{ + return DALVIK_JIT_MIPS; +} + +/* Needed by the Assembler */ +void dvmCompilerSetupResourceMasks(MipsLIR *lir) +{ + setupResourceMasks(lir); +} + +/* Needed by the ld/st optmizatons */ +MipsLIR* dvmCompilerRegCopyNoInsert(CompilationUnit *cUnit, int rDest, int rSrc) +{ + return genRegCopyNoInsert(cUnit, rDest, rSrc); +} + +/* Needed by the register allocator */ +MipsLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc) +{ + return genRegCopy(cUnit, rDest, rSrc); +} + +/* Needed by the register allocator */ +void dvmCompilerRegCopyWide(CompilationUnit *cUnit, int destLo, int destHi, + int srcLo, int srcHi) +{ + genRegCopyWide(cUnit, destLo, destHi, srcLo, srcHi); +} + +void dvmCompilerFlushRegImpl(CompilationUnit *cUnit, int rBase, + int displacement, int rSrc, OpSize size) +{ + storeBaseDisp(cUnit, rBase, displacement, rSrc, size); +} + +void 
dvmCompilerFlushRegWideImpl(CompilationUnit *cUnit, int rBase, + int displacement, int rSrcLo, int rSrcHi) +{ + storeBaseDispWide(cUnit, rBase, displacement, rSrcLo, rSrcHi); +} diff --git a/vm/compiler/codegen/mips/CodegenFactory.cpp b/vm/compiler/codegen/mips/CodegenFactory.cpp new file mode 100644 index 000000000..1b604ec98 --- /dev/null +++ b/vm/compiler/codegen/mips/CodegenFactory.cpp @@ -0,0 +1,349 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains codegen and support common to all supported + * Mips variants. It is included by: + * + * Codegen-$(TARGET_ARCH_VARIANT).c + * + * which combines this common code with specific support found in the + * applicable directory below this one. + */ + + +/* Load a word at base + displacement. Displacement must be word multiple */ +static MipsLIR *loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement, + int rDest) +{ + return loadBaseDisp(cUnit, NULL, rBase, displacement, rDest, kWord, + INVALID_SREG); +} + +static MipsLIR *storeWordDisp(CompilationUnit *cUnit, int rBase, + int displacement, int rSrc) +{ + return storeBaseDisp(cUnit, rBase, displacement, rSrc, kWord); +} + +/* + * Load a Dalvik register into a physical register. Take care when + * using this routine, as it doesn't perform any bookkeeping regarding + * register liveness. That is the responsibility of the caller. + */ +static void loadValueDirect(CompilationUnit *cUnit, RegLocation rlSrc, + int reg1) +{ + rlSrc = dvmCompilerUpdateLoc(cUnit, rlSrc); + if (rlSrc.location == kLocPhysReg) { + genRegCopy(cUnit, reg1, rlSrc.lowReg); + } else if (rlSrc.location == kLocRetval) { + loadWordDisp(cUnit, rSELF, offsetof(Thread, interpSave.retval), reg1); + } else { + assert(rlSrc.location == kLocDalvikFrame); + loadWordDisp(cUnit, rFP, dvmCompilerS2VReg(cUnit, rlSrc.sRegLow) << 2, + reg1); + } +} + +/* + * Similar to loadValueDirect, but clobbers and allocates the target + * register. Should be used when loading to a fixed register (for example, + * loading arguments to an out of line call. + */ +static void loadValueDirectFixed(CompilationUnit *cUnit, RegLocation rlSrc, + int reg1) +{ + dvmCompilerClobber(cUnit, reg1); + dvmCompilerMarkInUse(cUnit, reg1); + loadValueDirect(cUnit, rlSrc, reg1); +} + +/* + * Load a Dalvik register pair into a physical register[s]. Take care when + * using this routine, as it doesn't perform any bookkeeping regarding + * register liveness. That is the responsibility of the caller. 
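+ * (Callers such as loadValueWide below do that bookkeeping via
+ * dvmCompilerMarkLive.)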
+ */ +static void loadValueDirectWide(CompilationUnit *cUnit, RegLocation rlSrc, + int regLo, int regHi) +{ + rlSrc = dvmCompilerUpdateLocWide(cUnit, rlSrc); + if (rlSrc.location == kLocPhysReg) { + genRegCopyWide(cUnit, regLo, regHi, rlSrc.lowReg, rlSrc.highReg); + } else if (rlSrc.location == kLocRetval) { + loadBaseDispWide(cUnit, NULL, rSELF, offsetof(Thread, interpSave.retval), + regLo, regHi, INVALID_SREG); + } else { + assert(rlSrc.location == kLocDalvikFrame); + loadBaseDispWide(cUnit, NULL, rFP, + dvmCompilerS2VReg(cUnit, rlSrc.sRegLow) << 2, + regLo, regHi, INVALID_SREG); + } +} + +/* + * Similar to loadValueDirect, but clobbers and allocates the target + * registers. Should be used when loading to a fixed registers (for example, + * loading arguments to an out of line call. + */ +static void loadValueDirectWideFixed(CompilationUnit *cUnit, RegLocation rlSrc, + int regLo, int regHi) +{ + dvmCompilerClobber(cUnit, regLo); + dvmCompilerClobber(cUnit, regHi); + dvmCompilerMarkInUse(cUnit, regLo); + dvmCompilerMarkInUse(cUnit, regHi); + loadValueDirectWide(cUnit, rlSrc, regLo, regHi); +} + +static RegLocation loadValue(CompilationUnit *cUnit, RegLocation rlSrc, + RegisterClass opKind) +{ + rlSrc = dvmCompilerEvalLoc(cUnit, rlSrc, opKind, false); + if (rlSrc.location == kLocDalvikFrame) { + loadValueDirect(cUnit, rlSrc, rlSrc.lowReg); + rlSrc.location = kLocPhysReg; + dvmCompilerMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow); + } else if (rlSrc.location == kLocRetval) { + loadWordDisp(cUnit, rSELF, offsetof(Thread, interpSave.retval), rlSrc.lowReg); + rlSrc.location = kLocPhysReg; + dvmCompilerClobber(cUnit, rlSrc.lowReg); + } + return rlSrc; +} + +static void storeValue(CompilationUnit *cUnit, RegLocation rlDest, + RegLocation rlSrc) +{ + LIR *defStart; + LIR *defEnd; + assert(!rlDest.wide); + assert(!rlSrc.wide); + dvmCompilerKillNullCheckedLoc(cUnit, rlDest); + rlSrc = dvmCompilerUpdateLoc(cUnit, rlSrc); + rlDest = dvmCompilerUpdateLoc(cUnit, rlDest); + if (rlSrc.location == kLocPhysReg) { + if (dvmCompilerIsLive(cUnit, rlSrc.lowReg) || + (rlDest.location == kLocPhysReg)) { + // Src is live or Dest has assigned reg. + rlDest = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, false); + genRegCopy(cUnit, rlDest.lowReg, rlSrc.lowReg); + } else { + // Just re-assign the registers. 
Dest gets Src's regs + rlDest.lowReg = rlSrc.lowReg; + dvmCompilerClobber(cUnit, rlSrc.lowReg); + } + } else { + // Load Src either into promoted Dest or temps allocated for Dest + rlDest = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, false); + loadValueDirect(cUnit, rlSrc, rlDest.lowReg); + } + + // Dest is now live and dirty (until/if we flush it to home location) + dvmCompilerMarkLive(cUnit, rlDest.lowReg, rlDest.sRegLow); + dvmCompilerMarkDirty(cUnit, rlDest.lowReg); + + + if (rlDest.location == kLocRetval) { + storeBaseDisp(cUnit, rSELF, offsetof(Thread, interpSave.retval), + rlDest.lowReg, kWord); + dvmCompilerClobber(cUnit, rlDest.lowReg); + } else { + dvmCompilerResetDefLoc(cUnit, rlDest); + if (dvmCompilerLiveOut(cUnit, rlDest.sRegLow)) { + defStart = (LIR *)cUnit->lastLIRInsn; + int vReg = dvmCompilerS2VReg(cUnit, rlDest.sRegLow); + storeBaseDisp(cUnit, rFP, vReg << 2, rlDest.lowReg, kWord); + dvmCompilerMarkClean(cUnit, rlDest.lowReg); + defEnd = (LIR *)cUnit->lastLIRInsn; + dvmCompilerMarkDef(cUnit, rlDest, defStart, defEnd); + } + } +} + +static RegLocation loadValueWide(CompilationUnit *cUnit, RegLocation rlSrc, + RegisterClass opKind) +{ + assert(rlSrc.wide); + rlSrc = dvmCompilerEvalLoc(cUnit, rlSrc, opKind, false); + if (rlSrc.location == kLocDalvikFrame) { + loadValueDirectWide(cUnit, rlSrc, rlSrc.lowReg, rlSrc.highReg); + rlSrc.location = kLocPhysReg; + dvmCompilerMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow); + dvmCompilerMarkLive(cUnit, rlSrc.highReg, + dvmCompilerSRegHi(rlSrc.sRegLow)); + } else if (rlSrc.location == kLocRetval) { + loadBaseDispWide(cUnit, NULL, rSELF, offsetof(Thread, interpSave.retval), + rlSrc.lowReg, rlSrc.highReg, INVALID_SREG); + rlSrc.location = kLocPhysReg; + dvmCompilerClobber(cUnit, rlSrc.lowReg); + dvmCompilerClobber(cUnit, rlSrc.highReg); + } + return rlSrc; +} + +static void storeValueWide(CompilationUnit *cUnit, RegLocation rlDest, + RegLocation rlSrc) +{ + LIR *defStart; + LIR *defEnd; + assert(FPREG(rlSrc.lowReg)==FPREG(rlSrc.highReg)); + assert(rlDest.wide); + assert(rlSrc.wide); + dvmCompilerKillNullCheckedLoc(cUnit, rlDest); + if (rlSrc.location == kLocPhysReg) { + if (dvmCompilerIsLive(cUnit, rlSrc.lowReg) || + dvmCompilerIsLive(cUnit, rlSrc.highReg) || + (rlDest.location == kLocPhysReg)) { + // Src is live or Dest has assigned reg. + rlDest = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, false); + genRegCopyWide(cUnit, rlDest.lowReg, rlDest.highReg, + rlSrc.lowReg, rlSrc.highReg); + } else { + // Just re-assign the registers. 
Dest gets Src's regs + rlDest.lowReg = rlSrc.lowReg; + rlDest.highReg = rlSrc.highReg; + dvmCompilerClobber(cUnit, rlSrc.lowReg); + dvmCompilerClobber(cUnit, rlSrc.highReg); + } + } else { + // Load Src either into promoted Dest or temps allocated for Dest + rlDest = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, false); + loadValueDirectWide(cUnit, rlSrc, rlDest.lowReg, + rlDest.highReg); + } + + // Dest is now live and dirty (until/if we flush it to home location) + dvmCompilerMarkLive(cUnit, rlDest.lowReg, rlDest.sRegLow); + dvmCompilerMarkLive(cUnit, rlDest.highReg, + dvmCompilerSRegHi(rlDest.sRegLow)); + dvmCompilerMarkDirty(cUnit, rlDest.lowReg); + dvmCompilerMarkDirty(cUnit, rlDest.highReg); + dvmCompilerMarkPair(cUnit, rlDest.lowReg, rlDest.highReg); + + + if (rlDest.location == kLocRetval) { + storeBaseDispWide(cUnit, rSELF, offsetof(Thread, interpSave.retval), + rlDest.lowReg, rlDest.highReg); + dvmCompilerClobber(cUnit, rlDest.lowReg); + dvmCompilerClobber(cUnit, rlDest.highReg); + } else { + dvmCompilerResetDefLocWide(cUnit, rlDest); + if (dvmCompilerLiveOut(cUnit, rlDest.sRegLow) || + dvmCompilerLiveOut(cUnit, dvmCompilerSRegHi(rlDest.sRegLow))) { + defStart = (LIR *)cUnit->lastLIRInsn; + int vReg = dvmCompilerS2VReg(cUnit, rlDest.sRegLow); + assert((vReg+1) == dvmCompilerS2VReg(cUnit, + dvmCompilerSRegHi(rlDest.sRegLow))); + storeBaseDispWide(cUnit, rFP, vReg << 2, rlDest.lowReg, + rlDest.highReg); + dvmCompilerMarkClean(cUnit, rlDest.lowReg); + dvmCompilerMarkClean(cUnit, rlDest.highReg); + defEnd = (LIR *)cUnit->lastLIRInsn; + dvmCompilerMarkDefWide(cUnit, rlDest, defStart, defEnd); + } + } +} +/* + * Perform null-check on a register. sReg is the ssa register being checked, + * and mReg is the machine register holding the actual value. If internal state + * indicates that sReg has been checked before the check request is ignored. + */ +static MipsLIR *genNullCheck(CompilationUnit *cUnit, int sReg, int mReg, + int dOffset, MipsLIR *pcrLabel) +{ + /* This particular Dalvik register has been null-checked */ + if (dvmIsBitSet(cUnit->regPool->nullCheckedRegs, sReg)) { + return pcrLabel; + } + dvmSetBit(cUnit->regPool->nullCheckedRegs, sReg); + return genRegImmCheck(cUnit, kMipsCondEq, mReg, 0, dOffset, pcrLabel); +} + + + +/* + * Perform a "reg cmp reg" operation and jump to the PCR region if condition + * satisfies. + */ +static MipsLIR *genRegRegCheck(CompilationUnit *cUnit, + MipsConditionCode cond, + int reg1, int reg2, int dOffset, + MipsLIR *pcrLabel) +{ + MipsLIR *res = NULL; + if (cond == kMipsCondGe) { /* signed >= case */ + int tReg = dvmCompilerAllocTemp(cUnit); + res = newLIR3(cUnit, kMipsSlt, tReg, reg1, reg2); + MipsLIR *branch = opCompareBranch(cUnit, kMipsBeqz, tReg, -1); + genCheckCommon(cUnit, dOffset, branch, pcrLabel); + } else if (cond == kMipsCondCs) { /* unsigned >= case */ + int tReg = dvmCompilerAllocTemp(cUnit); + res = newLIR3(cUnit, kMipsSltu, tReg, reg1, reg2); + MipsLIR *branch = opCompareBranch(cUnit, kMipsBeqz, tReg, -1); + genCheckCommon(cUnit, dOffset, branch, pcrLabel); + } else { + LOGE("Unexpected condition in genRegRegCheck: %d\n", (int) cond); + dvmAbort(); + } + return res; +} + +/* + * Perform zero-check on a register. Similar to genNullCheck but the value being + * checked does not have a corresponding Dalvik register. 
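+ * Consequently the nullCheckedRegs bookkeeping used by genNullCheck does not
+ * apply here and the check is always emitted.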
+ */ +static MipsLIR *genZeroCheck(CompilationUnit *cUnit, int mReg, + int dOffset, MipsLIR *pcrLabel) +{ + return genRegImmCheck(cUnit, kMipsCondEq, mReg, 0, dOffset, pcrLabel); +} + +/* Perform bound check on two registers */ +static MipsLIR *genBoundsCheck(CompilationUnit *cUnit, int rIndex, + int rBound, int dOffset, MipsLIR *pcrLabel) +{ + return genRegRegCheck(cUnit, kMipsCondCs, rIndex, rBound, dOffset, + pcrLabel); +} + +/* + * Jump to the out-of-line handler to finish executing the + * remaining of more complex instructions. + */ +static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpcode opCode) +{ + /* + * We're jumping from a trace to a template. Using jal is preferable to jalr, + * but we need to ensure source and target addresses allow the use of jal. + * This should almost always be the case, but if source and target are in + * different 256mb regions then use jalr. The test below is very conservative + * since we don't have a source address yet, but this is ok for now given that + * we expect this case to be very rare. The test can be made less conservative + * as needed in the future in coordination with address assignment during + * the assembly process. + */ + dvmCompilerClobberHandlerRegs(cUnit); + int targetAddr = (int) gDvmJit.codeCache + templateEntryOffsets[opCode]; + int maxSourceAddr = (int) gDvmJit.codeCache + gDvmJit.codeCacheSize; + + if ((targetAddr & 0xF0000000) == (maxSourceAddr & 0xF0000000)) { + newLIR1(cUnit, kMipsJal, targetAddr); + } else { + loadConstant(cUnit, r_T9, targetAddr); + newLIR2(cUnit, kMipsJalr, r_RA, r_T9); + } +} diff --git a/vm/compiler/codegen/mips/FP/MipsFP.cpp b/vm/compiler/codegen/mips/FP/MipsFP.cpp new file mode 100644 index 000000000..cf44b0e9c --- /dev/null +++ b/vm/compiler/codegen/mips/FP/MipsFP.cpp @@ -0,0 +1,409 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file is included by Codegen-armv5te-vfp.c, and implements architecture + * variant-specific code. + */ + +extern void dvmCompilerFlushRegWideForV5TEVFP(CompilationUnit *cUnit, + int reg1, int reg2); +extern void dvmCompilerFlushRegForV5TEVFP(CompilationUnit *cUnit, int reg); + +/* First, flush any registers associated with this value */ +static void loadValueAddress(CompilationUnit *cUnit, RegLocation rlSrc, + int rDest) +{ + rlSrc = rlSrc.wide ? 
dvmCompilerUpdateLocWide(cUnit, rlSrc) : + dvmCompilerUpdateLoc(cUnit, rlSrc); + if (rlSrc.location == kLocPhysReg) { + if (rlSrc.wide) { + dvmCompilerFlushRegWideForV5TEVFP(cUnit, rlSrc.lowReg, + rlSrc.highReg); + } else { + dvmCompilerFlushRegForV5TEVFP(cUnit, rlSrc.lowReg); + } + } + opRegRegImm(cUnit, kOpAdd, rDest, rFP, + dvmCompilerS2VReg(cUnit, rlSrc.sRegLow) << 2); +} + +static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir) +{ + RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); +#ifdef __mips_hard_float + RegLocation rlResult = LOC_C_RETURN_WIDE_ALT; +#else + RegLocation rlResult = LOC_C_RETURN_WIDE; +#endif + RegLocation rlDest = LOC_DALVIK_RETURN_VAL_WIDE; + loadValueAddress(cUnit, rlSrc, r_A2); + genDispatchToHandler(cUnit, TEMPLATE_SQRT_DOUBLE_VFP); + storeValueWide(cUnit, rlDest, rlResult); + return false; +} + +/* + * TUNING: On some implementations, it is quicker to pass addresses + * to the handlers rather than load the operands into core registers + * and then move the values to FP regs in the handlers. Other implementations + * may prefer passing data in registers (and the latter approach would + * yeild cleaner register handling - avoiding the requirement that operands + * be flushed to memory prior to the call). + */ +static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2) +{ +#ifdef __mips_hard_float + int op = kMipsNop; + RegLocation rlResult; + + /* + * Don't attempt to optimize register usage since these opcodes call out to + * the handlers. + */ + switch (mir->dalvikInsn.opcode) { + case OP_ADD_FLOAT_2ADDR: + case OP_ADD_FLOAT: + op = kMipsFadds; + break; + case OP_SUB_FLOAT_2ADDR: + case OP_SUB_FLOAT: + op = kMipsFsubs; + break; + case OP_DIV_FLOAT_2ADDR: + case OP_DIV_FLOAT: + op = kMipsFdivs; + break; + case OP_MUL_FLOAT_2ADDR: + case OP_MUL_FLOAT: + op = kMipsFmuls; + break; + case OP_REM_FLOAT_2ADDR: + case OP_REM_FLOAT: + case OP_NEG_FLOAT: { + return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1, rlSrc2); + } + default: + return true; + } + rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg); + rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true); + newLIR3(cUnit, (MipsOpCode)op, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg); + storeValue(cUnit, rlDest, rlResult); + + return false; +#else + TemplateOpcode opcode; + + /* + * Don't attempt to optimize register usage since these opcodes call out to + * the handlers. 
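+     * In this soft-float path the operands are passed to the template by
+     * their Dalvik-frame addresses (r_A0..r_A2) and the handler writes the
+     * result back to memory, which is why any cached copy of rlDest is
+     * clobbered after the call.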
+ */ + switch (mir->dalvikInsn.opcode) { + case OP_ADD_FLOAT_2ADDR: + case OP_ADD_FLOAT: + opcode = TEMPLATE_ADD_FLOAT_VFP; + break; + case OP_SUB_FLOAT_2ADDR: + case OP_SUB_FLOAT: + opcode = TEMPLATE_SUB_FLOAT_VFP; + break; + case OP_DIV_FLOAT_2ADDR: + case OP_DIV_FLOAT: + opcode = TEMPLATE_DIV_FLOAT_VFP; + break; + case OP_MUL_FLOAT_2ADDR: + case OP_MUL_FLOAT: + opcode = TEMPLATE_MUL_FLOAT_VFP; + break; + case OP_REM_FLOAT_2ADDR: + case OP_REM_FLOAT: + case OP_NEG_FLOAT: { + return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1, rlSrc2); + } + default: + return true; + } + loadValueAddress(cUnit, rlDest, r_A0); + dvmCompilerClobber(cUnit, r_A0); + loadValueAddress(cUnit, rlSrc1, r_A1); + dvmCompilerClobber(cUnit, r_A1); + loadValueAddress(cUnit, rlSrc2, r_A2); + genDispatchToHandler(cUnit, opcode); + rlDest = dvmCompilerUpdateLoc(cUnit, rlDest); + if (rlDest.location == kLocPhysReg) { + dvmCompilerClobber(cUnit, rlDest.lowReg); + } + return false; +#endif +} + +static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2) +{ +#ifdef __mips_hard_float + int op = kMipsNop; + RegLocation rlResult; + + switch (mir->dalvikInsn.opcode) { + case OP_ADD_DOUBLE_2ADDR: + case OP_ADD_DOUBLE: + op = kMipsFaddd; + break; + case OP_SUB_DOUBLE_2ADDR: + case OP_SUB_DOUBLE: + op = kMipsFsubd; + break; + case OP_DIV_DOUBLE_2ADDR: + case OP_DIV_DOUBLE: + op = kMipsFdivd; + break; + case OP_MUL_DOUBLE_2ADDR: + case OP_MUL_DOUBLE: + op = kMipsFmuld; + break; + case OP_REM_DOUBLE_2ADDR: + case OP_REM_DOUBLE: + case OP_NEG_DOUBLE: { + return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1, rlSrc2); + } + default: + return true; + } + rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg); + assert(rlSrc1.wide); + rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg); + assert(rlSrc2.wide); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true); + assert(rlDest.wide); + assert(rlResult.wide); + newLIR3(cUnit, (MipsOpCode)op, S2D(rlResult.lowReg, rlResult.highReg), + S2D(rlSrc1.lowReg, rlSrc1.highReg), + S2D(rlSrc2.lowReg, rlSrc2.highReg)); + storeValueWide(cUnit, rlDest, rlResult); + return false; +#else + TemplateOpcode opcode; + + switch (mir->dalvikInsn.opcode) { + case OP_ADD_DOUBLE_2ADDR: + case OP_ADD_DOUBLE: + opcode = TEMPLATE_ADD_DOUBLE_VFP; + break; + case OP_SUB_DOUBLE_2ADDR: + case OP_SUB_DOUBLE: + opcode = TEMPLATE_SUB_DOUBLE_VFP; + break; + case OP_DIV_DOUBLE_2ADDR: + case OP_DIV_DOUBLE: + opcode = TEMPLATE_DIV_DOUBLE_VFP; + break; + case OP_MUL_DOUBLE_2ADDR: + case OP_MUL_DOUBLE: + opcode = TEMPLATE_MUL_DOUBLE_VFP; + break; + case OP_REM_DOUBLE_2ADDR: + case OP_REM_DOUBLE: + case OP_NEG_DOUBLE: { + return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1, + rlSrc2); + } + default: + return true; + } + loadValueAddress(cUnit, rlDest, r_A0); + dvmCompilerClobber(cUnit, r_A0); + loadValueAddress(cUnit, rlSrc1, r_A1); + dvmCompilerClobber(cUnit, r_A1); + loadValueAddress(cUnit, rlSrc2, r_A2); + genDispatchToHandler(cUnit, opcode); + rlDest = dvmCompilerUpdateLocWide(cUnit, rlDest); + if (rlDest.location == kLocPhysReg) { + dvmCompilerClobber(cUnit, rlDest.lowReg); + dvmCompilerClobber(cUnit, rlDest.highReg); + } + return false; +#endif +} + +static bool genConversion(CompilationUnit *cUnit, MIR *mir) +{ + Opcode opcode = mir->dalvikInsn.opcode; + bool longSrc = false; + bool longDest = false; + RegLocation rlSrc; + RegLocation rlDest; +#ifdef __mips_hard_float + int op = kMipsNop; + int srcReg; + RegLocation rlResult; + + switch (opcode) 
{ + case OP_INT_TO_FLOAT: + longSrc = false; + longDest = false; + op = kMipsFcvtsw; + break; + case OP_DOUBLE_TO_FLOAT: + longSrc = true; + longDest = false; + op = kMipsFcvtsd; + break; + case OP_FLOAT_TO_DOUBLE: + longSrc = false; + longDest = true; + op = kMipsFcvtds; + break; + case OP_INT_TO_DOUBLE: + longSrc = false; + longDest = true; + op = kMipsFcvtdw; + break; + case OP_FLOAT_TO_INT: + case OP_DOUBLE_TO_INT: + case OP_LONG_TO_DOUBLE: + case OP_FLOAT_TO_LONG: + case OP_LONG_TO_FLOAT: + case OP_DOUBLE_TO_LONG: + return genConversionPortable(cUnit, mir); + default: + return true; + } + if (longSrc) { + rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + rlSrc = loadValueWide(cUnit, rlSrc, kFPReg); + srcReg = S2D(rlSrc.lowReg, rlSrc.highReg); + } else { + rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + rlSrc = loadValue(cUnit, rlSrc, kFPReg); + srcReg = rlSrc.lowReg; + } + if (longDest) { + rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true); + newLIR2(cUnit, (MipsOpCode)op, S2D(rlResult.lowReg, rlResult.highReg), srcReg); + storeValueWide(cUnit, rlDest, rlResult); + } else { + rlDest = dvmCompilerGetDest(cUnit, mir, 0); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true); + newLIR2(cUnit, (MipsOpCode)op, rlResult.lowReg, srcReg); + storeValue(cUnit, rlDest, rlResult); + } + return false; +#else + TemplateOpcode templateOpcode; + switch (opcode) { + case OP_INT_TO_FLOAT: + longSrc = false; + longDest = false; + templateOpcode = TEMPLATE_INT_TO_FLOAT_VFP; + break; + case OP_FLOAT_TO_INT: + longSrc = false; + longDest = false; + templateOpcode = TEMPLATE_FLOAT_TO_INT_VFP; + break; + case OP_DOUBLE_TO_FLOAT: + longSrc = true; + longDest = false; + templateOpcode = TEMPLATE_DOUBLE_TO_FLOAT_VFP; + break; + case OP_FLOAT_TO_DOUBLE: + longSrc = false; + longDest = true; + templateOpcode = TEMPLATE_FLOAT_TO_DOUBLE_VFP; + break; + case OP_INT_TO_DOUBLE: + longSrc = false; + longDest = true; + templateOpcode = TEMPLATE_INT_TO_DOUBLE_VFP; + break; + case OP_DOUBLE_TO_INT: + longSrc = true; + longDest = false; + templateOpcode = TEMPLATE_DOUBLE_TO_INT_VFP; + break; + case OP_LONG_TO_DOUBLE: + case OP_FLOAT_TO_LONG: + case OP_LONG_TO_FLOAT: + case OP_DOUBLE_TO_LONG: + return genConversionPortable(cUnit, mir); + default: + return true; + } + + if (longSrc) { + rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + } else { + rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + } + + if (longDest) { + rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); + } else { + rlDest = dvmCompilerGetDest(cUnit, mir, 0); + } + loadValueAddress(cUnit, rlDest, r_A0); + dvmCompilerClobber(cUnit, r_A0); + loadValueAddress(cUnit, rlSrc, r_A1); + genDispatchToHandler(cUnit, templateOpcode); + if (rlDest.wide) { + rlDest = dvmCompilerUpdateLocWide(cUnit, rlDest); + dvmCompilerClobber(cUnit, rlDest.highReg); + } else { + rlDest = dvmCompilerUpdateLoc(cUnit, rlDest); + } + dvmCompilerClobber(cUnit, rlDest.lowReg); + return false; +#endif +} + +static bool genCmpFP(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + TemplateOpcode templateOpcode; + RegLocation rlResult = dvmCompilerGetReturn(cUnit); + bool wide = true; + + switch(mir->dalvikInsn.opcode) { + case OP_CMPL_FLOAT: + templateOpcode = TEMPLATE_CMPL_FLOAT_VFP; + wide = false; + break; + case OP_CMPG_FLOAT: + templateOpcode = TEMPLATE_CMPG_FLOAT_VFP; + wide = false; + break; + case OP_CMPL_DOUBLE: + templateOpcode = TEMPLATE_CMPL_DOUBLE_VFP; + break; + case 
OP_CMPG_DOUBLE:
+            templateOpcode = TEMPLATE_CMPG_DOUBLE_VFP;
+            break;
+        default:
+            return true;
+    }
+    loadValueAddress(cUnit, rlSrc1, r_A0);
+    dvmCompilerClobber(cUnit, r_A0);
+    loadValueAddress(cUnit, rlSrc2, r_A1);
+    genDispatchToHandler(cUnit, templateOpcode);
+    storeValue(cUnit, rlDest, rlResult);
+    return false;
+}
diff --git a/vm/compiler/codegen/mips/GlobalOptimizations.cpp b/vm/compiler/codegen/mips/GlobalOptimizations.cpp
new file mode 100644
index 000000000..189d81843
--- /dev/null
+++ b/vm/compiler/codegen/mips/GlobalOptimizations.cpp
@@ -0,0 +1,422 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "vm/compiler/CompilerInternals.h"
+#include "MipsLIR.h"
+
+/*
+ * Identify unconditional branches that jump to the immediate successor of the
+ * branch itself.
+ */
+static void applyRedundantBranchElimination(CompilationUnit *cUnit)
+{
+    MipsLIR *thisLIR;
+
+    for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn;
+         thisLIR != (MipsLIR *) cUnit->lastLIRInsn;
+         thisLIR = NEXT_LIR(thisLIR)) {
+
+        /* Branch to the next instruction */
+        if (!thisLIR->flags.isNop && thisLIR->opcode == kMipsB) {
+            MipsLIR *nextLIR = thisLIR;
+
+            while (true) {
+                nextLIR = NEXT_LIR(nextLIR);
+
+                /*
+                 * Is the branch target the next instruction?
+                 */
+                if (nextLIR == (MipsLIR *) thisLIR->generic.target) {
+                    thisLIR->flags.isNop = true;
+                    break;
+                }
+
+                /*
+                 * Found real useful stuff between the branch and the target.
+                 * Need to explicitly check the lastLIRInsn here since with
+                 * method-based JIT the branch might be the last real
+                 * instruction.
+                 */
+                if (!isPseudoOpCode(nextLIR->opcode) ||
+                    (nextLIR == (MipsLIR *) cUnit->lastLIRInsn))
+                    break;
+            }
+        }
+    }
+}
+
+/*
+ * Do a simple form of copy propagation and elimination.
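+ * Roughly: after a "move rd, rs", uses of rd in the following instructions
+ * are rewritten to use rs directly, and once every recorded use has been
+ * rewritten the move itself is turned into a nop.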
+ */ +static void applyCopyPropagation(CompilationUnit *cUnit) +{ + MipsLIR *thisLIR; + + /* look for copies to possibly eliminate */ + for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn; + thisLIR != (MipsLIR *) cUnit->lastLIRInsn; + thisLIR = NEXT_LIR(thisLIR)) { + + if (thisLIR->flags.isNop || thisLIR->opcode != kMipsMove) + continue; + + const int max_insns = 10; + MipsLIR *savedLIR[max_insns]; + int srcRedefined = 0; + int insnCount = 0; + MipsLIR *nextLIR; + + /* look for and record all uses of reg defined by the copy */ + for (nextLIR = (MipsLIR *) NEXT_LIR(thisLIR); + nextLIR != (MipsLIR *) cUnit->lastLIRInsn; + nextLIR = NEXT_LIR(nextLIR)) { + + if (nextLIR->flags.isNop || nextLIR->opcode == kMips32BitData) + continue; + + if (isPseudoOpCode(nextLIR->opcode)) { + if (nextLIR->opcode == kMipsPseudoDalvikByteCodeBoundary || + nextLIR->opcode == kMipsPseudoBarrier || + nextLIR->opcode == kMipsPseudoExtended || + nextLIR->opcode == kMipsPseudoSSARep) + continue; /* these pseudos don't pose problems */ + else if (nextLIR->opcode == kMipsPseudoTargetLabel || + nextLIR->opcode == kMipsPseudoEntryBlock || + nextLIR->opcode == kMipsPseudoExitBlock) + insnCount = 0; /* give up for these pseudos */ + break; /* reached end for copy propagation */ + } + + /* Since instructions with IS_BRANCH flag set will have its */ + /* useMask and defMask set to ENCODE_ALL, any checking of */ + /* these flags must come after the branching checks. */ + + /* don't propagate across branch/jump and link case + or jump via register */ + if (EncodingMap[nextLIR->opcode].flags & REG_DEF_LR || + nextLIR->opcode == kMipsJalr || + nextLIR->opcode == kMipsJr) { + insnCount = 0; + break; + } + + /* branches with certain targets ok while others aren't */ + if (EncodingMap[nextLIR->opcode].flags & IS_BRANCH) { + MipsLIR *targetLIR = (MipsLIR *) nextLIR->generic.target; + if (targetLIR->opcode != kMipsPseudoEHBlockLabel && + targetLIR->opcode != kMipsPseudoChainingCellHot && + targetLIR->opcode != kMipsPseudoChainingCellNormal && + targetLIR->opcode != kMipsPseudoChainingCellInvokePredicted && + targetLIR->opcode != kMipsPseudoChainingCellInvokeSingleton && + targetLIR->opcode != kMipsPseudoPCReconstructionBlockLabel && + targetLIR->opcode != kMipsPseudoPCReconstructionCell) { + insnCount = 0; + break; + } + /* FIXME - for now don't propagate across any branch/jump. 
*/ + insnCount = 0; + break; + } + + /* copy def reg used here, so record insn for copy propagation */ + if (thisLIR->defMask & nextLIR->useMask) { + if (insnCount == max_insns || srcRedefined) { + insnCount = 0; + break; /* just give up if too many or not possible */ + } + savedLIR[insnCount++] = nextLIR; + } + + if (thisLIR->defMask & nextLIR->defMask) { + if (nextLIR->opcode == kMipsMovz) + insnCount = 0; /* movz relies on thisLIR setting dst reg so abandon propagation*/ + break; + } + + /* copy src reg redefined here, so can't propagate further */ + if (thisLIR->useMask & nextLIR->defMask) { + if (insnCount == 0) + break; /* nothing to propagate */ + srcRedefined = 1; + } + } + + /* conditions allow propagation and copy elimination */ + if (insnCount) { + int i; + for (i = 0; i < insnCount; i++) { + int flags = EncodingMap[savedLIR[i]->opcode].flags; + savedLIR[i]->useMask &= ~(1 << thisLIR->operands[0]); + savedLIR[i]->useMask |= 1 << thisLIR->operands[1]; + if ((flags & REG_USE0) && + savedLIR[i]->operands[0] == thisLIR->operands[0]) + savedLIR[i]->operands[0] = thisLIR->operands[1]; + if ((flags & REG_USE1) && + savedLIR[i]->operands[1] == thisLIR->operands[0]) + savedLIR[i]->operands[1] = thisLIR->operands[1]; + if ((flags & REG_USE2) && + savedLIR[i]->operands[2] == thisLIR->operands[0]) + savedLIR[i]->operands[2] = thisLIR->operands[1]; + if ((flags & REG_USE3) && + savedLIR[i]->operands[3] == thisLIR->operands[0]) + savedLIR[i]->operands[3] = thisLIR->operands[1]; + } + thisLIR->flags.isNop = true; + } + } +} + +#ifdef __mips_hard_float +/* + * Look for pairs of mov.s instructions that can be combined into mov.d + */ +static void mergeMovs(CompilationUnit *cUnit) +{ + MipsLIR *movsLIR = NULL; + MipsLIR *thisLIR; + + for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn; + thisLIR != (MipsLIR *) cUnit->lastLIRInsn; + thisLIR = NEXT_LIR(thisLIR)) { + if (thisLIR->flags.isNop) + continue; + + if (isPseudoOpCode(thisLIR->opcode)) { + if (thisLIR->opcode == kMipsPseudoDalvikByteCodeBoundary || + thisLIR->opcode == kMipsPseudoExtended || + thisLIR->opcode == kMipsPseudoSSARep) + continue; /* ok to move across these pseudos */ + movsLIR = NULL; /* don't merge across other pseudos */ + continue; + } + + /* merge pairs of mov.s instructions */ + if (thisLIR->opcode == kMipsFmovs) { + if (movsLIR == NULL) + movsLIR = thisLIR; + else if (((movsLIR->operands[0] & 1) == 0) && + ((movsLIR->operands[1] & 1) == 0) && + ((movsLIR->operands[0] + 1) == thisLIR->operands[0]) && + ((movsLIR->operands[1] + 1) == thisLIR->operands[1])) { + /* movsLIR is handling even register - upgrade to mov.d */ + movsLIR->opcode = kMipsFmovd; + movsLIR->operands[0] = S2D(movsLIR->operands[0], movsLIR->operands[0]+1); + movsLIR->operands[1] = S2D(movsLIR->operands[1], movsLIR->operands[1]+1); + thisLIR->flags.isNop = true; + movsLIR = NULL; + } + else if (((movsLIR->operands[0] & 1) == 1) && + ((movsLIR->operands[1] & 1) == 1) && + ((movsLIR->operands[0] - 1) == thisLIR->operands[0]) && + ((movsLIR->operands[1] - 1) == thisLIR->operands[1])) { + /* thissLIR is handling even register - upgrade to mov.d */ + thisLIR->opcode = kMipsFmovd; + thisLIR->operands[0] = S2D(thisLIR->operands[0], thisLIR->operands[0]+1); + thisLIR->operands[1] = S2D(thisLIR->operands[1], thisLIR->operands[1]+1); + movsLIR->flags.isNop = true; + movsLIR = NULL; + } + else + /* carry on searching from here */ + movsLIR = thisLIR; + continue; + } + + /* intervening instruction - start search from scratch */ + movsLIR = NULL; + } +} +#endif + + +/* + * 
Look back first and then ahead to try to find an instruction to move into + * the branch delay slot. If the analysis can be done cheaply enough, it may be + * be possible to tune this routine to be more beneficial (e.g., being more + * particular about what instruction is speculated). + */ +static MipsLIR *delaySlotLIR(MipsLIR *firstLIR, MipsLIR *branchLIR) +{ + int isLoad; + int loadVisited = 0; + int isStore; + int storeVisited = 0; + u8 useMask = branchLIR->useMask; + u8 defMask = branchLIR->defMask; + MipsLIR *thisLIR; + MipsLIR *newLIR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + + for (thisLIR = PREV_LIR(branchLIR); + thisLIR != firstLIR; + thisLIR = PREV_LIR(thisLIR)) { + if (thisLIR->flags.isNop) + continue; + + if (isPseudoOpCode(thisLIR->opcode)) { + if (thisLIR->opcode == kMipsPseudoDalvikByteCodeBoundary || + thisLIR->opcode == kMipsPseudoExtended || + thisLIR->opcode == kMipsPseudoSSARep) + continue; /* ok to move across these pseudos */ + break; /* don't move across all other pseudos */ + } + + /* give up on moving previous instruction down into slot */ + if (thisLIR->opcode == kMipsNop || + thisLIR->opcode == kMips32BitData || + EncodingMap[thisLIR->opcode].flags & IS_BRANCH) + break; + + /* don't reorder loads/stores (the alias info could + possibly be used to allow as a future enhancement) */ + isLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD; + isStore = EncodingMap[thisLIR->opcode].flags & IS_STORE; + + if (!(thisLIR->useMask & defMask) && + !(thisLIR->defMask & useMask) && + !(thisLIR->defMask & defMask) && + !(isLoad && storeVisited) && + !(isStore && loadVisited) && + !(isStore && storeVisited)) { + *newLIR = *thisLIR; + thisLIR->flags.isNop = true; + return newLIR; /* move into delay slot succeeded */ + } + + loadVisited |= isLoad; + storeVisited |= isStore; + + /* accumulate def/use constraints */ + useMask |= thisLIR->useMask; + defMask |= thisLIR->defMask; + } + + /* for unconditional branches try to copy the instruction at the + branch target up into the delay slot and adjust the branch */ + if (branchLIR->opcode == kMipsB) { + MipsLIR *targetLIR; + for (targetLIR = (MipsLIR *) branchLIR->generic.target; + targetLIR; + targetLIR = NEXT_LIR(targetLIR)) { + if (!targetLIR->flags.isNop && + (!isPseudoOpCode(targetLIR->opcode) || /* can't pull predicted up */ + targetLIR->opcode == kMipsPseudoChainingCellInvokePredicted)) + break; /* try to get to next real op at branch target */ + } + if (targetLIR && !isPseudoOpCode(targetLIR->opcode) && + !(EncodingMap[targetLIR->opcode].flags & IS_BRANCH)) { + *newLIR = *targetLIR; + branchLIR->generic.target = (LIR *) NEXT_LIR(targetLIR); + return newLIR; + } + } else if (branchLIR->opcode >= kMipsBeq && branchLIR->opcode <= kMipsBne) { + /* for conditional branches try to fill branch delay slot + via speculative execution when safe */ + MipsLIR *targetLIR; + for (targetLIR = (MipsLIR *) branchLIR->generic.target; + targetLIR; + targetLIR = NEXT_LIR(targetLIR)) { + if (!targetLIR->flags.isNop && !isPseudoOpCode(targetLIR->opcode)) + break; /* try to get to next real op at branch target */ + } + + MipsLIR *nextLIR; + for (nextLIR = NEXT_LIR(branchLIR); + nextLIR; + nextLIR = NEXT_LIR(nextLIR)) { + if (!nextLIR->flags.isNop && !isPseudoOpCode(nextLIR->opcode)) + break; /* try to get to next real op for fall thru */ + } + + if (nextLIR && targetLIR) { + int flags = EncodingMap[nextLIR->opcode].flags; + int isLoad = flags & IS_LOAD; + + /* common branch and fall thru to normal chaining cells case */ + if (isLoad && 
nextLIR->opcode == targetLIR->opcode && + nextLIR->operands[0] == targetLIR->operands[0] && + nextLIR->operands[1] == targetLIR->operands[1] && + nextLIR->operands[2] == targetLIR->operands[2]) { + *newLIR = *targetLIR; + branchLIR->generic.target = (LIR *) NEXT_LIR(targetLIR); + return newLIR; + } + + /* try prefetching (maybe try speculating instructions along the + trace like dalvik frame load which is common and may be safe) */ + int isStore = flags & IS_STORE; + if (isLoad || isStore) { + newLIR->opcode = kMipsPref; + newLIR->operands[0] = isLoad ? 0 : 1; + newLIR->operands[1] = nextLIR->operands[1]; + newLIR->operands[2] = nextLIR->operands[2]; + newLIR->defMask = nextLIR->defMask; + newLIR->useMask = nextLIR->useMask; + return newLIR; + } + } + } + + /* couldn't find a useful instruction to move into the delay slot */ + newLIR->opcode = kMipsNop; + return newLIR; +} + +/* + * The branch delay slot has been ignored thus far. This is the point where + * a useful instruction is moved into it or a nop is inserted. Leave existing + * NOPs alone -- these came from sparse and packed switch ops and are needed + * to maintain the proper offset to the jump table. + */ +static void introduceBranchDelaySlot(CompilationUnit *cUnit) +{ + MipsLIR *thisLIR; + MipsLIR *firstLIR =(MipsLIR *) cUnit->firstLIRInsn; + MipsLIR *lastLIR =(MipsLIR *) cUnit->lastLIRInsn; + + for (thisLIR = lastLIR; thisLIR != firstLIR; thisLIR = PREV_LIR(thisLIR)) { + if (thisLIR->flags.isNop || + isPseudoOpCode(thisLIR->opcode) || + !(EncodingMap[thisLIR->opcode].flags & IS_BRANCH)) { + continue; + } else if (thisLIR == lastLIR) { + dvmCompilerAppendLIR(cUnit, + (LIR *) delaySlotLIR(firstLIR, thisLIR)); + } else if (NEXT_LIR(thisLIR)->opcode != kMipsNop) { + dvmCompilerInsertLIRAfter((LIR *) thisLIR, + (LIR *) delaySlotLIR(firstLIR, thisLIR)); + } + } + + if (!thisLIR->flags.isNop && + !isPseudoOpCode(thisLIR->opcode) && + EncodingMap[thisLIR->opcode].flags & IS_BRANCH) { + /* nothing available to move, so insert nop */ + MipsLIR *nopLIR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + nopLIR->opcode = kMipsNop; + dvmCompilerInsertLIRAfter((LIR *) thisLIR, (LIR *) nopLIR); + } +} + +void dvmCompilerApplyGlobalOptimizations(CompilationUnit *cUnit) +{ + applyRedundantBranchElimination(cUnit); + applyCopyPropagation(cUnit); +#ifdef __mips_hard_float + mergeMovs(cUnit); +#endif + introduceBranchDelaySlot(cUnit); +} diff --git a/vm/compiler/codegen/mips/LocalOptimizations.cpp b/vm/compiler/codegen/mips/LocalOptimizations.cpp new file mode 100644 index 000000000..2ccd40dcd --- /dev/null +++ b/vm/compiler/codegen/mips/LocalOptimizations.cpp @@ -0,0 +1,460 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Dalvik.h" +#include "vm/compiler/CompilerInternals.h" +#include "MipsLIR.h" +#include "Codegen.h" + +#define DEBUG_OPT(X) + +/* Check RAW, WAR, and WAR dependency on the register operands */ +#define CHECK_REG_DEP(use, def, check) ((def & check->useMask) || \ + ((use | def) & check->defMask)) + +/* Scheduler heuristics */ +#define MAX_HOIST_DISTANCE 20 +#define LDLD_DISTANCE 4 +#define LD_LATENCY 2 + +static inline bool isDalvikRegisterClobbered(MipsLIR *lir1, MipsLIR *lir2) +{ + int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo); + int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo); + int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo); + int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo); + + return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo); +} + +#if 0 +/* Debugging utility routine */ +static void dumpDependentInsnPair(MipsLIR *thisLIR, MipsLIR *checkLIR, + const char *optimization) +{ + LOGD("************ %s ************", optimization); + dvmDumpLIRInsn((LIR *) thisLIR, 0); + dvmDumpLIRInsn((LIR *) checkLIR, 0); +} +#endif + +/* Convert a more expensive instruction (ie load) into a move */ +static void convertMemOpIntoMove(CompilationUnit *cUnit, MipsLIR *origLIR, + int dest, int src) +{ + /* Insert a move to replace the load */ + MipsLIR *moveLIR; + moveLIR = dvmCompilerRegCopyNoInsert( cUnit, dest, src); + /* + * Insert the converted instruction after the original since the + * optimization is scannng in the top-down order and the new instruction + * will need to be re-checked (eg the new dest clobbers the src used in + * thisLIR). + */ + dvmCompilerInsertLIRAfter((LIR *) origLIR, (LIR *) moveLIR); +} + +/* + * Perform a pass of top-down walk, from the second-last instruction in the + * superblock, to eliminate redundant loads and stores. + * + * An earlier load can eliminate a later load iff + * 1) They are must-aliases + * 2) The native register is not clobbered in between + * 3) The memory location is not written to in between + * + * An earlier store can eliminate a later load iff + * 1) They are must-aliases + * 2) The native register is not clobbered in between + * 3) The memory location is not written to in between + * + * A later store can be eliminated by an earlier store iff + * 1) They are must-aliases + * 2) The memory location is not written to in between + */ +static void applyLoadStoreElimination(CompilationUnit *cUnit, + MipsLIR *headLIR, + MipsLIR *tailLIR) +{ + MipsLIR *thisLIR; + + if (headLIR == tailLIR) return; + + for (thisLIR = PREV_LIR(tailLIR); + thisLIR != headLIR; + thisLIR = PREV_LIR(thisLIR)) { + int sinkDistance = 0; + + /* Skip non-interesting instructions */ + if ((thisLIR->flags.isNop == true) || + isPseudoOpCode(thisLIR->opcode) || + !(EncodingMap[thisLIR->opcode].flags & (IS_LOAD | IS_STORE))) { + continue; + } + + int nativeRegId = thisLIR->operands[0]; + bool isThisLIRLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD; + MipsLIR *checkLIR; + /* Use the mem mask to determine the rough memory location */ + u8 thisMemMask = (thisLIR->useMask | thisLIR->defMask) & ENCODE_MEM; + + /* + * Currently only eliminate redundant ld/st for constant and Dalvik + * register accesses. + */ + if (!(thisMemMask & (ENCODE_LITERAL | ENCODE_DALVIK_REG))) continue; + + /* + * Add r15 (pc) to the resource mask to prevent this instruction + * from sinking past branch instructions. Also take out the memory + * region bits since stopMask is used to check data/control + * dependencies. 
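+         *
+         * As a sketch of the test applied further down: CHECK_REG_DEP(use,
+         * def, check) reports a conflict when def overlaps check->useMask
+         * (a RAW hazard) or when (use | def) overlaps check->defMask (a
+         * WAR or WAW hazard), and the first such conflict ends the scan.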
+ */ + u8 stopUseRegMask = (ENCODE_REG_PC | thisLIR->useMask) & + ~ENCODE_MEM; + u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM; + + for (checkLIR = NEXT_LIR(thisLIR); + checkLIR != tailLIR; + checkLIR = NEXT_LIR(checkLIR)) { + + /* + * Skip already dead instructions (whose dataflow information is + * outdated and misleading). + */ + if (checkLIR->flags.isNop) continue; + + u8 checkMemMask = (checkLIR->useMask | checkLIR->defMask) & + ENCODE_MEM; + u8 aliasCondition = thisMemMask & checkMemMask; + bool stopHere = false; + + /* + * Potential aliases seen - check the alias relations + */ + if (checkMemMask != ENCODE_MEM && aliasCondition != 0) { + bool isCheckLIRLoad = EncodingMap[checkLIR->opcode].flags & + IS_LOAD; + if (aliasCondition == ENCODE_LITERAL) { + /* + * Should only see literal loads in the instruction + * stream. + */ + assert(!(EncodingMap[checkLIR->opcode].flags & + IS_STORE)); + /* Same value && same register type */ + if (checkLIR->aliasInfo == thisLIR->aliasInfo && + REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId)){ + /* + * Different destination register - insert + * a move + */ + if (checkLIR->operands[0] != nativeRegId) { + convertMemOpIntoMove(cUnit, checkLIR, + checkLIR->operands[0], + nativeRegId); + } + checkLIR->flags.isNop = true; + } + } else if (aliasCondition == ENCODE_DALVIK_REG) { + /* Must alias */ + if (checkLIR->aliasInfo == thisLIR->aliasInfo) { + /* Only optimize compatible registers */ + bool regCompatible = + REGTYPE(checkLIR->operands[0]) == + REGTYPE(nativeRegId); + if ((isThisLIRLoad && isCheckLIRLoad) || + (!isThisLIRLoad && isCheckLIRLoad)) { + /* RAR or RAW */ + if (regCompatible) { + /* + * Different destination register - + * insert a move + */ + if (checkLIR->operands[0] != + nativeRegId) { + convertMemOpIntoMove(cUnit, + checkLIR, + checkLIR->operands[0], + nativeRegId); + } + checkLIR->flags.isNop = true; + } else { + /* + * Destinaions are of different types - + * something complicated going on so + * stop looking now. + */ + stopHere = true; + } + } else if (isThisLIRLoad && !isCheckLIRLoad) { + /* WAR - register value is killed */ + stopHere = true; + } else if (!isThisLIRLoad && !isCheckLIRLoad) { + /* WAW - nuke the earlier store */ + thisLIR->flags.isNop = true; + stopHere = true; + } + /* Partial overlap */ + } else if (isDalvikRegisterClobbered(thisLIR, checkLIR)) { + /* + * It is actually ok to continue if checkLIR + * is a read. But it is hard to make a test + * case for this so we just stop here to be + * conservative. + */ + stopHere = true; + } + } + /* Memory content may be updated. Stop looking now. */ + if (stopHere) { + break; + /* The checkLIR has been transformed - check the next one */ + } else if (checkLIR->flags.isNop) { + continue; + } + } + + + /* + * this and check LIRs have no memory dependency. Now check if + * their register operands have any RAW, WAR, and WAW + * dependencies. If so, stop looking. + */ + if (stopHere == false) { + stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask, + checkLIR); + } + + if (stopHere == true) { + DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR, + "REG CLOBBERED")); + /* Only sink store instructions */ + if (sinkDistance && !isThisLIRLoad) { + MipsLIR *newStoreLIR = + (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + *newStoreLIR = *thisLIR; + /* + * Stop point found - insert *before* the checkLIR + * since the instruction list is scanned in the + * top-down order. 
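+                     *
+                     * Sketch of the effect (operands are hypothetical):
+                     *     sw   v0, 12(rFP)     <- thisLIR, becomes a nop
+                     *     addu t0, t1, t2      <- independent, skipped over
+                     *     <re-emitted sw here>
+                     *     addu v0, t1, t2      <- checkLIR clobbers v0 (stop)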
+ */ + dvmCompilerInsertLIRBefore((LIR *) checkLIR, + (LIR *) newStoreLIR); + thisLIR->flags.isNop = true; + } + break; + } else if (!checkLIR->flags.isNop) { + sinkDistance++; + } + } + } +} + +/* + * Perform a pass of bottom-up walk, from the second instruction in the + * superblock, to try to hoist loads to earlier slots. + */ +static void applyLoadHoisting(CompilationUnit *cUnit, + MipsLIR *headLIR, + MipsLIR *tailLIR) +{ + MipsLIR *thisLIR, *checkLIR; + /* + * Store the list of independent instructions that can be hoisted past. + * Will decide the best place to insert later. + */ + MipsLIR *prevInstList[MAX_HOIST_DISTANCE]; + + /* Empty block */ + if (headLIR == tailLIR) return; + + /* Start from the second instruction */ + for (thisLIR = NEXT_LIR(headLIR); + thisLIR != tailLIR; + thisLIR = NEXT_LIR(thisLIR)) { + + /* Skip non-interesting instructions */ + if ((thisLIR->flags.isNop == true) || + isPseudoOpCode(thisLIR->opcode) || + !(EncodingMap[thisLIR->opcode].flags & IS_LOAD)) { + continue; + } + + u8 stopUseAllMask = thisLIR->useMask; + + /* + * Branches for null/range checks are marked with the true resource + * bits, and loads to Dalvik registers, constant pools, and non-alias + * locations are safe to be hoisted. So only mark the heap references + * conservatively here. + */ + if (stopUseAllMask & ENCODE_HEAP_REF) { + stopUseAllMask |= ENCODE_REG_PC; + } + + /* Similar as above, but just check for pure register dependency */ + u8 stopUseRegMask = stopUseAllMask & ~ENCODE_MEM; + u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM; + + int nextSlot = 0; + bool stopHere = false; + + /* Try to hoist the load to a good spot */ + for (checkLIR = PREV_LIR(thisLIR); + checkLIR != headLIR; + checkLIR = PREV_LIR(checkLIR)) { + + /* + * Skip already dead instructions (whose dataflow information is + * outdated and misleading). + */ + if (checkLIR->flags.isNop) continue; + + u8 checkMemMask = checkLIR->defMask & ENCODE_MEM; + u8 aliasCondition = stopUseAllMask & checkMemMask; + stopHere = false; + + /* Potential WAR alias seen - check the exact relation */ + if (checkMemMask != ENCODE_MEM && aliasCondition != 0) { + /* We can fully disambiguate Dalvik references */ + if (aliasCondition == ENCODE_DALVIK_REG) { + /* Must alias or partually overlap */ + if ((checkLIR->aliasInfo == thisLIR->aliasInfo) || + isDalvikRegisterClobbered(thisLIR, checkLIR)) { + stopHere = true; + } + /* Conservatively treat all heap refs as may-alias */ + } else { + assert(aliasCondition == ENCODE_HEAP_REF); + stopHere = true; + } + /* Memory content may be updated. Stop looking now. */ + if (stopHere) { + prevInstList[nextSlot++] = checkLIR; + break; + } + } + + if (stopHere == false) { + stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask, + checkLIR); + } + + /* + * Store the dependent or non-pseudo/indepedent instruction to the + * list. + */ + if (stopHere || !isPseudoOpCode(checkLIR->opcode)) { + prevInstList[nextSlot++] = checkLIR; + if (nextSlot == MAX_HOIST_DISTANCE) break; + } + + /* Found a new place to put the load - move it here */ + if (stopHere == true) { + DEBUG_OPT(dumpDependentInsnPair(checkLIR, thisLIR + "HOIST STOP")); + break; + } + } + + /* + * Reached the top - use headLIR as the dependent marker as all labels + * are barriers. + */ + if (stopHere == false && nextSlot < MAX_HOIST_DISTANCE) { + prevInstList[nextSlot++] = headLIR; + } + + /* + * At least one independent instruction is found. Scan in the reversed + * direction to find a beneficial slot. 
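+         *
+         * Roughly: prevInstList[0] is the instruction just above thisLIR and
+         * prevInstList[nextSlot-1] is the dependent stop point.  The scan
+         * below starts at firstSlot = nextSlot - 2 (pulled in further by
+         * LDLD_DISTANCE when the stop point is itself a load) and walks back
+         * toward the load, trying to keep about LD_LATENCY slots between the
+         * hoisted load and its first use.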
+ */ + if (nextSlot >= 2) { + int firstSlot = nextSlot - 2; + int slot; + MipsLIR *depLIR = prevInstList[nextSlot-1]; + /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */ + if (!isPseudoOpCode(depLIR->opcode) && + (EncodingMap[depLIR->opcode].flags & IS_LOAD)) { + firstSlot -= LDLD_DISTANCE; + } + /* + * Make sure we check slot >= 0 since firstSlot may be negative + * when the loop is first entered. + */ + for (slot = firstSlot; slot >= 0; slot--) { + MipsLIR *curLIR = prevInstList[slot]; + MipsLIR *prevLIR = prevInstList[slot+1]; + + /* Check the highest instruction */ + if (prevLIR->defMask == ENCODE_ALL) { + /* + * If the first instruction is a load, don't hoist anything + * above it since it is unlikely to be beneficial. + */ + if (EncodingMap[curLIR->opcode].flags & IS_LOAD) continue; + /* + * If the remaining number of slots is less than LD_LATENCY, + * insert the hoisted load here. + */ + if (slot < LD_LATENCY) break; + } + + /* + * NOTE: now prevLIR is guaranteed to be a non-pseudo + * instruction (ie accessing EncodingMap[prevLIR->opcode] is + * safe). + * + * Try to find two instructions with load/use dependency until + * the remaining instructions are less than LD_LATENCY. + */ + if (((curLIR->useMask & prevLIR->defMask) && + (EncodingMap[prevLIR->opcode].flags & IS_LOAD)) || + (slot < LD_LATENCY)) { + break; + } + } + + /* Found a slot to hoist to */ + if (slot >= 0) { + MipsLIR *curLIR = prevInstList[slot]; + MipsLIR *newLoadLIR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), + true); + *newLoadLIR = *thisLIR; + /* + * Insertion is guaranteed to succeed since checkLIR + * is never the first LIR on the list + */ + dvmCompilerInsertLIRBefore((LIR *) curLIR, + (LIR *) newLoadLIR); + thisLIR->flags.isNop = true; + } + } + } +} + +void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR, + LIR *tailLIR) +{ + if (!(gDvmJit.disableOpt & (1 << kLoadStoreElimination))) { + applyLoadStoreElimination(cUnit, (MipsLIR *) headLIR, + (MipsLIR *) tailLIR); + } + if (!(gDvmJit.disableOpt & (1 << kLoadHoisting))) { + applyLoadHoisting(cUnit, (MipsLIR *) headLIR, (MipsLIR *) tailLIR); + } +} diff --git a/vm/compiler/codegen/mips/Mips32/Factory.cpp b/vm/compiler/codegen/mips/Mips32/Factory.cpp new file mode 100644 index 000000000..9a158b46b --- /dev/null +++ b/vm/compiler/codegen/mips/Mips32/Factory.cpp @@ -0,0 +1,1015 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This file contains codegen for the Thumb ISA and is intended to be + * includes by: + * + * Codegen-$(TARGET_ARCH_VARIANT).c + * + */ + +static int coreTemps[] = {r_V0, r_V1, r_A0, r_A1, r_A2, r_A3, r_T0, r_T1, r_T2, + r_T3, r_T4, r_T5, r_T6, r_T7, r_T8, r_T9, r_S0, r_S4}; +#ifdef __mips_hard_float +static int fpTemps[] = {r_F0, r_F1, r_F2, r_F3, r_F4, r_F5, r_F6, r_F7, + r_F8, r_F9, r_F10, r_F11, r_F12, r_F13, r_F14, r_F15}; +#endif + +static void storePair(CompilationUnit *cUnit, int base, int lowReg, + int highReg); +static void loadPair(CompilationUnit *cUnit, int base, int lowReg, int highReg); +static MipsLIR *loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement, + int rDest); +static MipsLIR *storeWordDisp(CompilationUnit *cUnit, int rBase, + int displacement, int rSrc); +static MipsLIR *genRegRegCheck(CompilationUnit *cUnit, + MipsConditionCode cond, + int reg1, int reg2, int dOffset, + MipsLIR *pcrLabel); +static MipsLIR *loadConstant(CompilationUnit *cUnit, int rDest, int value); + +#ifdef __mips_hard_float +static MipsLIR *fpRegCopy(CompilationUnit *cUnit, int rDest, int rSrc) +{ + MipsLIR* res = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + res->operands[0] = rDest; + res->operands[1] = rSrc; + if (rDest == rSrc) { + res->flags.isNop = true; + } else { + /* must be both DOUBLE or both not DOUBLE */ + assert(DOUBLEREG(rDest) == DOUBLEREG(rSrc)); + if (DOUBLEREG(rDest)) { + res->opcode = kMipsFmovd; + } else { + if (SINGLEREG(rDest)) { + if (SINGLEREG(rSrc)) { + res->opcode = kMipsFmovs; + } else { + /* note the operands are swapped for the mtc1 instr */ + res->opcode = kMipsMtc1; + res->operands[0] = rSrc; + res->operands[1] = rDest; + } + } else { + assert(SINGLEREG(rSrc)); + res->opcode = kMipsMfc1; + } + } + } + setupResourceMasks(res); + return res; +} +#endif + +/* + * Load a immediate using a shortcut if possible; otherwise + * grab from the per-translation literal pool. If target is + * a high register, build constant into a low register and copy. + * + * No additional register clobbering operation performed. Use this version when + * 1) rDest is freshly returned from dvmCompilerAllocTemp or + * 2) The codegen is under fixed register usage + */ +static MipsLIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest, + int value) +{ + MipsLIR *res; + +#ifdef __mips_hard_float + int rDestSave = rDest; + int isFpReg = FPREG(rDest); + if (isFpReg) { + assert(SINGLEREG(rDest)); + rDest = dvmCompilerAllocTemp(cUnit); + } +#endif + + /* See if the value can be constructed cheaply */ + if (value == 0) { + res = newLIR2(cUnit, kMipsMove, rDest, r_ZERO); + } else if ((value > 0) && (value <= 65535)) { + res = newLIR3(cUnit, kMipsOri, rDest, r_ZERO, value); + } else if ((value < 0) && (value >= -32768)) { + res = newLIR3(cUnit, kMipsAddiu, rDest, r_ZERO, value); + } else { + res = newLIR2(cUnit, kMipsLui, rDest, value>>16); + if (value & 0xffff) + newLIR3(cUnit, kMipsOri, rDest, rDest, value); + } + +#ifdef __mips_hard_float + if (isFpReg) { + newLIR2(cUnit, kMipsMtc1, rDest, rDestSave); + dvmCompilerFreeTemp(cUnit, rDest); + } +#endif + + return res; +} + +/* + * Load an immediate value into a fixed or temp register. Target + * register is clobbered, and marked inUse. 
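+ *
+ * The materialization itself (see loadConstantNoClobber above) picks, for a
+ * core destination register, roughly one of these forms (register names are
+ * illustrative):
+ *     value == 0            ->  move  rDest, zero
+ *     0 < value <= 65535    ->  ori   rDest, zero, value
+ *     -32768 <= value < 0   ->  addiu rDest, zero, value
+ *     anything else         ->  lui   rDest, hi16 ; ori rDest, rDest, lo16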
+ */ +static MipsLIR *loadConstant(CompilationUnit *cUnit, int rDest, int value) +{ + if (dvmCompilerIsTemp(cUnit, rDest)) { + dvmCompilerClobber(cUnit, rDest); + dvmCompilerMarkInUse(cUnit, rDest); + } + return loadConstantNoClobber(cUnit, rDest, value); +} + +/* + * Load a class pointer value into a fixed or temp register. Target + * register is clobbered, and marked inUse. + */ +static MipsLIR *loadClassPointer(CompilationUnit *cUnit, int rDest, int value) +{ + MipsLIR *res; + if (dvmCompilerIsTemp(cUnit, rDest)) { + dvmCompilerClobber(cUnit, rDest); + dvmCompilerMarkInUse(cUnit, rDest); + } + res = newLIR2(cUnit, kMipsLui, rDest, value>>16); + if (value & 0xffff) + newLIR3(cUnit, kMipsOri, rDest, rDest, value); + return res; +} + +static MipsLIR *opNone(CompilationUnit *cUnit, OpKind op) +{ + MipsLIR *res; + MipsOpCode opcode = kMipsNop; + switch (op) { + case kOpUncondBr: + opcode = kMipsB; + break; + default: + LOGE("Jit: bad case in opNone"); + dvmCompilerAbort(cUnit); + } + res = newLIR0(cUnit, opcode); + return res; +} + +static MipsLIR *opCompareBranch(CompilationUnit *cUnit, MipsOpCode opc, int rs, int rt) +{ + MipsLIR *res; + if (rt < 0) { + assert(opc >= kMipsBeqz && opc <= kMipsBnez); + res = newLIR1(cUnit, opc, rs); + } else { + assert(opc == kMipsBeq || opc == kMipsBne); + res = newLIR2(cUnit, opc, rs, rt); + } + return res; +} + +static MipsLIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask); + +static MipsLIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc) +{ + MipsOpCode opcode = kMipsNop; + switch (op) { + case kOpBlx: + opcode = kMipsJalr; + break; + default: + assert(0); + } + return newLIR2(cUnit, opcode, r_RA, rDestSrc); +} + +static MipsLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, + int rSrc1, int value); +static MipsLIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1, + int value) +{ + MipsLIR *res; + bool neg = (value < 0); + int absValue = (neg) ? 
-value : value; + bool shortForm = (absValue & 0xff) == absValue; + MipsOpCode opcode = kMipsNop; + switch (op) { + case kOpAdd: + return opRegRegImm(cUnit, op, rDestSrc1, rDestSrc1, value); + break; + case kOpSub: + return opRegRegImm(cUnit, op, rDestSrc1, rDestSrc1, value); + break; + default: + LOGE("Jit: bad case in opRegImm"); + dvmCompilerAbort(cUnit); + break; + } + if (shortForm) + res = newLIR2(cUnit, opcode, rDestSrc1, absValue); + else { + int rScratch = dvmCompilerAllocTemp(cUnit); + res = loadConstant(cUnit, rScratch, value); + if (op == kOpCmp) + newLIR2(cUnit, opcode, rDestSrc1, rScratch); + else + newLIR3(cUnit, opcode, rDestSrc1, rDestSrc1, rScratch); + } + return res; +} + +static MipsLIR *opRegRegReg(CompilationUnit *cUnit, OpKind op, int rDest, + int rSrc1, int rSrc2) +{ + MipsOpCode opcode = kMipsNop; + switch (op) { + case kOpAdd: + opcode = kMipsAddu; + break; + case kOpSub: + opcode = kMipsSubu; + break; + case kOpAnd: + opcode = kMipsAnd; + break; + case kOpMul: + opcode = kMipsMul; + break; + case kOpOr: + opcode = kMipsOr; + break; + case kOpXor: + opcode = kMipsXor; + break; + case kOpLsl: + opcode = kMipsSllv; + break; + case kOpLsr: + opcode = kMipsSrlv; + break; + case kOpAsr: + opcode = kMipsSrav; + break; + default: + LOGE("Jit: bad case in opRegRegReg"); + dvmCompilerAbort(cUnit); + break; + } + return newLIR3(cUnit, opcode, rDest, rSrc1, rSrc2); +} + +static MipsLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, + int rSrc1, int value) +{ + MipsLIR *res; + MipsOpCode opcode = kMipsNop; + bool shortForm = true; + + switch(op) { + case kOpAdd: + if (IS_SIMM16(value)) { + opcode = kMipsAddiu; + } + else { + shortForm = false; + opcode = kMipsAddu; + } + break; + case kOpSub: + if (IS_SIMM16((-value))) { + value = -value; + opcode = kMipsAddiu; + } + else { + shortForm = false; + opcode = kMipsSubu; + } + break; + case kOpLsl: + assert(value >= 0 && value <= 31); + opcode = kMipsSll; + break; + case kOpLsr: + assert(value >= 0 && value <= 31); + opcode = kMipsSrl; + break; + case kOpAsr: + assert(value >= 0 && value <= 31); + opcode = kMipsSra; + break; + case kOpAnd: + if (IS_UIMM16((value))) { + opcode = kMipsAndi; + } + else { + shortForm = false; + opcode = kMipsAnd; + } + break; + case kOpOr: + if (IS_UIMM16((value))) { + opcode = kMipsOri; + } + else { + shortForm = false; + opcode = kMipsOr; + } + break; + case kOpXor: + if (IS_UIMM16((value))) { + opcode = kMipsXori; + } + else { + shortForm = false; + opcode = kMipsXor; + } + break; + case kOpMul: + shortForm = false; + opcode = kMipsMul; + break; + default: + LOGE("Jit: bad case in opRegRegImm"); + dvmCompilerAbort(cUnit); + break; + } + + if (shortForm) + res = newLIR3(cUnit, opcode, rDest, rSrc1, value); + else { + if (rDest != rSrc1) { + res = loadConstant(cUnit, rDest, value); + newLIR3(cUnit, opcode, rDest, rSrc1, rDest); + } else { + int rScratch = dvmCompilerAllocTemp(cUnit); + res = loadConstant(cUnit, rScratch, value); + newLIR3(cUnit, opcode, rDest, rSrc1, rScratch); + } + } + return res; +} + +static MipsLIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1, + int rSrc2) +{ + MipsOpCode opcode = kMipsNop; + MipsLIR *res; + switch (op) { + case kOpMov: + opcode = kMipsMove; + break; + case kOpMvn: + return newLIR3(cUnit, kMipsNor, rDestSrc1, rSrc2, r_ZERO); + case kOpNeg: + return newLIR3(cUnit, kMipsSubu, rDestSrc1, r_ZERO, rSrc2); + case kOpAdd: + case kOpAnd: + case kOpMul: + case kOpOr: + case kOpSub: + case kOpXor: + return opRegRegReg(cUnit, op, rDestSrc1, 
rDestSrc1, rSrc2); + case kOp2Byte: +#if __mips_isa_rev>=2 + res = newLIR2(cUnit, kMipsSeb, rDestSrc1, rSrc2); +#else + res = opRegRegImm(cUnit, kOpLsl, rDestSrc1, rSrc2, 24); + opRegRegImm(cUnit, kOpAsr, rDestSrc1, rDestSrc1, 24); +#endif + return res; + case kOp2Short: +#if __mips_isa_rev>=2 + res = newLIR2(cUnit, kMipsSeh, rDestSrc1, rSrc2); +#else + res = opRegRegImm(cUnit, kOpLsl, rDestSrc1, rSrc2, 16); + opRegRegImm(cUnit, kOpAsr, rDestSrc1, rDestSrc1, 16); +#endif + return res; + case kOp2Char: + return newLIR3(cUnit, kMipsAndi, rDestSrc1, rSrc2, 0xFFFF); + default: + LOGE("Jit: bad case in opRegReg"); + dvmCompilerAbort(cUnit); + break; + } + return newLIR2(cUnit, opcode, rDestSrc1, rSrc2); +} + +static MipsLIR *loadConstantValueWide(CompilationUnit *cUnit, int rDestLo, + int rDestHi, int valLo, int valHi) +{ + MipsLIR *res; + res = loadConstantNoClobber(cUnit, rDestLo, valLo); + loadConstantNoClobber(cUnit, rDestHi, valHi); + return res; +} + +/* Load value from base + scaled index. */ +static MipsLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase, + int rIndex, int rDest, int scale, OpSize size) +{ + MipsLIR *first = NULL; + MipsLIR *res; + MipsOpCode opcode = kMipsNop; + int tReg = dvmCompilerAllocTemp(cUnit); + +#ifdef __mips_hard_float + if (FPREG(rDest)) { + assert(SINGLEREG(rDest)); + assert((size == kWord) || (size == kSingle)); + size = kSingle; + } else { + if (size == kSingle) + size = kWord; + } +#endif + + if (!scale) { + first = newLIR3(cUnit, kMipsAddu, tReg , rBase, rIndex); + } else { + first = opRegRegImm(cUnit, kOpLsl, tReg, rIndex, scale); + newLIR3(cUnit, kMipsAddu, tReg , rBase, tReg); + } + + switch (size) { +#ifdef __mips_hard_float + case kSingle: + opcode = kMipsFlwc1; + break; +#endif + case kWord: + opcode = kMipsLw; + break; + case kUnsignedHalf: + opcode = kMipsLhu; + break; + case kSignedHalf: + opcode = kMipsLh; + break; + case kUnsignedByte: + opcode = kMipsLbu; + break; + case kSignedByte: + opcode = kMipsLb; + break; + default: + LOGE("Jit: bad case in loadBaseIndexed"); + dvmCompilerAbort(cUnit); + } + + res = newLIR3(cUnit, opcode, rDest, 0, tReg); +#if defined(WITH_SELF_VERIFICATION) + if (cUnit->heapMemOp) + res->flags.insertWrapper = true; +#endif + dvmCompilerFreeTemp(cUnit, tReg); + return (first) ? first : res; +} + +/* store value base base + scaled index. 
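+ *
+ * As with the load above, this expands into an address computation plus the
+ * store proper, e.g. (temp register is illustrative):
+ *     sll  t0, rIndex, scale      # skipped when scale == 0
+ *     addu t0, rBase, t0          # addu t0, rBase, rIndex when scale == 0
+ *     sw   rSrc, 0(t0)            # opcode picked from the operand size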
*/ +static MipsLIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase, + int rIndex, int rSrc, int scale, OpSize size) +{ + MipsLIR *first = NULL; + MipsLIR *res; + MipsOpCode opcode = kMipsNop; + int rNewIndex = rIndex; + int tReg = dvmCompilerAllocTemp(cUnit); + +#ifdef __mips_hard_float + if (FPREG(rSrc)) { + assert(SINGLEREG(rSrc)); + assert((size == kWord) || (size == kSingle)); + size = kSingle; + } else { + if (size == kSingle) + size = kWord; + } +#endif + + if (!scale) { + first = newLIR3(cUnit, kMipsAddu, tReg , rBase, rIndex); + } else { + first = opRegRegImm(cUnit, kOpLsl, tReg, rIndex, scale); + newLIR3(cUnit, kMipsAddu, tReg , rBase, tReg); + } + + switch (size) { +#ifdef __mips_hard_float + case kSingle: + opcode = kMipsFswc1; + break; +#endif + case kWord: + opcode = kMipsSw; + break; + case kUnsignedHalf: + case kSignedHalf: + opcode = kMipsSh; + break; + case kUnsignedByte: + case kSignedByte: + opcode = kMipsSb; + break; + default: + LOGE("Jit: bad case in storeBaseIndexed"); + dvmCompilerAbort(cUnit); + } + res = newLIR3(cUnit, opcode, rSrc, 0, tReg); +#if defined(WITH_SELF_VERIFICATION) + if (cUnit->heapMemOp) + res->flags.insertWrapper = true; +#endif + dvmCompilerFreeTemp(cUnit, rNewIndex); + return first; +} + +static MipsLIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask) +{ + int i; + int loadCnt = 0; + MipsLIR *res = NULL ; + genBarrier(cUnit); + + for (i = 0; i < 8; i++, rMask >>= 1) { + if (rMask & 0x1) { /* map r0 to MIPS r_A0 */ + newLIR3(cUnit, kMipsLw, i+r_A0, loadCnt*4, rBase); + loadCnt++; + } + } + + if (loadCnt) {/* increment after */ + newLIR3(cUnit, kMipsAddiu, rBase, rBase, loadCnt*4); + } + +#if defined(WITH_SELF_VERIFICATION) + if (cUnit->heapMemOp) + res->flags.insertWrapper = true; +#endif + genBarrier(cUnit); + return res; /* NULL always returned which should be ok since no callers use it */ +} + +static MipsLIR *storeMultiple(CompilationUnit *cUnit, int rBase, int rMask) +{ + int i; + int storeCnt = 0; + MipsLIR *res = NULL ; + genBarrier(cUnit); + + for (i = 0; i < 8; i++, rMask >>= 1) { + if (rMask & 0x1) { /* map r0 to MIPS r_A0 */ + newLIR3(cUnit, kMipsSw, i+r_A0, storeCnt*4, rBase); + storeCnt++; + } + } + + if (storeCnt) { /* increment after */ + newLIR3(cUnit, kMipsAddiu, rBase, rBase, storeCnt*4); + } + +#if defined(WITH_SELF_VERIFICATION) + if (cUnit->heapMemOp) + res->flags.insertWrapper = true; +#endif + genBarrier(cUnit); + return res; /* NULL always returned which should be ok since no callers use it */ +} + +static MipsLIR *loadBaseDispBody(CompilationUnit *cUnit, MIR *mir, int rBase, + int displacement, int rDest, int rDestHi, + OpSize size, int sReg) +/* + * Load value from base + displacement. Optionally perform null check + * on base (which must have an associated sReg and MIR). If not + * performing null check, incoming MIR can be null. IMPORTANT: this + * code must not allocate any new temps. If a new register is needed + * and base and dest are the same, spill some other register to + * rlp and then restore. 
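+ *
+ * For the 64-bit (kLong/kDouble) case with an in-range displacement this
+ * turns into two word loads, e.g. (operands are illustrative):
+ *     lw rDestLo, displacement+LOWORD_OFFSET(rBase)
+ *     lw rDestHi, displacement+HIWORD_OFFSET(rBase)
+ * falling back to forming the address in a temp register first when the
+ * displacement does not fit in a signed 16-bit immediate.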
+ */ +{ + MipsLIR *res; + MipsLIR *load = NULL; + MipsLIR *load2 = NULL; + MipsOpCode opcode = kMipsNop; + bool shortForm = IS_SIMM16(displacement); + bool pair = false; + + switch (size) { + case kLong: + case kDouble: + pair = true; + opcode = kMipsLw; +#ifdef __mips_hard_float + if (FPREG(rDest)) { + opcode = kMipsFlwc1; + if (DOUBLEREG(rDest)) { + rDest = rDest - FP_DOUBLE; + } else { + assert(FPREG(rDestHi)); + assert(rDest == (rDestHi - 1)); + } + rDestHi = rDest + 1; + } +#endif + shortForm = IS_SIMM16_2WORD(displacement); + assert((displacement & 0x3) == 0); + break; + case kWord: + case kSingle: + opcode = kMipsLw; +#ifdef __mips_hard_float + if (FPREG(rDest)) { + opcode = kMipsFlwc1; + assert(SINGLEREG(rDest)); + } +#endif + assert((displacement & 0x3) == 0); + break; + case kUnsignedHalf: + opcode = kMipsLhu; + assert((displacement & 0x1) == 0); + break; + case kSignedHalf: + opcode = kMipsLh; + assert((displacement & 0x1) == 0); + break; + case kUnsignedByte: + opcode = kMipsLbu; + break; + case kSignedByte: + opcode = kMipsLb; + break; + default: + LOGE("Jit: bad case in loadBaseIndexedBody"); + dvmCompilerAbort(cUnit); + } + + if (shortForm) { + if (!pair) { + load = res = newLIR3(cUnit, opcode, rDest, displacement, rBase); + } else { + load = res = newLIR3(cUnit, opcode, rDest, displacement + LOWORD_OFFSET, rBase); + load2 = newLIR3(cUnit, opcode, rDestHi, displacement + HIWORD_OFFSET, rBase); + } + } else { + if (pair) { + int rTmp = dvmCompilerAllocFreeTemp(cUnit); + res = opRegRegImm(cUnit, kOpAdd, rTmp, rBase, displacement); + load = newLIR3(cUnit, opcode, rDest, LOWORD_OFFSET, rTmp); + load2 = newLIR3(cUnit, opcode, rDestHi, HIWORD_OFFSET, rTmp); + dvmCompilerFreeTemp(cUnit, rTmp); + } else { + int rTmp = (rBase == rDest) ? dvmCompilerAllocFreeTemp(cUnit) + : rDest; + res = loadConstant(cUnit, rTmp, displacement); + load = newLIR3(cUnit, opcode, rDest, rBase, rTmp); + if (rTmp != rDest) + dvmCompilerFreeTemp(cUnit, rTmp); + } + } + + if (rBase == rFP) { + if (load != NULL) + annotateDalvikRegAccess(load, (displacement + (pair ? 
LOWORD_OFFSET : 0)) >> 2, + true /* isLoad */); + if (load2 != NULL) + annotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2, + true /* isLoad */); + } +#if defined(WITH_SELF_VERIFICATION) + if (load != NULL && cUnit->heapMemOp) + load->flags.insertWrapper = true; + if (load2 != NULL && cUnit->heapMemOp) + load2->flags.insertWrapper = true; +#endif + return load; +} + +static MipsLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase, + int displacement, int rDest, OpSize size, + int sReg) +{ + return loadBaseDispBody(cUnit, mir, rBase, displacement, rDest, -1, + size, sReg); +} + +static MipsLIR *loadBaseDispWide(CompilationUnit *cUnit, MIR *mir, int rBase, + int displacement, int rDestLo, int rDestHi, + int sReg) +{ + return loadBaseDispBody(cUnit, mir, rBase, displacement, rDestLo, rDestHi, + kLong, sReg); +} + +static MipsLIR *storeBaseDispBody(CompilationUnit *cUnit, int rBase, + int displacement, int rSrc, int rSrcHi, + OpSize size) +{ + MipsLIR *res; + MipsLIR *store = NULL; + MipsLIR *store2 = NULL; + MipsOpCode opcode = kMipsNop; + bool shortForm = IS_SIMM16(displacement); + bool pair = false; + + switch (size) { + case kLong: + case kDouble: + pair = true; + opcode = kMipsSw; +#ifdef __mips_hard_float + if (FPREG(rSrc)) { + opcode = kMipsFswc1; + if (DOUBLEREG(rSrc)) { + rSrc = rSrc - FP_DOUBLE; + } else { + assert(FPREG(rSrcHi)); + assert(rSrc == (rSrcHi - 1)); + } + rSrcHi = rSrc + 1; + } +#endif + shortForm = IS_SIMM16_2WORD(displacement); + assert((displacement & 0x3) == 0); + break; + case kWord: + case kSingle: + opcode = kMipsSw; +#ifdef __mips_hard_float + if (FPREG(rSrc)) { + opcode = kMipsFswc1; + assert(SINGLEREG(rSrc)); + } +#endif + assert((displacement & 0x3) == 0); + break; + case kUnsignedHalf: + case kSignedHalf: + opcode = kMipsSh; + assert((displacement & 0x1) == 0); + break; + case kUnsignedByte: + case kSignedByte: + opcode = kMipsSb; + break; + default: + LOGE("Jit: bad case in storeBaseIndexedBody"); + dvmCompilerAbort(cUnit); + } + + if (shortForm) { + if (!pair) { + store = res = newLIR3(cUnit, opcode, rSrc, displacement, rBase); + } else { + store = res = newLIR3(cUnit, opcode, rSrc, displacement + LOWORD_OFFSET, rBase); + store2 = newLIR3(cUnit, opcode, rSrcHi, displacement + HIWORD_OFFSET, rBase); + } + } else { + int rScratch = dvmCompilerAllocTemp(cUnit); + res = opRegRegImm(cUnit, kOpAdd, rScratch, rBase, displacement); + if (!pair) { + store = newLIR3(cUnit, opcode, rSrc, 0, rScratch); + } else { + store = newLIR3(cUnit, opcode, rSrc, LOWORD_OFFSET, rScratch); + store2 = newLIR3(cUnit, opcode, rSrcHi, HIWORD_OFFSET, rScratch); + } + dvmCompilerFreeTemp(cUnit, rScratch); + } + + if (rBase == rFP) { + if (store != NULL) + annotateDalvikRegAccess(store, (displacement + (pair ? 
LOWORD_OFFSET : 0)) >> 2, + false /* isLoad */); + if (store2 != NULL) + annotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2, + false /* isLoad */); + } + +#if defined(WITH_SELF_VERIFICATION) + if (store != NULL && cUnit->heapMemOp) + store->flags.insertWrapper = true; + if (store2 != NULL && cUnit->heapMemOp) + store2->flags.insertWrapper = true; +#endif + return res; +} + +static MipsLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, + int displacement, int rSrc, OpSize size) +{ + return storeBaseDispBody(cUnit, rBase, displacement, rSrc, -1, size); +} + +static MipsLIR *storeBaseDispWide(CompilationUnit *cUnit, int rBase, + int displacement, int rSrcLo, int rSrcHi) +{ + return storeBaseDispBody(cUnit, rBase, displacement, rSrcLo, rSrcHi, kLong); +} + +static void storePair(CompilationUnit *cUnit, int base, int lowReg, int highReg) +{ + storeWordDisp(cUnit, base, LOWORD_OFFSET, lowReg); + storeWordDisp(cUnit, base, HIWORD_OFFSET, highReg); +} + +static void loadPair(CompilationUnit *cUnit, int base, int lowReg, int highReg) +{ + loadWordDisp(cUnit, base, LOWORD_OFFSET , lowReg); + loadWordDisp(cUnit, base, HIWORD_OFFSET , highReg); +} + +static MipsLIR* genRegCopyNoInsert(CompilationUnit *cUnit, int rDest, int rSrc) +{ + MipsLIR* res; + MipsOpCode opcode; +#ifdef __mips_hard_float + if (FPREG(rDest) || FPREG(rSrc)) + return fpRegCopy(cUnit, rDest, rSrc); +#endif + res = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + opcode = kMipsMove; + assert(LOWREG(rDest) && LOWREG(rSrc)); + res->operands[0] = rDest; + res->operands[1] = rSrc; + res->opcode = opcode; + setupResourceMasks(res); + if (rDest == rSrc) { + res->flags.isNop = true; + } + return res; +} + +static MipsLIR* genRegCopy(CompilationUnit *cUnit, int rDest, int rSrc) +{ + MipsLIR *res = genRegCopyNoInsert(cUnit, rDest, rSrc); + dvmCompilerAppendLIR(cUnit, (LIR*)res); + return res; +} + +static void genRegCopyWide(CompilationUnit *cUnit, int destLo, int destHi, + int srcLo, int srcHi) +{ +#ifdef __mips_hard_float + bool destFP = FPREG(destLo) && FPREG(destHi); + bool srcFP = FPREG(srcLo) && FPREG(srcHi); + assert(FPREG(srcLo) == FPREG(srcHi)); + assert(FPREG(destLo) == FPREG(destHi)); + if (destFP) { + if (srcFP) { + genRegCopy(cUnit, S2D(destLo, destHi), S2D(srcLo, srcHi)); + } else { + /* note the operands are swapped for the mtc1 instr */ + newLIR2(cUnit, kMipsMtc1, srcLo, destLo); + newLIR2(cUnit, kMipsMtc1, srcHi, destHi); + } + } else { + if (srcFP) { + newLIR2(cUnit, kMipsMfc1, destLo, srcLo); + newLIR2(cUnit, kMipsMfc1, destHi, srcHi); + } else { + // Handle overlap + if (srcHi == destLo) { + genRegCopy(cUnit, destHi, srcHi); + genRegCopy(cUnit, destLo, srcLo); + } else { + genRegCopy(cUnit, destLo, srcLo); + genRegCopy(cUnit, destHi, srcHi); + } + } + } +#else + // Handle overlap + if (srcHi == destLo) { + genRegCopy(cUnit, destHi, srcHi); + genRegCopy(cUnit, destLo, srcLo); + } else { + genRegCopy(cUnit, destLo, srcLo); + genRegCopy(cUnit, destHi, srcHi); + } +#endif +} + +static inline MipsLIR *genRegImmCheck(CompilationUnit *cUnit, + MipsConditionCode cond, int reg, + int checkValue, int dOffset, + MipsLIR *pcrLabel) +{ + MipsLIR *branch = NULL; + + if (checkValue == 0) { + MipsOpCode opc = kMipsNop; + if (cond == kMipsCondEq) { + opc = kMipsBeqz; + } else if (cond == kMipsCondNe) { + opc = kMipsBnez; + } else if (cond == kMipsCondLt || cond == kMipsCondMi) { + opc = kMipsBltz; + } else if (cond == kMipsCondLe) { + opc = kMipsBlez; + } else if (cond == kMipsCondGt) { + opc = kMipsBgtz; + } else 
if (cond == kMipsCondGe) { + opc = kMipsBgez; + } else { + LOGE("Jit: bad case in genRegImmCheck"); + dvmCompilerAbort(cUnit); + } + branch = opCompareBranch(cUnit, opc, reg, -1); + } else if (IS_SIMM16(checkValue)) { + if (cond == kMipsCondLt) { + int tReg = dvmCompilerAllocTemp(cUnit); + newLIR3(cUnit, kMipsSlti, tReg, reg, checkValue); + branch = opCompareBranch(cUnit, kMipsBne, tReg, r_ZERO); + dvmCompilerFreeTemp(cUnit, tReg); + } else { + LOGE("Jit: bad case in genRegImmCheck"); + dvmCompilerAbort(cUnit); + } + } else { + LOGE("Jit: bad case in genRegImmCheck"); + dvmCompilerAbort(cUnit); + } + + if (cUnit->jitMode == kJitMethod) { + BasicBlock *bb = cUnit->curBlock; + if (bb->taken) { + MipsLIR *exceptionLabel = (MipsLIR *) cUnit->blockLabelList; + exceptionLabel += bb->taken->id; + branch->generic.target = (LIR *) exceptionLabel; + return exceptionLabel; + } else { + LOGE("Catch blocks not handled yet"); + dvmAbort(); + return NULL; + } + } else { + return genCheckCommon(cUnit, dOffset, branch, pcrLabel); + } +} + +#if defined(WITH_SELF_VERIFICATION) +static void genSelfVerificationPreBranch(CompilationUnit *cUnit, + MipsLIR *origLIR) { +// DOUGLAS - this still needs to be implemented for MIPS. +#if 0 + /* + * We need two separate pushes, since we want r5 to be pushed first. + * Store multiple will push LR first. + */ + MipsLIR *pushFP = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + pushFP->opcode = kThumbPush; + pushFP->operands[0] = 1 << r5FP; + setupResourceMasks(pushFP); + dvmCompilerInsertLIRBefore((LIR *) origLIR, (LIR *) pushFP); + + MipsLIR *pushLR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + pushLR->opcode = kThumbPush; + /* Thumb push can handle LR, but is encoded differently at bit 8 */ + pushLR->operands[0] = 1 << 8; + setupResourceMasks(pushLR); + dvmCompilerInsertLIRBefore((LIR *) origLIR, (LIR *) pushLR); +#endif +} + +static void genSelfVerificationPostBranch(CompilationUnit *cUnit, + MipsLIR *origLIR) { +// DOUGLAS - this still needs to be implemented for MIPS. +#if 0 + /* + * Since Thumb cannot pop memory content into LR, we have to pop LR + * to a temp first (r5 in this case). Then we move r5 to LR, then pop the + * original r5 from stack. + */ + /* Pop memory content(LR) into r5 first */ + MipsLIR *popForLR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + popForLR->opcode = kThumbPop; + popForLR->operands[0] = 1 << r5FP; + setupResourceMasks(popForLR); + dvmCompilerInsertLIRAfter((LIR *) origLIR, (LIR *) popForLR); + + MipsLIR *copy = genRegCopyNoInsert(cUnit, r14lr, r5FP); + dvmCompilerInsertLIRAfter((LIR *) popForLR, (LIR *) copy); + + /* Now restore the original r5 */ + MipsLIR *popFP = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true); + popFP->opcode = kThumbPop; + popFP->operands[0] = 1 << r5FP; + setupResourceMasks(popFP); + dvmCompilerInsertLIRAfter((LIR *) copy, (LIR *) popFP); +#endif +} +#endif diff --git a/vm/compiler/codegen/mips/Mips32/Gen.cpp b/vm/compiler/codegen/mips/Mips32/Gen.cpp new file mode 100644 index 000000000..29c7c5fbd --- /dev/null +++ b/vm/compiler/codegen/mips/Mips32/Gen.cpp @@ -0,0 +1,313 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains codegen for the Mips ISA and is intended to be + * includes by: + * + * Codegen-$(TARGET_ARCH_VARIANT).c + * + */ + +/* + * Reserve 8 bytes at the beginning of the trace + * +----------------------------+ + * | prof count addr (4 bytes) | + * +----------------------------+ + * | chain cell offset (4 bytes)| + * +----------------------------+ + * + * ...and then code to increment the execution + * + * For continuous profiling (24 bytes) + * lahi a0, addr # get ptr to prof count addr into a0 + * lalo a0, addr + * lw a0, 0(a0) # read prof count addr into a0 + * lw a1, 0(a0) # read prof count into a1 + * addiu a1, a1, 1 # increment count + * sw a1, 0(a0) # store count + * + * For periodic profiling (8 bytes) + * call TEMPLATE_PERIODIC_PROFILING + * nop + * + * and return the size (in bytes) of the generated code. + */ +static int genTraceProfileEntry(CompilationUnit *cUnit) +{ + intptr_t addr = (intptr_t)dvmJitNextTraceCounter(); + assert(__BYTE_ORDER == __LITTLE_ENDIAN); + MipsLIR *executionCount = newLIR1(cUnit, kMips32BitData, addr); + cUnit->chainCellOffsetLIR = + (LIR *) newLIR1(cUnit, kMips32BitData, CHAIN_CELL_OFFSET_TAG); + cUnit->headerSize = 8; + if ((gDvmJit.profileMode == kTraceProfilingContinuous) || + (gDvmJit.profileMode == kTraceProfilingDisabled)) { + MipsLIR *loadAddr = newLIR2(cUnit, kMipsLahi, r_A0, 0); + loadAddr->generic.target = (LIR *) executionCount; + loadAddr = newLIR3(cUnit, kMipsLalo, r_A0, r_A0, 0); + loadAddr ->generic.target = (LIR *) executionCount; + newLIR3(cUnit, kMipsLw, r_A0, 0, r_A0); + newLIR3(cUnit, kMipsLw, r_A1, 0, r_A0); + newLIR3(cUnit, kMipsAddiu, r_A1, r_A1, 1); + newLIR3(cUnit, kMipsSw, r_A1, 0, r_A0); + return 24; + } else { + int opcode = TEMPLATE_PERIODIC_PROFILING; + newLIR1(cUnit, kMipsJal, + (int) gDvmJit.codeCache + templateEntryOffsets[opcode]); + newLIR0(cUnit, kMipsNop); /* delay slot */ + return 8; + } +} + +/* + * Perform a "reg cmp imm" operation and jump to the PCR region if condition + * satisfies. 
+ */ +static void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest, + RegLocation rlSrc) +{ + RegLocation rlResult; + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + opRegRegImm(cUnit, kOpAdd, rlResult.lowReg, + rlSrc.lowReg, 0x80000000); + storeValue(cUnit, rlDest, rlResult); +} + +static void genNegDouble(CompilationUnit *cUnit, RegLocation rlDest, + RegLocation rlSrc) +{ + RegLocation rlResult; + rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + opRegRegImm(cUnit, kOpAdd, rlResult.highReg, rlSrc.highReg, + 0x80000000); + genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg); + storeValueWide(cUnit, rlDest, rlResult); +} + +static void genMulLong(CompilationUnit *cUnit, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + RegLocation rlResult; + loadValueDirectWideFixed(cUnit, rlSrc1, r_ARG0, r_ARG1); + loadValueDirectWideFixed(cUnit, rlSrc2, r_ARG2, r_ARG3); + genDispatchToHandler(cUnit, TEMPLATE_MUL_LONG); + rlResult = dvmCompilerGetReturnWide(cUnit); + storeValueWide(cUnit, rlDest, rlResult); +} + +static bool partialOverlap(int sreg1, int sreg2) +{ + return abs(sreg1 - sreg2) == 1; +} + +static void withCarryHelper(CompilationUnit *cUnit, MipsOpCode opc, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2, int sltuSrc1, int sltuSrc2) +{ + int tReg = dvmCompilerAllocTemp(cUnit); + newLIR3(cUnit, opc, rlDest.lowReg, rlSrc1.lowReg, rlSrc2.lowReg); + newLIR3(cUnit, kMipsSltu, tReg, sltuSrc1, sltuSrc2); + newLIR3(cUnit, opc, rlDest.highReg, rlSrc1.highReg, rlSrc2.highReg); + newLIR3(cUnit, opc, rlDest.highReg, rlDest.highReg, tReg); + dvmCompilerFreeTemp(cUnit, tReg); +} + +static void genLong3Addr(CompilationUnit *cUnit, MIR *mir, OpKind firstOp, + OpKind secondOp, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + RegLocation rlResult; + int carryOp = (secondOp == kOpAdc || secondOp == kOpSbc); + + if (partialOverlap(rlSrc1.sRegLow,rlSrc2.sRegLow) || + partialOverlap(rlSrc1.sRegLow,rlDest.sRegLow) || + partialOverlap(rlSrc2.sRegLow,rlDest.sRegLow)) { + // Rare case - not enough registers to properly handle + genInterpSingleStep(cUnit, mir); + } else if (rlDest.sRegLow == rlSrc1.sRegLow) { + rlResult = loadValueWide(cUnit, rlDest, kCoreReg); + rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); + if (!carryOp) { + opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlResult.lowReg, rlSrc2.lowReg); + opRegRegReg(cUnit, secondOp, rlResult.highReg, rlResult.highReg, rlSrc2.highReg); + } else if (secondOp == kOpAdc) { + withCarryHelper(cUnit, kMipsAddu, rlResult, rlResult, rlSrc2, + rlResult.lowReg, rlSrc2.lowReg); + } else { + int tReg = dvmCompilerAllocTemp(cUnit); + newLIR2(cUnit, kMipsMove, tReg, rlResult.lowReg); + withCarryHelper(cUnit, kMipsSubu, rlResult, rlResult, rlSrc2, + tReg, rlResult.lowReg); + dvmCompilerFreeTemp(cUnit, tReg); + } + storeValueWide(cUnit, rlDest, rlResult); + } else if (rlDest.sRegLow == rlSrc2.sRegLow) { + rlResult = loadValueWide(cUnit, rlDest, kCoreReg); + rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg); + if (!carryOp) { + opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlResult.lowReg); + opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg, rlResult.highReg); + } else if (secondOp == kOpAdc) { + withCarryHelper(cUnit, kMipsAddu, rlResult, rlSrc1, rlResult, + rlResult.lowReg, rlSrc1.lowReg); + } else { + withCarryHelper(cUnit, kMipsSubu, rlResult, rlSrc1, rlResult, + rlSrc1.lowReg, 
rlResult.lowReg); + } + storeValueWide(cUnit, rlDest, rlResult); + } else { + rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg); + rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + if (!carryOp) { + opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg); + opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg, rlSrc2.highReg); + } else if (secondOp == kOpAdc) { + withCarryHelper(cUnit, kMipsAddu, rlResult, rlSrc1, rlSrc2, + rlResult.lowReg, rlSrc1.lowReg); + } else { + withCarryHelper(cUnit, kMipsSubu, rlResult, rlSrc1, rlSrc2, + rlSrc1.lowReg, rlResult.lowReg); + } + storeValueWide(cUnit, rlDest, rlResult); + } +} + +void dvmCompilerInitializeRegAlloc(CompilationUnit *cUnit) +{ + int numTemps = sizeof(coreTemps)/sizeof(int); + RegisterPool *pool = (RegisterPool *) dvmCompilerNew(sizeof(*pool), true); + cUnit->regPool = pool; + pool->numCoreTemps = numTemps; + pool->coreTemps = + (RegisterInfo *) dvmCompilerNew(numTemps * sizeof(*pool->coreTemps), true); + dvmCompilerInitPool(pool->coreTemps, coreTemps, pool->numCoreTemps); +#ifdef __mips_hard_float + int numFPTemps = sizeof(fpTemps)/sizeof(int); + pool->numFPTemps = numFPTemps; + pool->FPTemps = + (RegisterInfo *) dvmCompilerNew(numFPTemps * sizeof(*pool->FPTemps), true); + dvmCompilerInitPool(pool->FPTemps, fpTemps, pool->numFPTemps); +#else + pool->numFPTemps = 0; + pool->FPTemps = NULL; + dvmCompilerInitPool(pool->FPTemps, NULL, 0); +#endif + pool->nullCheckedRegs = + dvmCompilerAllocBitVector(cUnit->numSSARegs, false); +} + +/* Export the Dalvik PC assicated with an instruction to the StackSave area */ +static MipsLIR *genExportPC(CompilationUnit *cUnit, MIR *mir) +{ + MipsLIR *res; + int rDPC = dvmCompilerAllocTemp(cUnit); + int rAddr = dvmCompilerAllocTemp(cUnit); + int offset = offsetof(StackSaveArea, xtra.currentPc); + res = loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset)); + newLIR3(cUnit, kMipsAddiu, rAddr, rFP, -(sizeof(StackSaveArea) - offset)); + storeWordDisp( cUnit, rAddr, 0, rDPC); + return res; +} + +static void genMonitor(CompilationUnit *cUnit, MIR *mir) +{ + genMonitorPortable(cUnit, mir); +} + +static void genCmpLong(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + RegLocation rlResult; + loadValueDirectWideFixed(cUnit, rlSrc1, r_ARG0, r_ARG1); + loadValueDirectWideFixed(cUnit, rlSrc2, r_ARG2, r_ARG3); + genDispatchToHandler(cUnit, TEMPLATE_CMP_LONG); + rlResult = dvmCompilerGetReturn(cUnit); + storeValue(cUnit, rlDest, rlResult); +} + +static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir) +{ + int offset = offsetof(Thread, interpSave.retval); + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + int reg0 = loadValue(cUnit, rlSrc, kCoreReg).lowReg; +#if __mips_isa_rev>=2 + newLIR4(cUnit, kMipsExt, reg0, reg0, 0, 31-1 /* size-1 */); +#else + newLIR2(cUnit, kMipsSll, reg0, 1); + newLIR2(cUnit, kMipsSrl, reg0, 1); +#endif + storeWordDisp(cUnit, rSELF, offset, reg0); + //TUNING: rewrite this to not clobber + dvmCompilerClobber(cUnit, reg0); + return false; +} + +static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir) +{ + int offset = offsetof(Thread, interpSave.retval); + RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + RegLocation regSrc = loadValueWide(cUnit, rlSrc, kCoreReg); + int reglo = regSrc.lowReg; + int reghi = regSrc.highReg; + storeWordDisp(cUnit, rSELF, offset + LOWORD_OFFSET, reglo); +#if __mips_isa_rev>=2 + 
newLIR4(cUnit, kMipsExt, reghi, reghi, 0, 31-1 /* size-1 */); +#else + newLIR2(cUnit, kMipsSll, reghi, 1); + newLIR2(cUnit, kMipsSrl, reghi, 1); +#endif + storeWordDisp(cUnit, rSELF, offset + HIWORD_OFFSET, reghi); + //TUNING: rewrite this to not clobber + dvmCompilerClobber(cUnit, reghi); + return false; +} + +/* No select in thumb, so we need to branch. Thumb2 will do better */ +static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin) +{ + int offset = offsetof(Thread, interpSave.retval); + RegLocation rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 1); + int reg0 = loadValue(cUnit, rlSrc1, kCoreReg).lowReg; + int reg1 = loadValue(cUnit, rlSrc2, kCoreReg).lowReg; + int tReg = dvmCompilerAllocTemp(cUnit); + if (isMin) { + newLIR3(cUnit, kMipsSlt, tReg, reg0, reg1); + } + else { + newLIR3(cUnit, kMipsSlt, tReg, reg1, reg0); + } + newLIR3(cUnit, kMipsMovz, reg0, reg1, tReg); + dvmCompilerFreeTemp(cUnit, tReg); + newLIR3(cUnit, kMipsSw, reg0, offset, rSELF); + //TUNING: rewrite this to not clobber + dvmCompilerClobber(cUnit,reg0); + return false; +} + +static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit, + RegLocation rlSrc, RegLocation rlResult, int lit, + int firstBit, int secondBit) +{ + // We can't implement "add src, src, src, lsl#shift" on Thumb, so we have + // to do a regular multiply. + opRegRegImm(cUnit, kOpMul, rlResult.lowReg, rlSrc.lowReg, lit); +} diff --git a/vm/compiler/codegen/mips/Mips32/Ralloc.cpp b/vm/compiler/codegen/mips/Mips32/Ralloc.cpp new file mode 100644 index 000000000..681013150 --- /dev/null +++ b/vm/compiler/codegen/mips/Mips32/Ralloc.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains codegen for the Mips ISA and is intended to be + * includes by: + * + * Codegen-$(TARGET_ARCH_VARIANT).c + * + */ + +/* + * Alloc a pair of core registers, or a double. Low reg in low byte, + * high reg in next byte. 
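+ *
+ * A caller-side sketch of unpacking the result (names are illustrative):
+ *     int pair    = dvmCompilerAllocTypedTempPair(cUnit, fpHint, regClass);
+ *     int lowReg  = pair & 0xff;         // low register in the low byte
+ *     int highReg = (pair >> 8) & 0xff;  // high register in the next byte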
+ */ +int dvmCompilerAllocTypedTempPair(CompilationUnit *cUnit, bool fpHint, + int regClass) +{ + int highReg; + int lowReg; + int res = 0; + +#ifdef __mips_hard_float + if (((regClass == kAnyReg) && fpHint) || (regClass == kFPReg)) { + lowReg = dvmCompilerAllocTempDouble(cUnit); + highReg = lowReg + 1; + res = (lowReg & 0xff) | ((highReg & 0xff) << 8); + return res; + } +#endif + + lowReg = dvmCompilerAllocTemp(cUnit); + highReg = dvmCompilerAllocTemp(cUnit); + res = (lowReg & 0xff) | ((highReg & 0xff) << 8); + return res; +} + +int dvmCompilerAllocTypedTemp(CompilationUnit *cUnit, bool fpHint, int regClass) +{ +#ifdef __mips_hard_float + if (((regClass == kAnyReg) && fpHint) || (regClass == kFPReg)) +{ + return dvmCompilerAllocTempFloat(cUnit); +} +#endif + return dvmCompilerAllocTemp(cUnit); +} diff --git a/vm/compiler/codegen/mips/MipsLIR.h b/vm/compiler/codegen/mips/MipsLIR.h new file mode 100644 index 000000000..fc82da255 --- /dev/null +++ b/vm/compiler/codegen/mips/MipsLIR.h @@ -0,0 +1,644 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DALVIK_VM_COMPILER_CODEGEN_MIPS_MIPSLIR_H_ +#define DALVIK_VM_COMPILER_CODEGEN_MIPS_MIPSLIR_H_ + +#include "Dalvik.h" +#include "compiler/CompilerInternals.h" + +/* + * zero is always the value 0 + * at is scratch for Jit (normally used as temp reg by assembler) + * v0, v1 are scratch for Jit (normally hold subroutine return values) + * a0-a3 are scratch for Jit (normally hold subroutine arguments) + * t0-t7 are scratch for Jit + * t8 is scratch for Jit + * t9 is scratch for Jit (normally used for function calls) + * s0 (rFP) is reserved [holds Dalvik frame pointer] + * s1 (rSELF) is reserved [holds current &Thread] + * s2 (rINST) is scratch for Jit + * s3 (rIBASE) is scratch for Jit + * s4-s7 are scratch for Jit + * k0, k1 are reserved for use by interrupt handlers + * gp is reserved for global pointer + * sp is reserved + * s8 is scratch for Jit + * ra is scratch for Jit (normally holds the return addr) + * + * Preserved across C calls: s0-s8 + * Trashed across C calls: at, v0-v1, a0-a3, t0-t9, gp, ra + * + * Floating pointer registers + * NOTE: there are 32 fp registers (16 df pairs), but current Jit code + * only support 16 fp registers (8 df pairs). + * f0-f15 + * df0-df7, where df0={f0,f1}, df1={f2,f3}, ... , df7={f14,f15} + * + * f0-f15 (df0-df7) trashed across C calls + * + * For mips32 code use: + * a0-a3 to hold operands + * v0-v1 to hold results + * t0-t9 for temps + * + * All jump/branch instructions have a delay slot after it. 
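 *
 * Illustrative sketch (not in the original change): because of the delay
 * slot, an emitted call conceptually looks like
 *
 *     jal    <target>     # call
 *     nop                 # delay slot, executed before control reaches <target>
 *
 * unless a useful instruction can be scheduled into the slot.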
+ * + */ + +/* Offset to distingish FP regs */ +#define FP_REG_OFFSET 32 +/* Offset to distinguish DP FP regs */ +#define FP_DOUBLE 64 +/* Offset to distingish the extra regs */ +#define EXTRA_REG_OFFSET 128 +/* Reg types */ +#define REGTYPE(x) (x & (FP_REG_OFFSET | FP_DOUBLE)) +#define FPREG(x) ((x & FP_REG_OFFSET) == FP_REG_OFFSET) +#define EXTRAREG(x) ((x & EXTRA_REG_OFFSET) == EXTRA_REG_OFFSET) +#define LOWREG(x) ((x & 0x1f) == x) +#define DOUBLEREG(x) ((x & FP_DOUBLE) == FP_DOUBLE) +#define SINGLEREG(x) (FPREG(x) && !DOUBLEREG(x)) +/* + * Note: the low register of a floating point pair is sufficient to + * create the name of a double, but require both names to be passed to + * allow for asserts to verify that the pair is consecutive if significant + * rework is done in this area. Also, it is a good reminder in the calling + * code that reg locations always describe doubles as a pair of singles. + */ +#define S2D(x,y) ((x) | FP_DOUBLE) +/* Mask to strip off fp flags */ +#define FP_REG_MASK (FP_REG_OFFSET-1) +/* non-existent Dalvik register */ +#define vNone (-1) +/* non-existant physical register */ +#define rNone (-1) + +#ifdef HAVE_LITTLE_ENDIAN +#define LOWORD_OFFSET 0 +#define HIWORD_OFFSET 4 +#define r_ARG0 r_A0 +#define r_ARG1 r_A1 +#define r_ARG2 r_A2 +#define r_ARG3 r_A3 +#define r_RESULT0 r_V0 +#define r_RESULT1 r_V1 +#else +#define LOWORD_OFFSET 4 +#define HIWORD_OFFSET 0 +#define r_ARG0 r_A1 +#define r_ARG1 r_A0 +#define r_ARG2 r_A3 +#define r_ARG3 r_A2 +#define r_RESULT0 r_V1 +#define r_RESULT1 r_V0 +#endif + +/* These are the same for both big and little endian. */ +#define r_FARG0 r_F12 +#define r_FARG1 r_F13 +#define r_FRESULT0 r_F0 +#define r_FRESULT1 r_F1 + +/* RegisterLocation templates return values (r_V0, or r_V0/r_V1) */ +#define LOC_C_RETURN {kLocPhysReg, 0, 0, r_V0, 0, -1} +#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, r_RESULT0, r_RESULT1, -1} +#define LOC_C_RETURN_ALT {kLocPhysReg, 0, 1, r_F0, 0, -1} +#define LOC_C_RETURN_WIDE_ALT {kLocPhysReg, 1, 1, r_FRESULT0, r_FRESULT1, -1} +/* RegisterLocation templates for interpState->retVal; */ +#define LOC_DALVIK_RETURN_VAL {kLocRetval, 0, 0, 0, 0, -1} +#define LOC_DALVIK_RETURN_VAL_WIDE {kLocRetval, 1, 0, 0, 0, -1} + + /* + * Data structure tracking the mapping between a Dalvik register (pair) and a + * native register (pair). The idea is to reuse the previously loaded value + * if possible, otherwise to keep the value in a native register as long as + * possible. + */ +typedef struct RegisterInfo { + int reg; // Reg number + bool inUse; // Has it been allocated? + bool pair; // Part of a register pair? + int partner; // If pair, other reg of pair + bool live; // Is there an associated SSA name? + bool dirty; // If live, is it dirty? 
+ int sReg; // Name of live value + struct LIR *defStart; // Starting inst in last def sequence + struct LIR *defEnd; // Ending inst in last def sequence +} RegisterInfo; + +typedef struct RegisterPool { + BitVector *nullCheckedRegs; // Track which registers have been null-checked + int numCoreTemps; + RegisterInfo *coreTemps; + int nextCoreTemp; + int numFPTemps; + RegisterInfo *FPTemps; + int nextFPTemp; +} RegisterPool; + +typedef enum ResourceEncodingPos { + kGPReg0 = 0, + kRegSP = 29, + kRegLR = 31, + kFPReg0 = 32, /* only 16 fp regs supported currently */ + kFPRegEnd = 48, + kRegHI = kFPRegEnd, + kRegLO, + kRegPC, + kRegEnd = 51, + kCCode = kRegEnd, + kFPStatus, // FP status word + // The following four bits are for memory disambiguation + kDalvikReg, // 1 Dalvik Frame (can be fully disambiguated) + kLiteral, // 2 Literal pool (can be fully disambiguated) + kHeapRef, // 3 Somewhere on the heap (alias with any other heap) + kMustNotAlias, // 4 Guaranteed to be non-alias (eg *(r6+x)) +} ResourceEncodingPos; + +#define ENCODE_REG_LIST(N) ((u8) N) +#define ENCODE_REG_SP (1ULL << kRegSP) +#define ENCODE_REG_LR (1ULL << kRegLR) +#define ENCODE_REG_PC (1ULL << kRegPC) +#define ENCODE_CCODE (1ULL << kCCode) +#define ENCODE_FP_STATUS (1ULL << kFPStatus) + +/* Abstract memory locations */ +#define ENCODE_DALVIK_REG (1ULL << kDalvikReg) +#define ENCODE_LITERAL (1ULL << kLiteral) +#define ENCODE_HEAP_REF (1ULL << kHeapRef) +#define ENCODE_MUST_NOT_ALIAS (1ULL << kMustNotAlias) + +#define ENCODE_ALL (~0ULL) +#define ENCODE_MEM (ENCODE_DALVIK_REG | ENCODE_LITERAL | \ + ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS) + +#define DECODE_ALIAS_INFO_REG(X) (X & 0xffff) +#define DECODE_ALIAS_INFO_WIDE(X) ((X & 0x80000000) ? 1 : 0) + +typedef enum OpSize { + kWord, + kLong, + kSingle, + kDouble, + kUnsignedHalf, + kSignedHalf, + kUnsignedByte, + kSignedByte, +} OpSize; + +typedef enum OpKind { + kOpMov, + kOpMvn, + kOpCmp, + kOpLsl, + kOpLsr, + kOpAsr, + kOpRor, + kOpNot, + kOpAnd, + kOpOr, + kOpXor, + kOpNeg, + kOpAdd, + kOpAdc, + kOpSub, + kOpSbc, + kOpRsub, + kOpMul, + kOpDiv, + kOpRem, + kOpBic, + kOpCmn, + kOpTst, + kOpBkpt, + kOpBlx, + kOpPush, + kOpPop, + kOp2Char, + kOp2Short, + kOp2Byte, + kOpCondBr, + kOpUncondBr, +} OpKind; + +/* + * Annotate special-purpose core registers: + * + * rPC, rFP, and rSELF are for architecture-independent code to use. 
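 *
 * Illustrative sketch (not in the original change): rFP points at the Dalvik
 * frame, so virtual register vN sits at a fixed word offset from it, e.g.
 *
 *     lw    t0, (N*4)(rFP)     # read Dalvik vreg vN into a temp
 *
 * which matches the "vReg << 2" offsets used by the flush helpers in
 * RallocUtil.cpp.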
+ */ +typedef enum NativeRegisterPool { + r_ZERO = 0, + r_AT = 1, + r_V0 = 2, + r_V1 = 3, + r_A0 = 4, + r_A1 = 5, + r_A2 = 6, + r_A3 = 7, + r_T0 = 8, + r_T1 = 9, + r_T2 = 10, + r_T3 = 11, + r_T4 = 12, + r_T5 = 13, + r_T6 = 14, + r_T7 = 15, + r_S0 = 16, + r_S1 = 17, + r_S2 = 18, + r_S3 = 19, + r_S4 = 20, + r_S5 = 21, + r_S6 = 22, + r_S7 = 23, + r_T8 = 24, + r_T9 = 25, + r_K0 = 26, + r_K1 = 27, + r_GP = 28, + r_SP = 29, + r_FP = 30, + r_RA = 31, + + r_F0 = 0 + FP_REG_OFFSET, + r_F1, + r_F2, + r_F3, + r_F4, + r_F5, + r_F6, + r_F7, + r_F8, + r_F9, + r_F10, + r_F11, + r_F12, + r_F13, + r_F14, + r_F15, +#if 0 /* only 16 fp regs supported currently */ + r_F16, + r_F17, + r_F18, + r_F19, + r_F20, + r_F21, + r_F22, + r_F23, + r_F24, + r_F25, + r_F26, + r_F27, + r_F28, + r_F29, + r_F30, + r_F31, +#endif + r_DF0 = r_F0 + FP_DOUBLE, + r_DF1 = r_F2 + FP_DOUBLE, + r_DF2 = r_F4 + FP_DOUBLE, + r_DF3 = r_F6 + FP_DOUBLE, + r_DF4 = r_F8 + FP_DOUBLE, + r_DF5 = r_F10 + FP_DOUBLE, + r_DF6 = r_F12 + FP_DOUBLE, + r_DF7 = r_F14 + FP_DOUBLE, +#if 0 /* only 16 fp regs supported currently */ + r_DF8 = r_F16 + FP_DOUBLE, + r_DF9 = r_F18 + FP_DOUBLE, + r_DF10 = r_F20 + FP_DOUBLE, + r_DF11 = r_F22 + FP_DOUBLE, + r_DF12 = r_F24 + FP_DOUBLE, + r_DF13 = r_F26 + FP_DOUBLE, + r_DF14 = r_F28 + FP_DOUBLE, + r_DF15 = r_F30 + FP_DOUBLE, +#endif + r_HI = EXTRA_REG_OFFSET, + r_LO, + r_PC, +} NativeRegisterPool; + + +/* must match gp offset used mterp/mips files */ +#define STACK_OFFSET_GP 84 + +/* MIPSTODO: properly remap arm regs (dPC, dFP, dGLUE) and remove these mappings */ +#define r4PC r_S0 +#define rFP r_S1 +#define rSELF r_S2 +#define rINST r_S4 + +/* Shift encodings */ +typedef enum MipsShiftEncodings { + kMipsLsl = 0x0, + kMipsLsr = 0x1, + kMipsAsr = 0x2, + kMipsRor = 0x3 +} MipsShiftEncodings; + +/* condition encodings */ +typedef enum MipsConditionCode { + kMipsCondEq = 0x0, /* 0000 */ + kMipsCondNe = 0x1, /* 0001 */ + kMipsCondCs = 0x2, /* 0010 */ + kMipsCondCc = 0x3, /* 0011 */ + kMipsCondMi = 0x4, /* 0100 */ + kMipsCondPl = 0x5, /* 0101 */ + kMipsCondVs = 0x6, /* 0110 */ + kMipsCondVc = 0x7, /* 0111 */ + kMipsCondHi = 0x8, /* 1000 */ + kMipsCondLs = 0x9, /* 1001 */ + kMipsCondGe = 0xa, /* 1010 */ + kMipsCondLt = 0xb, /* 1011 */ + kMipsCondGt = 0xc, /* 1100 */ + kMipsCondLe = 0xd, /* 1101 */ + kMipsCondAl = 0xe, /* 1110 */ + kMipsCondNv = 0xf, /* 1111 */ +} MipsConditionCode; + +#define isPseudoOpCode(opCode) ((int)(opCode) < 0) + +/* + * The following enum defines the list of supported Thumb instructions by the + * assembler. Their corresponding snippet positions will be defined in + * Assemble.c. 
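 *
 * Illustrative sketch (not in the original change): codegen requests one of
 * these opcodes through the newLIR helpers, e.g.
 *
 *     newLIR3(cUnit, kMipsAddu, rd, rs, rt);   // emits "addu rd, rs, rt"
 *
 * where rd/rs/rt stand in for previously allocated temps; the assembler fills
 * in the bit fields from the corresponding EncodingMap entry.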
+ */ +typedef enum MipsOpCode { + kMipsChainingCellBottom = -18, + kMipsPseudoBarrier = -17, + kMipsPseudoExtended = -16, + kMipsPseudoSSARep = -15, + kMipsPseudoEntryBlock = -14, + kMipsPseudoExitBlock = -13, + kMipsPseudoTargetLabel = -12, + kMipsPseudoChainingCellBackwardBranch = -11, + kMipsPseudoChainingCellHot = -10, + kMipsPseudoChainingCellInvokePredicted = -9, + kMipsPseudoChainingCellInvokeSingleton = -8, + kMipsPseudoChainingCellNormal = -7, + kMipsPseudoDalvikByteCodeBoundary = -6, + kMipsPseudoPseudoAlign4 = -5, + kMipsPseudoPCReconstructionCell = -4, + kMipsPseudoPCReconstructionBlockLabel = -3, + kMipsPseudoEHBlockLabel = -2, + kMipsPseudoNormalBlockLabel = -1, + + kMipsFirst, + kMips32BitData = kMipsFirst, /* data [31..0] */ + kMipsAddiu, /* addiu t,s,imm16 [001001] s[25..21] t[20..16] imm16[15..0] */ + kMipsAddu, /* add d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100001] */ + kMipsAnd, /* and d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100100] */ + kMipsAndi, /* andi t,s,imm16 [001100] s[25..21] t[20..16] imm16[15..0] */ + kMipsB, /* b o [0001000000000000] o[15..0] */ + kMipsBal, /* bal o [0000010000010001] o[15..0] */ + /* NOTE: the code tests the range kMipsBeq thru kMipsBne, so + adding an instruction in this range may require updates */ + kMipsBeq, /* beq s,t,o [000100] s[25..21] t[20..16] o[15..0] */ + kMipsBeqz, /* beqz s,o [000100] s[25..21] [00000] o[15..0] */ + kMipsBgez, /* bgez s,o [000001] s[25..21] [00001] o[15..0] */ + kMipsBgtz, /* bgtz s,o [000111] s[25..21] [00000] o[15..0] */ + kMipsBlez, /* blez s,o [000110] s[25..21] [00000] o[15..0] */ + kMipsBltz, /* bltz s,o [000001] s[25..21] [00000] o[15..0] */ + kMipsBnez, /* bnez s,o [000101] s[25..21] [00000] o[15..0] */ + kMipsBne, /* bne s,t,o [000101] s[25..21] t[20..16] o[15..0] */ + kMipsDiv, /* div s,t [000000] s[25..21] t[20..16] [0000000000011010] */ +#if __mips_isa_rev>=2 + kMipsExt, /* ext t,s,p,z [011111] s[25..21] t[20..16] z[15..11] p[10..6] [000000] */ +#endif + kMipsJal, /* jal t [000011] t[25..0] */ + kMipsJalr, /* jalr d,s [000000] s[25..21] [00000] d[15..11] + hint[10..6] [001001] */ + kMipsJr, /* jr s [000000] s[25..21] [0000000000] hint[10..6] [001000] */ + kMipsLahi, /* lui t,imm16 [00111100000] t[20..16] imm16[15..0] load addr hi */ + kMipsLalo, /* ori t,s,imm16 [001001] s[25..21] t[20..16] imm16[15..0] load addr lo */ + kMipsLui, /* lui t,imm16 [00111100000] t[20..16] imm16[15..0] */ + kMipsLb, /* lb t,o(b) [100000] b[25..21] t[20..16] o[15..0] */ + kMipsLbu, /* lbu t,o(b) [100100] b[25..21] t[20..16] o[15..0] */ + kMipsLh, /* lh t,o(b) [100001] b[25..21] t[20..16] o[15..0] */ + kMipsLhu, /* lhu t,o(b) [100101] b[25..21] t[20..16] o[15..0] */ + kMipsLw, /* lw t,o(b) [100011] b[25..21] t[20..16] o[15..0] */ + kMipsMfhi, /* mfhi d [0000000000000000] d[15..11] [00000010000] */ + kMipsMflo, /* mflo d [0000000000000000] d[15..11] [00000010010] */ + kMipsMove, /* move d,s [000000] s[25..21] [00000] d[15..11] [00000100101] */ + kMipsMovz, /* movz d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000001010] */ + kMipsMul, /* mul d,s,t [011100] s[25..21] t[20..16] d[15..11] [00000000010] */ + kMipsNop, /* nop [00000000000000000000000000000000] */ + kMipsNor, /* nor d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100111] */ + kMipsOr, /* or d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100101] */ + kMipsOri, /* ori t,s,imm16 [001001] s[25..21] t[20..16] imm16[15..0] */ + kMipsPref, /* pref h,o(b) [101011] b[25..21] h[20..16] o[15..0] */ + kMipsSb, /* sb t,o(b) [101000] b[25..21] 
t[20..16] o[15..0] */ +#if __mips_isa_rev>=2 + kMipsSeb, /* seb d,t [01111100000] t[20..16] d[15..11] [10000100000] */ + kMipsSeh, /* seh d,t [01111100000] t[20..16] d[15..11] [11000100000] */ +#endif + kMipsSh, /* sh t,o(b) [101001] b[25..21] t[20..16] o[15..0] */ + kMipsSll, /* sll d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [000000] */ + kMipsSllv, /* sllv d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000000100] */ + kMipsSlt, /* slt d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000101010] */ + kMipsSlti, /* slti t,s,imm16 [001010] s[25..21] t[20..16] imm16[15..0] */ + kMipsSltu, /* sltu d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000101011] */ + kMipsSra, /* sra d,s,imm5 [00000000000] t[20..16] d[15..11] imm5[10..6] [000011] */ + kMipsSrav, /* srav d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000000111] */ + kMipsSrl, /* srl d,t,a [00000000000] t[20..16] d[20..16] a[10..6] [000010] */ + kMipsSrlv, /* srlv d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000000110] */ + kMipsSubu, /* subu d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100011] */ + kMipsSw, /* sw t,o(b) [101011] b[25..21] t[20..16] o[15..0] */ + kMipsXor, /* xor d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100110] */ + kMipsXori, /* xori t,s,imm16 [001110] s[25..21] t[20..16] imm16[15..0] */ +#ifdef __mips_hard_float + kMipsFadds, /* add.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000000] */ + kMipsFsubs, /* sub.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000001] */ + kMipsFmuls, /* mul.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000010] */ + kMipsFdivs, /* div.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000011] */ + kMipsFaddd, /* add.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000000] */ + kMipsFsubd, /* sub.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000001] */ + kMipsFmuld, /* mul.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000010] */ + kMipsFdivd, /* div.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000011] */ + kMipsFcvtsd, /* cvt.s.d d,s [01000110001] [00000] s[15..11] d[10..6] [100000] */ + kMipsFcvtsw, /* cvt.s.w d,s [01000110100] [00000] s[15..11] d[10..6] [100000] */ + kMipsFcvtds, /* cvt.d.s d,s [01000110000] [00000] s[15..11] d[10..6] [100001] */ + kMipsFcvtdw, /* cvt.d.w d,s [01000110100] [00000] s[15..11] d[10..6] [100001] */ + kMipsFcvtws, /* cvt.w.d d,s [01000110000] [00000] s[15..11] d[10..6] [100100] */ + kMipsFcvtwd, /* cvt.w.d d,s [01000110001] [00000] s[15..11] d[10..6] [100100] */ + kMipsFmovs, /* mov.s d,s [01000110000] [00000] s[15..11] d[10..6] [000110] */ + kMipsFmovd, /* mov.d d,s [01000110001] [00000] s[15..11] d[10..6] [000110] */ + kMipsFlwc1, /* lwc1 t,o(b) [110001] b[25..21] t[20..16] o[15..0] */ + kMipsFldc1, /* ldc1 t,o(b) [110101] b[25..21] t[20..16] o[15..0] */ + kMipsFswc1, /* swc1 t,o(b) [111001] b[25..21] t[20..16] o[15..0] */ + kMipsFsdc1, /* sdc1 t,o(b) [111101] b[25..21] t[20..16] o[15..0] */ + kMipsMfc1, /* mfc1 t,s [01000100000] t[20..16] s[15..11] [00000000000] */ + kMipsMtc1, /* mtc1 t,s [01000100100] t[20..16] s[15..11] [00000000000] */ +#endif + kMipsUndefined, /* undefined [011001xxxxxxxxxxxxxxxx] */ + kMipsLast +} MipsOpCode; + +/* Bit flags describing the behavior of each native opcode */ +typedef enum MipsOpFeatureFlags { + kIsBranch = 0, + kRegDef0, + kRegDef1, + kRegDefSP, + kRegDefLR, + kRegDefList0, + kRegDefList1, + kRegUse0, + kRegUse1, + kRegUse2, + kRegUse3, + kRegUseSP, + kRegUsePC, + kRegUseList0, + kRegUseList1, + kNoOperand, + kIsUnaryOp, + kIsBinaryOp, + kIsTertiaryOp, + 
kIsQuadOp, + kIsIT, + kSetsCCodes, + kUsesCCodes, + kMemLoad, + kMemStore, +} MipsOpFeatureFlags; + +#define IS_LOAD (1 << kMemLoad) +#define IS_STORE (1 << kMemStore) +#define IS_BRANCH (1 << kIsBranch) +#define REG_DEF0 (1 << kRegDef0) +#define REG_DEF1 (1 << kRegDef1) +#define REG_DEF_SP (1 << kRegDefSP) +#define REG_DEF_LR (1 << kRegDefLR) +#define REG_DEF_LIST0 (1 << kRegDefList0) +#define REG_DEF_LIST1 (1 << kRegDefList1) +#define REG_USE0 (1 << kRegUse0) +#define REG_USE1 (1 << kRegUse1) +#define REG_USE2 (1 << kRegUse2) +#define REG_USE3 (1 << kRegUse3) +#define REG_USE_SP (1 << kRegUseSP) +#define REG_USE_PC (1 << kRegUsePC) +#define REG_USE_LIST0 (1 << kRegUseList0) +#define REG_USE_LIST1 (1 << kRegUseList1) +#define NO_OPERAND (1 << kNoOperand) +#define IS_UNARY_OP (1 << kIsUnaryOp) +#define IS_BINARY_OP (1 << kIsBinaryOp) +#define IS_TERTIARY_OP (1 << kIsTertiaryOp) +#define IS_QUAD_OP (1 << kIsQuadOp) +#define IS_IT (1 << kIsIT) +#define SETS_CCODES (1 << kSetsCCodes) +#define USES_CCODES (1 << kUsesCCodes) + +/* Common combo register usage patterns */ +#define REG_USE01 (REG_USE0 | REG_USE1) +#define REG_USE02 (REG_USE0 | REG_USE2) +#define REG_USE012 (REG_USE01 | REG_USE2) +#define REG_USE12 (REG_USE1 | REG_USE2) +#define REG_USE23 (REG_USE2 | REG_USE3) +#define REG_DEF01 (REG_DEF0 | REG_DEF1) +#define REG_DEF0_USE0 (REG_DEF0 | REG_USE0) +#define REG_DEF0_USE1 (REG_DEF0 | REG_USE1) +#define REG_DEF0_USE2 (REG_DEF0 | REG_USE2) +#define REG_DEF0_USE01 (REG_DEF0 | REG_USE01) +#define REG_DEF0_USE12 (REG_DEF0 | REG_USE12) +#define REG_DEF01_USE2 (REG_DEF0 | REG_DEF1 | REG_USE2) + +/* Instruction assembly fieldLoc kind */ +typedef enum MipsEncodingKind { + kFmtUnused, + kFmtBitBlt, /* Bit string using end/start */ + kFmtDfp, /* Double FP reg */ + kFmtSfp, /* Single FP reg */ +} MipsEncodingKind; + +/* Struct used to define the snippet positions for each Thumb opcode */ +typedef struct MipsEncodingMap { + u4 skeleton; + struct { + MipsEncodingKind kind; + int end; /* end for kFmtBitBlt, 1-bit slice end for FP regs */ + int start; /* start for kFmtBitBlt, 4-bit slice end for FP regs */ + } fieldLoc[4]; + MipsOpCode opcode; + int flags; + const char *name; + const char* fmt; + int size; +} MipsEncodingMap; + +/* Keys for target-specific scheduling and other optimization hints */ +typedef enum MipsTargetOptHints { + kMaxHoistDistance, +} MipsTargetOptHints; + +extern MipsEncodingMap EncodingMap[kMipsLast]; + +/* + * Each instance of this struct holds a pseudo or real LIR instruction: + * - pseudo ones (eg labels and marks) and will be discarded by the assembler. + * - real ones will be assembled into Thumb instructions. 
+ *
+ * Machine resources are encoded into a 64-bit vector, where the bit positions
+ * follow ResourceEncodingPos above:
+ * - [ 0..31]: general purpose registers (sp is bit 29, ra is bit 31)
+ * - [32..47]: floating-point registers f0-f15 (the only ones tracked currently)
+ * - [48..50]: the hi, lo, and pc special registers
+ * - [51]: integer condition code
+ * - [52]: floating-point status word
+ */
+typedef struct MipsLIR {
+    LIR generic;
+    MipsOpCode opcode;
+    int operands[4];            // [0..3] = [dest, src1, src2, extra]
+    struct {
+        bool isNop:1;           // LIR is optimized away
+        bool insertWrapper:1;   // insert branch to emulate memory accesses
+        unsigned int age:4;     // default is 0, set lazily by the optimizer
+        unsigned int size:3;    // in bytes (all MIPS instructions are 4 bytes)
+        unsigned int unused:23;
+    } flags;
+    int aliasInfo;              // For Dalvik register access & litpool disambiguation
+    u8 useMask;                 // Resource mask for use
+    u8 defMask;                 // Resource mask for def
+} MipsLIR;
+
+/* Init values when a predicted chain is initially assembled */
+/* E7FE is branch to self */
+#define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe
+#define PREDICTED_CHAIN_DELAY_SLOT_INIT  0
+#define PREDICTED_CHAIN_CLAZZ_INIT       0
+#define PREDICTED_CHAIN_METHOD_INIT      0
+#define PREDICTED_CHAIN_COUNTER_INIT     0
+
+/* Utility macros to traverse the LIR/MipsLIR list */
+#define NEXT_LIR(lir) ((MipsLIR *) lir->generic.next)
+#define PREV_LIR(lir) ((MipsLIR *) lir->generic.prev)
+
+#define NEXT_LIR_LVALUE(lir) (lir)->generic.next
+#define PREV_LIR_LVALUE(lir) (lir)->generic.prev
+
+#define CHAIN_CELL_OFFSET_TAG   0xcdabcdabL
+
+#define IS_UIMM16(v) ((0 <= (v)) && ((v) <= 65535))
+#define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32766))
+#define IS_SIMM16_2WORD(v) ((-32764 <= (v)) && ((v) <= 32763)) /* 2 offsets must fit */
+
+#define CHAIN_CELL_NORMAL_SIZE    16
+#define CHAIN_CELL_PREDICTED_SIZE 20
+
+
+#endif  // DALVIK_VM_COMPILER_CODEGEN_MIPS_MIPSLIR_H_
diff --git a/vm/compiler/codegen/mips/Ralloc.h b/vm/compiler/codegen/mips/Ralloc.h
new file mode 100644
index 000000000..33ad2fb95
--- /dev/null
+++ b/vm/compiler/codegen/mips/Ralloc.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains register allocation support and is intended to be
+ * included by:
+ *
+ *        Codegen-$(TARGET_ARCH_VARIANT).c
+ *
+ */
+
+#include "compiler/CompilerUtility.h"
+#include "compiler/CompilerIR.h"
+#include "compiler/Dataflow.h"
+#include "compiler/codegen/mips/MipsLIR.h"
+
+/*
+ * Return the most flexible allowed register class based on size.
+ * Bug: 2813841
+ * Must use a core register for data types narrower than word (due
+ * to possible unaligned load/store).
+ */
+static inline RegisterClass dvmCompilerRegClassBySize(OpSize size)
+{
+    return (size == kUnsignedHalf ||
+            size == kSignedHalf ||
+            size == kUnsignedByte ||
+            size == kSignedByte ) ?
kCoreReg : kAnyReg; +} + +static inline int dvmCompilerS2VReg(CompilationUnit *cUnit, int sReg) +{ + assert(sReg != INVALID_SREG); + return DECODE_REG(dvmConvertSSARegToDalvik(cUnit, sReg)); +} + +/* Reset the tracker to unknown state */ +static inline void dvmCompilerResetNullCheck(CompilationUnit *cUnit) +{ + dvmClearAllBits(cUnit->regPool->nullCheckedRegs); +} + +/* + * Get the "real" sreg number associated with an sReg slot. In general, + * sReg values passed through codegen are the SSA names created by + * dataflow analysis and refer to slot numbers in the cUnit->regLocation + * array. However, renaming is accomplished by simply replacing RegLocation + * entries in the cUnit->reglocation[] array. Therefore, when location + * records for operands are first created, we need to ask the locRecord + * identified by the dataflow pass what it's new name is. + */ + +static inline int dvmCompilerSRegHi(int lowSreg) { + return (lowSreg == INVALID_SREG) ? INVALID_SREG : lowSreg + 1; +} + + +static inline bool dvmCompilerLiveOut(CompilationUnit *cUnit, int sReg) +{ + //TODO: fully implement + return true; +} + +static inline int dvmCompilerSSASrc(MIR *mir, int num) +{ + assert(mir->ssaRep->numUses > num); + return mir->ssaRep->uses[num]; +} + +extern RegLocation dvmCompilerEvalLoc(CompilationUnit *cUnit, RegLocation loc, + int regClass, bool update); +/* Mark a temp register as dead. Does not affect allocation state. */ +extern void dvmCompilerClobber(CompilationUnit *cUnit, int reg); + +extern RegLocation dvmCompilerUpdateLoc(CompilationUnit *cUnit, + RegLocation loc); + +/* see comments for updateLoc */ +extern RegLocation dvmCompilerUpdateLocWide(CompilationUnit *cUnit, + RegLocation loc); + +/* Clobber all of the temps that might be used by a handler. */ +extern void dvmCompilerClobberHandlerRegs(CompilationUnit *cUnit); + +extern void dvmCompilerMarkLive(CompilationUnit *cUnit, int reg, int sReg); + +extern void dvmCompilerMarkDirty(CompilationUnit *cUnit, int reg); + +extern void dvmCompilerMarkPair(CompilationUnit *cUnit, int lowReg, + int highReg); + +extern void dvmCompilerMarkClean(CompilationUnit *cUnit, int reg); + +extern void dvmCompilerResetDef(CompilationUnit *cUnit, int reg); + +extern void dvmCompilerResetDefLoc(CompilationUnit *cUnit, RegLocation rl); + +/* Set up temp & preserved register pools specialized by target */ +extern void dvmCompilerInitPool(RegisterInfo *regs, int *regNums, int num); + +/* + * Mark the beginning and end LIR of a def sequence. Note that + * on entry start points to the LIR prior to the beginning of the + * sequence. + */ +extern void dvmCompilerMarkDef(CompilationUnit *cUnit, RegLocation rl, + LIR *start, LIR *finish); +/* + * Mark the beginning and end LIR of a def sequence. Note that + * on entry start points to the LIR prior to the beginning of the + * sequence. + */ +extern void dvmCompilerMarkDefWide(CompilationUnit *cUnit, RegLocation rl, + LIR *start, LIR *finish); + +extern RegLocation dvmCompilerGetSrcWide(CompilationUnit *cUnit, MIR *mir, + int low, int high); + +extern RegLocation dvmCompilerGetDestWide(CompilationUnit *cUnit, MIR *mir, + int low, int high); +// Get the LocRecord associated with an SSA name use. +extern RegLocation dvmCompilerGetSrc(CompilationUnit *cUnit, MIR *mir, int num); + +// Get the LocRecord associated with an SSA name def. 
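// Illustrative sketch (not in the original change): a typical non-wide op in
// the Mips32 codegen pairs these accessors with EvalLoc and storeValue:
//
//     RegLocation rlSrc    = dvmCompilerGetSrc(cUnit, mir, 0);
//     RegLocation rlDest   = dvmCompilerGetDest(cUnit, mir, 0);
//     RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true);
//     /* ... emit code that computes rlResult.lowReg from rlSrc ... */
//     storeValue(cUnit, rlDest, rlResult);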
+extern RegLocation dvmCompilerGetDest(CompilationUnit *cUnit, MIR *mir, + int num); + +extern RegLocation dvmCompilerGetReturnWide(CompilationUnit *cUnit); + +/* Clobber all regs that might be used by an external C call */ +extern void dvmCompilerClobberCallRegs(CompilationUnit *cUnit); + +extern RegisterInfo *dvmCompilerIsTemp(CompilationUnit *cUnit, int reg); + +extern void dvmCompilerMarkInUse(CompilationUnit *cUnit, int reg); + +extern int dvmCompilerAllocTemp(CompilationUnit *cUnit); + +extern int dvmCompilerAllocTempFloat(CompilationUnit *cUnit); + +//REDO: too many assumptions. +extern int dvmCompilerAllocTempDouble(CompilationUnit *cUnit); + +extern void dvmCompilerFreeTemp(CompilationUnit *cUnit, int reg); + +extern void dvmCompilerResetDefLocWide(CompilationUnit *cUnit, RegLocation rl); + +extern void dvmCompilerResetDefTracking(CompilationUnit *cUnit); + +/* Kill the corresponding bit in the null-checked register list */ +extern void dvmCompilerKillNullCheckedLoc(CompilationUnit *cUnit, + RegLocation loc); + +//FIXME - this needs to also check the preserved pool. +extern RegisterInfo *dvmCompilerIsLive(CompilationUnit *cUnit, int reg); + +/* To be used when explicitly managing register use */ +extern void dvmCompilerLockAllTemps(CompilationUnit *cUnit); + +extern void dvmCompilerFlushAllRegs(CompilationUnit *cUnit); + +extern RegLocation dvmCompilerGetReturnWideAlt(CompilationUnit *cUnit); + +extern RegLocation dvmCompilerGetReturn(CompilationUnit *cUnit); + +extern RegLocation dvmCompilerGetReturnAlt(CompilationUnit *cUnit); + +/* Clobber any temp associated with an sReg. Could be in either class */ +extern void dvmCompilerClobberSReg(CompilationUnit *cUnit, int sReg); + +/* Return a temp if one is available, -1 otherwise */ +extern int dvmCompilerAllocFreeTemp(CompilationUnit *cUnit); + +/* + * Similar to dvmCompilerAllocTemp(), but forces the allocation of a specific + * register. No check is made to see if the register was previously + * allocated. Use with caution. + */ +extern void dvmCompilerLockTemp(CompilationUnit *cUnit, int reg); + +extern RegLocation dvmCompilerWideToNarrow(CompilationUnit *cUnit, + RegLocation rl); + +/* + * Free all allocated temps in the temp pools. Note that this does + * not affect the "liveness" of a temp register, which will stay + * live until it is either explicitly killed or reallocated. + */ +extern void dvmCompilerResetRegPool(CompilationUnit *cUnit); + +extern void dvmCompilerClobberAllRegs(CompilationUnit *cUnit); + +extern void dvmCompilerResetDefTracking(CompilationUnit *cUnit); diff --git a/vm/compiler/codegen/mips/RallocUtil.cpp b/vm/compiler/codegen/mips/RallocUtil.cpp new file mode 100644 index 000000000..190437335 --- /dev/null +++ b/vm/compiler/codegen/mips/RallocUtil.cpp @@ -0,0 +1,1025 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This file contains register alloction support and is intended to be + * included by: + * + * Codegen-$(TARGET_ARCH_VARIANT).c + * + */ + +#include "compiler/CompilerUtility.h" +#include "compiler/CompilerIR.h" +#include "compiler/Dataflow.h" +#include "MipsLIR.h" +#include "Codegen.h" +#include "Ralloc.h" + +#define SREG(c, s) ((c)->regLocation[(s)].sRegLow) +/* + * Get the "real" sreg number associated with an sReg slot. In general, + * sReg values passed through codegen are the SSA names created by + * dataflow analysis and refer to slot numbers in the cUnit->regLocation + * array. However, renaming is accomplished by simply replacing RegLocation + * entries in the cUnit->reglocation[] array. Therefore, when location + * records for operands are first created, we need to ask the locRecord + * identified by the dataflow pass what it's new name is. + */ + +/* + * Free all allocated temps in the temp pools. Note that this does + * not affect the "liveness" of a temp register, which will stay + * live until it is either explicitly killed or reallocated. + */ +extern void dvmCompilerResetRegPool(CompilationUnit *cUnit) +{ + int i; + for (i=0; i < cUnit->regPool->numCoreTemps; i++) { + cUnit->regPool->coreTemps[i].inUse = false; + } + for (i=0; i < cUnit->regPool->numFPTemps; i++) { + cUnit->regPool->FPTemps[i].inUse = false; + } +} + + /* Set up temp & preserved register pools specialized by target */ +extern void dvmCompilerInitPool(RegisterInfo *regs, int *regNums, int num) +{ + int i; + for (i=0; i < num; i++) { + regs[i].reg = regNums[i]; + regs[i].inUse = false; + regs[i].pair = false; + regs[i].live = false; + regs[i].dirty = false; + regs[i].sReg = INVALID_SREG; + } +} + +static void dumpRegPool(RegisterInfo *p, int numRegs) +{ + int i; + LOGE("================================================"); + for (i=0; i < numRegs; i++ ){ + LOGE("R[%d]: U:%d, P:%d, part:%d, LV:%d, D:%d, SR:%d, ST:%x, EN:%x", + p[i].reg, p[i].inUse, p[i].pair, p[i].partner, p[i].live, + p[i].dirty, p[i].sReg,(int)p[i].defStart, (int)p[i].defEnd); + } + LOGE("================================================"); +} + +static RegisterInfo *getRegInfo(CompilationUnit *cUnit, int reg) +{ + int numTemps = cUnit->regPool->numCoreTemps; + RegisterInfo *p = cUnit->regPool->coreTemps; + int i; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + return &p[i]; + } + } + p = cUnit->regPool->FPTemps; + numTemps = cUnit->regPool->numFPTemps; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + return &p[i]; + } + } + LOGE("Tried to get info on a non-existant temp: r%d",reg); + dvmCompilerAbort(cUnit); + return NULL; +} + +static void flushRegWide(CompilationUnit *cUnit, int reg1, int reg2) +{ + RegisterInfo *info1 = getRegInfo(cUnit, reg1); + RegisterInfo *info2 = getRegInfo(cUnit, reg2); + assert(info1 && info2 && info1->pair && info2->pair && + (info1->partner == info2->reg) && + (info2->partner == info1->reg)); + if ((info1->live && info1->dirty) || (info2->live && info2->dirty)) { + info1->dirty = false; + info2->dirty = false; + if (dvmCompilerS2VReg(cUnit, info2->sReg) < + dvmCompilerS2VReg(cUnit, info1->sReg)) + info1 = info2; + dvmCompilerFlushRegWideImpl(cUnit, rFP, + dvmCompilerS2VReg(cUnit, info1->sReg) << 2, + info1->reg, info1->partner); + } +} + +static void flushReg(CompilationUnit *cUnit, int reg) +{ + RegisterInfo *info = getRegInfo(cUnit, reg); + if (info->live && info->dirty) { + info->dirty = false; + dvmCompilerFlushRegImpl(cUnit, rFP, + dvmCompilerS2VReg(cUnit, info->sReg) << 2, + 
reg, kWord); + } +} + +/* return true if found reg to clobber */ +static bool clobberRegBody(CompilationUnit *cUnit, RegisterInfo *p, + int numTemps, int reg) +{ + int i; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + if (p[i].live && p[i].dirty) { + if (p[i].pair) { + flushRegWide(cUnit, p[i].reg, p[i].partner); + } else { + flushReg(cUnit, p[i].reg); + } + } + p[i].live = false; + p[i].sReg = INVALID_SREG; + p[i].defStart = NULL; + p[i].defEnd = NULL; + if (p[i].pair) { + p[i].pair = false; + /* partners should be in same pool */ + clobberRegBody(cUnit, p, numTemps, p[i].partner); + } + return true; + } + } + return false; +} + +/* Mark a temp register as dead. Does not affect allocation state. */ +void dvmCompilerClobber(CompilationUnit *cUnit, int reg) +{ + if (!clobberRegBody(cUnit, cUnit->regPool->coreTemps, + cUnit->regPool->numCoreTemps, reg)) { + clobberRegBody(cUnit, cUnit->regPool->FPTemps, + cUnit->regPool->numFPTemps, reg); + } +} + +static void clobberSRegBody(RegisterInfo *p, int numTemps, int sReg) +{ + int i; + for (i=0; i< numTemps; i++) { + if (p[i].sReg == sReg) { + p[i].live = false; + p[i].defStart = NULL; + p[i].defEnd = NULL; + } + } +} + +/* Clobber any temp associated with an sReg. Could be in either class */ +extern void dvmCompilerClobberSReg(CompilationUnit *cUnit, int sReg) +{ + clobberSRegBody(cUnit->regPool->coreTemps, cUnit->regPool->numCoreTemps, + sReg); + clobberSRegBody(cUnit->regPool->FPTemps, cUnit->regPool->numFPTemps, + sReg); +} + +static int allocTempBody(CompilationUnit *cUnit, RegisterInfo *p, int numTemps, + int *nextTemp, bool required) +{ + int i; + int next = *nextTemp; + for (i=0; i< numTemps; i++) { + if (next >= numTemps) + next = 0; + if (!p[next].inUse && !p[next].live) { + dvmCompilerClobber(cUnit, p[next].reg); + p[next].inUse = true; + p[next].pair = false; + *nextTemp = next + 1; + return p[next].reg; + } + next++; + } + next = *nextTemp; + for (i=0; i< numTemps; i++) { + if (next >= numTemps) + next = 0; + if (!p[next].inUse) { + dvmCompilerClobber(cUnit, p[next].reg); + p[next].inUse = true; + p[next].pair = false; + *nextTemp = next + 1; + return p[next].reg; + } + next++; + } + if (required) { + LOGE("No free temp registers"); + dvmCompilerAbort(cUnit); + } + return -1; // No register available +} + +//REDO: too many assumptions. 
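// Illustrative note (not in the original change): doubles are modeled as an
// even/odd pair of singles (see the S2D/FP_DOUBLE encoding in MipsLIR.h), so
// this allocator hands out aligned pairs:
//
//     int lowReg  = dvmCompilerAllocTempDouble(cUnit);   // always even
//     int highReg = lowReg + 1;                          // its odd partner
//
// which is the assumption dvmCompilerAllocTypedTempPair() in Mips32/Ralloc.cpp
// relies on.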
+extern int dvmCompilerAllocTempDouble(CompilationUnit *cUnit) +{ + RegisterInfo *p = cUnit->regPool->FPTemps; + int numTemps = cUnit->regPool->numFPTemps; + /* Cleanup - not all targets need aligned regs */ + int start = cUnit->regPool->nextFPTemp + (cUnit->regPool->nextFPTemp & 1); + int next = start; + int i; + + for (i=0; i < numTemps; i+=2) { + if (next >= numTemps) + next = 0; + if ((!p[next].inUse && !p[next].live) && + (!p[next+1].inUse && !p[next+1].live)) { + dvmCompilerClobber(cUnit, p[next].reg); + dvmCompilerClobber(cUnit, p[next+1].reg); + p[next].inUse = true; + p[next+1].inUse = true; + assert((p[next].reg+1) == p[next+1].reg); + assert((p[next].reg & 0x1) == 0); + cUnit->regPool->nextFPTemp += 2; + return p[next].reg; + } + next += 2; + } + next = start; + for (i=0; i < numTemps; i+=2) { + if (next >= numTemps) + next = 0; + if (!p[next].inUse && !p[next+1].inUse) { + dvmCompilerClobber(cUnit, p[next].reg); + dvmCompilerClobber(cUnit, p[next+1].reg); + p[next].inUse = true; + p[next+1].inUse = true; + assert((p[next].reg+1) == p[next+1].reg); + assert((p[next].reg & 0x1) == 0); + cUnit->regPool->nextFPTemp += 2; + return p[next].reg; + } + next += 2; + } + LOGE("No free temp registers"); + dvmCompilerAbort(cUnit); + return -1; +} + +/* Return a temp if one is available, -1 otherwise */ +extern int dvmCompilerAllocFreeTemp(CompilationUnit *cUnit) +{ + return allocTempBody(cUnit, cUnit->regPool->coreTemps, + cUnit->regPool->numCoreTemps, + &cUnit->regPool->nextCoreTemp, true); +} + +extern int dvmCompilerAllocTemp(CompilationUnit *cUnit) +{ + return allocTempBody(cUnit, cUnit->regPool->coreTemps, + cUnit->regPool->numCoreTemps, + &cUnit->regPool->nextCoreTemp, true); +} + +extern int dvmCompilerAllocTempFloat(CompilationUnit *cUnit) +{ + return allocTempBody(cUnit, cUnit->regPool->FPTemps, + cUnit->regPool->numFPTemps, + &cUnit->regPool->nextFPTemp, true); +} + +static RegisterInfo *allocLiveBody(RegisterInfo *p, int numTemps, int sReg) +{ + int i; + if (sReg == -1) + return NULL; + for (i=0; i < numTemps; i++) { + if (p[i].live && (p[i].sReg == sReg)) { + p[i].inUse = true; + return &p[i]; + } + } + return NULL; +} + +static RegisterInfo *allocLive(CompilationUnit *cUnit, int sReg, + int regClass) +{ + RegisterInfo *res = NULL; + switch(regClass) { + case kAnyReg: + res = allocLiveBody(cUnit->regPool->FPTemps, + cUnit->regPool->numFPTemps, sReg); + if (res) + break; + /* Intentional fallthrough */ + case kCoreReg: + res = allocLiveBody(cUnit->regPool->coreTemps, + cUnit->regPool->numCoreTemps, sReg); + break; + case kFPReg: + res = allocLiveBody(cUnit->regPool->FPTemps, + cUnit->regPool->numFPTemps, sReg); + break; + default: + LOGE("Invalid register type"); + dvmCompilerAbort(cUnit); + } + return res; +} + +extern void dvmCompilerFreeTemp(CompilationUnit *cUnit, int reg) +{ + RegisterInfo *p = cUnit->regPool->coreTemps; + int numTemps = cUnit->regPool->numCoreTemps; + int i; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + p[i].inUse = false; + p[i].pair = false; + return; + } + } + p = cUnit->regPool->FPTemps; + numTemps = cUnit->regPool->numFPTemps; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + p[i].inUse = false; + p[i].pair = false; + return; + } + } + LOGE("Tried to free a non-existant temp: r%d",reg); + dvmCompilerAbort(cUnit); +} + +/* + * FIXME - this needs to also check the preserved pool once we start + * start using preserved registers. 
+ */ +extern RegisterInfo *dvmCompilerIsLive(CompilationUnit *cUnit, int reg) +{ + RegisterInfo *p = cUnit->regPool->coreTemps; + int numTemps = cUnit->regPool->numCoreTemps; + int i; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + return p[i].live ? &p[i] : NULL; + } + } + p = cUnit->regPool->FPTemps; + numTemps = cUnit->regPool->numFPTemps; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + return p[i].live ? &p[i] : NULL; + } + } + return NULL; +} + +extern RegisterInfo *dvmCompilerIsTemp(CompilationUnit *cUnit, int reg) +{ + RegisterInfo *p = cUnit->regPool->coreTemps; + int numTemps = cUnit->regPool->numCoreTemps; + int i; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + return &p[i]; + } + } + p = cUnit->regPool->FPTemps; + numTemps = cUnit->regPool->numFPTemps; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + return &p[i]; + } + } + return NULL; +} + +/* + * Similar to dvmCompilerAllocTemp(), but forces the allocation of a specific + * register. No check is made to see if the register was previously + * allocated. Use with caution. + */ +extern void dvmCompilerLockTemp(CompilationUnit *cUnit, int reg) +{ + RegisterInfo *p = cUnit->regPool->coreTemps; + int numTemps = cUnit->regPool->numCoreTemps; + int i; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + p[i].inUse = true; + p[i].live = false; + return; + } + } + p = cUnit->regPool->FPTemps; + numTemps = cUnit->regPool->numFPTemps; + for (i=0; i< numTemps; i++) { + if (p[i].reg == reg) { + p[i].inUse = true; + p[i].live = false; + return; + } + } + LOGE("Tried to lock a non-existant temp: r%d",reg); + dvmCompilerAbort(cUnit); +} + +/* Clobber all regs that might be used by an external C call */ +extern void dvmCompilerClobberCallRegs(CompilationUnit *cUnit) +{ + dvmCompilerClobber(cUnit, r_ZERO); + dvmCompilerClobber(cUnit, r_AT); + dvmCompilerClobber(cUnit, r_V0); + dvmCompilerClobber(cUnit, r_V1); + dvmCompilerClobber(cUnit, r_A0); + dvmCompilerClobber(cUnit, r_A1); + dvmCompilerClobber(cUnit, r_A2); + dvmCompilerClobber(cUnit, r_A3); + dvmCompilerClobber(cUnit, r_T0); + dvmCompilerClobber(cUnit, r_T1); + dvmCompilerClobber(cUnit, r_T2); + dvmCompilerClobber(cUnit, r_T3); + dvmCompilerClobber(cUnit, r_T4); + dvmCompilerClobber(cUnit, r_T5); + dvmCompilerClobber(cUnit, r_T6); + dvmCompilerClobber(cUnit, r_T7); + dvmCompilerClobber(cUnit, r_T8); + dvmCompilerClobber(cUnit, r_T9); + dvmCompilerClobber(cUnit, r_K0); + dvmCompilerClobber(cUnit, r_K1); + dvmCompilerClobber(cUnit, r_GP); + dvmCompilerClobber(cUnit, r_FP); + dvmCompilerClobber(cUnit, r_RA); + dvmCompilerClobber(cUnit, r_HI); + dvmCompilerClobber(cUnit, r_LO); + dvmCompilerClobber(cUnit, r_F0); + dvmCompilerClobber(cUnit, r_F1); + dvmCompilerClobber(cUnit, r_F2); + dvmCompilerClobber(cUnit, r_F3); + dvmCompilerClobber(cUnit, r_F4); + dvmCompilerClobber(cUnit, r_F5); + dvmCompilerClobber(cUnit, r_F6); + dvmCompilerClobber(cUnit, r_F7); + dvmCompilerClobber(cUnit, r_F8); + dvmCompilerClobber(cUnit, r_F9); + dvmCompilerClobber(cUnit, r_F10); + dvmCompilerClobber(cUnit, r_F11); + dvmCompilerClobber(cUnit, r_F12); + dvmCompilerClobber(cUnit, r_F13); + dvmCompilerClobber(cUnit, r_F14); + dvmCompilerClobber(cUnit, r_F15); +} + +/* Clobber all of the temps that might be used by a handler. */ +extern void dvmCompilerClobberHandlerRegs(CompilationUnit *cUnit) +{ + //TUNING: reduce the set of regs used by handlers. Only a few need lots. 
+ dvmCompilerClobberCallRegs(cUnit); + dvmCompilerClobber(cUnit, r_S0); + dvmCompilerClobber(cUnit, r_S1); + dvmCompilerClobber(cUnit, r_S2); + dvmCompilerClobber(cUnit, r_S3); + dvmCompilerClobber(cUnit, r_S4); + dvmCompilerClobber(cUnit, r_S5); + dvmCompilerClobber(cUnit, r_S6); + dvmCompilerClobber(cUnit, r_S7); +} + +extern void dvmCompilerResetDef(CompilationUnit *cUnit, int reg) +{ + RegisterInfo *p = getRegInfo(cUnit, reg); + p->defStart = NULL; + p->defEnd = NULL; +} + +static void nullifyRange(CompilationUnit *cUnit, LIR *start, LIR *finish, + int sReg1, int sReg2) +{ + if (start && finish) { + LIR *p; + assert(sReg1 == sReg2); + for (p = start; ;p = p->next) { + ((MipsLIR *)p)->flags.isNop = true; + if (p == finish) + break; + } + } +} + +/* + * Mark the beginning and end LIR of a def sequence. Note that + * on entry start points to the LIR prior to the beginning of the + * sequence. + */ +extern void dvmCompilerMarkDef(CompilationUnit *cUnit, RegLocation rl, + LIR *start, LIR *finish) +{ + assert(!rl.wide); + assert(start && start->next); + assert(finish); + RegisterInfo *p = getRegInfo(cUnit, rl.lowReg); + p->defStart = start->next; + p->defEnd = finish; +} + +/* + * Mark the beginning and end LIR of a def sequence. Note that + * on entry start points to the LIR prior to the beginning of the + * sequence. + */ +extern void dvmCompilerMarkDefWide(CompilationUnit *cUnit, RegLocation rl, + LIR *start, LIR *finish) +{ + assert(rl.wide); + assert(start && start->next); + assert(finish); + RegisterInfo *p = getRegInfo(cUnit, rl.lowReg); + dvmCompilerResetDef(cUnit, rl.highReg); // Only track low of pair + p->defStart = start->next; + p->defEnd = finish; +} + +extern RegLocation dvmCompilerWideToNarrow(CompilationUnit *cUnit, + RegLocation rl) +{ + assert(rl.wide); + if (rl.location == kLocPhysReg) { + RegisterInfo *infoLo = getRegInfo(cUnit, rl.lowReg); + RegisterInfo *infoHi = getRegInfo(cUnit, rl.highReg); + if (!infoLo->pair) { + dumpRegPool(cUnit->regPool->coreTemps, + cUnit->regPool->numCoreTemps); + assert(infoLo->pair); + } + if (!infoHi->pair) { + dumpRegPool(cUnit->regPool->coreTemps, + cUnit->regPool->numCoreTemps); + assert(infoHi->pair); + } + assert(infoLo->pair); + assert(infoHi->pair); + assert(infoLo->partner == infoHi->reg); + assert(infoHi->partner == infoLo->reg); + infoLo->pair = false; + infoHi->pair = false; + infoLo->defStart = NULL; + infoLo->defEnd = NULL; + infoHi->defStart = NULL; + infoHi->defEnd = NULL; + } +#ifndef HAVE_LITTLE_ENDIAN + else if (rl.location == kLocDalvikFrame) { + rl.sRegLow = dvmCompilerSRegHi(rl.sRegLow); + } +#endif + + rl.wide = false; + return rl; +} + +extern void dvmCompilerResetDefLoc(CompilationUnit *cUnit, RegLocation rl) +{ + assert(!rl.wide); + if (!(gDvmJit.disableOpt & (1 << kSuppressLoads))) { + RegisterInfo *p = getRegInfo(cUnit, rl.lowReg); + assert(!p->pair); + nullifyRange(cUnit, p->defStart, p->defEnd, + p->sReg, rl.sRegLow); + } + dvmCompilerResetDef(cUnit, rl.lowReg); +} + +extern void dvmCompilerResetDefLocWide(CompilationUnit *cUnit, RegLocation rl) +{ + assert(rl.wide); + if (!(gDvmJit.disableOpt & (1 << kSuppressLoads))) { + RegisterInfo *p = getRegInfo(cUnit, rl.lowReg); + assert(p->pair); + nullifyRange(cUnit, p->defStart, p->defEnd, + p->sReg, rl.sRegLow); + } + dvmCompilerResetDef(cUnit, rl.lowReg); + dvmCompilerResetDef(cUnit, rl.highReg); +} + +extern void dvmCompilerResetDefTracking(CompilationUnit *cUnit) +{ + int i; + for (i=0; i< cUnit->regPool->numCoreTemps; i++) { + dvmCompilerResetDef(cUnit, 
cUnit->regPool->coreTemps[i].reg); + } + for (i=0; i< cUnit->regPool->numFPTemps; i++) { + dvmCompilerResetDef(cUnit, cUnit->regPool->FPTemps[i].reg); + } +} + +extern void dvmCompilerClobberAllRegs(CompilationUnit *cUnit) +{ + int i; + for (i=0; i< cUnit->regPool->numCoreTemps; i++) { + dvmCompilerClobber(cUnit, cUnit->regPool->coreTemps[i].reg); + } + for (i=0; i< cUnit->regPool->numFPTemps; i++) { + dvmCompilerClobber(cUnit, cUnit->regPool->FPTemps[i].reg); + } +} + +/* To be used when explicitly managing register use */ +extern void dvmCompilerLockAllTemps(CompilationUnit *cUnit) +{ + int i; + for (i=0; i< cUnit->regPool->numCoreTemps; i++) { + dvmCompilerLockTemp(cUnit, cUnit->regPool->coreTemps[i].reg); + } +} + +// Make sure nothing is live and dirty +static void flushAllRegsBody(CompilationUnit *cUnit, RegisterInfo *info, + int numRegs) +{ + int i; + for (i=0; i < numRegs; i++) { + if (info[i].live && info[i].dirty) { + if (info[i].pair) { + flushRegWide(cUnit, info[i].reg, info[i].partner); + } else { + flushReg(cUnit, info[i].reg); + } + } + } +} + +extern void dvmCompilerFlushAllRegs(CompilationUnit *cUnit) +{ + flushAllRegsBody(cUnit, cUnit->regPool->coreTemps, + cUnit->regPool->numCoreTemps); + flushAllRegsBody(cUnit, cUnit->regPool->FPTemps, + cUnit->regPool->numFPTemps); + dvmCompilerClobberAllRegs(cUnit); +} + + +//TUNING: rewrite all of this reg stuff. Probably use an attribute table +static bool regClassMatches(int regClass, int reg) +{ + if (regClass == kAnyReg) { + return true; + } else if (regClass == kCoreReg) { + return !FPREG(reg); + } else { + return FPREG(reg); + } +} + +extern void dvmCompilerMarkLive(CompilationUnit *cUnit, int reg, int sReg) +{ + RegisterInfo *info = getRegInfo(cUnit, reg); + if ((info->reg == reg) && (info->sReg == sReg) && info->live) { + return; /* already live */ + } else if (sReg != INVALID_SREG) { + dvmCompilerClobberSReg(cUnit, sReg); + info->live = true; + } else { + /* Can't be live if no associated sReg */ + info->live = false; + } + info->sReg = sReg; +} + +extern void dvmCompilerMarkPair(CompilationUnit *cUnit, int lowReg, int highReg) +{ + RegisterInfo *infoLo = getRegInfo(cUnit, lowReg); + RegisterInfo *infoHi = getRegInfo(cUnit, highReg); + infoLo->pair = infoHi->pair = true; + infoLo->partner = highReg; + infoHi->partner = lowReg; +} + +extern void dvmCompilerMarkClean(CompilationUnit *cUnit, int reg) +{ + RegisterInfo *info = getRegInfo(cUnit, reg); + info->dirty = false; +} + +extern void dvmCompilerMarkDirty(CompilationUnit *cUnit, int reg) +{ + RegisterInfo *info = getRegInfo(cUnit, reg); + info->dirty = true; +} + +extern void dvmCompilerMarkInUse(CompilationUnit *cUnit, int reg) +{ + RegisterInfo *info = getRegInfo(cUnit, reg); + info->inUse = true; +} + +void copyRegInfo(CompilationUnit *cUnit, int newReg, int oldReg) +{ + RegisterInfo *newInfo = getRegInfo(cUnit, newReg); + RegisterInfo *oldInfo = getRegInfo(cUnit, oldReg); + *newInfo = *oldInfo; + newInfo->reg = newReg; +} + +/* + * Return an updated location record with current in-register status. + * If the value lives in live temps, reflect that fact. No code + * is generated. The the live value is part of an older pair, + * clobber both low and high. + * TUNING: clobbering both is a bit heavy-handed, but the alternative + * is a bit complex when dealing with FP regs. Examine code to see + * if it's worthwhile trying to be more clever here. 
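 *
 * Illustrative sketch (not in the original change): callers refresh a location
 * before deciding whether a load is needed, e.g.
 *
 *     loc = dvmCompilerUpdateLoc(cUnit, loc);
 *     if (loc.location == kLocPhysReg) {
 *         /* value is already live in loc.lowReg, no reload required */
 *     }
 *
 * dvmCompilerEvalLoc() below follows exactly this pattern.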
+ */ +extern RegLocation dvmCompilerUpdateLoc(CompilationUnit *cUnit, RegLocation loc) +{ + assert(!loc.wide); + if (loc.location == kLocDalvikFrame) { + RegisterInfo *infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg); + if (infoLo) { + if (infoLo->pair) { + dvmCompilerClobber(cUnit, infoLo->reg); + dvmCompilerClobber(cUnit, infoLo->partner); + } else { + loc.lowReg = infoLo->reg; + loc.location = kLocPhysReg; + } + } + } + + return loc; +} + +/* see comments for updateLoc */ +extern RegLocation dvmCompilerUpdateLocWide(CompilationUnit *cUnit, + RegLocation loc) +{ + assert(loc.wide); + if (loc.location == kLocDalvikFrame) { + // Are the dalvik regs already live in physical registers? + RegisterInfo *infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg); + RegisterInfo *infoHi = allocLive(cUnit, + dvmCompilerSRegHi(loc.sRegLow), kAnyReg); + bool match = true; + match = match && (infoLo != NULL); + match = match && (infoHi != NULL); + // Are they both core or both FP? + match = match && (FPREG(infoLo->reg) == FPREG(infoHi->reg)); + // If a pair of floating point singles, are they properly aligned? + if (match && FPREG(infoLo->reg)) { + match &= ((infoLo->reg & 0x1) == 0); + match &= ((infoHi->reg - infoLo->reg) == 1); + } + // If previously used as a pair, it is the same pair? + if (match && (infoLo->pair || infoHi->pair)) { + match = (infoLo->pair == infoHi->pair); + match &= ((infoLo->reg == infoHi->partner) && + (infoHi->reg == infoLo->partner)); + } + if (match) { + // Can reuse - update the register usage info + loc.lowReg = infoLo->reg; + loc.highReg = infoHi->reg; + loc.location = kLocPhysReg; + dvmCompilerMarkPair(cUnit, loc.lowReg, loc.highReg); + assert(!FPREG(loc.lowReg) || ((loc.lowReg & 0x1) == 0)); + return loc; + } + // Can't easily reuse - clobber any overlaps + if (infoLo) { + dvmCompilerClobber(cUnit, infoLo->reg); + if (infoLo->pair) + dvmCompilerClobber(cUnit, infoLo->partner); + } + if (infoHi) { + dvmCompilerClobber(cUnit, infoHi->reg); + if (infoHi->pair) + dvmCompilerClobber(cUnit, infoHi->partner); + } + } + + return loc; +} + +static RegLocation evalLocWide(CompilationUnit *cUnit, RegLocation loc, + int regClass, bool update) +{ + assert(loc.wide); + int newRegs; + int lowReg; + int highReg; + + loc = dvmCompilerUpdateLocWide(cUnit, loc); + + /* If already in registers, we can assume proper form. Right reg class? */ + if (loc.location == kLocPhysReg) { + assert(FPREG(loc.lowReg) == FPREG(loc.highReg)); + assert(!FPREG(loc.lowReg) || ((loc.lowReg & 0x1) == 0)); + if (!regClassMatches(regClass, loc.lowReg)) { + /* Wrong register class. 
Reallocate and copy */ + newRegs = dvmCompilerAllocTypedTempPair(cUnit, loc.fp, regClass); + lowReg = newRegs & 0xff; + highReg = (newRegs >> 8) & 0xff; + dvmCompilerRegCopyWide(cUnit, lowReg, highReg, loc.lowReg, + loc.highReg); + copyRegInfo(cUnit, lowReg, loc.lowReg); + copyRegInfo(cUnit, highReg, loc.highReg); + dvmCompilerClobber(cUnit, loc.lowReg); + dvmCompilerClobber(cUnit, loc.highReg); + loc.lowReg = lowReg; + loc.highReg = highReg; + dvmCompilerMarkPair(cUnit, loc.lowReg, loc.highReg); + assert(!FPREG(loc.lowReg) || ((loc.lowReg & 0x1) == 0)); + } + return loc; + } + + assert((loc.location != kLocRetval) || (loc.sRegLow == INVALID_SREG)); + assert((loc.location != kLocRetval) || + (dvmCompilerSRegHi(loc.sRegLow) == INVALID_SREG)); + + newRegs = dvmCompilerAllocTypedTempPair(cUnit, loc.fp, regClass); + loc.lowReg = newRegs & 0xff; + loc.highReg = (newRegs >> 8) & 0xff; + + dvmCompilerMarkPair(cUnit, loc.lowReg, loc.highReg); + if (update) { + loc.location = kLocPhysReg; + dvmCompilerMarkLive(cUnit, loc.lowReg, loc.sRegLow); + dvmCompilerMarkLive(cUnit, loc.highReg, dvmCompilerSRegHi(loc.sRegLow)); + } + assert(!FPREG(loc.lowReg) || ((loc.lowReg & 0x1) == 0)); + return loc; +} + +extern RegLocation dvmCompilerEvalLoc(CompilationUnit *cUnit, RegLocation loc, + int regClass, bool update) +{ + int newReg; + if (loc.wide) + return evalLocWide(cUnit, loc, regClass, update); + loc = dvmCompilerUpdateLoc(cUnit, loc); + + if (loc.location == kLocPhysReg) { + if (!regClassMatches(regClass, loc.lowReg)) { + /* Wrong register class. Realloc, copy and transfer ownership */ + newReg = dvmCompilerAllocTypedTemp(cUnit, loc.fp, regClass); + dvmCompilerRegCopy(cUnit, newReg, loc.lowReg); + copyRegInfo(cUnit, newReg, loc.lowReg); + dvmCompilerClobber(cUnit, loc.lowReg); + loc.lowReg = newReg; + } + return loc; + } + + assert((loc.location != kLocRetval) || (loc.sRegLow == INVALID_SREG)); + + newReg = dvmCompilerAllocTypedTemp(cUnit, loc.fp, regClass); + loc.lowReg = newReg; + + if (update) { + loc.location = kLocPhysReg; + dvmCompilerMarkLive(cUnit, loc.lowReg, loc.sRegLow); + } + return loc; +} + +static inline int getDestSSAName(MIR *mir, int num) +{ + assert(mir->ssaRep->numDefs > num); + return mir->ssaRep->defs[num]; +} + +// Get the LocRecord associated with an SSA name use. +extern RegLocation dvmCompilerGetSrc(CompilationUnit *cUnit, MIR *mir, int num) +{ + RegLocation loc = cUnit->regLocation[ + SREG(cUnit, dvmCompilerSSASrc(mir, num))]; + loc.fp = cUnit->regLocation[dvmCompilerSSASrc(mir, num)].fp; + loc.wide = false; + return loc; +} + +// Get the LocRecord associated with an SSA name def. +extern RegLocation dvmCompilerGetDest(CompilationUnit *cUnit, MIR *mir, + int num) +{ + RegLocation loc = cUnit->regLocation[SREG(cUnit, getDestSSAName(mir, num))]; + loc.fp = cUnit->regLocation[getDestSSAName(mir, num)].fp; + loc.wide = false; + return loc; +} + +static RegLocation getLocWide(CompilationUnit *cUnit, MIR *mir, + int low, int high, bool isSrc) +{ + RegLocation lowLoc; + RegLocation highLoc; + /* Copy loc record for low word and patch in data from high word */ + if (isSrc) { + lowLoc = dvmCompilerGetSrc(cUnit, mir, low); + highLoc = dvmCompilerGetSrc(cUnit, mir, high); + } else { + lowLoc = dvmCompilerGetDest(cUnit, mir, low); + highLoc = dvmCompilerGetDest(cUnit, mir, high); + } + /* Avoid this case by either promoting both or neither. 
*/ + assert(lowLoc.location == highLoc.location); + if (lowLoc.location == kLocPhysReg) { + /* This case shouldn't happen if we've named correctly */ + assert(lowLoc.fp == highLoc.fp); + } + lowLoc.wide = true; + lowLoc.highReg = highLoc.lowReg; + return lowLoc; +} + +extern RegLocation dvmCompilerGetDestWide(CompilationUnit *cUnit, MIR *mir, + int low, int high) +{ + return getLocWide(cUnit, mir, low, high, false); +} + +extern RegLocation dvmCompilerGetSrcWide(CompilationUnit *cUnit, MIR *mir, + int low, int high) +{ + return getLocWide(cUnit, mir, low, high, true); +} + +extern RegLocation dvmCompilerGetReturnWide(CompilationUnit *cUnit) +{ + RegLocation res = LOC_C_RETURN_WIDE; + dvmCompilerClobber(cUnit, r_V0); + dvmCompilerClobber(cUnit, r_V1); + dvmCompilerMarkInUse(cUnit, r_V0); + dvmCompilerMarkInUse(cUnit, r_V1); + dvmCompilerMarkPair(cUnit, res.lowReg, res.highReg); + return res; +} + +extern RegLocation dvmCompilerGetReturn(CompilationUnit *cUnit) +{ + RegLocation res = LOC_C_RETURN; + dvmCompilerClobber(cUnit, r_V0); + dvmCompilerMarkInUse(cUnit, r_V0); + return res; +} + +extern RegLocation dvmCompilerGetReturnWideAlt(CompilationUnit *cUnit) +{ + RegLocation res = LOC_C_RETURN_WIDE_ALT; + dvmCompilerClobber(cUnit, r_F0); + dvmCompilerClobber(cUnit, r_F1); + dvmCompilerMarkInUse(cUnit, r_F0); + dvmCompilerMarkInUse(cUnit, r_F1); + dvmCompilerMarkPair(cUnit, res.lowReg, res.highReg); + return res; +} + +extern RegLocation dvmCompilerGetReturnAlt(CompilationUnit *cUnit) +{ + RegLocation res = LOC_C_RETURN_ALT; + dvmCompilerClobber(cUnit, r_F0); + dvmCompilerMarkInUse(cUnit, r_F0); + return res; +} + +/* Kill the corresponding bit in the null-checked register list */ +extern void dvmCompilerKillNullCheckedLoc(CompilationUnit *cUnit, + RegLocation loc) +{ + if (loc.location != kLocRetval) { + assert(loc.sRegLow != INVALID_SREG); + dvmClearBit(cUnit->regPool->nullCheckedRegs, loc.sRegLow); + if (loc.wide) { + assert(dvmCompilerSRegHi(loc.sRegLow) != INVALID_SREG); + dvmClearBit(cUnit->regPool->nullCheckedRegs, + dvmCompilerSRegHi(loc.sRegLow)); + } + } +} + +extern void dvmCompilerFlushRegWideForV5TEVFP(CompilationUnit *cUnit, + int reg1, int reg2) +{ + flushRegWide(cUnit, reg1, reg2); +} + +extern void dvmCompilerFlushRegForV5TEVFP(CompilationUnit *cUnit, int reg) +{ + flushReg(cUnit, reg); +} diff --git a/vm/compiler/codegen/mips/mips/ArchVariant.cpp b/vm/compiler/codegen/mips/mips/ArchVariant.cpp new file mode 100644 index 000000000..51a590a6a --- /dev/null +++ b/vm/compiler/codegen/mips/mips/ArchVariant.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +extern "C" void dvmCompilerTemplateStart(void); + +/* + * This file is included by Codegen-mips.c, and implements architecture + * variant-specific code. + */ + +/* + * Determine the initial instruction set to be used for this trace. + * Later components may decide to change this. 
+ */ +JitInstructionSetType dvmCompilerInstructionSet(void) +{ + return DALVIK_JIT_MIPS; +} + +/* First, declare dvmCompiler_TEMPLATE_XXX for each template */ +#define JIT_TEMPLATE(X) extern "C" void dvmCompiler_TEMPLATE_##X(); +#include "../../../template/mips/TemplateOpList.h" +#undef JIT_TEMPLATE + +/* Architecture-specific initializations and checks go here */ +bool dvmCompilerArchVariantInit(void) +{ + int i = 0; + + /* + * Then, populate the templateEntryOffsets array with the offsets from + * the dvmCompilerTemplateStart symbol for each template. + */ +#define JIT_TEMPLATE(X) templateEntryOffsets[i++] = \ + (intptr_t) dvmCompiler_TEMPLATE_##X - (intptr_t) dvmCompilerTemplateStart; +#include "../../../template/mips/TemplateOpList.h" +#undef JIT_TEMPLATE + + /* Target-specific configuration */ + gDvmJit.jitTableSize = 1 << 9; // 512 + gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1; + gDvmJit.threshold = 200; + gDvmJit.codeCacheSize = 512*1024; + +#if defined(WITH_SELF_VERIFICATION) + /* Force into blocking mode */ + gDvmJit.blockingMode = true; + gDvm.nativeDebuggerActive = true; +#endif + + /* Codegen-specific assumptions */ + assert(OFFSETOF_MEMBER(ClassObject, vtable) < 128 && + (OFFSETOF_MEMBER(ClassObject, vtable) & 0x3) == 0); + assert(OFFSETOF_MEMBER(ArrayObject, length) < 128 && + (OFFSETOF_MEMBER(ArrayObject, length) & 0x3) == 0); + assert(OFFSETOF_MEMBER(ArrayObject, contents) < 256); + + /* Up to 5 args are pushed on top of FP - sizeofStackSaveArea */ + assert(sizeof(StackSaveArea) < 236); + + /* + * EA is calculated by doing "Rn + imm5 << 2", so make sure that the last + * offset from the struct is less than 128. + */ + assert((offsetof(Thread, jitToInterpEntries) + + sizeof(struct JitToInterpEntries)) < 128); + + /* FIXME - comment out the following to enable method-based JIT */ + gDvmJit.disableOpt |= (1 << kMethodJit); + + // Make sure all threads have current values + dvmJitUpdateThreadStateAll(); + + return true; +} + +int dvmCompilerTargetOptHint(int key) +{ + int res; + switch (key) { + case kMaxHoistDistance: + res = 2; + break; + default: + LOGE("Unknown target optimization hint key: %d", key); + res = 0; + } + return res; +} + +void dvmCompilerGenMemBarrier(CompilationUnit *cUnit, int barrierKind) +{ + __asm__ __volatile__ ("" : : : "memory"); +} diff --git a/vm/compiler/codegen/mips/mips/ArchVariant.h b/vm/compiler/codegen/mips/mips/ArchVariant.h new file mode 100644 index 000000000..ec04dd8b3 --- /dev/null +++ b/vm/compiler/codegen/mips/mips/ArchVariant.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DALVIK_VM_COMPILER_CODEGEN_MIPS_ARCHVARIANT_H_ +#define DALVIK_VM_COMPILER_CODEGEN_MIPS_ARCHVARIANT_H_ + +/* Create the TemplateOpcode enum */ +#define JIT_TEMPLATE(X) TEMPLATE_##X, +enum TemplateOpcode { +#include "../../../template/mips/TemplateOpList.h" +/* + * For example, + * TEMPLATE_CMP_LONG, + * TEMPLATE_RETURN, + * ...
+ */ + TEMPLATE_LAST_MARK, +}; +#undef JIT_TEMPLATE + +#endif // DALVIK_VM_COMPILER_CODEGEN_MIPS_ARCHVARIANT_H_ diff --git a/vm/compiler/codegen/mips/mips/CallingConvention.S b/vm/compiler/codegen/mips/mips/CallingConvention.S new file mode 100644 index 000000000..ab9765512 --- /dev/null +++ b/vm/compiler/codegen/mips/mips/CallingConvention.S @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Save & restore for callee-save FP registers. + * On entry: + * a0 : pointer to save area of JIT_CALLEE_SAVE_WORD_SIZE + */ + .text + .align 2 + .global dvmJitCalleeSave + .type dvmJitCalleeSave, %function +dvmJitCalleeSave: + jr $31 + + .global dvmJitCalleeRestore + .type dvmJitCalleeRestore, %function +dvmJitCalleeRestore: + jr $31 diff --git a/vm/compiler/codegen/mips/mips/Codegen.cpp b/vm/compiler/codegen/mips/mips/Codegen.cpp new file mode 100644 index 000000000..2c7456e4b --- /dev/null +++ b/vm/compiler/codegen/mips/mips/Codegen.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define _CODEGEN_C + +#include "Dalvik.h" +#include "interp/InterpDefs.h" +#include "libdex/DexOpcodes.h" +#include "compiler/CompilerInternals.h" +#include "compiler/codegen/mips/MipsLIR.h" +#include "mterp/common/FindInterface.h" +#include "compiler/codegen/mips/Ralloc.h" +#include "compiler/codegen/mips/Codegen.h" +#include "compiler/Loop.h" +#include "ArchVariant.h" + +/* Architecture-independent building blocks */ +#include "../CodegenCommon.cpp" + +/* Mips32-specific factory routines */ +#include "../Mips32/Factory.cpp" +/* Factory utilities dependent on arch-specific features */ +#include "../CodegenFactory.cpp" + +/* Mips32-specific codegen routines */ +#include "../Mips32/Gen.cpp" +/* MIPS FP codegen routines */ +#include "../FP/MipsFP.cpp" + +/* Mips32-specific register allocation */ +#include "../Mips32/Ralloc.cpp" + +/* MIR2LIR dispatcher and architecture-independent codegen routines */ +#include "../CodegenDriver.cpp" + +/* Dummy driver for method-based JIT */ +#include "MethodCodegenDriver.cpp" + +/* Architecture manifest */ +#include "ArchVariant.cpp" diff --git a/vm/compiler/codegen/mips/mips/MethodCodegenDriver.cpp b/vm/compiler/codegen/mips/mips/MethodCodegenDriver.cpp new file mode 100644 index 000000000..735a4787b --- /dev/null +++ b/vm/compiler/codegen/mips/mips/MethodCodegenDriver.cpp @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit) +{ + LOGE("Method-based JIT not supported for the Mips target"); + dvmAbort(); +} diff --git a/vm/compiler/codegen/x86/ArchUtility.cpp b/vm/compiler/codegen/x86/ArchUtility.cpp index f7c48d628..e7b7d7004 100644 --- a/vm/compiler/codegen/x86/ArchUtility.cpp +++ b/vm/compiler/codegen/x86/ArchUtility.cpp @@ -28,3 +28,10 @@ int dvmCompilerCacheFlush(long start, long end, long flags) { return 0; } + +/* Target-specific cache clearing */ +void dvmCompilerCacheClear(char *start, size_t size) +{ + /* 0 is an invalid opcode for x86. */ + memset(start, 0, size); +}
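evalLocWide() and dvmCompilerEvalLoc() above unpack the value returned by dvmCompilerAllocTypedTempPair() as two 8-bit register numbers: the low register in bits 0-7 and the high register in bits 8-15. A minimal sketch of that packing convention, with a hypothetical packRegPair() helper that is not part of the patch:

#include <cassert>
#include <cstdio>

/* Hypothetical packer mirroring the unpacking done in evalLocWide() */
static int packRegPair(int lowReg, int highReg)
{
    return (lowReg & 0xff) | ((highReg & 0xff) << 8);
}

int main()
{
    int newRegs = packRegPair(2, 3);        /* e.g. MIPS v0/v1 */
    int lowReg  = newRegs & 0xff;           /* same masks as in the code above */
    int highReg = (newRegs >> 8) & 0xff;
    assert(lowReg == 2 && highReg == 3);
    printf("newRegs=0x%04x low=%d high=%d\n", newRegs, lowReg, highReg);
    return 0;
}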
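The ArchVariant.cpp/ArchVariant.h pair above relies on an X-macro: TemplateOpList.h contains only JIT_TEMPLATE(...) invocations, and each includer defines JIT_TEMPLATE differently so the same list expands into extern declarations, the TemplateOpcode enum, and the templateEntryOffsets table. A standalone sketch of the idiom, using a hypothetical TEMPLATE_OP_LIST macro in place of re-including the real header:

#include <cstdio>

/* Hypothetical stand-in for template/mips/TemplateOpList.h */
#define TEMPLATE_OP_LIST \
    JIT_TEMPLATE(CMP_LONG) \
    JIT_TEMPLATE(RETURN) \
    JIT_TEMPLATE(MEM_OP_DECODE)

/* Expansion 1: the TemplateOpcode-style enum */
#define JIT_TEMPLATE(X) TEMPLATE_##X,
enum TemplateOpcode {
    TEMPLATE_OP_LIST
    TEMPLATE_LAST_MARK,
};
#undef JIT_TEMPLATE

/* Expansion 2: a parallel table (ArchVariant.cpp fills entry offsets this way) */
#define JIT_TEMPLATE(X) #X,
static const char *templateNames[] = { TEMPLATE_OP_LIST };
#undef JIT_TEMPLATE

int main()
{
    for (int i = 0; i < TEMPLATE_LAST_MARK; i++)
        printf("%d -> TEMPLATE_%s\n", i, templateNames[i]);
    return 0;
}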
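dvmCompilerArchVariantInit() above sizes the JIT lookup table to a power of two (1 << 9) so that jitTableMask = size - 1 can reduce a hash to a table slot with a single AND. A small illustration with a hypothetical hash value:

#include <cstdio>

int main()
{
    unsigned int jitTableSize = 1u << 9;            /* 512, as configured above */
    unsigned int jitTableMask = jitTableSize - 1;   /* 0x1ff */
    unsigned int dalvikPCHash = 0x4002bdf1u;        /* hypothetical hash input */
    unsigned int slot = dalvikPCHash & jitTableMask;
    printf("slot = %u (always < %u)\n", slot, jitTableSize);
    return 0;
}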
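The dvmCompilerCacheClear()/dvmCompilerCacheFlush() hooks touched at the end of this diff are normally used together when a region of the JIT code cache is wiped: the region is first overwritten with the target's fill pattern and then flushed so stale translations cannot keep executing. A sketch under those assumptions, mirroring the x86 flavour shown above; retireCodeCacheRegion() is a hypothetical caller, not a Dalvik API:

#include <cstring>
#include <cstdio>

static void dvmCompilerCacheClear(char *start, size_t size)
{
    memset(start, 0, size);          /* x86 flavour: fill with zero bytes */
}

static int dvmCompilerCacheFlush(long, long, long)
{
    return 0;                        /* x86 flavour: nothing to flush */
}

/* Hypothetical caller combining the two hooks. */
static void retireCodeCacheRegion(char *start, size_t size)
{
    dvmCompilerCacheClear(start, size);
    dvmCompilerCacheFlush((long) start, (long) (start + size), 0);
}

int main()
{
    char region[32];
    memset(region, 0xCC, sizeof(region));   /* pretend the region holds code */
    retireCodeCacheRegion(region, sizeof(region));
    printf("first byte after retire: %d\n", region[0]);
    return 0;
}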