author | Bill Buzbee <buzbee@google.com> | 2010-03-02 16:14:41 -0800
committer | Bill Buzbee <buzbee@google.com> | 2010-03-03 15:13:17 -0800
commit | 1f74863d3e0f19930818398f375ebf1cf2d78969 (patch)
tree | 4e646a4e73cae7d5e98c621d1cc1aa330a466cf9 /vm/compiler
parent | 10ebc7d0b84dcb98e1a7eeac96ef06acdfc8d184 (diff)
download | android_dalvik-1f74863d3e0f19930818398f375ebf1cf2d78969.tar.gz, android_dalvik-1f74863d3e0f19930818398f375ebf1cf2d78969.tar.bz2, android_dalvik-1f74863d3e0f19930818398f375ebf1cf2d78969.zip
Jit: Sapphire tuning - mostly scheduling.
Re-enabled load/store motion that had inadvertently been turned off for
non-armv7 targets. Tagged memory references with the kind of memory
they touch (Dalvik frame, literal pool, heap) to enable more aggressive
load hoisting. Eliminated some largely duplicate code in the
target-specific files. Reworked the temp register allocation code to
allocate temps round-robin (to improve scheduling opportunities).
Overall, a nice gain for Sapphire: 5% to 15% on some benchmarks, with
measurable improvements for Passion as well.
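
To make the load-hoisting idea concrete, here is an illustrative sketch of the memory-class tagging described above, condensed from the setMemRefType() helper and ENCODE_* masks this patch adds to CodegenCommon.c and ArmLIR.h. The struct, enum, and bit positions are simplified stand-ins, not the real definitions (Dalvik's u8 is a 64-bit type; plain uint64_t is used here):

```c
#include <stdint.h>

/* One bit per memory class, mirroring the new ResourceEncodingPos entries.
 * Bit positions are illustrative; the real ones follow kFPStatus in ArmLIR.h. */
#define ENCODE_DALVIK_REG   (1ULL << 48)  /* must-alias: a specific Dalvik vreg   */
#define ENCODE_LITERAL      (1ULL << 49)  /* must-alias: a specific literal value */
#define ENCODE_FRAME_REF    (1ULL << 50)  /* may-alias: Dalvik frame              */
#define ENCODE_HEAP_REF     (1ULL << 51)  /* may-alias: heap                      */
#define ENCODE_LITPOOL_REF  (1ULL << 52)  /* may-alias: literal pool              */
#define ENCODE_MEM_USE      (ENCODE_FRAME_REF | ENCODE_HEAP_REF | ENCODE_LITPOOL_REF)

typedef enum { kLiteral, kDalvikReg, kHeapRef } MemClass;

typedef struct {
    uint64_t useMask;   /* resources read by the instruction    */
    uint64_t defMask;   /* resources written by the instruction */
} LIRStub;

/*
 * Tag a load with the class of memory it reads (a store would apply the
 * same idea to defMask).  Once loads and stores carry these bits, the
 * hoisting pass only needs a mask intersection to see that, for example,
 * a literal-pool load cannot alias a Dalvik-frame store.
 */
static void tagLoadMemClass(LIRStub *lir, MemClass memType)
{
    lir->useMask &= ~ENCODE_MEM_USE;              /* drop any earlier class */
    switch (memType) {
    case kLiteral:
        lir->useMask |= ENCODE_LITERAL | ENCODE_LITPOOL_REF;
        break;
    case kDalvikReg:
        lir->useMask |= ENCODE_DALVIK_REG | ENCODE_FRAME_REF;
        break;
    default:                                      /* kHeapRef */
        lir->useMask |= ENCODE_HEAP_REF;          /* conservative default */
        break;
    }
}
```

In the patch itself, setupResourceMasks() defaults every load/store to the heap class, and annotateDalvikRegAccess() / the literal-load factories then narrow the class, so untagged memory operations stay conservatively un-hoistable.
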
Diffstat (limited to 'vm/compiler')
-rw-r--r-- | vm/compiler/Compiler.c | 2
-rw-r--r-- | vm/compiler/codegen/CompilerCodegen.h | 3
-rw-r--r-- | vm/compiler/codegen/arm/ArmLIR.h | 29
-rw-r--r-- | vm/compiler/codegen/arm/Assemble.c | 117
-rw-r--r-- | vm/compiler/codegen/arm/CodegenCommon.c | 43
-rw-r--r-- | vm/compiler/codegen/arm/CodegenDriver.c | 80
-rw-r--r-- | vm/compiler/codegen/arm/LocalOptimizations.c | 148
-rw-r--r-- | vm/compiler/codegen/arm/RallocUtil.c | 108
-rw-r--r-- | vm/compiler/codegen/arm/Thumb/Factory.c | 70
-rw-r--r-- | vm/compiler/codegen/arm/Thumb/Gen.c | 89
-rw-r--r-- | vm/compiler/codegen/arm/Thumb2/Factory.c | 15
-rw-r--r-- | vm/compiler/codegen/arm/Thumb2/Gen.c | 81
-rw-r--r-- | vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c | 14
-rw-r--r-- | vm/compiler/codegen/arm/armv5te/ArchVariant.c | 14
-rw-r--r-- | vm/compiler/codegen/arm/armv7-a/ArchVariant.c | 14
15 files changed, 521 insertions, 306 deletions
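
The round-robin temp allocation mentioned in the commit message lives in the RallocUtil.c changes below. As a reading aid, here is a condensed sketch of the first-pass scan from the reworked allocTempBody(); the type and function names are stand-ins, and the real code adds a second pass that settles for an in-use-but-not-live register plus an abort path:

```c
#include <stdbool.h>

typedef struct {
    int  reg;     /* physical register number     */
    bool inUse;   /* currently holding a value?   */
    bool live;    /* caching a live Dalvik value? */
} RegInfoStub;

/* First-pass scan: prefer a register that is neither in use nor live,
 * starting from wherever the previous allocation left off. */
static int allocTempRoundRobin(RegInfoStub *p, int numTemps, int *nextTemp)
{
    int i;
    int next = *nextTemp;
    for (i = 0; i < numTemps; i++) {
        if (next >= numTemps)
            next = 0;                   /* wrap around the pool */
        if (!p[next].inUse && !p[next].live) {
            p[next].inUse = true;
            *nextTemp = next + 1;       /* resume after this register next time */
            return p[next].reg;
        }
        next++;
    }
    return -1;   /* nothing free: the real code retries with relaxed criteria */
}
```

Rotating the starting index spreads consecutive allocations across different physical registers, which gives the local scheduler more independent values to interleave than always reusing the lowest-numbered free temp.
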
diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c index a353b867c..de5d4cc6e 100644 --- a/vm/compiler/Compiler.c +++ b/vm/compiler/Compiler.c @@ -97,7 +97,7 @@ bool dvmCompilerWorkEnqueue(const u2 *pc, WorkOrderKind kind, void* info) newOrder->info = info; newOrder->result.codeAddress = NULL; newOrder->result.discardResult = - (kind == kWorkOrderTraceDebug) ? true : false; + (kind == kWorkOrderTraceDebug) ? true : false; newOrder->result.requestingThread = dvmThreadSelf(); gDvmJit.compilerWorkEnqueueIndex++; diff --git a/vm/compiler/codegen/CompilerCodegen.h b/vm/compiler/codegen/CompilerCodegen.h index 4a27a670f..06fd41090 100644 --- a/vm/compiler/codegen/CompilerCodegen.h +++ b/vm/compiler/codegen/CompilerCodegen.h @@ -58,4 +58,7 @@ JitInstructionSetType dvmCompilerInstructionSet(void); */ bool dvmCompilerArchVariantInit(void); +/* Implemented in codegen/<target>/<target_variant>/ArchVariant.c */ +int dvmCompilerTargetOptHint(int key); + #endif /* _DALVIK_VM_COMPILERCODEGEN_H_ */ diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h index 7d262215e..e1073b604 100644 --- a/vm/compiler/codegen/arm/ArmLIR.h +++ b/vm/compiler/codegen/arm/ArmLIR.h @@ -115,8 +115,10 @@ typedef struct RegisterPool { BitVector *nullCheckedRegs; // Track which registers have been null-checked int numCoreTemps; RegisterInfo *coreTemps; + int nextCoreTemp; int numFPTemps; RegisterInfo *FPTemps; + int nextFPTemp; int numCoreRegs; RegisterInfo *coreRegs; int numFPRegs; @@ -133,6 +135,10 @@ typedef enum ResourceEncodingPos { kCCode = kRegEnd, kFPStatus, kDalvikReg, + kLiteral, + kFrameRef, + kHeapRef, + kLitPoolRef } ResourceEncodingPos; #define ENCODE_REG_LIST(N) ((u8) N) @@ -141,8 +147,20 @@ typedef enum ResourceEncodingPos { #define ENCODE_REG_PC (1ULL << kRegPC) #define ENCODE_CCODE (1ULL << kCCode) #define ENCODE_FP_STATUS (1ULL << kFPStatus) + + /* Must alias */ #define ENCODE_DALVIK_REG (1ULL << kDalvikReg) +#define ENCODE_LITERAL (1ULL << kLiteral) + + /* May alias */ +#define ENCODE_FRAME_REF (1ULL << kFrameRef) +#define ENCODE_HEAP_REF (1ULL << kHeapRef) +#define ENCODE_LITPOOL_REF (1ULL << kLitPoolRef) + #define ENCODE_ALL (~0ULL) +#define ENCODE_MEM_DEF (ENCODE_FRAME_REF | ENCODE_HEAP_REF) +#define ENCODE_MEM_USE (ENCODE_FRAME_REF | ENCODE_HEAP_REF \ + | ENCODE_LITPOOL_REF) #define DECODE_ALIAS_INFO_REG(X) (X & 0xffff) #define DECODE_ALIAS_INFO_WIDE(X) ((X & 0x80000000) ? 
1 : 0) @@ -633,8 +651,12 @@ typedef enum ArmOpFeatureFlags { kIsIT, kSetsCCodes, kUsesCCodes, + kMemLoad, + kMemStore, } ArmOpFeatureFlags; +#define IS_LOAD (1 << kMemLoad) +#define IS_STORE (1 << kMemStore) #define IS_BRANCH (1 << kIsBranch) #define REG_DEF0 (1 << kRegDef0) #define REG_DEF1 (1 << kRegDef1) @@ -702,6 +724,11 @@ typedef struct ArmEncodingMap { int size; } ArmEncodingMap; +/* Keys for target-specific scheduling and other optimization hints */ +typedef enum ArmTargetOptHints { + kMaxHoistDistance, +} ArmTargetOptHints; + extern ArmEncodingMap EncodingMap[kArmLast]; /* @@ -727,7 +754,7 @@ typedef struct ArmLIR { // used to identify mem ops for self verification mode int age; // default is 0, set lazily by the optimizer int size; // 16-bit unit size (1 for thumb, 1 or 2 for thumb2) - int aliasInfo; // For Dalvik register access disambiguation + int aliasInfo; // For Dalvik register access & litpool disambiguation u8 useMask; // Resource mask for use u8 defMask; // Resource mask for def } ArmLIR; diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c index 2ad084220..28dafe710 100644 --- a/vm/compiler/codegen/arm/Assemble.c +++ b/vm/compiler/codegen/arm/Assemble.c @@ -210,47 +210,47 @@ ArmEncodingMap EncodingMap[kArmLast] = { ENCODING_MAP(kThumbLdmia, 0xc800, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, "ldmia", "r!0d!!, <!1R>", 1), ENCODING_MAP(kThumbLdrRRI5, 0x6800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldr", "r!0d, [r!1d, #!2E]", 1), ENCODING_MAP(kThumbLdrRRR, 0x5800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, "ldr", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(kThumbLdrPcRel, 0x4800, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC, - "ldr", "r!0d, [pc, #!1E]", 1), + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC + | IS_LOAD, "ldr", "r!0d, [pc, #!1E]", 1), ENCODING_MAP(kThumbLdrSpRel, 0x9800, kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP, - "ldr", "r!0d, [sp, #!2E]", 1), + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP + | IS_LOAD, "ldr", "r!0d, [sp, #!2E]", 1), ENCODING_MAP(kThumbLdrbRRI5, 0x7800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldrb", "r!0d, [r!1d, #2d]", 1), ENCODING_MAP(kThumbLdrbRRR, 0x5c00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, "ldrb", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(kThumbLdrhRRI5, 0x8800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldrh", "r!0d, [r!1d, #!2F]", 1), ENCODING_MAP(kThumbLdrhRRR, 0x5a00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, "ldrh", 
"r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(kThumbLdrsbRRR, 0x5600, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, "ldrsb", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(kThumbLdrshRRR, 0x5e00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, "ldrsh", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(kThumbLslRRI5, 0x0000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, @@ -317,13 +317,13 @@ ArmEncodingMap EncodingMap[kArmLast] = { ENCODING_MAP(kThumbPop, 0xbc00, kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0, - "pop", "<!0R>", 1), + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 + | IS_LOAD, "pop", "<!0R>", 1), ENCODING_MAP(kThumbPush, 0xb400, kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0, - "push", "<!0R>", 1), + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 + | IS_STORE, "push", "<!0R>", 1), ENCODING_MAP(kThumbRorRR, 0x41c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, @@ -337,35 +337,35 @@ ArmEncodingMap EncodingMap[kArmLast] = { ENCODING_MAP(kThumbStmia, 0xc000, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1, + IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE, "stmia", "r!0d!!, <!1R>", 1), ENCODING_MAP(kThumbStrRRI5, 0x6000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, "str", "r!0d, [r!1d, #!2E]", 1), ENCODING_MAP(kThumbStrRRR, 0x5000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, "str", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(kThumbStrSpRel, 0x9000, kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP, - "str", "r!0d, [sp, #!2E]", 1), + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP + | IS_STORE, "str", "r!0d, [sp, #!2E]", 1), ENCODING_MAP(kThumbStrbRRI5, 0x7000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, "strb", "r!0d, [r!1d, #!2d]", 1), ENCODING_MAP(kThumbStrbRRR, 0x5400, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, "strb", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(kThumbStrhRRI5, 0x8000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, "strh", "r!0d, [r!1d, #!2F]", 1), ENCODING_MAP(kThumbStrhRRR, 0x5200, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, "strh", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(kThumbSubRRI3, 0x1e00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, @@ -396,11 +396,11 @@ ArmEncodingMap EncodingMap[kArmLast] = { "tst", "r!0d, r!1d", 
1), ENCODING_MAP(kThumb2Vldrs, 0xed900a00, kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "vldr", "!0s, [r!1d, #!2E]", 2), ENCODING_MAP(kThumb2Vldrd, 0xed900b00, kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "vldr", "!0S, [r!1d, #!2E]", 2), ENCODING_MAP(kThumb2Vmuls, 0xee200a00, kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, @@ -413,11 +413,11 @@ ArmEncodingMap EncodingMap[kArmLast] = { "vmuld", "!0S, !1S, !2S", 2), ENCODING_MAP(kThumb2Vstrs, 0xed800a00, kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, "vstr", "!0s, [r!1d, #!2E]", 2), ENCODING_MAP(kThumb2Vstrd, 0xed800b00, kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, "vstr", "!0S, [r!1d, #!2E]", 2), ENCODING_MAP(kThumb2Vsubs, 0xee300a40, kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, @@ -485,19 +485,19 @@ ArmEncodingMap EncodingMap[kArmLast] = { "mov", "r!0d, #!1M", 2), ENCODING_MAP(kThumb2StrRRI12, 0xf8c00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, "str", "r!0d,[r!1d, #!2d", 2), ENCODING_MAP(kThumb2LdrRRI12, 0xf8d00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldr", "r!0d,[r!1d, #!2d", 2), ENCODING_MAP(kThumb2StrRRI8Predec, 0xf8400c00, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, "str", "r!0d,[r!1d, #-!2d]", 2), ENCODING_MAP(kThumb2LdrRRI8Predec, 0xf8500c00, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldr", "r!0d,[r!1d, #-!2d]", 2), ENCODING_MAP(kThumb2Cbnz, 0xb900, /* Note: does not affect flags */ kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, @@ -527,12 +527,12 @@ ArmEncodingMap EncodingMap[kArmLast] = { ENCODING_MAP(kThumb2Ldmia, 0xe8900000, kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, "ldmia", "r!0d!!, <!1R>", 2), ENCODING_MAP(kThumb2Stmia, 0xe8800000, kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE, "stmia", "r!0d!!, <!1R>", 2), ENCODING_MAP(kThumb2AddRRR, 0xeb100000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, @@ -578,70 +578,70 @@ ArmEncodingMap EncodingMap[kArmLast] = { "sbfx", "r!0d, r!1d, #!2d, #!3d", 2), ENCODING_MAP(kThumb2LdrRRR, 0xf8500000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, "ldr", "r!0d,[r!1d, r!2d, LSL #!3d]", 2), ENCODING_MAP(kThumb2LdrhRRR, 0xf8300000, kFmtBitBlt, 15, 12, 
kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, "ldrh", "r!0d,[r!1d, r!2d, LSL #!3d]", 2), ENCODING_MAP(kThumb2LdrshRRR, 0xf9300000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, "ldrsh", "r!0d,[r!1d, r!2d, LSL #!3d]", 2), ENCODING_MAP(kThumb2LdrbRRR, 0xf8100000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, "ldrb", "r!0d,[r!1d, r!2d, LSL #!3d]", 2), ENCODING_MAP(kThumb2LdrsbRRR, 0xf9100000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, "ldrsb", "r!0d,[r!1d, r!2d, LSL #!3d]", 2), ENCODING_MAP(kThumb2StrRRR, 0xf8400000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, "str", "r!0d,[r!1d, r!2d, LSL #!3d]", 2), ENCODING_MAP(kThumb2StrhRRR, 0xf8200000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, "strh", "r!0d,[r!1d, r!2d, LSL #!3d]", 2), ENCODING_MAP(kThumb2StrbRRR, 0xf8000000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, "strb", "r!0d,[r!1d, r!2d, LSL #!3d]", 2), ENCODING_MAP(kThumb2LdrhRRI12, 0xf8b00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldrh", "r!0d,[r!1d, #!2d]", 2), ENCODING_MAP(kThumb2LdrshRRI12, 0xf9b00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldrsh", "r!0d,[r!1d, #!2d]", 2), ENCODING_MAP(kThumb2LdrbRRI12, 0xf8900000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldrb", "r!0d,[r!1d, #!2d]", 2), ENCODING_MAP(kThumb2LdrsbRRI12, 0xf9900000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldrsb", "r!0d,[r!1d, #!2d]", 2), ENCODING_MAP(kThumb2StrhRRI12, 0xf8a00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, "strh", "r!0d,[r!1d, #!2d]", 2), ENCODING_MAP(kThumb2StrbRRI12, 0xf8800000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, "strb", "r!0d,[r!1d, #!2d]", 2), ENCODING_MAP(kThumb2Pop, 0xe8bd0000, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0, - "pop", "<!0R>", 2), + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 + | IS_LOAD, "pop", "<!0R>", 2), ENCODING_MAP(kThumb2Push, 0xe8ad0000, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, 
-1, kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0, - "push", "<!0R>", 2), + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 + | IS_STORE, "push", "<!0R>", 2), ENCODING_MAP(kThumb2CmpRI8, 0xf1b00f00, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, @@ -783,7 +783,7 @@ ArmEncodingMap EncodingMap[kArmLast] = { ENCODING_MAP(kThumb2LdrPcRel12, 0xf8df0000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC, + IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD, "ldr", "r!0d,[rpc, #!1d]", 2), ENCODING_MAP(kThumb2BCond, 0xf0008000, kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1, @@ -850,15 +850,16 @@ ArmEncodingMap EncodingMap[kArmLast] = { "umull", "r!0d, r!1d, r!2d, r!3d", 2), ENCODING_MAP(kThumb2Ldrex, 0xe8500f00, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldrex", "r!0d,[r!1d, #!2E]", 2), ENCODING_MAP(kThumb2Strex, 0xe8400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, - kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12, + kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE, "strex", "r!0d,r!1d, [r!2d, #!2E]", 2), ENCODING_MAP(kThumb2Clrex, 0xf3bf8f2f, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, NO_OPERAND, "clrex", "", 2), + kFmtUnused, -1, -1, NO_OPERAND, + "clrex", "", 2), ENCODING_MAP(kThumb2Bfi, 0xf3600000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, diff --git a/vm/compiler/codegen/arm/CodegenCommon.c b/vm/compiler/codegen/arm/CodegenCommon.c index a3a8d54de..6d2ddcd40 100644 --- a/vm/compiler/codegen/arm/CodegenCommon.c +++ b/vm/compiler/codegen/arm/CodegenCommon.c @@ -32,17 +32,45 @@ static intptr_t templateEntryOffsets[TEMPLATE_LAST_MARK]; /* Track exercised opcodes */ static int opcodeCoverage[256]; +static void setMemRefType(ArmLIR *lir, bool isLoad, int memType) +{ + u8 *maskPtr; + u8 mask; + assert( EncodingMap[lir->opCode].flags & (IS_LOAD | IS_STORE)); + if (isLoad) { + maskPtr = &lir->useMask; + mask = ENCODE_MEM_USE; + } else { + maskPtr = &lir->defMask; + mask = ENCODE_MEM_DEF; + } + /* Clear out the memref flags */ + *maskPtr &= ~mask; + /* ..and then add back the one we need */ + switch(memType) { + case kLiteral: + assert(isLoad); + *maskPtr |= (ENCODE_LITERAL | ENCODE_LITPOOL_REF); + break; + case kDalvikReg: + *maskPtr |= (ENCODE_DALVIK_REG | ENCODE_FRAME_REF); + break; + case kHeapRef: + *maskPtr |= ENCODE_HEAP_REF; + break; + default: + LOGE("Jit: invalid memref kind - %d", memType); + dvmAbort(); + } +} + /* * Mark load/store instructions that access Dalvik registers through rFP + * offset. */ static void annotateDalvikRegAccess(ArmLIR *lir, int regId, bool isLoad) { - if (isLoad) { - lir->useMask |= ENCODE_DALVIK_REG; - } else { - lir->defMask |= ENCODE_DALVIK_REG; - } + setMemRefType(lir, isLoad, kDalvikReg); /* * Store the Dalvik register id in aliasInfo. 
Mark he MSB if it is a 64-bit @@ -90,6 +118,11 @@ static void setupResourceMasks(ArmLIR *lir) flags = EncodingMap[lir->opCode].flags; /* Set up the mask for resources that are updated */ + if (flags & (IS_LOAD | IS_STORE)) { + /* Default to heap - will catch specialized classes later */ + setMemRefType(lir, flags & IS_LOAD, kHeapRef); + } + if (flags & IS_BRANCH) { lir->defMask |= ENCODE_REG_PC; lir->useMask |= ENCODE_REG_PC; diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c index a6aafd126..78c52ee21 100644 --- a/vm/compiler/codegen/arm/CodegenDriver.c +++ b/vm/compiler/codegen/arm/CodegenDriver.c @@ -2959,6 +2959,86 @@ static bool genInlinedIndexOf(CompilationUnit *cUnit, MIR *mir, bool singleI) #endif } +static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir) +{ + RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlDest = inlinedTarget(cUnit, mir, false); + rlObj = loadValue(cUnit, rlObj, kCoreReg); + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset, NULL); + loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count, + rlResult.lowReg); + storeValue(cUnit, rlDest, rlResult); + return false; +} + +static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir) +{ + int contents = offsetof(ArrayObject, contents); + RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); + RegLocation rlIdx = dvmCompilerGetSrc(cUnit, mir, 1); + RegLocation rlDest = inlinedTarget(cUnit, mir, false); + RegLocation rlResult; + rlObj = loadValue(cUnit, rlObj, kCoreReg); + rlIdx = loadValue(cUnit, rlIdx, kCoreReg); + int regMax = dvmCompilerAllocTemp(cUnit); + int regOff = dvmCompilerAllocTemp(cUnit); + int regPtr = dvmCompilerAllocTemp(cUnit); + ArmLIR *pcrLabel = genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, + mir->offset, NULL); + loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count, regMax); + loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_offset, regOff); + loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_value, regPtr); + genBoundsCheck(cUnit, rlIdx.lowReg, regMax, mir->offset, pcrLabel); + dvmCompilerFreeTemp(cUnit, regMax); + opRegImm(cUnit, kOpAdd, regPtr, contents); + opRegReg(cUnit, kOpAdd, regOff, rlIdx.lowReg); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + loadBaseIndexed(cUnit, regPtr, regOff, rlResult.lowReg, 1, kUnsignedHalf); + storeValue(cUnit, rlDest, rlResult); + return false; +} + +static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir) +{ + RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + RegLocation rlDest = inlinedTarget(cUnit, mir, false);; + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + int signReg = dvmCompilerAllocTemp(cUnit); + /* + * abs(x) = y<=x>>31, (x+y)^y. + * Thumb2's IT block also yields 3 instructions, but imposes + * scheduling constraints. 
+ */ + opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.lowReg, 31); + opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg); + opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg); + storeValue(cUnit, rlDest, rlResult); + return false; +} + +static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir) +{ + RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); + RegLocation rlDest = inlinedTargetWide(cUnit, mir, false); + rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg); + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + int signReg = dvmCompilerAllocTemp(cUnit); + /* + * abs(x) = y<=x>>31, (x+y)^y. + * Thumb2 IT block allows slightly shorter sequence, + * but introduces a scheduling barrier. Stick with this + * mechanism for now. + */ + opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.highReg, 31); + opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg); + opRegRegReg(cUnit, kOpAdc, rlResult.highReg, rlSrc.highReg, signReg); + opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg); + opRegReg(cUnit, kOpXor, rlResult.highReg, signReg); + storeValueWide(cUnit, rlDest, rlResult); + return false; +} /* * NOTE: Handles both range and non-range versions (arguments diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c index 159c2aae5..9f616b836 100644 --- a/vm/compiler/codegen/arm/LocalOptimizations.c +++ b/vm/compiler/codegen/arm/LocalOptimizations.c @@ -26,12 +26,18 @@ ArmLIR* dvmCompilerGenCopy(CompilationUnit *cUnit, int rDest, int rSrc); /* Is this a Dalvik register access? */ static inline bool isDalvikLoad(ArmLIR *lir) { - return (lir->useMask != ~0ULL) && (lir->useMask & ENCODE_DALVIK_REG); + return (lir->useMask != ENCODE_ALL) && (lir->useMask & ENCODE_DALVIK_REG); +} + +/* Is this a load from the literal pool? */ +static inline bool isLiteralLoad(ArmLIR *lir) +{ + return (lir->useMask != ENCODE_ALL) && (lir->useMask & ENCODE_LITERAL); } static inline bool isDalvikStore(ArmLIR *lir) { - return (lir->defMask != ~0ULL) && (lir->defMask & ENCODE_DALVIK_REG); + return (lir->defMask != ENCODE_ALL) && (lir->defMask & ENCODE_DALVIK_REG); } static inline bool isDalvikRegisterClobbered(ArmLIR *lir1, ArmLIR *lir2) @@ -169,6 +175,12 @@ static void applyLoadHoisting(CompilationUnit *cUnit, ArmLIR *tailLIR) { ArmLIR *thisLIR; + /* + * Don't want to hoist in front of first load following a barrier (or + * first instruction of the block. + */ + bool firstLoad = true; + int maxHoist = dvmCompilerTargetOptHint(kMaxHoistDistance); cUnit->optRound++; for (thisLIR = headLIR; @@ -179,6 +191,18 @@ static void applyLoadHoisting(CompilationUnit *cUnit, thisLIR->isNop == true) { continue; } + + if (firstLoad && (EncodingMap[thisLIR->opCode].flags & IS_LOAD)) { + /* + * Ensure nothing will be hoisted in front of this load because + * it's result will likely be needed soon. 
+ */ + thisLIR->defMask |= ENCODE_MEM_USE; + firstLoad = false; + } + + firstLoad |= (thisLIR->defMask == ENCODE_ALL); + if (isDalvikLoad(thisLIR)) { int dRegId = DECODE_ALIAS_INFO_REG(thisLIR->aliasInfo); int dRegIdHi = dRegId + DECODE_ALIAS_INFO_WIDE(thisLIR->aliasInfo); @@ -186,8 +210,8 @@ static void applyLoadHoisting(CompilationUnit *cUnit, ArmLIR *checkLIR; int hoistDistance = 0; u8 stopUseMask = (ENCODE_REG_PC | thisLIR->useMask) & - ~ENCODE_DALVIK_REG; - u8 stopDefMask = thisLIR->defMask & ~ENCODE_DALVIK_REG; + ~ENCODE_FRAME_REF; + u8 stopDefMask = thisLIR->defMask & ~ENCODE_FRAME_REF; /* First check if the load can be completely elinimated */ for (checkLIR = PREV_LIR(thisLIR); @@ -243,8 +267,15 @@ static void applyLoadHoisting(CompilationUnit *cUnit, if (checkLIR->isNop) continue; - /* Check if the current load is redundant */ - if ((isDalvikLoad(checkLIR) || isDalvikStore(checkLIR)) && + /* + * Check if the "thisLIR" load is redundant + * NOTE: At one point, we also triggered if the checkLIR + * instruction was a load. However, that tended to insert + * a load/use dependency because the full scheduler is + * not yet complete. When it is, we chould also trigger + * on loads. + */ + if (isDalvikStore(checkLIR) && (checkLIR->aliasInfo == thisLIR->aliasInfo) && (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) { /* Insert a move to replace the load */ @@ -301,6 +332,9 @@ static void applyLoadHoisting(CompilationUnit *cUnit, } } + /* Don't go too far */ + stopHere |= (hoistDistance >= maxHoist); + /* Found a new place to put the load - move it here */ if (stopHere == true) { DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR, @@ -331,6 +365,108 @@ static void applyLoadHoisting(CompilationUnit *cUnit, } } } + } else if (isLiteralLoad(thisLIR)) { + int litVal = thisLIR->aliasInfo; + int nativeRegId = thisLIR->operands[0]; + ArmLIR *checkLIR; + int hoistDistance = 0; + u8 stopUseMask = (ENCODE_REG_PC | thisLIR->useMask) & + ~ENCODE_LITPOOL_REF; + u8 stopDefMask = thisLIR->defMask & ~ENCODE_LITPOOL_REF; + + /* First check if the load can be completely elinimated */ + for (checkLIR = PREV_LIR(thisLIR); + checkLIR != headLIR; + checkLIR = PREV_LIR(checkLIR)) { + + if (checkLIR->isNop) continue; + + /* Reloading same literal into same tgt reg? Eliminate if so */ + if (isLiteralLoad(checkLIR) && + (checkLIR->aliasInfo == litVal) && + (checkLIR->operands[0] == nativeRegId)) { + thisLIR->isNop = true; + break; + } + + /* + * No earlier use/def can reach this load if: + * 1) Head instruction is reached + * 2) load target register is clobbered + * 3) A branch is seen (stopUseMask has the PC bit set). + */ + if ((checkLIR == headLIR) || + (stopUseMask | stopDefMask) & checkLIR->defMask) { + break; + } + } + + /* The load has been eliminated */ + if (thisLIR->isNop) continue; + + /* + * The load cannot be eliminated. See if it can be hoisted to an + * earlier spot. + */ + for (checkLIR = PREV_LIR(thisLIR); + /* empty by intention */; + checkLIR = PREV_LIR(checkLIR)) { + + if (checkLIR->isNop) continue; + + /* + * TUNING: once a full scheduler exists, check here + * for conversion of a redundant load into a copy similar + * to the way redundant loads are handled above. 
+ */ + + /* Find out if the load can be yanked past the checkLIR */ + + /* Last instruction reached */ + bool stopHere = (checkLIR == headLIR); + + /* Base address is clobbered by checkLIR */ + stopHere |= ((stopUseMask & checkLIR->defMask) != 0); + + /* Load target clobbers use/def in checkLIR */ + stopHere |= ((stopDefMask & + (checkLIR->useMask | checkLIR->defMask)) != 0); + + /* Avoid re-ordering literal pool loads */ + stopHere |= isLiteralLoad(checkLIR); + + /* Don't go too far */ + stopHere |= (hoistDistance >= maxHoist); + + /* Found a new place to put the load - move it here */ + if (stopHere == true) { + DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR, + "HOIST LOAD")); + /* The store can be hoisted for at least one cycle */ + if (hoistDistance != 0) { + ArmLIR *newLoadLIR = + dvmCompilerNew(sizeof(ArmLIR), true); + *newLoadLIR = *thisLIR; + newLoadLIR->age = cUnit->optRound; + /* + * Insertion is guaranteed to succeed since checkLIR + * is never the first LIR on the list + */ + dvmCompilerInsertLIRAfter((LIR *) checkLIR, + (LIR *) newLoadLIR); + thisLIR->isNop = true; + } + break; + } + + /* + * Saw a real instruction that hosting the load is + * beneficial + */ + if (!isPseudoOpCode(checkLIR->opCode)) { + hoistDistance++; + } + } } } } diff --git a/vm/compiler/codegen/arm/RallocUtil.c b/vm/compiler/codegen/arm/RallocUtil.c index 9035f93c7..131df2c7b 100644 --- a/vm/compiler/codegen/arm/RallocUtil.c +++ b/vm/compiler/codegen/arm/RallocUtil.c @@ -123,7 +123,7 @@ static RegisterInfo *getRegInfo(CompilationUnit *cUnit, int reg) } } LOGE("Tried to get info on a non-existant temp: r%d",reg); - dvmAbort(); + dvmAbort(); // FIXME: abort translation intead of vm return NULL; } @@ -218,29 +218,38 @@ extern void dvmCompilerClobberSReg(CompilationUnit *cUnit, int sReg) } static int allocTempBody(CompilationUnit *cUnit, RegisterInfo *p, int numTemps, - bool required) + int *nextTemp, bool required) { int i; - //Tuning: redo this to widen the live window on freed temps + int next = *nextTemp; for (i=0; i< numTemps; i++) { - if (!p[i].inUse && !p[i].live) { - dvmCompilerClobber(cUnit, p[i].reg); - p[i].inUse = true; - p[i].pair = false; - return p[i].reg; + if (next >= numTemps) + next = 0; + if (!p[next].inUse && !p[next].live) { + dvmCompilerClobber(cUnit, p[next].reg); + p[next].inUse = true; + p[next].pair = false; + *nextTemp = next + 1; + return p[next].reg; } + next++; } + next = *nextTemp; for (i=0; i< numTemps; i++) { - if (!p[i].inUse) { - dvmCompilerClobber(cUnit, p[i].reg); - p[i].inUse = true; - p[i].pair = false; - return p[i].reg; + if (next >= numTemps) + next = 0; + if (!p[next].inUse) { + dvmCompilerClobber(cUnit, p[next].reg); + p[next].inUse = true; + p[next].pair = false; + *nextTemp = next + 1; + return p[next].reg; } + next++; } if (required) { LOGE("No free temp registers"); - assert(0); + dvmAbort(); // FIXME: abort translation instead of vm } return -1; // No register available } @@ -250,34 +259,46 @@ extern int dvmCompilerAllocTempDouble(CompilationUnit *cUnit) { RegisterInfo *p = cUnit->regPool->FPTemps; int numTemps = cUnit->regPool->numFPTemps; + int next = cUnit->regPool->nextFPTemp; int i; for (i=0; i < numTemps; i+=2) { - if ((!p[i].inUse && !p[i].live) && - (!p[i+1].inUse && !p[i+1].live)) { - dvmCompilerClobber(cUnit, p[i].reg); - dvmCompilerClobber(cUnit, p[i+1].reg); - p[i].inUse = true; - p[i+1].inUse = true; - assert((p[i].reg+1) == p[i+1].reg); - assert((p[i].reg & 0x1) == 0); - return p[i].reg; + /* Cleanup - not all targets need aligned regs */ + if 
(next & 1) + next++; + if (next >= numTemps) + next = 0; + if ((!p[next].inUse && !p[next].live) && + (!p[next+1].inUse && !p[next+1].live)) { + dvmCompilerClobber(cUnit, p[next].reg); + dvmCompilerClobber(cUnit, p[next+1].reg); + p[next].inUse = true; + p[next+1].inUse = true; + assert((p[next].reg+1) == p[next+1].reg); + assert((p[next].reg & 0x1) == 0); + cUnit->regPool->nextFPTemp += 2; + return p[next].reg; } + next += 2; } + next = cUnit->regPool->nextFPTemp; for (i=0; i < numTemps; i+=2) { - if (!p[i].inUse && !p[i+1].inUse) { - dvmCompilerClobber(cUnit, p[i].reg); - dvmCompilerClobber(cUnit, p[i+1].reg); - p[i].inUse = true; - p[i+1].inUse = true; - assert((p[i].reg+1) == p[i+1].reg); - assert((p[i].reg & 0x1) == 0); - return p[i].reg; + if (next >= numTemps) + next = 0; + if (!p[next].inUse && !p[next+1].inUse) { + dvmCompilerClobber(cUnit, p[next].reg); + dvmCompilerClobber(cUnit, p[next+1].reg); + p[next].inUse = true; + p[next+1].inUse = true; + assert((p[next].reg+1) == p[next+1].reg); + assert((p[next].reg & 0x1) == 0); + cUnit->regPool->nextFPTemp += 2; + return p[next].reg; } + next += 2; } LOGE("No free temp registers"); - *((int*)0) = 0; //For development, die instantly. Later abort translation - dvmAbort(); + dvmAbort(); // FIXME: abort translation instead of vm return -1; } @@ -285,19 +306,22 @@ extern int dvmCompilerAllocTempDouble(CompilationUnit *cUnit) extern int dvmCompilerAllocFreeTemp(CompilationUnit *cUnit) { return allocTempBody(cUnit, cUnit->regPool->coreTemps, - cUnit->regPool->numCoreTemps, true); + cUnit->regPool->numCoreTemps, + &cUnit->regPool->nextCoreTemp, true); } extern int dvmCompilerAllocTemp(CompilationUnit *cUnit) { return allocTempBody(cUnit, cUnit->regPool->coreTemps, - cUnit->regPool->numCoreTemps, true); + cUnit->regPool->numCoreTemps, + &cUnit->regPool->nextCoreTemp, true); } extern int dvmCompilerAllocTempFloat(CompilationUnit *cUnit) { return allocTempBody(cUnit, cUnit->regPool->FPTemps, - cUnit->regPool->numFPTemps, true); + cUnit->regPool->numFPTemps, + &cUnit->regPool->nextFPTemp, true); } static RegisterInfo *allocLiveBody(RegisterInfo *p, int numTemps, int sReg) @@ -335,8 +359,7 @@ static RegisterInfo *allocLive(CompilationUnit *cUnit, int sReg, break; default: LOGE("Invalid register type"); - assert(0); - dvmAbort(); + dvmAbort(); //FIXME: abort translation instead of vm } return res; } @@ -363,10 +386,13 @@ extern void dvmCompilerFreeTemp(CompilationUnit *cUnit, int reg) } } LOGE("Tried to free a non-existant temp: r%d",reg); - dvmAbort(); + dvmAbort(); // FIXME: abort translation instead of vm } -//FIXME - this needs to also check the preserved pool. +/* + * FIXME - this needs to also check the preserved pool once we start + * start using preserved registers. 
+ */ extern RegisterInfo *dvmCompilerIsLive(CompilationUnit *cUnit, int reg) { RegisterInfo *p = cUnit->regPool->coreTemps; @@ -434,7 +460,7 @@ extern void dvmCompilerLockTemp(CompilationUnit *cUnit, int reg) } } LOGE("Tried to lock a non-existant temp: r%d",reg); - dvmAbort(); + dvmAbort(); // FIXME: abort translation instead of vm } static void lockArgRegs(CompilationUnit *cUnit) diff --git a/vm/compiler/codegen/arm/Thumb/Factory.c b/vm/compiler/codegen/arm/Thumb/Factory.c index 59818a295..0a4428043 100644 --- a/vm/compiler/codegen/arm/Thumb/Factory.c +++ b/vm/compiler/codegen/arm/Thumb/Factory.c @@ -75,6 +75,17 @@ static ArmLIR *loadConstantValue(CompilationUnit *cUnit, int rDest, int value) loadPcRel->generic.target = (LIR *) dataTarget; loadPcRel->operands[0] = tDest; setupResourceMasks(loadPcRel); + /* + * Special case for literal loads with a link register target. + * Self-cosim mode will insert calls prior to heap references + * after optimization, and those will destroy r14. The easy + * workaround is to treat literal loads into r14 as heap references + * to prevent them from being hoisted. Use of r14 in this manner + * is currently rare. Revist if that changes. + */ + if (rDest != rlr) + setMemRefType(loadPcRel, true, kLiteral); + loadPcRel->aliasInfo = dataTarget->operands[0]; res = loadPcRel; dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); @@ -113,7 +124,7 @@ static ArmLIR *opNone(CompilationUnit *cUnit, OpKind op) opCode = kThumbBUncond; break; default: - assert(0); + dvmAbort(); // FIXME: abort trace instead of VM } return newLIR0(cUnit, opCode); } @@ -134,7 +145,7 @@ static ArmLIR *opImm(CompilationUnit *cUnit, OpKind op, int value) opCode = kThumbPop; break; default: - assert(0); + dvmAbort(); // FIXME: abort trace instead of VM } return newLIR1(cUnit, opCode, value); } @@ -147,7 +158,7 @@ static ArmLIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc) opCode = kThumbBlxR; break; default: - assert(0); + dvmAbort(); // FIXME: abort trace instead of VM } return newLIR1(cUnit, opCode, rDestSrc); } @@ -192,7 +203,7 @@ static ArmLIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1, } break; default: - assert(0); + dvmAbort(); // FIXME: abort trace instead of VM break; } if (shortForm) @@ -312,7 +323,7 @@ static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, } return res; default: - assert(0); + dvmAbort(); // FIXME - abort trace instead of VM break; } if (shortForm) @@ -409,7 +420,7 @@ static ArmLIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1, opRegRegImm(cUnit, kOpLsr, rDestSrc1, rDestSrc1, 16); return res; default: - assert(0); + dvmAbort(); // FIXME - abort trace instead of VM break; } return newLIR2(cUnit, opCode, rDestSrc1, rSrc2); @@ -454,7 +465,7 @@ static ArmLIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase, opCode = kThumbLdrsbRRR; break; default: - assert(0); + dvmAbort(); // FIXME: abort trace instead of VM } res = newLIR3(cUnit, opCode, rDest, rBase, rNewIndex); #if defined(WITH_SELF_VERIFICATION) @@ -491,7 +502,7 @@ static ArmLIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase, opCode = kThumbStrbRRR; break; default: - assert(0); + dvmAbort(); // FIXME - abort trace instead of VM } res = newLIR3(cUnit, opCode, rSrc, rBase, rNewIndex); #if defined(WITH_SELF_VERIFICATION) @@ -608,7 +619,7 @@ static ArmLIR *loadBaseDispBody(CompilationUnit *cUnit, MIR *mir, int rBase, opCode = kThumbLdrsbRRR; break; default: - assert(0); + dvmAbort(); // FIXME - abort trace instead of VM } if (shortForm) { load = res = 
newLIR3(cUnit, opCode, rDest, rBase, encodedDisp); @@ -618,20 +629,13 @@ static ArmLIR *loadBaseDispBody(CompilationUnit *cUnit, MIR *mir, int rBase, } else { if (pair) { int rTmp = dvmCompilerAllocFreeTemp(cUnit); - if (rTmp < 0) { - //UNIMP: need to spill if no temps. - assert(0); - } res = opRegRegImm(cUnit, kOpAdd, rTmp, rBase, displacement); - //TUNING: how to mark loadPair if Dalvik access? - loadPair(cUnit, rTmp, rDest, rDestHi); + load = newLIR3(cUnit, kThumbLdrRRI5, rDest, rTmp, 0); + load2 = newLIR3(cUnit, kThumbLdrRRI5, rDestHi, rTmp, 1); dvmCompilerFreeTemp(cUnit, rTmp); } else { - int rTmp = (rBase == rDest) ? dvmCompilerAllocFreeTemp(cUnit) : rDest; - if (rTmp < 0) { - //UNIMP: need to spill if no temps. - assert(0); - } + int rTmp = (rBase == rDest) ? dvmCompilerAllocFreeTemp(cUnit) + : rDest; res = loadConstant(cUnit, rTmp, displacement); load = newLIR3(cUnit, opCode, rDest, rBase, rTmp); if (rBase == rFP) @@ -641,6 +645,14 @@ static ArmLIR *loadBaseDispBody(CompilationUnit *cUnit, MIR *mir, int rBase, dvmCompilerFreeTemp(cUnit, rTmp); } } + if (rBase == rFP) { + if (load != NULL) + annotateDalvikRegAccess(load, displacement >> 2, + true /* isLoad */); + if (load2 != NULL) + annotateDalvikRegAccess(load2, (displacement >> 2) + 1, + true /* isLoad */); + } #if defined(WITH_SELF_VERIFICATION) if (load != NULL && cUnit->heapMemOp) load->branchInsertSV = true; @@ -724,7 +736,7 @@ static ArmLIR *storeBaseDispBody(CompilationUnit *cUnit, int rBase, } break; default: - assert(0); + dvmAbort(); // FIXME - abort trace instead of VM } if (shortForm) { store = res = newLIR3(cUnit, opCode, rSrc, rBase, encodedDisp); @@ -734,19 +746,23 @@ static ArmLIR *storeBaseDispBody(CompilationUnit *cUnit, int rBase, } else { int rScratch = dvmCompilerAllocTemp(cUnit); if (pair) { - //TUNING: how to mark storePair as Dalvik access if it is? 
res = opRegRegImm(cUnit, kOpAdd, rScratch, rBase, displacement); - storePair(cUnit, rScratch, rSrc, rSrcHi); + store = newLIR3(cUnit, kThumbStrRRI5, rSrc, rScratch, 0); + store2 = newLIR3(cUnit, kThumbStrRRI5, rSrcHi, rScratch, 1); } else { res = loadConstant(cUnit, rScratch, displacement); store = newLIR3(cUnit, opCode, rSrc, rBase, rScratch); - if (rBase == rFP) { - annotateDalvikRegAccess(store, displacement >> 2, - false /* isLoad */); - } } dvmCompilerFreeTemp(cUnit, rScratch); } + if (rBase == rFP) { + if (store != NULL) + annotateDalvikRegAccess(store, displacement >> 2, + false /* isLoad */); + if (store2 != NULL) + annotateDalvikRegAccess(store2, (displacement >> 2) + 1, + false /* isLoad */); + } #if defined(WITH_SELF_VERIFICATION) if (store != NULL && cUnit->heapMemOp) store->branchInsertSV = true; diff --git a/vm/compiler/codegen/arm/Thumb/Gen.c b/vm/compiler/codegen/arm/Thumb/Gen.c index 35c4451b7..ea5c1bc3f 100644 --- a/vm/compiler/codegen/arm/Thumb/Gen.c +++ b/vm/compiler/codegen/arm/Thumb/Gen.c @@ -153,66 +153,6 @@ static void genCmpLong(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, storeValue(cUnit, rlDest, rlResult); } -static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir) -{ - DecodedInstruction *dInsn = &mir->dalvikInsn; - int offset = offsetof(InterpState, retval); - RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); - int regObj = loadValue(cUnit, rlObj, kCoreReg).lowReg; - int reg1 = dvmCompilerAllocTemp(cUnit); - genNullCheck(cUnit, dvmCompilerSSASrc(mir, 0), regObj, mir->offset, NULL); - loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_count, reg1); - storeWordDisp(cUnit, rGLUE, offset, reg1); - return false; -} - -static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir) -{ - DecodedInstruction *dInsn = &mir->dalvikInsn; - int offset = offsetof(InterpState, retval); - int contents = offsetof(ArrayObject, contents); - RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); - RegLocation rlIdx = dvmCompilerGetSrc(cUnit, mir, 1); - int regObj = loadValue(cUnit, rlObj, kCoreReg).lowReg; - int regIdx = loadValue(cUnit, rlIdx, kCoreReg).lowReg; - int regMax = dvmCompilerAllocTemp(cUnit); - int regOff = dvmCompilerAllocTemp(cUnit); - ArmLIR * pcrLabel = genNullCheck(cUnit, dvmCompilerSSASrc(mir, 0), - regObj, mir->offset, NULL); - loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_count, regMax); - loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_offset, regOff); - loadWordDisp(cUnit, regObj, gDvm.offJavaLangString_value, regObj); - genBoundsCheck(cUnit, regIdx, regMax, mir->offset, pcrLabel); - - newLIR2(cUnit, kThumbAddRI8, regObj, contents); - newLIR3(cUnit, kThumbAddRRR, regIdx, regIdx, regOff); - newLIR3(cUnit, kThumbAddRRR, regIdx, regIdx, regIdx); - newLIR3(cUnit, kThumbLdrhRRR, regMax, regObj, regIdx); - dvmCompilerFreeTemp(cUnit, regOff); - storeWordDisp(cUnit, rGLUE, offset, regMax); -//FIXME: rewrite this to not clobber - dvmCompilerClobber(cUnit, regObj); - dvmCompilerClobber(cUnit, regIdx); - return false; -} - -static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir) -{ - int offset = offsetof(InterpState, retval); - RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); - int reg0 = loadValue(cUnit, rlSrc, kCoreReg).lowReg; - int sign = dvmCompilerAllocTemp(cUnit); - /* abs(x) = y<=x>>31, (x+y)^y. 
Shorter in ARM/THUMB2, no skip in THUMB */ - newLIR3(cUnit, kThumbAsrRRI5, sign, reg0, 31); - newLIR3(cUnit, kThumbAddRRR, reg0, reg0, sign); - newLIR2(cUnit, kThumbEorRR, reg0, sign); - dvmCompilerFreeTemp(cUnit, sign); - storeWordDisp(cUnit, rGLUE, offset, reg0); -//FIXME: rewrite this to not clobber - dvmCompilerClobber(cUnit, reg0); - return false; -} - static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir) { int offset = offsetof(InterpState, retval); @@ -223,7 +163,7 @@ static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir) newLIR2(cUnit, kThumbAndRR, reg0, signMask); dvmCompilerFreeTemp(cUnit, signMask); storeWordDisp(cUnit, rGLUE, offset, reg0); -//FIXME: rewrite this to not clobber + //TUNING: rewrite this to not clobber dvmCompilerClobber(cUnit, reg0); return true; } @@ -241,7 +181,7 @@ static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir) newLIR2(cUnit, kThumbAndRR, reghi, signMask); dvmCompilerFreeTemp(cUnit, signMask); storeWordDisp(cUnit, rGLUE, offset + 4, reghi); -//FIXME: rewrite this to not clobber + //TUNING: rewrite this to not clobber dvmCompilerClobber(cUnit, reghi); return true; } @@ -262,34 +202,11 @@ static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin) target->defMask = ENCODE_ALL; newLIR3(cUnit, kThumbStrRRI5, reg0, rGLUE, offset >> 2); branch1->generic.target = (LIR *)target; -//FIXME: rewrite this to not clobber + //TUNING: rewrite this to not clobber dvmCompilerClobber(cUnit,reg0); return false; } -static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir) -{ - int offset = offsetof(InterpState, retval); - RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); - RegLocation regSrc = loadValueWide(cUnit, rlSrc, kCoreReg); - int oplo = regSrc.lowReg; - int ophi = regSrc.highReg; - int sign = dvmCompilerAllocTemp(cUnit); - /* abs(x) = y<=x>>31, (x+y)^y. Shorter in ARM/THUMB2, no skip in THUMB */ - newLIR3(cUnit, kThumbAsrRRI5, sign, ophi, 31); - newLIR3(cUnit, kThumbAddRRR, oplo, oplo, sign); - newLIR2(cUnit, kThumbAdcRR, ophi, sign); - newLIR2(cUnit, kThumbEorRR, oplo, sign); - newLIR2(cUnit, kThumbEorRR, ophi, sign); - dvmCompilerFreeTemp(cUnit, sign); - storeWordDisp(cUnit, rGLUE, offset, oplo); - storeWordDisp(cUnit, rGLUE, offset + 4, ophi); -//FIXME: rewrite this to not clobber - dvmCompilerClobber(cUnit, oplo); - dvmCompilerClobber(cUnit, ophi); - return false; -} - static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit, RegLocation rlSrc, RegLocation rlResult, int lit, int firstBit, int secondBit) diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.c b/vm/compiler/codegen/arm/Thumb2/Factory.c index 242e665ab..c4d2c2824 100644 --- a/vm/compiler/codegen/arm/Thumb2/Factory.c +++ b/vm/compiler/codegen/arm/Thumb2/Factory.c @@ -68,6 +68,10 @@ static ArmLIR *loadFPConstantValue(CompilationUnit *cUnit, int rDest, loadPcRel->operands[0] = rDest; loadPcRel->operands[1] = rpc; setupResourceMasks(loadPcRel); + // Self-cosim workaround. + if (rDest != rlr) + setMemRefType(loadPcRel, true, kLiteral); + loadPcRel->aliasInfo = dataTarget->operands[0]; dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); return loadPcRel; } @@ -168,6 +172,17 @@ static ArmLIR *loadConstantValue(CompilationUnit *cUnit, int rDest, int value) loadPcRel->generic.target = (LIR *) dataTarget; loadPcRel->operands[0] = rDest; setupResourceMasks(loadPcRel); + /* + * Special case for literal loads with a link register target. 
+ * Self-cosim mode will insert calls prior to heap references + * after optimization, and those will destroy r14. The easy + * workaround is to treat literal loads into r14 as heap references + * to prevent them from being hoisted. Use of r14 in this manner + * is currently rare. Revisit if that changes. + */ + if (rDest != rlr) + setMemRefType(loadPcRel, true, kLiteral); + loadPcRel->aliasInfo = dataTarget->operands[0]; res = loadPcRel; dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.c b/vm/compiler/codegen/arm/Thumb2/Gen.c index 8b2b69653..1a505efe2 100644 --- a/vm/compiler/codegen/arm/Thumb2/Gen.c +++ b/vm/compiler/codegen/arm/Thumb2/Gen.c @@ -322,65 +322,6 @@ static void genCmpLong(CompilationUnit *cUnit, MIR *mir, branch3->generic.target = branch1->generic.target; } -static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir) -{ - RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); - RegLocation rlDest = inlinedTarget(cUnit, mir, false); - rlObj = loadValue(cUnit, rlObj, kCoreReg); - RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset, NULL); - loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count, - rlResult.lowReg); - storeValue(cUnit, rlDest, rlResult); - return false; -} - -static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir) -{ - int contents = offsetof(ArrayObject, contents); - RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); - RegLocation rlIdx = dvmCompilerGetSrc(cUnit, mir, 1); - RegLocation rlDest = inlinedTarget(cUnit, mir, false); - RegLocation rlResult; - rlObj = loadValue(cUnit, rlObj, kCoreReg); - rlIdx = loadValue(cUnit, rlIdx, kCoreReg); - int regMax = dvmCompilerAllocTemp(cUnit); - int regOff = dvmCompilerAllocTemp(cUnit); - int regPtr = dvmCompilerAllocTemp(cUnit); - ArmLIR *pcrLabel = genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, - mir->offset, NULL); - loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count, regMax); - loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_offset, regOff); - loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_value, regPtr); - genBoundsCheck(cUnit, rlIdx.lowReg, regMax, mir->offset, pcrLabel); - dvmCompilerFreeTemp(cUnit, regMax); - opRegImm(cUnit, kOpAdd, regPtr, contents); - opRegReg(cUnit, kOpAdd, regOff, rlIdx.lowReg); - rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - loadBaseIndexed(cUnit, regPtr, regOff, rlResult.lowReg, 1, kUnsignedHalf); - storeValue(cUnit, rlDest, rlResult); - return false; -} - -static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir) -{ - RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); - rlSrc = loadValue(cUnit, rlSrc, kCoreReg); - RegLocation rlDest = inlinedTarget(cUnit, mir, false);; - RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - int signReg = dvmCompilerAllocTemp(cUnit); - /* - * abs(x) = y<=x>>31, (x+y)^y. - * Thumb2's IT block also yields 3 instructions, but imposes - * scheduling constraints. 
- */ - opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.lowReg, 31); - opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg); - opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg); - storeValue(cUnit, rlDest, rlResult); - return false; -} - static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir) { RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); @@ -421,28 +362,6 @@ static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin) return false; } -static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir) -{ - RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); - RegLocation rlDest = inlinedTargetWide(cUnit, mir, false); - rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg); - RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - int signReg = dvmCompilerAllocTemp(cUnit); - /* - * abs(x) = y<=x>>31, (x+y)^y. - * Thumb2 IT block allows slightly shorter sequence, - * but introduces a scheduling barrier. Stick with this - * mechanism for now. - */ - opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.highReg, 31); - opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg); - opRegRegReg(cUnit, kOpAdc, rlResult.highReg, rlSrc.highReg, signReg); - opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg); - opRegReg(cUnit, kOpXor, rlResult.highReg, signReg); - storeValueWide(cUnit, rlDest, rlResult); - return false; -} - static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit, RegLocation rlSrc, RegLocation rlResult, int lit, int firstBit, int secondBit) diff --git a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c index b5706f530..aa343aad5 100644 --- a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c +++ b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c @@ -77,3 +77,17 @@ bool dvmCompilerArchVariantInit(void) assert(offsetof(InterpState, jitToInterpEntries) < 108); return true; } + +int dvmCompilerTargetOptHint(int key) +{ + int res; + switch (key) { + case kMaxHoistDistance: + res = 2; + break; + default: + LOGE("Unknown target optimization hint key: %d",key); + res = 0; + } + return res; +} diff --git a/vm/compiler/codegen/arm/armv5te/ArchVariant.c b/vm/compiler/codegen/arm/armv5te/ArchVariant.c index 1311510ed..d33debee1 100644 --- a/vm/compiler/codegen/arm/armv5te/ArchVariant.c +++ b/vm/compiler/codegen/arm/armv5te/ArchVariant.c @@ -77,3 +77,17 @@ bool dvmCompilerArchVariantInit(void) assert(offsetof(InterpState, jitToInterpEntries) < 108); return true; } + +int dvmCompilerTargetOptHint(int key) +{ + int res; + switch (key) { + case kMaxHoistDistance: + res = 2; + break; + default: + LOGE("Unknown target optimization hint key: %d",key); + res = 0; + } + return res; +} diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c index 1579d122d..fe2b1d485 100644 --- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c +++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c @@ -72,3 +72,17 @@ bool dvmCompilerArchVariantInit(void) assert(offsetof(InterpState, jitToInterpEntries) < 108); return true; } + +int dvmCompilerTargetOptHint(int key) +{ + int res; + switch (key) { + case kMaxHoistDistance: + res = 7; + break; + default: + LOGE("Unknown target optimization hint key: %d",key); + res = 0; + } + return res; +} |