JIT: Combine add with shift.

Optimize logical shifts for the ARM JIT. Whenever a logical shift is immediately followed by an add, try to replace the pair with a single add that performs the shift in the same instruction. This improves performance for use cases involving code executing in Dalvik. Change-Id: I3cb807b6d6ef4b053a19e2703676a93a930eb963 Signed-off-by: Patrik Ryd <patrik.ryd@stericsson.com>
author: Anders O Nilsson <anders.o.nilsson@stericsson.com> 2012-10-03 09:24:22 +0200
committer: Steve Kondik <shade@chemlab.org> 2013-07-23 11:54:34 -0700
commit: 36141f9691b4592b77d1b09ad60cafb611394f36 (patch)
tree: 72250708b5f503717e1903c202c54b9a255515db
parent: f3485c43787ba8ca298546efc39111123f4ebac3 (diff)
download: android_dalvik-36141f9691b4592b77d1b09ad60cafb611394f36.tar.gz
android_dalvik-36141f9691b4592b77d1b09ad60cafb611394f36.tar.bz2
android_dalvik-36141f9691b4592b77d1b09ad60cafb611394f36.zip
2 files changed, 134 insertions, 0 deletions
diff --git a/vm/compiler/codegen/Optimizer.h b/vm/compiler/codegen/Optimizer.h
index 43d98ed38..36f33e226 100644
--- a/vm/compiler/codegen/Optimizer.h
+++ b/vm/compiler/codegen/Optimizer.h
@@ -30,6 +30,7 @@ enum optControlVector {
     kSuppressLoads,
     kMethodInlining,
     kMethodJit,
+    kShiftArithmetic,
 };
 
 /* Forward declarations */
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.cpp b/vm/compiler/codegen/arm/LocalOptimizations.cpp
index 8013d0059..cb35d745d 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.cpp
+++ b/vm/compiler/codegen/arm/LocalOptimizations.cpp
@@ -453,6 +453,136 @@ static void applyLoadHoisting(CompilationUnit *cUnit,
     }
 }
 
+/*
+ * Fold a logical shift into the add that immediately follows it, using
+ * the shifted-register form of the Thumb2 add:
+ *
+ *   [lsl/lsr] rT, rM, #x
+ *   [add]     rD, rN, rT   (or rD, rT, rN)
+ * becomes
+ *   [add]     rD, rN, rM, [lsl/lsr] #x
+ *
+ * Scans [headLIR, tailLIR). A pair is rewritten only when rT is also the
+ * add's destination, or when the first later non-branch LIR that touches
+ * the shift's defMask does not read it. Both old LIRs then become nops.
+ */
+static void applyShiftArithmeticOpts(CompilationUnit *cUnit,
+                                     ArmLIR *headLIR,
+                                     ArmLIR *tailLIR) {
+    ArmLIR *thisLIR = NULL;
+
+    for (thisLIR = headLIR;
+         thisLIR != tailLIR;
+         thisLIR = NEXT_LIR(thisLIR)) {
+
+        if(thisLIR->flags.isNop) {
+            continue;
+        }
+
+        if(thisLIR->opcode == kThumb2LslRRI5 || thisLIR->opcode == kThumb2LsrRRI5 ||
+           thisLIR->opcode == kThumbLslRRI5 || thisLIR->opcode == kThumbLsrRRI5) {
+
+            /* Find next that is not nop and not pseudo code */
+            ArmLIR *nextLIR = NULL;
+            for(nextLIR = NEXT_LIR(thisLIR);
+                nextLIR != tailLIR;
+                nextLIR = NEXT_LIR(nextLIR)) {
+                if (!nextLIR->flags.isNop && !isPseudoOpcode(nextLIR->opcode)) {
+                    break;
+                }
+            }
+
+            /* Nothing but nops/pseudo ops left in the range: we are done */
+            if(nextLIR == tailLIR) {
+                return;
+            }
+
+            /*
+             * Candidate: an add without a shift of its own that reads the
+             * shift result in exactly one source. If both sources are the
+             * shift result (add rD, rT, rT) only one of them could be
+             * replaced by the shifted operand and the other would read a
+             * stale rT once the shift is removed, so leave that alone.
+             */
+            if(nextLIR->opcode == kThumb2AddRRR &&
+               nextLIR->operands[3] == 0 &&
+               nextLIR->operands[1] != nextLIR->operands[2] &&
+               (nextLIR->operands[1] == thisLIR->operands[0] ||
+                nextLIR->operands[2] == thisLIR->operands[0])) {
+
+                bool applyOpt = true;
+                if(!(thisLIR->operands[0] == nextLIR->operands[0])) {
+                    /* Check that shift dest reg is not used after
+                     * the addition. */
+                    ArmLIR* tmpLIR = NULL;
+                    for(tmpLIR = NEXT_LIR(nextLIR);
+                        tmpLIR != tailLIR;
+                        tmpLIR = NEXT_LIR(tmpLIR)) {
+
+                        if (!tmpLIR->flags.isNop &&
+                            !(EncodingMap[tmpLIR->opcode].flags & IS_BRANCH) &&
+                            (tmpLIR->defMask | tmpLIR->useMask) & thisLIR->defMask) {
+                            if(tmpLIR->useMask & thisLIR->defMask) {
+                                /* Shift dest reg is used for src, skip opt. */
+                                applyOpt = false;
+                            }
+                            break;
+                        }
+                    }
+                }
+
+                if(applyOpt) {
+                    /*
+                     *  Found lsl/lsr & add, use barrel shifter for add:
+                     *   add rD, rN, rT   or   add rD, rT, rN
+                     *   ->  add rD, rN, rM, [lsl/lsr] #x
+                     *  (rN may be the same register as rM)
+                     */
+
+                    int type = kArmLsl;
+                    if(thisLIR->opcode == kThumb2LsrRRI5 || thisLIR->opcode == kThumbLsrRRI5) {
+                        type = kArmLsr;
+                    }
+
+                    /* For most cases keep original rSrc1 */
+                    int rSrc1 = nextLIR->operands[1];
+
+                    if(thisLIR->operands[0] == nextLIR->operands[1]) {
+                        /* Shift result is first source: move original rSrc2
+                           to rSrc1 since reg to be shifted need to be in rSrc2 */
+                        rSrc1 = nextLIR->operands[2];
+                    }
+
+                    /* Reg to be shifted need to be in rSrc2 */
+                    int rSrc2 = thisLIR->operands[1];
+
+                    /* Encode type of shift and amount */
+                    int shift = ((thisLIR->operands[2] & 0x1f) << 2) | type;
+
+                    /* Keep rDest, but change rSrc1, rSrc2 and use shift */
+                    ArmLIR* newLIR = (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
+                    newLIR->opcode = nextLIR->opcode;
+                    newLIR->operands[0] = nextLIR->operands[0];
+                    newLIR->operands[1] = rSrc1;
+                    newLIR->operands[2] = rSrc2;
+                    newLIR->operands[3] = shift;
+                    dvmCompilerSetupResourceMasks(newLIR);
+                    dvmCompilerInsertLIRBefore((LIR *) nextLIR, (LIR *) newLIR);
+
+                    thisLIR->flags.isNop = true;
+                    nextLIR->flags.isNop = true;
+                }
+
+                /*
+                 * Avoid looping through nops already identified: continue
+                 * directly after the add.
+                 */
+                thisLIR = nextLIR;
+            }
+        }
+    }
+}
+
 void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR,
                                         LIR *tailLIR)
 {
@@ -463,4 +593,7 @@ void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR,
     if (!(gDvmJit.disableOpt & (1 << kLoadHoisting))) {
         applyLoadHoisting(cUnit, (ArmLIR *) headLIR, (ArmLIR *) tailLIR);
     }
+    if (!(gDvmJit.disableOpt & (1 << kShiftArithmetic))) {
+        applyShiftArithmeticOpts(cUnit, (ArmLIR *) headLIR, (ArmLIR* ) tailLIR);
+    }
 }
author	Anders O Nilsson <anders.o.nilsson@stericsson.com>	2012-10-03 09:24:22 +0200
committer	Steve Kondik <shade@chemlab.org>	2013-07-23 11:54:34 -0700
commit	36141f9691b4592b77d1b09ad60cafb611394f36 (patch)
tree	72250708b5f503717e1903c202c54b9a255515db
parent	f3485c43787ba8ca298546efc39111123f4ebac3 (diff)
download	android_dalvik-36141f9691b4592b77d1b09ad60cafb611394f36.tar.gz android_dalvik-36141f9691b4592b77d1b09ad60cafb611394f36.tar.bz2 android_dalvik-36141f9691b4592b77d1b09ad60cafb611394f36.zip