author    Zheng Xu <zheng.xu@arm.com>    2014-08-18 17:28:22 +0800
committer Zheng Xu <zheng.xu@arm.com>    2014-08-18 17:42:12 +0800
commit    5d7cdec7527f8043bf15e23a0041c40156727243 (patch)
tree      c6ca87c6a10a4b279fe7b2481b902a9a9b447ea6 /compiler
parent    aa1ed01d119ee6d93e6254789c79246e91fcec32 (diff)
AArch64: Add tbz/tbnz and tst.
Since the branch offset supported by tbz/tbnz is quite small (-32 KB to +32 KB), the tbz/tbnz is replaced by tst followed by beq/bne in the fix-up stage if the branch offset turns out to be too large.

Change-Id: I4cace06bec6425e0f2e1f5f7c471eec08d06bca6
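To illustrate the rewrite (register, bit number, and label made up for exposition): a branch within reach stays a single test-bit instruction, while an out-of-range one is split so the longer-reach conditional branch carries the offset.

    tbnz  w0, #3, target      // reach: signed imm14 words, about +/-32 KB

becomes, when target is too far away:

    tst   w0, #0x8            // alias of "ands wzr, w0, #(1 << 3)"
    b.ne  target              // reach: signed imm19 words, about +/-1 MB

(tbz maps to b.eq the same way, since tst sets Z exactly when the tested bit is clear.)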
Diffstat (limited to 'compiler')
 compiler/dex/compiler_enums.h              |   1
 compiler/dex/quick/arm64/arm64_lir.h       |  35
 compiler/dex/quick/arm64/assemble_arm64.cc | 108
 compiler/dex/quick/arm64/int_arm64.cc      |  12
 compiler/dex/quick/arm64/target_arm64.cc   |   3
 5 files changed, 123 insertions(+), 36 deletions(-)
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index dcc67c3986..63f3e640df 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -528,6 +528,7 @@ enum FixupKind {
kFixupLoad, // Mostly for immediates.
kFixupVLoad, // FP load which *may* be pc-relative.
kFixupCBxZ, // Cbz, Cbnz.
+ kFixupTBxZ, // Tbz, Tbnz.
kFixupPushPop, // Not really pc relative, but changes size based on args.
kFixupCondBranch, // Conditional branch
kFixupT1Branch, // Thumb1 Unconditional branch
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index a449cbd4f7..d001dd652a 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -116,6 +116,7 @@ enum Arm64ResourceEncodingPos {
#define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value)
#define IS_SIGNED_IMM9(value) IS_SIGNED_IMM(9, value)
#define IS_SIGNED_IMM12(value) IS_SIGNED_IMM(12, value)
+#define IS_SIGNED_IMM14(value) IS_SIGNED_IMM(14, value)
#define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value)
#define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value)
@@ -355,7 +356,10 @@ enum ArmOpcode {
kA64Sub4rrro, // sub [s1001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
kA64Sub4RRre, // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
kA64Subs3rRd, // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
+ kA64Tst2rl, // tst alias of "ands rzr, rn, #imm".
kA64Tst3rro, // tst alias of "ands rzr, arg1, arg2, arg3".
+ kA64Tbnz3rht, // tbnz imm_6_b5[31] [0110111] imm_6_b40[23-19] imm_14[18-5] rt[4-0].
+ kA64Tbz3rht, // tbz imm_6_b5[31] [0110110] imm_6_b40[23-19] imm_14[18-5] rt[4-0].
kA64Ubfm4rrdd, // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
kA64Last,
kA64NotWide = 0, // Flag used to select the first instruction variant.
@@ -400,23 +404,24 @@ enum ArmOpDmbOptions {
enum ArmEncodingKind {
// All the formats below are encoded in the same way (as a kFmtBitBlt).
// These are grouped together, for fast handling (e.g. "if (LIKELY(fmt <= kFmtBitBlt)) ...").
- kFmtRegW = 0, // Word register (w) or wzr.
- kFmtRegX, // Extended word register (x) or xzr.
- kFmtRegR, // Register with same width as the instruction or zr.
- kFmtRegWOrSp, // Word register (w) or wsp.
- kFmtRegXOrSp, // Extended word register (x) or sp.
- kFmtRegROrSp, // Register with same width as the instruction or sp.
- kFmtRegS, // Single FP reg.
- kFmtRegD, // Double FP reg.
- kFmtRegF, // Single/double FP reg depending on the instruction width.
- kFmtBitBlt, // Bit string using end/start.
+ kFmtRegW = 0, // Word register (w) or wzr.
+ kFmtRegX, // Extended word register (x) or xzr.
+ kFmtRegR, // Register with same width as the instruction or zr.
+ kFmtRegWOrSp, // Word register (w) or wsp.
+ kFmtRegXOrSp, // Extended word register (x) or sp.
+ kFmtRegROrSp, // Register with same width as the instruction or sp.
+ kFmtRegS, // Single FP reg.
+ kFmtRegD, // Double FP reg.
+ kFmtRegF, // Single/double FP reg depending on the instruction width.
+ kFmtBitBlt, // Bit string using end/start.
// Less likely formats.
- kFmtUnused, // Unused field and marks end of formats.
- kFmtImm21, // Sign-extended immediate using [23..5,30..29].
- kFmtShift, // Register shift, 9-bit at [23..21, 15..10]..
- kFmtExtend, // Register extend, 9-bit at [23..21, 15..10].
- kFmtSkip, // Unused field, but continue to next.
+ kFmtUnused, // Unused field and marks end of formats.
+ kFmtImm6Shift, // Shift immediate, 6-bit at [31, 23..19].
+ kFmtImm21, // Sign-extended immediate using [23..5,30..29].
+ kFmtShift, // Register shift, 9-bit at [23..21, 15..10].
+ kFmtExtend, // Register extend, 9-bit at [23..21, 15..10].
+ kFmtSkip, // Unused field, but continue to next.
};
// Struct used to define the snippet positions for each A64 opcode.
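The new kFmtImm6Shift format scatters the 6-bit bit number across the instruction word: bit 5 of the operand lands in b5 at bit [31] (doubling as the x/w selector) and bits 4..0 land in b40 at bits [23:19]. A minimal standalone sketch of that packing (hypothetical helper name, mirroring the kFmtImm6Shift case added to EncodeLIRs below):

    #include <cstdint>
    #include <cstdio>

    // Pack a tbz/tbnz bit number (0..63) into a base opcode.
    uint32_t PackImm6Shift(uint32_t base, uint32_t bit_number) {
      uint32_t value = (bit_number & 0x1f) << 19;  // b40 -> bits [23:19]
      value |= ((bit_number & 0x20) >> 5) << 31;   // b5  -> bit [31]
      return base | value;
    }

    int main() {
      // tbnz x0, #35, . : base opcode 0x37000000, expect 0xb7180000.
      std::printf("%08x\n", PackImm6Shift(0x37000000u, 35));
      return 0;
    }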
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 15c89f2f18..5115246fc8 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -89,6 +89,7 @@ namespace art {
* M -> 16-bit shift expression ("" or ", lsl #16" or ", lsl #32"...)
* B -> dmb option string (sy, st, ish, ishst, nsh, nshst)
* H -> operand shift
+ * h -> 6-bit shift immediate
* T -> register shift (either ", lsl #0" or ", lsl #12")
* e -> register extend (e.g. uxtb #1)
* o -> register shift (e.g. lsl #1) for Word registers
@@ -614,10 +615,24 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = {
kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
"subs", "!0r, !1R, #!2d", kFixupNone),
- ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a000000),
+ ENCODING_MAP(WIDE(kA64Tst2rl), SF_VARIANTS(0x7200001f),
+ kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+ "tst", "!0r, !1l", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a00001f),
kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
- kFmtUnused, -1, -1, IS_QUAD_OP | REG_USE01 | SETS_CCODES,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
"tst", "!0r, !1r!2o", kFixupNone),
+ // NOTE: Tbz/Tbnz does not require SETS_CCODES, but it may be replaced by other LIRs
+ // that do require SETS_CCODES (tst + beq/bne) in the fix-up stage.
+ ENCODING_MAP(WIDE(kA64Tbnz3rht), CUSTOM_VARIANTS(0x37000000, 0x37000000),
+ kFmtRegR, 4, 0, kFmtImm6Shift, -1, -1, kFmtBitBlt, 18, 5, kFmtUnused, -1, -1,
+ IS_TERTIARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP | SETS_CCODES,
+ "tbnz", "!0r, #!1h, !2t", kFixupTBxZ),
+ ENCODING_MAP(WIDE(kA64Tbz3rht), CUSTOM_VARIANTS(0x36000000, 0x36000000),
+ kFmtRegR, 4, 0, kFmtImm6Shift, -1, -1, kFmtBitBlt, 18, 5, kFmtUnused, -1, -1,
+ IS_TERTIARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP | SETS_CCODES,
+ "tbz", "!0r, #!1h, !2t", kFixupTBxZ),
ENCODING_MAP(WIDE(kA64Ubfm4rrdd), SF_N_VARIANTS(0x53000000),
kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
@@ -787,6 +802,11 @@ uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
value |= ((operand & 0x1ffffc) >> 2) << 5;
bits |= value;
break;
+ case kFmtImm6Shift:
+ value = (operand & 0x1f) << 19;
+ value |= ((operand & 0x20) >> 5) << 31;
+ bits |= value;
+ break;
default:
LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name
<< " (" << kind << ")";
@@ -827,11 +847,6 @@ void Arm64Mir2Lir::AssembleLIR() {
*/
int generation = 0;
while (true) {
- // TODO(Arm64): check whether passes and offset adjustments are really necessary.
- // Currently they aren't, as - in the fixups below - LIR are never inserted.
- // Things can be different if jump ranges above 1 MB need to be supported.
- // If they are not, then we can get rid of the assembler retry logic.
-
offset_adjustment = 0;
AssemblerStatus res = kSuccess; // Assume success
generation ^= 1;
@@ -839,13 +854,9 @@ void Arm64Mir2Lir::AssembleLIR() {
lir = first_fixup_;
prev_lir = NULL;
while (lir != NULL) {
- /*
- * NOTE: the lir being considered here will be encoded following the switch (so long as
- * we're not in a retry situation). However, any new non-pc_rel instructions inserted
- * due to retry must be explicitly encoded at the time of insertion. Note that
- * inserted instructions don't need use/def flags, but do need size and pc-rel status
- * properly updated.
- */
+ // NOTE: Any new non-pc_rel instructions inserted due to retry must be explicitly encoded at
+ // the time of insertion. Note that inserted instructions don't need use/def flags, but do
+ // need size and pc-rel status properly updated.
lir->offset += offset_adjustment;
// During pass, allows us to tell whether a node has been updated with offset_adjustment yet.
lir->flags.generation = generation;
@@ -861,7 +872,8 @@ void Arm64Mir2Lir::AssembleLIR() {
CodeOffset target = target_lir->offset +
((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
int32_t delta = target - pc;
- if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+ DCHECK_EQ(delta & 0x3, 0);
+ if (!IS_SIGNED_IMM19(delta >> 2)) {
LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
}
lir->operands[0] = delta >> 2;
@@ -876,12 +888,75 @@ void Arm64Mir2Lir::AssembleLIR() {
CodeOffset target = target_lir->offset +
((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
int32_t delta = target - pc;
- if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+ DCHECK_EQ(delta & 0x3, 0);
+ if (!IS_SIGNED_IMM19(delta >> 2)) {
LOG(FATAL) << "Invalid jump range in kFixupLoad";
}
lir->operands[1] = delta >> 2;
break;
}
+ case kFixupTBxZ: {
+ int16_t opcode = lir->opcode;
+ RegStorage reg(lir->operands[0] | RegStorage::kValid);
+ int32_t imm = lir->operands[1];
+ DCHECK_EQ(IS_WIDE(opcode), reg.Is64Bit());
+ DCHECK_LT(imm, 64);
+ if (imm >= 32) {
+ DCHECK(IS_WIDE(opcode));
+ } else if (kIsDebugBuild && IS_WIDE(opcode)) {
+ // "tbz/tbnz x0, #imm(<32)" is the same with "tbz/tbnz w0, #imm(<32)", but GCC/oatdump
+ // will disassemble it as "tbz/tbnz w0, #imm(<32)". So unwide the LIR to make the
+ // compiler log behave the same with those disassembler in debug build.
+ // This will also affect tst instruction if it need to be replaced, but there is no
+ // performance difference between "tst Xt" and "tst Wt".
+ lir->opcode = UNWIDE(opcode);
+ lir->operands[0] = As32BitReg(reg).GetReg();
+ }
+
+ // Fix-up branch offset.
+ LIR *target_lir = lir->target;
+ DCHECK(target_lir);
+ CodeOffset pc = lir->offset;
+ CodeOffset target = target_lir->offset +
+ ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+ int32_t delta = target - pc;
+ DCHECK_EQ(delta & 0x3, 0);
+ // Check if branch offset can be encoded in tbz/tbnz.
+ if (!IS_SIGNED_IMM14(delta >> 2)) {
+ DexOffset dalvik_offset = lir->dalvik_offset;
+ int16_t opcode = lir->opcode;
+ LIR* target = lir->target;
+ // "tbz/tbnz Rt, #imm, label" -> "tst Rt, #(1<<imm)".
+ offset_adjustment -= lir->flags.size;
+ int32_t imm = EncodeLogicalImmediate(IS_WIDE(opcode), UINT64_C(1) << lir->operands[1]);
+ DCHECK_NE(imm, -1);
+ lir->opcode = IS_WIDE(opcode) ? WIDE(kA64Tst2rl) : kA64Tst2rl;
+ lir->operands[1] = imm;
+ lir->target = nullptr;
+ lir->flags.fixup = EncodingMap[kA64Tst2rl].fixup;
+ lir->flags.size = EncodingMap[kA64Tst2rl].size;
+ offset_adjustment += lir->flags.size;
+ // Insert "beq/bneq label".
+ opcode = UNWIDE(opcode);
+ DCHECK(opcode == kA64Tbz3rht || opcode == kA64Tbnz3rht);
+ LIR* new_lir = RawLIR(dalvik_offset, kA64B2ct,
+ opcode == kA64Tbz3rht ? kArmCondEq : kArmCondNe, 0, 0, 0, 0, target);
+ InsertLIRAfter(lir, new_lir);
+ new_lir->offset = lir->offset + lir->flags.size;
+ new_lir->flags.generation = generation;
+ new_lir->flags.fixup = EncodingMap[kA64B2ct].fixup;
+ new_lir->flags.size = EncodingMap[kA64B2ct].size;
+ offset_adjustment += new_lir->flags.size;
+ // lir no longer pcrel, unlink and link in new_lir.
+ ReplaceFixup(prev_lir, lir, new_lir);
+ prev_lir = new_lir; // Continue with the new instruction.
+ lir = new_lir->u.a.pcrel_next;
+ res = kRetryAll;
+ continue;
+ }
+ lir->operands[2] = delta >> 2;
+ break;
+ }
case kFixupAdr: {
LIR* target_lir = lir->target;
int32_t delta;
@@ -910,6 +985,7 @@ void Arm64Mir2Lir::AssembleLIR() {
}
if (res == kSuccess) {
+ DCHECK_EQ(offset_adjustment, 0);
break;
} else {
assembler_retries++;
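As a sanity check on the ranges involved (assuming IS_SIGNED_IMM(n, v) in arm64_lir.h tests -2^(n-1) <= v < 2^(n-1)): the tbz/tbnz delta is a word offset, so the reachable byte span is [-32768, +32764] -- the "+/-32k" from the commit message -- versus roughly +/-1 MB for the imm19 conditional branch it falls back to.

    #include <cstdint>

    // Sketch only -- the real macro lives in arm64_lir.h.
    constexpr bool IsSignedImm(int bits, int32_t value) {
      return value >= -(INT32_C(1) << (bits - 1)) && value < (INT32_C(1) << (bits - 1));
    }
    static_assert(IsSignedImm(14, 32764 >> 2), "last reachable forward tbz offset");
    static_assert(!IsSignedImm(14, 32768 >> 2), "one word beyond tbz/tbnz reach");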
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index d00c57dee9..d1b9c81d09 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -271,8 +271,12 @@ LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_
ArmOpcode opcode = kA64Cbz2rt;
ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
+ } else if (arm_cond == kArmCondLt || arm_cond == kArmCondGe) {
+ ArmOpcode opcode = (arm_cond == kArmCondLt) ? kA64Tbnz3rht : kA64Tbz3rht;
+ ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
+ int value = reg.Is64Bit() ? 63 : 31;
+ branch = NewLIR3(opcode | wide, reg.GetReg(), value, 0);
}
- // TODO: Use tbz/tbnz for < 0 or >= 0.
}
if (branch == nullptr) {
@@ -856,16 +860,14 @@ bool Arm64Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
// Copy one element.
- OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 2);
- LIR* jmp_to_copy_two = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
+ LIR* jmp_to_copy_two = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 1, 0);
OpRegImm(kOpSub, rs_length, 2);
LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
// Copy two elements.
LIR *copy_two = NewLIR0(kPseudoTargetLabel);
- OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 4);
- LIR* jmp_to_copy_four = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
+ LIR* jmp_to_copy_four = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 2, 0);
OpRegImm(kOpSub, rs_length, 4);
LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
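Both call sites above reduce a compare-and-branch to a single-bit test. In OpCmpImmBranch, a signed comparison against zero is decided by the sign bit alone: "lt" holds exactly when bit 31 (or 63) is set. In the array copy, rs_length already holds the byte count (chars << 1), so bit 1 answers "is there a lone char left?" and bit 2 "is there a lone pair left?". Illustrative before/after (register names assumed):

    // reg < 0 ?
    cmp   w0, #0              // before: compare, then ...
    b.lt  target              // ... conditional branch
    tbnz  w0, #31, target     // after: one instruction

    // (length_in_bytes & 2) == 0 ?
    and   w3, w2, #2          // before: mask, then ...
    cbz   w3, copy_two        // ... compare-branch-zero
    tbz   x2, #1, copy_two    // after: test bit 1 directly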
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 9b4546a94b..685f8d5492 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -504,6 +504,9 @@ std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned ch
else
strcpy(tbuf, ", DecodeError3");
break;
+ case 'h':
+ snprintf(tbuf, arraysize(tbuf), "%d", operand);
+ break;
default:
strcpy(tbuf, "DecodeError1");
break;
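With the new 'h' formatter, the tbz/tbnz snippet "!0r, #!1h, !2t" prints the bit number as a bare decimal, so a LIR with operands {w0, 3, <target>} would render along the lines of "tbnz w0, #3, <target>" in the compiler's disassembly log (the exact target formatting comes from the existing 't' handler).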