author     Zheng Xu <zheng.xu@arm.com>    2014-08-18 17:28:22 +0800
committer  Zheng Xu <zheng.xu@arm.com>    2014-08-18 17:42:12 +0800
commit     5d7cdec7527f8043bf15e23a0041c40156727243 (patch)
tree       c6ca87c6a10a4b279fe7b2481b902a9a9b447ea6 /compiler
parent     aa1ed01d119ee6d93e6254789c79246e91fcec32 (diff)
AArch64: Add tbz/tbnz and tst.
Since the branch offset supported by tbz/tbnz is quite small (-32 KB to
+32 KB), the instruction is replaced with a tst plus beq/bne pair in the
fix-up stage if the branch offset turns out to be too large.
Change-Id: I4cace06bec6425e0f2e1f5f7c471eec08d06bca6
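As a concrete sketch of that fix-up (illustrative only; the register, bit
index, and label below are chosen for the example and do not appear in the
patch), a test-bit branch whose target lies outside the +/-32 KB window, such as

    tbz   w0, #3, far_label      // branch if bit 3 of w0 is clear

is rewritten during assembly into a flags-based pair whose conditional branch
reaches +/-1 MB:

    tst   w0, #0x8               // ands wzr, w0, #(1 << 3); sets Z from bit 3
    beq   far_label              // taken exactly when the tested bit was zero

A tbnz is rewritten the same way, with bne in place of beq.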
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/dex/compiler_enums.h                1
-rw-r--r--  compiler/dex/quick/arm64/arm64_lir.h        35
-rw-r--r--  compiler/dex/quick/arm64/assemble_arm64.cc 108
-rw-r--r--  compiler/dex/quick/arm64/int_arm64.cc       12
-rw-r--r--  compiler/dex/quick/arm64/target_arm64.cc     3
5 files changed, 123 insertions, 36 deletions
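One detail worth noting before the diff: the int_arm64.cc hunk lowers "branch
if reg < 0" (kCondLt) and "branch if reg >= 0" (kCondGe) against an immediate
of zero to a single tbnz/tbz on the sign bit. A minimal C++ sketch of the
two's-complement fact this relies on (the helper names are illustrative, not
from the patch):

    #include <cstdint>

    // v < 0 exactly when the sign bit is set, so "cmp reg, #0; b.lt label"
    // collapses to "tbnz reg, #31" for w registers ("tbnz reg, #63" for x),
    // and kCondGe maps to tbz on the same bit.
    inline bool IsNegative32(int32_t v) { return (static_cast<uint32_t>(v) >> 31) != 0; }
    inline bool IsNegative64(int64_t v) { return (static_cast<uint64_t>(v) >> 63) != 0; }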
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index dcc67c3986..63f3e640df 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -528,6 +528,7 @@ enum FixupKind {
   kFixupLoad,         // Mostly for immediates.
   kFixupVLoad,        // FP load which *may* be pc-relative.
   kFixupCBxZ,         // Cbz, Cbnz.
+  kFixupTBxZ,         // Tbz, Tbnz.
   kFixupPushPop,      // Not really pc relative, but changes size based on args.
   kFixupCondBranch,   // Conditional branch
   kFixupT1Branch,     // Thumb1 Unconditional branch
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index a449cbd4f7..d001dd652a 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -116,6 +116,7 @@ enum Arm64ResourceEncodingPos {
 #define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value)
 #define IS_SIGNED_IMM9(value) IS_SIGNED_IMM(9, value)
 #define IS_SIGNED_IMM12(value) IS_SIGNED_IMM(12, value)
+#define IS_SIGNED_IMM14(value) IS_SIGNED_IMM(14, value)
 #define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value)
 #define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value)
@@ -355,7 +356,10 @@ enum ArmOpcode {
   kA64Sub4rrro,      // sub [s1001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
   kA64Sub4RRre,      // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
   kA64Subs3rRd,      // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
+  kA64Tst2rl,        // tst alias of "ands rzr, rn, #imm".
   kA64Tst3rro,       // tst alias of "ands rzr, arg1, arg2, arg3".
+  kA64Tbnz3rht,      // tbnz imm_6_b5[31] [0110111] imm_6_b40[23-19] imm_14[18-5] rt[4-0].
+  kA64Tbz3rht,       // tbz imm_6_b5[31] [0110110] imm_6_b40[23-19] imm_14[18-5] rt[4-0].
   kA64Ubfm4rrdd,     // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
   kA64Last,
   kA64NotWide = 0,   // Flag used to select the first instruction variant.
@@ -400,23 +404,24 @@ enum ArmOpDmbOptions {
 enum ArmEncodingKind {
   // All the formats below are encoded in the same way (as a kFmtBitBlt).
   // These are grouped together, for fast handling (e.g. "if (LIKELY(fmt <= kFmtBitBlt)) ...").
-  kFmtRegW = 0,  // Word register (w) or wzr.
-  kFmtRegX,      // Extended word register (x) or xzr.
-  kFmtRegR,      // Register with same width as the instruction or zr.
-  kFmtRegWOrSp,  // Word register (w) or wsp.
-  kFmtRegXOrSp,  // Extended word register (x) or sp.
-  kFmtRegROrSp,  // Register with same width as the instruction or sp.
-  kFmtRegS,      // Single FP reg.
-  kFmtRegD,      // Double FP reg.
-  kFmtRegF,      // Single/double FP reg depending on the instruction width.
-  kFmtBitBlt,    // Bit string using end/start.
+  kFmtRegW = 0,   // Word register (w) or wzr.
+  kFmtRegX,       // Extended word register (x) or xzr.
+  kFmtRegR,       // Register with same width as the instruction or zr.
+  kFmtRegWOrSp,   // Word register (w) or wsp.
+  kFmtRegXOrSp,   // Extended word register (x) or sp.
+  kFmtRegROrSp,   // Register with same width as the instruction or sp.
+  kFmtRegS,       // Single FP reg.
+  kFmtRegD,       // Double FP reg.
+  kFmtRegF,       // Single/double FP reg depending on the instruction width.
+  kFmtBitBlt,     // Bit string using end/start.
   // Less likely formats.
-  kFmtUnused,    // Unused field and marks end of formats.
-  kFmtImm21,     // Sign-extended immediate using [23..5,30..29].
-  kFmtShift,     // Register shift, 9-bit at [23..21, 15..10]..
-  kFmtExtend,    // Register extend, 9-bit at [23..21, 15..10].
-  kFmtSkip,      // Unused field, but continue to next.
+  kFmtUnused,     // Unused field and marks end of formats.
+  kFmtImm6Shift,  // Shift immediate, 6-bit at [31, 23..19].
+  kFmtImm21,      // Sign-extended immediate using [23..5,30..29].
+  kFmtShift,      // Register shift, 9-bit at [23..21, 15..10]..
+  kFmtExtend,     // Register extend, 9-bit at [23..21, 15..10].
+  kFmtSkip,       // Unused field, but continue to next.
 };
 
 // Struct used to define the snippet positions for each A64 opcode.
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 15c89f2f18..5115246fc8 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -89,6 +89,7 @@ namespace art {
  *     M -> 16-bit shift expression ("" or ", lsl #16" or ", lsl #32"...)
  *     B -> dmb option string (sy, st, ish, ishst, nsh, hshst)
  *     H -> operand shift
+ *     h -> 6-bit shift immediate
  *     T -> register shift (either ", lsl #0" or ", lsl #12")
  *     e -> register extend (e.g. uxtb #1)
  *     o -> register shift (e.g. lsl #1) for Word registers
@@ -614,10 +615,24 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = {
                  kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
                  "subs", "!0r, !1R, #!2d", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a000000),
+    ENCODING_MAP(WIDE(kA64Tst2rl), SF_VARIANTS(0x7200001f),
+                 kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+                 "tst", "!0r, !1l", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a00001f),
                  kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
-                 kFmtUnused, -1, -1, IS_QUAD_OP | REG_USE01 | SETS_CCODES,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
                  "tst", "!0r, !1r!2o", kFixupNone),
+    // NOTE: Tbz/Tbnz does not require SETS_CCODES, but it may be replaced by some other LIRs
+    // which require SETS_CCODES in the fix-up stage.
+    ENCODING_MAP(WIDE(kA64Tbnz3rht), CUSTOM_VARIANTS(0x37000000, 0x37000000),
+                 kFmtRegR, 4, 0, kFmtImm6Shift, -1, -1, kFmtBitBlt, 18, 5, kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP | SETS_CCODES,
+                 "tbnz", "!0r, #!1h, !2t", kFixupTBxZ),
+    ENCODING_MAP(WIDE(kA64Tbz3rht), CUSTOM_VARIANTS(0x36000000, 0x36000000),
+                 kFmtRegR, 4, 0, kFmtImm6Shift, -1, -1, kFmtBitBlt, 18, 5, kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP | SETS_CCODES,
+                 "tbz", "!0r, #!1h, !2t", kFixupTBxZ),
     ENCODING_MAP(WIDE(kA64Ubfm4rrdd), SF_N_VARIANTS(0x53000000),
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
                  kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
@@ -787,6 +802,11 @@ uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
           value |= ((operand & 0x1ffffc) >> 2) << 5;
           bits |= value;
           break;
+        case kFmtImm6Shift:
+          value = (operand & 0x1f) << 19;
+          value |= ((operand & 0x20) >> 5) << 31;
+          bits |= value;
+          break;
         default:
           LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name
                      << " (" << kind << ")";
@@ -827,11 +847,6 @@ void Arm64Mir2Lir::AssembleLIR() {
    */
   int generation = 0;
   while (true) {
-    // TODO(Arm64): check whether passes and offset adjustments are really necessary.
-    //   Currently they aren't, as - in the fixups below - LIR are never inserted.
-    //   Things can be different if jump ranges above 1 MB need to be supported.
-    //   If they are not, then we can get rid of the assembler retry logic.
-
     offset_adjustment = 0;
     AssemblerStatus res = kSuccess;  // Assume success
     generation ^= 1;
@@ -839,13 +854,9 @@
     lir = first_fixup_;
     prev_lir = NULL;
     while (lir != NULL) {
-      /*
-       * NOTE: the lir being considered here will be encoded following the switch (so long as
-       * we're not in a retry situation). However, any new non-pc_rel instructions inserted
-       * due to retry must be explicitly encoded at the time of insertion. Note that
-       * inserted instructions don't need use/def flags, but do need size and pc-rel status
-       * properly updated.
-       */
+      // NOTE: Any new non-pc_rel instructions inserted due to retry must be explicitly encoded at
+      // the time of insertion. Note that inserted instructions don't need use/def flags, but do
+      // need size and pc-rel status properly updated.
       lir->offset += offset_adjustment;
       // During pass, allows us to tell whether a node has been updated with offset_adjustment yet.
       lir->flags.generation = generation;
@@ -861,7 +872,8 @@
           CodeOffset target = target_lir->offset +
               ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
           int32_t delta = target - pc;
-          if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+          DCHECK_EQ(delta & 0x3, 0);
+          if (!IS_SIGNED_IMM19(delta >> 2)) {
             LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
           }
           lir->operands[0] = delta >> 2;
@@ -876,12 +888,75 @@
           CodeOffset target = target_lir->offset +
               ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
           int32_t delta = target - pc;
-          if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+          DCHECK_EQ(delta & 0x3, 0);
+          if (!IS_SIGNED_IMM19(delta >> 2)) {
             LOG(FATAL) << "Invalid jump range in kFixupLoad";
           }
           lir->operands[1] = delta >> 2;
           break;
         }
+        case kFixupTBxZ: {
+          int16_t opcode = lir->opcode;
+          RegStorage reg(lir->operands[0] | RegStorage::kValid);
+          int32_t imm = lir->operands[1];
+          DCHECK_EQ(IS_WIDE(opcode), reg.Is64Bit());
+          DCHECK_LT(imm, 64);
+          if (imm >= 32) {
+            DCHECK(IS_WIDE(opcode));
+          } else if (kIsDebugBuild && IS_WIDE(opcode)) {
+            // "tbz/tbnz x0, #imm(<32)" is the same with "tbz/tbnz w0, #imm(<32)", but GCC/oatdump
+            // will disassemble it as "tbz/tbnz w0, #imm(<32)". So unwide the LIR to make the
+            // compiler log behave the same with those disassembler in debug build.
+            // This will also affect tst instruction if it need to be replaced, but there is no
+            // performance difference between "tst Xt" and "tst Wt".
+            lir->opcode = UNWIDE(opcode);
+            lir->operands[0] = As32BitReg(reg).GetReg();
+          }
+
+          // Fix-up branch offset.
+          LIR *target_lir = lir->target;
+          DCHECK(target_lir);
+          CodeOffset pc = lir->offset;
+          CodeOffset target = target_lir->offset +
+              ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+          int32_t delta = target - pc;
+          DCHECK_EQ(delta & 0x3, 0);
+          // Check if branch offset can be encoded in tbz/tbnz.
+          if (!IS_SIGNED_IMM14(delta >> 2)) {
+            DexOffset dalvik_offset = lir->dalvik_offset;
+            int16_t opcode = lir->opcode;
+            LIR* target = lir->target;
+            // "tbz/tbnz Rt, #imm, label" -> "tst Rt, #(1<<imm)".
+            offset_adjustment -= lir->flags.size;
+            int32_t imm = EncodeLogicalImmediate(IS_WIDE(opcode), 1 << lir->operands[1]);
+            DCHECK_NE(imm, -1);
+            lir->opcode = IS_WIDE(opcode) ? WIDE(kA64Tst2rl) : kA64Tst2rl;
+            lir->operands[1] = imm;
+            lir->target = nullptr;
+            lir->flags.fixup = EncodingMap[kA64Tst2rl].fixup;
+            lir->flags.size = EncodingMap[kA64Tst2rl].size;
+            offset_adjustment += lir->flags.size;
+            // Insert "beq/bneq label".
+            opcode = UNWIDE(opcode);
+            DCHECK(opcode == kA64Tbz3rht || opcode == kA64Tbnz3rht);
+            LIR* new_lir = RawLIR(dalvik_offset, kA64B2ct,
+                opcode == kA64Tbz3rht ? kArmCondEq : kArmCondNe, 0, 0, 0, 0, target);
+            InsertLIRAfter(lir, new_lir);
+            new_lir->offset = lir->offset + lir->flags.size;
+            new_lir->flags.generation = generation;
+            new_lir->flags.fixup = EncodingMap[kA64B2ct].fixup;
+            new_lir->flags.size = EncodingMap[kA64B2ct].size;
+            offset_adjustment += new_lir->flags.size;
+            // lir no longer pcrel, unlink and link in new_lir.
+            ReplaceFixup(prev_lir, lir, new_lir);
+            prev_lir = new_lir;  // Continue with the new instruction.
+            lir = new_lir->u.a.pcrel_next;
+            res = kRetryAll;
+            continue;
+          }
+          lir->operands[2] = delta >> 2;
+          break;
+        }
         case kFixupAdr: {
           LIR* target_lir = lir->target;
           int32_t delta;
@@ -910,6 +985,7 @@ void Arm64Mir2Lir::AssembleLIR() {
       }
 
       if (res == kSuccess) {
+        DCHECK_EQ(offset_adjustment, 0);
         break;
       } else {
         assembler_retries++;
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index d00c57dee9..d1b9c81d09 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -271,8 +271,12 @@ LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_
       ArmOpcode opcode = kA64Cbz2rt;
       ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
       branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
+    } else if (arm_cond == kArmCondLt || arm_cond == kArmCondGe) {
+      ArmOpcode opcode = (arm_cond == kArmCondLt) ? kA64Tbnz3rht : kA64Tbz3rht;
+      ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
+      int value = reg.Is64Bit() ? 63 : 31;
+      branch = NewLIR3(opcode | wide, reg.GetReg(), value, 0);
     }
-    // TODO: Use tbz/tbnz for < 0 or >= 0.
   }
 
   if (branch == nullptr) {
@@ -856,16 +860,14 @@ bool Arm64Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
   OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
 
   // Copy one element.
-  OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 2);
-  LIR* jmp_to_copy_two = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
+  LIR* jmp_to_copy_two = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 1, 0);
   OpRegImm(kOpSub, rs_length, 2);
   LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
   StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
 
   // Copy two elements.
   LIR *copy_two = NewLIR0(kPseudoTargetLabel);
-  OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 4);
-  LIR* jmp_to_copy_four = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
+  LIR* jmp_to_copy_four = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 2, 0);
   OpRegImm(kOpSub, rs_length, 4);
   LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
   StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 9b4546a94b..685f8d5492 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -504,6 +504,9 @@ std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned ch
           else
             strcpy(tbuf, ", DecodeError3");
           break;
+        case 'h':
+          snprintf(tbuf, arraysize(tbuf), "%d", operand);
+          break;
         default:
           strcpy(tbuf, "DecodeError1");
           break;
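For reference, a self-contained sketch of how the new kFmtImm6Shift field is
packed into the instruction word (this helper is illustrative, not part of
ART; it mirrors the kFmtImm6Shift case added to EncodeLIRs above, splitting
the 6-bit bit number into the b40 field at bits 23..19 and the b5 field at
bit 31):

    #include <cstdint>
    #include <cstdio>

    // Assemble a tbz instruction word: rt is the register number, bit the bit
    // to test (0-63), imm14 the signed branch offset in words (bytes / 4).
    uint32_t AssembleTbz(uint32_t rt, uint32_t bit, int32_t imm14) {
      uint32_t insn = 0x36000000u;                            // tbz skeleton (0x37000000 for tbnz).
      insn |= (bit & 0x1fu) << 19;                            // b40: low five bits of the bit number.
      insn |= ((bit & 0x20u) >> 5) << 31;                     // b5: when set, selects the x-register form.
      insn |= (static_cast<uint32_t>(imm14) & 0x3fffu) << 5;  // imm14: offset of +/-8K words (+/-32 KB).
      insn |= rt & 0x1fu;                                     // rt: register to test.
      return insn;
    }

    int main() {
      std::printf("%08x\n", AssembleTbz(0, 3, 2));  // "tbz w0, #3, .+8" -> 36180040
      return 0;
    }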