//===-- Disassembler.h ------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef liblldb_Disassembler_h_ #define liblldb_Disassembler_h_ #include "lldb/Core/Address.h" #include "lldb/Core/EmulateInstruction.h" #include "lldb/Core/FormatEntity.h" #include "lldb/Core/Opcode.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Interpreter/OptionValue.h" #include "lldb/Symbol/LineEntry.h" #include "lldb/Target/ExecutionContext.h" #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/ConstString.h" #include "lldb/Utility/FileSpec.h" #include "lldb/lldb-defines.h" #include "lldb/lldb-forward.h" #include "lldb/lldb-private-enumerations.h" #include "lldb/lldb-types.h" #include "llvm/ADT/StringRef.h" #include #include #include #include #include #include #include #include #include namespace llvm { template class SmallVectorImpl; } namespace lldb_private { class AddressRange; class DataExtractor; class Debugger; class Disassembler; class Module; class Stream; class SymbolContext; class SymbolContextList; class Target; struct RegisterInfo; class Instruction { public: Instruction(const Address &address, AddressClass addr_class = AddressClass::eInvalid); virtual ~Instruction(); const Address &GetAddress() const { return m_address; } const char *GetMnemonic(const ExecutionContext *exe_ctx) { CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); return m_opcode_name.c_str(); } const char *GetOperands(const ExecutionContext *exe_ctx) { CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); return m_mnemonics.c_str(); } const char *GetComment(const ExecutionContext *exe_ctx) { CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); return m_comment.c_str(); } virtual void CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0; AddressClass GetAddressClass(); void SetAddress(const Address &addr) { // Invalidate the address class to lazily discover it if we need to. m_address_class = AddressClass::eInvalid; m_address = addr; } /// Dump the text representation of this Instruction to a Stream /// /// Print the (optional) address, (optional) bytes, opcode, /// operands, and instruction comments to a stream. /// /// \param[in] s /// The Stream to add the text to. /// /// \param[in] show_address /// Whether the address (using disassembly_addr_format_spec formatting) /// should be printed. /// /// \param[in] show_bytes /// Whether the bytes of the assembly instruction should be printed. /// /// \param[in] max_opcode_byte_size /// The size (in bytes) of the largest instruction in the list that /// we are printing (for text justification/alignment purposes) /// Only needed if show_bytes is true. /// /// \param[in] exe_ctx /// The current execution context, if available. May be used in /// the assembling of the operands+comments for this instruction. /// Pass NULL if not applicable. /// /// \param[in] sym_ctx /// The SymbolContext for this instruction. /// Pass NULL if not available/computed. /// Only needed if show_address is true. /// /// \param[in] prev_sym_ctx /// The SymbolContext for the previous instruction. Depending on /// the disassembly address format specification, a change in /// Symbol / Function may mean that a line is printed with the new /// symbol/function name. /// Pass NULL if unavailable, or if this is the first instruction of /// the InstructionList. /// Only needed if show_address is true. /// /// \param[in] disassembly_addr_format /// The format specification for how addresses are printed. /// Only needed if show_address is true. /// /// \param[in] max_address_text_size /// The length of the longest address string at the start of the /// disassembly line that will be printed (the /// Debugger::FormatDisassemblerAddress() string) /// so this method can properly align the instruction opcodes. /// May be 0 to indicate no indentation/alignment of the opcodes. virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address, bool show_bytes, const ExecutionContext *exe_ctx, const SymbolContext *sym_ctx, const SymbolContext *prev_sym_ctx, const FormatEntity::Entry *disassembly_addr_format, size_t max_address_text_size); virtual bool DoesBranch() = 0; virtual bool HasDelaySlot(); bool CanSetBreakpoint (); virtual size_t Decode(const Disassembler &disassembler, const DataExtractor &data, lldb::offset_t data_offset) = 0; virtual void SetDescription(llvm::StringRef) { } // May be overridden in sub-classes that have descriptions. lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream, OptionValue::Type data_type); lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream); bool DumpEmulation(const ArchSpec &arch); virtual bool TestEmulation(Stream *stream, const char *test_file_name); bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton, EmulateInstruction::ReadMemoryCallback read_mem_callback, EmulateInstruction::WriteMemoryCallback write_mem_calback, EmulateInstruction::ReadRegisterCallback read_reg_callback, EmulateInstruction::WriteRegisterCallback write_reg_callback); const Opcode &GetOpcode() const { return m_opcode; } uint32_t GetData(DataExtractor &data); struct Operand { enum class Type { Invalid = 0, Register, Immediate, Dereference, Sum, Product } m_type = Type::Invalid; std::vector m_children; lldb::addr_t m_immediate = 0; ConstString m_register; bool m_negative = false; bool m_clobbered = false; bool IsValid() { return m_type != Type::Invalid; } static Operand BuildRegister(ConstString &r); static Operand BuildImmediate(lldb::addr_t imm, bool neg); static Operand BuildImmediate(int64_t imm); static Operand BuildDereference(const Operand &ref); static Operand BuildSum(const Operand &lhs, const Operand &rhs); static Operand BuildProduct(const Operand &lhs, const Operand &rhs); }; virtual bool ParseOperands(llvm::SmallVectorImpl &operands) { return false; } virtual bool IsCall() { return false; } protected: Address m_address; // The section offset address of this instruction // We include an address class in the Instruction class to // allow the instruction specify the // AddressClass::eCodeAlternateISA (currently used for // thumb), and also to specify data (AddressClass::eData). // The usual value will be AddressClass::eCode, but often // when disassembling memory, you might run into data. // This can help us to disassemble appropriately. private: AddressClass m_address_class; // Use GetAddressClass () accessor function! protected: Opcode m_opcode; // The opcode for this instruction std::string m_opcode_name; std::string m_mnemonics; std::string m_comment; bool m_calculated_strings; void CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) { if (!m_calculated_strings) { m_calculated_strings = true; CalculateMnemonicOperandsAndComment(exe_ctx); } } }; namespace OperandMatchers { std::function MatchBinaryOp(std::function base, std::function left, std::function right); std::function MatchUnaryOp(std::function base, std::function child); std::function MatchRegOp(const RegisterInfo &info); std::function FetchRegOp(ConstString ®); std::function MatchImmOp(int64_t imm); std::function FetchImmOp(int64_t &imm); std::function MatchOpType(Instruction::Operand::Type type); } class InstructionList { public: InstructionList(); ~InstructionList(); size_t GetSize() const; uint32_t GetMaxOpcocdeByteSize() const; lldb::InstructionSP GetInstructionAtIndex(size_t idx) const; //------------------------------------------------------------------ /// Get the index of the next branch instruction. /// /// Given a list of instructions, find the next branch instruction /// in the list by returning an index. /// /// @param[in] start /// The instruction index of the first instruction to check. /// /// @param[in] target /// A LLDB target object that is used to resolve addresses. /// /// @param[in] ignore_calls /// It true, then fine the first branch instruction that isn't /// a function call (a branch that calls and returns to the next /// instruction). If false, find the instruction index of any /// branch in the list. /// /// @param[out] found_calls /// If non-null, this will be set to true if any calls were found in /// extending the range. /// /// @return /// The instruction index of the first branch that is at or past /// \a start. Returns UINT32_MAX if no matching branches are /// found. //------------------------------------------------------------------ uint32_t GetIndexOfNextBranchInstruction(uint32_t start, Target &target, bool ignore_calls, bool *found_calls) const; uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, Target &target); uint32_t GetIndexOfInstructionAtAddress(const Address &addr); void Clear(); void Append(lldb::InstructionSP &inst_sp); void Dump(Stream *s, bool show_address, bool show_bytes, const ExecutionContext *exe_ctx); private: typedef std::vector collection; typedef collection::iterator iterator; typedef collection::const_iterator const_iterator; collection m_instructions; }; class PseudoInstruction : public Instruction { public: PseudoInstruction(); ~PseudoInstruction() override; bool DoesBranch() override; bool HasDelaySlot() override; void CalculateMnemonicOperandsAndComment( const ExecutionContext *exe_ctx) override { // TODO: fill this in and put opcode name into Instruction::m_opcode_name, // mnemonic into Instruction::m_mnemonics, and any comment into // Instruction::m_comment } size_t Decode(const Disassembler &disassembler, const DataExtractor &data, lldb::offset_t data_offset) override; void SetOpcode(size_t opcode_size, void *opcode_data); void SetDescription(llvm::StringRef description) override; protected: std::string m_description; DISALLOW_COPY_AND_ASSIGN(PseudoInstruction); }; class Disassembler : public std::enable_shared_from_this, public PluginInterface { public: enum { eOptionNone = 0u, eOptionShowBytes = (1u << 0), eOptionRawOuput = (1u << 1), eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains // the current PC (mixed mode only) eOptionMarkPCAddress = (1u << 3) // Mark the disassembly line the contains the PC }; enum HexImmediateStyle { eHexStyleC, eHexStyleAsm, }; // FindPlugin should be lax about the flavor string (it is too annoying to // have various internal uses of the disassembler fail because the global // flavor string gets set wrong. Instead, if you get a flavor string you // don't understand, use the default. Folks who care to check can use the // FlavorValidForArchSpec method on the disassembler they got back. static lldb::DisassemblerSP FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name); // This version will use the value in the Target settings if flavor is NULL; static lldb::DisassemblerSP FindPluginForTarget(const lldb::TargetSP target_sp, const ArchSpec &arch, const char *flavor, const char *plugin_name); static lldb::DisassemblerSP DisassembleRange(const ArchSpec &arch, const char *plugin_name, const char *flavor, const ExecutionContext &exe_ctx, const AddressRange &disasm_range, bool prefer_file_cache); static lldb::DisassemblerSP DisassembleBytes(const ArchSpec &arch, const char *plugin_name, const char *flavor, const Address &start, const void *bytes, size_t length, uint32_t max_num_instructions, bool data_from_file); static bool Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, const char *flavor, const ExecutionContext &exe_ctx, const AddressRange &range, uint32_t num_instructions, bool mixed_source_and_assembly, uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); static bool Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, const char *flavor, const ExecutionContext &exe_ctx, const Address &start, uint32_t num_instructions, bool mixed_source_and_assembly, uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); static size_t Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, const char *flavor, const ExecutionContext &exe_ctx, SymbolContextList &sc_list, uint32_t num_instructions, bool mixed_source_and_assembly, uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); static bool Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, const char *flavor, const ExecutionContext &exe_ctx, ConstString name, Module *module, uint32_t num_instructions, bool mixed_source_and_assembly, uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); static bool Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name, const char *flavor, const ExecutionContext &exe_ctx, uint32_t num_instructions, bool mixed_source_and_assembly, uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); // Constructors and Destructors Disassembler(const ArchSpec &arch, const char *flavor); ~Disassembler() override; typedef const char *(*SummaryCallback)(const Instruction &inst, ExecutionContext *exe_context, void *user_data); static bool PrintInstructions(Disassembler *disasm_ptr, Debugger &debugger, const ArchSpec &arch, const ExecutionContext &exe_ctx, uint32_t num_instructions, bool mixed_source_and_assembly, uint32_t num_mixed_context_lines, uint32_t options, Stream &strm); size_t ParseInstructions(const ExecutionContext *exe_ctx, const AddressRange &range, Stream *error_strm_ptr, bool prefer_file_cache); size_t ParseInstructions(const ExecutionContext *exe_ctx, const Address &range, uint32_t num_instructions, bool prefer_file_cache); virtual size_t DecodeInstructions(const Address &base_addr, const DataExtractor &data, lldb::offset_t data_offset, size_t num_instructions, bool append, bool data_from_file) = 0; InstructionList &GetInstructionList(); const InstructionList &GetInstructionList() const; const ArchSpec &GetArchitecture() const { return m_arch; } const char *GetFlavor() const { return m_flavor.c_str(); } virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch, const char *flavor) = 0; protected: // SourceLine and SourceLinesToDisplay structures are only used in the mixed // source and assembly display methods internal to this class. struct SourceLine { FileSpec file; uint32_t line; uint32_t column; SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {} bool operator==(const SourceLine &rhs) const { return file == rhs.file && line == rhs.line && rhs.column == column; } bool operator!=(const SourceLine &rhs) const { return file != rhs.file || line != rhs.line || column != rhs.column; } bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; } }; struct SourceLinesToDisplay { std::vector lines; // index of the "current" source line, if we want to highlight that when // displaying the source lines. (as opposed to the surrounding source // lines provided to give context) size_t current_source_line; // Whether to print a blank line at the end of the source lines. bool print_source_context_end_eol; SourceLinesToDisplay() : lines(), current_source_line(-1), print_source_context_end_eol(true) { } }; // Get the function's declaration line number, hopefully a line number // earlier than the opening curly brace at the start of the function body. static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc); // Add the provided SourceLine to the map of filenames-to-source-lines-seen. static void AddLineToSourceLineTables( SourceLine &line, std::map> &source_lines_seen); // Given a source line, determine if we should print it when we're doing // mixed source & assembly output. We're currently using the // target.process.thread.step-avoid-regexp setting (which is used for // stepping over inlined STL functions by default) to determine what source // lines to avoid showing. // // Returns true if this source line should be elided (if the source line // should not be displayed). static bool ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, const SymbolContext &sc, SourceLine &line); static bool ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, const SymbolContext &sc, LineEntry &line) { SourceLine sl; sl.file = line.file; sl.line = line.line; sl.column = line.column; return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl); }; // Classes that inherit from Disassembler can see and modify these ArchSpec m_arch; InstructionList m_instruction_list; lldb::addr_t m_base_addr; std::string m_flavor; private: // For Disassembler only DISALLOW_COPY_AND_ASSIGN(Disassembler); }; } // namespace lldb_private #endif // liblldb_Disassembler_h_