diff options
Diffstat (limited to 'include/llvm/Object')
-rw-r--r-- | include/llvm/Object/MachOFormat.h | 367 | ||||
-rw-r--r-- | include/llvm/Object/MachOObject.h | 180 | ||||
-rw-r--r-- | include/llvm/Object/ObjectFile.h | 17 |
3 files changed, 562 insertions, 2 deletions
diff --git a/include/llvm/Object/MachOFormat.h b/include/llvm/Object/MachOFormat.h new file mode 100644 index 0000000000..31cd523ea2 --- /dev/null +++ b/include/llvm/Object/MachOFormat.h @@ -0,0 +1,367 @@ +//===- MachOFormat.h - Mach-O Format Structures And Constants ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares various structures and constants which are platform +// independent and can be shared by any client which wishes to interact with +// Mach object files. +// +// The definitions here are purposely chosen to match the LLVM style as opposed +// to following the platform specific definition of the format. +// +// On a Mach system, see the <mach-o/...> includes for more information, in +// particular <mach-o/loader.h>. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_MACHOFORMAT_H +#define LLVM_OBJECT_MACHOFORMAT_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { +namespace object { + +/// General Mach platform information. +namespace mach { + /// @name CPU Type and Subtype Information + /// { + + /// \brief Capability bits used in CPU type encoding. + enum CPUTypeFlagsMask { + CTFM_ArchMask = 0xFF000000, + CTFM_ArchABI64 = 0x01000000 + }; + + /// \brief Machine type IDs used in CPU type encoding. + enum CPUTypeMachine { + CTM_i386 = 7, + CTM_x86_64 = CTM_i386 | CTFM_ArchABI64, + CTM_ARM = 12, + CTM_SPARC = 14, + CTM_PowerPC = 18, + CTM_PowerPC64 = CTM_PowerPC | CTFM_ArchABI64 + }; + + /// \brief Capability bits used in CPU subtype encoding. + enum CPUSubtypeFlagsMask { + CSFM_SubtypeMask = 0xFF000000, + CSFM_SubtypeLib64 = 0x80000000 + }; + + /// \brief ARM Machine Subtypes. + enum CPUSubtypeARM { + CSARM_ALL = 0, + CSARM_V4T = 5, + CSARM_V6 = 6, + CSARM_V5TEJ = 7, + CSARM_XSCALE = 8, + CSARM_V7 = 9 + }; + + /// \brief PowerPC Machine Subtypes. + enum CPUSubtypePowerPC { + CSPPC_ALL = 0 + }; + + /// \brief SPARC Machine Subtypes. + enum CPUSubtypeSPARC { + CSSPARC_ALL = 0 + }; + + /// \brief x86 Machine Subtypes. + enum CPUSubtypeX86 { + CSX86_ALL = 3 + }; + + /// @} + +} // end namespace mach + +/// Format information for Mach object files. +namespace macho { + /// \brief Constants for structure sizes. + enum StructureSizes { + Header32Size = 28, + Header64Size = 32, + SegmentLoadCommand32Size = 56, + SegmentLoadCommand64Size = 72, + Section32Size = 68, + Section64Size = 80, + SymtabLoadCommandSize = 24, + DysymtabLoadCommandSize = 80, + Nlist32Size = 12, + Nlist64Size = 16, + RelocationInfoSize = 8 + }; + + /// \brief Constants for header magic field. + enum HeaderMagic { + HM_Object32 = 0xFEEDFACE, ///< 32-bit mach object file + HM_Object64 = 0xFEEDFACF, ///< 64-bit mach object file + HM_Universal = 0xCAFEBABE ///< Universal object file + }; + + /// \brief Header common to all Mach object files. + struct Header { + uint32_t Magic; + uint32_t CPUType; + uint32_t CPUSubtype; + uint32_t FileType; + uint32_t NumLoadCommands; + uint32_t SizeOfLoadCommands; + uint32_t Flags; + }; + + /// \brief Extended header for 64-bit object files. + struct Header64Ext { + uint32_t Reserved; + }; + + // See <mach-o/loader.h>. + enum HeaderFileType { + HFT_Object = 0x1 + }; + + enum HeaderFlags { + HF_SubsectionsViaSymbols = 0x2000 + }; + + enum LoadCommandType { + LCT_Segment = 0x1, + LCT_Symtab = 0x2, + LCT_Dysymtab = 0xb, + LCT_Segment64 = 0x19, + LCT_UUID = 0x1b + }; + + /// \brief Load command structure. + struct LoadCommand { + uint32_t Type; + uint32_t Size; + }; + + /// @name Load Command Structures + /// @{ + + struct SegmentLoadCommand { + uint32_t Type; + uint32_t Size; + char Name[16]; + uint32_t VMAddress; + uint32_t VMSize; + uint32_t FileOffset; + uint32_t FileSize; + uint32_t MaxVMProtection; + uint32_t InitialVMProtection; + uint32_t NumSections; + uint32_t Flags; + }; + + struct Segment64LoadCommand { + uint32_t Type; + uint32_t Size; + char Name[16]; + uint64_t VMAddress; + uint64_t VMSize; + uint64_t FileOffset; + uint64_t FileSize; + uint32_t MaxVMProtection; + uint32_t InitialVMProtection; + uint32_t NumSections; + uint32_t Flags; + }; + + struct SymtabLoadCommand { + uint32_t Type; + uint32_t Size; + uint32_t SymbolTableOffset; + uint32_t NumSymbolTableEntries; + uint32_t StringTableOffset; + uint32_t StringTableSize; + }; + + struct DysymtabLoadCommand { + uint32_t Type; + uint32_t Size; + + uint32_t LocalSymbolsIndex; + uint32_t NumLocalSymbols; + + uint32_t ExternalSymbolsIndex; + uint32_t NumExternalSymbols; + + uint32_t UndefinedSymbolsIndex; + uint32_t NumUndefinedSymbols; + + uint32_t TOCOffset; + uint32_t NumTOCEntries; + + uint32_t ModuleTableOffset; + uint32_t NumModuleTableEntries; + + uint32_t ReferenceSymbolTableOffset; + uint32_t NumReferencedSymbolTableEntries; + + uint32_t IndirectSymbolTableOffset; + uint32_t NumIndirectSymbolTableEntries; + + uint32_t ExternalRelocationTableOffset; + uint32_t NumExternalRelocationTableEntries; + + uint32_t LocalRelocationTableOffset; + uint32_t NumLocalRelocationTableEntries; + }; + + /// @} + /// @name Section Data + /// @{ + + struct Section { + char Name[16]; + char SegmentName[16]; + uint32_t Address; + uint32_t Size; + uint32_t Offset; + uint32_t Align; + uint32_t RelocationTableOffset; + uint32_t NumRelocationTableEntries; + uint32_t Flags; + uint32_t Reserved1; + uint32_t Reserved2; + }; + struct Section64 { + char Name[16]; + char SegmentName[16]; + uint64_t Address; + uint64_t Size; + uint32_t Offset; + uint32_t Align; + uint32_t RelocationTableOffset; + uint32_t NumRelocationTableEntries; + uint32_t Flags; + uint32_t Reserved1; + uint32_t Reserved2; + uint32_t Reserved3; + }; + + /// @} + /// @name Symbol Table Entries + /// @{ + + struct SymbolTableEntry { + uint32_t StringIndex; + uint8_t Type; + uint8_t SectionIndex; + uint16_t Flags; + uint32_t Value; + }; + struct Symbol64TableEntry { + uint32_t StringIndex; + uint8_t Type; + uint8_t SectionIndex; + uint16_t Flags; + uint64_t Value; + }; + + /// @} + /// @name Indirect Symbol Table + /// @{ + + struct IndirectSymbolTableEntry { + uint32_t Index; + }; + + /// @} + /// @name Relocation Data + /// @{ + + struct RelocationEntry { + uint32_t Word0; + uint32_t Word1; + }; + + /// @} + + // See <mach-o/nlist.h>. + enum SymbolTypeType { + STT_Undefined = 0x00, + STT_Absolute = 0x02, + STT_Section = 0x0e + }; + + enum SymbolTypeFlags { + // If any of these bits are set, then the entry is a stab entry number (see + // <mach-o/stab.h>. Otherwise the other masks apply. + STF_StabsEntryMask = 0xe0, + + STF_TypeMask = 0x0e, + STF_External = 0x01, + STF_PrivateExtern = 0x10 + }; + + /// IndirectSymbolFlags - Flags for encoding special values in the indirect + /// symbol entry. + enum IndirectSymbolFlags { + ISF_Local = 0x80000000, + ISF_Absolute = 0x40000000 + }; + + /// RelocationFlags - Special flags for addresses. + enum RelocationFlags { + RF_Scattered = 0x80000000 + }; + + /// Common relocation info types. + enum RelocationInfoType { + RIT_Vanilla = 0, + RIT_Pair = 1, + RIT_Difference = 2 + }; + + /// Generic relocation info types, which are shared by some (but not all) + /// platforms. + enum RelocationInfoType_Generic { + RIT_Generic_PreboundLazyPointer = 3, + RIT_Generic_LocalDifference = 4, + RIT_Generic_TLV = 5 + }; + + /// X86_64 uses its own relocation types. + enum RelocationInfoTypeX86_64 { + // Note that x86_64 doesn't even share the common relocation types. + RIT_X86_64_Unsigned = 0, + RIT_X86_64_Signed = 1, + RIT_X86_64_Branch = 2, + RIT_X86_64_GOTLoad = 3, + RIT_X86_64_GOT = 4, + RIT_X86_64_Subtractor = 5, + RIT_X86_64_Signed1 = 6, + RIT_X86_64_Signed2 = 7, + RIT_X86_64_Signed4 = 8, + RIT_X86_64_TLV = 9 + }; + + /// ARM uses its own relocation types. + enum RelocationInfoTypeARM { + RIT_ARM_LocalDifference = 3, + RIT_ARM_PreboundLazyPointer = 4, + RIT_ARM_Branch24Bit = 5, + RIT_ARM_ThumbBranch22Bit = 6, + RIT_ARM_ThumbBranch32Bit = 7, + RIT_ARM_Half = 8, + RIT_ARM_HalfDifference = 9 + + }; + +} // end namespace macho + +} // end namespace object +} // end namespace llvm + +#endif diff --git a/include/llvm/Object/MachOObject.h b/include/llvm/Object/MachOObject.h new file mode 100644 index 0000000000..03d9c147b4 --- /dev/null +++ b/include/llvm/Object/MachOObject.h @@ -0,0 +1,180 @@ +//===- MachOObject.h - Mach-O Object File Wrapper ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_MACHOOBJECT_H +#define LLVM_OBJECT_MACHOOBJECT_H + +#include <string> +#include "llvm/ADT/InMemoryStruct.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/MachOFormat.h" + +namespace llvm { + +class MemoryBuffer; + +namespace object { + +/// \brief Wrapper object for manipulating Mach-O object files. +/// +/// This class is designed to implement a full-featured, efficient, portable, +/// and robust Mach-O interface to Mach-O object files. It does not attempt to +/// smooth over rough edges in the Mach-O format or generalize access to object +/// independent features. +/// +/// The class is designed around accessing the Mach-O object which is expected +/// to be fully loaded into memory. +/// +/// This class is *not* suitable for concurrent use. For efficient operation, +/// the class uses APIs which rely on the ability to cache the results of +/// certain calls in internal objects which are not safe for concurrent +/// access. This allows the API to be zero-copy on the common paths. +// +// FIXME: It would be cool if we supported a "paged" MemoryBuffer +// implementation. This would allow us to implement a more sensible version of +// MemoryObject which can work like a MemoryBuffer, but be more efficient for +// objects which are in the current address space. +class MachOObject { +public: + struct LoadCommandInfo { + /// The load command information. + macho::LoadCommand Command; + + /// The offset to the start of the load command in memory. + uint64_t Offset; + }; + +private: + OwningPtr<MemoryBuffer> Buffer; + + /// Whether the object is little endian. + bool IsLittleEndian; + /// Whether the object is 64-bit. + bool Is64Bit; + /// Whether the object is swapped endianness from the host. + bool IsSwappedEndian; + /// Whether the string table has been registered. + bool HasStringTable; + + /// The cached information on the load commands. + LoadCommandInfo *LoadCommands; + mutable unsigned NumLoadedCommands; + + /// The cached copy of the header. + macho::Header Header; + macho::Header64Ext Header64Ext; + + /// Cache string table information. + StringRef StringTable; + +private: + MachOObject(MemoryBuffer *Buffer, bool IsLittleEndian, bool Is64Bit); + +public: + ~MachOObject(); + + /// \brief Load a Mach-O object from a MemoryBuffer object. + /// + /// \param Buffer - The buffer to load the object from. This routine takes + /// exclusive ownership of the buffer (which is passed to the returned object + /// on success). + /// \param ErrorStr [out] - If given, will be set to a user readable error + /// message on failure. + /// \returns The loaded object, or null on error. + static MachOObject *LoadFromBuffer(MemoryBuffer *Buffer, + std::string *ErrorStr = 0); + + /// @name File Information + /// @{ + + bool isLittleEndian() const { return IsLittleEndian; } + bool isSwappedEndian() const { return IsSwappedEndian; } + bool is64Bit() const { return Is64Bit; } + + unsigned getHeaderSize() const { + return Is64Bit ? macho::Header64Size : macho::Header32Size; + } + + StringRef getData(size_t Offset, size_t Size) const; + + /// @} + /// @name String Table Data + /// @{ + + StringRef getStringTableData() const { + assert(HasStringTable && "String table has not been registered!"); + return StringTable; + } + + StringRef getStringAtIndex(unsigned Index) const { + size_t End = getStringTableData().find('\0', Index); + return getStringTableData().slice(Index, End); + } + + void RegisterStringTable(macho::SymtabLoadCommand &SLC); + + /// @} + /// @name Object Header Access + /// @{ + + const macho::Header &getHeader() const { return Header; } + const macho::Header64Ext &getHeader64Ext() const { + assert(is64Bit() && "Invalid access!"); + return Header64Ext; + } + + /// @} + /// @name Object Structure Access + /// @{ + + /// \brief Retrieve the information for the given load command. + const LoadCommandInfo &getLoadCommandInfo(unsigned Index) const; + + void ReadSegmentLoadCommand( + const LoadCommandInfo &LCI, + InMemoryStruct<macho::SegmentLoadCommand> &Res) const; + void ReadSegment64LoadCommand( + const LoadCommandInfo &LCI, + InMemoryStruct<macho::Segment64LoadCommand> &Res) const; + void ReadSymtabLoadCommand( + const LoadCommandInfo &LCI, + InMemoryStruct<macho::SymtabLoadCommand> &Res) const; + void ReadDysymtabLoadCommand( + const LoadCommandInfo &LCI, + InMemoryStruct<macho::DysymtabLoadCommand> &Res) const; + void ReadIndirectSymbolTableEntry( + const macho::DysymtabLoadCommand &DLC, + unsigned Index, + InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const; + void ReadSection( + const LoadCommandInfo &LCI, + unsigned Index, + InMemoryStruct<macho::Section> &Res) const; + void ReadSection64( + const LoadCommandInfo &LCI, + unsigned Index, + InMemoryStruct<macho::Section64> &Res) const; + void ReadRelocationEntry( + uint64_t RelocationTableOffset, unsigned Index, + InMemoryStruct<macho::RelocationEntry> &Res) const; + void ReadSymbolTableEntry( + uint64_t SymbolTableOffset, unsigned Index, + InMemoryStruct<macho::SymbolTableEntry> &Res) const; + void ReadSymbol64TableEntry( + uint64_t SymbolTableOffset, unsigned Index, + InMemoryStruct<macho::Symbol64TableEntry> &Res) const; + + /// @} +}; + +} // end namespace object +} // end namespace llvm + +#endif diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index ca51581434..eee9d447cd 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -15,7 +15,8 @@ #define LLVM_OBJECT_OBJECT_FILE_H #include "llvm/ADT/StringRef.h" -#include "llvm/System/DataTypes.h" +#include "llvm/Support/DataTypes.h" +#include <cstring> namespace llvm { @@ -25,7 +26,19 @@ class StringRef; namespace object { class ObjectFile; -typedef uint64_t DataRefImpl; + +union DataRefImpl { + struct { + uint32_t a, b; + } d; + intptr_t p; +}; + +static bool operator ==(const DataRefImpl &a, const DataRefImpl &b) { + // Check bitwise identical. This is the only legal way to compare a union w/o + // knowing which member is in use. + return std::memcmp(&a, &b, sizeof(DataRefImpl)) == 0; +} /// SymbolRef - This is a value type class that represents a single symbol in /// the list of symbols in the object file. |