aboutsummaryrefslogtreecommitdiffstats
path: root/include/llvm/Object
diff options
context:
space:
mode:
Diffstat (limited to 'include/llvm/Object')
-rw-r--r--include/llvm/Object/MachOFormat.h367
-rw-r--r--include/llvm/Object/MachOObject.h180
-rw-r--r--include/llvm/Object/ObjectFile.h17
3 files changed, 562 insertions, 2 deletions
diff --git a/include/llvm/Object/MachOFormat.h b/include/llvm/Object/MachOFormat.h
new file mode 100644
index 0000000000..31cd523ea2
--- /dev/null
+++ b/include/llvm/Object/MachOFormat.h
@@ -0,0 +1,367 @@
+//===- MachOFormat.h - Mach-O Format Structures And Constants ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares various structures and constants which are platform
+// independent and can be shared by any client which wishes to interact with
+// Mach object files.
+//
+// The definitions here are purposely chosen to match the LLVM style as opposed
+// to following the platform specific definition of the format.
+//
+// On a Mach system, see the <mach-o/...> includes for more information, in
+// particular <mach-o/loader.h>.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_MACHOFORMAT_H
+#define LLVM_OBJECT_MACHOFORMAT_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+namespace object {
+
+/// General Mach platform information.
+namespace mach {
+ /// @name CPU Type and Subtype Information
+ /// @{
+
+ /// \brief Capability bits used in CPU type encoding; CPUTypeMachine ORs CTFM_ArchABI64 into its 64-bit IDs.
+ enum CPUTypeFlagsMask {
+ CTFM_ArchMask = 0xFF000000, ///< Covers the top byte of the 32-bit CPU type word.
+ CTFM_ArchABI64 = 0x01000000 ///< Lowest bit inside CTFM_ArchMask; set on the 64-bit ABI machine IDs.
+ };
+
+ /// \brief Machine type IDs used in CPU type encoding; the 64-bit IDs are the 32-bit ID with CTFM_ArchABI64 ORed in.
+ enum CPUTypeMachine {
+ CTM_i386 = 7,
+ CTM_x86_64 = CTM_i386 | CTFM_ArchABI64, ///< Evaluates to 0x01000007.
+ CTM_ARM = 12,
+ CTM_SPARC = 14,
+ CTM_PowerPC = 18,
+ CTM_PowerPC64 = CTM_PowerPC | CTFM_ArchABI64 ///< Evaluates to 0x01000012.
+ };
+
+ /// \brief Capability bits used in CPU subtype encoding.
+ enum CPUSubtypeFlagsMask {
+ CSFM_SubtypeMask = 0xFF000000, ///< Covers the top byte of the CPU subtype word.
+ CSFM_SubtypeLib64 = 0x80000000 ///< Highest bit inside CSFM_SubtypeMask; presumably CPU_SUBTYPE_LIB64 from <mach/machine.h> -- TODO confirm.
+ };
+
+ /// \brief ARM Machine Subtypes. IDs are not in architecture-version order (V6 = 6 precedes V5TEJ = 7) and 1-4 are not defined here.
+ enum CPUSubtypeARM {
+ CSARM_ALL = 0,
+ CSARM_V4T = 5,
+ CSARM_V6 = 6,
+ CSARM_V5TEJ = 7,
+ CSARM_XSCALE = 8,
+ CSARM_V7 = 9
+ };
+
+ /// \brief PowerPC Machine Subtypes; only the "ALL" subtype is defined here.
+ enum CPUSubtypePowerPC {
+ CSPPC_ALL = 0
+ };
+
+ /// \brief SPARC Machine Subtypes; only the "ALL" subtype is defined here.
+ enum CPUSubtypeSPARC {
+ CSSPARC_ALL = 0
+ };
+
+ /// \brief x86 Machine Subtypes; only the "ALL" subtype is defined here.
+ enum CPUSubtypeX86 {
+ CSX86_ALL = 3 ///< Note: 3, unlike CSARM_ALL / CSPPC_ALL / CSSPARC_ALL which are 0.
+ };
+
+ /// @}
+
+} // end namespace mach
+
+/// Format information for Mach object files.
+namespace macho {
+ /// \brief Constants for structure sizes, in bytes; each equals the sum of the field widths of the struct named beside it.
+ enum StructureSizes {
+ Header32Size = 28, ///< Header: 7 x uint32_t.
+ Header64Size = 32, ///< Header followed by Header64Ext: 28 + 4.
+ SegmentLoadCommand32Size = 56, ///< SegmentLoadCommand: 2*4 + 16 + 8*4.
+ SegmentLoadCommand64Size = 72, ///< Segment64LoadCommand: 2*4 + 16 + 4*8 + 4*4.
+ Section32Size = 68, ///< Section: 16 + 16 + 9*4.
+ Section64Size = 80, ///< Section64: 16 + 16 + 2*8 + 8*4.
+ SymtabLoadCommandSize = 24, ///< SymtabLoadCommand: 6 x uint32_t.
+ DysymtabLoadCommandSize = 80, ///< DysymtabLoadCommand: 20 x uint32_t.
+ Nlist32Size = 12, ///< SymbolTableEntry: 4 + 1 + 1 + 2 + 4.
+ Nlist64Size = 16, ///< Symbol64TableEntry: 4 + 1 + 1 + 2 + 8.
+ RelocationInfoSize = 8 ///< RelocationEntry: 2 x uint32_t.
+ };
+
+ /// \brief Constants for header magic field (see Header::Magic).
+ enum HeaderMagic {
+ HM_Object32 = 0xFEEDFACE, ///< 32-bit mach object file
+ HM_Object64 = 0xFEEDFACF, ///< 64-bit mach object file (HM_Object32 with the lowest bit set)
+ HM_Universal = 0xCAFEBABE ///< Universal object file
+ };
+
+ /// \brief Header common to all Mach object files: seven 32-bit words, Header32Size (28) bytes in total.
+ struct Header {
+ uint32_t Magic; ///< See HeaderMagic.
+ uint32_t CPUType; ///< See mach::CPUTypeMachine and mach::CPUTypeFlagsMask.
+ uint32_t CPUSubtype; ///< See the mach::CPUSubtype* enums and mach::CPUSubtypeFlagsMask.
+ uint32_t FileType; ///< See HeaderFileType.
+ uint32_t NumLoadCommands; ///< Number of load commands (presumably those following the header -- confirm against <mach-o/loader.h>).
+ uint32_t SizeOfLoadCommands; ///< Size of the load commands (presumably total bytes -- confirm against <mach-o/loader.h>).
+ uint32_t Flags; ///< See HeaderFlags.
+ };
+
+ /// \brief Extended header for 64-bit object files: one extra word, so Header64Size (32) = Header32Size (28) + 4.
+ struct Header64Ext {
+ uint32_t Reserved; ///< No meaning is assigned to this word anywhere in this file.
+ };
+
+ // File type values (presumably for Header::FileType); only the object-file type is defined here. See <mach-o/loader.h>.
+ enum HeaderFileType {
+ HFT_Object = 0x1
+ };
+
+ enum HeaderFlags { // Flag bits (presumably for Header::Flags); only one is defined here. See <mach-o/loader.h>.
+ HF_SubsectionsViaSymbols = 0x2000
+ };
+
+ enum LoadCommandType { // Load command IDs (presumably the values stored in LoadCommand::Type -- confirm against <mach-o/loader.h>).
+ LCT_Segment = 0x1, // Name pairs with SegmentLoadCommand.
+ LCT_Symtab = 0x2, // Name pairs with SymtabLoadCommand.
+ LCT_Dysymtab = 0xb, // Name pairs with DysymtabLoadCommand.
+ LCT_Segment64 = 0x19, // Name pairs with Segment64LoadCommand.
+ LCT_UUID = 0x1b // No matching structure is declared in this file.
+ };
+
+ /// \brief Load command structure: the Type/Size pair that every *LoadCommand struct in this file also begins with.
+ struct LoadCommand {
+ uint32_t Type; ///< See LoadCommandType.
+ uint32_t Size; ///< Size of the command (presumably in bytes, including these two words -- confirm against <mach-o/loader.h>).
+ };
+
+ /// @name Load Command Structures
+ /// @{
+
+ struct SegmentLoadCommand { // 32-bit segment command: 2*4 + 16 + 8*4 = SegmentLoadCommand32Size (56) bytes.
+ uint32_t Type; // Same leading Type/Size pair as LoadCommand.
+ uint32_t Size;
+ char Name[16]; // Fixed-width 16-byte name field.
+ uint32_t VMAddress;
+ uint32_t VMSize;
+ uint32_t FileOffset;
+ uint32_t FileSize;
+ uint32_t MaxVMProtection;
+ uint32_t InitialVMProtection;
+ uint32_t NumSections;
+ uint32_t Flags;
+ };
+
+ struct Segment64LoadCommand { // 64-bit counterpart of SegmentLoadCommand: 2*4 + 16 + 4*8 + 4*4 = SegmentLoadCommand64Size (72) bytes.
+ uint32_t Type; // Same leading Type/Size pair as LoadCommand.
+ uint32_t Size;
+ char Name[16]; // Fixed-width 16-byte name field.
+ uint64_t VMAddress; // These four fields are uint64_t here, uint32_t in SegmentLoadCommand.
+ uint64_t VMSize;
+ uint64_t FileOffset;
+ uint64_t FileSize;
+ uint32_t MaxVMProtection;
+ uint32_t InitialVMProtection;
+ uint32_t NumSections;
+ uint32_t Flags;
+ };
+
+ struct SymtabLoadCommand { // Six 32-bit words = SymtabLoadCommandSize (24) bytes: an (offset, count) pair for the symbol table and an (offset, size) pair for the string table.
+ uint32_t Type; // Same leading Type/Size pair as LoadCommand.
+ uint32_t Size;
+ uint32_t SymbolTableOffset;
+ uint32_t NumSymbolTableEntries;
+ uint32_t StringTableOffset;
+ uint32_t StringTableSize;
+ };
+
+ struct DysymtabLoadCommand { // Twenty 32-bit words = DysymtabLoadCommandSize (80) bytes; after Type/Size the fields come in (index-or-offset, count) pairs.
+ uint32_t Type; // Same leading Type/Size pair as LoadCommand.
+ uint32_t Size;
+
+ uint32_t LocalSymbolsIndex;
+ uint32_t NumLocalSymbols;
+
+ uint32_t ExternalSymbolsIndex;
+ uint32_t NumExternalSymbols;
+
+ uint32_t UndefinedSymbolsIndex;
+ uint32_t NumUndefinedSymbols;
+
+ uint32_t TOCOffset;
+ uint32_t NumTOCEntries;
+
+ uint32_t ModuleTableOffset;
+ uint32_t NumModuleTableEntries;
+
+ uint32_t ReferenceSymbolTableOffset;
+ uint32_t NumReferencedSymbolTableEntries;
+
+ uint32_t IndirectSymbolTableOffset; // Entries of this table are modelled by IndirectSymbolTableEntry (going by the names -- TODO confirm).
+ uint32_t NumIndirectSymbolTableEntries;
+
+ uint32_t ExternalRelocationTableOffset;
+ uint32_t NumExternalRelocationTableEntries;
+
+ uint32_t LocalRelocationTableOffset;
+ uint32_t NumLocalRelocationTableEntries;
+ };
+
+ /// @}
+ /// @name Section Data
+ /// @{
+
+ struct Section { // 32-bit section record: 16 + 16 + 9*4 = Section32Size (68) bytes.
+ char Name[16]; // Fixed-width 16-byte name field.
+ char SegmentName[16]; // Fixed-width 16-byte name field, same width as SegmentLoadCommand::Name.
+ uint32_t Address;
+ uint32_t Size;
+ uint32_t Offset;
+ uint32_t Align;
+ uint32_t RelocationTableOffset;
+ uint32_t NumRelocationTableEntries;
+ uint32_t Flags;
+ uint32_t Reserved1;
+ uint32_t Reserved2;
+ };
+ struct Section64 { // 64-bit counterpart of Section: 16 + 16 + 2*8 + 8*4 = Section64Size (80) bytes.
+ char Name[16]; // Fixed-width 16-byte name field.
+ char SegmentName[16]; // Fixed-width 16-byte name field.
+ uint64_t Address; // uint64_t here, uint32_t in Section.
+ uint64_t Size; // uint64_t here, uint32_t in Section.
+ uint32_t Offset;
+ uint32_t Align;
+ uint32_t RelocationTableOffset;
+ uint32_t NumRelocationTableEntries;
+ uint32_t Flags;
+ uint32_t Reserved1;
+ uint32_t Reserved2;
+ uint32_t Reserved3; // Extra word not present in Section.
+ };
+
+ /// @}
+ /// @name Symbol Table Entries
+ /// @{
+
+ struct SymbolTableEntry { // 32-bit symbol record: 4 + 1 + 1 + 2 + 4 = Nlist32Size (12) bytes.
+ uint32_t StringIndex; // Presumably an index into the string table -- confirm against <mach-o/nlist.h>.
+ uint8_t Type; // One byte; see SymbolTypeFlags (its masks cover 0xff) and SymbolTypeType.
+ uint8_t SectionIndex;
+ uint16_t Flags;
+ uint32_t Value;
+ };
+ struct Symbol64TableEntry { // 64-bit counterpart of SymbolTableEntry: 4 + 1 + 1 + 2 + 8 = Nlist64Size (16) bytes.
+ uint32_t StringIndex; // Presumably an index into the string table -- confirm against <mach-o/nlist.h>.
+ uint8_t Type; // One byte; see SymbolTypeFlags (its masks cover 0xff) and SymbolTypeType.
+ uint8_t SectionIndex;
+ uint16_t Flags;
+ uint64_t Value; // uint64_t here, uint32_t in SymbolTableEntry.
+ };
+
+ /// @}
+ /// @name Indirect Symbol Table
+ /// @{
+
+ struct IndirectSymbolTableEntry { // A single 32-bit word per entry.
+ uint32_t Index; // IndirectSymbolFlags lists special values that can be encoded in an indirect symbol entry.
+ };
+
+ /// @}
+ /// @name Relocation Data
+ /// @{
+
+ struct RelocationEntry { // Two 32-bit words = RelocationInfoSize (8) bytes.
+ uint32_t Word0; // Raw, undecoded word; this file declares no bit-level layout for it.
+ uint32_t Word1; // Raw, undecoded word; this file declares no bit-level layout for it.
+ };
+
+ /// @}
+
+ // Symbol type values; all three fit inside STF_TypeMask (0x0e). See <mach-o/nlist.h>.
+ enum SymbolTypeType {
+ STT_Undefined = 0x00,
+ STT_Absolute = 0x02,
+ STT_Section = 0x0e
+ };
+
+ enum SymbolTypeFlags { // Masks over a one-byte symbol type field; the four values together cover all eight bits (0xff).
+ // If any of these bits are set, then the entry is a stab entry number (see
+ // <mach-o/stab.h>). Otherwise the other masks apply.
+ STF_StabsEntryMask = 0xe0,
+
+ STF_TypeMask = 0x0e, // Every SymbolTypeType value lies within these bits.
+ STF_External = 0x01,
+ STF_PrivateExtern = 0x10
+ };
+
+ /// IndirectSymbolFlags - Flags for encoding special values in the indirect
+ /// symbol entry (see IndirectSymbolTableEntry); these are the two highest bits of a 32-bit word.
+ enum IndirectSymbolFlags {
+ ISF_Local = 0x80000000, ///< Bit 31.
+ ISF_Absolute = 0x40000000 ///< Bit 30.
+ };
+
+ /// RelocationFlags - Special flags for addresses (presumably tested against a RelocationEntry word -- TODO confirm which).
+ enum RelocationFlags {
+ RF_Scattered = 0x80000000 ///< Bit 31 of a 32-bit word.
+ };
+
+ /// Common relocation info types (values 0-2); RelocationInfoType_Generic and RelocationInfoTypeARM continue numbering from 3.
+ enum RelocationInfoType {
+ RIT_Vanilla = 0,
+ RIT_Pair = 1,
+ RIT_Difference = 2
+ };
+
+ /// Generic relocation info types, which are shared by some (but not all)
+ /// platforms. Values continue after RelocationInfoType (0-2); note RelocationInfoTypeARM assigns 3 and 4 the other way round.
+ enum RelocationInfoType_Generic {
+ RIT_Generic_PreboundLazyPointer = 3,
+ RIT_Generic_LocalDifference = 4,
+ RIT_Generic_TLV = 5
+ };
+
+ /// X86_64 uses its own relocation types; values 0-9 reuse the numbers of the common and generic enums with different meanings.
+ enum RelocationInfoTypeX86_64 {
+ // Note that x86_64 doesn't even share the common relocation types.
+ RIT_X86_64_Unsigned = 0,
+ RIT_X86_64_Signed = 1,
+ RIT_X86_64_Branch = 2,
+ RIT_X86_64_GOTLoad = 3,
+ RIT_X86_64_GOT = 4,
+ RIT_X86_64_Subtractor = 5,
+ RIT_X86_64_Signed1 = 6,
+ RIT_X86_64_Signed2 = 7,
+ RIT_X86_64_Signed4 = 8,
+ RIT_X86_64_TLV = 9
+ };
+
+ /// ARM uses its own relocation types, numbered from 3 (after the common RelocationInfoType values 0-2).
+ enum RelocationInfoTypeARM {
+ RIT_ARM_LocalDifference = 3, // Note: RIT_Generic_LocalDifference is 4.
+ RIT_ARM_PreboundLazyPointer = 4, // Note: RIT_Generic_PreboundLazyPointer is 3.
+ RIT_ARM_Branch24Bit = 5,
+ RIT_ARM_ThumbBranch22Bit = 6,
+ RIT_ARM_ThumbBranch32Bit = 7,
+ RIT_ARM_Half = 8,
+ RIT_ARM_HalfDifference = 9
+
+ };
+
+} // end namespace macho
+
+} // end namespace object
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Object/MachOObject.h b/include/llvm/Object/MachOObject.h
new file mode 100644
index 0000000000..03d9c147b4
--- /dev/null
+++ b/include/llvm/Object/MachOObject.h
@@ -0,0 +1,180 @@
+//===- MachOObject.h - Mach-O Object File Wrapper ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_MACHOOBJECT_H
+#define LLVM_OBJECT_MACHOOBJECT_H
+
+#include <string>
+#include "llvm/ADT/InMemoryStruct.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/MachOFormat.h"
+
+namespace llvm {
+
+class MemoryBuffer;
+
+namespace object {
+
+/// \brief Wrapper object for manipulating Mach-O object files.
+///
+/// This class is designed to implement a full-featured, efficient, portable,
+/// and robust interface to Mach-O object files. It does not attempt to
+/// smooth over rough edges in the Mach-O format or generalize access to object
+/// independent features.
+///
+/// The class is designed around accessing the Mach-O object which is expected
+/// to be fully loaded into memory.
+///
+/// This class is *not* suitable for concurrent use. For efficient operation,
+/// the class uses APIs which rely on the ability to cache the results of
+/// certain calls in internal objects which are not safe for concurrent
+/// access. This allows the API to be zero-copy on the common paths.
+//
+// FIXME: It would be cool if we supported a "paged" MemoryBuffer
+// implementation. This would allow us to implement a more sensible version of
+// MemoryObject which can work like a MemoryBuffer, but be more efficient for
+// objects which are in the current address space.
+class MachOObject {
+public:
+ struct LoadCommandInfo {
+ /// The load command information (the Type/Size pair of macho::LoadCommand).
+ macho::LoadCommand Command;
+
+ /// The offset to the start of the load command in memory.
+ uint64_t Offset;
+ };
+
+private:
+ OwningPtr<MemoryBuffer> Buffer; // Owning pointer; LoadFromBuffer documents that the object takes ownership of its buffer.
+
+ /// Whether the object is little endian.
+ bool IsLittleEndian;
+ /// Whether the object is 64-bit.
+ bool Is64Bit;
+ /// Whether the object is swapped endianness from the host.
+ bool IsSwappedEndian;
+ /// Whether the string table has been registered (checked by getStringTableData).
+ bool HasStringTable;
+
+ /// The cached information on the load commands.
+ LoadCommandInfo *LoadCommands;
+ mutable unsigned NumLoadedCommands; // mutable: can change inside const members (presumably getLoadCommandInfo filling the cache -- TODO confirm).
+
+ /// The cached copy of the header.
+ macho::Header Header;
+ macho::Header64Ext Header64Ext; // Only exposed for 64-bit objects; see getHeader64Ext.
+
+ /// Cache string table information.
+ StringRef StringTable;
+
+private:
+ MachOObject(MemoryBuffer *Buffer, bool IsLittleEndian, bool Is64Bit); // Private constructor; LoadFromBuffer is the public way to obtain an instance.
+
+public:
+ ~MachOObject();
+
+ /// \brief Load a Mach-O object from a MemoryBuffer object.
+ ///
+ /// \param Buffer - The buffer to load the object from. This routine takes
+ /// exclusive ownership of the buffer (which is passed to the returned object
+ /// on success).
+ /// \param ErrorStr [out] - If given, will be set to a user readable error
+ /// message on failure.
+ /// \returns The loaded object, or null on error.
+ static MachOObject *LoadFromBuffer(MemoryBuffer *Buffer,
+ std::string *ErrorStr = 0);
+
+ /// @name File Information
+ /// @{
+
+ bool isLittleEndian() const { return IsLittleEndian; }
+ bool isSwappedEndian() const { return IsSwappedEndian; }
+ bool is64Bit() const { return Is64Bit; }
+
+ unsigned getHeaderSize() const { // Header size in bytes for this object's bitness.
+ return Is64Bit ? macho::Header64Size : macho::Header32Size; // 32 for 64-bit objects, 28 otherwise.
+ }
+
+ StringRef getData(size_t Offset, size_t Size) const; // Declared only; presumably a view of Size bytes at Offset in Buffer -- TODO confirm in the .cpp.
+
+ /// @}
+ /// @name String Table Data
+ /// @{
+
+ StringRef getStringTableData() const { // Returns the cached StringTable; asserts HasStringTable first.
+ assert(HasStringTable && "String table has not been registered!"); // NOTE(review): <cassert> is not among this header's direct includes; presumably pulled in transitively -- confirm.
+ return StringTable;
+ }
+
+ StringRef getStringAtIndex(unsigned Index) const { // Returns the string table bytes from Index up to the next '\0'.
+ size_t End = getStringTableData().find('\0', Index); // Position of the first NUL at or after Index, or StringRef's not-found value.
+ return getStringTableData().slice(Index, End); // NOTE(review): an out-of-range Index or a missing NUL relies on StringRef::slice clamping -- confirm.
+ }
+
+ void RegisterStringTable(macho::SymtabLoadCommand &SLC); // Declared only; presumably sets StringTable/HasStringTable from SLC's string table fields -- TODO confirm.
+
+ /// @}
+ /// @name Object Header Access
+ /// @{
+
+ const macho::Header &getHeader() const { return Header; }
+ const macho::Header64Ext &getHeader64Ext() const { // Valid only for 64-bit objects.
+ assert(is64Bit() && "Invalid access!");
+ return Header64Ext;
+ }
+
+ /// @}
+ /// @name Object Structure Access
+ /// @{
+
+ /// \brief Retrieve the information for the given load command.
+ const LoadCommandInfo &getLoadCommandInfo(unsigned Index) const;
+
+ void ReadSegmentLoadCommand( // The Read* members below are declared only; each delivers its result through the InMemoryStruct out-parameter Res.
+ const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::SegmentLoadCommand> &Res) const;
+ void ReadSegment64LoadCommand(
+ const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::Segment64LoadCommand> &Res) const;
+ void ReadSymtabLoadCommand(
+ const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::SymtabLoadCommand> &Res) const;
+ void ReadDysymtabLoadCommand(
+ const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::DysymtabLoadCommand> &Res) const;
+ void ReadIndirectSymbolTableEntry(
+ const macho::DysymtabLoadCommand &DLC,
+ unsigned Index,
+ InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const;
+ void ReadSection(
+ const LoadCommandInfo &LCI,
+ unsigned Index,
+ InMemoryStruct<macho::Section> &Res) const;
+ void ReadSection64(
+ const LoadCommandInfo &LCI,
+ unsigned Index,
+ InMemoryStruct<macho::Section64> &Res) const;
+ void ReadRelocationEntry(
+ uint64_t RelocationTableOffset, unsigned Index,
+ InMemoryStruct<macho::RelocationEntry> &Res) const;
+ void ReadSymbolTableEntry(
+ uint64_t SymbolTableOffset, unsigned Index,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) const;
+ void ReadSymbol64TableEntry(
+ uint64_t SymbolTableOffset, unsigned Index,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
+
+ /// @}
+};
+
+} // end namespace object
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index ca51581434..eee9d447cd 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -15,7 +15,8 @@
#define LLVM_OBJECT_OBJECT_FILE_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
+#include <cstring>
namespace llvm {
@@ -25,7 +26,19 @@ class StringRef;
namespace object {
class ObjectFile;
-typedef uint64_t DataRefImpl;
+
+union DataRefImpl { // Replaces the former `typedef uint64_t DataRefImpl`: the same storage viewed as two 32-bit words or as one pointer-sized integer.
+ struct {
+ uint32_t a, b; // Two 32-bit words, 8 bytes in total.
+ } d;
+ intptr_t p; // NOTE(review): where intptr_t is narrower than 8 bytes, writing only `p` leaves the rest of the union unset -- confirm users zero it first.
+};
+
+inline bool operator ==(const DataRefImpl &a, const DataRefImpl &b) { // inline (was static): defined in a header, so share one definition instead of a private copy per translation unit.
+ // Check bitwise identical over all sizeof(DataRefImpl) bytes. This is the only legal way to compare a union w/o
+ // knowing which member is in use. NOTE(review): bytes not written through the active member are compared too -- confirm callers zero the whole union.
+ return std::memcmp(&a, &b, sizeof(DataRefImpl)) == 0;
+}
/// SymbolRef - This is a value type class that represents a single symbol in
/// the list of symbols in the object file.