| // Copyright (c) 2010, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
| |
| // macho_reader.cc: Implementation of google_breakpad::mach_o::FatReader and |
| // google_breakpad::mach_o::Reader. See macho_reader.h for details. |
| |
| #include "common/mac/macho_reader.h" |
| |
| #include <assert.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| |
| #include <limits> |
| |
// Unfortunately, CPU_TYPE_ARM is not defined for 10.4, so supply its value
// here whenever the headers in use leave it undefined.
#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM 12
#endif

// Same fallback for the 64-bit ARM CPU type (16777228 == 0x0100000c).
#ifndef CPU_TYPE_ARM_64
#define CPU_TYPE_ARM_64 16777228
#endif
| |
| namespace google_breakpad { |
| namespace mach_o { |
| |
// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
// arguments, so you can't place expressions that do necessary work in
// the argument of an assert. Nor can you assign the result of the
// expression to a variable and assert that the variable's value is
// true: you'll get unused variable warnings when NDEBUG is #defined.
//
// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that
// the result is true if NDEBUG is not #defined.
//
// In the NDEBUG case the result is explicitly discarded with a cast to
// void, so the expansion is a void expression in both configurations and
// does not trigger unused-value warnings.
#if defined(NDEBUG)
#define ASSERT_ALWAYS_EVAL(x) (static_cast<void>(x))
#else
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#endif
| |
| void FatReader::Reporter::BadHeader() { |
| fprintf(stderr, "%s: file is neither a fat binary file" |
| " nor a Mach-O object file\n", filename_.c_str()); |
| } |
| |
| void FatReader::Reporter::TooShort() { |
| fprintf(stderr, "%s: file too short for the data it claims to contain\n", |
| filename_.c_str()); |
| } |
| |
| void FatReader::Reporter::MisplacedObjectFile() { |
| fprintf(stderr, "%s: file too short for the object files it claims" |
| " to contain\n", filename_.c_str()); |
| } |
| |
| bool FatReader::Read(const uint8_t *buffer, size_t size) { |
| buffer_.start = buffer; |
| buffer_.end = buffer + size; |
| ByteCursor cursor(&buffer_); |
| |
| // Fat binaries always use big-endian, so read the magic number in |
| // that endianness. To recognize Mach-O magic numbers, which can use |
| // either endianness, check for both the proper and reversed forms |
| // of the magic numbers. |
| cursor.set_big_endian(true); |
| if (cursor >> magic_) { |
| if (magic_ == FAT_MAGIC) { |
| // How many object files does this fat binary contain? |
| uint32_t object_files_count; |
| if (!(cursor >> object_files_count)) { // nfat_arch |
| reporter_->TooShort(); |
| return false; |
| } |
| |
| // Read the list of object files. |
| object_files_.resize(object_files_count); |
| for (size_t i = 0; i < object_files_count; i++) { |
| struct fat_arch objfile; |
| |
| // Read this object file entry, byte-swapping as appropriate. |
| cursor >> objfile.cputype |
| >> objfile.cpusubtype |
| >> objfile.offset |
| >> objfile.size |
| >> objfile.align; |
| |
| SuperFatArch super_fat_arch(objfile); |
| object_files_[i] = super_fat_arch; |
| |
| if (!cursor) { |
| reporter_->TooShort(); |
| return false; |
| } |
| // Does the file actually have the bytes this entry refers to? |
| size_t fat_size = buffer_.Size(); |
| if (objfile.offset > fat_size || |
| objfile.size > fat_size - objfile.offset) { |
| reporter_->MisplacedObjectFile(); |
| return false; |
| } |
| } |
| |
| return true; |
| } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 || |
| magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) { |
| // If this is a little-endian Mach-O file, fix the cursor's endianness. |
| if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) |
| cursor.set_big_endian(false); |
| // Record the entire file as a single entry in the object file list. |
| object_files_.resize(1); |
| |
| // Get the cpu type and subtype from the Mach-O header. |
| if (!(cursor >> object_files_[0].cputype |
| >> object_files_[0].cpusubtype)) { |
| reporter_->TooShort(); |
| return false; |
| } |
| |
| object_files_[0].offset = 0; |
| object_files_[0].size = static_cast<uint64_t>(buffer_.Size()); |
| // This alignment is correct for 32 and 64-bit x86 and ppc. |
| // See get_align in the lipo source for other architectures: |
| // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c |
| object_files_[0].align = 12; // 2^12 == 4096 |
| return true; |
| } |
| } |
| reporter_->BadHeader(); |
| return false; |
| } |
| |
| void Reader::Reporter::BadHeader() { |
| fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str()); |
| } |
| |
| void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type, |
| cpu_subtype_t cpu_subtype, |
| cpu_type_t expected_cpu_type, |
| cpu_subtype_t expected_cpu_subtype) { |
| fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected" |
| " type %d, subtype %d\n", |
| filename_.c_str(), cpu_type, cpu_subtype, |
| expected_cpu_type, expected_cpu_subtype); |
| } |
| |
| void Reader::Reporter::HeaderTruncated() { |
| fprintf(stderr, "%s: file does not contain a complete Mach-O header\n", |
| filename_.c_str()); |
| } |
| |
| void Reader::Reporter::LoadCommandRegionTruncated() { |
| fprintf(stderr, "%s: file too short to hold load command region" |
| " given in Mach-O header\n", filename_.c_str()); |
| } |
| |
| void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i, |
| LoadCommandType type) { |
| fprintf(stderr, "%s: file's header claims there are %zu" |
| " load commands, but load command #%zu", |
| filename_.c_str(), claimed, i); |
| if (type) fprintf(stderr, ", of type %d,", type); |
| fprintf(stderr, " extends beyond the end of the load command region\n"); |
| } |
| |
| void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) { |
| fprintf(stderr, "%s: the contents of load command #%zu, of type %d," |
| " extend beyond the size given in the load command's header\n", |
| filename_.c_str(), i, type); |
| } |
| |
| void Reader::Reporter::SectionsMissing(const string &name) { |
| fprintf(stderr, "%s: the load command for segment '%s'" |
| " is too short to hold the section headers it claims to have\n", |
| filename_.c_str(), name.c_str()); |
| } |
| |
| void Reader::Reporter::MisplacedSegmentData(const string &name) { |
| fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond" |
| " the end of the file\n", filename_.c_str(), name.c_str()); |
| } |
| |
| void Reader::Reporter::MisplacedSectionData(const string §ion, |
| const string &segment) { |
| fprintf(stderr, "%s: the section '%s' in segment '%s'" |
| " claims its contents lie outside the segment's contents\n", |
| filename_.c_str(), section.c_str(), segment.c_str()); |
| } |
| |
| void Reader::Reporter::MisplacedSymbolTable() { |
| fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol" |
| " table's contents are located beyond the end of the file\n", |
| filename_.c_str()); |
| } |
| |
| void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) { |
| fprintf(stderr, "%s: CPU type %d is not supported\n", |
| filename_.c_str(), cpu_type); |
| } |
| |
| bool Reader::Read(const uint8_t *buffer, |
| size_t size, |
| cpu_type_t expected_cpu_type, |
| cpu_subtype_t expected_cpu_subtype) { |
| assert(!buffer_.start); |
| buffer_.start = buffer; |
| buffer_.end = buffer + size; |
| ByteCursor cursor(&buffer_, true); |
| uint32_t magic; |
| if (!(cursor >> magic)) { |
| reporter_->HeaderTruncated(); |
| return false; |
| } |
| |
| if (expected_cpu_type != CPU_TYPE_ANY) { |
| uint32_t expected_magic; |
| // validate that magic matches the expected cpu type |
| switch (expected_cpu_type) { |
| case CPU_TYPE_ARM: |
| case CPU_TYPE_I386: |
| expected_magic = MH_CIGAM; |
| break; |
| case CPU_TYPE_POWERPC: |
| expected_magic = MH_MAGIC; |
| break; |
| case CPU_TYPE_ARM_64: |
| case CPU_TYPE_X86_64: |
| expected_magic = MH_CIGAM_64; |
| break; |
| case CPU_TYPE_POWERPC64: |
| expected_magic = MH_MAGIC_64; |
| break; |
| default: |
| reporter_->UnsupportedCPUType(expected_cpu_type); |
| return false; |
| } |
| |
| if (expected_magic != magic) { |
| reporter_->BadHeader(); |
| return false; |
| } |
| } |
| |
| // Since the byte cursor is in big-endian mode, a reversed magic number |
| // always indicates a little-endian file, regardless of our own endianness. |
| switch (magic) { |
| case MH_MAGIC: big_endian_ = true; bits_64_ = false; break; |
| case MH_CIGAM: big_endian_ = false; bits_64_ = false; break; |
| case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break; |
| case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break; |
| default: |
| reporter_->BadHeader(); |
| return false; |
| } |
| cursor.set_big_endian(big_endian_); |
| uint32_t commands_size, reserved; |
| cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_ |
| >> commands_size >> flags_; |
| if (bits_64_) |
| cursor >> reserved; |
| if (!cursor) { |
| reporter_->HeaderTruncated(); |
| return false; |
| } |
| |
| if (expected_cpu_type != CPU_TYPE_ANY && |
| (expected_cpu_type != cpu_type_ || |
| expected_cpu_subtype != cpu_subtype_)) { |
| reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_, |
| expected_cpu_type, expected_cpu_subtype); |
| return false; |
| } |
| |
| cursor |
| .PointTo(&load_commands_.start, commands_size) |
| .PointTo(&load_commands_.end, 0); |
| if (!cursor) { |
| reporter_->LoadCommandRegionTruncated(); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const { |
| ByteCursor list_cursor(&load_commands_, big_endian_); |
| |
| for (size_t index = 0; index < load_command_count_; ++index) { |
| // command refers to this load command alone, so that cursor will |
| // refuse to read past the load command's end. But since we haven't |
| // read the size yet, let command initially refer to the entire |
| // remainder of the load command series. |
| ByteBuffer command(list_cursor.here(), list_cursor.Available()); |
| ByteCursor cursor(&command, big_endian_); |
| |
| // Read the command type and size --- fields common to all commands. |
| uint32_t type, size; |
| if (!(cursor >> type)) { |
| reporter_->LoadCommandsOverrun(load_command_count_, index, 0); |
| return false; |
| } |
| if (!(cursor >> size) || size > command.Size()) { |
| reporter_->LoadCommandsOverrun(load_command_count_, index, type); |
| return false; |
| } |
| |
| // Now that we've read the length, restrict command's range to this |
| // load command only. |
| command.end = command.start + size; |
| |
| switch (type) { |
| case LC_SEGMENT: |
| case LC_SEGMENT_64: { |
| Segment segment; |
| segment.bits_64 = (type == LC_SEGMENT_64); |
| size_t word_size = segment.bits_64 ? 8 : 4; |
| cursor.CString(&segment.name, 16); |
| cursor |
| .Read(word_size, false, &segment.vmaddr) |
| .Read(word_size, false, &segment.vmsize) |
| .Read(word_size, false, &segment.fileoff) |
| .Read(word_size, false, &segment.filesize); |
| cursor >> segment.maxprot |
| >> segment.initprot |
| >> segment.nsects |
| >> segment.flags; |
| if (!cursor) { |
| reporter_->LoadCommandTooShort(index, type); |
| return false; |
| } |
| if (segment.fileoff > buffer_.Size() || |
| segment.filesize > buffer_.Size() - segment.fileoff) { |
| reporter_->MisplacedSegmentData(segment.name); |
| return false; |
| } |
| // Mach-O files in .dSYM bundles have the contents of the loaded |
| // segments removed, and their file offsets and file sizes zeroed |
| // out. To help us handle this special case properly, give such |
| // segments' contents NULL starting and ending pointers. |
| if (segment.fileoff == 0 && segment.filesize == 0) { |
| segment.contents.start = segment.contents.end = NULL; |
| } else { |
| segment.contents.start = buffer_.start + segment.fileoff; |
| segment.contents.end = segment.contents.start + segment.filesize; |
| } |
| // The section list occupies the remainder of this load command's space. |
| segment.section_list.start = cursor.here(); |
| segment.section_list.end = command.end; |
| |
| if (!handler->SegmentCommand(segment)) |
| return false; |
| break; |
| } |
| |
| case LC_SYMTAB: { |
| uint32_t symoff, nsyms, stroff, strsize; |
| cursor >> symoff >> nsyms >> stroff >> strsize; |
| if (!cursor) { |
| reporter_->LoadCommandTooShort(index, type); |
| return false; |
| } |
| // How big are the entries in the symbol table? |
| // sizeof(struct nlist_64) : sizeof(struct nlist), |
| // but be paranoid about alignment vs. target architecture. |
| size_t symbol_size = bits_64_ ? 16 : 12; |
| // How big is the entire symbol array? |
| size_t symbols_size = nsyms * symbol_size; |
| if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff || |
| stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) { |
| reporter_->MisplacedSymbolTable(); |
| return false; |
| } |
| ByteBuffer entries(buffer_.start + symoff, symbols_size); |
| ByteBuffer names(buffer_.start + stroff, strsize); |
| if (!handler->SymtabCommand(entries, names)) |
| return false; |
| break; |
| } |
| |
| default: { |
| if (!handler->UnknownCommand(type, command)) |
| return false; |
| break; |
| } |
| } |
| |
| list_cursor.set_here(command.end); |
| } |
| |
| return true; |
| } |
| |
| // A load command handler that looks for a segment of a given name. |
| class Reader::SegmentFinder : public LoadCommandHandler { |
| public: |
| // Create a load command handler that looks for a segment named NAME, |
| // and sets SEGMENT to describe it if found. |
| SegmentFinder(const string &name, Segment *segment) |
| : name_(name), segment_(segment), found_() { } |
| |
| // Return true if the traversal found the segment, false otherwise. |
| bool found() const { return found_; } |
| |
| bool SegmentCommand(const Segment &segment) { |
| if (segment.name == name_) { |
| *segment_ = segment; |
| found_ = true; |
| return false; |
| } |
| return true; |
| } |
| |
| private: |
| // The name of the segment our creator is looking for. |
| const string &name_; |
| |
| // Where we should store the segment if found. (WEAK) |
| Segment *segment_; |
| |
| // True if we found the segment. |
| bool found_; |
| }; |
| |
| bool Reader::FindSegment(const string &name, Segment *segment) const { |
| SegmentFinder finder(name, segment); |
| WalkLoadCommands(&finder); |
| return finder.found(); |
| } |
| |
| bool Reader::WalkSegmentSections(const Segment &segment, |
| SectionHandler *handler) const { |
| size_t word_size = segment.bits_64 ? 8 : 4; |
| ByteCursor cursor(&segment.section_list, big_endian_); |
| |
| for (size_t i = 0; i < segment.nsects; i++) { |
| Section section; |
| section.bits_64 = segment.bits_64; |
| uint64_t size, offset; |
| uint32_t dummy32; |
| cursor |
| .CString(§ion.section_name, 16) |
| .CString(§ion.segment_name, 16) |
| .Read(word_size, false, §ion.address) |
| .Read(word_size, false, &size) |
| .Read(sizeof(uint32_t), false, &offset) // clears high bits of |offset| |
| >> section.align |
| >> dummy32 |
| >> dummy32 |
| >> section.flags |
| >> dummy32 |
| >> dummy32; |
| if (section.bits_64) |
| cursor >> dummy32; |
| if (!cursor) { |
| reporter_->SectionsMissing(segment.name); |
| return false; |
| } |
| |
| // Even 64-bit Mach-O isn’t a true 64-bit format in that it doesn’t handle |
| // 64-bit file offsets gracefully. Segment load commands do contain 64-bit |
| // file offsets, but sections within do not. Because segments load |
| // contiguously, recompute each section’s file offset on the basis of its |
| // containing segment’s file offset and the difference between the section’s |
| // and segment’s load addresses. If truncation is detected, honor the |
| // recomputed offset. |
| if (segment.bits_64 && |
| segment.fileoff + segment.filesize > |
| std::numeric_limits<uint32_t>::max()) { |
| const uint64_t section_offset_recomputed = |
| segment.fileoff + section.address - segment.vmaddr; |
| if (offset == static_cast<uint32_t>(section_offset_recomputed)) { |
| offset = section_offset_recomputed; |
| } |
| } |
| |
| const uint32_t section_type = section.flags & SECTION_TYPE; |
| if (section_type == S_ZEROFILL || section_type == S_THREAD_LOCAL_ZEROFILL || |
| section_type == S_GB_ZEROFILL) { |
| // Zero-fill sections have a size, but no contents. |
| section.contents.start = section.contents.end = NULL; |
| } else if (segment.contents.start == NULL && |
| segment.contents.end == NULL) { |
| // Mach-O files in .dSYM bundles have the contents of the loaded |
| // segments removed, and their file offsets and file sizes zeroed |
| // out. However, the sections within those segments still have |
| // non-zero sizes. There's no reason to call MisplacedSectionData in |
| // this case; the caller may just need the section's load |
| // address. But do set the contents' limits to NULL, for safety. |
| section.contents.start = section.contents.end = NULL; |
| } else { |
| if (offset < size_t(segment.contents.start - buffer_.start) || |
| offset > size_t(segment.contents.end - buffer_.start) || |
| size > size_t(segment.contents.end - buffer_.start - offset)) { |
| reporter_->MisplacedSectionData(section.section_name, |
| section.segment_name); |
| return false; |
| } |
| section.contents.start = buffer_.start + offset; |
| section.contents.end = section.contents.start + size; |
| } |
| if (!handler->HandleSection(section)) |
| return false; |
| } |
| return true; |
| } |
| |
| // A SectionHandler that builds a SectionMap for the sections within a |
| // given segment. |
| class Reader::SectionMapper: public SectionHandler { |
| public: |
| // Create a SectionHandler that populates MAP with an entry for |
| // each section it is given. |
| SectionMapper(SectionMap *map) : map_(map) { } |
| bool HandleSection(const Section §ion) { |
| (*map_)[section.section_name] = section; |
| return true; |
| } |
| private: |
| // The map under construction. (WEAK) |
| SectionMap *map_; |
| }; |
| |
| bool Reader::MapSegmentSections(const Segment &segment, |
| SectionMap *section_map) const { |
| section_map->clear(); |
| SectionMapper mapper(section_map); |
| return WalkSegmentSections(segment, &mapper); |
| } |
| |
| } // namespace mach_o |
| } // namespace google_breakpad |