src/common/stabs_reader.cc - breakpad - Git at Google

 // Copyright (c) 2010 Google Inc. All Rights Reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 // copyright notice, this list of conditions and the following disclaimer
 // in the documentation and/or other materials provided with the
 // distribution.
 //     * Neither the name of Google Inc. nor the names of its
 // contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

 // This file implements the google_breakpad::StabsReader class.
 // See stabs_reader.h.

 #include "common/stabs_reader.h"

 #include <assert.h>
 #include <stab.h>
 #include <string.h>

 #include <string>

 #include "common/using_std_string.h"

 using std::vector;

 namespace google_breakpad {

 // Build an iterator over the STABS entries held in BUFFER. BIG_ENDIAN
 // is handed to the underlying cursor, and VALUE_SIZE is the width in
 // bytes of each entry's value field. The first entry is fetched right
 // away, so the iterator's at_end flag is meaningful immediately.
 StabsReader::EntryIterator::EntryIterator(const ByteBuffer *buffer,
                                           bool big_endian,
                                           size_t value_size)
     : value_size_(value_size),
       cursor_(buffer, big_endian) {
   // Other widths would decode without trouble, but a caller passing
   // one has most likely made a mistake (a size given in bits, for
   // example), so only 4 and 8 are accepted.
   assert(value_size == 4 || value_size == 8);

   // Entries are numbered from zero; load the first one now.
   entry_.index = 0;
   Fetch();
 }

 // Decode the entry at the cursor's current position into entry_.
 // The fields are read in order: a 4-byte name offset, a 1-byte type,
 // a 1-byte 'other' field, a 2-byte descriptor, and a value that is
 // value_size_ bytes wide.
 // NOTE(review): the 'false' argument presumably selects an unsigned
 // read -- confirm against the cursor's Read declaration.
 void StabsReader::EntryIterator::Fetch() {
   cursor_.Read(4, false, &entry_.name_offset);
   cursor_.Read(1, false, &entry_.type);
   cursor_.Read(1, false, &entry_.other);
   cursor_.Read(2, false, &entry_.descriptor);
   cursor_.Read(value_size_, false, &entry_.value);

   // The cursor tests false once a read has failed (presumably by
   // running off the end of the buffer); that is our end marker.
   entry_.at_end = !cursor_;
 }

 // Create a reader for the STAB_SIZE bytes of STABS entries at STAB,
 // whose names live in the STABSTR_SIZE bytes of string data at
 // STABSTR. BIG_ENDIAN and VALUE_SIZE are forwarded to the entry
 // iterator; UNITIZED says whether N_UNDF entries delimit per-unit
 // string tables (see Process); parsing events are reported to HANDLER.
 // Nothing is parsed here beyond what the iterator fetches on
 // construction.
 StabsReader::StabsReader(const uint8_t *stab,
                          size_t stab_size,
                          const uint8_t *stabstr,
                          size_t stabstr_size,
                          bool big_endian,
                          size_t value_size,
                          bool unitized,
                          StabsHandler *handler)
     : entries_(stab, stab_size),
       strings_(stabstr, stabstr_size),
       iterator_(&entries_, big_endian, value_size),
       unitized_(unitized),
       handler_(handler),
       // Both string offsets start at the head of the string section.
       string_offset_(0),
       next_cu_string_offset_(0),
       // No source file is current until an N_SO or N_SOL names one.
       current_source_file_(NULL) {
 }

 // Return the name of the current entry: the string found at the
 // entry's name offset, relative to the current compilation unit's
 // base offset within the string section. If that position falls
 // outside the string section, warn the handler and return the string
 // at the very start of the section instead.
 const char *StabsReader::SymbolString() {
   const ptrdiff_t offset = string_offset_ + iterator_->name_offset;
   const bool in_bounds = offset >= 0 && (size_t) offset < strings_.Size();
   if (in_bounds)
     return reinterpret_cast<const char *>(strings_.start + offset);

   handler_->Warning("symbol %d: name offset outside the string section\n",
                     iterator_->index);
   // Fall back to offset zero (our null string) so that every name we
   // hand out still points into the string section, as promised.
   return reinterpret_cast<const char *>(strings_.start + 0);
 }

 // Walk every STABS entry, dispatching compilation units (and, on
 // Mach-O builds, exported symbols) to their processing routines.
 // Returns true once all entries have been consumed, or false as soon
 // as a processing routine reports that the handler asked to stop.
 bool StabsReader::Process() {
   while (!iterator_->at_end) {
     if (iterator_->type == N_SO) {
       if (! ProcessCompilationUnit())
         return false;
     } else if (iterator_->type == N_UNDF && unitized_) {
       // In unitized STABS (including Linux STABS, and pretty much anything
       // else that puts STABS data in sections), at the head of each
       // compilation unit's entries there is an N_UNDF stab giving the
       // number of symbols in the compilation unit, and the number of bytes
       // that compilation unit's strings take up in the .stabstr section.
       // Each CU's strings are separate; the n_strx values are offsets
       // within the current CU's portion of the .stabstr section.
       //
       // As an optimization, the GNU linker combines all the
       // compilation units into one, with a single N_UNDF at the
       // beginning. However, other linkers, like Gold, do not perform
       // this optimization.
       string_offset_ = next_cu_string_offset_;
       next_cu_string_offset_ = iterator_->value;
       ++iterator_;
     }
 #if defined(HAVE_MACH_O_NLIST_H)
     // Export symbols in Mach-O binaries look like this.
     // This is necessary in order to be able to dump symbols
     // from OS X system libraries.
     else if ((iterator_->type & N_STAB) == 0 &&
                (iterator_->type & N_TYPE) == N_SECT) {
       // ProcessExtern leaves the iterator where it is when the handler
       // rejects the symbol, so its result must be honored here:
       // otherwise this loop would revisit the same entry forever.
       if (! ProcessExtern())
         return false;
     }
 #endif
     else {
       // Anything else at the top level is of no interest; skip it.
       ++iterator_;
     }
   }
   return true;
 }

 bool StabsReader::ProcessCompilationUnit() {
   assert(!iterator_->at_end && iterator_->type == N_SO);

   // There may be an N_SO entry whose name ends with a slash,
   // indicating the directory in which the compilation occurred.
   // The build directory defaults to NULL.
   const char *build_directory = NULL;
   {
     const char *name = SymbolString();
     if (name[0] && name[strlen(name) - 1] == '/') {
       build_directory = name;
       ++iterator_;
     }
   }

   // We expect to see an N_SO entry with a filename next, indicating
   // the start of the compilation unit.
   {
     if (iterator_->at_end || iterator_->type != N_SO)
       return true;
     const char *name = SymbolString();
     if (name[0] == '\0') {
       // This seems to be a stray end-of-compilation-unit marker;
       // consume it, but don't report the end, since we didn't see a
       // beginning.
       ++iterator_;
       return true;
     }
     current_source_file_ = name;
   }

   if (! handler_->StartCompilationUnit(current_source_file_,
                                        iterator_->value,
                                        build_directory))
     return false;

   ++iterator_;

   // The STABS documentation says that some compilers may emit
   // additional N_SO entries with names immediately following the
   // first, and that they should be ignored.  However, the original
   // Breakpad STABS reader doesn't ignore them, so we won't either.

   // Process the body of the compilation unit, up to the next N_SO.
   while (!iterator_->at_end && iterator_->type != N_SO) {
     if (iterator_->type == N_FUN) {
       if (! ProcessFunction())
         return false;
     } else if (iterator_->type == N_SLINE) {
       // Mac OS X STABS place SLINE records before functions.
       Line line;
       // The value of an N_SLINE entry that appears outside a function is
       // the absolute address of the line.
       line.address = iterator_->value;
       line.filename = current_source_file_;
       // The n_desc of a N_SLINE entry is the line number.  It's a
       // signed 16-bit field; line numbers from 32768 to 65535 are
       // stored as n-65536.
       line.number = (uint16_t) iterator_->descriptor;
       queued_lines_.push_back(line);
       ++iterator_;
     } else if (iterator_->type == N_SOL) {
       current_source_file_ = SymbolString();
       ++iterator_;
     } else {
       // Ignore anything else.
       ++iterator_;
     }
   }

   // An N_SO with an empty name indicates the end of the compilation
   // unit.  Default to zero.
   uint64_t ending_address = 0;
   if (!iterator_->at_end) {
     assert(iterator_->type == N_SO);
     const char *name = SymbolString();
     if (name[0] == '\0') {
       ending_address = iterator_->value;
       ++iterator_;
     }
   }

   if (! handler_->EndCompilationUnit(ending_address))
     return false;

   queued_lines_.clear();

   return true;
 }

 bool StabsReader::ProcessFunction() {
   assert(!iterator_->at_end && iterator_->type == N_FUN);

   uint64_t function_address = iterator_->value;
   // The STABS string for an N_FUN entry is the name of the function,
   // followed by a colon, followed by type information for the
   // function.  We want to pass the name alone to StartFunction.
   const char *stab_string = SymbolString();
   const char *name_end = strchr(stab_string, ':');
   if (! name_end)
     name_end = stab_string + strlen(stab_string);
   string name(stab_string, name_end - stab_string);
   if (! handler_->StartFunction(name, function_address))
     return false;
   ++iterator_;

   // If there were any SLINE records given before the function, report them now.
   for (vector<Line>::const_iterator it = queued_lines_.begin();
        it != queued_lines_.end(); it++) {
     if (!handler_->Line(it->address, it->filename, it->number))
       return false;
   }
   queued_lines_.clear();

   while (!iterator_->at_end) {
     if (iterator_->type == N_SO || iterator_->type == N_FUN)
       break;
     else if (iterator_->type == N_SLINE) {
       // The value of an N_SLINE entry is the offset of the line from
       // the function's start address.
       uint64_t line_address = function_address + iterator_->value;
       // The n_desc of a N_SLINE entry is the line number.  It's a
       // signed 16-bit field; line numbers from 32768 to 65535 are
       // stored as n-65536.
       uint16_t line_number = iterator_->descriptor;
       if (! handler_->Line(line_address, current_source_file_, line_number))
         return false;
       ++iterator_;
     } else if (iterator_->type == N_SOL) {
       current_source_file_ = SymbolString();
       ++iterator_;
     } else
       // Ignore anything else.
       ++iterator_;
   }

   // We've reached the end of the function. See if we can figure out its
   // ending address.
   uint64_t ending_address = 0;
   if (!iterator_->at_end) {
     assert(iterator_->type == N_SO || iterator_->type == N_FUN);
     if (iterator_->type == N_FUN) {
       const char *symbol_name = SymbolString();
       if (symbol_name[0] == '\0') {
         // An N_FUN entry with no name is a terminator for this function;
         // its value is the function's size.
         ending_address = function_address + iterator_->value;
         ++iterator_;
       } else {
         // An N_FUN entry with a name is the next function, and we can take
         // its value as our ending address. Don't advance the iterator, as
         // we'll use this symbol to start the next function as well.
         ending_address = iterator_->value;
       }
     } else {
       // An N_SO entry could be an end-of-compilation-unit marker, or the
       // start of the next compilation unit, but in either case, its value
       // is our ending address. We don't advance the iterator;
       // ProcessCompilationUnit will decide what to do with this symbol.
       ending_address = iterator_->value;
     }
   }

   if (! handler_->EndFunction(ending_address))
     return false;

   return true;
 }

 // Report the current entry to the handler as an external symbol,
 // passing its name and value. The iterator moves past the entry only
 // if the handler accepts it. Returns the handler's verdict: false
 // means processing should stop.
 bool StabsReader::ProcessExtern() {
 #if defined(HAVE_MACH_O_NLIST_H)
   assert(!iterator_->at_end &&
          (iterator_->type & N_STAB) == 0 &&
          (iterator_->type & N_TYPE) == N_SECT);
 #endif

   // TODO(mark): only do symbols in the text section?
   const bool keep_going = handler_->Extern(SymbolString(), iterator_->value);
   if (keep_going)
     ++iterator_;
   return keep_going;
 }

 } // namespace google_breakpad
	// Copyright (c) 2010 Google Inc. All Rights Reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// * Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above
	// copyright notice, this list of conditions and the following disclaimer
	// in the documentation and/or other materials provided with the
	// distribution.
	// * Neither the name of Google Inc. nor the names of its
	// contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

	// This file implements the google_breakpad::StabsReader class.
	// See stabs_reader.h.

	#include "common/stabs_reader.h"

	#include <assert.h>
	#include <stab.h>
	#include <string.h>

	#include <string>

	#include "common/using_std_string.h"

	using std::vector;

	namespace google_breakpad {

	// Build an iterator over the STABS entries held in BUFFER. BIG_ENDIAN
	// is handed to the underlying cursor, and VALUE_SIZE is the width in
	// bytes of each entry's value field. The first entry is fetched right
	// away, so the iterator's at_end flag is meaningful immediately.
	StabsReader::EntryIterator::EntryIterator(const ByteBuffer *buffer,
	                                          bool big_endian,
	                                          size_t value_size)
	    : value_size_(value_size),
	      cursor_(buffer, big_endian) {
	  // Other widths would decode without trouble, but a caller passing
	  // one has most likely made a mistake (a size given in bits, for
	  // example), so only 4 and 8 are accepted.
	  assert(value_size == 4 || value_size == 8);

	  // Entries are numbered from zero; load the first one now.
	  entry_.index = 0;
	  Fetch();
	}

	// Decode the entry at the cursor's current position into entry_.
	// The fields are read in order: a 4-byte name offset, a 1-byte type,
	// a 1-byte 'other' field, a 2-byte descriptor, and a value that is
	// value_size_ bytes wide.
	// NOTE(review): the 'false' argument presumably selects an unsigned
	// read -- confirm against the cursor's Read declaration.
	void StabsReader::EntryIterator::Fetch() {
	  cursor_.Read(4, false, &entry_.name_offset);
	  cursor_.Read(1, false, &entry_.type);
	  cursor_.Read(1, false, &entry_.other);
	  cursor_.Read(2, false, &entry_.descriptor);
	  cursor_.Read(value_size_, false, &entry_.value);

	  // The cursor tests false once a read has failed (presumably by
	  // running off the end of the buffer); that is our end marker.
	  entry_.at_end = !cursor_;
	}

	// Create a reader for the STAB_SIZE bytes of STABS entries at STAB,
	// whose names live in the STABSTR_SIZE bytes of string data at
	// STABSTR. BIG_ENDIAN and VALUE_SIZE are forwarded to the entry
	// iterator; UNITIZED says whether N_UNDF entries delimit per-unit
	// string tables (see Process); parsing events are reported to HANDLER.
	// Nothing is parsed here beyond what the iterator fetches on
	// construction.
	StabsReader::StabsReader(const uint8_t *stab,
	                         size_t stab_size,
	                         const uint8_t *stabstr,
	                         size_t stabstr_size,
	                         bool big_endian,
	                         size_t value_size,
	                         bool unitized,
	                         StabsHandler *handler)
	    : entries_(stab, stab_size),
	      strings_(stabstr, stabstr_size),
	      iterator_(&entries_, big_endian, value_size),
	      unitized_(unitized),
	      handler_(handler),
	      // Both string offsets start at the head of the string section.
	      string_offset_(0),
	      next_cu_string_offset_(0),
	      // No source file is current until an N_SO or N_SOL names one.
	      current_source_file_(NULL) {
	}

	// Return the name of the current entry: the string found at the
	// entry's name offset, relative to the current compilation unit's
	// base offset within the string section. If that position falls
	// outside the string section, warn the handler and return the string
	// at the very start of the section instead.
	const char *StabsReader::SymbolString() {
	  ptrdiff_t offset = string_offset_ + iterator_->name_offset;
	  if (offset < 0 || (size_t) offset >= strings_.Size()) {
	    handler_->Warning("symbol %d: name offset outside the string section\n",
	                      iterator_->index);
	    // Fall back to offset zero (our null string) so that every name
	    // we hand out still points into the string section, as promised.
	    offset = 0;
	  }
	  return reinterpret_cast<const char *>(strings_.start + offset);
	}

	// Walk every STABS entry, dispatching compilation units (and, on
	// Mach-O builds, exported symbols) to their processing routines.
	// Returns true once all entries have been consumed, or false as soon
	// as a processing routine reports that the handler asked to stop.
	bool StabsReader::Process() {
	  while (!iterator_->at_end) {
	    if (iterator_->type == N_SO) {
	      if (! ProcessCompilationUnit())
	        return false;
	    } else if (iterator_->type == N_UNDF && unitized_) {
	      // In unitized STABS (including Linux STABS, and pretty much anything
	      // else that puts STABS data in sections), at the head of each
	      // compilation unit's entries there is an N_UNDF stab giving the
	      // number of symbols in the compilation unit, and the number of bytes
	      // that compilation unit's strings take up in the .stabstr section.
	      // Each CU's strings are separate; the n_strx values are offsets
	      // within the current CU's portion of the .stabstr section.
	      //
	      // As an optimization, the GNU linker combines all the
	      // compilation units into one, with a single N_UNDF at the
	      // beginning. However, other linkers, like Gold, do not perform
	      // this optimization.
	      string_offset_ = next_cu_string_offset_;
	      next_cu_string_offset_ = iterator_->value;
	      ++iterator_;
	    }
	#if defined(HAVE_MACH_O_NLIST_H)
	    // Export symbols in Mach-O binaries look like this.
	    // This is necessary in order to be able to dump symbols
	    // from OS X system libraries.
	    else if ((iterator_->type & N_STAB) == 0 &&
	               (iterator_->type & N_TYPE) == N_SECT) {
	      // ProcessExtern leaves the iterator where it is when the handler
	      // rejects the symbol, so its result must be honored here:
	      // otherwise this loop would revisit the same entry forever.
	      if (! ProcessExtern())
	        return false;
	    }
	#endif
	    else {
	      // Anything else at the top level is of no interest; skip it.
	      ++iterator_;
	    }
	  }
	  return true;
	}

	// Process one compilation unit, beginning at the N_SO entry the
	// iterator currently points to. Reports the unit's start and end to
	// the handler and processes the functions inside it. Line entries seen
	// outside a function are queued for the next function. Returns false
	// if the handler asked to stop, true otherwise.
	bool StabsReader::ProcessCompilationUnit() {
	  assert(!iterator_->at_end && iterator_->type == N_SO);

	  // There may be an N_SO entry whose name ends with a slash,
	  // indicating the directory in which the compilation occurred.
	  // The build directory defaults to NULL.
	  const char *build_directory = NULL;
	  {
	    const char *name = SymbolString();
	    if (name[0] && name[strlen(name) - 1] == '/') {
	      build_directory = name;
	      ++iterator_;
	    }
	  }

	  // We expect to see an N_SO entry with a filename next, indicating
	  // the start of the compilation unit.
	  {
	    if (iterator_->at_end || iterator_->type != N_SO)
	      return true;
	    const char *name = SymbolString();
	    if (name[0] == '\0') {
	      // This seems to be a stray end-of-compilation-unit marker;
	      // consume it, but don't report the end, since we didn't see a
	      // beginning.
	      ++iterator_;
	      return true;
	    }
	    current_source_file_ = name;
	  }

	  if (! handler_->StartCompilationUnit(current_source_file_,
	                                       iterator_->value,
	                                       build_directory))
	    return false;

	  ++iterator_;

	  // The STABS documentation says that some compilers may emit
	  // additional N_SO entries with names immediately following the
	  // first, and that they should be ignored. However, the original
	  // Breakpad STABS reader doesn't ignore them, so we won't either.

	  // Process the body of the compilation unit, up to the next N_SO.
	  while (!iterator_->at_end && iterator_->type != N_SO) {
	    if (iterator_->type == N_FUN) {
	      // ProcessFunction advances the iterator itself.
	      if (! ProcessFunction())
	        return false;
	    } else if (iterator_->type == N_SLINE) {
	      // Mac OS X STABS place SLINE records before functions.
	      Line line;
	      // The value of an N_SLINE entry that appears outside a function is
	      // the absolute address of the line.
	      line.address = iterator_->value;
	      line.filename = current_source_file_;
	      // The n_desc of a N_SLINE entry is the line number. It's a
	      // signed 16-bit field; line numbers from 32768 to 65535 are
	      // stored as n-65536.
	      line.number = (uint16_t) iterator_->descriptor;
	      queued_lines_.push_back(line);
	      ++iterator_;
	    } else if (iterator_->type == N_SOL) {
	      // Change of source file within the unit.
	      current_source_file_ = SymbolString();
	      ++iterator_;
	    } else {
	      // Ignore anything else.
	      ++iterator_;
	    }
	  }

	  // An N_SO with an empty name indicates the end of the compilation
	  // unit. Default to zero.
	  uint64_t ending_address = 0;
	  if (!iterator_->at_end) {
	    assert(iterator_->type == N_SO);
	    const char *name = SymbolString();
	    if (name[0] == '\0') {
	      ending_address = iterator_->value;
	      ++iterator_;
	    }
	  }

	  if (! handler_->EndCompilationUnit(ending_address))
	    return false;

	  // Lines that never found a function are dropped with the unit.
	  queued_lines_.clear();

	  return true;
	}

	// Process one function, beginning at the N_FUN entry the iterator
	// currently points to. Reports the function's start, its line records
	// (including any queued before the function began) and its end to the
	// handler. Returns false if the handler asked to stop, true otherwise.
	bool StabsReader::ProcessFunction() {
	  assert(!iterator_->at_end && iterator_->type == N_FUN);

	  uint64_t function_address = iterator_->value;
	  // The STABS string for an N_FUN entry is the name of the function,
	  // followed by a colon, followed by type information for the
	  // function. We want to pass the name alone to StartFunction.
	  const char *stab_string = SymbolString();
	  const char *name_end = strchr(stab_string, ':');
	  if (! name_end)
	    name_end = stab_string + strlen(stab_string);
	  string name(stab_string, name_end - stab_string);
	  if (! handler_->StartFunction(name, function_address))
	    return false;
	  ++iterator_;

	  // If there were any SLINE records given before the function, report
	  // them now.
	  for (vector<Line>::const_iterator it = queued_lines_.begin();
	       it != queued_lines_.end(); ++it) {
	    if (!handler_->Line(it->address, it->filename, it->number))
	      return false;
	  }
	  queued_lines_.clear();

	  // Function body: runs until the next N_SO or N_FUN, or the end of
	  // the entries.
	  while (!iterator_->at_end) {
	    if (iterator_->type == N_SO || iterator_->type == N_FUN) {
	      break;
	    } else if (iterator_->type == N_SLINE) {
	      // The value of an N_SLINE entry is the offset of the line from
	      // the function's start address.
	      uint64_t line_address = function_address + iterator_->value;
	      // The n_desc of a N_SLINE entry is the line number. It's a
	      // signed 16-bit field; line numbers from 32768 to 65535 are
	      // stored as n-65536.
	      uint16_t line_number = iterator_->descriptor;
	      if (! handler_->Line(line_address, current_source_file_, line_number))
	        return false;
	      ++iterator_;
	    } else if (iterator_->type == N_SOL) {
	      // Change of source file within the function.
	      current_source_file_ = SymbolString();
	      ++iterator_;
	    } else {
	      // Ignore anything else.
	      ++iterator_;
	    }
	  }

	  // We've reached the end of the function. See if we can figure out its
	  // ending address.
	  uint64_t ending_address = 0;
	  if (!iterator_->at_end) {
	    assert(iterator_->type == N_SO || iterator_->type == N_FUN);
	    if (iterator_->type == N_FUN) {
	      const char *symbol_name = SymbolString();
	      if (symbol_name[0] == '\0') {
	        // An N_FUN entry with no name is a terminator for this function;
	        // its value is the function's size.
	        ending_address = function_address + iterator_->value;
	        ++iterator_;
	      } else {
	        // An N_FUN entry with a name is the next function, and we can take
	        // its value as our ending address. Don't advance the iterator, as
	        // we'll use this symbol to start the next function as well.
	        ending_address = iterator_->value;
	      }
	    } else {
	      // An N_SO entry could be an end-of-compilation-unit marker, or the
	      // start of the next compilation unit, but in either case, its value
	      // is our ending address. We don't advance the iterator;
	      // ProcessCompilationUnit will decide what to do with this symbol.
	      ending_address = iterator_->value;
	    }
	  }

	  if (! handler_->EndFunction(ending_address))
	    return false;

	  return true;
	}

	// Report the current entry to the handler as an external symbol,
	// passing its name and value. The iterator moves past the entry only
	// if the handler accepts it. Returns the handler's verdict: false
	// means processing should stop.
	bool StabsReader::ProcessExtern() {
	#if defined(HAVE_MACH_O_NLIST_H)
	  assert(!iterator_->at_end &&
	         (iterator_->type & N_STAB) == 0 &&
	         (iterator_->type & N_TYPE) == N_SECT);
	#endif

	  // TODO(mark): only do symbols in the text section?
	  const bool keep_going = handler_->Extern(SymbolString(), iterator_->value);
	  if (keep_going)
	    ++iterator_;
	  return keep_going;
	}

	} // namespace google_breakpad