linux, dump_syms: Filter module entries outside specified ranges

Partitioned libraries generated with lld and llvm-objcopy currently
contain a superset of debug information, beyond what applies to the
library itself. This is because objcopy cannot split up debug
information by partition - instead, it places a copy of all debug
information into each partition.

Until lld or objcopy gains the ability to split up debug information by
partition, let dump_syms do the next best thing:

- Find the address ranges of all PT_LOAD segments in the lib.
- Supply these to the Module being generated.
- Filter additions to the Module based on these ranges.

Bug: 990190
Change-Id: Ib5f279f42e3f6ea79eed9665efbcc23c3c5d25dc
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/1884699
Reviewed-by: Joshua Peraza <jperaza@chromium.org>
diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc
index 660f133..e561ad9 100644
--- a/src/common/linux/dump_symbols.cc
+++ b/src/common/linux/dump_symbols.cc
@@ -182,6 +182,23 @@
   return 0;
 }
 
+// Find the set of address ranges for all PT_LOAD segments.
+template <typename ElfClass>
+vector<Module::Range> GetPtLoadSegmentRanges(
+    const typename ElfClass::Phdr* program_headers,
+    int nheader) {
+  typedef typename ElfClass::Phdr Phdr;
+  vector<Module::Range> ranges;
+
+  for (int i = 0; i < nheader; ++i) {
+    const Phdr& header = program_headers[i];
+    if (header.p_type == PT_LOAD) {
+      ranges.push_back(Module::Range(header.p_vaddr, header.p_memsz));
+    }
+  }
+  return ranges;
+}
+
 #ifndef NO_STABS_SUPPORT
 template<typename ElfClass>
 bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
@@ -649,6 +666,14 @@
   module->SetLoadAddress(loading_addr);
   info->set_loading_addr(loading_addr, obj_file);
 
+  // Allow filtering of extraneous debug information in partitioned libraries.
+  // Such libraries contain debug information for all libraries extracted from
+  // the same combined library, implying extensive duplication.
+  vector<Module::Range> address_ranges = GetPtLoadSegmentRanges<ElfClass>(
+      GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
+      elf_header->e_phnum);
+  module->SetAddressRanges(address_ranges);
+
   const Shdr* sections =
       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
   const Shdr* section_names = sections + elf_header->e_shstrndx;
diff --git a/src/common/module.cc b/src/common/module.cc
index dc4f957..aff2212 100644
--- a/src/common/module.cc
+++ b/src/common/module.cc
@@ -76,11 +76,19 @@
   load_address_ = address;
 }
 
+void Module::SetAddressRanges(const vector<Range>& ranges) {
+  address_ranges_ = ranges;
+}
+
 void Module::AddFunction(Function *function) {
   // FUNC lines must not hold an empty name, so catch the problem early if
   // callers try to add one.
   assert(!function->name.empty());
 
+  if (!AddressIsInModule(function->address)) {
+    return;
+  }
+
   // FUNCs are better than PUBLICs as they come with sizes, so remove an extern
   // with the same address if present.
   Extern ext(function->address);
@@ -123,10 +131,18 @@
 }
 
 void Module::AddStackFrameEntry(StackFrameEntry *stack_frame_entry) {
+  if (!AddressIsInModule(stack_frame_entry->address)) {
+    return;
+  }
+
   stack_frame_entries_.push_back(stack_frame_entry);
 }
 
 void Module::AddExtern(Extern *ext) {
+  if (!AddressIsInModule(ext->address)) {
+    return;
+  }
+
   std::pair<ExternSet::iterator,bool> ret = externs_.insert(ext);
   if (!ret.second) {
     // Free the duplicate that was not inserted because this Module
@@ -232,6 +248,19 @@
   return stream.good();
 }
 
+bool Module::AddressIsInModule(Address address) const {
+  if (address_ranges_.empty()) {
+    return true;
+  }
+  for (const auto& segment : address_ranges_) {
+    if (address >= segment.address &&
+        address < segment.address + segment.size) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool Module::Write(std::ostream &stream, SymbolData symbol_data) {
   stream << "MODULE " << os_ << " " << architecture_ << " "
          << id_ << " " << name_ << "\n";
diff --git a/src/common/module.h b/src/common/module.h
index 7b1a0db..db8dabd 100644
--- a/src/common/module.h
+++ b/src/common/module.h
@@ -205,6 +205,14 @@
   // Write is used.
   void SetLoadAddress(Address load_address);
 
+  // Sets address filtering on elements added to the module.  This allows
+  // libraries with extraneous debug symbols to generate symbol files containing
+  // only relevant symbols.  For example, an LLD-generated partition library may
+  // contain debug information pertaining to all partitions derived from a
+  // single "combined" library.  Filtering applies only to elements added after
+  // this method is called.
+  void SetAddressRanges(const vector<Range>& ranges);
+
   // Add FUNCTION to the module. FUNCTION's name must not be empty.
   // This module owns all Function objects added with this function:
   // destroying the module destroys them as well.
@@ -302,6 +310,10 @@
   // if an error occurs, return false, and leave errno set.
   static bool WriteRuleMap(const RuleMap &rule_map, std::ostream &stream);
 
+  // Returns true if the specified address resides within a specified address
+  // range, or if no ranges have been specified.
+  bool AddressIsInModule(Address address) const;
+
   // Module header entries.
   string name_, os_, architecture_, id_, code_id_;
 
@@ -310,6 +322,10 @@
   // address.
   Address load_address_;
 
+  // The set of valid address ranges of the module.  If specified, attempts to
+  // add elements residing outside these ranges will be silently filtered.
+  vector<Range> address_ranges_;
+
   // Relation for maps whose keys are strings shared with some other
   // structure.
   struct CompareStringPtrs {
diff --git a/src/common/module_unittest.cc b/src/common/module_unittest.cc
index 819fa03..b6770c5 100644
--- a/src/common/module_unittest.cc
+++ b/src/common/module_unittest.cc
@@ -564,3 +564,53 @@
                "PUBLIC cc00 0 arm_func\n",
                contents.c_str());
 }
+
+TEST(Write, OutOfRangeAddresses) {
+  stringstream s;
+  Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID);
+
+  // Specify an allowed address range, representing a PT_LOAD segment in a
+  // module.
+  vector<Module::Range> address_ranges = {
+    Module::Range(0x2000ULL, 0x1000ULL),
+  };
+  m.SetAddressRanges(address_ranges);
+
+  // Add three stack frames (one lower, one in, and one higher than the allowed
+  // address range).  Only the middle frame should be captured.
+  Module::StackFrameEntry* entry1 = new Module::StackFrameEntry();
+  entry1->address = 0x1000ULL;
+  entry1->size = 0x100ULL;
+  m.AddStackFrameEntry(entry1);
+  Module::StackFrameEntry* entry2 = new Module::StackFrameEntry();
+  entry2->address = 0x2000ULL;
+  entry2->size = 0x100ULL;
+  m.AddStackFrameEntry(entry2);
+  Module::StackFrameEntry* entry3 = new Module::StackFrameEntry();
+  entry3->address = 0x3000ULL;
+  entry3->size = 0x100ULL;
+  m.AddStackFrameEntry(entry3);
+
+  // Add a function outside the allowed range.
+  Module::File* file = m.FindFile("file_name.cc");
+  Module::Function* function = new Module::Function(
+      "function_name", 0x4000ULL);
+  Module::Range range(0x4000ULL, 0x1000ULL);
+  function->ranges.push_back(range);
+  function->parameter_size = 0x100ULL;
+  Module::Line line = { 0x4000ULL, 0x100ULL, file, 67519080 };
+  function->lines.push_back(line);
+  m.AddFunction(function);
+
+  // Add an extern outside the allowed range.
+  Module::Extern* extern1 = new Module::Extern(0x5000ULL);
+  extern1->name = "_xyz";
+  m.AddExtern(extern1);
+
+  m.Write(s, ALL_SYMBOL_DATA);
+
+  EXPECT_STREQ("MODULE os-name architecture id-string name with spaces\n"
+               "STACK CFI INIT 2000 100 \n",
+               s.str().c_str());
+
+}