Read dwarf5 compilation unit headers.

Change-Id: Ia85b2fb7b3cb821ad353a0003223d8b8465e792d
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/2265259
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Reviewed-by: Mark Mentovai <mark@chromium.org>
diff --git a/src/common/dwarf/dwarf2enums.h b/src/common/dwarf/dwarf2enums.h
index bed7032..2b93aba 100644
--- a/src/common/dwarf/dwarf2enums.h
+++ b/src/common/dwarf/dwarf2enums.h
@@ -115,6 +115,16 @@
   DW_TAG_PGI_interface_block = 0xA020
 };
 
+enum DwarfUnitHeader {
+  DW_UT_compile = 0x01,
+  DW_UT_type = 0x02,
+  DW_UT_partial = 0x03,
+  DW_UT_skeleton = 0x04,
+  DW_UT_split_compile = 0x05,
+  DW_UT_split_type = 0x06,
+  DW_UT_lo_user = 0x80,
+  DW_UT_hi_user = 0xFF
+};
 
 enum DwarfHasChild {
   DW_children_no = 0,
diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc
index fbfff27..138f920 100644
--- a/src/common/dwarf/dwarf2reader.cc
+++ b/src/common/dwarf/dwarf2reader.cc
@@ -253,11 +253,50 @@
   return NULL;
 }
 
-// Read a DWARF2/3 header.
-// The header is variable length in DWARF3 (and DWARF2 as extended by
-// most compilers), and consists of an length field, a version number,
-// the offset in the .debug_abbrev section for our abbrevs, and an
-// address size.
+// Read the abbreviation offset from a compilation unit header.
+size_t CompilationUnit::ReadAbbrevOffset(const uint8_t* headerptr) {
+  assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
+  header_.abbrev_offset = reader_->ReadOffset(headerptr);
+  return reader_->OffsetSize();
+}
+
+// Read the address size from a compilation unit header.
+size_t CompilationUnit::ReadAddressSize(const uint8_t* headerptr) {
+  // Compare against less than or equal because this may be the last
+  // section in the file.
+  assert(headerptr + 1 <= buffer_ + buffer_length_);
+  header_.address_size = reader_->ReadOneByte(headerptr);
+  reader_->SetAddressSize(header_.address_size);
+  return 1;
+}
+
+// Read the DWO id from a split or skeleton compilation unit header.
+size_t CompilationUnit::ReadDwoId(const uint8_t* headerptr) {
+  assert(headerptr + 8 <= buffer_ + buffer_length_);
+  dwo_id_ = reader_->ReadEightBytes(headerptr);
+  return 8;
+}
+
+// Read the type signature from a type or split type compilation unit header.
+size_t CompilationUnit::ReadTypeSignature(const uint8_t* headerptr) {
+  assert(headerptr + 8 <= buffer_ + buffer_length_);
+  type_signature_ = reader_->ReadEightBytes(headerptr);
+  return 8;
+}
+
+// Read the type offset from a type or split type compilation unit header.
+size_t CompilationUnit::ReadTypeOffset(const uint8_t* headerptr) {
+  assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
+  type_offset_ = reader_->ReadOffset(headerptr);
+  return reader_->OffsetSize();
+}
+
+
+// Read a DWARF header.  The header is variable length in DWARF3 and DWARF4
+// (and DWARF2 as extended by most compilers), and consists of a length
+// field, a version number, the offset in the .debug_abbrev section for our
+// abbrevs, and an address size. DWARF5 adds a unit_type to distinguish
+// between partial-, full-, skeleton-, split-, and type- compilation units.
 void CompilationUnit::ReadHeader() {
   const uint8_t *headerptr = buffer_;
   size_t initial_length_size;
@@ -272,17 +311,36 @@
   header_.version = reader_->ReadTwoBytes(headerptr);
   headerptr += 2;
 
-  assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
-  header_.abbrev_offset = reader_->ReadOffset(headerptr);
-  headerptr += reader_->OffsetSize();
-
-  // Compare against less than or equal because this may be the last
-  // section in the file.
-  assert(headerptr + 1 <= buffer_ + buffer_length_);
-  header_.address_size = reader_->ReadOneByte(headerptr);
-  reader_->SetAddressSize(header_.address_size);
-  headerptr += 1;
-
+  if (header_.version <= 4) {
+    // Older versions of dwarf have a relatively simple structure.
+    headerptr += ReadAbbrevOffset(headerptr);
+    headerptr += ReadAddressSize(headerptr);
+  } else {
+    // DWARF5 adds a unit_type field, and various fields based on unit_type.
+    assert(headerptr + 1 < buffer_ + buffer_length_);
+    uint8_t unit_type = reader_->ReadOneByte(headerptr);
+    headerptr += 1;
+    headerptr += ReadAddressSize(headerptr);
+    headerptr += ReadAbbrevOffset(headerptr);
+    switch (unit_type) {
+      case DW_UT_compile:
+      case DW_UT_partial:
+        // nothing else to read
+        break;
+      case DW_UT_skeleton:
+      case DW_UT_split_compile:
+        headerptr += ReadDwoId(headerptr);
+        break;
+      case DW_UT_type:
+      case DW_UT_split_type:
+        headerptr += ReadTypeSignature(headerptr);
+        headerptr += ReadTypeOffset(headerptr);
+        break;
+      default:
+        fprintf(stderr, "Unhandled compilation unit type 0x%x", unit_type);
+        break;
+    }
+  }
   after_header_ = headerptr;
 
   // This check ensures that we don't have to do checking during the
diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h
index 593f5c0..cd91b10 100644
--- a/src/common/dwarf/dwarf2reader.h
+++ b/src/common/dwarf/dwarf2reader.h
@@ -411,6 +411,21 @@
   // Reads the DWARF2/3 abbreviations for this compilation unit
   void ReadAbbrevs();
 
+  // Read the abbreviation offset for this compilation unit
+  size_t ReadAbbrevOffset(const uint8_t* headerptr);
+
+  // Read the address size for this compilation unit
+  size_t ReadAddressSize(const uint8_t* headerptr);
+
+  // Read the DWO id from a split or skeleton compilation unit header
+  size_t ReadDwoId(const uint8_t* headerptr);
+
+  // Read the type signature from a type or split type compilation unit header
+  size_t ReadTypeSignature(const uint8_t* headerptr);
+
+  // Read the type offset from a type or split type compilation unit header
+  size_t ReadTypeOffset(const uint8_t* headerptr);
+
   // Processes a single DIE for this compilation unit and return a new
   // pointer just past the end of it
   const uint8_t *ProcessDIE(uint64_t dieoffset,
@@ -603,6 +618,12 @@
   // The value of the DW_AT_GNU_dwo_id attribute, if any.
   uint64_t dwo_id_;
 
+  // The type signature read from a type or split type unit header, if any.
+  uint64_t type_signature_;
+
+  // The type offset read from a type or split type unit header, if any.
+  size_t type_offset_;
+
   // The value of the DW_AT_GNU_dwo_name attribute, if any.
   const char* dwo_name_;
 
diff --git a/src/common/dwarf/dwarf2reader_die_unittest.cc b/src/common/dwarf/dwarf2reader_die_unittest.cc
index ca44cad..ab9f7cb 100644
--- a/src/common/dwarf/dwarf2reader_die_unittest.cc
+++ b/src/common/dwarf/dwarf2reader_die_unittest.cc
@@ -218,6 +218,8 @@
                       DwarfHeaderParams(kLittleEndian, 8, 3, 8),
                       DwarfHeaderParams(kLittleEndian, 8, 4, 4),
                       DwarfHeaderParams(kLittleEndian, 8, 4, 8),
+                      DwarfHeaderParams(kLittleEndian, 8, 5, 4),
+                      DwarfHeaderParams(kLittleEndian, 8, 5, 8),
                       DwarfHeaderParams(kBigEndian,    4, 2, 4),
                       DwarfHeaderParams(kBigEndian,    4, 2, 8),
                       DwarfHeaderParams(kBigEndian,    4, 3, 4),
@@ -229,7 +231,9 @@
                       DwarfHeaderParams(kBigEndian,    8, 3, 4),
                       DwarfHeaderParams(kBigEndian,    8, 3, 8),
                       DwarfHeaderParams(kBigEndian,    8, 4, 4),
-                      DwarfHeaderParams(kBigEndian,    8, 4, 8)));
+                      DwarfHeaderParams(kBigEndian,    8, 4, 8),
+                      DwarfHeaderParams(kBigEndian,    8, 5, 4),
+                      DwarfHeaderParams(kBigEndian,    8, 5, 8)));
 
 struct DwarfFormsFixture: public DIEFixture {
   // Start a compilation unit, as directed by |params|, containing one
diff --git a/src/common/dwarf/dwarf2reader_test_common.h b/src/common/dwarf/dwarf2reader_test_common.h
index e91de90..c81d54a 100644
--- a/src/common/dwarf/dwarf2reader_test_common.h
+++ b/src/common/dwarf/dwarf2reader_test_common.h
@@ -57,7 +57,7 @@
     assert(format_size == 4 || format_size == 8);
     format_size_ = format_size;
   }
-    
+
   // Append a DWARF section offset value, of the appropriate size for this
   // compilation unit.
   template<typename T>
@@ -80,8 +80,14 @@
     }
     post_length_offset_ = Size();
     D16(version);
-    SectionOffset(abbrev_offset);
-    D8(address_size);
+    if (version <= 4) {
+      SectionOffset(abbrev_offset);
+      D8(address_size);
+    } else {
+      D8(0x01);  // DW_UT_compile
+      D8(address_size);
+      SectionOffset(abbrev_offset);
+    }
     return *this;
   }