Add DWARF5 compilation-unit header handling.

Fix a small typo in the DW_LANG_Python enumerator (was DW_LANG__Python).

Change-Id: I1fe54e501a5e8da5057ecc9ac00c7e7259a9bb3f
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/1967975
Reviewed-by: Mark Mentovai <mark@chromium.org>
diff --git a/src/common/dwarf/dwarf2enums.h b/src/common/dwarf/dwarf2enums.h
index 51253ed..b11a4fd 100644
--- a/src/common/dwarf/dwarf2enums.h
+++ b/src/common/dwarf/dwarf2enums.h
@@ -686,7 +686,7 @@
     DW_LANG_ObjC_plus_plus           =0x0011,
     DW_LANG_UPC                      =0x0012,
     DW_LANG_D                        =0x0013,
-    DW_LANG__Python                  =0x0014,
+    DW_LANG_Python                   =0x0014,
     DW_LANG_OpenCL                   =0x0015,
     DW_LANG_Go                       =0x0016,
     DW_LANG_Modula3                  =0x0017,
diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc
index 3e6a3e8..7b8e043 100644
--- a/src/common/dwarf/dwarf2reader.cc
+++ b/src/common/dwarf/dwarf2reader.cc
@@ -243,11 +243,51 @@
   return NULL;
 }
 
-// Read a DWARF2/3 header.
-// The header is variable length in DWARF3 (and DWARF2 as extended by
+// Read the abbrev offset at |headerptr| into header_; return bytes consumed.
+int CompilationUnit::ReadAbbrevOffset(const uint8_t *headerptr) {
+  assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
+  header_.abbrev_offset = reader_->ReadOffset(headerptr);
+  return reader_->OffsetSize();
+}
+
+// Read the one-byte address size at |headerptr| into header_ and reader_.
+int CompilationUnit::ReadAddressSize(const uint8_t *headerptr) {
+  // Compare against less than or equal because this may be the last
+  // section in the file.
+  assert(headerptr + 1 <= buffer_ + buffer_length_);
+  header_.address_size = reader_->ReadOneByte(headerptr);
+  reader_->SetAddressSize(header_.address_size);
+  return 1;
+}
+
+// Read the 8-byte DWO id from a split or skeleton unit header into dwo_id_.
+int CompilationUnit::ReadDwoId(const uint8_t *headerptr) {
+  assert(headerptr + 8 <= buffer_ + buffer_length_);
+  dwo_id_ = reader_->ReadEightBytes(headerptr);
+  return 8;
+}
+
+// Read the 8-byte type signature from a type or split type unit header.
+int CompilationUnit::ReadTypeSignature(const uint8_t *headerptr) {
+  assert(headerptr + 8 <= buffer_ + buffer_length_);
+  type_signature_ = reader_->ReadEightBytes(headerptr);
+  return 8;
+}
+
+// Read the type offset from a type or split type compilation unit header.
+int CompilationUnit::ReadTypeOffset(const uint8_t *headerptr) {
+  assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
+  type_offset_ = reader_->ReadOffset(headerptr);
+  return reader_->OffsetSize();
+}
+
+
+// Read a DWARF header.
+// The header is variable length in DWARF3 and DWARF4 (and DWARF2 as extended by
 // most compilers), and consists of an length field, a version number,
 // the offset in the .debug_abbrev section for our abbrevs, and an
-// address size.
+// address size. DWARF5 adds a unit_type to distinguish between
+// partial-, full-, skeleton-, split-, and type- compilation units.
 void CompilationUnit::ReadHeader() {
   const uint8_t *headerptr = buffer_;
   size_t initial_length_size;
@@ -262,17 +302,33 @@
   header_.version = reader_->ReadTwoBytes(headerptr);
   headerptr += 2;
 
-  assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
-  header_.abbrev_offset = reader_->ReadOffset(headerptr);
-  headerptr += reader_->OffsetSize();
-
-  // Compare against less than or equal because this may be the last
-  // section in the file.
-  assert(headerptr + 1 <= buffer_ + buffer_length_);
-  header_.address_size = reader_->ReadOneByte(headerptr);
-  reader_->SetAddressSize(header_.address_size);
-  headerptr += 1;
-
+  if (header_.version <= 4) {
+    // Older versions of dwarf have a relatively simple structure.
+    headerptr += ReadAbbrevOffset(headerptr);
+    headerptr += ReadAddressSize(headerptr);
+  } else {
+    // DWARF5 adds a unit_type field, and various fields based on unit_type.
+    assert(headerptr + 1 < buffer_ + buffer_length_);
+    int unit_type = reader_->ReadOneByte(headerptr);
+    headerptr += 1;
+    headerptr += ReadAddressSize(headerptr);
+    headerptr += ReadAbbrevOffset(headerptr);
+    switch (unit_type) {
+      case DW_UT_compile:
+      case DW_UT_partial:
+        // nothing else to read
+        break;
+      case DW_UT_skeleton:
+      case DW_UT_split_compile:
+        headerptr += ReadDwoId(headerptr);;
+        break;
+      case DW_UT_type:
+      case DW_UT_split_type:
+        headerptr += ReadTypeSignature(headerptr);;
+        headerptr += ReadTypeOffset(headerptr);;
+        break;
+    }
+  }
   after_header_ = headerptr;
 
   // This check ensures that we don't have to do checking during the
diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h
index 902d9ef..6f1b1a4 100644
--- a/src/common/dwarf/dwarf2reader.h
+++ b/src/common/dwarf/dwarf2reader.h
@@ -410,6 +410,21 @@
   // Reads the DWARF2/3 abbreviations for this compilation unit
   void ReadAbbrevs();
 
+  // Read the abbreviation offset for this compilation unit
+  int ReadAbbrevOffset(const uint8_t *headerptr);
+
+  // Read the address size for this compilation unit
+  int ReadAddressSize(const uint8_t *headerptr);
+
+  // Read the DWO id from a split or skeleton compilation unit header
+  int ReadDwoId(const uint8_t *headerptr);
+
+  // Read the type signature from a type or split type compilation unit header
+  int ReadTypeSignature(const uint8_t *headerptr);
+
+  // Read the type offset from a type or split type compilation unit header
+  int ReadTypeOffset(const uint8_t *headerptr);
+
   // Processes a single DIE for this compilation unit and return a new
   // pointer just past the end of it
   const uint8_t *ProcessDIE(uint64 dieoffset,
@@ -564,6 +579,12 @@
   // The value of the DW_AT_GNU_dwo_id attribute, if any.
   uint64 dwo_id_;
 
+  // The type signature read from a type unit header, if any.
+  uint64 type_signature_;
+
+  // The type offset read from a type unit header, if any.
+  size_t type_offset_;
+
   // The value of the DW_AT_GNU_dwo_name attribute, if any.
   const char* dwo_name_;
 
diff --git a/src/common/dwarf/dwarf2reader_die_unittest.cc b/src/common/dwarf/dwarf2reader_die_unittest.cc
index 71418eb..60541ff 100644
--- a/src/common/dwarf/dwarf2reader_die_unittest.cc
+++ b/src/common/dwarf/dwarf2reader_die_unittest.cc
@@ -217,6 +217,8 @@
                       DwarfHeaderParams(kLittleEndian, 8, 3, 8),
                       DwarfHeaderParams(kLittleEndian, 8, 4, 4),
                       DwarfHeaderParams(kLittleEndian, 8, 4, 8),
+                      DwarfHeaderParams(kLittleEndian, 8, 5, 4),
+                      DwarfHeaderParams(kLittleEndian, 8, 5, 8),
                       DwarfHeaderParams(kBigEndian,    4, 2, 4),
                       DwarfHeaderParams(kBigEndian,    4, 2, 8),
                       DwarfHeaderParams(kBigEndian,    4, 3, 4),
@@ -228,7 +230,9 @@
                       DwarfHeaderParams(kBigEndian,    8, 3, 4),
                       DwarfHeaderParams(kBigEndian,    8, 3, 8),
                       DwarfHeaderParams(kBigEndian,    8, 4, 4),
-                      DwarfHeaderParams(kBigEndian,    8, 4, 8)));
+                      DwarfHeaderParams(kBigEndian,    8, 4, 8),
+                      DwarfHeaderParams(kBigEndian,    8, 5, 4),
+                      DwarfHeaderParams(kBigEndian,    8, 5, 8)));
 
 struct DwarfFormsFixture: public DIEFixture {
   // Start a compilation unit, as directed by |params|, containing one
diff --git a/src/common/dwarf/dwarf2reader_test_common.h b/src/common/dwarf/dwarf2reader_test_common.h
index e91de90..545bc45 100644
--- a/src/common/dwarf/dwarf2reader_test_common.h
+++ b/src/common/dwarf/dwarf2reader_test_common.h
@@ -80,8 +80,14 @@
     }
     post_length_offset_ = Size();
     D16(version);
-    SectionOffset(abbrev_offset);
-    D8(address_size);
+    if (version <= 4) {
+      SectionOffset(abbrev_offset);
+      D8(address_size);
+    } else {
+      D8(0x01);  // DW_UT_compile
+      D8(address_size);
+      SectionOffset(abbrev_offset);
+    }
     return *this;
   }