Internal change

PiperOrigin-RevId: 161993552
Change-Id: I9ed5a787d41f613fee5bc5fd0c2170a0b94f40a8
diff --git a/import.sh b/import.sh
index 56b5842..5b2f397 100644
--- a/import.sh
+++ b/import.sh
@@ -4,7 +4,7 @@
 top=/tmp/chromium
 mkdir $top
 prefix=https://chromium.googlesource.com/chromium/src.git/+archive
-for version in 57.0.2987.133 58.0.3029.110
+for version in 58.0.3029.110 59.0.3071.115
 do
   mkdir $top/$version
   cd $top/$version
diff --git a/src/base/optional.h b/src/base/optional.h
index 485baf7..1cc5c2d 100644
--- a/src/base/optional.h
+++ b/src/base/optional.h
@@ -8,7 +8,6 @@
 #include <type_traits>
 
 #include "base/logging.h"
-#include "base/template_util.h"
 
 namespace url {
 namespace base {
diff --git a/src/base/strings/string_piece.h b/src/base/strings/string_piece.h
index 7396eb4..340a341 100644
--- a/src/base/strings/string_piece.h
+++ b/src/base/strings/string_piece.h
@@ -252,6 +252,9 @@
     return r;
   }
 
+  // This is the style of conversion preferred by std::string_view in C++17.
+  explicit operator STRING_TYPE() const { return as_string(); }
+
   STRING_TYPE as_string() const {
     // std::string doesn't like to take a NULL pointer even with a 0 size.
     return empty() ? STRING_TYPE() : STRING_TYPE(data(), size());
diff --git a/src/url/gurl.cc b/src/url/gurl.cc
index 8c88afa..b79bddc 100644
--- a/src/url/gurl.cc
+++ b/src/url/gurl.cc
@@ -79,6 +79,15 @@
   DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
 }
 
+GURL::GURL(GURL&& other) noexcept
+    : spec_(std::move(other.spec_)),
+      is_valid_(other.is_valid_),
+      parsed_(other.parsed_),  // Copied; |other|'s copy is reset below.
+      inner_url_(std::move(other.inner_url_)) {
+  other.is_valid_ = false;        // Mark the moved-from URL as invalid.
+  other.parsed_ = url::Parsed();  // Reset it to a default-constructed Parsed.
+}
+
 GURL::GURL(url::base::StringPiece url_string) {
   InitCanonical(url_string, true);
 }
@@ -168,8 +177,29 @@
 GURL::~GURL() {
 }
 
-GURL& GURL::operator=(GURL other) {
-  Swap(&other);
+GURL& GURL::operator=(const GURL& other) {
+  spec_ = other.spec_;
+  is_valid_ = other.is_valid_;
+  parsed_ = other.parsed_;
+
+  if (!other.inner_url_)
+    inner_url_.reset();  // |other| has no inner URL; drop ours.
+  else if (inner_url_)
+    *inner_url_ = *other.inner_url_;  // Assign into the existing inner GURL.
+  else
+    inner_url_.reset(new GURL(*other.inner_url_));  // Allocate a fresh copy.
+
+  return *this;
+}
+
+GURL& GURL::operator=(GURL&& other) {
+  if (this == &other) return *this;  // Self-move must leave |*this| intact.
+  spec_ = std::move(other.spec_);
+  is_valid_ = other.is_valid_;
+  parsed_ = other.parsed_;  // Copied; |other|'s copy is reset below.
+  inner_url_ = std::move(other.inner_url_);
+  other.is_valid_ = false;        // Mark the moved-from URL as invalid.
+  other.parsed_ = url::Parsed();  // Reset it to a default-constructed Parsed.
   return *this;
 }
 
diff --git a/src/url/gurl.h b/src/url/gurl.h
index 1fd6c14..3dfc03d 100644
--- a/src/url/gurl.h
+++ b/src/url/gurl.h
@@ -54,6 +54,7 @@
   // Copy construction is relatively inexpensive, with most of the time going
   // to reallocating the string. It does not re-parse.
   GURL(const GURL& other);
+  GURL(GURL&& other) noexcept;
 
   // The strings to this contructor should be UTF-8 / UTF-16.
   explicit GURL(url::base::StringPiece url_string);
@@ -76,7 +77,8 @@
 
   ~GURL();
 
-  GURL& operator=(GURL other);
+  GURL& operator=(const GURL& other);
+  GURL& operator=(GURL&& other);
 
   // Returns true when this object represents a valid parsed URL. When not
   // valid, other functions will still succeed, but you will not get canonical
diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc
index a0764e8..a3b43d6 100644
--- a/src/url/gurl_unittest.cc
+++ b/src/url/gurl_unittest.cc
@@ -645,10 +645,12 @@
   // Constructor.
   GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
   EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
+  EXPECT_TRUE(url_1.parsed_for_possibly_invalid_spec().whitespace_removed);
 
   // Relative path resolver.
   GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
   EXPECT_EQ("http://www.google.com/foo", url_2.spec());
+  EXPECT_TRUE(url_2.parsed_for_possibly_invalid_spec().whitespace_removed);
 
   // Note that newlines are NOT stripped from ReplaceComponents.
 }
diff --git a/src/url/third_party/mozilla/url_parse.cc b/src/url/third_party/mozilla/url_parse.cc
index 9977cae..4cba88c 100644
--- a/src/url/third_party/mozilla/url_parse.cc
+++ b/src/url/third_party/mozilla/url_parse.cc
@@ -694,16 +694,17 @@
 
 Parsed::Parsed() : whitespace_removed(false), inner_parsed_(NULL) {}
 
-Parsed::Parsed(const Parsed& other) :
-    scheme(other.scheme),
-    username(other.username),
-    password(other.password),
-    host(other.host),
-    port(other.port),
-    path(other.path),
-    query(other.query),
-    ref(other.ref),
-    inner_parsed_(NULL) {
+Parsed::Parsed(const Parsed& other)
+    : scheme(other.scheme),
+      username(other.username),
+      password(other.password),
+      host(other.host),
+      port(other.port),
+      path(other.path),
+      query(other.query),
+      ref(other.ref),
+      whitespace_removed(other.whitespace_removed),
+      inner_parsed_(NULL) {
   if (other.inner_parsed_)
     set_inner_parsed(*other.inner_parsed_);
 }
@@ -718,6 +719,7 @@
     path = other.path;
     query = other.query;
     ref = other.ref;
+    whitespace_removed = other.whitespace_removed;
     if (other.inner_parsed_)
       set_inner_parsed(*other.inner_parsed_);
     else
diff --git a/src/url/url_canon_mailtourl.cc b/src/url/url_canon_mailtourl.cc
index fb6bc9a..8a7ff1a 100644
--- a/src/url/url_canon_mailtourl.cc
+++ b/src/url/url_canon_mailtourl.cc
@@ -13,6 +13,23 @@
 
 namespace {
 
+// Returns true for characters that must be percent-encoded in the path of a
+// mailto URL, both for compatibility and to mitigate command-injection attacks
+// on mailto handlers. See https://crbug.com/711020.
+template <typename UCHAR>
+bool ShouldEncodeMailboxCharacter(UCHAR uch) {
+  if (uch < 0x21 ||                              // space & control characters.
+      uch > 0x7e ||                              // DEL & non-ASCII characters.
+      uch == 0x22 ||                             // double quote.
+      uch == 0x3c || uch == 0x3e ||              // angle brackets.
+      uch == 0x60 ||                             // backtick.
+      uch == 0x7b || uch == 0x7c || uch == 0x7d  // braces and pipe.
+      ) {
+    return true;
+  }
+  return false;
+}
+
 template <typename CHAR, typename UCHAR>
 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
                              const Parsed& parsed,
@@ -38,12 +55,12 @@
     new_parsed->path.begin = output->length();
 
     // Copy the path using path URL's more lax escaping rules.
-    // We convert to UTF-8 and escape non-ASCII, but leave all
+    // We convert to UTF-8 and escape non-ASCII, but leave most
     // ASCII characters alone.
     int end = parsed.path.end();
     for (int i = parsed.path.begin; i < end; ++i) {
       UCHAR uch = static_cast<UCHAR>(source.path[i]);
-      if (uch < 0x20 || uch >= 0x80)
+      if (ShouldEncodeMailboxCharacter<UCHAR>(uch))
         success &= AppendUTF8EscapedChar(source.path, &i, end, output);
       else
         output->push_back(static_cast<char>(uch));
diff --git a/src/url/url_canon_unittest.cc b/src/url/url_canon_unittest.cc
index 26d7815..d1991c6 100644
--- a/src/url/url_canon_unittest.cc
+++ b/src/url/url_canon_unittest.cc
@@ -1847,20 +1847,51 @@
     Component expected_path;
     Component expected_query;
   } cases[] = {
-    {"mailto:addr1", "mailto:addr1", true, Component(7, 5), Component()},
-    {"mailto:addr1@foo.com", "mailto:addr1@foo.com", true, Component(7, 13), Component()},
+    // Null character should be escaped to %00.
+    // Keep this test first in the list as it is handled specially below.
+    {"mailto:addr1\0addr2?foo",
+     "mailto:addr1%00addr2?foo",
+     true, Component(7, 13), Component(21, 3)},
+    {"mailto:addr1",
+     "mailto:addr1",
+     true, Component(7, 5), Component()},
+    {"mailto:addr1@foo.com",
+     "mailto:addr1@foo.com",
+     true, Component(7, 13), Component()},
     // Trailing whitespace is stripped.
-    {"MaIlTo:addr1 \t ", "mailto:addr1", true, Component(7, 5), Component()},
-    {"MaIlTo:addr1?to=jon", "mailto:addr1?to=jon", true, Component(7, 5), Component(13,6)},
-    {"mailto:addr1,addr2", "mailto:addr1,addr2", true, Component(7, 11), Component()},
-    {"mailto:addr1, addr2", "mailto:addr1, addr2", true, Component(7, 12), Component()},
-    {"mailto:addr1%2caddr2", "mailto:addr1%2caddr2", true, Component(7, 13), Component()},
-    {"mailto:\xF0\x90\x8C\x80", "mailto:%F0%90%8C%80", true, Component(7, 12), Component()},
-    // Null character should be escaped to %00
-    {"mailto:addr1\0addr2?foo", "mailto:addr1%00addr2?foo", true, Component(7, 13), Component(21, 3)},
+    {"MaIlTo:addr1 \t ",
+     "mailto:addr1",
+     true, Component(7, 5), Component()},
+    {"MaIlTo:addr1?to=jon",
+     "mailto:addr1?to=jon",
+     true, Component(7, 5), Component(13,6)},
+    {"mailto:addr1,addr2",
+     "mailto:addr1,addr2",
+     true, Component(7, 11), Component()},
+    // Embedded spaces must be encoded.
+    {"mailto:addr1, addr2",
+     "mailto:addr1,%20addr2",
+     true, Component(7, 14), Component()},
+    {"mailto:addr1, addr2?subject=one two ",
+     "mailto:addr1,%20addr2?subject=one%20two",
+     true, Component(7, 14), Component(22, 17)},
+    {"mailto:addr1%2caddr2",
+     "mailto:addr1%2caddr2",
+     true, Component(7, 13), Component()},
+    {"mailto:\xF0\x90\x8C\x80",
+     "mailto:%F0%90%8C%80",
+     true, Component(7, 12), Component()},
     // Invalid -- UTF-8 encoded surrogate value.
-    {"mailto:\xed\xa0\x80", "mailto:%EF%BF%BD", false, Component(7, 9), Component()},
-    {"mailto:addr1?", "mailto:addr1?", true, Component(7, 5), Component(13, 0)},
+    {"mailto:\xed\xa0\x80",
+     "mailto:%EF%BF%BD",
+     false, Component(7, 9), Component()},
+    {"mailto:addr1?",
+     "mailto:addr1?",
+     true, Component(7, 5), Component(13, 0)},
+    // Certain characters have special meanings and must be encoded.
+    {"mailto:! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~\x7f?Query! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~",
+     "mailto:!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_%60az%7B%7C%7D~%7F?Query!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_`az{|}~",
+     true, Component(7, 53), Component(61, 47)},
   };
 
   // Define outside of loop to catch bugs where components aren't reset
@@ -1869,8 +1900,8 @@
 
   for (size_t i = 0; i < arraysize(cases); i++) {
     int url_len = static_cast<int>(strlen(cases[i].input));
-    if (i == 8) {
-      // The 9th test case purposely has a '\0' in it -- don't count it
+    if (i == 0) {
+      // The first test case purposely has a '\0' in it -- don't count it
       // as the string terminator.
       url_len = 22;
     }
diff --git a/src/url/url_util.cc b/src/url/url_util.cc
index c00b7ee..0fe9983 100644
--- a/src/url/url_util.cc
+++ b/src/url/url_util.cc
@@ -83,6 +83,10 @@
   kWssScheme,
 };
 
+const char* kEmptyDocumentSchemes[] = {
+    kAboutScheme,
+};
+
 bool initialized = false;
 
 // Lists of the currently installed standard and referrer schemes. These lists
@@ -98,6 +102,7 @@
 std::vector<std::string>* cors_enabled_schemes = nullptr;
 std::vector<std::string>* web_storage_schemes = nullptr;
 std::vector<std::string>* csp_bypassing_schemes = nullptr;
+std::vector<std::string>* empty_document_schemes = nullptr;
 
 // See the LockSchemeRegistries declaration in the header.
 bool scheme_registries_locked = false;
@@ -529,6 +534,8 @@
   InitSchemes(&web_storage_schemes, kWebStorageSchemes,
               arraysize(kWebStorageSchemes));
   InitSchemes(&csp_bypassing_schemes, nullptr, 0);
+  InitSchemes(&empty_document_schemes, kEmptyDocumentSchemes,
+              arraysize(kEmptyDocumentSchemes));
   initialized = true;
 }
 
@@ -550,6 +557,8 @@
   web_storage_schemes = nullptr;
   delete csp_bypassing_schemes;
   csp_bypassing_schemes = nullptr;
+  delete empty_document_schemes;
+  empty_document_schemes = nullptr;
 }
 
 void AddStandardScheme(const char* new_scheme, SchemeType type) {
@@ -622,6 +631,16 @@
   return *csp_bypassing_schemes;
 }
 
+void AddEmptyDocumentScheme(const char* new_scheme) {
+  Initialize();  // NOTE(review): presumably allocates the registries; confirm.
+  DoAddScheme(new_scheme, empty_document_schemes);
+}
+
+const std::vector<std::string>& GetEmptyDocumentSchemes() {
+  Initialize();
+  return *empty_document_schemes;  // Assumes Initialize() set it (confirm).
+}
+
 void LockSchemeRegistries() {
   scheme_registries_locked = true;
 }
diff --git a/src/url/url_util.h b/src/url/url_util.h
index d0a8f22..643c29d 100644
--- a/src/url/url_util.h
+++ b/src/url/url_util.h
@@ -109,6 +109,11 @@
 URL_EXPORT void AddCSPBypassingScheme(const char* new_scheme);
 URL_EXPORT const std::vector<std::string>& GetCSPBypassingSchemes();
 
+// Adds an application-defined scheme to the list of schemes that are strictly
+// empty documents, allowing them to commit synchronously.
+URL_EXPORT void AddEmptyDocumentScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetEmptyDocumentSchemes();
+
 // Sets a flag to prevent future calls to Add*Scheme from succeeding.
 //
 // This is designed to help prevent errors for multithreaded applications.