Internal change

PiperOrigin-RevId: 176573453
Change-Id: I2d8d8e52c6faba9e017429e9937039696ce5e961
diff --git a/import.sh b/import.sh
index 5b612f0..010bd70 100644
--- a/import.sh
+++ b/import.sh
@@ -4,7 +4,7 @@
 top=/tmp/chromium
 mkdir $top
 prefix=https://chromium.googlesource.com/chromium/src.git/+archive
-for version in 60.0.3112.101 61.0.3163.100
+for version in 61.0.3163.100 62.0.3202.94
 do
   mkdir $top/$version
   cd $top/$version
diff --git a/src/base/strings/string16.cc b/src/base/strings/string16.cc
index 2139034..a40212f 100644
--- a/src/base/strings/string16.cc
+++ b/src/base/strings/string16.cc
@@ -69,6 +69,7 @@
 }  // namespace base
 }  // namespace url
 
-template class std::basic_string<url::base::char16, url::base::string16_char_traits>;
+template class std::
+    basic_string<url::base::char16, url::base::string16_internals::string16_char_traits>;
 
 #endif  // WCHAR_T_IS_UTF32
diff --git a/src/base/strings/string16.h b/src/base/strings/string16.h
index 67f9cee..b8d6c82 100644
--- a/src/base/strings/string16.h
+++ b/src/base/strings/string16.h
@@ -44,7 +44,6 @@
 
 typedef wchar_t char16;
 typedef std::wstring string16;
-typedef std::char_traits<wchar_t> string16_char_traits;
 
 }  // namespace base
 }  // namespace url
@@ -68,6 +67,11 @@
 BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n);
 BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n);
 
+// This namespace contains the implementation of base::string16 along with
+// things that need to be found via argument-dependent lookup from a
+// base::string16.
+namespace string16_internals {
+
 struct string16_char_traits {
   typedef char16 char_type;
   typedef int int_type;
@@ -138,7 +142,11 @@
   }
 };
 
-typedef std::basic_string<char16, base::string16_char_traits> string16;
+}  // namespace string16_internals
+
+typedef std::basic_string<char16,
+                          base::string16_internals::string16_char_traits>
+    string16;
 
 }  // namespace base
 }  // namespace url
@@ -182,8 +190,9 @@
 //
 // TODO(mark): File this bug with Apple and update this note with a bug number.
 
-extern template
-class BASE_EXPORT std::basic_string<url::base::char16, url::base::string16_char_traits>;
+extern template class BASE_EXPORT
+    std::basic_string<url::base::char16,
+                      url::base::string16_internals::string16_char_traits>;
 
 // Specialize std::hash for base::string16. Although the style guide forbids
 // this in general, it is necessary for consistency with WCHAR_T_IS_UTF16
diff --git a/src/url/gurl.cc b/src/url/gurl.cc
index f6bf408..e29e397 100644
--- a/src/url/gurl.cc
+++ b/src/url/gurl.cc
@@ -374,6 +374,10 @@
   return other;
 }
 
+GURL GURL::GetWithoutFilename() const {
+  return Resolve(".");
+}
+
 bool GURL::IsStandard() const {
   return url::IsStandard(spec_.data(), parsed_.scheme);
 }
@@ -505,14 +509,14 @@
 
 #endif  // WIN32
 
-bool GURL::DomainIs(url::base::StringPiece lower_ascii_domain) const {
+bool GURL::DomainIs(url::base::StringPiece canonical_domain) const {
   if (!is_valid_)
     return false;
 
   // FileSystem URLs have empty host_piece, so check this first.
-  if (SchemeIsFileSystem() && inner_url_)
-    return inner_url_->DomainIs(lower_ascii_domain);
-  return url::DomainIs(host_piece(), lower_ascii_domain);
+  if (inner_url_ && SchemeIsFileSystem())
+    return inner_url_->DomainIs(canonical_domain);
+  return url::DomainIs(host_piece(), canonical_domain);
 }
 
 bool GURL::EqualsIgnoringRef(const GURL& other) const {
diff --git a/src/url/gurl.h b/src/url/gurl.h
index b3263c8..b410dba 100644
--- a/src/url/gurl.h
+++ b/src/url/gurl.h
@@ -181,6 +181,15 @@
   // will be the empty URL.
   GURL GetWithEmptyPath() const;
 
+  // A helper function to return a GURL without the filename, query values, and
+  // fragment. For example,
+  // GURL("https://www.foo.com/index.html?q=test").GetWithoutFilename().spec()
+  // will return "https://www.foo.com/".
+  // GURL("https://www.foo.com/bar/").GetWithoutFilename().spec()
+  // will return "https://www.foo.com/bar/". If the GURL is invalid or missing a
+  // scheme, authority or path, it will return an empty, invalid GURL.
+  GURL GetWithoutFilename() const;
+
   // A helper function to return a GURL containing just the scheme, host,
   // and port from a URL. Equivalent to clearing any username and password,
   // replacing the path with a slash, and clearing everything after that. If
@@ -388,11 +397,13 @@
   // "www.google.com", this will return true for "com", "google.com", and
   // "www.google.com".
   //
-  // The input domain should be lower-case ASCII to match the canonicalized
-  // scheme. This call is more efficient than getting the host and check
-  // whether host has the specific domain or not because no copies or
-  // object constructions are done.
-  bool DomainIs(url::base::StringPiece lower_ascii_domain) const;
+  // The input domain should match host canonicalization rules. i.e. the input
+  // should be lowercase except for escape chars.
+  //
+  // This call is more efficient than getting the host and checking whether the
+  // host has the specific domain or not because no copies or object
+  // constructions are done.
+  bool DomainIs(url::base::StringPiece canonical_domain) const;
 
   // Checks whether or not two URLs are differing only in the ref (the part
   // after the # character).
diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc
index 510b1ad..f6e426d 100644
--- a/src/url/gurl_unittest.cc
+++ b/src/url/gurl_unittest.cc
@@ -387,6 +387,52 @@
   }
 }
 
+TEST(GURLTest, GetWithoutFilename) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+    // Common Standard URLs.
+    {"https://www.google.com",                    "https://www.google.com/"},
+    {"https://www.google.com/",                   "https://www.google.com/"},
+    {"https://www.google.com/maps.htm",           "https://www.google.com/"},
+    {"https://www.google.com/maps/",              "https://www.google.com/maps/"},
+    {"https://www.google.com/index.html",         "https://www.google.com/"},
+    {"https://www.google.com/index.html?q=maps",  "https://www.google.com/"},
+    {"https://www.google.com/index.html#maps/",   "https://www.google.com/"},
+    {"https://foo:bar@www.google.com/maps.htm",   "https://foo:bar@www.google.com/"},
+    {"https://www.google.com/maps/au/index.html", "https://www.google.com/maps/au/"},
+    {"https://www.google.com/maps/au/north",      "https://www.google.com/maps/au/"},
+    {"https://www.google.com/maps/au/north/",     "https://www.google.com/maps/au/north/"},
+    {"https://www.google.com/maps/au/index.html?q=maps#fragment/",     "https://www.google.com/maps/au/"},
+    {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/", "http://www.google.com:8000/maps/au/"},
+    {"https://www.google.com/maps/au/north/?q=maps#fragment",          "https://www.google.com/maps/au/north/"},
+    {"https://www.google.com/maps/au/north?q=maps#fragment",           "https://www.google.com/maps/au/"},
+    // Less common standard URLs.
+    {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"},
+    {"file:///temporary/bar.html?baz=22","file:///temporary/"},
+    {"ftp://foo/test/index.html",        "ftp://foo/test/"},
+    {"gopher://foo/test/index.html",     "gopher://foo/test/"},
+    {"ws://foo/test/index.html",         "ws://foo/test/"},
+    // Non-standard, hierarchical URLs.
+    {"chrome://foo/bar.html", "chrome://foo/"},
+    {"httpa://foo/test/index.html", "httpa://foo/test/"},
+    // Non-standard, non-hierarchical URLs.
+    {"blob:https://foo.bar/test/index.html", ""},
+    {"about:blank", ""},
+    {"data:foobar", ""},
+    {"scheme:opaque_data", ""},
+    // Invalid URLs.
+    {"foobar", ""},
+  };
+
+  for (size_t i = 0; i < arraysize(cases); i++) {
+    GURL url(cases[i].input);
+    GURL without_filename = url.GetWithoutFilename();
+    EXPECT_EQ(cases[i].expected, without_filename.spec()) << i;
+  }
+}
+
 TEST(GURLTest, Replacements) {
   // The URL canonicalizer replacement test will handle most of these case.
   // The most important thing to do here is to check that the proper
@@ -612,6 +658,11 @@
   GURL invalid_url("google.com");
   EXPECT_FALSE(invalid_url.is_valid());
   EXPECT_FALSE(invalid_url.DomainIs("google.com"));
+
+  GURL url_with_escape_chars("https://www.,.test");
+  EXPECT_TRUE(url_with_escape_chars.is_valid());
+  EXPECT_EQ(url_with_escape_chars.host(), "www.%2C.test");
+  EXPECT_TRUE(url_with_escape_chars.DomainIs("%2C.test"));
 }
 
 TEST(GURLTest, DomainIsTerminatingDotBehavior) {
diff --git a/src/url/origin.cc b/src/url/origin.cc
index 2bf959c..28ea0b6 100644
--- a/src/url/origin.cc
+++ b/src/url/origin.cc
@@ -169,8 +169,8 @@
   return GetPhysicalOrigin().IsSameOriginWith(other.GetPhysicalOrigin());
 }
 
-bool Origin::DomainIs(base::StringPiece lower_ascii_domain) const {
-  return !unique_ && url::DomainIs(tuple_.host(), lower_ascii_domain);
+bool Origin::DomainIs(base::StringPiece canonical_domain) const {
+  return !unique_ && url::DomainIs(tuple_.host(), canonical_domain);
 }
 
 bool Origin::operator<(const Origin& other) const {
diff --git a/src/url/origin.h b/src/url/origin.h
index 9e6b492..8b59b5a 100644
--- a/src/url/origin.h
+++ b/src/url/origin.h
@@ -154,10 +154,14 @@
   // Note: The returned URL will not necessarily be serialized to the same value
   // as the Origin would. The GURL will have an added "/" path for Origins with
   // valid SchemeHostPorts and file Origins.
+  //
+  // Try not to use this method under normal circumstances, as it loses type
+  // information. Downstream consumers can mistake the returned GURL for a full
+  // URL (e.g. with a path component).
   GURL GetURL() const;
 
   // Same as GURL::DomainIs. If |this| origin is unique, then returns false.
-  bool DomainIs(base::StringPiece lower_ascii_domain) const;
+  bool DomainIs(base::StringPiece canonical_domain) const;
 
   // Allows Origin to be used as a key in STL (for example, a std::set or
   // std::map).
diff --git a/src/url/url_canon_etc.cc b/src/url/url_canon_etc.cc
index 31e9fb5..cc74123 100644
--- a/src/url/url_canon_etc.cc
+++ b/src/url/url_canon_etc.cc
@@ -46,6 +46,17 @@
     return input;
   }
 
+  // Skip whitespace removal for `data:` URLs.
+  //
+  // TODO(mkwst): Ideally, this would use something like `base::StartsWith`, but
+  // that turns out to be difficult to do correctly given this function's
+  // character type templating.
+  if (input_len > 5 && input[0] == 'd' && input[1] == 'a' && input[2] == 't' &&
+      input[3] == 'a' && input[4] == ':') {
+    *output_len = input_len;
+    return input;
+  }
+
   // Remove the whitespace into the new buffer and return it.
   for (int i = 0; i < input_len; i++) {
     if (!IsRemovableURLWhitespace(input[i])) {
diff --git a/src/url/url_util.cc b/src/url/url_util.cc
index a390a9d..30f2e7e 100644
--- a/src/url/url_util.cc
+++ b/src/url/url_util.cc
@@ -676,28 +676,27 @@
   return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
 }
 
-bool DomainIs(base::StringPiece canonicalized_host,
-              base::StringPiece lower_ascii_domain) {
-  if (canonicalized_host.empty() || lower_ascii_domain.empty())
+bool DomainIs(base::StringPiece canonical_host,
+              base::StringPiece canonical_domain) {
+  if (canonical_host.empty() || canonical_domain.empty())
     return false;
 
   // If the host name ends with a dot but the input domain doesn't, then we
   // ignore the dot in the host name.
-  size_t host_len = canonicalized_host.length();
-  if (canonicalized_host.back() == '.' && lower_ascii_domain.back() != '.')
+  size_t host_len = canonical_host.length();
+  if (canonical_host.back() == '.' && canonical_domain.back() != '.')
     --host_len;
 
-  if (host_len < lower_ascii_domain.length())
+  if (host_len < canonical_domain.length())
     return false;
 
   // |host_first_pos| is the start of the compared part of the host name, not
   // start of the whole host name.
   const char* host_first_pos =
-      canonicalized_host.data() + host_len - lower_ascii_domain.length();
+      canonical_host.data() + host_len - canonical_domain.length();
 
-  if (!base::LowerCaseEqualsASCII(
-          base::StringPiece(host_first_pos, lower_ascii_domain.length()),
-          lower_ascii_domain)) {
+  if (base::StringPiece(host_first_pos, canonical_domain.length()) !=
+      canonical_domain) {
     return false;
   }
 
@@ -705,7 +704,7 @@
   // if the host name is longer than the input domain name, then the character
   // immediately before the compared part should be a dot. For example,
   // www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
-  if (lower_ascii_domain[0] != '.' && host_len > lower_ascii_domain.length() &&
+  if (canonical_domain[0] != '.' && host_len > canonical_domain.length() &&
       *(host_first_pos - 1) != '.') {
     return false;
   }
diff --git a/src/url/url_util.h b/src/url/url_util.h
index 643c29d..7486bf7 100644
--- a/src/url/url_util.h
+++ b/src/url/url_util.h
@@ -173,16 +173,16 @@
 
 // Hosts  ----------------------------------------------------------------------
 
-// Returns true if the |canonicalized_host| matches or is in the same domain as
-// the given |lower_ascii_domain| string. For example, if the canonicalized
-// hostname is "www.google.com", this will return true for "com", "google.com",
-// and "www.google.com" domains.
+// Returns true if the |canonical_host| matches or is in the same domain as the
+// given |canonical_domain| string. For example, if the canonicalized hostname
+// is "www.google.com", this will return true for "com", "google.com", and
+// "www.google.com" domains.
 //
 // If either of the input StringPieces is empty, the return value is false. The
-// input domain should be a lower-case ASCII string in order to match the
-// canonicalized host.
-URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host,
-                         base::StringPiece lower_ascii_domain);
+// input domain should match host canonicalization rules. i.e. it should be
+// lowercase except for escape chars.
+URL_EXPORT bool DomainIs(base::StringPiece canonical_host,
+                         base::StringPiece canonical_domain);
 
 // Returns true if the hostname is an IP address. Note: this function isn't very
 // cheap, as it must re-parse the host to verify.