Internal change PiperOrigin-RevId: 176573453 Change-Id: I2d8d8e52c6faba9e017429e9937039696ce5e961
diff --git a/import.sh b/import.sh index 5b612f0..010bd70 100644 --- a/import.sh +++ b/import.sh
@@ -4,7 +4,7 @@ top=/tmp/chromium mkdir $top prefix=https://chromium.googlesource.com/chromium/src.git/+archive -for version in 60.0.3112.101 61.0.3163.100 +for version in 61.0.3163.100 62.0.3202.94 do mkdir $top/$version cd $top/$version
diff --git a/src/base/strings/string16.cc b/src/base/strings/string16.cc index 2139034..a40212f 100644 --- a/src/base/strings/string16.cc +++ b/src/base/strings/string16.cc
@@ -69,6 +69,7 @@ } // namespace base } // namespace url -template class std::basic_string<url::base::char16, url::base::string16_char_traits>; +template class std:: + basic_string<url::base::char16, url::base::string16_internals::string16_char_traits>; #endif // WCHAR_T_IS_UTF32
diff --git a/src/base/strings/string16.h b/src/base/strings/string16.h index 67f9cee..b8d6c82 100644 --- a/src/base/strings/string16.h +++ b/src/base/strings/string16.h
@@ -44,7 +44,6 @@ typedef wchar_t char16; typedef std::wstring string16; -typedef std::char_traits<wchar_t> string16_char_traits; } // namespace base } // namespace url @@ -68,6 +67,11 @@ BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n); BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n); +// This namespace contains the implementation of base::string16 along with +// things that need to be found via argument-dependent lookup from a +// base::string16. +namespace string16_internals { + struct string16_char_traits { typedef char16 char_type; typedef int int_type; @@ -138,7 +142,11 @@ } }; -typedef std::basic_string<char16, base::string16_char_traits> string16; +} // namespace string16_internals + +typedef std::basic_string<char16, + base::string16_internals::string16_char_traits> + string16; } // namespace base } // namespace url @@ -182,8 +190,9 @@ // // TODO(mark): File this bug with Apple and update this note with a bug number. -extern template -class BASE_EXPORT std::basic_string<url::base::char16, url::base::string16_char_traits>; +extern template class BASE_EXPORT + std::basic_string<url::base::char16, + url::base::string16_internals::string16_char_traits>; // Specialize std::hash for base::string16. Although the style guide forbids // this in general, it is necessary for consistency with WCHAR_T_IS_UTF16
diff --git a/src/url/gurl.cc b/src/url/gurl.cc index f6bf408..e29e397 100644 --- a/src/url/gurl.cc +++ b/src/url/gurl.cc
@@ -374,6 +374,10 @@ return other; } +GURL GURL::GetWithoutFilename() const { + return Resolve("."); +} + bool GURL::IsStandard() const { return url::IsStandard(spec_.data(), parsed_.scheme); } @@ -505,14 +509,14 @@ #endif // WIN32 -bool GURL::DomainIs(url::base::StringPiece lower_ascii_domain) const { +bool GURL::DomainIs(url::base::StringPiece canonical_domain) const { if (!is_valid_) return false; // FileSystem URLs have empty host_piece, so check this first. - if (SchemeIsFileSystem() && inner_url_) - return inner_url_->DomainIs(lower_ascii_domain); - return url::DomainIs(host_piece(), lower_ascii_domain); + if (inner_url_ && SchemeIsFileSystem()) + return inner_url_->DomainIs(canonical_domain); + return url::DomainIs(host_piece(), canonical_domain); } bool GURL::EqualsIgnoringRef(const GURL& other) const {
diff --git a/src/url/gurl.h b/src/url/gurl.h index b3263c8..b410dba 100644 --- a/src/url/gurl.h +++ b/src/url/gurl.h
@@ -181,6 +181,15 @@ // will be the empty URL. GURL GetWithEmptyPath() const; + // A helper function to return a GURL without the filename, query values, and + // fragment. For example, + // GURL("https://www.foo.com/index.html?q=test").GetWithoutFilename().spec() + // will return "https://www.foo.com/". + // GURL("https://www.foo.com/bar/").GetWithoutFilename().spec() + // will return "https://www.foo.com/bar/". If the GURL is invalid or missing a + // scheme, authority or path, it will return an empty, invalid GURL. + GURL GetWithoutFilename() const; + // A helper function to return a GURL containing just the scheme, host, // and port from a URL. Equivalent to clearing any username and password, // replacing the path with a slash, and clearing everything after that. If @@ -388,11 +397,13 @@ // "www.google.com", this will return true for "com", "google.com", and // "www.google.com". // - // The input domain should be lower-case ASCII to match the canonicalized - // scheme. This call is more efficient than getting the host and check - // whether host has the specific domain or not because no copies or - // object constructions are done. - bool DomainIs(url::base::StringPiece lower_ascii_domain) const; + // The input domain should match host canonicalization rules. i.e. the input + // should be lowercase except for escape chars. + // + // This call is more efficient than getting the host and checking whether the + // host has the specific domain or not because no copies or object + // constructions are done. + bool DomainIs(url::base::StringPiece canonical_domain) const; // Checks whether or not two URLs are differing only in the ref (the part // after the # character).
diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc index 510b1ad..f6e426d 100644 --- a/src/url/gurl_unittest.cc +++ b/src/url/gurl_unittest.cc
@@ -387,6 +387,52 @@ } } +TEST(GURLTest, GetWithoutFilename) { + struct TestCase { + const char* input; + const char* expected; + } cases[] = { + // Common Standard URLs. + {"https://www.google.com", "https://www.google.com/"}, + {"https://www.google.com/", "https://www.google.com/"}, + {"https://www.google.com/maps.htm", "https://www.google.com/"}, + {"https://www.google.com/maps/", "https://www.google.com/maps/"}, + {"https://www.google.com/index.html", "https://www.google.com/"}, + {"https://www.google.com/index.html?q=maps", "https://www.google.com/"}, + {"https://www.google.com/index.html#maps/", "https://www.google.com/"}, + {"https://foo:bar@www.google.com/maps.htm", "https://foo:bar@www.google.com/"}, + {"https://www.google.com/maps/au/index.html", "https://www.google.com/maps/au/"}, + {"https://www.google.com/maps/au/north", "https://www.google.com/maps/au/"}, + {"https://www.google.com/maps/au/north/", "https://www.google.com/maps/au/north/"}, + {"https://www.google.com/maps/au/index.html?q=maps#fragment/", "https://www.google.com/maps/au/"}, + {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/", "http://www.google.com:8000/maps/au/"}, + {"https://www.google.com/maps/au/north/?q=maps#fragment", "https://www.google.com/maps/au/north/"}, + {"https://www.google.com/maps/au/north?q=maps#fragment", "https://www.google.com/maps/au/"}, + // Less common standard URLs. + {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"}, + {"file:///temporary/bar.html?baz=22","file:///temporary/"}, + {"ftp://foo/test/index.html", "ftp://foo/test/"}, + {"gopher://foo/test/index.html", "gopher://foo/test/"}, + {"ws://foo/test/index.html", "ws://foo/test/"}, + // Non-standard, hierarchical URLs. + {"chrome://foo/bar.html", "chrome://foo/"}, + {"httpa://foo/test/index.html", "httpa://foo/test/"}, + // Non-standard, non-hierarchical URLs. 
+ {"blob:https://foo.bar/test/index.html", ""}, + {"about:blank", ""}, + {"data:foobar", ""}, + {"scheme:opaque_data", ""}, + // Invalid URLs. + {"foobar", ""}, + }; + + for (size_t i = 0; i < arraysize(cases); i++) { + GURL url(cases[i].input); + GURL without_filename = url.GetWithoutFilename(); + EXPECT_EQ(cases[i].expected, without_filename.spec()) << i; + } +} + TEST(GURLTest, Replacements) { // The URL canonicalizer replacement test will handle most of these case. // The most important thing to do here is to check that the proper @@ -612,6 +658,11 @@ GURL invalid_url("google.com"); EXPECT_FALSE(invalid_url.is_valid()); EXPECT_FALSE(invalid_url.DomainIs("google.com")); + + GURL url_with_escape_chars("https://www.,.test"); + EXPECT_TRUE(url_with_escape_chars.is_valid()); + EXPECT_EQ(url_with_escape_chars.host(), "www.%2C.test"); + EXPECT_TRUE(url_with_escape_chars.DomainIs("%2C.test")); } TEST(GURLTest, DomainIsTerminatingDotBehavior) {
diff --git a/src/url/origin.cc b/src/url/origin.cc index 2bf959c..28ea0b6 100644 --- a/src/url/origin.cc +++ b/src/url/origin.cc
@@ -169,8 +169,8 @@ return GetPhysicalOrigin().IsSameOriginWith(other.GetPhysicalOrigin()); } -bool Origin::DomainIs(base::StringPiece lower_ascii_domain) const { - return !unique_ && url::DomainIs(tuple_.host(), lower_ascii_domain); +bool Origin::DomainIs(base::StringPiece canonical_domain) const { + return !unique_ && url::DomainIs(tuple_.host(), canonical_domain); } bool Origin::operator<(const Origin& other) const {
diff --git a/src/url/origin.h b/src/url/origin.h index 9e6b492..8b59b5a 100644 --- a/src/url/origin.h +++ b/src/url/origin.h
@@ -154,10 +154,14 @@ // Note: The returned URL will not necessarily be serialized to the same value // as the Origin would. The GURL will have an added "/" path for Origins with // valid SchemeHostPorts and file Origins. + // + // Try not to use this method under normal circumstances, as it loses type + // information. Downstream consumers can mistake the returned GURL for a full + // URL (e.g. with a path component). GURL GetURL() const; // Same as GURL::DomainIs. If |this| origin is unique, then returns false. - bool DomainIs(base::StringPiece lower_ascii_domain) const; + bool DomainIs(base::StringPiece canonical_domain) const; // Allows Origin to be used as a key in STL (for example, a std::set or // std::map).
diff --git a/src/url/url_canon_etc.cc b/src/url/url_canon_etc.cc index 31e9fb5..cc74123 100644 --- a/src/url/url_canon_etc.cc +++ b/src/url/url_canon_etc.cc
@@ -46,6 +46,17 @@ return input; } + // Skip whitespace removal for `data:` URLs. + // + // TODO(mkwst): Ideally, this would use something like `base::StartsWith`, but + // that turns out to be difficult to do correctly given this function's + // character type templating. + if (input_len > 5 && input[0] == 'd' && input[1] == 'a' && input[2] == 't' && + input[3] == 'a' && input[4] == ':') { + *output_len = input_len; + return input; + } + // Remove the whitespace into the new buffer and return it. for (int i = 0; i < input_len; i++) { if (!IsRemovableURLWhitespace(input[i])) {
diff --git a/src/url/url_util.cc b/src/url/url_util.cc index a390a9d..30f2e7e 100644 --- a/src/url/url_util.cc +++ b/src/url/url_util.cc
@@ -676,28 +676,27 @@ return DoFindAndCompareScheme(str, str_len, compare, found_scheme); } -bool DomainIs(base::StringPiece canonicalized_host, - base::StringPiece lower_ascii_domain) { - if (canonicalized_host.empty() || lower_ascii_domain.empty()) +bool DomainIs(base::StringPiece canonical_host, + base::StringPiece canonical_domain) { + if (canonical_host.empty() || canonical_domain.empty()) return false; // If the host name ends with a dot but the input domain doesn't, then we // ignore the dot in the host name. - size_t host_len = canonicalized_host.length(); - if (canonicalized_host.back() == '.' && lower_ascii_domain.back() != '.') + size_t host_len = canonical_host.length(); + if (canonical_host.back() == '.' && canonical_domain.back() != '.') --host_len; - if (host_len < lower_ascii_domain.length()) + if (host_len < canonical_domain.length()) return false; // |host_first_pos| is the start of the compared part of the host name, not // start of the whole host name. const char* host_first_pos = - canonicalized_host.data() + host_len - lower_ascii_domain.length(); + canonical_host.data() + host_len - canonical_domain.length(); - if (!base::LowerCaseEqualsASCII( - base::StringPiece(host_first_pos, lower_ascii_domain.length()), - lower_ascii_domain)) { + if (base::StringPiece(host_first_pos, canonical_domain.length()) != + canonical_domain) { return false; } @@ -705,7 +704,7 @@ // if the host name is longer than the input domain name, then the character // immediately before the compared part should be a dot. For example, // www.google.com has domain "google.com", but www.iamnotgoogle.com does not. - if (lower_ascii_domain[0] != '.' && host_len > lower_ascii_domain.length() && + if (canonical_domain[0] != '.' && host_len > canonical_domain.length() && *(host_first_pos - 1) != '.') { return false; }
diff --git a/src/url/url_util.h b/src/url/url_util.h index 643c29d..7486bf7 100644 --- a/src/url/url_util.h +++ b/src/url/url_util.h
@@ -173,16 +173,16 @@ // Hosts ---------------------------------------------------------------------- -// Returns true if the |canonicalized_host| matches or is in the same domain as -// the given |lower_ascii_domain| string. For example, if the canonicalized -// hostname is "www.google.com", this will return true for "com", "google.com", -// and "www.google.com" domains. +// Returns true if the |canonical_host| matches or is in the same domain as the +// given |canonical_domain| string. For example, if the canonicalized hostname +// is "www.google.com", this will return true for "com", "google.com", and +// "www.google.com" domains. // // If either of the input StringPieces is empty, the return value is false. The -// input domain should be a lower-case ASCII string in order to match the -// canonicalized host. -URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host, - base::StringPiece lower_ascii_domain); +// input domain should match host canonicalization rules. i.e. it should be +// lowercase except for escape chars. +URL_EXPORT bool DomainIs(base::StringPiece canonical_host, + base::StringPiece canonical_domain); // Returns true if the hostname is an IP address. Note: this function isn't very // cheap, as it must re-parse the host to verify.