Internal change

PiperOrigin-RevId: 145837870
Change-Id: I765fae6aca1bbd3d175ce1e9184de25e3cd4e6ac

commit: 6b31f0e37e67f8486baa6e18cde534b8f04a4f7f [log] [tgz]
author: Devany Sandoval <sandovad@google.com> Fri Jan 27 14:35:37 2017 -0800
committer: sandovad <sandovad@google.com> Tue Sep 03 12:54:21 2019 -0700
tree: 3ea668dd25e94fba12f4f2865de00006aa55e65b
parent: 76ffcca033822e8cce853a3039c72437ad542974 [diff]
diff --git a/import.sh b/import.sh
index 6e9f053..47de023 100644
--- a/import.sh
+++ b/import.sh

@@ -4,7 +4,7 @@
 top=/tmp/chromium
 mkdir $top
 prefix=https://chromium.googlesource.com/chromium/src.git/+archive
-for version in 53.0.2785.116 54.0.2840.100
+for version in 54.0.2840.100 55.0.2883.87
 do
   mkdir $top/$version
   cd $top/$version

diff --git a/src/url/gurl.cc b/src/url/gurl.cc
index c057724..d81b252 100644
--- a/src/url/gurl.cc
+++ b/src/url/gurl.cc

@@ -491,47 +491,13 @@
 #endif  // WIN32
 
 bool GURL::DomainIs(url::base::StringPiece lower_ascii_domain) const {
-  if (!is_valid_ || lower_ascii_domain.empty())
+  if (!is_valid_)
     return false;
 
-  // FileSystem URLs have empty parsed_.host, so check this first.
+  // FileSystem URLs have empty host_piece, so check this first.
   if (SchemeIsFileSystem() && inner_url_)
     return inner_url_->DomainIs(lower_ascii_domain);
-
-  if (!parsed_.host.is_nonempty())
-    return false;
-
-  // If the host name ends with a dot but the input domain doesn't,
-  // then we ignore the dot in the host name.
-  const char* host_last_pos = spec_.data() + parsed_.host.end() - 1;
-  int host_len = parsed_.host.len;
-  int domain_len = lower_ascii_domain.length();
-  if ('.' == *host_last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
-    host_last_pos--;
-    host_len--;
-  }
-
-  if (host_len < domain_len)
-    return false;
-
-  // |host_first_pos| is the start of the compared part of the host name, not
-  // start of the whole host name.
-  const char* host_first_pos = spec_.data() + parsed_.host.begin +
-                               host_len - domain_len;
-
-  if (!url::base::LowerCaseEqualsASCII(
-           url::base::StringPiece(host_first_pos, domain_len), lower_ascii_domain))
-    return false;
-
-  // Make sure there aren't extra characters in host before the compared part;
-  // if the host name is longer than the input domain name, then the character
-  // immediately before the compared part should be a dot. For example,
-  // www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
-  if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
-      '.' != *(host_first_pos - 1))
-    return false;
-
-  return true;
+  return url::DomainIs(host_piece(), lower_ascii_domain);
 }
 
 void GURL::Swap(GURL* other) {

diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc
index 79c16bb..4e18da8 100644
--- a/src/url/gurl_unittest.cc
+++ b/src/url/gurl_unittest.cc

@@ -232,7 +232,6 @@
     "http://user:pass@google.com:12345/path?k=v#fragment",
     "http:/path",
     "http:path",
-    "://google.com",
   };
   for (size_t i = 0; i < arraysize(valid_cases); i++) {
     EXPECT_TRUE(GURL(valid_cases[i]).is_valid())
@@ -244,6 +243,7 @@
     "http:://google.com",
     "http//google.com",
     "http://google.com:12three45",
+    "://google.com",
     "path",
   };
   for (size_t i = 0; i < arraysize(invalid_cases); i++) {

diff --git a/src/url/origin.cc b/src/url/origin.cc
index 43b5e7e..1ba07c1 100644
--- a/src/url/origin.cc
+++ b/src/url/origin.cc

@@ -64,6 +64,16 @@
   return tuple_.Serialize();
 }
 
+GURL Origin::GetURL() const {
+  if (unique())
+    return GURL();
+
+  if (scheme() == kFileScheme)
+    return GURL("file:///");
+
+  return tuple_.GetURL();
+}
+
 bool Origin::IsSameOriginWith(const Origin& other) const {
   if (unique_ || other.unique_)
     return false;
@@ -71,6 +81,10 @@
   return tuple_.Equals(other.tuple_);
 }
 
+bool Origin::DomainIs(base::StringPiece lower_ascii_domain) const {
+  return !unique_ && url::DomainIs(tuple_.host(), lower_ascii_domain);
+}
+
 bool Origin::operator<(const Origin& other) const {
   return tuple_ < other.tuple_;
 }

diff --git a/src/url/origin.h b/src/url/origin.h
index aab1f05..273622e 100644
--- a/src/url/origin.h
+++ b/src/url/origin.h

@@ -122,6 +122,17 @@
     return IsSameOriginWith(other);
   }
 
+  // Efficiently returns what GURL(Serialize()) would without re-parsing the
+  // URL. This can be used for the (rare) times a GURL representation is needed
+  // for an Origin.
+  // Note: The returned URL will not necessarily be serialized to the same value
+  // as the Origin would. The GURL will have an added "/" path for Origins with
+  // valid SchemeHostPorts and file Origins.
+  GURL GetURL() const;
+
+  // Same as GURL::DomainIs. If |this| origin is unique, then returns false.
+  bool DomainIs(base::StringPiece lower_ascii_domain) const;
+
   // Allows Origin to be used as a key in STL (for example, a std::set or
   // std::map).
   bool operator<(const Origin& other) const;

diff --git a/src/url/origin_unittest.cc b/src/url/origin_unittest.cc
index d1ba161..7a67533 100644
--- a/src/url/origin_unittest.cc
+++ b/src/url/origin_unittest.cc

@@ -13,6 +13,26 @@
 
 namespace {
 
+void ExpectParsedComponentEqual(const url::Component& a,
+                                const url::Component& b) {
+  EXPECT_EQ(a.begin, b.begin);
+  EXPECT_EQ(a.len, b.len);
+}
+
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
+  EXPECT_EQ(a, b);
+  const url::Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
+  const url::Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
+  ExpectParsedComponentEqual(a_parsed.scheme, b_parsed.scheme);
+  ExpectParsedComponentEqual(a_parsed.username, b_parsed.username);
+  ExpectParsedComponentEqual(a_parsed.password, b_parsed.password);
+  ExpectParsedComponentEqual(a_parsed.host, b_parsed.host);
+  ExpectParsedComponentEqual(a_parsed.port, b_parsed.port);
+  ExpectParsedComponentEqual(a_parsed.path, b_parsed.path);
+  ExpectParsedComponentEqual(a_parsed.query, b_parsed.query);
+  ExpectParsedComponentEqual(a_parsed.ref, b_parsed.ref);
+}
+
 TEST(OriginTest, UniqueOriginComparison) {
   url::Origin unique_origin;
   EXPECT_EQ("", unique_origin.scheme());
@@ -38,6 +58,8 @@
     EXPECT_FALSE(origin.IsSameOriginWith(origin));
     EXPECT_FALSE(unique_origin.IsSameOriginWith(origin));
     EXPECT_FALSE(origin.IsSameOriginWith(unique_origin));
+
+    ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
   }
 }
 
@@ -103,6 +125,8 @@
     EXPECT_TRUE(origin.IsSameOriginWith(origin));
     EXPECT_FALSE(different_origin.IsSameOriginWith(origin));
     EXPECT_FALSE(origin.IsSameOriginWith(different_origin));
+
+    ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
   }
 }
 
@@ -127,7 +151,10 @@
     GURL url(test_case.url);
     EXPECT_TRUE(url.is_valid());
     url::Origin origin(url);
-    EXPECT_EQ(test_case.expected, origin.Serialize());
+    std::string serialized = origin.Serialize();
+    ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
+
+    EXPECT_EQ(test_case.expected, serialized);
 
     // The '<<' operator should produce the same serialization as Serialize().
     std::stringstream out;
@@ -186,6 +213,8 @@
     EXPECT_EQ(test.port, origin.port());
     EXPECT_FALSE(origin.unique());
     EXPECT_TRUE(origin.IsSameOriginWith(origin));
+
+    ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
   }
 }
 
@@ -221,6 +250,8 @@
     EXPECT_EQ(0, origin.port());
     EXPECT_TRUE(origin.unique());
     EXPECT_FALSE(origin.IsSameOriginWith(origin));
+
+    ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
   }
 }
 
@@ -249,7 +280,66 @@
     EXPECT_EQ(0, origin.port());
     EXPECT_TRUE(origin.unique());
     EXPECT_FALSE(origin.IsSameOriginWith(origin));
+
+    ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
   }
 }
 
+TEST(OriginTest, DomainIs) {
+  const struct {
+    const char* url;
+    const char* lower_ascii_domain;
+    bool expected_domain_is;
+  } kTestCases[] = {
+      {"http://google.com/foo", "google.com", true},
+      {"http://www.google.com:99/foo", "google.com", true},
+      {"http://www.google.com.cn/foo", "google.com", false},
+      {"http://www.google.comm", "google.com", false},
+      {"http://www.iamnotgoogle.com/foo", "google.com", false},
+      {"http://www.google.com/foo", "Google.com", false},
+
+      // If the host ends with a dot, it matches domains with or without a dot.
+      {"http://www.google.com./foo", "google.com", true},
+      {"http://www.google.com./foo", "google.com.", true},
+      {"http://www.google.com./foo", ".com", true},
+      {"http://www.google.com./foo", ".com.", true},
+
+      // But, if the host doesn't end with a dot and the input domain does, then
+      // it's considered to not match.
+      {"http://google.com/foo", "google.com.", false},
+
+      // If the host ends with two dots, it doesn't match.
+      {"http://www.google.com../foo", "google.com", false},
+
+      // Filesystem scheme.
+      {"filesystem:http://www.google.com:99/foo/", "google.com", true},
+      {"filesystem:http://www.iamnotgoogle.com/foo/", "google.com", false},
+
+      // File scheme.
+      {"file:///home/user/text.txt", "", false},
+      {"file:///home/user/text.txt", "txt", false},
+  };
+
+  for (const auto& test_case : kTestCases) {
+    SCOPED_TRACE(testing::Message() << "(url, domain): (" << test_case.url
+                                    << ", " << test_case.lower_ascii_domain
+                                    << ")");
+    GURL url(test_case.url);
+    ASSERT_TRUE(url.is_valid());
+    url::Origin origin(url);
+
+    EXPECT_EQ(test_case.expected_domain_is,
+              origin.DomainIs(test_case.lower_ascii_domain));
+  }
+
+  // If the URL is invalid, DomainIs returns false.
+  GURL invalid_url("google.com");
+  ASSERT_FALSE(invalid_url.is_valid());
+  EXPECT_FALSE(url::Origin(invalid_url).DomainIs("google.com"));
+
+  // Unique origins.
+  EXPECT_FALSE(url::Origin().DomainIs(""));
+  EXPECT_FALSE(url::Origin().DomainIs("com"));
+}
+
 }  // namespace url

diff --git a/src/url/scheme_host_port.cc b/src/url/scheme_host_port.cc
index ebc5232..e6bb493 100644
--- a/src/url/scheme_host_port.cc
+++ b/src/url/scheme_host_port.cc

@@ -11,6 +11,7 @@
 
 #include "base/logging.h"
 #include "url/gurl.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_canon_internal.h"
 #include "url/url_canon_stdstring.h"
@@ -141,12 +142,46 @@
 }
 
 std::string SchemeHostPort::Serialize() const {
+  // Null checking for |parsed| in SerializeInternal is probably slower than
+  // just filling it in and discarding it here.
+  url::Parsed parsed;
+  return SerializeInternal(&parsed);
+}
+
+GURL SchemeHostPort::GetURL() const {
+  url::Parsed parsed;
+  std::string serialized = SerializeInternal(&parsed);
+
+  // If the serialized string is passed to GURL for parsing, it will append an
+  // empty path "/". Add that here. Note: per RFC 6454 we cannot do this for
+  // normal Origin serialization.
+  DCHECK(!parsed.path.is_valid());
+  parsed.path = Component(serialized.length(), 1);
+  serialized.append("/");
+  return GURL(std::move(serialized), parsed, true);
+}
+
+bool SchemeHostPort::Equals(const SchemeHostPort& other) const {
+  return port_ == other.port() && scheme_ == other.scheme() &&
+         host_ == other.host();
+}
+
+bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
+  return std::tie(port_, scheme_, host_) <
+         std::tie(other.port_, other.scheme_, other.host_);
+}
+
+std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
   std::string result;
   if (IsInvalid())
     return result;
 
+  parsed->scheme = Component(0, scheme_.length());
   result.append(scheme_);
+
   result.append(kStandardSchemeSeparator);
+
+  parsed->host = Component(result.length(), host_.length());
   result.append(host_);
 
   if (port_ == 0)
@@ -163,20 +198,12 @@
     const int buf_size = 6;
     char buf[buf_size];
     _itoa_s(port_, buf, buf_size, 10);
-    result.append(buf);
+    size_t len = strlen(buf);
+    parsed->port = Component(result.length(), len);
+    result.append(buf, len);
   }
 
   return result;
 }
 
-bool SchemeHostPort::Equals(const SchemeHostPort& other) const {
-  return port_ == other.port() && scheme_ == other.scheme() &&
-         host_ == other.host();
-}
-
-bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
-  return std::tie(port_, scheme_, host_) <
-         std::tie(other.port_, other.scheme_, other.host_);
-}
-
 }  // namespace url

diff --git a/src/url/scheme_host_port.h b/src/url/scheme_host_port.h
index 47a9041..dc8862a 100644
--- a/src/url/scheme_host_port.h
+++ b/src/url/scheme_host_port.h

@@ -16,6 +16,8 @@
 
 namespace url {
 
+struct Parsed;
+
 // This class represents a (scheme, host, port) tuple extracted from a URL.
 //
 // The primary purpose of this class is to represent relevant network-authority
@@ -111,6 +113,10 @@
   // serialized as a unique Origin.
   std::string Serialize() const;
 
+  // Efficiently returns what GURL(Serialize()) would return, without needing to
+  // re-parse the URL.
+  GURL GetURL() const;
+
   // Two SchemeHostPort objects are "equal" iff their schemes, hosts, and ports
   // are exact matches.
   //
@@ -124,6 +130,8 @@
   bool operator<(const SchemeHostPort& other) const;
 
  private:
+  std::string SerializeInternal(url::Parsed* parsed) const;
+
   std::string scheme_;
   std::string host_;
   uint16_t port_;

diff --git a/src/url/url_canon_etc.cc b/src/url/url_canon_etc.cc
index e9da94c..9dd40da 100644
--- a/src/url/url_canon_etc.cc
+++ b/src/url/url_canon_etc.cc

@@ -89,7 +89,7 @@
     // Scheme is unspecified or empty, convert to empty by appending a colon.
     *out_scheme = Component(output->length(), 0);
     output->push_back(':');
-    return true;
+    return false;
   }
 
   // The output scheme starts from the current position.

diff --git a/src/url/url_canon_unittest.cc b/src/url/url_canon_unittest.cc
index 3dd617d..2f053f2 100644
--- a/src/url/url_canon_unittest.cc
+++ b/src/url/url_canon_unittest.cc

@@ -240,6 +240,7 @@
       // Don't re-escape something already escaped. Note that it will
       // "canonicalize" the 'A' to 'a', but that's OK.
     {"ht%3Atp", "ht%3atp:", Component(0, 7), false},
+    {"", ":", Component(0, 0), false},
   };
 
   std::string out_str;
@@ -282,7 +283,7 @@
   out_str.clear();
   StdStringCanonOutput output(&out_str);
 
-  EXPECT_TRUE(CanonicalizeScheme("", Component(0, -1), &output, &out_comp));
+  EXPECT_FALSE(CanonicalizeScheme("", Component(0, -1), &output, &out_comp));
   output.Complete();
 
   EXPECT_EQ(std::string(":"), out_str);
@@ -1303,7 +1304,7 @@
     {"http://[www.google.com]/", "http://[www.google.com]/", false},
     {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false},
     {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", true},
-    {"www.google.com", ":www.google.com/", true},
+    {"www.google.com", ":www.google.com/", false},
     {"http://192.0x00A80001", "http://192.168.0.1/", true},
     {"http://www/foo%2Ehtml", "http://www/foo.html", true},
     {"http://user:pass@/", "http://user:pass@/", false},
@@ -1758,7 +1759,7 @@
   } path_cases[] = {
     {"javascript:", "javascript:"},
     {"JavaScript:Foo", "javascript:Foo"},
-    {":\":This /is interesting;?#", ":\":This /is interesting;?#"},
+    {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"},
   };
 
   for (size_t i = 0; i < arraysize(path_cases); i++) {

diff --git a/src/url/url_util.cc b/src/url/url_util.cc
index bb43a4a..8522eb1 100644
--- a/src/url/url_util.cc
+++ b/src/url/url_util.cc

@@ -492,6 +492,43 @@
   return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
 }
 
+bool DomainIs(base::StringPiece canonicalized_host,
+              base::StringPiece lower_ascii_domain) {
+  if (canonicalized_host.empty() || lower_ascii_domain.empty())
+    return false;
+
+  // If the host name ends with a dot but the input domain doesn't, then we
+  // ignore the dot in the host name.
+  size_t host_len = canonicalized_host.length();
+  if (canonicalized_host.back() == '.' && lower_ascii_domain.back() != '.')
+    --host_len;
+
+  if (host_len < lower_ascii_domain.length())
+    return false;
+
+  // |host_first_pos| is the start of the compared part of the host name, not
+  // start of the whole host name.
+  const char* host_first_pos =
+      canonicalized_host.data() + host_len - lower_ascii_domain.length();
+
+  if (!base::LowerCaseEqualsASCII(
+          base::StringPiece(host_first_pos, lower_ascii_domain.length()),
+          lower_ascii_domain)) {
+    return false;
+  }
+
+  // Make sure there aren't extra characters in host before the compared part;
+  // if the host name is longer than the input domain name, then the character
+  // immediately before the compared part should be a dot. For example,
+  // www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
+  if (lower_ascii_domain[0] != '.' && host_len > lower_ascii_domain.length() &&
+      *(host_first_pos - 1) != '.') {
+    return false;
+  }
+
+  return true;
+}
+
 bool Canonicalize(const char* spec,
                   int spec_len,
                   bool trim_path_end,

diff --git a/src/url/url_util.h b/src/url/url_util.h
index a209a61..724ce95 100644
--- a/src/url/url_util.h
+++ b/src/url/url_util.h

@@ -8,6 +8,7 @@
 #include <string>
 
 #include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
 #include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_constants.h"
@@ -35,7 +36,7 @@
 // library.
 URL_EXPORT void Shutdown();
 
-// Schemes --------------------------------------------------------------------
+// Schemes ---------------------------------------------------------------------
 
 // Types of a scheme representing the requirements on the data represented by
 // the authority component of a URL with the scheme.
@@ -132,7 +133,20 @@
                                       const Component& scheme,
                                       SchemeType* type);
 
-// URL library wrappers -------------------------------------------------------
+// Domains ---------------------------------------------------------------------
+
+// Returns true if the |canonicalized_host| matches or is in the same domain as
+// the given |lower_ascii_domain| string. For example, if the canonicalized
+// hostname is "www.google.com", this will return true for "com", "google.com",
+// and "www.google.com" domains.
+//
+// If either of the input StringPieces is empty, the return value is false. The
+// input domain should be a lower-case ASCII string in order to match the
+// canonicalized host.
+URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host,
+                         base::StringPiece lower_ascii_domain);
+
+// URL library wrappers --------------------------------------------------------
 
 // Parses the given spec according to the extracted scheme type. Normal users
 // should use the URL object, although this may be useful if performance is
@@ -204,7 +218,7 @@
     CanonOutput* output,
     Parsed* out_parsed);
 
-// String helper functions ----------------------------------------------------
+// String helper functions -----------------------------------------------------
 
 // Unescapes the given string using URL escaping rules.
 URL_EXPORT void DecodeURLEscapeSequences(const char* input,

diff --git a/src/url/url_util_unittest.cc b/src/url/url_util_unittest.cc
index 74db9e5..eceb505 100644
--- a/src/url/url_util_unittest.cc
+++ b/src/url/url_util_unittest.cc

@@ -374,4 +374,47 @@
   EXPECT_FALSE(resolved_parsed.ref.is_valid());
 }
 
+TEST(URLUtilTest, TestDomainIs) {
+  const struct {
+    const char* canonicalized_host;
+    const char* lower_ascii_domain;
+    bool expected_domain_is;
+  } kTestCases[] = {
+      {"google.com", "google.com", true},
+      {"www.google.com", "google.com", true},      // Subdomain is ignored.
+      {"www.google.com.cn", "google.com", false},  // Different TLD.
+      {"www.google.comm", "google.com", false},
+      {"www.iamnotgoogle.com", "google.com", false},  // Different hostname.
+      {"www.google.com", "Google.com", false},  // The input is not lower-cased.
+
+      // If the host ends with a dot, it matches domains with or without a dot.
+      {"www.google.com.", "google.com", true},
+      {"www.google.com.", "google.com.", true},
+      {"www.google.com.", ".com", true},
+      {"www.google.com.", ".com.", true},
+
+      // But, if the host doesn't end with a dot and the input domain does, then
+      // it's considered to not match.
+      {"www.google.com", "google.com.", false},
+
+      // If the host ends with two dots, it doesn't match.
+      {"www.google.com..", "google.com", false},
+
+      // Empty parameters.
+      {"www.google.com", "", false},
+      {"", "www.google.com", false},
+      {"", "", false},
+  };
+
+  for (const auto& test_case : kTestCases) {
+    SCOPED_TRACE(testing::Message() << "(host, domain): ("
+                                    << test_case.canonicalized_host << ", "
+                                    << test_case.lower_ascii_domain << ")");
+
+    EXPECT_EQ(
+        test_case.expected_domain_is,
+        DomainIs(test_case.canonicalized_host, test_case.lower_ascii_domain));
+  }
+}
+
 }  // namespace url
commit	6b31f0e37e67f8486baa6e18cde534b8f04a4f7f	[log] [tgz]
author	Devany Sandoval <sandovad@google.com>	Fri Jan 27 14:35:37 2017 -0800
committer	sandovad <sandovad@google.com>	Tue Sep 03 12:54:21 2019 -0700
tree	3ea668dd25e94fba12f4f2865de00006aa55e65b
parent	76ffcca033822e8cce853a3039c72437ad542974 [diff]