Internal change
PiperOrigin-RevId: 145837870
Change-Id: I765fae6aca1bbd3d175ce1e9184de25e3cd4e6ac
diff --git a/import.sh b/import.sh
index 6e9f053..47de023 100644
--- a/import.sh
+++ b/import.sh
@@ -4,7 +4,7 @@
top=/tmp/chromium
mkdir $top
prefix=https://chromium.googlesource.com/chromium/src.git/+archive
-for version in 53.0.2785.116 54.0.2840.100
+for version in 54.0.2840.100 55.0.2883.87
do
mkdir $top/$version
cd $top/$version
diff --git a/src/url/gurl.cc b/src/url/gurl.cc
index c057724..d81b252 100644
--- a/src/url/gurl.cc
+++ b/src/url/gurl.cc
@@ -491,47 +491,13 @@
#endif // WIN32
bool GURL::DomainIs(url::base::StringPiece lower_ascii_domain) const {
- if (!is_valid_ || lower_ascii_domain.empty())
+ if (!is_valid_)
return false;
- // FileSystem URLs have empty parsed_.host, so check this first.
+ // FileSystem URLs have empty host_piece, so check this first.
if (SchemeIsFileSystem() && inner_url_)
return inner_url_->DomainIs(lower_ascii_domain);
-
- if (!parsed_.host.is_nonempty())
- return false;
-
- // If the host name ends with a dot but the input domain doesn't,
- // then we ignore the dot in the host name.
- const char* host_last_pos = spec_.data() + parsed_.host.end() - 1;
- int host_len = parsed_.host.len;
- int domain_len = lower_ascii_domain.length();
- if ('.' == *host_last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
- host_last_pos--;
- host_len--;
- }
-
- if (host_len < domain_len)
- return false;
-
- // |host_first_pos| is the start of the compared part of the host name, not
- // start of the whole host name.
- const char* host_first_pos = spec_.data() + parsed_.host.begin +
- host_len - domain_len;
-
- if (!url::base::LowerCaseEqualsASCII(
- url::base::StringPiece(host_first_pos, domain_len), lower_ascii_domain))
- return false;
-
- // Make sure there aren't extra characters in host before the compared part;
- // if the host name is longer than the input domain name, then the character
- // immediately before the compared part should be a dot. For example,
- // www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
- if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
- '.' != *(host_first_pos - 1))
- return false;
-
- return true;
+ return url::DomainIs(host_piece(), lower_ascii_domain);
}
void GURL::Swap(GURL* other) {
diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc
index 79c16bb..4e18da8 100644
--- a/src/url/gurl_unittest.cc
+++ b/src/url/gurl_unittest.cc
@@ -232,7 +232,6 @@
"http://user:pass@google.com:12345/path?k=v#fragment",
"http:/path",
"http:path",
- "://google.com",
};
for (size_t i = 0; i < arraysize(valid_cases); i++) {
EXPECT_TRUE(GURL(valid_cases[i]).is_valid())
@@ -244,6 +243,7 @@
"http:://google.com",
"http//google.com",
"http://google.com:12three45",
+ "://google.com",
"path",
};
for (size_t i = 0; i < arraysize(invalid_cases); i++) {
diff --git a/src/url/origin.cc b/src/url/origin.cc
index 43b5e7e..1ba07c1 100644
--- a/src/url/origin.cc
+++ b/src/url/origin.cc
@@ -64,6 +64,16 @@
return tuple_.Serialize();
}
+GURL Origin::GetURL() const {
+ if (unique())
+ return GURL();
+
+ if (scheme() == kFileScheme)
+ return GURL("file:///");
+
+ return tuple_.GetURL();
+}
+
bool Origin::IsSameOriginWith(const Origin& other) const {
if (unique_ || other.unique_)
return false;
@@ -71,6 +81,10 @@
return tuple_.Equals(other.tuple_);
}
+bool Origin::DomainIs(base::StringPiece lower_ascii_domain) const {
+ return !unique_ && url::DomainIs(tuple_.host(), lower_ascii_domain);
+}
+
bool Origin::operator<(const Origin& other) const {
return tuple_ < other.tuple_;
}
diff --git a/src/url/origin.h b/src/url/origin.h
index aab1f05..273622e 100644
--- a/src/url/origin.h
+++ b/src/url/origin.h
@@ -122,6 +122,17 @@
return IsSameOriginWith(other);
}
+ // Efficiently returns what GURL(Serialize()) would return, without
+ // re-parsing the URL. This can be used for the (rare) times a GURL
+ // representation is needed for an Origin.
+ // Note: The returned URL will not necessarily be serialized to the same value
+ // as the Origin would. The GURL will have an added "/" path for Origins with
+ // valid SchemeHostPorts and file Origins.
+ GURL GetURL() const;
+
+ // Same as GURL::DomainIs. If |this| origin is unique, then returns false.
+ bool DomainIs(base::StringPiece lower_ascii_domain) const;
+
// Allows Origin to be used as a key in STL (for example, a std::set or
// std::map).
bool operator<(const Origin& other) const;
diff --git a/src/url/origin_unittest.cc b/src/url/origin_unittest.cc
index d1ba161..7a67533 100644
--- a/src/url/origin_unittest.cc
+++ b/src/url/origin_unittest.cc
@@ -13,6 +13,26 @@
namespace {
+void ExpectParsedComponentEqual(const url::Component& a,
+ const url::Component& b) {
+ EXPECT_EQ(a.begin, b.begin);
+ EXPECT_EQ(a.len, b.len);
+}
+
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
+ EXPECT_EQ(a, b);
+ const url::Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
+ const url::Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
+ ExpectParsedComponentEqual(a_parsed.scheme, b_parsed.scheme);
+ ExpectParsedComponentEqual(a_parsed.username, b_parsed.username);
+ ExpectParsedComponentEqual(a_parsed.password, b_parsed.password);
+ ExpectParsedComponentEqual(a_parsed.host, b_parsed.host);
+ ExpectParsedComponentEqual(a_parsed.port, b_parsed.port);
+ ExpectParsedComponentEqual(a_parsed.path, b_parsed.path);
+ ExpectParsedComponentEqual(a_parsed.query, b_parsed.query);
+ ExpectParsedComponentEqual(a_parsed.ref, b_parsed.ref);
+}
+
TEST(OriginTest, UniqueOriginComparison) {
url::Origin unique_origin;
EXPECT_EQ("", unique_origin.scheme());
@@ -38,6 +58,8 @@
EXPECT_FALSE(origin.IsSameOriginWith(origin));
EXPECT_FALSE(unique_origin.IsSameOriginWith(origin));
EXPECT_FALSE(origin.IsSameOriginWith(unique_origin));
+
+ ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
}
}
@@ -103,6 +125,8 @@
EXPECT_TRUE(origin.IsSameOriginWith(origin));
EXPECT_FALSE(different_origin.IsSameOriginWith(origin));
EXPECT_FALSE(origin.IsSameOriginWith(different_origin));
+
+ ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
}
}
@@ -127,7 +151,10 @@
GURL url(test_case.url);
EXPECT_TRUE(url.is_valid());
url::Origin origin(url);
- EXPECT_EQ(test_case.expected, origin.Serialize());
+ std::string serialized = origin.Serialize();
+ ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
+
+ EXPECT_EQ(test_case.expected, serialized);
// The '<<' operator should produce the same serialization as Serialize().
std::stringstream out;
@@ -186,6 +213,8 @@
EXPECT_EQ(test.port, origin.port());
EXPECT_FALSE(origin.unique());
EXPECT_TRUE(origin.IsSameOriginWith(origin));
+
+ ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
}
}
@@ -221,6 +250,8 @@
EXPECT_EQ(0, origin.port());
EXPECT_TRUE(origin.unique());
EXPECT_FALSE(origin.IsSameOriginWith(origin));
+
+ ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
}
}
@@ -249,7 +280,66 @@
EXPECT_EQ(0, origin.port());
EXPECT_TRUE(origin.unique());
EXPECT_FALSE(origin.IsSameOriginWith(origin));
+
+ ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
}
}
+TEST(OriginTest, DomainIs) {
+ const struct {
+ const char* url;
+ const char* lower_ascii_domain;
+ bool expected_domain_is;
+ } kTestCases[] = {
+ {"http://google.com/foo", "google.com", true},
+ {"http://www.google.com:99/foo", "google.com", true},
+ {"http://www.google.com.cn/foo", "google.com", false},
+ {"http://www.google.comm", "google.com", false},
+ {"http://www.iamnotgoogle.com/foo", "google.com", false},
+ {"http://www.google.com/foo", "Google.com", false},
+
+ // If the host ends with a dot, it matches domains with or without a dot.
+ {"http://www.google.com./foo", "google.com", true},
+ {"http://www.google.com./foo", "google.com.", true},
+ {"http://www.google.com./foo", ".com", true},
+ {"http://www.google.com./foo", ".com.", true},
+
+ // But, if the host doesn't end with a dot and the input domain does, then
+ // it's considered to not match.
+ {"http://google.com/foo", "google.com.", false},
+
+ // If the host ends with two dots, it doesn't match.
+ {"http://www.google.com../foo", "google.com", false},
+
+ // Filesystem scheme.
+ {"filesystem:http://www.google.com:99/foo/", "google.com", true},
+ {"filesystem:http://www.iamnotgoogle.com/foo/", "google.com", false},
+
+ // File scheme.
+ {"file:///home/user/text.txt", "", false},
+ {"file:///home/user/text.txt", "txt", false},
+ };
+
+ for (const auto& test_case : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "(url, domain): (" << test_case.url
+ << ", " << test_case.lower_ascii_domain
+ << ")");
+ GURL url(test_case.url);
+ ASSERT_TRUE(url.is_valid());
+ url::Origin origin(url);
+
+ EXPECT_EQ(test_case.expected_domain_is,
+ origin.DomainIs(test_case.lower_ascii_domain));
+ }
+
+ // If the URL is invalid, DomainIs returns false.
+ GURL invalid_url("google.com");
+ ASSERT_FALSE(invalid_url.is_valid());
+ EXPECT_FALSE(url::Origin(invalid_url).DomainIs("google.com"));
+
+ // Unique origins.
+ EXPECT_FALSE(url::Origin().DomainIs(""));
+ EXPECT_FALSE(url::Origin().DomainIs("com"));
+}
+
} // namespace url
diff --git a/src/url/scheme_host_port.cc b/src/url/scheme_host_port.cc
index ebc5232..e6bb493 100644
--- a/src/url/scheme_host_port.cc
+++ b/src/url/scheme_host_port.cc
@@ -11,6 +11,7 @@
#include "base/logging.h"
#include "url/gurl.h"
+#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
#include "url/url_canon_stdstring.h"
@@ -141,12 +142,46 @@
}
std::string SchemeHostPort::Serialize() const {
+ // Null checking for |parsed| in SerializeInternal is probably slower than
+ // just filling it in and discarding it here.
+ url::Parsed parsed;
+ return SerializeInternal(&parsed);
+}
+
+GURL SchemeHostPort::GetURL() const {
+ url::Parsed parsed;
+ std::string serialized = SerializeInternal(&parsed);
+
+ // If the serialized string is passed to GURL for parsing, it will append an
+ // empty path "/". Add that here. Note: per RFC 6454 we cannot do this for
+ // normal Origin serialization.
+ DCHECK(!parsed.path.is_valid());
+ parsed.path = Component(serialized.length(), 1);
+ serialized.append("/");
+ return GURL(std::move(serialized), parsed, true);
+}
+
+bool SchemeHostPort::Equals(const SchemeHostPort& other) const {
+ return port_ == other.port() && scheme_ == other.scheme() &&
+ host_ == other.host();
+}
+
+bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
+ return std::tie(port_, scheme_, host_) <
+ std::tie(other.port_, other.scheme_, other.host_);
+}
+
+std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
std::string result;
if (IsInvalid())
return result;
+ parsed->scheme = Component(0, scheme_.length());
result.append(scheme_);
+
result.append(kStandardSchemeSeparator);
+
+ parsed->host = Component(result.length(), host_.length());
result.append(host_);
if (port_ == 0)
@@ -163,20 +198,12 @@
const int buf_size = 6;
char buf[buf_size];
_itoa_s(port_, buf, buf_size, 10);
- result.append(buf);
+ size_t len = strlen(buf);
+ parsed->port = Component(result.length(), len);
+ result.append(buf, len);
}
return result;
}
-bool SchemeHostPort::Equals(const SchemeHostPort& other) const {
- return port_ == other.port() && scheme_ == other.scheme() &&
- host_ == other.host();
-}
-
-bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
- return std::tie(port_, scheme_, host_) <
- std::tie(other.port_, other.scheme_, other.host_);
-}
-
} // namespace url
diff --git a/src/url/scheme_host_port.h b/src/url/scheme_host_port.h
index 47a9041..dc8862a 100644
--- a/src/url/scheme_host_port.h
+++ b/src/url/scheme_host_port.h
@@ -16,6 +16,8 @@
namespace url {
+struct Parsed;
+
// This class represents a (scheme, host, port) tuple extracted from a URL.
//
// The primary purpose of this class is to represent relevant network-authority
@@ -111,6 +113,10 @@
// serialized as a unique Origin.
std::string Serialize() const;
+ // Efficiently returns what GURL(Serialize()) would return, without needing to
+ // re-parse the URL.
+ GURL GetURL() const;
+
// Two SchemeHostPort objects are "equal" iff their schemes, hosts, and ports
// are exact matches.
//
@@ -124,6 +130,8 @@
bool operator<(const SchemeHostPort& other) const;
private:
+ std::string SerializeInternal(url::Parsed* parsed) const;
+
std::string scheme_;
std::string host_;
uint16_t port_;
diff --git a/src/url/url_canon_etc.cc b/src/url/url_canon_etc.cc
index e9da94c..9dd40da 100644
--- a/src/url/url_canon_etc.cc
+++ b/src/url/url_canon_etc.cc
@@ -89,7 +89,7 @@
// Scheme is unspecified or empty, convert to empty by appending a colon.
*out_scheme = Component(output->length(), 0);
output->push_back(':');
- return true;
+ return false;
}
// The output scheme starts from the current position.
diff --git a/src/url/url_canon_unittest.cc b/src/url/url_canon_unittest.cc
index 3dd617d..2f053f2 100644
--- a/src/url/url_canon_unittest.cc
+++ b/src/url/url_canon_unittest.cc
@@ -240,6 +240,7 @@
// Don't re-escape something already escaped. Note that it will
// "canonicalize" the 'A' to 'a', but that's OK.
{"ht%3Atp", "ht%3atp:", Component(0, 7), false},
+ {"", ":", Component(0, 0), false},
};
std::string out_str;
@@ -282,7 +283,7 @@
out_str.clear();
StdStringCanonOutput output(&out_str);
- EXPECT_TRUE(CanonicalizeScheme("", Component(0, -1), &output, &out_comp));
+ EXPECT_FALSE(CanonicalizeScheme("", Component(0, -1), &output, &out_comp));
output.Complete();
EXPECT_EQ(std::string(":"), out_str);
@@ -1303,7 +1304,7 @@
{"http://[www.google.com]/", "http://[www.google.com]/", false},
{"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false},
{"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", true},
- {"www.google.com", ":www.google.com/", true},
+ {"www.google.com", ":www.google.com/", false},
{"http://192.0x00A80001", "http://192.168.0.1/", true},
{"http://www/foo%2Ehtml", "http://www/foo.html", true},
{"http://user:pass@/", "http://user:pass@/", false},
@@ -1758,7 +1759,7 @@
} path_cases[] = {
{"javascript:", "javascript:"},
{"JavaScript:Foo", "javascript:Foo"},
- {":\":This /is interesting;?#", ":\":This /is interesting;?#"},
+ {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"},
};
for (size_t i = 0; i < arraysize(path_cases); i++) {
diff --git a/src/url/url_util.cc b/src/url/url_util.cc
index bb43a4a..8522eb1 100644
--- a/src/url/url_util.cc
+++ b/src/url/url_util.cc
@@ -492,6 +492,43 @@
return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
}
+bool DomainIs(base::StringPiece canonicalized_host,
+ base::StringPiece lower_ascii_domain) {
+ if (canonicalized_host.empty() || lower_ascii_domain.empty())
+ return false;
+
+ // If the host name ends with a dot but the input domain doesn't, then we
+ // ignore the dot in the host name.
+ size_t host_len = canonicalized_host.length();
+ if (canonicalized_host.back() == '.' && lower_ascii_domain.back() != '.')
+ --host_len;
+
+ if (host_len < lower_ascii_domain.length())
+ return false;
+
+ // |host_first_pos| is the start of the compared part of the host name, not
+ // the start of the whole host name.
+ const char* host_first_pos =
+ canonicalized_host.data() + host_len - lower_ascii_domain.length();
+
+ if (!base::LowerCaseEqualsASCII(
+ base::StringPiece(host_first_pos, lower_ascii_domain.length()),
+ lower_ascii_domain)) {
+ return false;
+ }
+
+ // Make sure there aren't extra characters in host before the compared part;
+ // if the host name is longer than the input domain name, then the character
+ // immediately before the compared part should be a dot. For example,
+ // www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
+ if (lower_ascii_domain[0] != '.' && host_len > lower_ascii_domain.length() &&
+ *(host_first_pos - 1) != '.') {
+ return false;
+ }
+
+ return true;
+}
+
bool Canonicalize(const char* spec,
int spec_len,
bool trim_path_end,
diff --git a/src/url/url_util.h b/src/url/url_util.h
index a209a61..724ce95 100644
--- a/src/url/url_util.h
+++ b/src/url/url_util.h
@@ -8,6 +8,7 @@
#include <string>
#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_constants.h"
@@ -35,7 +36,7 @@
// library.
URL_EXPORT void Shutdown();
-// Schemes --------------------------------------------------------------------
+// Schemes ---------------------------------------------------------------------
// Types of a scheme representing the requirements on the data represented by
// the authority component of a URL with the scheme.
@@ -132,7 +133,20 @@
const Component& scheme,
SchemeType* type);
-// URL library wrappers -------------------------------------------------------
+// Domains ---------------------------------------------------------------------
+
+// Returns true if the |canonicalized_host| matches or is in the same domain as
+// the given |lower_ascii_domain| string. For example, if the canonicalized
+// hostname is "www.google.com", this will return true for "com", "google.com",
+// and "www.google.com" domains.
+//
+// If either of the input StringPieces is empty, the return value is false. The
+// input domain should be a lower-case ASCII string in order to match the
+// canonicalized host.
+URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host,
+ base::StringPiece lower_ascii_domain);
+
+// URL library wrappers --------------------------------------------------------
// Parses the given spec according to the extracted scheme type. Normal users
// should use the URL object, although this may be useful if performance is
@@ -204,7 +218,7 @@
CanonOutput* output,
Parsed* out_parsed);
-// String helper functions ----------------------------------------------------
+// String helper functions -----------------------------------------------------
// Unescapes the given string using URL escaping rules.
URL_EXPORT void DecodeURLEscapeSequences(const char* input,
diff --git a/src/url/url_util_unittest.cc b/src/url/url_util_unittest.cc
index 74db9e5..eceb505 100644
--- a/src/url/url_util_unittest.cc
+++ b/src/url/url_util_unittest.cc
@@ -374,4 +374,47 @@
EXPECT_FALSE(resolved_parsed.ref.is_valid());
}
+TEST(URLUtilTest, TestDomainIs) {
+ const struct {
+ const char* canonicalized_host;
+ const char* lower_ascii_domain;
+ bool expected_domain_is;
+ } kTestCases[] = {
+ {"google.com", "google.com", true},
+ {"www.google.com", "google.com", true}, // Subdomain is ignored.
+ {"www.google.com.cn", "google.com", false}, // Different TLD.
+ {"www.google.comm", "google.com", false},
+ {"www.iamnotgoogle.com", "google.com", false}, // Different hostname.
+ {"www.google.com", "Google.com", false}, // The input is not lower-cased.
+
+ // If the host ends with a dot, it matches domains with or without a dot.
+ {"www.google.com.", "google.com", true},
+ {"www.google.com.", "google.com.", true},
+ {"www.google.com.", ".com", true},
+ {"www.google.com.", ".com.", true},
+
+ // But, if the host doesn't end with a dot and the input domain does, then
+ // it's considered to not match.
+ {"www.google.com", "google.com.", false},
+
+ // If the host ends with two dots, it doesn't match.
+ {"www.google.com..", "google.com", false},
+
+ // Empty parameters.
+ {"www.google.com", "", false},
+ {"", "www.google.com", false},
+ {"", "", false},
+ };
+
+ for (const auto& test_case : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "(host, domain): ("
+ << test_case.canonicalized_host << ", "
+ << test_case.lower_ascii_domain << ")");
+
+ EXPECT_EQ(
+ test_case.expected_domain_is,
+ DomainIs(test_case.canonicalized_host, test_case.lower_ascii_domain));
+ }
+}
+
} // namespace url