Internal change
PiperOrigin-RevId: 148486483
Change-Id: Iebcb04cdcbdf2d47099b8ee50eff901139c5a721
diff --git a/import.sh b/import.sh
index 47de023..8671302 100644
--- a/import.sh
+++ b/import.sh
@@ -4,7 +4,7 @@
top=/tmp/chromium
mkdir $top
prefix=https://chromium.googlesource.com/chromium/src.git/+archive
-for version in 54.0.2840.100 55.0.2883.87
+for version in 55.0.2883.87 56.0.2924.87
do
mkdir $top/$version
cd $top/$version
diff --git a/src/base/strings/utf_string_conversions.cc b/src/base/strings/utf_string_conversions.cc
new file mode 100644
index 0000000..944078f
--- /dev/null
+++ b/src/base/strings/utf_string_conversions.cc
@@ -0,0 +1,124 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/utf_string_conversions.h"
+
+#include <stdint.h>
+
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversion_utils.h"
+#include "build/build_config.h"
+
+namespace url {
+namespace base {
+
+namespace {
+
+// Generalized Unicode converter -----------------------------------------------
+
+// Converts the given source Unicode character type to the given destination
+// Unicode character type as a STL string. The given input buffer and size
+// determine the source, and the given output STL string will be replaced by
+// the result.
+template<typename SRC_CHAR, typename DEST_STRING>
+bool ConvertUnicode(const SRC_CHAR* src,
+ size_t src_len,
+ DEST_STRING* output) {
+ // ICU requires 32-bit numbers.
+ bool success = true;
+ int32_t src_len32 = static_cast<int32_t>(src_len);
+ for (int32_t i = 0; i < src_len32; i++) {
+ uint32_t code_point;
+ if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
+ WriteUnicodeCharacter(code_point, output);
+ } else {
+ WriteUnicodeCharacter(0xFFFD, output);
+ success = false;
+ }
+ }
+
+ return success;
+}
+
+} // namespace
+
+// UTF16 <-> UTF8 --------------------------------------------------------------
+
+#if defined(WCHAR_T_IS_UTF32)
+
+bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
+ if (IsStringASCII(StringPiece(src, src_len))) {
+ output->assign(src, src + src_len);
+ return true;
+ } else {
+ PrepareForUTF16Or32Output(src, src_len, output);
+ return ConvertUnicode(src, src_len, output);
+ }
+}
+
+string16 UTF8ToUTF16(StringPiece utf8) {
+ if (IsStringASCII(utf8)) {
+ return string16(utf8.begin(), utf8.end());
+ }
+
+ string16 ret;
+ PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
+ // Ignore the success flag of this call, it will do the best it can for
+ // invalid input, which is what we want here.
+ ConvertUnicode(utf8.data(), utf8.length(), &ret);
+ return ret;
+}
+
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+ if (IsStringASCII(StringPiece16(src, src_len))) {
+ output->assign(src, src + src_len);
+ return true;
+ } else {
+ PrepareForUTF8Output(src, src_len, output);
+ return ConvertUnicode(src, src_len, output);
+ }
+}
+
+std::string UTF16ToUTF8(StringPiece16 utf16) {
+ if (IsStringASCII(utf16)) {
+ return std::string(utf16.begin(), utf16.end());
+ }
+
+ std::string ret;
+ // Ignore the success flag of this call, it will do the best it can for
+ // invalid input, which is what we want here.
+ UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
+ return ret;
+}
+
+#elif defined(WCHAR_T_IS_UTF16)
+// Delegates to the wide versions. NOTE(review): not defined in this file.
+
+bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
+ return UTF8ToWide(src, src_len, output);
+}
+
+string16 UTF8ToUTF16(StringPiece utf8) {
+ return UTF8ToWide(utf8);
+}
+
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+ return WideToUTF8(src, src_len, output);
+}
+
+std::string UTF16ToUTF8(StringPiece16 utf16) {
+ if (IsStringASCII(utf16))
+ return std::string(utf16.data(), utf16.data() + utf16.length());
+
+ std::string ret;
+ PrepareForUTF8Output(utf16.data(), utf16.length(), &ret);
+ ConvertUnicode(utf16.data(), utf16.length(), &ret);
+ return ret;
+}
+
+#endif
+
+} // namespace base
+} // namespace url
diff --git a/src/base/strings/utf_string_conversions.h b/src/base/strings/utf_string_conversions.h
new file mode 100644
index 0000000..d6876b3
--- /dev/null
+++ b/src/base/strings/utf_string_conversions.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
+#define BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
+
+#include <stddef.h>
+
+#include <string>
+
+#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
+
+#define BASE_EXPORT
+
+namespace url {
+namespace base {
+
+BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8);
+BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16);
+
+} // namespace base
+} // namespace url
+
+#endif // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
diff --git a/src/build/build_config.h b/src/build/build_config.h
index f67dd48..5f99fc1 100644
--- a/src/build/build_config.h
+++ b/src/build/build_config.h
@@ -48,7 +48,6 @@
#endif
#elif defined(_WIN32)
#define OS_WIN 1
-#define TOOLKIT_VIEWS 1
#elif defined(__FreeBSD__)
#define OS_FREEBSD 1
#elif defined(__NetBSD__)
@@ -111,6 +110,31 @@
#define ARCH_CPU_X86 1
#define ARCH_CPU_32_BITS 1
#define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__s390x__)
+#define ARCH_CPU_S390_FAMILY 1
+#define ARCH_CPU_S390X 1
+#define ARCH_CPU_64_BITS 1
+#define ARCH_CPU_BIG_ENDIAN 1
+#elif defined(__s390__)
+#define ARCH_CPU_S390_FAMILY 1
+#define ARCH_CPU_S390 1
+#define ARCH_CPU_31_BITS 1
+#define ARCH_CPU_BIG_ENDIAN 1
+#elif defined(__PPC64__) && defined(__BIG_ENDIAN__)
+#define ARCH_CPU_PPC64_FAMILY 1
+#define ARCH_CPU_PPC64 1
+#define ARCH_CPU_64_BITS 1
+#define ARCH_CPU_BIG_ENDIAN 1
+#elif defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
+#define ARCH_CPU_PPC64_FAMILY 1
+#define ARCH_CPU_PPC64 1
+#define ARCH_CPU_64_BITS 1
+#define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__PPC__)
+#define ARCH_CPU_PPC_FAMILY 1
+#define ARCH_CPU_PPC 1
+#define ARCH_CPU_32_BITS 1
+#define ARCH_CPU_BIG_ENDIAN 1
#elif defined(__ARMEL__)
#define ARCH_CPU_ARM_FAMILY 1
#define ARCH_CPU_ARMEL 1
diff --git a/src/url/gurl.cc b/src/url/gurl.cc
index d81b252..1d6dfc6 100644
--- a/src/url/gurl.cc
+++ b/src/url/gurl.cc
@@ -180,14 +180,6 @@
return EmptyStringForGURL();
}
-bool GURL::operator==(const GURL& other) const {
- return spec_ == other.spec_;
-}
-
-bool GURL::operator!=(const GURL& other) const {
- return spec_ != other.spec_;
-}
-
bool GURL::operator<(const GURL& other) const {
return spec_ < other.spec_;
}
@@ -510,3 +502,20 @@
std::ostream& operator<<(std::ostream& out, const GURL& url) {
return out << url.possibly_invalid_spec();
}
+
+bool operator==(const GURL& x, const GURL& y) {
+ return x.possibly_invalid_spec() == y.possibly_invalid_spec();
+}
+
+bool operator!=(const GURL& x, const GURL& y) {
+ return !(x == y);
+}
+
+bool operator==(const GURL& x, const url::base::StringPiece& spec) {
+ DCHECK_EQ(GURL(spec).possibly_invalid_spec(), spec);
+ return x.possibly_invalid_spec() == spec;
+}
+
+bool operator!=(const GURL& x, const url::base::StringPiece& spec) {
+ return !(x == spec);
+}
diff --git a/src/url/gurl.h b/src/url/gurl.h
index 4d8b5d4..aeb77aa 100644
--- a/src/url/gurl.h
+++ b/src/url/gurl.h
@@ -132,10 +132,6 @@
return parsed_;
}
- // Defiant equality operator!
- bool operator==(const GURL& other) const;
- bool operator!=(const GURL& other) const;
-
// Allows GURL to used as a key in STL (for example, a std::set or std::map).
bool operator<(const GURL& other) const;
bool operator>(const GURL& other) const;
@@ -240,7 +236,8 @@
// higher-level and more complete semantics. See that function's documentation
// for more detail.
bool SchemeIsCryptographic() const {
- return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme);
+ return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme) ||
+ SchemeIs(url::kHttpsSuboriginScheme);
}
// Returns true if the scheme is "blob".
@@ -248,6 +245,12 @@
return SchemeIs(url::kBlobScheme);
}
+ // Returns true if the scheme indicates a serialized suborigin.
+ bool SchemeIsSuborigin() const {
+ return SchemeIs(url::kHttpSuboriginScheme) ||
+ SchemeIs(url::kHttpsSuboriginScheme);
+ }
+
// The "content" of the URL is everything after the scheme (skipping the
// scheme delimiting colon). It is an error to get the content of an invalid
// URL: the result will be an empty string.
@@ -447,4 +450,13 @@
// Stream operator so GURL can be used in assertion statements.
URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url);
+URL_EXPORT bool operator==(const GURL& x, const GURL& y);
+URL_EXPORT bool operator!=(const GURL& x, const GURL& y);
+
+// Equality operator for comparing raw spec_. This should be used in place of
+// url == GURL(spec) where |spec| is known (i.e. constants). This is to prevent
+// needlessly re-parsing |spec| into a temporary GURL.
+URL_EXPORT bool operator==(const GURL& x, const url::base::StringPiece& spec);
+URL_EXPORT bool operator!=(const GURL& x, const url::base::StringPiece& spec);
+
#endif // URL_GURL_H_
diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc
index 4e18da8..aae5048 100644
--- a/src/url/gurl_unittest.cc
+++ b/src/url/gurl_unittest.cc
@@ -5,6 +5,7 @@
#include <stddef.h>
#include "base/macros.h"
+#include "base/strings/utf_string_conversions.h"
#include "testing/base/public/gunit.h"
#include "url/gurl.h"
#include "url/url_canon.h"
@@ -12,9 +13,6 @@
namespace url {
-using test_utils::WStringToUTF16;
-using test_utils::ConvertUTF8ToUTF16;
-
namespace {
template<typename CHAR>
@@ -67,11 +65,11 @@
// the parser is already tested and works, so we are mostly interested if the
// object does the right thing with the results.
TEST(GURLTest, Components) {
- GURL empty_url(WStringToUTF16(L""));
+ GURL empty_url(base::UTF8ToUTF16(""));
EXPECT_TRUE(empty_url.is_empty());
EXPECT_FALSE(empty_url.is_valid());
- GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
+ GURL url(base::UTF8ToUTF16("http://user:pass@google.com:99/foo;bar?q=a#ref"));
EXPECT_FALSE(url.is_empty());
EXPECT_TRUE(url.is_valid());
EXPECT_TRUE(url.SchemeIs("http"));
@@ -116,7 +114,8 @@
}
TEST(GURLTest, Copy) {
- GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
+ GURL url(base::UTF8ToUTF16(
+ "http://user:pass@google.com:99/foo;bar?q=a#ref"));
GURL url2(url);
EXPECT_TRUE(url2.is_valid());
@@ -149,7 +148,8 @@
}
TEST(GURLTest, Assign) {
- GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
+ GURL url(base::UTF8ToUTF16(
+ "http://user:pass@google.com:99/foo;bar?q=a#ref"));
GURL url2;
url2 = url;
@@ -191,7 +191,8 @@
}
TEST(GURLTest, CopyFileSystem) {
- GURL url(WStringToUTF16(L"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref"));
+ GURL url(base::UTF8ToUTF16(
+ "filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref"));
GURL url2(url);
EXPECT_TRUE(url2.is_valid());
@@ -313,9 +314,9 @@
EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL);
// Wide code path.
- GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base));
+ GURL inputw(base::UTF8ToUTF16(resolve_cases[i].base));
GURL outputw =
- input.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative));
+ input.Resolve(base::UTF8ToUTF16(resolve_cases[i].relative));
EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
EXPECT_EQ(outputw.SchemeIsFileSystem(), outputw.inner_url() != NULL);
@@ -677,12 +678,42 @@
EXPECT_FALSE(GURL("http://bar/").SchemeIsWSOrWSS());
}
+TEST(GURLTest, SchemeIsCryptographic) {
+ EXPECT_TRUE(GURL("https://foo.bar.com/").SchemeIsCryptographic());
+ EXPECT_TRUE(GURL("HTTPS://foo.bar.com/").SchemeIsCryptographic());
+ EXPECT_TRUE(GURL("HtTpS://foo.bar.com/").SchemeIsCryptographic());
+
+ EXPECT_TRUE(GURL("wss://foo.bar.com/").SchemeIsCryptographic());
+ EXPECT_TRUE(GURL("WSS://foo.bar.com/").SchemeIsCryptographic());
+ EXPECT_TRUE(GURL("WsS://foo.bar.com/").SchemeIsCryptographic());
+
+ EXPECT_TRUE(GURL("https-so://foo.bar.com/").SchemeIsCryptographic());
+ EXPECT_TRUE(GURL("HTTPS-SO://foo.bar.com/").SchemeIsCryptographic());
+ EXPECT_TRUE(GURL("HtTpS-So://foo.bar.com/").SchemeIsCryptographic());
+
+ EXPECT_FALSE(GURL("http://foo.bar.com/").SchemeIsCryptographic());
+ EXPECT_FALSE(GURL("ws://foo.bar.com/").SchemeIsCryptographic());
+ EXPECT_FALSE(GURL("http-so://foo.bar.com/").SchemeIsCryptographic());
+}
+
TEST(GURLTest, SchemeIsBlob) {
EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsBlob());
EXPECT_TRUE(GURL("blob://bar/").SchemeIsBlob());
EXPECT_FALSE(GURL("http://bar/").SchemeIsBlob());
}
+TEST(GURLTest, SchemeIsSuborigin) {
+ EXPECT_TRUE(GURL("http-so://foo.bar.com/").SchemeIsSuborigin());
+ EXPECT_TRUE(GURL("HTTP-SO://foo.bar.com/").SchemeIsSuborigin());
+ EXPECT_TRUE(GURL("HtTp-So://foo.bar.com/").SchemeIsSuborigin());
+ EXPECT_FALSE(GURL("http://foo.bar.com/").SchemeIsSuborigin());
+
+ EXPECT_TRUE(GURL("https-so://foo.bar.com/").SchemeIsSuborigin());
+ EXPECT_TRUE(GURL("HTTPS-SO://foo.bar.com/").SchemeIsSuborigin());
+ EXPECT_TRUE(GURL("HtTpS-So://foo.bar.com/").SchemeIsSuborigin());
+ EXPECT_FALSE(GURL("https://foo.bar.com/").SchemeIsSuborigin());
+}
+
TEST(GURLTest, ContentAndPathForNonStandardURLs) {
struct TestCase {
const char* url;
diff --git a/src/url/origin.cc b/src/url/origin.cc
index 1ba07c1..fac78cf 100644
--- a/src/url/origin.cc
+++ b/src/url/origin.cc
@@ -16,10 +16,26 @@
namespace url {
-Origin::Origin() : unique_(true) {
+namespace {
+
+GURL AddSuboriginToUrl(const GURL& url, const std::string& suborigin) {
+ GURL::Replacements replacements;
+ if (url.scheme() == kHttpScheme) {
+ replacements.SetSchemeStr(kHttpSuboriginScheme);
+ } else {
+ DCHECK(url.scheme() == kHttpsScheme);
+ replacements.SetSchemeStr(kHttpsSuboriginScheme);
+ }
+ std::string new_host = suborigin + "." + url.host();
+ replacements.SetHostStr(new_host);
+ return url.ReplaceComponents(replacements);
}
-Origin::Origin(const GURL& url) : unique_(true) {
+} // namespace
+
+Origin::Origin() : unique_(true), suborigin_(std::string()) {}
+
+Origin::Origin(const GURL& url) : unique_(true), suborigin_(std::string()) {
if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob()))
return;
@@ -31,6 +47,31 @@
// the "path", which boils down to everything after the scheme. GURL's
// 'GetContent()' gives us exactly that.
tuple_ = SchemeHostPort(GURL(url.GetContent()));
+ } else if (url.SchemeIsSuborigin()) {
+ GURL::Replacements replacements;
+ if (url.scheme() == kHttpSuboriginScheme) {
+ replacements.SetSchemeStr(kHttpScheme);
+ } else {
+ DCHECK(url.scheme() == kHttpsSuboriginScheme);
+ replacements.SetSchemeStr(kHttpsScheme);
+ }
+
+ std::string host = url.host();
+ size_t suborigin_end = host.find(".");
+ bool no_dot = suborigin_end == std::string::npos;
+ std::string new_host(
+ no_dot ? ""
+ : host.substr(suborigin_end + 1,
+ url.host().length() - suborigin_end - 1));
+ replacements.SetHostStr(new_host);
+
+ tuple_ = SchemeHostPort(url.ReplaceComponents(replacements));
+
+ bool invalid_suborigin = no_dot || suborigin_end == 0;
+ if (invalid_suborigin || tuple_.IsInvalid())
+ return;
+
+ suborigin_ = host.substr(0, suborigin_end);
} else {
tuple_ = SchemeHostPort(url);
}
@@ -38,9 +79,14 @@
unique_ = tuple_.IsInvalid();
}
-Origin::Origin(base::StringPiece scheme, base::StringPiece host, uint16_t port)
- : tuple_(scheme, host, port) {
+Origin::Origin(base::StringPiece scheme,
+ base::StringPiece host,
+ uint16_t port,
+ base::StringPiece suborigin,
+ SchemeHostPort::ConstructPolicy policy)
+ : tuple_(scheme, host, port, policy) {
unique_ = tuple_.IsInvalid();
+ suborigin_ = suborigin.as_string();
}
Origin::~Origin() {
@@ -51,7 +97,22 @@
base::StringPiece scheme,
base::StringPiece host,
uint16_t port) {
- return Origin(scheme, host, port);
+ return Origin(scheme, host, port, "", SchemeHostPort::CHECK_CANONICALIZATION);
+}
+
+Origin Origin::CreateFromNormalizedTuple(base::StringPiece scheme,
+ base::StringPiece host,
+ uint16_t port) {
+ return CreateFromNormalizedTupleWithSuborigin(scheme, host, port, "");
+}
+
+Origin Origin::CreateFromNormalizedTupleWithSuborigin(
+ base::StringPiece scheme,
+ base::StringPiece host,
+ uint16_t port,
+ base::StringPiece suborigin) {
+ return Origin(scheme, host, port, suborigin,
+ SchemeHostPort::ALREADY_CANONICALIZED);
}
std::string Origin::Serialize() const {
@@ -61,9 +122,21 @@
if (scheme() == kFileScheme)
return "file://";
+ if (!suborigin_.empty()) {
+ GURL url_with_suborigin = AddSuboriginToUrl(tuple_.GetURL(), suborigin_);
+ return SchemeHostPort(url_with_suborigin).Serialize();
+ }
+
return tuple_.Serialize();
}
+Origin Origin::GetPhysicalOrigin() const {
+ if (suborigin_.empty())
+ return *this;
+
+ return Origin(tuple_.GetURL());
+}
+
GURL Origin::GetURL() const {
if (unique())
return GURL();
@@ -71,14 +144,23 @@
if (scheme() == kFileScheme)
return GURL("file:///");
- return tuple_.GetURL();
+ GURL tuple_url(tuple_.GetURL());
+
+ if (!suborigin_.empty())
+ return AddSuboriginToUrl(tuple_url, suborigin_);
+
+ return tuple_url;
}
bool Origin::IsSameOriginWith(const Origin& other) const {
if (unique_ || other.unique_)
return false;
- return tuple_.Equals(other.tuple_);
+ return tuple_.Equals(other.tuple_) && suborigin_ == other.suborigin_;
+}
+
+bool Origin::IsSamePhysicalOriginWith(const Origin& other) const {
+ return GetPhysicalOrigin().IsSameOriginWith(other.GetPhysicalOrigin());
}
bool Origin::DomainIs(base::StringPiece lower_ascii_domain) const {
@@ -97,4 +179,8 @@
return Origin(a).IsSameOriginWith(Origin(b));
}
+bool IsSamePhysicalOriginWith(const GURL& a, const GURL& b) {
+ return Origin(a).IsSamePhysicalOriginWith(Origin(b));
+}
+
} // namespace url
diff --git a/src/url/origin.h b/src/url/origin.h
index 273622e..1c28588 100644
--- a/src/url/origin.h
+++ b/src/url/origin.h
@@ -90,8 +90,8 @@
explicit Origin(const GURL& url);
// Creates an Origin from a |scheme|, |host|, and |port|. All the parameters
- // must be valid and canonicalized. In particular, note that this cannot be
- // used to create unique origins; 'url::Origin()' is the right way to do that.
+ // must be valid and canonicalized. Do not use this method to create unique
+ // origins. Use Origin() for that.
//
// This constructor should be used in order to pass 'Origin' objects back and
// forth over IPC (as transitioning through GURL would risk potentially
@@ -102,6 +102,21 @@
base::StringPiece host,
uint16_t port);
+ // Creates an origin without sanity checking that the host is canonicalized.
+ // This should only be used when converting between already normalized types,
+ // and should NOT be used for IPC.
+ static Origin CreateFromNormalizedTuple(base::StringPiece scheme,
+ base::StringPiece host,
+ uint16_t port);
+
+ // Same as CreateFromNormalizedTuple() above, but adds a suborigin component
+ // as well.
+ static Origin CreateFromNormalizedTupleWithSuborigin(
+ base::StringPiece scheme,
+ base::StringPiece host,
+ uint16_t port,
+ base::StringPiece suborigin);
+
~Origin();
// For unique origins, these return ("", "", 0).
@@ -109,19 +124,34 @@
const std::string& host() const { return tuple_.host(); }
uint16_t port() const { return tuple_.port(); }
+ // Note that an origin without a suborigin will return the empty string.
+ const std::string& suborigin() const { return suborigin_; }
+
bool unique() const { return unique_; }
// An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with
// the addition that all Origins with a 'file' scheme serialize to "file://".
+ // If the Origin has a suborigin, it will be serialized per
+ // https://w3c.github.io/webappsec-suborigins/#serializing.
std::string Serialize() const;
+ // Returns the physical origin for Origin. If the suborigin is empty, this
+ // will just return a copy of the Origin. If it has a suborigin, will return
+ // the Origin of just the scheme/host/port tuple, without the suborigin. See
+ // https://w3c.github.io/webappsec-suborigins/.
+ Origin GetPhysicalOrigin() const;
+
// Two Origins are "same-origin" if their schemes, hosts, and ports are exact
- // matches; and neither is unique.
+ // matches; and neither is unique. If either origin has a suborigin,
+ // the suborigins also must be exact matches.
bool IsSameOriginWith(const Origin& other) const;
bool operator==(const Origin& other) const {
return IsSameOriginWith(other);
}
+ // Same as above, but ignores suborigins if they exist.
+ bool IsSamePhysicalOriginWith(const Origin& other) const;
+
// Efficiently returns what GURL(Serialize()) would without re-parsing the
// URL. This can be used for the (rare) times a GURL representation is needed
// for an Origin.
@@ -138,15 +168,21 @@
bool operator<(const Origin& other) const;
private:
- Origin(base::StringPiece scheme, base::StringPiece host, uint16_t port);
+ Origin(base::StringPiece scheme,
+ base::StringPiece host,
+ uint16_t port,
+ base::StringPiece suborigin,
+ SchemeHostPort::ConstructPolicy policy);
SchemeHostPort tuple_;
bool unique_;
+ std::string suborigin_;
};
URL_EXPORT std::ostream& operator<<(std::ostream& out, const Origin& origin);
URL_EXPORT bool IsSameOriginWith(const GURL& a, const GURL& b);
+URL_EXPORT bool IsSamePhysicalOriginWith(const GURL& a, const GURL& b);
} // namespace url
diff --git a/src/url/origin_unittest.cc b/src/url/origin_unittest.cc
index 7a67533..fee161b 100644
--- a/src/url/origin_unittest.cc
+++ b/src/url/origin_unittest.cc
@@ -13,24 +13,26 @@
namespace {
-void ExpectParsedComponentEqual(const url::Component& a,
- const url::Component& b) {
- EXPECT_EQ(a.begin, b.begin);
- EXPECT_EQ(a.len, b.len);
-}
-
void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
EXPECT_EQ(a, b);
const url::Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
const url::Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
- ExpectParsedComponentEqual(a_parsed.scheme, b_parsed.scheme);
- ExpectParsedComponentEqual(a_parsed.username, b_parsed.username);
- ExpectParsedComponentEqual(a_parsed.password, b_parsed.password);
- ExpectParsedComponentEqual(a_parsed.host, b_parsed.host);
- ExpectParsedComponentEqual(a_parsed.port, b_parsed.port);
- ExpectParsedComponentEqual(a_parsed.path, b_parsed.path);
- ExpectParsedComponentEqual(a_parsed.query, b_parsed.query);
- ExpectParsedComponentEqual(a_parsed.ref, b_parsed.ref);
+ EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin);
+ EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len);
+ EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin);
+ EXPECT_EQ(a_parsed.username.len, b_parsed.username.len);
+ EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin);
+ EXPECT_EQ(a_parsed.password.len, b_parsed.password.len);
+ EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin);
+ EXPECT_EQ(a_parsed.host.len, b_parsed.host.len);
+ EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin);
+ EXPECT_EQ(a_parsed.port.len, b_parsed.port.len);
+ EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin);
+ EXPECT_EQ(a_parsed.path.len, b_parsed.path.len);
+ EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin);
+ EXPECT_EQ(a_parsed.query.len, b_parsed.query.len);
+ EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin);
+ EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len);
}
TEST(OriginTest, UniqueOriginComparison) {
@@ -63,6 +65,52 @@
}
}
+TEST(OriginTest, ConstructFromTuple) {
+ struct TestCases {
+ const char* const scheme;
+ const char* const host;
+ const uint16_t port;
+ const char* const suborigin;
+ } cases[] = {
+ {"http", "example.com", 80, ""},
+ {"http", "example.com", 123, ""},
+ {"https", "example.com", 443, ""},
+ {"http-so", "foobar.example.com", 80, "foobar"},
+ {"http-so", "foobar.example.com", 123, "foobar"},
+ {"https-so", "foobar.example.com", 443, "foobar"},
+ };
+
+ for (const auto& test_case : cases) {
+ testing::Message scope_message;
+ if (test_case.suborigin != std::string()) {
+ scope_message << test_case.scheme << "-so://" << test_case.suborigin
+ << "." << test_case.host << ":" << test_case.port;
+ } else {
+ scope_message << test_case.scheme << "://" << test_case.host << ":"
+ << test_case.port;
+ }
+ SCOPED_TRACE(scope_message);
+
+ url::Origin origin_without_suborigin =
+ url::Origin::CreateFromNormalizedTuple(test_case.scheme, test_case.host,
+ test_case.port);
+
+ url::Origin origin_with_suborigin =
+ url::Origin::CreateFromNormalizedTupleWithSuborigin(
+ test_case.scheme, test_case.host, test_case.port,
+ test_case.suborigin);
+
+ EXPECT_EQ(test_case.scheme, origin_without_suborigin.scheme());
+ EXPECT_EQ(test_case.host, origin_without_suborigin.host());
+ EXPECT_EQ(test_case.port, origin_without_suborigin.port());
+
+ EXPECT_EQ(test_case.scheme, origin_with_suborigin.scheme());
+ EXPECT_EQ(test_case.host, origin_with_suborigin.host());
+ EXPECT_EQ(test_case.port, origin_with_suborigin.port());
+ EXPECT_EQ(test_case.suborigin, origin_with_suborigin.suborigin());
+ }
+}
+
TEST(OriginTest, ConstructFromGURL) {
url::Origin different_origin(GURL("https://not-in-the-list.test/"));
@@ -151,10 +199,14 @@
GURL url(test_case.url);
EXPECT_TRUE(url.is_valid());
url::Origin origin(url);
+ EXPECT_TRUE(origin.suborigin().empty());
std::string serialized = origin.Serialize();
+ std::string serialized_physical_origin =
+ origin.GetPhysicalOrigin().Serialize();
ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
EXPECT_EQ(test_case.expected, serialized);
+ EXPECT_EQ(test_case.expected, serialized_physical_origin);
// The '<<' operator should produce the same serialization as Serialize().
std::stringstream out;
@@ -163,6 +215,119 @@
}
}
+TEST(OriginTest, SuboriginSerialization) {
+ struct TestCases {
+ const char* const url;
+ const char* const expected;
+ const char* const expected_physical_origin;
+ const char* const expected_suborigin;
+ } cases[] = {
+ {"http-so://foobar.example.com/", "http-so://foobar.example.com",
+ "http://example.com", "foobar"},
+ {"http-so://foobar.example.com:123/", "http-so://foobar.example.com:123",
+ "http://example.com:123", "foobar"},
+ {"https-so://foobar.example.com/", "https-so://foobar.example.com",
+ "https://example.com", "foobar"},
+ {"https-so://foobar.example.com:123/",
+ "https-so://foobar.example.com:123", "https://example.com:123",
+ "foobar"},
+ {"http://example.com/", "http://example.com", "http://example.com", ""},
+ {"http-so://foobar.example.com/some/path", "http-so://foobar.example.com",
+ "http://example.com", "foobar"},
+ {"http-so://foobar.example.com/some/path?query",
+ "http-so://foobar.example.com", "http://example.com", "foobar"},
+ {"http-so://foobar.example.com/some/path#fragment",
+ "http-so://foobar.example.com", "http://example.com", "foobar"},
+ {"http-so://foobar.example.com/some/path?query#fragment",
+ "http-so://foobar.example.com", "http://example.com", "foobar"},
+ {"http-so://foobar.example.com:1234/some/path?query#fragment",
+ "http-so://foobar.example.com:1234", "http://example.com:1234",
+ "foobar"},
+ };
+
+ for (const auto& test_case : cases) {
+ SCOPED_TRACE(test_case.url);
+ GURL url(test_case.url);
+ EXPECT_TRUE(url.is_valid());
+ url::Origin origin(url);
+ std::string serialized = origin.Serialize();
+ std::string serialized_physical_origin =
+ origin.GetPhysicalOrigin().Serialize();
+ EXPECT_FALSE(origin.unique());
+ EXPECT_EQ(test_case.expected_suborigin, origin.suborigin());
+ ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
+
+ EXPECT_EQ(test_case.expected, serialized);
+ EXPECT_EQ(test_case.expected_physical_origin, serialized_physical_origin);
+
+ // The '<<' operator should produce the same serialization as Serialize().
+ std::stringstream out;
+ out << origin;
+ EXPECT_EQ(test_case.expected, out.str());
+ }
+
+ const char* const failure_cases[] = {
+ "http-so://.", "http-so://foo", "http-so://.foo", "http-so://foo.",
+ "https-so://.", "https-so://foo", "https-so://.foo", "https-so://foo.",
+ };
+
+ for (const auto& test_case : failure_cases) {
+ SCOPED_TRACE(test_case);
+ GURL url(test_case);
+ EXPECT_TRUE(url.is_valid());
+ url::Origin origin(url);
+ std::string serialized = origin.Serialize();
+ std::string serialized_physical_origin =
+ origin.GetPhysicalOrigin().Serialize();
+ EXPECT_TRUE(origin.unique());
+ EXPECT_EQ("", origin.suborigin());
+ ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
+
+ EXPECT_EQ("null", serialized);
+ EXPECT_EQ("null", serialized_physical_origin);
+ }
+}
+
+TEST(OriginTest, SuboriginIsSameOriginWith) {
+ struct TestCases {
+ const char* const url1;
+ const char* const url2;
+ bool is_same_origin;
+ bool is_same_physical_origin;
+ } cases[]{
+ {"http-so://foobar1.example.com/", "http-so://foobar1.example.com", true,
+ true},
+ {"http-so://foobar2.example.com/", "https-so://foobar2.example.com",
+ false, false},
+ {"http-so://foobar3.example.com/", "http://example.com", false, true},
+ {"https-so://foobar4.example.com/", "https-so://foobar4.example.com",
+ true, true},
+ {"https-so://foobar5.example.com/", "https://example.com", false, true},
+ {"http-so://foobar6.example.com/", "http-so://bazbar.example.com", false,
+ true},
+ {"http-so://foobar7.example.com/", "http-so://foobar7.google.com", false,
+ false},
+ };
+
+ for (const auto& test_case : cases) {
+ SCOPED_TRACE(test_case.url1);
+ url::Origin origin1(GURL(test_case.url1));
+ url::Origin origin2(GURL(test_case.url2));
+
+ EXPECT_TRUE(origin1.IsSameOriginWith(origin1));
+ EXPECT_TRUE(origin2.IsSameOriginWith(origin2));
+ EXPECT_EQ(test_case.is_same_origin, origin1.IsSameOriginWith(origin2));
+ EXPECT_EQ(test_case.is_same_origin, origin2.IsSameOriginWith(origin1));
+
+ EXPECT_TRUE(origin1.IsSamePhysicalOriginWith(origin1));
+ EXPECT_TRUE(origin2.IsSamePhysicalOriginWith(origin2));
+ EXPECT_EQ(test_case.is_same_physical_origin,
+ origin1.IsSamePhysicalOriginWith(origin2));
+ EXPECT_EQ(test_case.is_same_physical_origin,
+ origin2.IsSamePhysicalOriginWith(origin1));
+ }
+}
+
TEST(OriginTest, Comparison) {
// These URLs are arranged in increasing order:
const char* const urls[] = {
diff --git a/src/url/scheme_host_port.cc b/src/url/scheme_host_port.cc
index e6bb493..b5de079 100644
--- a/src/url/scheme_host_port.cc
+++ b/src/url/scheme_host_port.cc
@@ -48,7 +48,8 @@
bool IsValidInput(const base::StringPiece& scheme,
const base::StringPiece& host,
- uint16_t port) {
+ uint16_t port,
+ SchemeHostPort::ConstructPolicy policy) {
SchemeType scheme_type = SCHEME_WITH_PORT;
bool is_standard = GetStandardSchemeType(
scheme.data(),
@@ -71,8 +72,14 @@
if (host.empty() || port == 0)
return false;
- if (!IsCanonicalHost(host))
+ // Don't do an expensive canonicalization if the host is already
+ // canonicalized.
+ DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
+ IsCanonicalHost(host));
+ if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
+ !IsCanonicalHost(host)) {
return false;
+ }
return true;
@@ -83,8 +90,14 @@
return false;
}
- if (!IsCanonicalHost(host))
+ // Don't do an expensive canonicalization if the host is already
+ // canonicalized.
+ DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
+ IsCanonicalHost(host));
+ if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
+ !IsCanonicalHost(host)) {
return false;
+ }
return true;
@@ -104,9 +117,10 @@
SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
base::StringPiece host,
- uint16_t port)
+ uint16_t port,
+ ConstructPolicy policy)
: port_(0) {
- if (!IsValidInput(scheme, host, port))
+ if (!IsValidInput(scheme, host, port, policy))
return;
scheme.CopyToString(&scheme_);
@@ -114,6 +128,14 @@
port_ = port;
}
+SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
+ base::StringPiece host,
+ uint16_t port)
+ : SchemeHostPort(scheme,
+ host,
+ port,
+ ConstructPolicy::CHECK_CANONICALIZATION) {}
+
SchemeHostPort::SchemeHostPort(const GURL& url) : port_(0) {
if (!url.is_valid())
return;
@@ -126,7 +148,7 @@
if (port == PORT_UNSPECIFIED)
port = 0;
- if (!IsValidInput(scheme, host, port))
+ if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
return;
scheme.CopyToString(&scheme_);
@@ -152,6 +174,9 @@
url::Parsed parsed;
std::string serialized = SerializeInternal(&parsed);
+ if (IsInvalid())
+ return GURL(std::move(serialized), parsed, false);
+
// If the serialized string is passed to GURL for parsing, it will append an
// empty path "/". Add that here. Note: per RFC 6454 we cannot do this for
// normal Origin serialization.
@@ -176,13 +201,17 @@
if (IsInvalid())
return result;
- parsed->scheme = Component(0, scheme_.length());
- result.append(scheme_);
+ if (!scheme_.empty()) {
+ parsed->scheme = Component(0, scheme_.length());
+ result.append(scheme_);
+ }
result.append(kStandardSchemeSeparator);
- parsed->host = Component(result.length(), host_.length());
- result.append(host_);
+ if (!host_.empty()) {
+ parsed->host = Component(result.length(), host_.length());
+ result.append(host_);
+ }
if (port_ == 0)
return result;
diff --git a/src/url/scheme_host_port.h b/src/url/scheme_host_port.h
index dc8862a..065e4aa 100644
--- a/src/url/scheme_host_port.h
+++ b/src/url/scheme_host_port.h
@@ -88,6 +88,19 @@
base::StringPiece host,
uint16_t port);
+ // Metadata influencing whether or not the constructor should sanity check
+ // host canonicalization.
+ enum ConstructPolicy { CHECK_CANONICALIZATION, ALREADY_CANONICALIZED };
+
+ // Creates a (scheme, host, port) tuple. If |policy| is ALREADY_CANONICALIZED,
+ // the host canonicalization check is skipped (it is only DCHECKed), so that
+ // policy should only be used when converting between already normalized
+ // types, and should NOT be used for IPC.
+ SchemeHostPort(base::StringPiece scheme,
+ base::StringPiece host,
+ uint16_t port,
+ ConstructPolicy policy);
+
// Creates a (scheme, host, port) tuple from |url|, as described at
// https://tools.ietf.org/html/rfc6454#section-4
//
diff --git a/src/url/scheme_host_port_unittest.cc b/src/url/scheme_host_port_unittest.cc
index 46e1cf8..9c59b9f 100644
--- a/src/url/scheme_host_port_unittest.cc
+++ b/src/url/scheme_host_port_unittest.cc
@@ -12,6 +12,28 @@
namespace {
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
+ EXPECT_EQ(a, b);
+ const url::Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
+ const url::Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
+ EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin);
+ EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len);
+ EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin);
+ EXPECT_EQ(a_parsed.username.len, b_parsed.username.len);
+ EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin);
+ EXPECT_EQ(a_parsed.password.len, b_parsed.password.len);
+ EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin);
+ EXPECT_EQ(a_parsed.host.len, b_parsed.host.len);
+ EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin);
+ EXPECT_EQ(a_parsed.port.len, b_parsed.port.len);
+ EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin);
+ EXPECT_EQ(a_parsed.path.len, b_parsed.path.len);
+ EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin);
+ EXPECT_EQ(a_parsed.query.len, b_parsed.query.len);
+ EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin);
+ EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len);
+}
+
TEST(SchemeHostPortTest, Invalid) {
url::SchemeHostPort invalid;
EXPECT_EQ("", invalid.scheme());
@@ -37,6 +59,7 @@
EXPECT_TRUE(tuple.Equals(tuple));
EXPECT_TRUE(tuple.Equals(invalid));
EXPECT_TRUE(invalid.Equals(tuple));
+ ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -63,6 +86,7 @@
EXPECT_EQ(test.port, tuple.port());
EXPECT_FALSE(tuple.IsInvalid());
EXPECT_TRUE(tuple.Equals(tuple));
+ ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -98,6 +122,7 @@
EXPECT_EQ(0, tuple.port());
EXPECT_TRUE(tuple.IsInvalid());
EXPECT_TRUE(tuple.Equals(tuple));
+ ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -125,6 +150,7 @@
EXPECT_EQ("", tuple.host());
EXPECT_EQ(0, tuple.port());
EXPECT_TRUE(tuple.IsInvalid());
+ ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -160,6 +186,7 @@
EXPECT_EQ(test.port, tuple.port());
EXPECT_FALSE(tuple.IsInvalid());
EXPECT_TRUE(tuple.Equals(tuple));
+ ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -184,6 +211,7 @@
GURL url(test.url);
url::SchemeHostPort tuple(url);
EXPECT_EQ(test.expected, tuple.Serialize());
+ ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
diff --git a/src/url/url_canon.h b/src/url/url_canon.h
index 95d5345..c4852e4 100644
--- a/src/url/url_canon.h
+++ b/src/url/url_canon.h
@@ -379,6 +379,33 @@
CanonOutput* output,
CanonHostInfo* host_info);
+// Canonicalizes a string according to the host canonicalization rules. Unlike
+// CanonicalizeHost, this will not check for IP addresses which can change the
+// meaning (and canonicalization) of the components. This means it is possible
+// to call this for sub-components of a host name without corruption.
+//
+// As an example, "01.02.03.04.com" is a canonical hostname. If you called
+// CanonicalizeHost on the substring "01.02.03.04" it will get "fixed" to
+// "1.2.3.4" which will produce an invalid host name when reassembled. This
+// can happen more than one might think because all numbers by themselves are
+// considered IP addresses; so "5" canonicalizes to "0.0.0.5".
+//
+// Be careful: Because Punycode works on each dot-separated substring as a
+// unit, you should only pass this function substrings that represent complete
+// dot-separated subcomponents of the original host. Even if you have ASCII
+// input, percent-escaped characters will have different meanings if split in
+// the middle.
+//
+// Returns true if the host was valid. This function will treat a 0-length
+// host as valid (because it's designed to be used for substrings) while the
+// full version above will mark empty hosts as broken.
+URL_EXPORT bool CanonicalizeHostSubstring(const char* spec,
+ const Component& host,
+ CanonOutput* output);
+URL_EXPORT bool CanonicalizeHostSubstring(const base::char16* spec,
+ const Component& host,
+ CanonOutput* output);
+
// IP addresses.
//
// Tries to interpret the given host name as an IPv4 or IPv6 address. If it is
diff --git a/src/url/url_canon_host.cc b/src/url/url_canon_host.cc
index d4cdfd5..76a2236 100644
--- a/src/url/url_canon_host.cc
+++ b/src/url/url_canon_host.cc
@@ -308,7 +308,25 @@
return DoIDNHost(host, host_len, output);
}
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
+bool DoHostSubstring(const CHAR* spec,
+ const Component& host,
+ CanonOutput* output) {
+ bool has_non_ascii, has_escaped;
+ ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
+
+ if (has_non_ascii || has_escaped) {
+ return DoComplexHost(&spec[host.begin], host.len, has_non_ascii,
+ has_escaped, output);
+ }
+
+ const bool success =
+ DoSimpleHost(&spec[host.begin], host.len, output, &has_non_ascii);
+ DCHECK(!has_non_ascii);
+ return success;
+}
+
+template <typename CHAR, typename UCHAR>
void DoHost(const CHAR* spec,
const Component& host,
CanonOutput* output,
@@ -320,26 +338,10 @@
return;
}
- bool has_non_ascii, has_escaped;
- ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
-
// Keep track of output's initial length, so we can rewind later.
const int output_begin = output->length();
- bool success;
- if (!has_non_ascii && !has_escaped) {
- success = DoSimpleHost(&spec[host.begin], host.len,
- output, &has_non_ascii);
- DCHECK(!has_non_ascii);
- } else {
- success = DoComplexHost(&spec[host.begin], host.len,
- has_non_ascii, has_escaped, output);
- }
-
- if (!success) {
- // Canonicalization failed. Set BROKEN to notify the caller.
- host_info->family = CanonHostInfo::BROKEN;
- } else {
+ if (DoHostSubstring<CHAR, UCHAR>(spec, host, output)) {
// After all the other canonicalization, check if we ended up with an IP
// address. IP addresses are small, so writing into this temporary buffer
// should not cause an allocation.
@@ -355,6 +357,9 @@
output->set_length(output_begin);
output->Append(canon_ip.data(), canon_ip.length());
}
+ } else {
+ // Canonicalization failed. Set BROKEN to notify the caller.
+ host_info->family = CanonHostInfo::BROKEN;
}
host_info->out_host = MakeRange(output_begin, output->length());
@@ -396,4 +401,16 @@
DoHost<base::char16, base::char16>(spec, host, output, host_info);
}
+bool CanonicalizeHostSubstring(const char* spec,
+ const Component& host,
+ CanonOutput* output) {
+ return DoHostSubstring<char, unsigned char>(spec, host, output);
+}
+
+bool CanonicalizeHostSubstring(const base::char16* spec,
+ const Component& host,
+ CanonOutput* output) {
+ return DoHostSubstring<base::char16, base::char16>(spec, host, output);
+}
+
} // namespace url
diff --git a/src/url/url_canon_icu_unittest.cc b/src/url/url_canon_icu_unittest.cc
index f7ce199..83a7263 100644
--- a/src/url/url_canon_icu_unittest.cc
+++ b/src/url/url_canon_icu_unittest.cc
@@ -14,8 +14,6 @@
namespace url {
-using test_utils::WStringToUTF16;
-
namespace {
// Wrapper around a UConverter object that managers creation and destruction.
@@ -64,7 +62,8 @@
std::string str;
StdStringCanonOutput output(&str);
- base::string16 input_str(WStringToUTF16(icu_cases[i].input));
+ base::string16 input_str(
+ test_utils::TruncateWStringToUTF16(icu_cases[i].input));
int input_len = static_cast<int>(input_str.length());
converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
output.Complete();
@@ -134,7 +133,8 @@
}
if (query_cases[i].input16) {
- base::string16 input16(WStringToUTF16(query_cases[i].input16));
+ base::string16 input16(
+ test_utils::TruncateWStringToUTF16(query_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
std::string out_str;
diff --git a/src/url/url_canon_stdurl.cc b/src/url/url_canon_stdurl.cc
index 7d1758b..e0bca9d 100644
--- a/src/url/url_canon_stdurl.cc
+++ b/src/url/url_canon_stdurl.cc
@@ -120,6 +120,14 @@
if (!strncmp(scheme, kWsScheme, scheme_len))
default_port = 80;
break;
+ case 7:
+ if (!strncmp(scheme, kHttpSuboriginScheme, scheme_len))
+ default_port = 80;
+ break;
+ case 8:
+ if (!strncmp(scheme, kHttpsSuboriginScheme, scheme_len))
+ default_port = 443;
+ break;
}
return default_port;
}
diff --git a/src/url/url_canon_unittest.cc b/src/url/url_canon_unittest.cc
index 2f053f2..26d7815 100644
--- a/src/url/url_canon_unittest.cc
+++ b/src/url/url_canon_unittest.cc
@@ -6,6 +6,7 @@
#include <stddef.h>
#include "base/macros.h"
+#include "base/strings/utf_string_conversions.h"
#include "testing/base/public/gunit.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -15,10 +16,6 @@
namespace url {
-using test_utils::WStringToUTF16;
-using test_utils::ConvertUTF8ToUTF16;
-using test_utils::ConvertUTF16ToUTF8;
-
namespace {
struct ComponentCase {
@@ -195,7 +192,8 @@
out_str.clear();
StdStringCanonOutput output(&out_str);
- base::string16 input_str(WStringToUTF16(utf_cases[i].input16));
+ base::string16 input_str(
+ test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
int input_len = static_cast<int>(input_str.length());
bool success = true;
for (int ch = 0; ch < input_len; ch++) {
@@ -213,11 +211,12 @@
// UTF-16 -> UTF-8
std::string input8_str(utf_cases[i].input8);
- base::string16 input16_str(WStringToUTF16(utf_cases[i].input16));
- EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str));
+ base::string16 input16_str(
+ test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
+ EXPECT_EQ(input8_str, base::UTF16ToUTF8(input16_str));
// UTF-8 -> UTF-16
- EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str));
+ EXPECT_EQ(input16_str, base::UTF8ToUTF16(input8_str));
}
}
}
@@ -265,7 +264,7 @@
out_str.clear();
StdStringCanonOutput output2(&out_str);
- base::string16 wide_input(ConvertUTF8ToUTF16(scheme_cases[i].input));
+ base::string16 wide_input(base::UTF8ToUTF16(scheme_cases[i].input));
in_comp.len = static_cast<int>(wide_input.length());
success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2,
&out_comp);
@@ -530,7 +529,8 @@
// Wide version.
if (host_cases[i].input16) {
- base::string16 input16(WStringToUTF16(host_cases[i].input16));
+ base::string16 input16(
+ test_utils::TruncateWStringToUTF16(host_cases[i].input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
Component out_comp;
@@ -580,7 +580,8 @@
// Wide version.
if (host_cases[i].input16) {
- base::string16 input16(WStringToUTF16(host_cases[i].input16));
+ base::string16 input16(
+ test_utils::TruncateWStringToUTF16(host_cases[i].input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
@@ -702,7 +703,8 @@
}
// 16-bit version.
- base::string16 input16(WStringToUTF16(cases[i].input16));
+ base::string16 input16(
+ test_utils::TruncateWStringToUTF16(cases[i].input16));
component = Component(0, static_cast<int>(input16.length()));
std::string out_str2;
@@ -854,7 +856,8 @@
}
// 16-bit version.
- base::string16 input16(WStringToUTF16(cases[i].input16));
+ base::string16 input16(
+ test_utils::TruncateWStringToUTF16(cases[i].input16));
component = Component(0, static_cast<int>(input16.length()));
std::string out_str2;
@@ -887,6 +890,51 @@
EXPECT_FALSE(host_info.IsIPAddress());
}
+// Verifies that CanonicalizeHostSubstring produces the expected output and
+// does not "fix" IP addresses. Because this code is a subset of
+// CanonicalizeHost, the shared functionality is not tested.
+TEST(URLCanonTest, CanonicalizeHostSubstring) {
+ // Basic sanity check.
+ {
+ std::string out_str;
+ StdStringCanonOutput output(&out_str);
+ EXPECT_TRUE(CanonicalizeHostSubstring("M\xc3\x9cNCHEN.com",
+ Component(0, 12), &output));
+ output.Complete();
+ EXPECT_EQ("xn--mnchen-3ya.com", out_str);
+ }
+
+ // Failure case.
+ {
+ std::string out_str;
+ StdStringCanonOutput output(&out_str);
+ EXPECT_FALSE(CanonicalizeHostSubstring(
+ test_utils::TruncateWStringToUTF16(L"\xfdd0zyx.com").c_str(),
+ Component(0, 8), &output));
+ output.Complete();
+ EXPECT_EQ("%EF%BF%BDzyx.com", out_str);
+ }
+
+ // Should return true for empty input strings.
+ {
+ std::string out_str;
+ StdStringCanonOutput output(&out_str);
+ EXPECT_TRUE(CanonicalizeHostSubstring("", Component(0, 0), &output));
+ output.Complete();
+ EXPECT_EQ(std::string(), out_str);
+ }
+
+ // Numbers that look like IP addresses should not be changed.
+ {
+ std::string out_str;
+ StdStringCanonOutput output(&out_str);
+ EXPECT_TRUE(
+ CanonicalizeHostSubstring("01.02.03.04", Component(0, 11), &output));
+ output.Complete();
+ EXPECT_EQ("01.02.03.04", out_str);
+ }
+}
+
TEST(URLCanonTest, UserInfo) {
// Note that the canonicalizer should escape and treat empty components as
// not being there.
@@ -940,7 +988,7 @@
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- base::string16 wide_input(ConvertUTF8ToUTF16(user_info_cases[i].input));
+ base::string16 wide_input(base::UTF8ToUTF16(user_info_cases[i].input));
success = CanonicalizeUserInfo(wide_input.c_str(),
parsed.username,
wide_input.c_str(),
@@ -1003,7 +1051,7 @@
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- base::string16 wide_input(ConvertUTF8ToUTF16(port_cases[i].input));
+ base::string16 wide_input(base::UTF8ToUTF16(port_cases[i].input));
success = CanonicalizePort(wide_input.c_str(),
in_comp,
port_cases[i].default_port,
@@ -1123,7 +1171,8 @@
}
if (path_cases[i].input16) {
- base::string16 input16(WStringToUTF16(path_cases[i].input16));
+ base::string16 input16(
+ test_utils::TruncateWStringToUTF16(path_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
Component out_comp;
@@ -1198,7 +1247,8 @@
}
if (query_cases[i].input16) {
- base::string16 input16(WStringToUTF16(query_cases[i].input16));
+ base::string16 input16(
+ test_utils::TruncateWStringToUTF16(query_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
std::string out_str;
@@ -1260,7 +1310,8 @@
// 16-bit input
if (ref_cases[i].input16) {
- base::string16 input16(WStringToUTF16(ref_cases[i].input16));
+ base::string16 input16(
+ test_utils::TruncateWStringToUTF16(ref_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
Component out_comp;
@@ -1896,12 +1947,12 @@
const base::char16 fill_char = 0xffff;
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12, buf, sizeof(buf) / 2 - 1, 10));
- EXPECT_EQ(WStringToUTF16(L"12"), base::string16(buf));
+ EXPECT_EQ(base::UTF8ToUTF16("12"), base::string16(buf));
EXPECT_EQ(fill_char, buf[3]);
// Test the edge cases - exactly the buffer size and one over
EXPECT_EQ(0, _itow_s(1234, buf, sizeof(buf) / 2 - 1, 10));
- EXPECT_EQ(WStringToUTF16(L"1234"), base::string16(buf));
+ EXPECT_EQ(base::UTF8ToUTF16("1234"), base::string16(buf));
EXPECT_EQ(fill_char, buf[5]);
memset(buf, fill_mem, sizeof(buf));
@@ -1911,12 +1962,13 @@
// Test the template overload (note that this will see the full buffer)
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12, buf, 10));
- EXPECT_EQ(WStringToUTF16(L"12"), base::string16(buf));
+ EXPECT_EQ(base::UTF8ToUTF16("12"),
+ base::string16(buf));
EXPECT_EQ(fill_char, buf[3]);
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12345, buf, 10));
- EXPECT_EQ(WStringToUTF16(L"12345"), base::string16(buf));
+ EXPECT_EQ(base::UTF8ToUTF16("12345"), base::string16(buf));
EXPECT_EQ(EINVAL, _itow_s(123456, buf, 10));
}
@@ -2152,7 +2204,7 @@
for (int i = 0; i < 4800; i++)
new_query.push_back('a');
- base::string16 new_path(WStringToUTF16(L"/foo"));
+ base::string16 new_path(test_utils::TruncateWStringToUTF16(L"/foo"));
repl.SetPath(new_path.c_str(), Component(0, 4));
repl.SetQuery(new_query.c_str(),
Component(0, static_cast<int>(new_query.length())));
@@ -2173,4 +2225,35 @@
EXPECT_TRUE(expected == repl_str);
}
+TEST(URLCanonTest, DefaultPortForScheme) {
+ struct TestCases {
+ const char* scheme;
+ const int expected_port;
+ } cases[]{
+ {"http", 80},
+ {"https", 443},
+ {"ftp", 21},
+ {"ws", 80},
+ {"wss", 443},
+ {"gopher", 70},
+ {"http-so", 80},
+ {"https-so", 443},
+ {"fake-scheme", PORT_UNSPECIFIED},
+ {"HTTP", PORT_UNSPECIFIED},
+ {"HTTPS", PORT_UNSPECIFIED},
+ {"FTP", PORT_UNSPECIFIED},
+ {"WS", PORT_UNSPECIFIED},
+ {"WSS", PORT_UNSPECIFIED},
+ {"GOPHER", PORT_UNSPECIFIED},
+ {"HTTP-SO", PORT_UNSPECIFIED},
+ {"HTTPS-SO", PORT_UNSPECIFIED},
+ };
+
+ for (auto& test_case : cases) {
+ SCOPED_TRACE(test_case.scheme);
+ EXPECT_EQ(test_case.expected_port,
+ DefaultPortForScheme(test_case.scheme, strlen(test_case.scheme)));
+ }
+}
+
} // namespace url
diff --git a/src/url/url_constants.cc b/src/url/url_constants.cc
index 549819e..73c9a76 100644
--- a/src/url/url_constants.cc
+++ b/src/url/url_constants.cc
@@ -24,6 +24,9 @@
const char kWsScheme[] = "ws";
const char kWssScheme[] = "wss";
+const char kHttpSuboriginScheme[] = "http-so";
+const char kHttpsSuboriginScheme[] = "https-so";
+
const char kStandardSchemeSeparator[] = "://";
const size_t kMaxURLChars = 2 * 1024 * 1024;
diff --git a/src/url/url_constants.h b/src/url/url_constants.h
index 3a423d2..c110589 100644
--- a/src/url/url_constants.h
+++ b/src/url/url_constants.h
@@ -30,6 +30,11 @@
URL_EXPORT extern const char kWsScheme[];
URL_EXPORT extern const char kWssScheme[];
+// Special HTTP and HTTPS schemes for serialization of suborigins. See
+// https://w3c.github.io/webappsec-suborigins/.
+URL_EXPORT extern const char kHttpSuboriginScheme[];
+URL_EXPORT extern const char kHttpsSuboriginScheme[];
+
// Used to separate a standard scheme and the hostname: "://".
URL_EXPORT extern const char kStandardSchemeSeparator[];
diff --git a/src/url/url_test_utils.h b/src/url/url_test_utils.h
index 174e5e0..8c89622 100644
--- a/src/url/url_test_utils.h
+++ b/src/url/url_test_utils.h
@@ -11,6 +11,7 @@
#include <string>
#include "base/strings/string16.h"
+#include "base/strings/utf_string_conversions.h"
#include "testing/base/public/gunit.h"
#include "url/url_canon_internal.h"
@@ -18,10 +19,12 @@
namespace test_utils {
-// Converts a UTF-16 string from native wchar_t format to char16, by
-// truncating the high 32 bits. This is not meant to handle true UTF-32
-// encoded strings.
-inline base::string16 WStringToUTF16(const wchar_t* src) {
+// Converts a UTF-16 string from native wchar_t format to char16 by
+// truncating the high 32 bits. This is different than the conversion function
+// in base because it passes invalid UTF-16 characters which is important for
+// test purposes. As a result, this is not meant to handle true UTF-32 encoded
+// strings.
+inline base::string16 TruncateWStringToUTF16(const wchar_t* src) {
base::string16 str;
int length = static_cast<int>(wcslen(src));
for (int i = 0; i < length; ++i) {
@@ -30,25 +33,6 @@
return str;
}
-// Converts a string from UTF-8 to UTF-16.
-inline base::string16 ConvertUTF8ToUTF16(const std::string& src) {
- int length = static_cast<int>(src.length());
- EXPECT_LT(length, 1024);
- RawCanonOutputW<1024> output;
- EXPECT_TRUE(ConvertUTF8ToUTF16(src.data(), length, &output));
- return base::string16(output.data(), output.length());
-}
-
-// Converts a string from UTF-16 to UTF-8.
-inline std::string ConvertUTF16ToUTF8(const base::string16& src) {
- std::string str;
- StdStringCanonOutput output(&str);
- EXPECT_TRUE(ConvertUTF16ToUTF8(src.data(), static_cast<int>(src.length()),
- &output));
- output.Complete();
- return str;
-}
-
} // namespace test_utils
} // namespace url
diff --git a/src/url/url_util.cc b/src/url/url_util.cc
index 8522eb1..ebe386f 100644
--- a/src/url/url_util.cc
+++ b/src/url/url_util.cc
@@ -21,25 +21,29 @@
namespace {
-const int kNumStandardURLSchemes = 8;
+const int kNumStandardURLSchemes = 10;
const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {
- {kHttpScheme, SCHEME_WITH_PORT},
- {kHttpsScheme, SCHEME_WITH_PORT},
- // Yes, file URLs can have a hostname, so file URLs should be handled as
- // "standard". File URLs never have a port as specified by the SchemeType
- // field.
- {kFileScheme, SCHEME_WITHOUT_PORT},
- {kFtpScheme, SCHEME_WITH_PORT},
- {kGopherScheme, SCHEME_WITH_PORT},
- {kWsScheme, SCHEME_WITH_PORT}, // WebSocket.
- {kWssScheme, SCHEME_WITH_PORT}, // WebSocket secure.
- {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY},
+ {kHttpScheme, SCHEME_WITH_PORT},
+ {kHttpsScheme, SCHEME_WITH_PORT},
+ // Yes, file URLs can have a hostname, so file URLs should be handled as
+ // "standard". File URLs never have a port as specified by the SchemeType
+ // field.
+ {kFileScheme, SCHEME_WITHOUT_PORT},
+ {kFtpScheme, SCHEME_WITH_PORT},
+ {kGopherScheme, SCHEME_WITH_PORT},
+ {kWsScheme, SCHEME_WITH_PORT}, // WebSocket.
+ {kWssScheme, SCHEME_WITH_PORT}, // WebSocket secure.
+ {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY},
+ {kHttpSuboriginScheme, SCHEME_WITH_PORT},
+ {kHttpsSuboriginScheme, SCHEME_WITH_PORT},
};
-const int kNumReferrerURLSchemes = 2;
+const int kNumReferrerURLSchemes = 4;
const SchemeWithType kReferrerURLSchemes[kNumReferrerURLSchemes] = {
- {kHttpScheme, SCHEME_WITH_PORT},
- {kHttpsScheme, SCHEME_WITH_PORT},
+ {kHttpScheme, SCHEME_WITH_PORT},
+ {kHttpsScheme, SCHEME_WITH_PORT},
+ {kHttpSuboriginScheme, SCHEME_WITH_PORT},
+ {kHttpsSuboriginScheme, SCHEME_WITH_PORT},
};
// Lists of the currently installed standard and referrer schemes. These lists
diff --git a/src/url/url_util_unittest.cc b/src/url/url_util_unittest.cc
index eceb505..6d7c8f7 100644
--- a/src/url/url_util_unittest.cc
+++ b/src/url/url_util_unittest.cc
@@ -214,15 +214,15 @@
RawCanonOutputT<base::char16> output;
DecodeURLEscapeSequences(input, strlen(input), &output);
EXPECT_EQ(decode_cases[i].output,
- test_utils::ConvertUTF16ToUTF8(base::string16(output.data(),
- output.length())));
+ base::UTF16ToUTF8(base::string16(output.data(),
+ output.length())));
}
// Our decode should decode %00
const char zero_input[] = "%00";
RawCanonOutputT<base::char16> zero_output;
DecodeURLEscapeSequences(zero_input, strlen(zero_input), &zero_output);
- EXPECT_NE("%00", test_utils::ConvertUTF16ToUTF8(
+ EXPECT_NE("%00", base::UTF16ToUTF8(
base::string16(zero_output.data(), zero_output.length())));
// Test the error behavior for invalid UTF-8.