Internal change PiperOrigin-RevId: 148486483 Change-Id: Iebcb04cdcbdf2d47099b8ee50eff901139c5a721

commit: 44ac360d844b6ab5e40070aa367443ff0a9d5ac4 [log] [tgz]
author: Devany Sandoval <sandovad@google.com> Fri Feb 24 12:01:41 2017 -0800
committer: sandovad <sandovad@google.com> Tue Sep 03 12:54:36 2019 -0700
tree: 658028b65a476e57849fa7e5aabb2b47110773b5
parent: 6b31f0e37e67f8486baa6e18cde534b8f04a4f7f [diff]
diff --git a/import.sh b/import.sh
index 47de023..8671302 100644
--- a/import.sh
+++ b/import.sh

@@ -4,7 +4,7 @@
 top=/tmp/chromium
 mkdir $top
 prefix=https://chromium.googlesource.com/chromium/src.git/+archive
-for version in 54.0.2840.100 55.0.2883.87
+for version in 55.0.2883.87 56.0.2924.87
 do
   mkdir $top/$version
   cd $top/$version

diff --git a/src/base/strings/utf_string_conversions.cc b/src/base/strings/utf_string_conversions.cc
new file mode 100644
index 0000000..944078f
--- /dev/null
+++ b/src/base/strings/utf_string_conversions.cc

@@ -0,0 +1,124 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/utf_string_conversions.h"
+
+#include <stdint.h>
+
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversion_utils.h"
+#include "build/build_config.h"
+
+namespace url {
+namespace base {
+
+namespace {
+
+// Generalized Unicode converter -----------------------------------------------
+
+// Converts |src| (|src_len| code units of SRC_CHAR) to the Unicode encoding of
+// DEST_STRING, writing each decoded code point to |output|. Any input that
+// ReadUnicodeCharacter() rejects is emitted as U+FFFD and makes the function
+// return false; otherwise it returns true.
+template<typename SRC_CHAR, typename DEST_STRING>
+bool ConvertUnicode(const SRC_CHAR* src,
+                    size_t src_len,
+                    DEST_STRING* output) {
+  // ICU requires 32-bit numbers, so |src_len| is narrowed to int32_t here.
+  bool success = true;
+  int32_t src_len32 = static_cast<int32_t>(src_len);
+  for (int32_t i = 0; i < src_len32; i++) {
+    uint32_t code_point;
+    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
+      WriteUnicodeCharacter(code_point, output);
+    } else {
+      WriteUnicodeCharacter(0xFFFD, output);
+      success = false;
+    }
+  }
+
+  return success;
+}
+
+}  // namespace
+
+// UTF16 <-> UTF8 --------------------------------------------------------------
+
+#if defined(WCHAR_T_IS_UTF32)
+
+bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
+  if (IsStringASCII(StringPiece(src, src_len))) {
+    output->assign(src, src + src_len);
+    return true;
+  } else {
+    PrepareForUTF16Or32Output(src, src_len, output);
+    return ConvertUnicode(src, src_len, output);
+  }
+}
+
+string16 UTF8ToUTF16(StringPiece utf8) {
+  if (IsStringASCII(utf8)) {
+    return string16(utf8.begin(), utf8.end());
+  }
+
+  string16 ret;
+  PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
+  // Ignore the success flag of this call, it will do the best it can for
+  // invalid input, which is what we want here.
+  ConvertUnicode(utf8.data(), utf8.length(), &ret);
+  return ret;
+}
+
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+  if (IsStringASCII(StringPiece16(src, src_len))) {
+    output->assign(src, src + src_len);
+    return true;
+  } else {
+    PrepareForUTF8Output(src, src_len, output);
+    return ConvertUnicode(src, src_len, output);
+  }
+}
+
+std::string UTF16ToUTF8(StringPiece16 utf16) {
+  if (IsStringASCII(utf16)) {
+    return std::string(utf16.begin(), utf16.end());
+  }
+
+  std::string ret;
+  // Ignore the success flag of this call, it will do the best it can for
+  // invalid input, which is what we want here.
+  UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
+  return ret;
+}
+
+#elif defined(WCHAR_T_IS_UTF16)
+// Delegates to the "wide" conversions; note they are not defined in this file.
+
+bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
+  return UTF8ToWide(src, src_len, output);
+}
+
+string16 UTF8ToUTF16(StringPiece utf8) {
+  return UTF8ToWide(utf8);
+}
+
+bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+  return WideToUTF8(src, src_len, output);
+}
+
+std::string UTF16ToUTF8(StringPiece16 utf16) {
+  if (IsStringASCII(utf16))
+    return std::string(utf16.data(), utf16.data() + utf16.length());
+
+  std::string ret;
+  PrepareForUTF8Output(utf16.data(), utf16.length(), &ret);
+  ConvertUnicode(utf16.data(), utf16.length(), &ret);
+  return ret;
+}
+
+#endif
+
+}  // namespace base
+}  // namespace url

diff --git a/src/base/strings/utf_string_conversions.h b/src/base/strings/utf_string_conversions.h
new file mode 100644
index 0000000..d6876b3
--- /dev/null
+++ b/src/base/strings/utf_string_conversions.h

@@ -0,0 +1,26 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
+#define BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
+
+#include <stddef.h>
+
+#include <string>
+
+#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
+
+#define BASE_EXPORT
+
+namespace url {
+namespace base {
+
+BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8);
+BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16);
+
+}  // namespace base
+}  // namespace url
+
+#endif  // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_

diff --git a/src/build/build_config.h b/src/build/build_config.h
index f67dd48..5f99fc1 100644
--- a/src/build/build_config.h
+++ b/src/build/build_config.h

@@ -48,7 +48,6 @@
 #endif
 #elif defined(_WIN32)
 #define OS_WIN 1
-#define TOOLKIT_VIEWS 1
 #elif defined(__FreeBSD__)
 #define OS_FREEBSD 1
 #elif defined(__NetBSD__)
@@ -111,6 +110,31 @@
 #define ARCH_CPU_X86 1
 #define ARCH_CPU_32_BITS 1
 #define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__s390x__)
+#define ARCH_CPU_S390_FAMILY 1
+#define ARCH_CPU_S390X 1
+#define ARCH_CPU_64_BITS 1
+#define ARCH_CPU_BIG_ENDIAN 1
+#elif defined(__s390__)
+#define ARCH_CPU_S390_FAMILY 1
+#define ARCH_CPU_S390 1
+#define ARCH_CPU_31_BITS 1
+#define ARCH_CPU_BIG_ENDIAN 1
+#elif defined(__PPC64__) && defined(__BIG_ENDIAN__)
+#define ARCH_CPU_PPC64_FAMILY 1
+#define ARCH_CPU_PPC64 1
+#define ARCH_CPU_64_BITS 1
+#define ARCH_CPU_BIG_ENDIAN 1
+#elif defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
+#define ARCH_CPU_PPC64_FAMILY 1
+#define ARCH_CPU_PPC64 1
+#define ARCH_CPU_64_BITS 1
+#define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__PPC__)
+#define ARCH_CPU_PPC_FAMILY 1
+#define ARCH_CPU_PPC 1
+#define ARCH_CPU_32_BITS 1
+#define ARCH_CPU_BIG_ENDIAN 1
 #elif defined(__ARMEL__)
 #define ARCH_CPU_ARM_FAMILY 1
 #define ARCH_CPU_ARMEL 1

diff --git a/src/url/gurl.cc b/src/url/gurl.cc
index d81b252..1d6dfc6 100644
--- a/src/url/gurl.cc
+++ b/src/url/gurl.cc

@@ -180,14 +180,6 @@
   return EmptyStringForGURL();
 }
 
-bool GURL::operator==(const GURL& other) const {
-  return spec_ == other.spec_;
-}
-
-bool GURL::operator!=(const GURL& other) const {
-  return spec_ != other.spec_;
-}
-
 bool GURL::operator<(const GURL& other) const {
   return spec_ < other.spec_;
 }
@@ -510,3 +502,20 @@
 std::ostream& operator<<(std::ostream& out, const GURL& url) {
   return out << url.possibly_invalid_spec();
 }
+
+bool operator==(const GURL& x, const GURL& y) {
+  return x.possibly_invalid_spec() == y.possibly_invalid_spec();
+}
+
+bool operator!=(const GURL& x, const GURL& y) {
+  return !(x == y);
+}
+
+bool operator==(const GURL& x, const url::base::StringPiece& spec) {
+  DCHECK_EQ(GURL(spec).possibly_invalid_spec(), spec);
+  return x.possibly_invalid_spec() == spec;
+}
+
+bool operator!=(const GURL& x, const url::base::StringPiece& spec) {
+  return !(x == spec);
+}

diff --git a/src/url/gurl.h b/src/url/gurl.h
index 4d8b5d4..aeb77aa 100644
--- a/src/url/gurl.h
+++ b/src/url/gurl.h

@@ -132,10 +132,6 @@
     return parsed_;
   }
 
-  // Defiant equality operator!
-  bool operator==(const GURL& other) const;
-  bool operator!=(const GURL& other) const;
-
   // Allows GURL to used as a key in STL (for example, a std::set or std::map).
   bool operator<(const GURL& other) const;
   bool operator>(const GURL& other) const;
@@ -240,7 +236,8 @@
   // higher-level and more complete semantics. See that function's documentation
   // for more detail.
   bool SchemeIsCryptographic() const {
-    return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme);
+    return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme) ||
+           SchemeIs(url::kHttpsSuboriginScheme);
   }
 
   // Returns true if the scheme is "blob".
@@ -248,6 +245,12 @@
     return SchemeIs(url::kBlobScheme);
   }
 
+  // Returns true if the scheme indicates a serialized suborigin.
+  bool SchemeIsSuborigin() const {
+    return SchemeIs(url::kHttpSuboriginScheme) ||
+           SchemeIs(url::kHttpsSuboriginScheme);
+  }
+
   // The "content" of the URL is everything after the scheme (skipping the
   // scheme delimiting colon). It is an error to get the content of an invalid
   // URL: the result will be an empty string.
@@ -447,4 +450,13 @@
 // Stream operator so GURL can be used in assertion statements.
 URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url);
 
+URL_EXPORT bool operator==(const GURL& x, const GURL& y);
+URL_EXPORT bool operator!=(const GURL& x, const GURL& y);
+
+// Equality operator for comparing raw spec_. This should be used in place of
+// url == GURL(spec) where |spec| is known (i.e. constants). This is to prevent
+// needlessly re-parsing |spec| into a temporary GURL.
+URL_EXPORT bool operator==(const GURL& x, const url::base::StringPiece& spec);
+URL_EXPORT bool operator!=(const GURL& x, const url::base::StringPiece& spec);
+
 #endif  // URL_GURL_H_

diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc
index 4e18da8..aae5048 100644
--- a/src/url/gurl_unittest.cc
+++ b/src/url/gurl_unittest.cc

@@ -5,6 +5,7 @@
 #include <stddef.h>
 
 #include "base/macros.h"
+#include "base/strings/utf_string_conversions.h"
 #include "testing/base/public/gunit.h"
 #include "url/gurl.h"
 #include "url/url_canon.h"
@@ -12,9 +13,6 @@
 
 namespace url {
 
-using test_utils::WStringToUTF16;
-using test_utils::ConvertUTF8ToUTF16;
-
 namespace {
 
 template<typename CHAR>
@@ -67,11 +65,11 @@
 // the parser is already tested and works, so we are mostly interested if the
 // object does the right thing with the results.
 TEST(GURLTest, Components) {
-  GURL empty_url(WStringToUTF16(L""));
+  GURL empty_url(base::UTF8ToUTF16(""));
   EXPECT_TRUE(empty_url.is_empty());
   EXPECT_FALSE(empty_url.is_valid());
 
-  GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
+  GURL url(base::UTF8ToUTF16("http://user:pass@google.com:99/foo;bar?q=a#ref"));
   EXPECT_FALSE(url.is_empty());
   EXPECT_TRUE(url.is_valid());
   EXPECT_TRUE(url.SchemeIs("http"));
@@ -116,7 +114,8 @@
 }
 
 TEST(GURLTest, Copy) {
-  GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
+  GURL url(base::UTF8ToUTF16(
+      "http://user:pass@google.com:99/foo;bar?q=a#ref"));
 
   GURL url2(url);
   EXPECT_TRUE(url2.is_valid());
@@ -149,7 +148,8 @@
 }
 
 TEST(GURLTest, Assign) {
-  GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
+  GURL url(base::UTF8ToUTF16(
+      "http://user:pass@google.com:99/foo;bar?q=a#ref"));
 
   GURL url2;
   url2 = url;
@@ -191,7 +191,8 @@
 }
 
 TEST(GURLTest, CopyFileSystem) {
-  GURL url(WStringToUTF16(L"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref"));
+  GURL url(base::UTF8ToUTF16(
+      "filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref"));
 
   GURL url2(url);
   EXPECT_TRUE(url2.is_valid());
@@ -313,9 +314,9 @@
     EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL);
 
     // Wide code path.
-    GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base));
+    GURL inputw(base::UTF8ToUTF16(resolve_cases[i].base));
     GURL outputw =
-        input.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative));
+        input.Resolve(base::UTF8ToUTF16(resolve_cases[i].relative));
     EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
     EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
     EXPECT_EQ(outputw.SchemeIsFileSystem(), outputw.inner_url() != NULL);
@@ -677,12 +678,42 @@
   EXPECT_FALSE(GURL("http://bar/").SchemeIsWSOrWSS());
 }
 
+TEST(GURLTest, SchemeIsCryptographic) {
+  EXPECT_TRUE(GURL("https://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_TRUE(GURL("HTTPS://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_TRUE(GURL("HtTpS://foo.bar.com/").SchemeIsCryptographic());
+
+  EXPECT_TRUE(GURL("wss://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_TRUE(GURL("WSS://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_TRUE(GURL("WsS://foo.bar.com/").SchemeIsCryptographic());
+
+  EXPECT_TRUE(GURL("https-so://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_TRUE(GURL("HTTPS-SO://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_TRUE(GURL("HtTpS-So://foo.bar.com/").SchemeIsCryptographic());
+
+  EXPECT_FALSE(GURL("http://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_FALSE(GURL("ws://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_FALSE(GURL("http-so://foo.bar.com/").SchemeIsCryptographic());
+}
+
 TEST(GURLTest, SchemeIsBlob) {
   EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsBlob());
   EXPECT_TRUE(GURL("blob://bar/").SchemeIsBlob());
   EXPECT_FALSE(GURL("http://bar/").SchemeIsBlob());
 }
 
+TEST(GURLTest, SchemeIsSuborigin) {
+  EXPECT_TRUE(GURL("http-so://foo.bar.com/").SchemeIsSuborigin());
+  EXPECT_TRUE(GURL("HTTP-SO://foo.bar.com/").SchemeIsSuborigin());
+  EXPECT_TRUE(GURL("HtTp-So://foo.bar.com/").SchemeIsSuborigin());
+  EXPECT_FALSE(GURL("http://foo.bar.com/").SchemeIsSuborigin());
+
+  EXPECT_TRUE(GURL("https-so://foo.bar.com/").SchemeIsSuborigin());
+  EXPECT_TRUE(GURL("HTTPS-SO://foo.bar.com/").SchemeIsSuborigin());
+  EXPECT_TRUE(GURL("HtTpS-So://foo.bar.com/").SchemeIsSuborigin());
+  EXPECT_FALSE(GURL("https://foo.bar.com/").SchemeIsSuborigin());
+}
+
 TEST(GURLTest, ContentAndPathForNonStandardURLs) {
   struct TestCase {
     const char* url;

diff --git a/src/url/origin.cc b/src/url/origin.cc
index 1ba07c1..fac78cf 100644
--- a/src/url/origin.cc
+++ b/src/url/origin.cc

@@ -16,10 +16,26 @@
 
 namespace url {
 
-Origin::Origin() : unique_(true) {
+namespace {
+
+GURL AddSuboriginToUrl(const GURL& url, const std::string& suborigin) {
+  GURL::Replacements replacements;
+  if (url.scheme() == kHttpScheme) {
+    replacements.SetSchemeStr(kHttpSuboriginScheme);
+  } else {
+    DCHECK(url.scheme() == kHttpsScheme);
+    replacements.SetSchemeStr(kHttpsSuboriginScheme);
+  }
+  std::string new_host = suborigin + "." + url.host();
+  replacements.SetHostStr(new_host);
+  return url.ReplaceComponents(replacements);
 }
 
-Origin::Origin(const GURL& url) : unique_(true) {
+}  // namespace
+
+Origin::Origin() : unique_(true), suborigin_(std::string()) {}
+
+Origin::Origin(const GURL& url) : unique_(true), suborigin_(std::string()) {
   if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob()))
     return;
 
@@ -31,6 +47,31 @@
     // the "path", which boils down to everything after the scheme. GURL's
     // 'GetContent()' gives us exactly that.
     tuple_ = SchemeHostPort(GURL(url.GetContent()));
+  } else if (url.SchemeIsSuborigin()) {
+    GURL::Replacements replacements;
+    if (url.scheme() == kHttpSuboriginScheme) {
+      replacements.SetSchemeStr(kHttpScheme);
+    } else {
+      DCHECK(url.scheme() == kHttpsSuboriginScheme);
+      replacements.SetSchemeStr(kHttpsScheme);
+    }
+
+    std::string host = url.host();
+    size_t suborigin_end = host.find(".");
+    bool no_dot = suborigin_end == std::string::npos;
+    std::string new_host(
+        no_dot ? ""
+               : host.substr(suborigin_end + 1,
+                             url.host().length() - suborigin_end - 1));
+    replacements.SetHostStr(new_host);
+
+    tuple_ = SchemeHostPort(url.ReplaceComponents(replacements));
+
+    bool invalid_suborigin = no_dot || suborigin_end == 0;
+    if (invalid_suborigin || tuple_.IsInvalid())
+      return;
+
+    suborigin_ = host.substr(0, suborigin_end);
   } else {
     tuple_ = SchemeHostPort(url);
   }
@@ -38,9 +79,14 @@
   unique_ = tuple_.IsInvalid();
 }
 
-Origin::Origin(base::StringPiece scheme, base::StringPiece host, uint16_t port)
-    : tuple_(scheme, host, port) {
+Origin::Origin(base::StringPiece scheme,
+               base::StringPiece host,
+               uint16_t port,
+               base::StringPiece suborigin,
+               SchemeHostPort::ConstructPolicy policy)
+    : tuple_(scheme, host, port, policy) {
   unique_ = tuple_.IsInvalid();
+  suborigin_ = suborigin.as_string();
 }
 
 Origin::~Origin() {
@@ -51,7 +97,22 @@
     base::StringPiece scheme,
     base::StringPiece host,
     uint16_t port) {
-  return Origin(scheme, host, port);
+  return Origin(scheme, host, port, "", SchemeHostPort::CHECK_CANONICALIZATION);
+}
+
+Origin Origin::CreateFromNormalizedTuple(base::StringPiece scheme,
+                                         base::StringPiece host,
+                                         uint16_t port) {
+  return CreateFromNormalizedTupleWithSuborigin(scheme, host, port, "");
+}
+
+Origin Origin::CreateFromNormalizedTupleWithSuborigin(
+    base::StringPiece scheme,
+    base::StringPiece host,
+    uint16_t port,
+    base::StringPiece suborigin) {
+  return Origin(scheme, host, port, suborigin,
+                SchemeHostPort::ALREADY_CANONICALIZED);
 }
 
 std::string Origin::Serialize() const {
@@ -61,9 +122,21 @@
   if (scheme() == kFileScheme)
     return "file://";
 
+  if (!suborigin_.empty()) {
+    GURL url_with_suborigin = AddSuboriginToUrl(tuple_.GetURL(), suborigin_);
+    return SchemeHostPort(url_with_suborigin).Serialize();
+  }
+
   return tuple_.Serialize();
 }
 
+Origin Origin::GetPhysicalOrigin() const {
+  if (suborigin_.empty())
+    return *this;
+
+  return Origin(tuple_.GetURL());
+}
+
 GURL Origin::GetURL() const {
   if (unique())
     return GURL();
@@ -71,14 +144,23 @@
   if (scheme() == kFileScheme)
     return GURL("file:///");
 
-  return tuple_.GetURL();
+  GURL tuple_url(tuple_.GetURL());
+
+  if (!suborigin_.empty())
+    return AddSuboriginToUrl(tuple_url, suborigin_);
+
+  return tuple_url;
 }
 
 bool Origin::IsSameOriginWith(const Origin& other) const {
   if (unique_ || other.unique_)
     return false;
 
-  return tuple_.Equals(other.tuple_);
+  return tuple_.Equals(other.tuple_) && suborigin_ == other.suborigin_;
+}
+
+bool Origin::IsSamePhysicalOriginWith(const Origin& other) const {
+  return GetPhysicalOrigin().IsSameOriginWith(other.GetPhysicalOrigin());
 }
 
 bool Origin::DomainIs(base::StringPiece lower_ascii_domain) const {
@@ -97,4 +179,8 @@
   return Origin(a).IsSameOriginWith(Origin(b));
 }
 
+bool IsSamePhysicalOriginWith(const GURL& a, const GURL& b) {
+  return Origin(a).IsSamePhysicalOriginWith(Origin(b));
+}
+
 }  // namespace url

diff --git a/src/url/origin.h b/src/url/origin.h
index 273622e..1c28588 100644
--- a/src/url/origin.h
+++ b/src/url/origin.h

@@ -90,8 +90,8 @@
   explicit Origin(const GURL& url);
 
   // Creates an Origin from a |scheme|, |host|, and |port|. All the parameters
-  // must be valid and canonicalized. In particular, note that this cannot be
-  // used to create unique origins; 'url::Origin()' is the right way to do that.
+  // must be valid and canonicalized. Do not use this method to create unique
+  // origins. Use Origin() for that.
   //
   // This constructor should be used in order to pass 'Origin' objects back and
   // forth over IPC (as transitioning through GURL would risk potentially
@@ -102,6 +102,21 @@
       base::StringPiece host,
       uint16_t port);
 
+  // Creates an origin without sanity checking that the host is canonicalized.
+  // This should only be used when converting between already normalized types,
+  // and should NOT be used for IPC.
+  static Origin CreateFromNormalizedTuple(base::StringPiece scheme,
+                                          base::StringPiece host,
+                                          uint16_t port);
+
+  // Same as CreateFromNormalizedTuple() above, but adds a suborigin component
+  // as well.
+  static Origin CreateFromNormalizedTupleWithSuborigin(
+      base::StringPiece scheme,
+      base::StringPiece host,
+      uint16_t port,
+      base::StringPiece suborigin);
+
   ~Origin();
 
   // For unique origins, these return ("", "", 0).
@@ -109,19 +124,34 @@
   const std::string& host() const { return tuple_.host(); }
   uint16_t port() const { return tuple_.port(); }
 
+  // Note that an origin without a suborigin will return the empty string.
+  const std::string& suborigin() const { return suborigin_; }
+
   bool unique() const { return unique_; }
 
   // An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with
   // the addition that all Origins with a 'file' scheme serialize to "file://".
+  // If the Origin has a suborigin, it will be serialized per
+  // https://w3c.github.io/webappsec-suborigins/#serializing.
   std::string Serialize() const;
 
+  // Returns the physical origin for Origin. If the suborigin is empty, this
+  // will just return a copy of the Origin.  If it has a suborigin, will return
+  // the Origin of just the scheme/host/port tuple, without the suborigin. See
+  // https://w3c.github.io/webappsec-suborigins/.
+  Origin GetPhysicalOrigin() const;
+
   // Two Origins are "same-origin" if their schemes, hosts, and ports are exact
-  // matches; and neither is unique.
+  // matches; and neither is unique. If either origin has a suborigin, the
+  // suborigins must also be exact matches.
   bool IsSameOriginWith(const Origin& other) const;
   bool operator==(const Origin& other) const {
     return IsSameOriginWith(other);
   }
 
+  // Same as above, but ignores suborigins if they exist.
+  bool IsSamePhysicalOriginWith(const Origin& other) const;
+
   // Efficiently returns what GURL(Serialize()) would without re-parsing the
   // URL. This can be used for the (rare) times a GURL representation is needed
   // for an Origin.
@@ -138,15 +168,21 @@
   bool operator<(const Origin& other) const;
 
  private:
-  Origin(base::StringPiece scheme, base::StringPiece host, uint16_t port);
+  Origin(base::StringPiece scheme,
+         base::StringPiece host,
+         uint16_t port,
+         base::StringPiece suborigin,
+         SchemeHostPort::ConstructPolicy policy);
 
   SchemeHostPort tuple_;
   bool unique_;
+  std::string suborigin_;
 };
 
 URL_EXPORT std::ostream& operator<<(std::ostream& out, const Origin& origin);
 
 URL_EXPORT bool IsSameOriginWith(const GURL& a, const GURL& b);
+URL_EXPORT bool IsSamePhysicalOriginWith(const GURL& a, const GURL& b);
 
 }  // namespace url
 

diff --git a/src/url/origin_unittest.cc b/src/url/origin_unittest.cc
index 7a67533..fee161b 100644
--- a/src/url/origin_unittest.cc
+++ b/src/url/origin_unittest.cc

@@ -13,24 +13,26 @@
 
 namespace {
 
-void ExpectParsedComponentEqual(const url::Component& a,
-                                const url::Component& b) {
-  EXPECT_EQ(a.begin, b.begin);
-  EXPECT_EQ(a.len, b.len);
-}
-
 void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
   EXPECT_EQ(a, b);
   const url::Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
   const url::Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
-  ExpectParsedComponentEqual(a_parsed.scheme, b_parsed.scheme);
-  ExpectParsedComponentEqual(a_parsed.username, b_parsed.username);
-  ExpectParsedComponentEqual(a_parsed.password, b_parsed.password);
-  ExpectParsedComponentEqual(a_parsed.host, b_parsed.host);
-  ExpectParsedComponentEqual(a_parsed.port, b_parsed.port);
-  ExpectParsedComponentEqual(a_parsed.path, b_parsed.path);
-  ExpectParsedComponentEqual(a_parsed.query, b_parsed.query);
-  ExpectParsedComponentEqual(a_parsed.ref, b_parsed.ref);
+  EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin);
+  EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len);
+  EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin);
+  EXPECT_EQ(a_parsed.username.len, b_parsed.username.len);
+  EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin);
+  EXPECT_EQ(a_parsed.password.len, b_parsed.password.len);
+  EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin);
+  EXPECT_EQ(a_parsed.host.len, b_parsed.host.len);
+  EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin);
+  EXPECT_EQ(a_parsed.port.len, b_parsed.port.len);
+  EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin);
+  EXPECT_EQ(a_parsed.path.len, b_parsed.path.len);
+  EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin);
+  EXPECT_EQ(a_parsed.query.len, b_parsed.query.len);
+  EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin);
+  EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len);
 }
 
 TEST(OriginTest, UniqueOriginComparison) {
@@ -63,6 +65,52 @@
   }
 }
 
+TEST(OriginTest, ConstructFromTuple) {
+  struct TestCases {
+    const char* const scheme;
+    const char* const host;
+    const uint16_t port;
+    const char* const suborigin;
+  } cases[] = {
+      {"http", "example.com", 80, ""},
+      {"http", "example.com", 123, ""},
+      {"https", "example.com", 443, ""},
+      {"http-so", "foobar.example.com", 80, "foobar"},
+      {"http-so", "foobar.example.com", 123, "foobar"},
+      {"https-so", "foobar.example.com", 443, "foobar"},
+  };
+
+  for (const auto& test_case : cases) {
+    testing::Message scope_message;
+    if (test_case.suborigin != std::string()) {
+      scope_message << test_case.scheme << "-so://" << test_case.suborigin
+                    << "." << test_case.host << ":" << test_case.port;
+    } else {
+      scope_message << test_case.scheme << "://" << test_case.host << ":"
+                    << test_case.port;
+    }
+    SCOPED_TRACE(scope_message);
+
+    url::Origin origin_without_suborigin =
+        url::Origin::CreateFromNormalizedTuple(test_case.scheme, test_case.host,
+                                               test_case.port);
+
+    url::Origin origin_with_suborigin =
+        url::Origin::CreateFromNormalizedTupleWithSuborigin(
+            test_case.scheme, test_case.host, test_case.port,
+            test_case.suborigin);
+
+    EXPECT_EQ(test_case.scheme, origin_without_suborigin.scheme());
+    EXPECT_EQ(test_case.host, origin_without_suborigin.host());
+    EXPECT_EQ(test_case.port, origin_without_suborigin.port());
+
+    EXPECT_EQ(test_case.scheme, origin_with_suborigin.scheme());
+    EXPECT_EQ(test_case.host, origin_with_suborigin.host());
+    EXPECT_EQ(test_case.port, origin_with_suborigin.port());
+    EXPECT_EQ(test_case.suborigin, origin_with_suborigin.suborigin());
+  }
+}
+
 TEST(OriginTest, ConstructFromGURL) {
   url::Origin different_origin(GURL("https://not-in-the-list.test/"));
 
@@ -151,10 +199,14 @@
     GURL url(test_case.url);
     EXPECT_TRUE(url.is_valid());
     url::Origin origin(url);
+    EXPECT_TRUE(origin.suborigin().empty());
     std::string serialized = origin.Serialize();
+    std::string serialized_physical_origin =
+        origin.GetPhysicalOrigin().Serialize();
     ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
 
     EXPECT_EQ(test_case.expected, serialized);
+    EXPECT_EQ(test_case.expected, serialized_physical_origin);
 
     // The '<<' operator should produce the same serialization as Serialize().
     std::stringstream out;
@@ -163,6 +215,119 @@
   }
 }
 
+TEST(OriginTest, SuboriginSerialization) {
+  struct TestCases {
+    const char* const url;
+    const char* const expected;
+    const char* const expected_physical_origin;
+    const char* const expected_suborigin;
+  } cases[] = {
+      {"http-so://foobar.example.com/", "http-so://foobar.example.com",
+       "http://example.com", "foobar"},
+      {"http-so://foobar.example.com:123/", "http-so://foobar.example.com:123",
+       "http://example.com:123", "foobar"},
+      {"https-so://foobar.example.com/", "https-so://foobar.example.com",
+       "https://example.com", "foobar"},
+      {"https-so://foobar.example.com:123/",
+       "https-so://foobar.example.com:123", "https://example.com:123",
+       "foobar"},
+      {"http://example.com/", "http://example.com", "http://example.com", ""},
+      {"http-so://foobar.example.com/some/path", "http-so://foobar.example.com",
+       "http://example.com", "foobar"},
+      {"http-so://foobar.example.com/some/path?query",
+       "http-so://foobar.example.com", "http://example.com", "foobar"},
+      {"http-so://foobar.example.com/some/path#fragment",
+       "http-so://foobar.example.com", "http://example.com", "foobar"},
+      {"http-so://foobar.example.com/some/path?query#fragment",
+       "http-so://foobar.example.com", "http://example.com", "foobar"},
+      {"http-so://foobar.example.com:1234/some/path?query#fragment",
+       "http-so://foobar.example.com:1234", "http://example.com:1234",
+       "foobar"},
+  };
+
+  for (const auto& test_case : cases) {
+    SCOPED_TRACE(test_case.url);
+    GURL url(test_case.url);
+    EXPECT_TRUE(url.is_valid());
+    url::Origin origin(url);
+    std::string serialized = origin.Serialize();
+    std::string serialized_physical_origin =
+        origin.GetPhysicalOrigin().Serialize();
+    EXPECT_FALSE(origin.unique());
+    EXPECT_EQ(test_case.expected_suborigin, origin.suborigin());
+    ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
+
+    EXPECT_EQ(test_case.expected, serialized);
+    EXPECT_EQ(test_case.expected_physical_origin, serialized_physical_origin);
+
+    // The '<<' operator should produce the same serialization as Serialize().
+    std::stringstream out;
+    out << origin;
+    EXPECT_EQ(test_case.expected, out.str());
+  }
+
+  const char* const failure_cases[] = {
+      "http-so://.",  "http-so://foo",  "http-so://.foo",  "http-so://foo.",
+      "https-so://.", "https-so://foo", "https-so://.foo", "https-so://foo.",
+  };
+
+  for (const auto& test_case : failure_cases) {
+    SCOPED_TRACE(test_case);
+    GURL url(test_case);
+    EXPECT_TRUE(url.is_valid());
+    url::Origin origin(url);
+    std::string serialized = origin.Serialize();
+    std::string serialized_physical_origin =
+        origin.GetPhysicalOrigin().Serialize();
+    EXPECT_TRUE(origin.unique());
+    EXPECT_EQ("", origin.suborigin());
+    ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
+
+    EXPECT_EQ("null", serialized);
+    EXPECT_EQ("null", serialized_physical_origin);
+  }
+}
+
+TEST(OriginTest, SuboriginIsSameOriginWith) {
+  struct TestCases {
+    const char* const url1;
+    const char* const url2;
+    bool is_same_origin;
+    bool is_same_physical_origin;
+  } cases[]{
+      {"http-so://foobar1.example.com/", "http-so://foobar1.example.com", true,
+       true},
+      {"http-so://foobar2.example.com/", "https-so://foobar2.example.com",
+       false, false},
+      {"http-so://foobar3.example.com/", "http://example.com", false, true},
+      {"https-so://foobar4.example.com/", "https-so://foobar4.example.com",
+       true, true},
+      {"https-so://foobar5.example.com/", "https://example.com", false, true},
+      {"http-so://foobar6.example.com/", "http-so://bazbar.example.com", false,
+       true},
+      {"http-so://foobar7.example.com/", "http-so://foobar7.google.com", false,
+       false},
+  };
+
+  for (const auto& test_case : cases) {
+    SCOPED_TRACE(test_case.url1);
+    url::Origin origin1(GURL(test_case.url1));
+    url::Origin origin2(GURL(test_case.url2));
+
+    EXPECT_TRUE(origin1.IsSameOriginWith(origin1));
+    EXPECT_TRUE(origin2.IsSameOriginWith(origin2));
+    EXPECT_EQ(test_case.is_same_origin, origin1.IsSameOriginWith(origin2));
+    EXPECT_EQ(test_case.is_same_origin, origin2.IsSameOriginWith(origin1));
+
+    EXPECT_TRUE(origin1.IsSamePhysicalOriginWith(origin1));
+    EXPECT_TRUE(origin2.IsSamePhysicalOriginWith(origin2));
+    EXPECT_EQ(test_case.is_same_physical_origin,
+              origin1.IsSamePhysicalOriginWith(origin2));
+    EXPECT_EQ(test_case.is_same_physical_origin,
+              origin2.IsSamePhysicalOriginWith(origin1));
+  }
+}
+
 TEST(OriginTest, Comparison) {
   // These URLs are arranged in increasing order:
   const char* const urls[] = {

diff --git a/src/url/scheme_host_port.cc b/src/url/scheme_host_port.cc
index e6bb493..b5de079 100644
--- a/src/url/scheme_host_port.cc
+++ b/src/url/scheme_host_port.cc

@@ -48,7 +48,8 @@
 
 bool IsValidInput(const base::StringPiece& scheme,
                   const base::StringPiece& host,
-                  uint16_t port) {
+                  uint16_t port,
+                  SchemeHostPort::ConstructPolicy policy) {
   SchemeType scheme_type = SCHEME_WITH_PORT;
   bool is_standard = GetStandardSchemeType(
       scheme.data(),
@@ -71,8 +72,14 @@
       if (host.empty() || port == 0)
         return false;
 
-      if (!IsCanonicalHost(host))
+      // Don't do an expensive canonicalization if the host is already
+      // canonicalized.
+      DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
+             IsCanonicalHost(host));
+      if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
+          !IsCanonicalHost(host)) {
         return false;
+      }
 
       return true;
 
@@ -83,8 +90,14 @@
         return false;
       }
 
-      if (!IsCanonicalHost(host))
+      // Don't do an expensive canonicalization if the host is already
+      // canonicalized.
+      DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
+             IsCanonicalHost(host));
+      if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
+          !IsCanonicalHost(host)) {
         return false;
+      }
 
       return true;
 
@@ -104,9 +117,10 @@
 
 SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
                                base::StringPiece host,
-                               uint16_t port)
+                               uint16_t port,
+                               ConstructPolicy policy)
     : port_(0) {
-  if (!IsValidInput(scheme, host, port))
+  if (!IsValidInput(scheme, host, port, policy))
     return;
 
   scheme.CopyToString(&scheme_);
@@ -114,6 +128,14 @@
   port_ = port;
 }
 
+SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
+                               base::StringPiece host,
+                               uint16_t port)
+    : SchemeHostPort(scheme,
+                     host,
+                     port,
+                     ConstructPolicy::CHECK_CANONICALIZATION) {}
+
 SchemeHostPort::SchemeHostPort(const GURL& url) : port_(0) {
   if (!url.is_valid())
     return;
@@ -126,7 +148,7 @@
   if (port == PORT_UNSPECIFIED)
     port = 0;
 
-  if (!IsValidInput(scheme, host, port))
+  if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
     return;
 
   scheme.CopyToString(&scheme_);
@@ -152,6 +174,9 @@
   url::Parsed parsed;
   std::string serialized = SerializeInternal(&parsed);
 
+  if (IsInvalid())
+    return GURL(std::move(serialized), parsed, false);
+
   // If the serialized string is passed to GURL for parsing, it will append an
   // empty path "/". Add that here. Note: per RFC 6454 we cannot do this for
   // normal Origin serialization.
@@ -176,13 +201,17 @@
   if (IsInvalid())
     return result;
 
-  parsed->scheme = Component(0, scheme_.length());
-  result.append(scheme_);
+  if (!scheme_.empty()) {
+    parsed->scheme = Component(0, scheme_.length());
+    result.append(scheme_);
+  }
 
   result.append(kStandardSchemeSeparator);
 
-  parsed->host = Component(result.length(), host_.length());
-  result.append(host_);
+  if (!host_.empty()) {
+    parsed->host = Component(result.length(), host_.length());
+    result.append(host_);
+  }
 
   if (port_ == 0)
     return result;

diff --git a/src/url/scheme_host_port.h b/src/url/scheme_host_port.h
index dc8862a..065e4aa 100644
--- a/src/url/scheme_host_port.h
+++ b/src/url/scheme_host_port.h

@@ -88,6 +88,19 @@
                  base::StringPiece host,
                  uint16_t port);
 
+  // Metadata influencing whether or not the constructor should sanity check
+  // host canonicalization.
+  enum ConstructPolicy { CHECK_CANONICALIZATION, ALREADY_CANONICALIZED };
+
+  // Creates a (scheme, host, port) tuple. With ALREADY_CANONICALIZED, host
+  // canonicalization is only DCHECKed rather than enforced, so that policy
+  // should only be used when converting between already normalized types,
+  // and should NOT be used for IPC.
+  SchemeHostPort(base::StringPiece scheme,
+                 base::StringPiece host,
+                 uint16_t port,
+                 ConstructPolicy policy);
+
   // Creates a (scheme, host, port) tuple from |url|, as described at
   // https://tools.ietf.org/html/rfc6454#section-4
   //

diff --git a/src/url/scheme_host_port_unittest.cc b/src/url/scheme_host_port_unittest.cc
index 46e1cf8..9c59b9f 100644
--- a/src/url/scheme_host_port_unittest.cc
+++ b/src/url/scheme_host_port_unittest.cc

@@ -12,6 +12,28 @@
 
 namespace {
 
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
+  EXPECT_EQ(a, b);
+  const url::Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
+  const url::Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
+  EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin);
+  EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len);
+  EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin);
+  EXPECT_EQ(a_parsed.username.len, b_parsed.username.len);
+  EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin);
+  EXPECT_EQ(a_parsed.password.len, b_parsed.password.len);
+  EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin);
+  EXPECT_EQ(a_parsed.host.len, b_parsed.host.len);
+  EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin);
+  EXPECT_EQ(a_parsed.port.len, b_parsed.port.len);
+  EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin);
+  EXPECT_EQ(a_parsed.path.len, b_parsed.path.len);
+  EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin);
+  EXPECT_EQ(a_parsed.query.len, b_parsed.query.len);
+  EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin);
+  EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len);
+}
+
 TEST(SchemeHostPortTest, Invalid) {
   url::SchemeHostPort invalid;
   EXPECT_EQ("", invalid.scheme());
@@ -37,6 +59,7 @@
     EXPECT_TRUE(tuple.Equals(tuple));
     EXPECT_TRUE(tuple.Equals(invalid));
     EXPECT_TRUE(invalid.Equals(tuple));
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
   }
 }
 
@@ -63,6 +86,7 @@
     EXPECT_EQ(test.port, tuple.port());
     EXPECT_FALSE(tuple.IsInvalid());
     EXPECT_TRUE(tuple.Equals(tuple));
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
   }
 }
 
@@ -98,6 +122,7 @@
     EXPECT_EQ(0, tuple.port());
     EXPECT_TRUE(tuple.IsInvalid());
     EXPECT_TRUE(tuple.Equals(tuple));
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
   }
 }
 
@@ -125,6 +150,7 @@
     EXPECT_EQ("", tuple.host());
     EXPECT_EQ(0, tuple.port());
     EXPECT_TRUE(tuple.IsInvalid());
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
   }
 }
 
@@ -160,6 +186,7 @@
     EXPECT_EQ(test.port, tuple.port());
     EXPECT_FALSE(tuple.IsInvalid());
     EXPECT_TRUE(tuple.Equals(tuple));
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
   }
 }
 
@@ -184,6 +211,7 @@
     GURL url(test.url);
     url::SchemeHostPort tuple(url);
     EXPECT_EQ(test.expected, tuple.Serialize());
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
   }
 }
 

diff --git a/src/url/url_canon.h b/src/url/url_canon.h
index 95d5345..c4852e4 100644
--- a/src/url/url_canon.h
+++ b/src/url/url_canon.h

@@ -379,6 +379,33 @@
                                         CanonOutput* output,
                                         CanonHostInfo* host_info);
 
+// Canonicalizes a string according to the host canonicalization rules. Unlike
+// CanonicalizeHost, this will not check for IP addresses which can change the
+// meaning (and canonicalization) of the components. This means it is possible
+// to call this for sub-components of a host name without corruption.
+//
+// As an example, "01.02.03.04.com" is a canonical hostname. If you called
+// CanonicalizeHost on the substring "01.02.03.04" it will get "fixed" to
+// "1.2.3.4" which will produce an invalid host name when reassembled. This
+// can happen more than one might think because all numbers by themselves are
+// considered IP addresses; so "5" canonicalizes to "0.0.0.5".
+//
+// Be careful: Because Punycode works on each dot-separated substring as a
+// unit, you should only pass this function substrings that represent complete
+// dot-separated subcomponents of the original host. Even if you have ASCII
+// input, percent-escaped characters will have different meanings if split in
+// the middle.
+//
+// Returns true if the host was valid. This function will treat a 0-length
+// host as valid (because it's designed to be used for substrings) while the
+// full version above will mark empty hosts as broken.
+URL_EXPORT bool CanonicalizeHostSubstring(const char* spec,
+                                          const Component& host,
+                                          CanonOutput* output);
+URL_EXPORT bool CanonicalizeHostSubstring(const base::char16* spec,
+                                          const Component& host,
+                                          CanonOutput* output);
+
 // IP addresses.
 //
 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is

diff --git a/src/url/url_canon_host.cc b/src/url/url_canon_host.cc
index d4cdfd5..76a2236 100644
--- a/src/url/url_canon_host.cc
+++ b/src/url/url_canon_host.cc

@@ -308,7 +308,25 @@
   return DoIDNHost(host, host_len, output);
 }
 
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
+bool DoHostSubstring(const CHAR* spec,
+                     const Component& host,
+                     CanonOutput* output) {
+  bool has_non_ascii, has_escaped;
+  ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
+
+  if (has_non_ascii || has_escaped) {
+    return DoComplexHost(&spec[host.begin], host.len, has_non_ascii,
+                         has_escaped, output);
+  }
+
+  const bool success =
+      DoSimpleHost(&spec[host.begin], host.len, output, &has_non_ascii);
+  DCHECK(!has_non_ascii);
+  return success;
+}
+
+template <typename CHAR, typename UCHAR>
 void DoHost(const CHAR* spec,
             const Component& host,
             CanonOutput* output,
@@ -320,26 +338,10 @@
     return;
   }
 
-  bool has_non_ascii, has_escaped;
-  ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
-
   // Keep track of output's initial length, so we can rewind later.
   const int output_begin = output->length();
 
-  bool success;
-  if (!has_non_ascii && !has_escaped) {
-    success = DoSimpleHost(&spec[host.begin], host.len,
-                           output, &has_non_ascii);
-    DCHECK(!has_non_ascii);
-  } else {
-    success = DoComplexHost(&spec[host.begin], host.len,
-                            has_non_ascii, has_escaped, output);
-  }
-
-  if (!success) {
-    // Canonicalization failed. Set BROKEN to notify the caller.
-    host_info->family = CanonHostInfo::BROKEN;
-  } else {
+  if (DoHostSubstring<CHAR, UCHAR>(spec, host, output)) {
     // After all the other canonicalization, check if we ended up with an IP
     // address. IP addresses are small, so writing into this temporary buffer
     // should not cause an allocation.
@@ -355,6 +357,9 @@
       output->set_length(output_begin);
       output->Append(canon_ip.data(), canon_ip.length());
     }
+  } else {
+    // Canonicalization failed. Set BROKEN to notify the caller.
+    host_info->family = CanonHostInfo::BROKEN;
   }
 
   host_info->out_host = MakeRange(output_begin, output->length());
@@ -396,4 +401,16 @@
   DoHost<base::char16, base::char16>(spec, host, output, host_info);
 }
 
+bool CanonicalizeHostSubstring(const char* spec,
+                               const Component& host,
+                               CanonOutput* output) {
+  return DoHostSubstring<char, unsigned char>(spec, host, output);
+}
+
+bool CanonicalizeHostSubstring(const base::char16* spec,
+                               const Component& host,
+                               CanonOutput* output) {
+  return DoHostSubstring<base::char16, base::char16>(spec, host, output);
+}
+
 }  // namespace url

diff --git a/src/url/url_canon_icu_unittest.cc b/src/url/url_canon_icu_unittest.cc
index f7ce199..83a7263 100644
--- a/src/url/url_canon_icu_unittest.cc
+++ b/src/url/url_canon_icu_unittest.cc

@@ -14,8 +14,6 @@
 
 namespace url {
 
-using test_utils::WStringToUTF16;
-
 namespace {
 
 // Wrapper around a UConverter object that managers creation and destruction.
@@ -64,7 +62,8 @@
     std::string str;
     StdStringCanonOutput output(&str);
 
-    base::string16 input_str(WStringToUTF16(icu_cases[i].input));
+    base::string16 input_str(
+        test_utils::TruncateWStringToUTF16(icu_cases[i].input));
     int input_len = static_cast<int>(input_str.length());
     converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
     output.Complete();
@@ -134,7 +133,8 @@
     }
 
     if (query_cases[i].input16) {
-      base::string16 input16(WStringToUTF16(query_cases[i].input16));
+      base::string16 input16(
+          test_utils::TruncateWStringToUTF16(query_cases[i].input16));
       int len = static_cast<int>(input16.length());
       Component in_comp(0, len);
       std::string out_str;

diff --git a/src/url/url_canon_stdurl.cc b/src/url/url_canon_stdurl.cc
index 7d1758b..e0bca9d 100644
--- a/src/url/url_canon_stdurl.cc
+++ b/src/url/url_canon_stdurl.cc

@@ -120,6 +120,14 @@
       if (!strncmp(scheme, kWsScheme, scheme_len))
         default_port = 80;
       break;
+    case 7:
+      if (!strncmp(scheme, kHttpSuboriginScheme, scheme_len))
+        default_port = 80;
+      break;
+    case 8:
+      if (!strncmp(scheme, kHttpsSuboriginScheme, scheme_len))
+        default_port = 443;
+      break;
   }
   return default_port;
 }

diff --git a/src/url/url_canon_unittest.cc b/src/url/url_canon_unittest.cc
index 2f053f2..26d7815 100644
--- a/src/url/url_canon_unittest.cc
+++ b/src/url/url_canon_unittest.cc

@@ -6,6 +6,7 @@
 #include <stddef.h>
 
 #include "base/macros.h"
+#include "base/strings/utf_string_conversions.h"
 #include "testing/base/public/gunit.h"
 #include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
@@ -15,10 +16,6 @@
 
 namespace url {
 
-using test_utils::WStringToUTF16;
-using test_utils::ConvertUTF8ToUTF16;
-using test_utils::ConvertUTF16ToUTF8;
-
 namespace {
 
 struct ComponentCase {
@@ -195,7 +192,8 @@
       out_str.clear();
       StdStringCanonOutput output(&out_str);
 
-      base::string16 input_str(WStringToUTF16(utf_cases[i].input16));
+      base::string16 input_str(
+          test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
       int input_len = static_cast<int>(input_str.length());
       bool success = true;
       for (int ch = 0; ch < input_len; ch++) {
@@ -213,11 +211,12 @@
 
       // UTF-16 -> UTF-8
       std::string input8_str(utf_cases[i].input8);
-      base::string16 input16_str(WStringToUTF16(utf_cases[i].input16));
-      EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str));
+      base::string16 input16_str(
+          test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
+      EXPECT_EQ(input8_str, base::UTF16ToUTF8(input16_str));
 
       // UTF-8 -> UTF-16
-      EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str));
+      EXPECT_EQ(input16_str, base::UTF8ToUTF16(input8_str));
     }
   }
 }
@@ -265,7 +264,7 @@
     out_str.clear();
     StdStringCanonOutput output2(&out_str);
 
-    base::string16 wide_input(ConvertUTF8ToUTF16(scheme_cases[i].input));
+    base::string16 wide_input(base::UTF8ToUTF16(scheme_cases[i].input));
     in_comp.len = static_cast<int>(wide_input.length());
     success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2,
                                  &out_comp);
@@ -530,7 +529,8 @@
 
     // Wide version.
     if (host_cases[i].input16) {
-      base::string16 input16(WStringToUTF16(host_cases[i].input16));
+      base::string16 input16(
+          test_utils::TruncateWStringToUTF16(host_cases[i].input16));
       int host_len = static_cast<int>(input16.length());
       Component in_comp(0, host_len);
       Component out_comp;
@@ -580,7 +580,8 @@
 
     // Wide version.
     if (host_cases[i].input16) {
-      base::string16 input16(WStringToUTF16(host_cases[i].input16));
+      base::string16 input16(
+          test_utils::TruncateWStringToUTF16(host_cases[i].input16));
       int host_len = static_cast<int>(input16.length());
       Component in_comp(0, host_len);
 
@@ -702,7 +703,8 @@
     }
 
     // 16-bit version.
-    base::string16 input16(WStringToUTF16(cases[i].input16));
+    base::string16 input16(
+        test_utils::TruncateWStringToUTF16(cases[i].input16));
     component = Component(0, static_cast<int>(input16.length()));
 
     std::string out_str2;
@@ -854,7 +856,8 @@
     }
 
     // 16-bit version.
-    base::string16 input16(WStringToUTF16(cases[i].input16));
+    base::string16 input16(
+        test_utils::TruncateWStringToUTF16(cases[i].input16));
     component = Component(0, static_cast<int>(input16.length()));
 
     std::string out_str2;
@@ -887,6 +890,51 @@
   EXPECT_FALSE(host_info.IsIPAddress());
 }
 
+// Verifies that CanonicalizeHostSubstring produces the expected output and
+// does not "fix" IP addresses. Because this code is a subset of
+// CanonicalizeHost, the shared functionality is not tested.
+TEST(URLCanonTest, CanonicalizeHostSubstring) {
+  // Basic sanity check.
+  {
+    std::string out_str;
+    StdStringCanonOutput output(&out_str);
+    EXPECT_TRUE(CanonicalizeHostSubstring("M\xc3\x9cNCHEN.com",
+                                          Component(0, 12), &output));
+    output.Complete();
+    EXPECT_EQ("xn--mnchen-3ya.com", out_str);
+  }
+
+  // Failure case.
+  {
+    std::string out_str;
+    StdStringCanonOutput output(&out_str);
+    EXPECT_FALSE(CanonicalizeHostSubstring(
+        test_utils::TruncateWStringToUTF16(L"\xfdd0zyx.com").c_str(),
+        Component(0, 8), &output));
+    output.Complete();
+    EXPECT_EQ("%EF%BF%BDzyx.com", out_str);
+  }
+
+  // Should return true for empty input strings.
+  {
+    std::string out_str;
+    StdStringCanonOutput output(&out_str);
+    EXPECT_TRUE(CanonicalizeHostSubstring("", Component(0, 0), &output));
+    output.Complete();
+    EXPECT_EQ(std::string(), out_str);
+  }
+
+  // Numbers that look like IP addresses should not be changed.
+  {
+    std::string out_str;
+    StdStringCanonOutput output(&out_str);
+    EXPECT_TRUE(
+        CanonicalizeHostSubstring("01.02.03.04", Component(0, 11), &output));
+    output.Complete();
+    EXPECT_EQ("01.02.03.04", out_str);
+  }
+}
+
 TEST(URLCanonTest, UserInfo) {
   // Note that the canonicalizer should escape and treat empty components as
   // not being there.
@@ -940,7 +988,7 @@
     // Now try the wide version
     out_str.clear();
     StdStringCanonOutput output2(&out_str);
-    base::string16 wide_input(ConvertUTF8ToUTF16(user_info_cases[i].input));
+    base::string16 wide_input(base::UTF8ToUTF16(user_info_cases[i].input));
     success = CanonicalizeUserInfo(wide_input.c_str(),
                                    parsed.username,
                                    wide_input.c_str(),
@@ -1003,7 +1051,7 @@
     // Now try the wide version
     out_str.clear();
     StdStringCanonOutput output2(&out_str);
-    base::string16 wide_input(ConvertUTF8ToUTF16(port_cases[i].input));
+    base::string16 wide_input(base::UTF8ToUTF16(port_cases[i].input));
     success = CanonicalizePort(wide_input.c_str(),
                                in_comp,
                                port_cases[i].default_port,
@@ -1123,7 +1171,8 @@
     }
 
     if (path_cases[i].input16) {
-      base::string16 input16(WStringToUTF16(path_cases[i].input16));
+      base::string16 input16(
+          test_utils::TruncateWStringToUTF16(path_cases[i].input16));
       int len = static_cast<int>(input16.length());
       Component in_comp(0, len);
       Component out_comp;
@@ -1198,7 +1247,8 @@
     }
 
     if (query_cases[i].input16) {
-      base::string16 input16(WStringToUTF16(query_cases[i].input16));
+      base::string16 input16(
+          test_utils::TruncateWStringToUTF16(query_cases[i].input16));
       int len = static_cast<int>(input16.length());
       Component in_comp(0, len);
       std::string out_str;
@@ -1260,7 +1310,8 @@
 
     // 16-bit input
     if (ref_cases[i].input16) {
-      base::string16 input16(WStringToUTF16(ref_cases[i].input16));
+      base::string16 input16(
+          test_utils::TruncateWStringToUTF16(ref_cases[i].input16));
       int len = static_cast<int>(input16.length());
       Component in_comp(0, len);
       Component out_comp;
@@ -1896,12 +1947,12 @@
   const base::char16 fill_char = 0xffff;
   memset(buf, fill_mem, sizeof(buf));
   EXPECT_EQ(0, _itow_s(12, buf, sizeof(buf) / 2 - 1, 10));
-  EXPECT_EQ(WStringToUTF16(L"12"), base::string16(buf));
+  EXPECT_EQ(base::UTF8ToUTF16("12"), base::string16(buf));
   EXPECT_EQ(fill_char, buf[3]);
 
   // Test the edge cases - exactly the buffer size and one over
   EXPECT_EQ(0, _itow_s(1234, buf, sizeof(buf) / 2 - 1, 10));
-  EXPECT_EQ(WStringToUTF16(L"1234"), base::string16(buf));
+  EXPECT_EQ(base::UTF8ToUTF16("1234"), base::string16(buf));
   EXPECT_EQ(fill_char, buf[5]);
 
   memset(buf, fill_mem, sizeof(buf));
@@ -1911,12 +1962,13 @@
   // Test the template overload (note that this will see the full buffer)
   memset(buf, fill_mem, sizeof(buf));
   EXPECT_EQ(0, _itow_s(12, buf, 10));
-  EXPECT_EQ(WStringToUTF16(L"12"), base::string16(buf));
+  EXPECT_EQ(base::UTF8ToUTF16("12"),
+            base::string16(buf));
   EXPECT_EQ(fill_char, buf[3]);
 
   memset(buf, fill_mem, sizeof(buf));
   EXPECT_EQ(0, _itow_s(12345, buf, 10));
-  EXPECT_EQ(WStringToUTF16(L"12345"), base::string16(buf));
+  EXPECT_EQ(base::UTF8ToUTF16("12345"), base::string16(buf));
 
   EXPECT_EQ(EINVAL, _itow_s(123456, buf, 10));
 }
@@ -2152,7 +2204,7 @@
   for (int i = 0; i < 4800; i++)
     new_query.push_back('a');
 
-  base::string16 new_path(WStringToUTF16(L"/foo"));
+  base::string16 new_path(test_utils::TruncateWStringToUTF16(L"/foo"));
   repl.SetPath(new_path.c_str(), Component(0, 4));
   repl.SetQuery(new_query.c_str(),
                 Component(0, static_cast<int>(new_query.length())));
@@ -2173,4 +2225,35 @@
   EXPECT_TRUE(expected == repl_str);
 }
 
+TEST(URLCanonTest, DefaultPortForScheme) {
+  struct TestCases {
+    const char* scheme;
+    const int expected_port;
+  } cases[]{
+      {"http", 80},
+      {"https", 443},
+      {"ftp", 21},
+      {"ws", 80},
+      {"wss", 443},
+      {"gopher", 70},
+      {"http-so", 80},
+      {"https-so", 443},
+      {"fake-scheme", PORT_UNSPECIFIED},
+      {"HTTP", PORT_UNSPECIFIED},
+      {"HTTPS", PORT_UNSPECIFIED},
+      {"FTP", PORT_UNSPECIFIED},
+      {"WS", PORT_UNSPECIFIED},
+      {"WSS", PORT_UNSPECIFIED},
+      {"GOPHER", PORT_UNSPECIFIED},
+      {"HTTP-SO", PORT_UNSPECIFIED},
+      {"HTTPS-SO", PORT_UNSPECIFIED},
+  };
+
+  for (auto& test_case : cases) {
+    SCOPED_TRACE(test_case.scheme);
+    EXPECT_EQ(test_case.expected_port,
+              DefaultPortForScheme(test_case.scheme, strlen(test_case.scheme)));
+  }
+}
+
 }  // namespace url

diff --git a/src/url/url_constants.cc b/src/url/url_constants.cc
index 549819e..73c9a76 100644
--- a/src/url/url_constants.cc
+++ b/src/url/url_constants.cc

@@ -24,6 +24,9 @@
 const char kWsScheme[] = "ws";
 const char kWssScheme[] = "wss";
 
+const char kHttpSuboriginScheme[] = "http-so";
+const char kHttpsSuboriginScheme[] = "https-so";
+
 const char kStandardSchemeSeparator[] = "://";
 
 const size_t kMaxURLChars = 2 * 1024 * 1024;

diff --git a/src/url/url_constants.h b/src/url/url_constants.h
index 3a423d2..c110589 100644
--- a/src/url/url_constants.h
+++ b/src/url/url_constants.h

@@ -30,6 +30,11 @@
 URL_EXPORT extern const char kWsScheme[];
 URL_EXPORT extern const char kWssScheme[];
 
+// Special HTTP and HTTPS schemes for serialization of suborigins. See
+// https://w3c.github.io/webappsec-suborigins/.
+URL_EXPORT extern const char kHttpSuboriginScheme[];
+URL_EXPORT extern const char kHttpsSuboriginScheme[];
+
 // Used to separate a standard scheme and the hostname: "://".
 URL_EXPORT extern const char kStandardSchemeSeparator[];
 

diff --git a/src/url/url_test_utils.h b/src/url/url_test_utils.h
index 174e5e0..8c89622 100644
--- a/src/url/url_test_utils.h
+++ b/src/url/url_test_utils.h

@@ -11,6 +11,7 @@
 #include <string>
 
 #include "base/strings/string16.h"
+#include "base/strings/utf_string_conversions.h"
 #include "testing/base/public/gunit.h"
 #include "url/url_canon_internal.h"
 
@@ -18,10 +19,12 @@
 
 namespace test_utils {
 
-// Converts a UTF-16 string from native wchar_t format to char16, by
-// truncating the high 32 bits. This is not meant to handle true UTF-32
-// encoded strings.
-inline base::string16 WStringToUTF16(const wchar_t* src) {
+// Converts a UTF-16 string from native wchar_t format to char16 by
+// truncating the high 32 bits. This is different than the conversion function
+// in base because it passes invalid UTF-16 characters which is important for
+// test purposes. As a result, this is not meant to handle true UTF-32 encoded
+// strings.
+inline base::string16 TruncateWStringToUTF16(const wchar_t* src) {
   base::string16 str;
   int length = static_cast<int>(wcslen(src));
   for (int i = 0; i < length; ++i) {
@@ -30,25 +33,6 @@
   return str;
 }
 
-// Converts a string from UTF-8 to UTF-16.
-inline base::string16 ConvertUTF8ToUTF16(const std::string& src) {
-  int length = static_cast<int>(src.length());
-  EXPECT_LT(length, 1024);
-  RawCanonOutputW<1024> output;
-  EXPECT_TRUE(ConvertUTF8ToUTF16(src.data(), length, &output));
-  return base::string16(output.data(), output.length());
-}
-
-// Converts a string from UTF-16 to UTF-8.
-inline std::string ConvertUTF16ToUTF8(const base::string16& src) {
-  std::string str;
-  StdStringCanonOutput output(&str);
-  EXPECT_TRUE(ConvertUTF16ToUTF8(src.data(), static_cast<int>(src.length()),
-                                 &output));
-  output.Complete();
-  return str;
-}
-
 }  // namespace test_utils
 
 }  // namespace url

diff --git a/src/url/url_util.cc b/src/url/url_util.cc
index 8522eb1..ebe386f 100644
--- a/src/url/url_util.cc
+++ b/src/url/url_util.cc

@@ -21,25 +21,29 @@
 
 namespace {
 
-const int kNumStandardURLSchemes = 8;
+const int kNumStandardURLSchemes = 10;
 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {
-  {kHttpScheme, SCHEME_WITH_PORT},
-  {kHttpsScheme, SCHEME_WITH_PORT},
-  // Yes, file URLs can have a hostname, so file URLs should be handled as
-  // "standard". File URLs never have a port as specified by the SchemeType
-  // field.
-  {kFileScheme, SCHEME_WITHOUT_PORT},
-  {kFtpScheme, SCHEME_WITH_PORT},
-  {kGopherScheme, SCHEME_WITH_PORT},
-  {kWsScheme, SCHEME_WITH_PORT},    // WebSocket.
-  {kWssScheme, SCHEME_WITH_PORT},   // WebSocket secure.
-  {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY},
+    {kHttpScheme, SCHEME_WITH_PORT},
+    {kHttpsScheme, SCHEME_WITH_PORT},
+    // Yes, file URLs can have a hostname, so file URLs should be handled as
+    // "standard". File URLs never have a port as specified by the SchemeType
+    // field.
+    {kFileScheme, SCHEME_WITHOUT_PORT},
+    {kFtpScheme, SCHEME_WITH_PORT},
+    {kGopherScheme, SCHEME_WITH_PORT},
+    {kWsScheme, SCHEME_WITH_PORT},   // WebSocket.
+    {kWssScheme, SCHEME_WITH_PORT},  // WebSocket secure.
+    {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY},
+    {kHttpSuboriginScheme, SCHEME_WITH_PORT},
+    {kHttpsSuboriginScheme, SCHEME_WITH_PORT},
 };
 
-const int kNumReferrerURLSchemes = 2;
+const int kNumReferrerURLSchemes = 4;
 const SchemeWithType kReferrerURLSchemes[kNumReferrerURLSchemes] = {
-  {kHttpScheme, SCHEME_WITH_PORT},
-  {kHttpsScheme, SCHEME_WITH_PORT},
+    {kHttpScheme, SCHEME_WITH_PORT},
+    {kHttpsScheme, SCHEME_WITH_PORT},
+    {kHttpSuboriginScheme, SCHEME_WITH_PORT},
+    {kHttpsSuboriginScheme, SCHEME_WITH_PORT},
 };
 
 // Lists of the currently installed standard and referrer schemes. These lists

diff --git a/src/url/url_util_unittest.cc b/src/url/url_util_unittest.cc
index eceb505..6d7c8f7 100644
--- a/src/url/url_util_unittest.cc
+++ b/src/url/url_util_unittest.cc

@@ -214,15 +214,15 @@
     RawCanonOutputT<base::char16> output;
     DecodeURLEscapeSequences(input, strlen(input), &output);
     EXPECT_EQ(decode_cases[i].output,
-              test_utils::ConvertUTF16ToUTF8(base::string16(output.data(),
-                                                            output.length())));
+              base::UTF16ToUTF8(base::string16(output.data(),
+                                               output.length())));
   }
 
   // Our decode should decode %00
   const char zero_input[] = "%00";
   RawCanonOutputT<base::char16> zero_output;
   DecodeURLEscapeSequences(zero_input, strlen(zero_input), &zero_output);
-  EXPECT_NE("%00", test_utils::ConvertUTF16ToUTF8(
+  EXPECT_NE("%00", base::UTF16ToUTF8(
       base::string16(zero_output.data(), zero_output.length())));
 
   // Test the error behavior for invalid UTF-8.
commit	44ac360d844b6ab5e40070aa367443ff0a9d5ac4	[log] [tgz]
author	Devany Sandoval <sandovad@google.com>	Fri Feb 24 12:01:41 2017 -0800
committer	sandovad <sandovad@google.com>	Tue Sep 03 12:54:36 2019 -0700
tree	658028b65a476e57849fa7e5aabb2b47110773b5
parent	6b31f0e37e67f8486baa6e18cde534b8f04a4f7f [diff]