Internal change PiperOrigin-RevId: 123874609 Change-Id: I41c29967273446a17f60bd8472f776d88bd00f42

commit: 0edea97f03182e76ae24c25b1f63544c34e7ad6d [log] [tgz]
author: Devany Sandoval <sandovad@google.com> Thu Jun 02 09:34:23 2016 -0700
committer: sandovad <sandovad@google.com> Tue Sep 03 12:52:33 2019 -0700
tree: 266602de5d553a98a496e454c922719ae6494546
parent: 20c50a19f25dc04563b2963c93b4c237b993132d [diff]
diff --git a/README.google b/README.google
index eab9cf3..f6a9a6a 100644
--- a/README.google
+++ b/README.google

@@ -1,5 +1,5 @@
-URL: https://chromium.googlesource.com/chromium/src/+archive/6e0744b15b09421eac6634fb3fb7fe0a03427d56/url.tar.gz
-Version: 6e0744b15b09421eac6634fb3fb7fe0a03427d56 (matching Chromium 41.0.2272.118)
+URL: https://chromium.googlesource.com/chromium/src.git/+archive/51.0.2704.63.tar.gz
+Version: commit 8726b6fe03a76ef686c9e1606e67169c177cb883 (matching Chromium 51.0.2704.63)
 License: BSD, MPL, ICU (one source file under MPL, one source file under ICU)
 License File: LICENSE
 
@@ -8,27 +8,34 @@
 
 Local Modifications:
 1. src/base directory:
-- Remove BASE_EXPORT macros.
+- Remove the include of base/base_export.h. Define BASE_EXPORT as an empty macro.
 - Wrap namespace base with namespace url to distinguish from google3 base.
 - src/base/strings/string16.*
-  * Include src/build/build_config.h to detect wchar_t size.
   * PrintTo function and operator << are removed to eliminate dependency on
     src/base/strings/utf_string_conversion.
+- src/base/strings/string_piece.*
+  * Add constructor for ::string.
+  * Change DCHECK_IS_ON() to !NDEBUG.
 - src/base/strings/string_util.*
-  * Only one MatchPattern function is kept for src/url/origin.cc.
-  * Change the argument type from StringPiece to std::string to remove
-    dependency on google3 StringPiece.
+  * Removed the include of base/compiler_specific.h.
+  * Only the needed functions are kept.
 - src/base/third_party/icu/icu_utf.cc
-  * Add FALLTHROUGH_INTENDED for fall-through switch cases.
+  * Include base/macros.h and add FALLTHROUGH_INTENDED for fall-through switch cases.
 
 2. src/url directory:
-- Use google3 version of //base, //util/gtl/lazy_static_ptr.h
+- Use google3 version of //base, //util/gtl/lazy_static_ptr.h,
   //third_party/icu and //testing/base/public:gunit_main. Some users don't want
   googleurl to be dependent on google3 (e.g. geo/render/mirth/net:googleurl),
   so we try our best to do it.
-- src/url/gurl.cc
-  * Replace scoped_ptr with std::unique_ptr to eliminate dependency on google3
-    scoped_ptr.
+- Change NOTREACHED() to DCHECK(false).
+- src/url/origin.cc
+  * Remove the include of base/strings/string_number_conversions.h.
+- src/url/scheme_host_port.cc
+  * Remove the includes of base/numerics/safe_conversions.h and
+    base/strings/string_number_conversions.h.
+  * Include url/url_canon_internal.h.
+  * Replace base::checked_cast with static_cast.
+  * Change base::UintToString to _itoa_s, which is used in other places.
 - src/url/url_canon_icu.cc
   * Replace LazyInstance with google3 LazyStaticPtr, modify intialization
     and access methods accordingly.
@@ -36,16 +43,13 @@
   * Replace ANNOTATE_LEAKING_OBJECT_PTR() with google3
     HeapLeakChecker::IgnoreObject(), and only use it when GOOGLEURL_IN_GOOGLE3
     is defined.
-- src/url/url_canon_internal.h
-  * Expand NOT_REACHED() as DCHECK(false).
 - src/url/url_canon_icu.h and src/url/url_canon_stdstring.h
   * Remove the include of src/base/compiler_specific.h.
 - src/url/third_party/mozilla/url_parse.cc
   * Compile filesystemurl related function only when NO_FILESYSTEMURL_SUPPORT
-    is not defined, so that
-    wireless/android/icing/lib/core:liburl_parse_icing_static doesn't need to
+    is not defined, so that wireless/android/icing/lib/core doesn't need to
     depend on other googleurl srcs as well as third_party/icu.
 
-3. google3_addidions directory:
+3. google3_additions directory:
 2014-07-30: added google3_additions/googleurl_init.cc, which properly
 initializes googleurl during InitGoogle().

diff --git a/src/base/strings/string16.h b/src/base/strings/string16.h
index be488c3..925e52f 100644
--- a/src/base/strings/string16.h
+++ b/src/base/strings/string16.h

@@ -26,12 +26,17 @@
 // libc functions with custom, 2-byte-char compatible routines. It is capable
 // of carrying UTF-16-encoded data.
 
+#include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
+
+#include <functional>
 #include <string>
 
-#include "base/basictypes.h"
 #include "build/build_config.h"
 
+#define BASE_EXPORT
+
 #if defined(WCHAR_T_IS_UTF16)
 
 namespace url {
@@ -49,17 +54,17 @@
 namespace url {
 namespace base {
 
-typedef uint16 char16;
+typedef uint16_t char16;
 
 // char16 versions of the functions required by string16_char_traits; these
 // are based on the wide character functions of similar names ("w" or "wcs"
 // instead of "c16").
-int c16memcmp(const char16* s1, const char16* s2, size_t n);
-size_t c16len(const char16* s);
-const char16* c16memchr(const char16* s, char16 c, size_t n);
-char16* c16memmove(char16* s1, const char16* s2, size_t n);
-char16* c16memcpy(char16* s1, const char16* s2, size_t n);
-char16* c16memset(char16* s, char16 c, size_t n);
+BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n);  // cf. wmemcmp
+BASE_EXPORT size_t c16len(const char16* s);  // cf. wcslen
+BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n);  // cf. wmemchr
+BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n);  // cf. wmemmove
+BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n);  // cf. wmemcpy
+BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n);  // cf. wmemset
 
 struct string16_char_traits {
   typedef char16 char_type;
@@ -67,7 +72,8 @@
 
   // int_type needs to be able to hold each possible value of char_type, and in
   // addition, the distinct value of eof().
-  COMPILE_ASSERT(sizeof(int_type) > sizeof(char_type), unexpected_type_width);
+  static_assert(sizeof(int_type) > sizeof(char_type),
+                "int must be larger than 16 bits wide");
 
   typedef std::streamoff off_type;
   typedef mbstate_t state_type;
@@ -97,7 +103,7 @@
     return c16memchr(s, a, n);
   }
 
-  static char_type* move(char_type* s1, const char_type* s2, int_type n) {
+  static char_type* move(char_type* s1, const char_type* s2, size_t n) {
     return c16memmove(s1, s2, n);
   }
 
@@ -130,7 +136,7 @@
   }
 };
 
-typedef std::basic_string<char16, url::base::string16_char_traits> string16;
+typedef std::basic_string<char16, base::string16_char_traits> string16;
 
 }  // namespace base
 }  // namespace url
@@ -175,7 +181,22 @@
 // TODO(mark): File this bug with Apple and update this note with a bug number.
 
 extern template
-class std::basic_string<url::base::char16, url::base::string16_char_traits>;
+class BASE_EXPORT std::basic_string<url::base::char16, url::base::string16_char_traits>;
+
+// std::hash specialization for base::string16. The style guide generally
+// forbids specializing std templates, but this one is needed for parity with
+// WCHAR_T_IS_UTF16 platforms, where base::string16 is a type alias for
+// std::wstring.
+namespace std {
+template <>
+struct hash<url::base::string16> {
+  // Folds every 16-bit code unit of |s| into a multiply-by-131 running hash.
+  std::size_t operator()(const url::base::string16& s) const {
+    std::size_t digest = 0;
+    for (auto unit = s.begin(); unit != s.end(); ++unit)
+      digest = (digest * 131) + *unit;
+    return digest;
+  }
+};
+}  // namespace std
 
 #endif  // WCHAR_T_IS_UTF32
 

diff --git a/src/base/strings/string_piece.cc b/src/base/strings/string_piece.cc
new file mode 100644
index 0000000..b8006c1
--- /dev/null
+++ b/src/base/strings/string_piece.cc

@@ -0,0 +1,454 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+// Copied from strings/stringpiece.cc with modifications
+
+#include "base/strings/string_piece.h"
+
+#include <limits.h>
+
+#include <algorithm>
+#include <ostream>
+
+#include "base/logging.h"
+
+namespace url {
+namespace base {
+namespace {
+
+// Records which byte values occur in |characters_wanted|: for every character
+// in it, the slot of |table| indexed by that character (converted to
+// unsigned char) is set to true. Slots for all other values are left as they
+// were; the callers below pass a zero-initialized table. The find_.*_of
+// methods use the table to test set membership in constant time.
+// |table| must have room for every possible unsigned char value, i.e. it
+// should be declared as:
+//   bool table[UCHAR_MAX + 1]
+inline void BuildLookupTable(const StringPiece& characters_wanted,
+                             bool* table) {
+  // Index through unsigned char so characters with the high bit set never
+  // produce a negative index.
+  for (const char wanted : characters_wanted) {
+    table[static_cast<unsigned char>(wanted)] = true;
+  }
+}
+
+}  // namespace
+
+// MSVC doesn't like complex extern templates and DLLs.
+#if !defined(COMPILER_MSVC)
+template class BasicStringPiece<std::string>;  // Instantiates StringPiece.
+template class BasicStringPiece<string16>;  // Instantiates StringPiece16.
+#endif  // !defined(COMPILER_MSVC)
+
+// Two pieces are equal when their lengths match and their contents compare
+// equal; the contents are only examined once the lengths are known to agree.
+bool operator==(const StringPiece& x, const StringPiece& y) {
+  const bool same_length = x.size() == y.size();
+  return same_length &&
+         StringPiece::wordmemcmp(x.data(), y.data(), x.size()) == 0;
+}
+
+// Writes exactly piece.size() characters starting at piece.data() to |o| and
+// returns the stream.
+std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
+  return o.write(piece.data(), static_cast<std::streamsize>(piece.size()));
+}
+
+namespace internal {
+
+// Replaces the contents of |*target| with the characters viewed by |self|.
+// An empty piece (whose data() may be null) clears the target instead of
+// going through assign().
+template<typename STR>
+void CopyToStringT(const BasicStringPiece<STR>& self, STR* target) {
+  if (!self.empty()) {
+    target->assign(self.data(), self.size());
+    return;
+  }
+  target->clear();
+}
+
+// 8-bit entry point.
+void CopyToString(const StringPiece& self, std::string* target) {
+  CopyToStringT<std::string>(self, target);
+}
+
+// 16-bit entry point.
+void CopyToString(const StringPiece16& self, string16* target) {
+  CopyToStringT<string16>(self, target);
+}
+
+// Appends the characters viewed by |self| to |*target|. An empty piece leaves
+// the target untouched.
+template<typename STR>
+void AppendToStringT(const BasicStringPiece<STR>& self, STR* target) {
+  if (self.empty())
+    return;
+  target->append(self.data(), self.size());
+}
+
+// 8-bit entry point.
+void AppendToString(const StringPiece& self, std::string* target) {
+  AppendToStringT<std::string>(self, target);
+}
+
+// 16-bit entry point.
+void AppendToString(const StringPiece16& self, string16* target) {
+  AppendToStringT<string16>(self, target);
+}
+
+// Copies up to |n| characters of |self|, starting at offset |pos|, into |buf|
+// and returns the number of characters copied. No terminator is written.
+// A |pos| at or beyond the end of |self| copies nothing and returns 0; without
+// that guard the unsigned subtraction below would wrap and memcpy would read
+// past the end of the piece.
+template<typename STR>
+size_t copyT(const BasicStringPiece<STR>& self,
+             typename STR::value_type* buf,
+             size_t n,
+             size_t pos) {
+  if (pos >= self.size())
+    return 0;
+
+  size_t ret = std::min(self.size() - pos, n);
+  // Skip the call for a zero-length copy so memcpy is never handed a
+  // possibly-null |buf|.
+  if (ret > 0)
+    memcpy(buf, self.data() + pos, ret * sizeof(typename STR::value_type));
+  return ret;
+}
+
+// 8-bit entry point.
+size_t copy(const StringPiece& self, char* buf, size_t n, size_t pos) {
+  return copyT(self, buf, n, pos);
+}
+
+// 16-bit entry point.
+size_t copy(const StringPiece16& self, char16* buf, size_t n, size_t pos) {
+  return copyT(self, buf, n, pos);
+}
+
+// Returns the offset of the first occurrence of |s| in |self| that starts at
+// or after |pos|, or npos when there is none or |pos| is past the end.
+template<typename STR>
+size_t findT(const BasicStringPiece<STR>& self,
+             const BasicStringPiece<STR>& s,
+             size_t pos) {
+  typedef BasicStringPiece<STR> Piece;
+  if (pos > self.size())
+    return Piece::npos;
+
+  const typename Piece::const_iterator match =
+      std::search(self.begin() + pos, self.end(), s.begin(), s.end());
+  const size_t offset = static_cast<size_t>(match - self.begin());
+  // std::search reports failure as end(); a genuine match must fit in |self|.
+  if (offset + s.size() > self.size())
+    return Piece::npos;
+  return offset;
+}
+
+// 8-bit entry point.
+size_t find(const StringPiece& self, const StringPiece& s, size_t pos) {
+  return findT(self, s, pos);
+}
+
+// 16-bit entry point.
+size_t find(const StringPiece16& self, const StringPiece16& s, size_t pos) {
+  return findT(self, s, pos);
+}
+
+// Returns the offset of the first character equal to |c| at or after |pos|,
+// or npos when there is none or |pos| is at or past the end of |self|.
+template<typename STR>
+size_t findT(const BasicStringPiece<STR>& self,
+             typename STR::value_type c,
+             size_t pos) {
+  typedef BasicStringPiece<STR> Piece;
+  if (pos >= self.size())
+    return Piece::npos;
+
+  const typename Piece::const_iterator hit =
+      std::find(self.begin() + pos, self.end(), c);
+  if (hit == self.end())
+    return Piece::npos;
+  return static_cast<size_t>(hit - self.begin());
+}
+
+// 8-bit entry point.
+size_t find(const StringPiece& self, char c, size_t pos) {
+  return findT(self, c, pos);
+}
+
+// 16-bit entry point.
+size_t find(const StringPiece16& self, char16 c, size_t pos) {
+  return findT(self, c, pos);
+}
+
+// Returns the offset of the last occurrence of |s| in |self| that starts at
+// or before |pos|, or npos when there is none. An empty |s| matches at
+// min(self.size(), pos).
+template<typename STR>
+size_t rfindT(const BasicStringPiece<STR>& self,
+              const BasicStringPiece<STR>& s,
+              size_t pos) {
+  typedef BasicStringPiece<STR> Piece;
+  const size_t haystack_len = self.size();
+  const size_t needle_len = s.size();
+  if (needle_len > haystack_len)
+    return Piece::npos;
+  if (needle_len == 0)
+    return std::min(haystack_len, pos);
+
+  // A match may start no later than |pos| and must still fit inside |self|,
+  // so the searched range ends |needle_len| past the last allowed start.
+  const size_t last_start = std::min(haystack_len - needle_len, pos);
+  const typename Piece::const_iterator limit =
+      self.begin() + last_start + needle_len;
+  const typename Piece::const_iterator match =
+      std::find_end(self.begin(), limit, s.begin(), s.end());
+  if (match == limit)
+    return Piece::npos;
+  return static_cast<size_t>(match - self.begin());
+}
+
+// 8-bit entry point.
+size_t rfind(const StringPiece& self, const StringPiece& s, size_t pos) {
+  return rfindT(self, s, pos);
+}
+
+// 16-bit entry point.
+size_t rfind(const StringPiece16& self, const StringPiece16& s, size_t pos) {
+  return rfindT(self, s, pos);
+}
+
+// Returns the offset of the last character equal to |c| at or before |pos|,
+// or npos when there is none or |self| is empty.
+template<typename STR>
+size_t rfindT(const BasicStringPiece<STR>& self,
+              typename STR::value_type c,
+              size_t pos) {
+  if (self.empty())
+    return BasicStringPiece<STR>::npos;
+
+  // |next| stays one past the index under test so the unsigned counter can
+  // stop at zero instead of wrapping.
+  for (size_t next = std::min(pos, self.size() - 1) + 1; next > 0; --next) {
+    if (self.data()[next - 1] == c)
+      return next - 1;
+  }
+  return BasicStringPiece<STR>::npos;
+}
+
+// 8-bit entry point.
+size_t rfind(const StringPiece& self, char c, size_t pos) {
+  return rfindT(self, c, pos);
+}
+
+// 16-bit entry point.
+size_t rfind(const StringPiece16& self, char16 c, size_t pos) {
+  return rfindT(self, c, pos);
+}
+
+// 8-bit version using lookup table.
+// Returns the offset of the first character of |self| at or after |pos| that
+// occurs in |s|, or npos when there is none or either piece is empty.
+size_t find_first_of(const StringPiece& self,
+                     const StringPiece& s,
+                     size_t pos) {
+  if (self.empty() || s.empty())
+    return StringPiece::npos;
+
+  // A one-character set is a plain character search; skip building the table.
+  if (s.size() == 1)
+    return find(self, s.data()[0], pos);
+
+  bool wanted[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, wanted);
+  const char* const bytes = self.data();
+  const size_t length = self.size();
+  while (pos < length) {
+    if (wanted[static_cast<unsigned char>(bytes[pos])])
+      return pos;
+    ++pos;
+  }
+  return StringPiece::npos;
+}
+
+// 16-bit brute force version.
+// Returns the offset of the first character of |self| at or after |pos| that
+// also occurs in |s|, or npos if there is none.
+size_t find_first_of(const StringPiece16& self,
+                     const StringPiece16& s,
+                     size_t pos) {
+  // Guard first: forming begin() + pos with |pos| beyond the end would hand
+  // std::find_first_of an invalid range.
+  if (pos >= self.size())
+    return StringPiece16::npos;
+
+  StringPiece16::const_iterator found =
+      std::find_first_of(self.begin() + pos, self.end(), s.begin(), s.end());
+  if (found == self.end())
+    return StringPiece16::npos;
+  return static_cast<size_t>(found - self.begin());
+}
+
+// 8-bit version using lookup table.
+// Returns the offset of the first character of |self| at or after |pos| that
+// does not occur in |s|, or npos if every remaining character is in |s| or
+// |pos| is at or past the end of |self|.
+size_t find_first_not_of(const StringPiece& self,
+                         const StringPiece& s,
+                         size_t pos) {
+  if (pos >= self.size())
+    return StringPiece::npos;
+
+  // With an empty exclusion set the character at |pos| itself qualifies.
+  // Returning 0 here would ignore |pos| and disagree with the 16-bit
+  // overload.
+  if (s.size() == 0)
+    return pos;
+
+  // Avoid the cost of BuildLookupTable() for a single-character search.
+  if (s.size() == 1)
+    return find_first_not_of(self, s.data()[0], pos);
+
+  bool lookup[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, lookup);
+  for (size_t i = pos; i < self.size(); ++i) {
+    if (!lookup[static_cast<unsigned char>(self.data()[i])]) {
+      return i;
+    }
+  }
+  return StringPiece::npos;
+}
+
+// 16-bit brute-force version.
+// Returns the offset of the first character of |self| at or after |pos| that
+// does not occur in |s|, or npos if there is none.
+BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
+                                     const StringPiece16& s,
+                                     size_t pos) {
+  if (self.empty())
+    return StringPiece16::npos;
+
+  for (; pos < self.size(); ++pos) {
+    // Linear membership test of the current character against |s|.
+    const StringPiece16::const_iterator hit =
+        std::find(s.begin(), s.end(), self[pos]);
+    if (hit == s.end())
+      return pos;
+  }
+  return StringPiece16::npos;
+}
+
+// Returns the offset of the first character at or after |pos| that differs
+// from |c|, or npos when there is none.
+template<typename STR>
+size_t find_first_not_ofT(const BasicStringPiece<STR>& self,
+                          typename STR::value_type c,
+                          size_t pos) {
+  if (self.empty())
+    return BasicStringPiece<STR>::npos;
+
+  const typename STR::value_type* const chars = self.data();
+  const size_t length = self.size();
+  for (size_t i = pos; i < length; ++i) {
+    if (chars[i] != c)
+      return i;
+  }
+  return BasicStringPiece<STR>::npos;
+}
+
+// 8-bit entry point.
+size_t find_first_not_of(const StringPiece& self,
+                         char c,
+                         size_t pos) {
+  return find_first_not_ofT(self, c, pos);
+}
+
+// 16-bit entry point.
+size_t find_first_not_of(const StringPiece16& self,
+                         char16 c,
+                         size_t pos) {
+  return find_first_not_ofT(self, c, pos);
+}
+
+// 8-bit version using lookup table.
+// Returns the offset of the last character of |self| at or before |pos| that
+// occurs in |s|, or npos when there is none or either piece is empty.
+size_t find_last_of(const StringPiece& self, const StringPiece& s, size_t pos) {
+  if (self.empty() || s.empty())
+    return StringPiece::npos;
+
+  // A one-character set is just a reverse character search; skip the table.
+  if (s.size() == 1)
+    return rfind(self, s.data()[0], pos);
+
+  bool wanted[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, wanted);
+  // |next| stays one past the index under test so the unsigned counter can
+  // stop at zero instead of wrapping.
+  for (size_t next = std::min(pos, self.size() - 1) + 1; next > 0; --next) {
+    if (wanted[static_cast<unsigned char>(self.data()[next - 1])])
+      return next - 1;
+  }
+  return StringPiece::npos;
+}
+
+// 16-bit brute-force version.
+// Returns the offset of the last character of |self| at or before |pos| that
+// occurs in |s|, or npos when there is none.
+size_t find_last_of(const StringPiece16& self,
+                    const StringPiece16& s,
+                    size_t pos) {
+  if (self.empty())
+    return StringPiece16::npos;
+
+  // |next| stays one past the index under test so the unsigned counter can
+  // stop at zero instead of wrapping.
+  for (size_t next = std::min(pos, self.size() - 1) + 1; next > 0; --next) {
+    const char16 candidate = self.data()[next - 1];
+    if (std::find(s.begin(), s.end(), candidate) != s.end())
+      return next - 1;
+  }
+  return StringPiece16::npos;
+}
+
+// 8-bit version using lookup table.
+// Returns the offset of the last character of |self| at or before |pos| that
+// does not occur in |s|, or npos when there is none.
+size_t find_last_not_of(const StringPiece& self,
+                        const StringPiece& s,
+                        size_t pos) {
+  if (self.empty())
+    return StringPiece::npos;
+
+  // Index of the right-most character the search may report.
+  const size_t start = std::min(pos, self.size() - 1);
+  switch (s.size()) {
+    case 0:
+      // Nothing is excluded, so the starting character itself qualifies.
+      return start;
+    case 1:
+      // Avoid the cost of BuildLookupTable() for a single-character search.
+      return find_last_not_of(self, s.data()[0], pos);
+    default:
+      break;
+  }
+
+  bool excluded[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, excluded);
+  // |next| stays one past the index under test so the unsigned counter can
+  // stop at zero instead of wrapping.
+  for (size_t next = start + 1; next > 0; --next) {
+    if (!excluded[static_cast<unsigned char>(self.data()[next - 1])])
+      return next - 1;
+  }
+  return StringPiece::npos;
+}
+
+// 16-bit brute-force version.
+// Returns the offset of the last character of |self| at or before |pos| that
+// does not occur in |s|, or npos when there is none or |self| is empty.
+size_t find_last_not_of(const StringPiece16& self,
+                        const StringPiece16& s,
+                        size_t pos) {
+  // Use the 16-bit piece's own npos, matching the final return below.
+  if (self.size() == 0)
+    return StringPiece16::npos;
+
+  // The loop has no condition: it walks down to index 0 and exits via the
+  // explicit break so the unsigned index never wraps.
+  for (size_t self_i = std::min(pos, self.size() - 1); ; --self_i) {
+    bool found = false;
+    for (size_t s_i = 0; s_i < s.size(); s_i++) {
+      if (self.data()[self_i] == s[s_i]) {
+        found = true;
+        break;
+      }
+    }
+    if (!found)
+      return self_i;
+    if (self_i == 0)
+      break;
+  }
+  return StringPiece16::npos;
+}
+
+// Returns the offset of the last character at or before |pos| that differs
+// from |c|, or npos when there is none or |self| is empty.
+template<typename STR>
+size_t find_last_not_ofT(const BasicStringPiece<STR>& self,
+                         typename STR::value_type c,
+                         size_t pos) {
+  if (self.empty())
+    return BasicStringPiece<STR>::npos;
+
+  // |next| stays one past the index under test so the unsigned counter can
+  // stop at zero instead of wrapping.
+  for (size_t next = std::min(pos, self.size() - 1) + 1; next > 0; --next) {
+    if (self.data()[next - 1] != c)
+      return next - 1;
+  }
+  return BasicStringPiece<STR>::npos;
+}
+
+// 8-bit entry point.
+size_t find_last_not_of(const StringPiece& self,
+                        char c,
+                        size_t pos) {
+  return find_last_not_ofT(self, c, pos);
+}
+
+// 16-bit entry point.
+size_t find_last_not_of(const StringPiece16& self,
+                        char16 c,
+                        size_t pos) {
+  return find_last_not_ofT(self, c, pos);
+}
+
+// Returns the sub-piece of |self| that starts at |pos| and spans at most |n|
+// characters. Both values are clamped to the piece, so an out-of-range |pos|
+// yields an empty piece positioned at the end of |self|.
+template<typename STR>
+BasicStringPiece<STR> substrT(const BasicStringPiece<STR>& self,
+                              size_t pos,
+                              size_t n) {
+  const size_t start = std::min(pos, self.size());
+  const size_t count = std::min(n, self.size() - start);
+  return BasicStringPiece<STR>(self.data() + start, count);
+}
+
+// 8-bit entry point.
+StringPiece substr(const StringPiece& self,
+                   size_t pos,
+                   size_t n) {
+  return substrT(self, pos, n);
+}
+
+// 16-bit entry point.
+StringPiece16 substr(const StringPiece16& self,
+                     size_t pos,
+                     size_t n) {
+  return substrT(self, pos, n);
+}
+
+#if !NDEBUG  // NOTE(review): ill-formed when NDEBUG is defined with no value; confirm how builds define it.
+void AssertIteratorsInOrder(std::string::const_iterator begin,
+                            std::string::const_iterator end) {
+  DCHECK(begin <= end) << "StringPiece iterators swapped or invalid.";  // 8-bit iterators.
+}
+void AssertIteratorsInOrder(string16::const_iterator begin,
+                            string16::const_iterator end) {
+  DCHECK(begin <= end) << "StringPiece iterators swapped or invalid.";  // 16-bit iterators.
+}
+#endif  // !NDEBUG
+
+}  // namespace internal
+}  // namespace base
+}  // namespace url

diff --git a/src/base/strings/string_piece.h b/src/base/strings/string_piece.h
new file mode 100644
index 0000000..7396eb4
--- /dev/null
+++ b/src/base/strings/string_piece.h

@@ -0,0 +1,472 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+// Copied from strings/stringpiece.h with modifications
+//
+// A string-like object that points to a sized piece of memory.
+//
+// You can use StringPiece as a function or method parameter.  A StringPiece
+// parameter can receive a double-quoted string literal argument, a "const
+// char*" argument, a string argument, or a StringPiece argument with no data
+// copying.  Systematic use of StringPiece for arguments reduces data
+// copies and strlen() calls.
+//
+// Prefer passing StringPieces by value:
+//   void MyFunction(StringPiece arg);
+// If circumstances require, you may also pass by const reference:
+//   void MyFunction(const StringPiece& arg);  // not preferred
+// Both of these have the same lifetime semantics.  Passing by value
+// generates slightly smaller code.  For more discussion, Googlers can see
+// the thread go/stringpiecebyvalue on c-users.
+
+#ifndef BASE_STRINGS_STRING_PIECE_H_
+#define BASE_STRINGS_STRING_PIECE_H_
+
+#include <stddef.h>
+
+#include <iosfwd>
+#include <string>
+
+#include "base/logging.h"
+#include "base/strings/string16.h"
+
+#define BASE_EXPORT
+
+namespace url {
+namespace base {
+
+template <typename STRING_TYPE> class BasicStringPiece;
+typedef BasicStringPiece<std::string> StringPiece;
+typedef BasicStringPiece<string16> StringPiece16;
+
+// internal --------------------------------------------------------------------
+
+// Many of the StringPiece functions use different implementations for the
+// 8-bit and 16-bit versions, and we don't want lots of template expansions in
+// this (very common) header that will slow down compilation.
+//
+// So here we define overloaded functions called by the StringPiece template.
+// For those that share an implementation, the two versions will expand to a
+// template internal to the .cc file.
+namespace internal {  // Overloads called by the BasicStringPiece template.
+
+BASE_EXPORT void CopyToString(const StringPiece& self, std::string* target);
+BASE_EXPORT void CopyToString(const StringPiece16& self, string16* target);
+
+BASE_EXPORT void AppendToString(const StringPiece& self, std::string* target);
+BASE_EXPORT void AppendToString(const StringPiece16& self, string16* target);
+
+BASE_EXPORT size_t copy(const StringPiece& self,
+                        char* buf,
+                        size_t n,
+                        size_t pos);
+BASE_EXPORT size_t copy(const StringPiece16& self,
+                        char16* buf,
+                        size_t n,
+                        size_t pos);
+
+BASE_EXPORT size_t find(const StringPiece& self,
+                        const StringPiece& s,
+                        size_t pos);
+BASE_EXPORT size_t find(const StringPiece16& self,
+                        const StringPiece16& s,
+                        size_t pos);
+BASE_EXPORT size_t find(const StringPiece& self,
+                        char c,
+                        size_t pos);
+BASE_EXPORT size_t find(const StringPiece16& self,
+                        char16 c,
+                        size_t pos);
+
+BASE_EXPORT size_t rfind(const StringPiece& self,
+                         const StringPiece& s,
+                         size_t pos);
+BASE_EXPORT size_t rfind(const StringPiece16& self,
+                         const StringPiece16& s,
+                         size_t pos);
+BASE_EXPORT size_t rfind(const StringPiece& self,
+                         char c,
+                         size_t pos);
+BASE_EXPORT size_t rfind(const StringPiece16& self,
+                         char16 c,
+                         size_t pos);
+
+BASE_EXPORT size_t find_first_of(const StringPiece& self,
+                                 const StringPiece& s,
+                                 size_t pos);
+BASE_EXPORT size_t find_first_of(const StringPiece16& self,
+                                 const StringPiece16& s,
+                                 size_t pos);
+
+BASE_EXPORT size_t find_first_not_of(const StringPiece& self,
+                                     const StringPiece& s,
+                                     size_t pos);
+BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
+                                     const StringPiece16& s,
+                                     size_t pos);
+BASE_EXPORT size_t find_first_not_of(const StringPiece& self,
+                                     char c,
+                                     size_t pos);
+BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
+                                     char16 c,
+                                     size_t pos);
+
+BASE_EXPORT size_t find_last_of(const StringPiece& self,
+                                const StringPiece& s,
+                                size_t pos);
+BASE_EXPORT size_t find_last_of(const StringPiece16& self,
+                                const StringPiece16& s,
+                                size_t pos);
+BASE_EXPORT size_t find_last_of(const StringPiece& self,
+                                char c,
+                                size_t pos);  // NOTE(review): no definition in string_piece.cc; confirm this overload is unused.
+BASE_EXPORT size_t find_last_of(const StringPiece16& self,
+                                char16 c,
+                                size_t pos);  // NOTE(review): no definition in string_piece.cc; confirm this overload is unused.
+
+BASE_EXPORT size_t find_last_not_of(const StringPiece& self,
+                                    const StringPiece& s,
+                                    size_t pos);
+BASE_EXPORT size_t find_last_not_of(const StringPiece16& self,
+                                    const StringPiece16& s,
+                                    size_t pos);
+BASE_EXPORT size_t find_last_not_of(const StringPiece16& self,
+                                    char16 c,
+                                    size_t pos);
+BASE_EXPORT size_t find_last_not_of(const StringPiece& self,
+                                    char c,
+                                    size_t pos);
+
+BASE_EXPORT StringPiece substr(const StringPiece& self,
+                               size_t pos,
+                               size_t n);
+BASE_EXPORT StringPiece16 substr(const StringPiece16& self,
+                                 size_t pos,
+                                 size_t n);
+
+#if !NDEBUG  // NOTE(review): ill-formed when NDEBUG is defined with no value; confirm how builds define it.
+// Asserts that begin <= end to catch some errors with iterator usage.
+BASE_EXPORT void AssertIteratorsInOrder(std::string::const_iterator begin,
+                                        std::string::const_iterator end);
+BASE_EXPORT void AssertIteratorsInOrder(string16::const_iterator begin,
+                                        string16::const_iterator end);
+#endif  // !NDEBUG
+
+}  // namespace internal
+
+// BasicStringPiece ------------------------------------------------------------
+
+// Defines the types, methods, operators, and data members common to both
+// StringPiece and StringPiece16. Do not refer to this class directly, but
+// rather to BasicStringPiece, StringPiece, or StringPiece16.
+//
+// This is templatized by string class type rather than character type, so
+// BasicStringPiece<std::string> or BasicStringPiece<base::string16>.
+template <typename STRING_TYPE> class BasicStringPiece {
+ public:
+  // Standard STL container boilerplate.
+  typedef size_t size_type;
+  typedef typename STRING_TYPE::value_type value_type;
+  typedef const value_type* pointer;
+  typedef const value_type& reference;
+  typedef const value_type& const_reference;
+  typedef ptrdiff_t difference_type;
+  typedef const value_type* const_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+  static const size_type npos;
+
+ public:
+  // We provide non-explicit singleton constructors so users can pass
+  // in a "const char*" or a "string" wherever a "StringPiece" is
+  // expected (likewise for char16, string16, StringPiece16).
+  BasicStringPiece() : ptr_(NULL), length_(0) {}
+  BasicStringPiece(const value_type* str)
+      : ptr_(str),
+        length_((str == NULL) ? 0 : STRING_TYPE::traits_type::length(str)) {}
+  BasicStringPiece(const STRING_TYPE& str)
+      : ptr_(str.data()), length_(str.size()) {}
+#ifdef HAS_GLOBAL_STRING
+  // ::basic_string<char> == ::string != std::string (in google3)
+  BasicStringPiece(const ::basic_string<value_type>& str)
+      : ptr_(str.data()), length_(str.size()) {}
+#endif
+  BasicStringPiece(const value_type* offset, size_type len)
+      : ptr_(offset), length_(len) {}
+  BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
+                   const typename STRING_TYPE::const_iterator& end) {
+#if !NDEBUG
+    // This assertion is done out-of-line to avoid bringing in logging.h and
+    // instantiating logging macros for every instantiation.
+    internal::AssertIteratorsInOrder(begin, end);
+#endif
+    length_ = static_cast<size_t>(std::distance(begin, end));
+
+    // The length test before assignment is to avoid dereferencing an iterator
+    // that may point to the end() of a string.
+    ptr_ = length_ > 0 ? &*begin : nullptr;
+  }
+
+  // data() may return a pointer to a buffer with embedded NULs, and the
+  // returned buffer may or may not be null terminated.  Therefore it is
+  // typically a mistake to pass data() to a routine that expects a NUL
+  // terminated string.
+  const value_type* data() const { return ptr_; }
+  size_type size() const { return length_; }
+  size_type length() const { return length_; }
+  bool empty() const { return length_ == 0; }
+
+  void clear() {
+    ptr_ = NULL;
+    length_ = 0;
+  }
+  void set(const value_type* data, size_type len) {
+    ptr_ = data;
+    length_ = len;
+  }
+  void set(const value_type* str) {
+    ptr_ = str;
+    length_ = str ? STRING_TYPE::traits_type::length(str) : 0;
+  }
+
+  value_type operator[](size_type i) const { return ptr_[i]; }
+  value_type front() const { return ptr_[0]; }
+  value_type back() const { return ptr_[length_ - 1]; }
+
+  void remove_prefix(size_type n) {
+    ptr_ += n;
+    length_ -= n;
+  }
+
+  void remove_suffix(size_type n) {
+    length_ -= n;
+  }
+
+  int compare(const BasicStringPiece<STRING_TYPE>& x) const {
+    int r = wordmemcmp(
+        ptr_, x.ptr_, (length_ < x.length_ ? length_ : x.length_));
+    if (r == 0) {
+      if (length_ < x.length_) r = -1;
+      else if (length_ > x.length_) r = +1;
+    }
+    return r;
+  }
+
+  STRING_TYPE as_string() const {
+    // std::string doesn't like to take a NULL pointer even with a 0 size.
+    return empty() ? STRING_TYPE() : STRING_TYPE(data(), size());
+  }
+
+  const_iterator begin() const { return ptr_; }
+  const_iterator end() const { return ptr_ + length_; }
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(ptr_ + length_);
+  }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(ptr_);
+  }
+
+  size_type max_size() const { return length_; }
+  size_type capacity() const { return length_; }
+
+  static int wordmemcmp(const value_type* p,
+                        const value_type* p2,
+                        size_type N) {
+    return STRING_TYPE::traits_type::compare(p, p2, N);
+  }
+
+  // Sets the value of the given string target type to be the current string.
+  // This saves a temporary over doing |a = b.as_string()|
+  void CopyToString(STRING_TYPE* target) const {
+    internal::CopyToString(*this, target);
+  }
+
+  void AppendToString(STRING_TYPE* target) const {
+    internal::AppendToString(*this, target);
+  }
+
+  size_type copy(value_type* buf, size_type n, size_type pos = 0) const {
+    return internal::copy(*this, buf, n, pos);
+  }
+
+  // Does "this" start with "x"
+  bool starts_with(const BasicStringPiece& x) const {
+    return ((this->length_ >= x.length_) &&
+            (wordmemcmp(this->ptr_, x.ptr_, x.length_) == 0));
+  }
+
+  // Does "this" end with "x"
+  bool ends_with(const BasicStringPiece& x) const {
+    return ((this->length_ >= x.length_) &&
+            (wordmemcmp(this->ptr_ + (this->length_-x.length_),
+                        x.ptr_, x.length_) == 0));
+  }
+
+  // find: Search for a character or substring at a given offset.
+  size_type find(const BasicStringPiece<STRING_TYPE>& s,
+                 size_type pos = 0) const {
+    return internal::find(*this, s, pos);
+  }
+  size_type find(value_type c, size_type pos = 0) const {
+    return internal::find(*this, c, pos);
+  }
+
+  // rfind: Reverse find.
+  size_type rfind(const BasicStringPiece& s,
+                  size_type pos = BasicStringPiece::npos) const {
+    return internal::rfind(*this, s, pos);
+  }
+  size_type rfind(value_type c, size_type pos = BasicStringPiece::npos) const {
+    return internal::rfind(*this, c, pos);
+  }
+
+  // find_first_of: Find the first occurrence of one of a set of characters.
+  size_type find_first_of(const BasicStringPiece& s,
+                          size_type pos = 0) const {
+    return internal::find_first_of(*this, s, pos);
+  }
+  size_type find_first_of(value_type c, size_type pos = 0) const {
+    return find(c, pos);
+  }
+
+  // find_first_not_of: Find the first occurrence not of a set of characters.
+  size_type find_first_not_of(const BasicStringPiece& s,
+                              size_type pos = 0) const {
+    return internal::find_first_not_of(*this, s, pos);
+  }
+  size_type find_first_not_of(value_type c, size_type pos = 0) const {
+    return internal::find_first_not_of(*this, c, pos);
+  }
+
+  // find_last_of: Find the last occurrence of one of a set of characters.
+  size_type find_last_of(const BasicStringPiece& s,
+                         size_type pos = BasicStringPiece::npos) const {
+    return internal::find_last_of(*this, s, pos);
+  }
+  size_type find_last_of(value_type c,
+                         size_type pos = BasicStringPiece::npos) const {
+    return rfind(c, pos);
+  }
+
+  // find_last_not_of: Find the last occurrence not of a set of characters.
+  size_type find_last_not_of(const BasicStringPiece& s,
+                             size_type pos = BasicStringPiece::npos) const {
+    return internal::find_last_not_of(*this, s, pos);
+  }
+  size_type find_last_not_of(value_type c,
+                             size_type pos = BasicStringPiece::npos) const {
+    return internal::find_last_not_of(*this, c, pos);
+  }
+
+  // substr.
+  BasicStringPiece substr(size_type pos,
+                          size_type n = BasicStringPiece::npos) const {
+    return internal::substr(*this, pos, n);
+  }
+
+ protected:
+  const value_type* ptr_;
+  size_type     length_;
+};
+
+template <typename STRING_TYPE>
+const typename BasicStringPiece<STRING_TYPE>::size_type
+BasicStringPiece<STRING_TYPE>::npos =
+    typename BasicStringPiece<STRING_TYPE>::size_type(-1);
+
+// MSVC doesn't like complex extern templates and DLLs.
+#if !defined(COMPILER_MSVC)
+extern template class BASE_EXPORT BasicStringPiece<std::string>;
+extern template class BASE_EXPORT BasicStringPiece<string16>;
+#endif
+
+// StringPiece operators -------------------------------------------------------
+
+BASE_EXPORT bool operator==(const StringPiece& x, const StringPiece& y);
+
+inline bool operator!=(const StringPiece& x, const StringPiece& y) {
+  return !(x == y);
+}
+
+inline bool operator<(const StringPiece& x, const StringPiece& y) {
+  const int r = StringPiece::wordmemcmp(
+      x.data(), y.data(), (x.size() < y.size() ? x.size() : y.size()));
+  return ((r < 0) || ((r == 0) && (x.size() < y.size())));
+}
+
+inline bool operator>(const StringPiece& x, const StringPiece& y) {
+  return y < x;
+}
+
+inline bool operator<=(const StringPiece& x, const StringPiece& y) {
+  return !(x > y);
+}
+
+inline bool operator>=(const StringPiece& x, const StringPiece& y) {
+  return !(x < y);
+}
+
+// StringPiece16 operators -----------------------------------------------------
+
+inline bool operator==(const StringPiece16& x, const StringPiece16& y) {
+  if (x.size() != y.size())
+    return false;
+
+  return StringPiece16::wordmemcmp(x.data(), y.data(), x.size()) == 0;
+}
+
+inline bool operator!=(const StringPiece16& x, const StringPiece16& y) {
+  return !(x == y);
+}
+
+inline bool operator<(const StringPiece16& x, const StringPiece16& y) {
+  const int r = StringPiece16::wordmemcmp(
+      x.data(), y.data(), (x.size() < y.size() ? x.size() : y.size()));
+  return ((r < 0) || ((r == 0) && (x.size() < y.size())));
+}
+
+inline bool operator>(const StringPiece16& x, const StringPiece16& y) {
+  return y < x;
+}
+
+inline bool operator<=(const StringPiece16& x, const StringPiece16& y) {
+  return !(x > y);
+}
+
+inline bool operator>=(const StringPiece16& x, const StringPiece16& y) {
+  return !(x < y);
+}
+
+BASE_EXPORT std::ostream& operator<<(std::ostream& o,
+                                     const StringPiece& piece);
+
+// Hashing ---------------------------------------------------------------------
+
+// We provide appropriate hash functions so StringPiece and StringPiece16 can
+// be used as keys in hash sets and maps.
+
+// This hash function is copied from base/strings/string16.h. We don't use the
+// ones already defined for string and string16 directly because it would
+// require the string constructors to be called, which we don't want.
+#define HASH_STRING_PIECE(StringPieceType, string_piece)         \
+  std::size_t result = 0;                                        \
+  for (StringPieceType::const_iterator i = string_piece.begin(); \
+       i != string_piece.end(); ++i)                             \
+    result = (result * 131) + *i;                                \
+  return result;
+
+struct StringPieceHash {
+  std::size_t operator()(const StringPiece& sp) const {
+    HASH_STRING_PIECE(StringPiece, sp);
+  }
+};
+struct StringPiece16Hash {
+  std::size_t operator()(const StringPiece16& sp16) const {
+    HASH_STRING_PIECE(StringPiece16, sp16);
+  }
+};
+
+}  // namespace base
+}  // namespace url
+
+#endif  // BASE_STRINGS_STRING_PIECE_H_

diff --git a/src/base/strings/string_util.cc b/src/base/strings/string_util.cc
index 8b2e068..095fbfa 100644
--- a/src/base/strings/string_util.cc
+++ b/src/base/strings/string_util.cc

@@ -4,144 +4,173 @@
 
 #include "base/strings/string_util.h"
 
-#include "base/basictypes.h"
-#include "base/third_party/icu/icu_utf.h"
+#include <ctype.h>
+#include <errno.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <wchar.h>
+#include <wctype.h>
 
-static bool IsWildcard(base_icu::UChar32 character) {
-  return character == '*' || character == '?';
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+namespace url {
+namespace base {
+
+namespace {
+
+// Assuming that a pointer is the size of a "machine word", then
+// uintptr_t is an integer type that is also a machine word.
+typedef uintptr_t MachineWord;
+const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
+
+inline bool IsAlignedToMachineWord(const void* pointer) {
+  return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
 }
 
-// Move the strings pointers to the point where they start to differ.
-template <typename CHAR, typename NEXT>
-static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
-                         const CHAR** string, const CHAR* string_end,
-                         NEXT next) {
-  const CHAR* escape = NULL;
-  while (*pattern != pattern_end && *string != string_end) {
-    if (!escape && IsWildcard(**pattern)) {
-      // We don't want to match wildcard here, except if it's escaped.
-      return;
-    }
-
-    // Check if the escapement char is found. If so, skip it and move to the
-    // next character.
-    if (!escape && **pattern == '\\') {
-      escape = *pattern;
-      next(pattern, pattern_end);
-      continue;
-    }
-
-    // Check if the chars match, if so, increment the ptrs.
-    const CHAR* pattern_next = *pattern;
-    const CHAR* string_next = *string;
-    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
-    if (pattern_char == next(&string_next, string_end) &&
-        pattern_char != CBU_SENTINEL) {
-      *pattern = pattern_next;
-      *string = string_next;
-    } else {
-      // Uh oh, it did not match, we are done. If the last char was an
-      // escapement, that means that it was an error to advance the ptr here,
-      // let's put it back where it was. This also mean that the MatchPattern
-      // function will return false because if we can't match an escape char
-      // here, then no one will.
-      if (escape) {
-        *pattern = escape;
-      }
-      return;
-    }
-
-    escape = NULL;
-  }
+template<typename T> inline T* AlignToMachineWord(T* pointer) {
+  return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
+                              ~kMachineWordAlignmentMask);
 }
 
-template <typename CHAR, typename NEXT>
-static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
-  while (*pattern != end) {
-    if (!IsWildcard(**pattern))
-      return;
-    next(pattern, end);
-  }
-}
-
-template <typename CHAR, typename NEXT>
-static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
-                          const CHAR* pattern, const CHAR* pattern_end,
-                          int depth,
-                          NEXT next) {
-  const int kMaxDepth = 16;
-  if (depth > kMaxDepth)
-    return false;
-
-  // Eat all the matching chars.
-  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
-
-  // If the string is empty, then the pattern must be empty too, or contains
-  // only wildcards.
-  if (eval == eval_end) {
-    EatWildcard(&pattern, pattern_end, next);
-    return pattern == pattern_end;
-  }
-
-  // Pattern is empty but not string, this is not a match.
-  if (pattern == pattern_end)
-    return false;
-
-  // If this is a question mark, then we need to compare the rest with
-  // the current string or the string with one character eaten.
-  const CHAR* next_pattern = pattern;
-  next(&next_pattern, pattern_end);
-  if (pattern[0] == '?') {
-    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
-                      depth + 1, next))
-      return true;
-    const CHAR* next_eval = eval;
-    next(&next_eval, eval_end);
-    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
-                      depth + 1, next))
-      return true;
-  }
-
-  // This is a *, try to match all the possible substrings with the remainder
-  // of the pattern.
-  if (pattern[0] == '*') {
-    // Collapse duplicate wild cards (********** into *) so that the
-    // method does not recurse unnecessarily. http://crbug.com/52839
-    EatWildcard(&next_pattern, pattern_end, next);
-
-    while (eval != eval_end) {
-      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
-                        depth + 1, next))
-        return true;
-      eval++;
-    }
-
-    // We reached the end of the string, let see if the pattern contains only
-    // wildcards.
-    if (eval == eval_end) {
-      EatWildcard(&pattern, pattern_end, next);
-      if (pattern != pattern_end)
-        return false;
-      return true;
-    }
-  }
-
-  return false;
-}
-
-struct NextCharUTF8 {
-  base_icu::UChar32 operator()(const char** p, const char* end) {
-    base_icu::UChar32 c;
-    int offset = 0;
-    CBU8_NEXT(*p, offset, end - *p, c);
-    *p += offset;
-    return c;
-  }
+template<size_t size, typename CharacterType> struct NonASCIIMask;
+template<> struct NonASCIIMask<4, char16> {
+    static inline uint32_t value() { return 0xFF80FF80U; }
 };
+template<> struct NonASCIIMask<4, char> {
+    static inline uint32_t value() { return 0x80808080U; }
+};
+template<> struct NonASCIIMask<8, char16> {
+    static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
+};
+template<> struct NonASCIIMask<8, char> {
+    static inline uint64_t value() { return 0x8080808080808080ULL; }
+};
+#if defined(WCHAR_T_IS_UTF32)
+template<> struct NonASCIIMask<4, wchar_t> {
+    static inline uint32_t value() { return 0xFFFFFF80U; }
+};
+template<> struct NonASCIIMask<8, wchar_t> {
+    static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
+};
+#endif  // WCHAR_T_IS_UTF32
 
-bool MatchPattern(const std::string& eval,
-                  const std::string& pattern) {
-  return MatchPatternT(eval.data(), eval.data() + eval.size(),
-                       pattern.data(), pattern.data() + pattern.size(),
-                       0, NextCharUTF8());
+template<typename StringType>
+StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
+  StringType ret;
+  ret.reserve(str.size());
+  for (size_t i = 0; i < str.size(); i++)
+    ret.push_back(ToLowerASCII(str[i]));
+  return ret;
 }
+
+template<typename StringType>
+StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
+  StringType ret;
+  ret.reserve(str.size());
+  for (size_t i = 0; i < str.size(); i++)
+    ret.push_back(ToUpperASCII(str[i]));
+  return ret;
+}
+
+}  // namespace
+
+std::string ToLowerASCII(StringPiece str) {
+  return ToLowerASCIIImpl<std::string>(str);
+}
+
+string16 ToLowerASCII(StringPiece16 str) {
+  return ToLowerASCIIImpl<string16>(str);
+}
+
+template <class Char>
+inline bool DoIsStringASCII(const Char* characters, size_t length) {
+  MachineWord all_char_bits = 0;
+  const Char* end = characters + length;
+
+  // Prologue: align the input.
+  while (!IsAlignedToMachineWord(characters) && characters != end) {
+    all_char_bits |= *characters;
+    ++characters;
+  }
+
+  // Compare the values of CPU word size.
+  const Char* word_end = AlignToMachineWord(end);
+  const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
+  while (characters < word_end) {
+    all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
+    characters += loop_increment;
+  }
+
+  // Process the remaining bytes.
+  while (characters != end) {
+    all_char_bits |= *characters;
+    ++characters;
+  }
+
+  MachineWord non_ascii_bit_mask =
+      NonASCIIMask<sizeof(MachineWord), Char>::value();
+  return !(all_char_bits & non_ascii_bit_mask);
+}
+
+bool IsStringASCII(const StringPiece& str) {
+  return DoIsStringASCII(str.data(), str.length());
+}
+
+bool IsStringASCII(const StringPiece16& str) {
+  return DoIsStringASCII(str.data(), str.length());
+}
+
+bool IsStringASCII(const string16& str) {
+  return DoIsStringASCII(str.data(), str.length());
+}
+
+#if defined(WCHAR_T_IS_UTF32)
+bool IsStringASCII(const std::wstring& str) {
+  return DoIsStringASCII(str.data(), str.length());
+}
+#endif
+
+// Implementation note: Normally this function will be called with a hardcoded
+// constant for the lowercase_ascii parameter. Constructing a StringPiece from
+// a C constant requires running strlen, so the result will be two passes
+// through the buffers, one to find the length of lowercase_ascii, and one to
+// compare each letter.
+//
+// This function could have taken a const char* to avoid this and only do one
+// pass through the string. But the strlen is faster than the case-insensitive
+// compares and lets us early-exit in the case that the strings are different
+// lengths (will often be the case for non-matches). So whether one approach or
+// the other will be faster depends on the case.
+//
+// The hardcoded strings are typically very short so it doesn't matter, and the
+// string piece gives additional flexibility for the caller (doesn't have to be
+// null terminated) so we choose the StringPiece route.
+template<typename Str>
+static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
+                                          StringPiece lowercase_ascii) {
+  if (str.size() != lowercase_ascii.size())
+    return false;
+  for (size_t i = 0; i < str.size(); i++) {
+    if (ToLowerASCII(str[i]) != lowercase_ascii[i])
+      return false;
+  }
+  return true;
+}
+
+bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
+  return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii);
+}
+
+bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
+  return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii);
+}
+
+}  // namespace base
+}  // namespace url

diff --git a/src/base/strings/string_util.h b/src/base/strings/string_util.h
index ffc1579..458d202 100644
--- a/src/base/strings/string_util.h
+++ b/src/base/strings/string_util.h

@@ -7,14 +7,53 @@
 #ifndef BASE_STRINGS_STRING_UTIL_H_
 #define BASE_STRINGS_STRING_UTIL_H_
 
-#include "base/basictypes.h"
+#include <ctype.h>
+#include <stdarg.h>   // va_list
+#include <stddef.h>
+#include <stdint.h>
 
-// Returns true if the string passed in matches the pattern. The pattern
-// string can contain wildcards like * and ?
-// The backslash character (\) is an escape character for * and ?
-// We limit the patterns to having a max of 16 * or ? characters.
-// ? matches 0 or 1 character, while * matches 0 or more characters.
-bool MatchPattern(const std::string& string,
-                  const std::string& pattern);
+#include <string>
+#include <vector>
+
+#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"  // For implicit conversions.
+#include "build/build_config.h"
+
+#define BASE_EXPORT
+
+namespace url {
+namespace base {
+
+// ASCII-specific tolower.  The standard library's tolower is locale sensitive,
+// so we don't want to use it here.
+inline char ToLowerASCII(char c) {
+  return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
+}
+inline char16 ToLowerASCII(char16 c) {
+  return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
+}
+
+// Converts the given string to its ASCII-lowercase equivalent.
+BASE_EXPORT std::string ToLowerASCII(StringPiece str);
+BASE_EXPORT string16 ToLowerASCII(StringPiece16 str);
+
+BASE_EXPORT bool IsStringASCII(const StringPiece& str);
+BASE_EXPORT bool IsStringASCII(const StringPiece16& str);
+// A convenience adaptor for WebStrings, as they don't convert into
+// StringPieces directly.
+BASE_EXPORT bool IsStringASCII(const string16& str);
+#if defined(WCHAR_T_IS_UTF32)
+BASE_EXPORT bool IsStringASCII(const std::wstring& str);
+#endif
+
+// Returns true if the ASCII-lowercased form of |str| equals |lowercase_ascii|,
+// which the caller must have lower-cased already (typically a constant).
+BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str,
+                                      StringPiece lowercase_ascii);
+BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str,
+                                      StringPiece lowercase_ascii);
+
+}  // namespace base
+}  // namespace url
 
 #endif  // BASE_STRINGS_STRING_UTIL_H_

diff --git a/src/base/strings/utf_string_conversion_utils.cc b/src/base/strings/utf_string_conversion_utils.cc
index e71605b..a22b109 100644
--- a/src/base/strings/utf_string_conversion_utils.cc
+++ b/src/base/strings/utf_string_conversion_utils.cc

@@ -12,15 +12,15 @@
 // ReadUnicodeCharacter --------------------------------------------------------
 
 bool ReadUnicodeCharacter(const char* src,
-                          int32 src_len,
-                          int32* char_index,
-                          uint32* code_point_out) {
+                          int32_t src_len,
+                          int32_t* char_index,
+                          uint32_t* code_point_out) {
   // U8_NEXT expects to be able to use -1 to signal an error, so we must
   // use a signed type for code_point.  But this function returns false
   // on error anyway, so code_point_out is unsigned.
-  int32 code_point;
+  int32_t code_point;
   CBU8_NEXT(src, *char_index, src_len, code_point);
-  *code_point_out = static_cast<uint32>(code_point);
+  *code_point_out = static_cast<uint32_t>(code_point);
 
   // The ICU macro above moves to the next char, we want to point to the last
   // char consumed.
@@ -31,9 +31,9 @@
 }
 
 bool ReadUnicodeCharacter(const char16* src,
-                          int32 src_len,
-                          int32* char_index,
-                          uint32* code_point) {
+                          int32_t src_len,
+                          int32_t* char_index,
+                          uint32_t* code_point) {
   if (CBU16_IS_SURROGATE(src[*char_index])) {
     if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) ||
         *char_index + 1 >= src_len ||
@@ -56,9 +56,9 @@
 
 #if defined(WCHAR_T_IS_UTF32)
 bool ReadUnicodeCharacter(const wchar_t* src,
-                          int32 src_len,
-                          int32* char_index,
-                          uint32* code_point) {
+                          int32_t src_len,
+                          int32_t* char_index,
+                          uint32_t* code_point) {
   // Conversion is easy since the source is 32-bit.
   *code_point = src[*char_index];
 
@@ -69,7 +69,7 @@
 
 // WriteUnicodeCharacter -------------------------------------------------------
 
-size_t WriteUnicodeCharacter(uint32 code_point, std::string* output) {
+size_t WriteUnicodeCharacter(uint32_t code_point, std::string* output) {
   if (code_point <= 0x7f) {
     // Fast path the common case of one byte.
     output->push_back(static_cast<char>(code_point));
@@ -90,7 +90,7 @@
   return char_offset - original_char_offset;
 }
 
-size_t WriteUnicodeCharacter(uint32 code_point, string16* output) {
+size_t WriteUnicodeCharacter(uint32_t code_point, string16* output) {
   if (CBU16_LENGTH(code_point) == 1) {
     // Thie code point is in the Basic Multilingual Plane (BMP).
     output->push_back(static_cast<char16>(code_point));

diff --git a/src/base/strings/utf_string_conversion_utils.h b/src/base/strings/utf_string_conversion_utils.h
index b24f03b..294670e 100644
--- a/src/base/strings/utf_string_conversion_utils.h
+++ b/src/base/strings/utf_string_conversion_utils.h

@@ -7,12 +7,17 @@
 
 // This should only be used by the various UTF string conversion files.
 
+#include <stddef.h>
+#include <stdint.h>
+
 #include "base/strings/string16.h"
 
+#define BASE_EXPORT
+
 namespace url {
 namespace base {
 
-inline bool IsValidCodepoint(uint32 code_point) {
+inline bool IsValidCodepoint(uint32_t code_point) {
   // Excludes the surrogate code points ([0xD800, 0xDFFF]) and
   // codepoints larger than 0x10FFFF (the highest codepoint allowed).
   // Non-characters and unassigned codepoints are allowed.
@@ -20,7 +25,7 @@
          (code_point >= 0xE000u && code_point <= 0x10FFFFu);
 }
 
-inline bool IsValidCharacter(uint32 code_point) {
+inline bool IsValidCharacter(uint32_t code_point) {
   // Excludes non-characters (U+FDD0..U+FDEF, and all codepoints ending in
   // 0xFFFE or 0xFFFF) from the set of valid code points.
   return code_point < 0xD800u || (code_point >= 0xE000u &&
@@ -37,41 +42,40 @@
 // (as in a for loop) will take the reader to the next character.
 //
 // Returns true on success. On false, |*code_point| will be invalid.
-bool ReadUnicodeCharacter(const char* src,
-                          int32 src_len,
-                          int32* char_index,
-                          uint32* code_point_out);
+BASE_EXPORT bool ReadUnicodeCharacter(const char* src,
+                                      int32_t src_len,
+                                      int32_t* char_index,
+                                      uint32_t* code_point_out);
 
 // Reads a UTF-16 character. The usage is the same as the 8-bit version above.
-bool ReadUnicodeCharacter(const char16* src,
-                          int32 src_len,
-                          int32* char_index,
-                          uint32* code_point);
+BASE_EXPORT bool ReadUnicodeCharacter(const char16* src,
+                                      int32_t src_len,
+                                      int32_t* char_index,
+                                      uint32_t* code_point);
 
 #if defined(WCHAR_T_IS_UTF32)
 // Reads UTF-32 character. The usage is the same as the 8-bit version above.
-bool ReadUnicodeCharacter(const wchar_t* src,
-                          int32 src_len,
-                          int32* char_index,
-                          uint32* code_point);
+BASE_EXPORT bool ReadUnicodeCharacter(const wchar_t* src,
+                                      int32_t src_len,
+                                      int32_t* char_index,
+                                      uint32_t* code_point);
 #endif  // defined(WCHAR_T_IS_UTF32)
 
 // WriteUnicodeCharacter -------------------------------------------------------
 
 // Appends a UTF-8 character to the given 8-bit string.  Returns the number of
 // bytes written.
-// TODO(brettw) Bug 79631: This function should not be exposed.
-size_t WriteUnicodeCharacter(uint32 code_point,
-                             std::string* output);
+BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point,
+                                         std::string* output);
 
 // Appends the given code point as a UTF-16 character to the given 16-bit
 // string.  Returns the number of 16-bit values written.
-size_t WriteUnicodeCharacter(uint32 code_point, string16* output);
+BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point, string16* output);
 
 #if defined(WCHAR_T_IS_UTF32)
 // Appends the given UTF-32 character to the given 32-bit string.  Returns the
 // number of 32-bit values written.
-inline size_t WriteUnicodeCharacter(uint32 code_point, std::wstring* output) {
+inline size_t WriteUnicodeCharacter(uint32_t code_point, std::wstring* output) {
   // This is the easy case, just append the character.
   output->push_back(code_point);
   return 1;

diff --git a/src/base/third_party/icu/icu_utf.cc b/src/base/third_party/icu/icu_utf.cc
index 55edce1..9d48707 100644
--- a/src/base/third_party/icu/icu_utf.cc
+++ b/src/base/third_party/icu/icu_utf.cc

@@ -18,6 +18,7 @@
 */
 
 #include "base/third_party/icu/icu_utf.h"
+#include "base/macros.h"
 
 namespace base_icu {
 
@@ -74,32 +75,28 @@
  * lead bytes above 0xf4 are illegal.
  * We keep them in this table for skipping long ISO 10646-UTF-8 sequences.
  */
-const uint8
-utf8_countTrailBytes[256]={
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+const uint8_t utf8_countTrailBytes[256] =
+    {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    3, 3, 3, 3, 3,
-    3, 3, 3,    /* illegal in Unicode */
-    4, 4, 4, 4, /* illegal in Unicode */
-    5, 5,       /* illegal in Unicode */
-    0, 0        /* illegal bytes 0xfe and 0xff */
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
+        3, 3,       /* illegal in Unicode */
+        4, 4, 4, 4, /* illegal in Unicode */
+        5, 5,       /* illegal in Unicode */
+        0, 0        /* illegal bytes 0xfe and 0xff */
 };
 
 static const UChar32
@@ -133,12 +130,15 @@
  *
  * Note that a UBool is the same as an int8_t.
  */
-UChar32
-utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, UBool strict) {
-    int32 i=*pi;
-    uint8 count=CBU8_COUNT_TRAIL_BYTES(c);
+UChar32 utf8_nextCharSafeBody(const uint8_t* s,
+                              int32_t* pi,
+                              int32_t length,
+                              UChar32 c,
+                              UBool strict) {
+  int32_t i = *pi;
+  uint8_t count = CBU8_COUNT_TRAIL_BYTES(c);
     if((i)+count<=(length)) {
-        uint8 trail, illegal=0;
+      uint8_t trail, illegal = 0;
 
         CBU8_MASK_LEAD_BYTE((c), count);
         /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
@@ -194,7 +194,7 @@
         /* illegal is also set if count>=4 */
         if(illegal || (c)<utf8_minLegal[count] || (CBU_IS_SURROGATE(c) && strict!=-2)) {
             /* error handling */
-            uint8 errorCount=count;
+            uint8_t errorCount = count;
             /* don't go beyond this sequence */
             i=*pi;
             while(count>0 && CBU8_IS_TRAIL(s[i])) {
@@ -212,7 +212,7 @@
         }
     } else /* too few bytes left */ {
         /* error handling */
-        int32 i0=i;
+        int32_t i0 = i;
         /* don't just set (i)=(length) in case there is an illegal sequence */
         while((i)<(length) && CBU8_IS_TRAIL(s[i])) {
             ++(i);

diff --git a/src/base/third_party/icu/icu_utf.h b/src/base/third_party/icu/icu_utf.h
index 2b993b0..4370fde 100644
--- a/src/base/third_party/icu/icu_utf.h
+++ b/src/base/third_party/icu/icu_utf.h

@@ -17,13 +17,13 @@
 #ifndef BASE_THIRD_PARTY_ICU_ICU_UTF_H_
 #define BASE_THIRD_PARTY_ICU_ICU_UTF_H_
 
-#include "base/basictypes.h"
+#include <stdint.h>
 
 namespace base_icu {
 
-typedef int32 UChar32;
-typedef uint16 UChar;
-typedef int8 UBool;
+typedef int32_t UChar32;
+typedef uint16_t UChar;
+typedef int8_t UBool;
 
 // General ---------------------------------------------------------------------
 // from utf.h
@@ -54,10 +54,9 @@
  * @return TRUE or FALSE
  * @stable ICU 2.4
  */
-#define CBU_IS_UNICODE_NONCHAR(c) \
-    ((c)>=0xfdd0 && \
-     ((uint32)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
-     (uint32)(c)<=0x10ffff)
+#define CBU_IS_UNICODE_NONCHAR(c)                                          \
+  ((c) >= 0xfdd0 && ((uint32_t)(c) <= 0xfdef || ((c)&0xfffe) == 0xfffe) && \
+   (uint32_t)(c) <= 0x10ffff)
 
 /**
  * Is c a Unicode code point value (0..U+10ffff)
@@ -76,11 +75,10 @@
  * @return TRUE or FALSE
  * @stable ICU 2.4
  */
-#define CBU_IS_UNICODE_CHAR(c) \
-    ((uint32)(c)<0xd800 || \
-        ((uint32)(c)>0xdfff && \
-         (uint32)(c)<=0x10ffff && \
-         !CBU_IS_UNICODE_NONCHAR(c)))
+#define CBU_IS_UNICODE_CHAR(c)                             \
+  ((uint32_t)(c) < 0xd800 ||                               \
+   ((uint32_t)(c) > 0xdfff && (uint32_t)(c) <= 0x10ffff && \
+    !CBU_IS_UNICODE_NONCHAR(c)))
 
 /**
  * Is this code point a surrogate (U+d800..U+dfff)?
@@ -103,13 +101,14 @@
 // UTF-8 macros ----------------------------------------------------------------
 // from utf8.h
 
-extern const uint8 utf8_countTrailBytes[256];
+extern const uint8_t utf8_countTrailBytes[256];
 
 /**
  * Count the trail bytes for a UTF-8 lead byte.
  * @internal
  */
-#define CBU8_COUNT_TRAIL_BYTES(leadByte) (base_icu::utf8_countTrailBytes[(uint8)leadByte])
+#define CBU8_COUNT_TRAIL_BYTES(leadByte) \
+  (base_icu::utf8_countTrailBytes[(uint8_t)leadByte])
 
 /**
  * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
@@ -131,7 +130,7 @@
  * @return TRUE or FALSE
  * @stable ICU 2.4
  */
-#define CBU8_IS_LEAD(c) ((uint8)((c)-0xc0)<0x3e)
+#define CBU8_IS_LEAD(c) ((uint8_t)((c)-0xc0) < 0x3e)
 
 /**
  * Is this code unit (byte) a UTF-8 trail byte?
@@ -148,16 +147,16 @@
  * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
  * @stable ICU 2.4
  */
-#define CBU8_LENGTH(c) \
-    ((uint32)(c)<=0x7f ? 1 : \
-        ((uint32)(c)<=0x7ff ? 2 : \
-            ((uint32)(c)<=0xd7ff ? 3 : \
-                ((uint32)(c)<=0xdfff || (uint32)(c)>0x10ffff ? 0 : \
-                    ((uint32)(c)<=0xffff ? 3 : 4)\
-                ) \
-            ) \
-        ) \
-    )
+#define CBU8_LENGTH(c)                                                      \
+  ((uint32_t)(c) <= 0x7f                                                    \
+       ? 1                                                                  \
+       : ((uint32_t)(c) <= 0x7ff                                            \
+              ? 2                                                           \
+              : ((uint32_t)(c) <= 0xd7ff                                    \
+                     ? 3                                                    \
+                     : ((uint32_t)(c) <= 0xdfff || (uint32_t)(c) > 0x10ffff \
+                            ? 0                                             \
+                            : ((uint32_t)(c) <= 0xffff ? 3 : 4)))))
 
 /**
  * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
@@ -170,7 +169,11 @@
  * Function for handling "next code point" with error-checking.
  * @internal
  */
-UChar32 utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, UBool strict);
+UChar32 utf8_nextCharSafeBody(const uint8_t* s,
+                              int32_t* pi,
+                              int32_t length,
+                              UChar32 c,
+                              UBool strict);
 
 /**
  * Get a code point from a string at a code point boundary offset,
@@ -183,55 +186,59 @@
  * If the offset points to a trail byte or an illegal UTF-8 sequence, then
  * c is set to a negative value.
  *
- * @param s const uint8 * string
+ * @param s const uint8_t * string
  * @param i string offset, i<length
  * @param length string length
  * @param c output UChar32 variable, set to <0 in case of an error
  * @see CBU8_NEXT_UNSAFE
  * @stable ICU 2.4
  */
-#define CBU8_NEXT(s, i, length, c) { \
-    (c)=(s)[(i)++]; \
-    if(((uint8)(c))>=0x80) { \
-        if(CBU8_IS_LEAD(c)) { \
-            (c)=base_icu::utf8_nextCharSafeBody((const uint8 *)s, &(i), (int32)(length), c, -1); \
-        } else { \
-            (c)=CBU_SENTINEL; \
-        } \
-    } \
-}
+#define CBU8_NEXT(s, i, length, c)                                       \
+  {                                                                      \
+    (c) = (s)[(i)++];                                                    \
+    if (((uint8_t)(c)) >= 0x80) {                                        \
+      if (CBU8_IS_LEAD(c)) {                                             \
+        (c) = base_icu::utf8_nextCharSafeBody((const uint8_t*)s, &(i),   \
+                                              (int32_t)(length), c, -1); \
+      } else {                                                           \
+        (c) = CBU_SENTINEL;                                              \
+      }                                                                  \
+    }                                                                    \
+  }
 
 /**
  * Append a code point to a string, overwriting 1 to 4 bytes.
  * The offset points to the current end of the string contents
  * and is advanced (post-increment).
- * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the
+ * string.
  * Otherwise, the result is undefined.
  *
- * @param s const uint8 * string buffer
+ * @param s const uint8_t * string buffer
  * @param i string offset
  * @param c code point to append
  * @see CBU8_APPEND
  * @stable ICU 2.4
  */
-#define CBU8_APPEND_UNSAFE(s, i, c) { \
-    if((uint32)(c)<=0x7f) { \
-        (s)[(i)++]=(uint8)(c); \
-    } else { \
-        if((uint32)(c)<=0x7ff) { \
-            (s)[(i)++]=(uint8)(((c)>>6)|0xc0); \
-        } else { \
-            if((uint32)(c)<=0xffff) { \
-                (s)[(i)++]=(uint8)(((c)>>12)|0xe0); \
-            } else { \
-                (s)[(i)++]=(uint8)(((c)>>18)|0xf0); \
-                (s)[(i)++]=(uint8)((((c)>>12)&0x3f)|0x80); \
-            } \
-            (s)[(i)++]=(uint8)((((c)>>6)&0x3f)|0x80); \
-        } \
-        (s)[(i)++]=(uint8)(((c)&0x3f)|0x80); \
-    } \
-}
+#define CBU8_APPEND_UNSAFE(s, i, c)                            \
+  {                                                            \
+    if ((uint32_t)(c) <= 0x7f) {                               \
+      (s)[(i)++] = (uint8_t)(c);                               \
+    } else {                                                   \
+      if ((uint32_t)(c) <= 0x7ff) {                            \
+        (s)[(i)++] = (uint8_t)(((c) >> 6) | 0xc0);             \
+      } else {                                                 \
+        if ((uint32_t)(c) <= 0xffff) {                         \
+          (s)[(i)++] = (uint8_t)(((c) >> 12) | 0xe0);          \
+        } else {                                               \
+          (s)[(i)++] = (uint8_t)(((c) >> 18) | 0xf0);          \
+          (s)[(i)++] = (uint8_t)((((c) >> 12) & 0x3f) | 0x80); \
+        }                                                      \
+        (s)[(i)++] = (uint8_t)((((c) >> 6) & 0x3f) | 0x80);    \
+      }                                                        \
+      (s)[(i)++] = (uint8_t)(((c)&0x3f) | 0x80);               \
+    }                                                          \
+  }
 
 // UTF-16 macros ---------------------------------------------------------------
 // from utf16.h
@@ -325,7 +332,7 @@
  * @return 1 or 2
  * @stable ICU 2.4
  */
-#define CBU16_LENGTH(c) ((uint32)(c)<=0xffff ? 1 : 2)
+#define CBU16_LENGTH(c) ((uint32_t)(c) <= 0xffff ? 1 : 2)
 
 /**
  * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
@@ -353,16 +360,17 @@
  * @param c output UChar32 variable
  * @stable ICU 2.4
  */
-#define CBU16_NEXT(s, i, length, c) { \
-    (c)=(s)[(i)++]; \
-    if(CBU16_IS_LEAD(c)) { \
-        uint16 __c2; \
-        if((i)<(length) && CBU16_IS_TRAIL(__c2=(s)[(i)])) { \
-            ++(i); \
-            (c)=CBU16_GET_SUPPLEMENTARY((c), __c2); \
-        } \
-    } \
-}
+#define CBU16_NEXT(s, i, length, c)                            \
+  {                                                            \
+    (c) = (s)[(i)++];                                          \
+    if (CBU16_IS_LEAD(c)) {                                    \
+      uint16_t __c2;                                           \
+      if ((i) < (length) && CBU16_IS_TRAIL(__c2 = (s)[(i)])) { \
+        ++(i);                                                 \
+        (c) = CBU16_GET_SUPPLEMENTARY((c), __c2);              \
+      }                                                        \
+    }                                                          \
+  }
 
 /**
  * Append a code point to a string, overwriting 1 or 2 code units.
@@ -377,14 +385,15 @@
  * @see CBU16_APPEND
  * @stable ICU 2.4
  */
-#define CBU16_APPEND_UNSAFE(s, i, c) { \
-    if((uint32)(c)<=0xffff) { \
-        (s)[(i)++]=(uint16)(c); \
-    } else { \
-        (s)[(i)++]=(uint16)(((c)>>10)+0xd7c0); \
-        (s)[(i)++]=(uint16)(((c)&0x3ff)|0xdc00); \
-    } \
-}
+#define CBU16_APPEND_UNSAFE(s, i, c)                 \
+  {                                                  \
+    if ((uint32_t)(c) <= 0xffff) {                   \
+      (s)[(i)++] = (uint16_t)(c);                    \
+    } else {                                         \
+      (s)[(i)++] = (uint16_t)(((c) >> 10) + 0xd7c0); \
+      (s)[(i)++] = (uint16_t)(((c)&0x3ff) | 0xdc00); \
+    }                                                \
+  }
 
 }  // namespace base_icu
 

diff --git a/src/build/build_config.h b/src/build/build_config.h
index b07660d..d8c3db6 100644
--- a/src/build/build_config.h
+++ b/src/build/build_config.h

@@ -61,8 +61,8 @@
 #error Please add support for your platform in build/build_config.h
 #endif
 
-#if defined(USE_OPENSSL) && defined(USE_NSS)
-#error Cannot use both OpenSSL and NSS
+#if defined(USE_OPENSSL_CERTS) && defined(USE_NSS_CERTS)
+#error Cannot use both OpenSSL and NSS for certificates
 #endif
 
 // For access to standard BSD features, use OS_BSD instead of a

diff --git a/src/url/gurl.cc b/src/url/gurl.cc
index 6801dda..b75c8f5 100644
--- a/src/url/gurl.cc
+++ b/src/url/gurl.cc

@@ -2,21 +2,25 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include "url/gurl.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <ostream>
+
+#include "base/logging.h"
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_util.h"
+
 #ifdef WIN32
 #include <windows.h>
 #else
 #include <pthread.h>
 #endif
 
-#include <algorithm>
-#include <ostream>
-
-#include "url/gurl.h"
-
-#include "base/logging.h"
-#include "url/url_canon_stdstring.h"
-#include "url/url_util.h"
-
 namespace {
 
 static std::string* empty_string = NULL;
@@ -59,7 +63,7 @@
 
 #endif  // WIN32
 
-} // namespace
+}  // namespace
 
 GURL::GURL() : is_valid_(false) {
 }
@@ -74,16 +78,16 @@
   DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
 }
 
-GURL::GURL(const std::string& url_string) {
+GURL::GURL(url::base::StringPiece url_string) {
   InitCanonical(url_string, true);
 }
 
-GURL::GURL(const url::base::string16& url_string) {
+GURL::GURL(url::base::StringPiece16 url_string) {
   InitCanonical(url_string, true);
 }
 
 GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) {
-  InitCanonical(url_string, false);
+  InitCanonical(url::base::StringPiece(url_string), false);
 }
 
 GURL::GURL(const char* canonical_spec,
@@ -104,7 +108,8 @@
 }
 
 template<typename STR>
-void GURL::InitCanonical(const STR& input_spec, bool trim_path_end) {
+void GURL::InitCanonical(url::base::BasicStringPiece<STR> input_spec,
+                         bool trim_path_end) {
   // Reserve enough room in the output for the input, plus some extra so that
   // we have room if we have to escape a few things without reallocating.
   spec_.reserve(input_spec.size() + 32);
@@ -130,7 +135,7 @@
 #ifndef NDEBUG
   // For testing purposes, check that the parsed canonical URL is identical to
   // what we would have produced. Skip checking for invalid URLs have no meaning
-  // and we can't always canonicalize then reproducabely.
+  // and we can't always canonicalize them reproducibly.
   if (is_valid_) {
     url::Component scheme;
     // We can't do this check on the inner_url of a filesystem URL, as
@@ -193,17 +198,8 @@
   return spec_ > other.spec_;
 }
 
-GURL GURL::Resolve(const std::string& relative) const {
-  return ResolveWithCharsetConverter(relative, NULL);
-}
-GURL GURL::Resolve(const url::base::string16& relative) const {
-  return ResolveWithCharsetConverter(relative, NULL);
-}
-
 // Note: code duplicated below (it's inconvenient to use a template here).
-GURL GURL::ResolveWithCharsetConverter(
-    const std::string& relative,
-    url::CharsetConverter* charset_converter) const {
+GURL GURL::Resolve(const std::string& relative) const {
   // Not allowed for invalid URLs.
   if (!is_valid_)
     return GURL();
@@ -218,7 +214,7 @@
   if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
                             parsed_, relative.data(),
                             static_cast<int>(relative.length()),
-                            charset_converter, &output, &result.parsed_)) {
+                            nullptr, &output, &result.parsed_)) {
     // Error resolving, return an empty URL.
     return GURL();
   }
@@ -234,9 +230,7 @@
 }
 
 // Note: code duplicated above (it's inconvenient to use a template here).
-GURL GURL::ResolveWithCharsetConverter(
-    const url::base::string16& relative,
-    url::CharsetConverter* charset_converter) const {
+GURL GURL::Resolve(const url::base::string16& relative) const {
   // Not allowed for invalid URLs.
   if (!is_valid_)
     return GURL();
@@ -251,7 +245,7 @@
   if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
                             parsed_, relative.data(),
                             static_cast<int>(relative.length()),
-                            charset_converter, &output, &result.parsed_)) {
+                            nullptr, &output, &result.parsed_)) {
     // Error resolving, return an empty URL.
     return GURL();
   }
@@ -320,7 +314,7 @@
 
 GURL GURL::GetOrigin() const {
   // This doesn't make sense for invalid or nonstandard URLs, so return
-  // the empty URL
+  // the empty URL.
   if (!is_valid_ || !IsStandard())
     return GURL();
 
@@ -338,7 +332,7 @@
 }
 
 GURL GURL::GetAsReferrer() const {
-  if (!is_valid_ || !SchemeIsHTTPOrHTTPS())
+  if (!SchemeIsValidForReferrer())
     return GURL();
 
   if (!has_ref() && !has_username() && !has_password())
@@ -379,18 +373,23 @@
   return url::IsStandard(spec_.data(), parsed_.scheme);
 }
 
-bool GURL::SchemeIs(const char* lower_ascii_scheme) const {
+bool GURL::SchemeIs(url::base::StringPiece lower_ascii_scheme) const {
+  DCHECK(url::base::IsStringASCII(lower_ascii_scheme));
+  DCHECK(url::base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme);
+
   if (parsed_.scheme.len <= 0)
-    return lower_ascii_scheme == NULL;
-  return url::LowerCaseEqualsASCII(spec_.data() + parsed_.scheme.begin,
-                                   spec_.data() + parsed_.scheme.end(),
-                                   lower_ascii_scheme);
+    return lower_ascii_scheme.empty();
+  return scheme_piece() == lower_ascii_scheme;
 }
 
 bool GURL::SchemeIsHTTPOrHTTPS() const {
   return SchemeIs(url::kHttpScheme) || SchemeIs(url::kHttpsScheme);
 }
 
+bool GURL::SchemeIsValidForReferrer() const {
+  return is_valid_ && IsReferrerScheme(spec_.data(), parsed_.scheme);
+}
+
 bool GURL::SchemeIsWSOrWSS() const {
   return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme);
 }
@@ -416,16 +415,17 @@
 }
 
 std::string GURL::PathForRequest() const {
-  DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty";
+  DCHECK(parsed_.path.len > 0)
+      << "Canonical path for requests should be non-empty";
   if (parsed_.ref.len >= 0) {
-    // Clip off the reference when it exists. The reference starts after the #
-    // sign, so we have to subtract one to also remove it.
+    // Clip off the reference when it exists. The reference starts after the
+    // #-sign, so we have to subtract one to also remove it.
     return std::string(spec_, parsed_.path.begin,
                        parsed_.ref.begin - parsed_.path.begin - 1);
   }
   // Compute the actual path length, rather than depending on the spec's
-  // terminator.  If we're an inner_url, our spec continues on into our outer
-  // url's path/query/ref.
+  // terminator. If we're an inner_url, our spec continues on into our outer
+  // URL's path/query/ref.
   int path_len = parsed_.path.len;
   if (parsed_.query.is_valid())
     path_len = parsed_.query.end() - parsed_.path.begin;
@@ -490,48 +490,45 @@
 
 #endif  // WIN32
 
-bool GURL::DomainIs(const char* lower_ascii_domain,
-                    int domain_len) const {
-  // Return false if this URL is not valid or domain is empty.
-  if (!is_valid_ || !domain_len)
+bool GURL::DomainIs(url::base::StringPiece lower_ascii_domain) const {
+  if (!is_valid_ || lower_ascii_domain.empty())
     return false;
 
   // FileSystem URLs have empty parsed_.host, so check this first.
   if (SchemeIsFileSystem() && inner_url_)
-    return inner_url_->DomainIs(lower_ascii_domain, domain_len);
+    return inner_url_->DomainIs(lower_ascii_domain);
 
   if (!parsed_.host.is_nonempty())
     return false;
 
-  // Check whether the host name is end with a dot. If yes, treat it
-  // the same as no-dot unless the input comparison domain is end
-  // with dot.
-  const char* last_pos = spec_.data() + parsed_.host.end() - 1;
+  // If the host name ends with a dot but the input domain doesn't,
+  // then we ignore the dot in the host name.
+  const char* host_last_pos = spec_.data() + parsed_.host.end() - 1;
   int host_len = parsed_.host.len;
-  if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
-    last_pos--;
+  int domain_len = lower_ascii_domain.length();
+  if ('.' == *host_last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
+    host_last_pos--;
     host_len--;
   }
 
-  // Return false if host's length is less than domain's length.
   if (host_len < domain_len)
     return false;
 
-  // Compare this url whether belong specific domain.
-  const char* start_pos = spec_.data() + parsed_.host.begin +
-                          host_len - domain_len;
+  // |host_first_pos| is the start of the compared part of the host name, not
+  // start of the whole host name.
+  const char* host_first_pos = spec_.data() + parsed_.host.begin +
+                               host_len - domain_len;
 
-  if (!url::LowerCaseEqualsASCII(start_pos,
-                                 last_pos + 1,
-                                 lower_ascii_domain,
-                                 lower_ascii_domain + domain_len))
+  if (!url::base::LowerCaseEqualsASCII(
+           url::base::StringPiece(host_first_pos, domain_len), lower_ascii_domain))
     return false;
 
-  // Check whether host has right domain start with dot, make sure we got
-  // right domain range. For example www.google.com has domain
-  // "google.com" but www.iamnotgoogle.com does not.
+  // Make sure there aren't extra characters in host before the compared part;
+  // if the host name is longer than the input domain name, then the character
+  // immediately before the compared part should be a dot. For example,
+  // www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
   if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
-      '.' != *(start_pos - 1))
+      '.' != *(host_first_pos - 1))
     return false;
 
   return true;

diff --git a/src/url/gurl.h b/src/url/gurl.h
index 16d9a2a..70f70ec 100644
--- a/src/url/gurl.h
+++ b/src/url/gurl.h

@@ -5,21 +5,48 @@
 #ifndef URL_GURL_H_
 #define URL_GURL_H_
 
+#include <stddef.h>
+
 #include <iosfwd>
 #include <memory>
 #include <string>
 
 #include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_canon_stdstring.h"
 #include "url/url_constants.h"
 #include "url/url_export.h"
-#include "url/url_parse.h"
 
+// Represents a URL.
+//
+// A parsed canonicalized URL will be guaranteed UTF-8. Only the ref (if
+// specified) can be non-ASCII, the host, path, etc. will be guaranteed ASCII
+// and any non-ASCII characters will be encoded and % escaped.
+//
+// The string representation of a URL is called the spec(). Getting the
+// spec will assert if the URL is invalid to help protect against malicious
+// URLs. If you want the "best effort" canonicalization of an invalid URL, you
+// can use possibly_invalid_spec(). Test validity with is_valid(). Data and
+// javascript URLs use GetContent() to extract the data.
+//
+// This class has existence checkers and getters for the various components of
+// a URL. Existence is different than being nonempty. "http://www.google.com/?"
+// has a query that just happens to be empty, and has_query() will return true
+// while the query getters will return the empty string.
+//
+// Prefer not to modify a URL using string operations (though sometimes this is
+// unavoidable). Instead, use ReplaceComponents which can replace or delete
+// multiple parts of a URL in one step, doesn't re-canonicalize unchanged
+// sections, and avoids some screw-ups. An example is creating a URL with a
+// path that contains a literal '#'. Using string concatenation will generate a
+// URL with a truncated path and a reference fragment, while ReplaceComponents
+// will know to escape this and produce the desired result.
 class URL_EXPORT GURL {
  public:
-  typedef url::StdStringReplacements<std::string> Replacements;
-  typedef url::StdStringReplacements<url::base::string16> ReplacementsW;
+  typedef url::StringPieceReplacements<std::string> Replacements;
+  typedef url::StringPieceReplacements<url::base::string16> ReplacementsW;
 
   // Creates an empty, invalid URL.
   GURL();
@@ -28,15 +55,9 @@
   // to reallocating the string. It does not re-parse.
   GURL(const GURL& other);
 
-  // The narrow version requires the input be UTF-8. Invalid UTF-8 input will
-  // result in an invalid URL.
-  //
-  // The wide version should also take an encoding parameter so we know how to
-  // encode the query parameters. It is probably sufficient for the narrow
-  // version to assume the query parameter encoding should be the same as the
-  // input encoding.
-  explicit GURL(const std::string& url_string /*, output_param_encoding*/);
-  explicit GURL(const url::base::string16& url_string /*, output_param_encoding*/);
+  // The strings passed to this constructor should be UTF-8 / UTF-16.
+  explicit GURL(url::base::StringPiece url_string);
+  explicit GURL(url::base::StringPiece16 url_string);
 
   // Constructor for URLs that have already been parsed and canonicalized. This
   // is used for conversions from KURL, for example. The caller must supply all
@@ -91,7 +112,7 @@
 
   // Returns the potentially invalid spec for the URL. This spec MUST NOT be
   // modified or sent over the network. It is designed to be displayed in error
-  // messages to the user, as the apperance of the spec may explain the error.
+  // messages to the user, as the appearance of the spec may explain the error.
   // If the spec is valid, the valid spec will be returned.
   //
   // The returned string is guaranteed to be valid UTF-8.
@@ -124,9 +145,8 @@
   // pages.
   //
   // It may be impossible to resolve the URLs properly. If the input is not
-  // "standard" (SchemeIsStandard() == false) and the input looks relative, we
-  // can't resolve it. In these cases, the result will be an empty, invalid
-  // GURL.
+  // "standard" (IsStandard() == false) and the input looks relative, we can't
+  // resolve it. In these cases, the result will be an empty, invalid GURL.
   //
   // The result may also be a nonempty, invalid URL if the input has some kind
   // of encoding error. In these cases, we will try to construct a "good" URL
@@ -137,20 +157,6 @@
   GURL Resolve(const std::string& relative) const;
   GURL Resolve(const url::base::string16& relative) const;
 
-  // Like Resolve() above but takes a character set encoder which will be used
-  // for any query text specified in the input. The charset converter parameter
-  // may be NULL, in which case it will be treated as UTF-8.
-  //
-  // TODO(brettw): These should be replaced with versions that take something
-  // more friendly than a raw CharsetConverter (maybe like an ICU character set
-  // name).
-  GURL ResolveWithCharsetConverter(
-      const std::string& relative,
-      url::CharsetConverter* charset_converter) const;
-  GURL ResolveWithCharsetConverter(
-      const url::base::string16& relative,
-      url::CharsetConverter* charset_converter) const;
-
   // Creates a new GURL by replacing the current URL's components with the
   // supplied versions. See the Replacements class in url_canon.h for more.
   //
@@ -194,21 +200,24 @@
   // returned.
   GURL GetAsReferrer() const;
 
-  // Returns true if the scheme for the current URL is a known "standard"
-  // scheme. Standard schemes have an authority and a path section. This
-  // includes file: and filesystem:, which some callers may want to filter out
-  // explicitly by calling SchemeIsFile[System].
+  // Returns true if the scheme for the current URL is a known "standard-format"
+  // scheme. A standard-format scheme adheres to what RFC 3986 calls "generic
+  // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3). This includes
+  // file: and filesystem:, which some callers may want to filter out explicitly
+  // by calling SchemeIsFile[System].
   bool IsStandard() const;
 
   // Returns true if the given parameter (should be lower-case ASCII to match
-  // the canonicalized scheme) is the scheme for this URL. This call is more
-  // efficient than getting the scheme and comparing it because no copies or
-  // object constructions are done.
-  bool SchemeIs(const char* lower_ascii_scheme) const;
+  // the canonicalized scheme) is the scheme for this URL. Do not include a
+  // colon.
+  bool SchemeIs(url::base::StringPiece lower_ascii_scheme) const;
 
   // Returns true if the scheme is "http" or "https".
   bool SchemeIsHTTPOrHTTPS() const;
 
+  // Returns true if the scheme is valid for use as a referrer.
+  bool SchemeIsValidForReferrer() const;
+
   // Returns true if the scheme is "ws" or "wss".
   bool SchemeIsWSOrWSS() const;
 
@@ -223,10 +232,15 @@
     return SchemeIs(url::kFileSystemScheme);
   }
 
-  // If the scheme indicates a secure connection
-  bool SchemeIsSecure() const {
-    return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme) ||
-        (SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure());
+  // Returns true if the scheme indicates a network connection that uses TLS or
+  // some other cryptographic protocol (e.g. QUIC) for security.
+  //
+  // This function is not a complete test of whether or not an origin's code
+  // is minimally trustworthy. For that, see Chromium's |IsOriginSecure| for
+  // higher-level and more complete semantics. See that function's documentation
+  // for more detail.
+  bool SchemeIsCryptographic() const {
+    return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme);
   }
 
   // Returns true if the scheme is "blob".
@@ -235,82 +249,112 @@
   }
 
   // The "content" of the URL is everything after the scheme (skipping the
-  // scheme delimiting colon). It is an error to get the origin of an invalid
-  // URL. The result will be an empty string.
+  // scheme delimiting colon). It is an error to get the content of an invalid
+  // URL: the result will be an empty string.
   std::string GetContent() const;
 
   // Returns true if the hostname is an IP address. Note: this function isn't
   // as cheap as a simple getter because it re-parses the hostname to verify.
-  // This currently identifies only IPv4 addresses (bug 822685).
   bool HostIsIPAddress() const;
 
-  // Getters for various components of the URL. The returned string will be
-  // empty if the component is empty or is not present.
-  std::string scheme() const {  // Not including the colon. See also SchemeIs.
+  // Not including the colon. If you are comparing schemes, prefer SchemeIs.
+  bool has_scheme() const {
+    return parsed_.scheme.len >= 0;
+  }
+  std::string scheme() const {
     return ComponentString(parsed_.scheme);
   }
+  url::base::StringPiece scheme_piece() const {
+    return ComponentStringPiece(parsed_.scheme);
+  }
+
+  bool has_username() const {
+    return parsed_.username.len >= 0;
+  }
   std::string username() const {
     return ComponentString(parsed_.username);
   }
-  std::string password() const {
-    return ComponentString(parsed_.password);
-  }
-  // Note that this may be a hostname, an IPv4 address, or an IPv6 literal
-  // surrounded by square brackets, like "[2001:db8::1]".  To exclude these
-  // brackets, use HostNoBrackets() below.
-  std::string host() const {
-    return ComponentString(parsed_.host);
-  }
-  std::string port() const {  // Returns -1 if "default"
-    return ComponentString(parsed_.port);
-  }
-  std::string path() const {  // Including first slash following host
-    return ComponentString(parsed_.path);
-  }
-  std::string query() const {  // Stuff following '?'
-    return ComponentString(parsed_.query);
-  }
-  std::string ref() const {  // Stuff following '#'
-    return ComponentString(parsed_.ref);
+  url::base::StringPiece username_piece() const {
+    return ComponentStringPiece(parsed_.username);
   }
 
-  // Existance querying. These functions will return true if the corresponding
-  // URL component exists in this URL. Note that existance is different than
-  // being nonempty. http://www.google.com/? has a query that just happens to
-  // be empty, and has_query() will return true.
-  bool has_scheme() const {
-    return parsed_.scheme.len >= 0;
-  }
-  bool has_username() const {
-    return parsed_.username.len >= 0;
-  }
   bool has_password() const {
     return parsed_.password.len >= 0;
   }
+  std::string password() const {
+    return ComponentString(parsed_.password);
+  }
+  url::base::StringPiece password_piece() const {
+    return ComponentStringPiece(parsed_.password);
+  }
+
+  // The host may be a hostname, an IPv4 address, or an IPv6 literal surrounded
+  // by square brackets, like "[2001:db8::1]". To exclude these brackets, use
+  // HostNoBrackets() below.
   bool has_host() const {
-    // Note that hosts are special, absense of host means length 0.
+    // Note that hosts are special, absence of host means length 0.
     return parsed_.host.len > 0;
   }
+  std::string host() const {
+    return ComponentString(parsed_.host);
+  }
+  url::base::StringPiece host_piece() const {
+    return ComponentStringPiece(parsed_.host);
+  }
+
+  // The port if one is explicitly specified. Most callers will want IntPort()
+  // or EffectiveIntPort() instead of these. The getters will not include the
+  // ':'.
   bool has_port() const {
     return parsed_.port.len >= 0;
   }
+  std::string port() const {
+    return ComponentString(parsed_.port);
+  }
+  url::base::StringPiece port_piece() const {
+    return ComponentStringPiece(parsed_.port);
+  }
+
+  // Including first slash following host, up to the query. The URL
+  // "http://www.google.com/" has a path of "/".
   bool has_path() const {
-    // Note that http://www.google.com/" has a path, the path is "/". This can
-    // return false only for invalid or nonstandard URLs.
     return parsed_.path.len >= 0;
   }
+  std::string path() const {
+    return ComponentString(parsed_.path);
+  }
+  url::base::StringPiece path_piece() const {
+    return ComponentStringPiece(parsed_.path);
+  }
+
+  // Stuff following '?' up to the ref. The getters will not include the '?'.
   bool has_query() const {
     return parsed_.query.len >= 0;
   }
+  std::string query() const {
+    return ComponentString(parsed_.query);
+  }
+  url::base::StringPiece query_piece() const {
+    return ComponentStringPiece(parsed_.query);
+  }
+
+  // Stuff following '#' to the end of the string. This will be UTF-8 encoded
+  // (not necessarily ASCII). The getters will not include the '#'.
   bool has_ref() const {
     return parsed_.ref.len >= 0;
   }
+  std::string ref() const {
+    return ComponentString(parsed_.ref);
+  }
+  url::base::StringPiece ref_piece() const {
+    return ComponentStringPiece(parsed_.ref);
+  }
 
   // Returns a parsed version of the port. Can also be any of the special
   // values defined in Parsed for ExtractPort.
   int IntPort() const;
 
-  // Returns the port number of the url, or the default port number.
+  // Returns the port number of the URL, or the default port number.
   // If the scheme has no concept of port (or unknown default) returns
   // PORT_UNSPECIFIED.
   int EffectiveIntPort() const;
@@ -324,54 +368,48 @@
   std::string PathForRequest() const;
 
   // Returns the host, excluding the square brackets surrounding IPv6 address
-  // literals.  This can be useful for passing to getaddrinfo().
+  // literals. This can be useful for passing to getaddrinfo().
   std::string HostNoBrackets() const;
 
   // Returns true if this URL's host matches or is in the same domain as
-  // the given input string. For example if this URL was "www.google.com",
-  // this would match "com", "google.com", and "www.google.com
-  // (input domain should be lower-case ASCII to match the canonicalized
-  // scheme). This call is more efficient than getting the host and check
+  // the given input string. For example, if the hostname of the URL is
+  // "www.google.com", this will return true for "com", "google.com", and
+  // "www.google.com".
+  //
+  // The input domain should be lower-case ASCII to match the canonicalized
+  // scheme. This call is more efficient than getting the host and checking
   // whether host has the specific domain or not because no copies or
   // object constructions are done.
-  //
-  // If function DomainIs has parameter domain_len, which means the parameter
-  // lower_ascii_domain does not gurantee to terminate with NULL character.
-  bool DomainIs(const char* lower_ascii_domain, int domain_len) const;
+  bool DomainIs(url::base::StringPiece lower_ascii_domain) const;
 
-  // If function DomainIs only has parameter lower_ascii_domain, which means
-  // domain string should be terminate with NULL character.
-  bool DomainIs(const char* lower_ascii_domain) const {
-    return DomainIs(lower_ascii_domain,
-                    static_cast<int>(strlen(lower_ascii_domain)));
-  }
-
-  // Swaps the contents of this GURL object with the argument without doing
+  // Swaps the contents of this GURL object with |other|, without doing
   // any memory allocations.
   void Swap(GURL* other);
 
   // Returns a reference to a singleton empty GURL. This object is for callers
   // who return references but don't have anything to return in some cases.
-  // This function may be called from any thread.
+  // If you just want an empty URL for normal use, prefer GURL(). This function
+  // may be called from any thread.
   static const GURL& EmptyGURL();
 
-  // Returns the inner URL of a nested URL [currently only non-null for
-  // filesystem: URLs].
+  // Returns the inner URL of a nested URL (currently only non-null for
+  // filesystem URLs).
   const GURL* inner_url() const {
     return inner_url_.get();
   }
 
  private:
   // Variant of the string parsing constructor that allows the caller to elect
-  // retain trailing whitespace, if any, on the passed URL spec but only  if the
-  // scheme is one that allows trailing whitespace. The primary use-case is
+  // to retain trailing whitespace, if any, on the passed URL spec, but only if
+  // the scheme is one that allows trailing whitespace. The primary use-case is
   // for data: URLs. In most cases, you want to use the single parameter
   // constructor above.
   enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };
   GURL(const std::string& url_string, RetainWhiteSpaceSelector);
 
   template<typename STR>
-  void InitCanonical(const STR& input_spec, bool trim_path_end);
+  void InitCanonical(url::base::BasicStringPiece<STR> input_spec,
+                     bool trim_path_end);
 
   void InitializeFromCanonicalSpec();
 
@@ -381,6 +419,11 @@
       return std::string();
     return std::string(spec_, comp.begin, comp.len);
   }
+  url::base::StringPiece ComponentStringPiece(const url::Component& comp) const {
+    if (comp.len <= 0)
+      return url::base::StringPiece();
+    return url::base::StringPiece(&spec_[comp.begin], comp.len);
+  }
 
   // The actual text of the URL, in canonical ASCII form.
   std::string spec_;
@@ -395,8 +438,6 @@
 
   // Used for nested schemes [currently only filesystem:].
   std::unique_ptr<GURL> inner_url_;
-
-  // TODO bug 684583: Add encoding for query params.
 };
 
 // Stream operator so GURL can be used in assertion statements.

diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc
index 112ee5f..7b83468 100644
--- a/src/url/gurl_unittest.cc
+++ b/src/url/gurl_unittest.cc

@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <stddef.h>
+
 #include "base/macros.h"
 #include "testing/base/public/gunit.h"
 #include "url/gurl.h"
@@ -45,14 +47,15 @@
   EXPECT_EQ("something:///HOSTNAME.com/",
             TypesTestCase("something:///HOSTNAME.com/"));
 
-  // In the reverse, known schemes should always trigger standard URL handling.
+  // Conversely, URLs with known schemes should always trigger standard URL
+  // handling.
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
 
 #ifdef WIN32
-  // URLs that look like absolute Windows drive specs.
+  // URLs that look like Windows absolute path specs.
   EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
   EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
   EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
@@ -60,11 +63,16 @@
 #endif
 }
 
-// Test the basic creation and querying of components in a GURL. We assume
+// Test the basic creation and querying of components in a GURL. We assume that
 // the parser is already tested and works, so we are mostly interested if the
 // object does the right thing with the results.
 TEST(GURLTest, Components) {
+  GURL empty_url(WStringToUTF16(L""));
+  EXPECT_TRUE(empty_url.is_empty());
+  EXPECT_FALSE(empty_url.is_valid());
+
   GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
+  EXPECT_FALSE(url.is_empty());
   EXPECT_TRUE(url.is_valid());
   EXPECT_TRUE(url.SchemeIs("http"));
   EXPECT_FALSE(url.SchemeIsFile());
@@ -175,7 +183,7 @@
   EXPECT_EQ("", invalid2.ref());
 }
 
-// This is a regression test for http://crbug.com/309975 .
+// This is a regression test for http://crbug.com/309975.
 TEST(GURLTest, SelfAssign) {
   GURL a("filesystem:http://example.com/temporary/");
   // This should not crash.
@@ -245,9 +253,9 @@
 }
 
 TEST(GURLTest, ExtraSlashesBeforeAuthority) {
-  // According to RFC3986, the hier-part for URI with an authority must use only
-  // two slashes, GURL intentionally just ignores slashes more than 2 and parses
-  // the following part as an authority.
+  // According to RFC3986, the hierarchical part for a URI with an authority
+  // must use only two slashes; GURL intentionally just ignores extra slashes
+  // if there are more than 2, and parses the following part as an authority.
   GURL url("http:///host");
   EXPECT_EQ("host", url.host());
   EXPECT_EQ("/", url.path());
@@ -281,6 +289,9 @@
     const char* expected;
   } resolve_cases[] = {
     {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"},
+    {"http://www.google.com/foo/", "bar", true, "http://www.google.com/foo/bar"},
+    {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
+    {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
     {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
     {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
     {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
@@ -375,7 +386,7 @@
 }
 
 TEST(GURLTest, Replacements) {
-  // The url canonicalizer replacement test will handle most of these case.
+  // The URL canonicalizer replacement test will handle most of these cases.
   // The most important thing to do here is to check that the proper
   // canonicalizer gets called based on the scheme of the input.
   struct ReplaceCase {
@@ -392,7 +403,7 @@
   } replace_cases[] = {
     {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"},
     {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"},
-    {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"},
+    {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo", "search", "ref", "http://www.google.com:99/foo?search#ref"},
 #ifdef WIN32
     {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"},
 #endif
@@ -432,7 +443,7 @@
 
   EXPECT_EQ("data: one ? two ", url_no_ref.spec());
 
-  // Importing a parsed url via this constructor overload will retain trailing
+  // Importing a parsed URL via this constructor overload will retain trailing
   // whitespace.
   GURL import_url(url_no_ref.spec(),
                   url_no_ref.parsed_for_possibly_invalid_spec(),
@@ -558,43 +569,56 @@
 }
 
 TEST(GURLTest, DomainIs) {
-  const char google_domain[] = "google.com";
+  GURL url_1("http://google.com/foo");
+  EXPECT_TRUE(url_1.DomainIs("google.com"));
 
-  GURL url_1("http://www.google.com:99/foo");
-  EXPECT_TRUE(url_1.DomainIs(google_domain));
+  // Subdomain and port are ignored.
+  GURL url_2("http://www.google.com:99/foo");
+  EXPECT_TRUE(url_2.DomainIs("google.com"));
 
-  GURL url_2("http://google.com:99/foo");
-  EXPECT_TRUE(url_2.DomainIs(google_domain));
+  // Different top-level domain.
+  GURL url_3("http://www.google.com.cn/foo");
+  EXPECT_FALSE(url_3.DomainIs("google.com"));
 
-  GURL url_3("http://google.com./foo");
-  EXPECT_TRUE(url_3.DomainIs(google_domain));
+  // Different host name.
+  GURL url_4("http://www.iamnotgoogle.com/foo");
+  EXPECT_FALSE(url_4.DomainIs("google.com"));
 
-  GURL url_4("http://google.com/foo");
-  EXPECT_FALSE(url_4.DomainIs("google.com."));
+  // The input must be lower-cased; otherwise DomainIs returns false.
+  GURL url_5("http://www.google.com/foo");
+  EXPECT_FALSE(url_5.DomainIs("Google.com"));
 
-  GURL url_5("http://google.com./foo");
-  EXPECT_TRUE(url_5.DomainIs("google.com."));
+  // If the URL is invalid, DomainIs returns false.
+  GURL invalid_url("google.com");
+  EXPECT_FALSE(invalid_url.is_valid());
+  EXPECT_FALSE(invalid_url.DomainIs("google.com"));
+}
 
-  GURL url_6("http://www.google.com./foo");
-  EXPECT_TRUE(url_6.DomainIs(".com."));
+TEST(GURLTest, DomainIsTerminatingDotBehavior) {
+  // If the host part ends with a dot, it matches input domains
+  // with or without a dot.
+  GURL url_with_dot("http://www.google.com./foo");
+  EXPECT_TRUE(url_with_dot.DomainIs("google.com"));
+  EXPECT_TRUE(url_with_dot.DomainIs("google.com."));
+  EXPECT_TRUE(url_with_dot.DomainIs(".com"));
+  EXPECT_TRUE(url_with_dot.DomainIs(".com."));
 
-  GURL url_7("http://www.balabala.com/foo");
-  EXPECT_FALSE(url_7.DomainIs(google_domain));
+  // But, if the host name doesn't end with a dot and the input
+  // domain does, then it's considered to not match.
+  GURL url_without_dot("http://google.com/foo");
+  EXPECT_FALSE(url_without_dot.DomainIs("google.com."));
 
-  GURL url_8("http://www.google.com.cn/foo");
-  EXPECT_FALSE(url_8.DomainIs(google_domain));
+  // If the host name ends with two dots, it doesn't match.
+  GURL url_with_two_dots("http://www.google.com../foo");
+  EXPECT_FALSE(url_with_two_dots.DomainIs("google.com"));
+}
 
-  GURL url_9("http://www.iamnotgoogle.com/foo");
-  EXPECT_FALSE(url_9.DomainIs(google_domain));
+TEST(GURLTest, DomainIsWithFilesystemScheme) {
+  GURL url_1("filesystem:http://www.google.com:99/foo/");
+  EXPECT_TRUE(url_1.DomainIs("google.com"));
 
-  GURL url_10("http://www.iamnotgoogle.com../foo");
-  EXPECT_FALSE(url_10.DomainIs(".com"));
-
-  GURL url_11("filesystem:http://www.google.com:99/foo/");
-  EXPECT_TRUE(url_11.DomainIs(google_domain));
-
-  GURL url_12("filesystem:http://www.iamnotgoogle.com/foo/");
-  EXPECT_FALSE(url_12.DomainIs(google_domain));
+  GURL url_2("filesystem:http://www.iamnotgoogle.com/foo/");
+  EXPECT_FALSE(url_2.DomainIs("google.com"));
 }
 
 // Newlines should be stripped from inputs.
@@ -639,4 +663,29 @@
   EXPECT_FALSE(GURL("http://bar/").SchemeIsBlob());
 }
 
+TEST(GURLTest, ContentAndPathForNonStandardURLs) {
+  struct TestCase {
+    const char* url;
+    const char* expected;
+  } cases[] = {
+      {"null", ""},
+      {"not-a-standard-scheme:this is arbitrary content",
+       "this is arbitrary content"},
+      {"view-source:http://example.com/path", "http://example.com/path"},
+      {"blob:http://example.com/GUID", "http://example.com/GUID"},
+      {"blob://http://example.com/GUID", "//http://example.com/GUID"},
+      {"blob:http://user:password@example.com/GUID",
+       "http://user:password@example.com/GUID"},
+
+      // TODO(mkwst): This seems like a bug. https://crbug.com/513600
+      {"filesystem:http://example.com/path", "/"},
+  };
+
+  for (const auto& test : cases) {
+    GURL url(test.url);
+    EXPECT_EQ(test.expected, url.path()) << test.url;
+    EXPECT_EQ(test.expected, url.GetContent()) << test.url;
+  }
+}
+
 }  // namespace url

diff --git a/src/url/origin.cc b/src/url/origin.cc
index fdb8913..43b5e7e 100644
--- a/src/url/origin.cc
+++ b/src/url/origin.cc

@@ -1,20 +1,86 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #include "url/origin.h"
 
+#include <stdint.h>
+#include <string.h>
+
 #include "base/logging.h"
-#include "base/strings/string_util.h"
+#include "url/gurl.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_constants.h"
+#include "url/url_util.h"
 
 namespace url {
 
-Origin::Origin() : string_("null") {}
+Origin::Origin() : unique_(true) {
+}
 
-Origin::Origin(const std::string& origin) : string_(origin) {
-  DCHECK(origin == "null" || MatchPattern(origin, "?*://?*"));
-  DCHECK_GT(origin.size(), 0u);
-  DCHECK(origin == "file://" || origin[origin.size() - 1] != '/');
+Origin::Origin(const GURL& url) : unique_(true) {
+  if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob()))
+    return;
+
+  if (url.SchemeIsFileSystem()) {
+    tuple_ = SchemeHostPort(*url.inner_url());
+  } else if (url.SchemeIsBlob()) {
+    // If we're dealing with a 'blob:' URL, https://url.spec.whatwg.org/#origin
+    // defines the origin as the origin of the URL which results from parsing
+    // the "path", which boils down to everything after the scheme. GURL's
+    // 'GetContent()' gives us exactly that.
+    tuple_ = SchemeHostPort(GURL(url.GetContent()));
+  } else {
+    tuple_ = SchemeHostPort(url);
+  }
+
+  unique_ = tuple_.IsInvalid();
+}
+
+Origin::Origin(base::StringPiece scheme, base::StringPiece host, uint16_t port)
+    : tuple_(scheme, host, port) {
+  unique_ = tuple_.IsInvalid();
+}
+
+Origin::~Origin() {
+}
+
+// static
+Origin Origin::UnsafelyCreateOriginWithoutNormalization(
+    base::StringPiece scheme,
+    base::StringPiece host,
+    uint16_t port) {
+  return Origin(scheme, host, port);
+}
+
+std::string Origin::Serialize() const {
+  if (unique())
+    return "null";
+
+  if (scheme() == kFileScheme)
+    return "file://";
+
+  return tuple_.Serialize();
+}
+
+bool Origin::IsSameOriginWith(const Origin& other) const {
+  if (unique_ || other.unique_)
+    return false;
+
+  return tuple_.Equals(other.tuple_);
+}
+
+bool Origin::operator<(const Origin& other) const {
+  return tuple_ < other.tuple_;
+}
+
+std::ostream& operator<<(std::ostream& out, const url::Origin& origin) {
+  return out << origin.Serialize();
+}
+
+bool IsSameOriginWith(const GURL& a, const GURL& b) {
+  return Origin(a).IsSameOriginWith(Origin(b));
 }
 
 }  // namespace url

diff --git a/src/url/origin.h b/src/url/origin.h
index 777e4e1..aab1f05 100644
--- a/src/url/origin.h
+++ b/src/url/origin.h

@@ -1,33 +1,142 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #ifndef URL_ORIGIN_H_
 #define URL_ORIGIN_H_
 
+#include <stdint.h>
+
 #include <string>
 
+#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
+#include "url/scheme_host_port.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon.h"
+#include "url/url_constants.h"
 #include "url/url_export.h"
 
+class GURL;
+
 namespace url {
 
-// Origin represents a Web Origin serialized to a string.
-// See RFC6454 for details.
+// An Origin is a tuple of (scheme, host, port), as described in RFC 6454.
+//
+// TL;DR: If you need to make a security-relevant decision, use 'url::Origin'.
+// If you only need to extract the bits of a URL which are relevant for a
+// network connection, use 'url::SchemeHostPort'.
+//
+// STL;SDR: If you aren't making actual network connections, use 'url::Origin'.
+//
+// 'Origin', like 'SchemeHostPort', is composed of a tuple of (scheme, host,
+// port), but contains a number of additional concepts which make it appropriate
+// for use as a security boundary and access control mechanism between contexts.
+//
+// This class ought to be used when code needs to determine if two resources
+// are "same-origin", and when a canonical serialization of an origin is
+// required. Note that some origins are "unique", meaning that they are not
+// same-origin with any other origin (including themselves).
+//
+// There are a few subtleties to note:
+//
+// * Invalid and non-standard GURLs are parsed as unique origins. This includes
+//   non-hierarchical URLs like 'data:text/html,...' and 'javascript:alert(1)'.
+//
+// * GURLs with schemes of 'filesystem' or 'blob' parse the origin out of the
+//   internals of the URL. That is, 'filesystem:https://example.com/temporary/f'
+//   is parsed as ('https', 'example.com', 443).
+//
+// * Unique origins all serialize to the string "null"; this means that the
+//   serializations of two unique origins are identical to each other, though
+//   the origins themselves are not "the same". This means that origins'
+//   serializations must not be relied upon for security checks.
+//
+// * GURLs with a 'file' scheme are tricky. They parse as ('file', host, 0),
+//   with a possibly empty host, but behavior may differ between embedders.
+//
+// * The host component of an IPv6 address includes brackets, just like the URL
+//   representation.
+//
+// Usage:
+//
+// * Origins are generally constructed from an already-canonicalized GURL:
+//
+//     GURL url("https://example.com/");
+//     url::Origin origin(url);
+//     origin.scheme(); // "https"
+//     origin.host(); // "example.com"
+//     origin.port(); // 443
+//     origin.unique(); // false
+//
+// * To answer the question "Are |this| and |that| "same-origin" with each
+//   other?", use |Origin::IsSameOriginWith|:
+//
+//     if (this.IsSameOriginWith(that)) {
+//       // Amazingness goes here.
+//     }
 class URL_EXPORT Origin {
  public:
+  // Creates a unique Origin.
   Origin();
-  explicit Origin(const std::string& origin);
 
-  const std::string& string() const { return string_; }
+  // Creates an Origin from |url|, as described at
+  // https://url.spec.whatwg.org/#origin, with the following additions:
+  //
+  // 1. If |url| is invalid or non-standard, a unique Origin is constructed.
+  // 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed
+  //    out of everything in the URL which follows the scheme).
+  // 3. 'file' URLs parse as ("file", host, 0), where the host may be empty.
+  explicit Origin(const GURL& url);
 
-  bool IsSameAs(const Origin& that) const {
-    return string_ == that.string_;
+  // Creates an Origin from a |scheme|, |host|, and |port|. All the parameters
+  // must be valid and canonicalized. In particular, note that this cannot be
+  // used to create unique origins; 'url::Origin()' is the right way to do that.
+  //
+  // This constructor should be used in order to pass 'Origin' objects back and
+  // forth over IPC (as transitioning through GURL would risk potentially
+  // dangerous recanonicalization); other potential callers should prefer the
+  // 'GURL'-based constructor.
+  static Origin UnsafelyCreateOriginWithoutNormalization(
+      base::StringPiece scheme,
+      base::StringPiece host,
+      uint16_t port);
+
+  ~Origin();
+
+  // For unique origins, these return ("", "", 0).
+  const std::string& scheme() const { return tuple_.scheme(); }
+  const std::string& host() const { return tuple_.host(); }
+  uint16_t port() const { return tuple_.port(); }
+
+  bool unique() const { return unique_; }
+
+  // An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with
+  // the addition that all Origins with a 'file' scheme serialize to "file://".
+  std::string Serialize() const;
+
+  // Two Origins are "same-origin" if their schemes, hosts, and ports are exact
+  // matches; and neither is unique.
+  bool IsSameOriginWith(const Origin& other) const;
+  bool operator==(const Origin& other) const {
+    return IsSameOriginWith(other);
   }
 
+  // Allows Origin to be used as a key in STL (for example, a std::set or
+  // std::map).
+  bool operator<(const Origin& other) const;
+
  private:
-  std::string string_;
+  Origin(base::StringPiece scheme, base::StringPiece host, uint16_t port);
+
+  SchemeHostPort tuple_;
+  bool unique_;
 };
 
+URL_EXPORT std::ostream& operator<<(std::ostream& out, const Origin& origin);
+
+URL_EXPORT bool IsSameOriginWith(const GURL& a, const GURL& b);
+
 }  // namespace url
 
 #endif  // URL_ORIGIN_H_

diff --git a/src/url/origin_unittest.cc b/src/url/origin_unittest.cc
index 910a1cf..68371a8 100644
--- a/src/url/origin_unittest.cc
+++ b/src/url/origin_unittest.cc

@@ -1,41 +1,255 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include "testing/base/public/gunit.h"
-#include "url/origin.h"
+#include <stddef.h>
+#include <stdint.h>
 
-namespace url {
+#include "base/logging.h"
+#include "base/macros.h"
+#include "testing/base/public/gunit.h"
+#include "url/gurl.h"
+#include "url/origin.h"
 
 namespace {
 
-// Each test examines the Origin is constructed correctly without
-// violating DCHECKs.
-TEST(OriginTest, constructEmpty) {
-  Origin origin;
-  EXPECT_EQ("null", origin.string());
+TEST(OriginTest, UniqueOriginComparison) {
+  url::Origin unique_origin;
+  EXPECT_EQ("", unique_origin.scheme());
+  EXPECT_EQ("", unique_origin.host());
+  EXPECT_EQ(0, unique_origin.port());
+  EXPECT_TRUE(unique_origin.unique());
+  EXPECT_FALSE(unique_origin.IsSameOriginWith(unique_origin));
+
+  const char* const urls[] = {"data:text/html,Hello!",
+                              "javascript:alert(1)",
+                              "file://example.com:443/etc/passwd",
+                              "yay",
+                              "http::///invalid.example.com/"};
+
+  for (const auto& test_url : urls) {
+    SCOPED_TRACE(test_url);
+    GURL url(test_url);
+    url::Origin origin(url);
+    EXPECT_EQ("", origin.scheme());
+    EXPECT_EQ("", origin.host());
+    EXPECT_EQ(0, origin.port());
+    EXPECT_TRUE(origin.unique());
+    EXPECT_FALSE(origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(unique_origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(origin.IsSameOriginWith(unique_origin));
+  }
 }
 
-TEST(OriginTest, constructNull) {
-  Origin origin("null");
-  EXPECT_EQ("null", origin.string());
+TEST(OriginTest, ConstructFromGURL) {
+  url::Origin different_origin(GURL("https://not-in-the-list.test/"));
+
+  struct TestCases {
+    const char* const url;
+    const char* const expected_scheme;
+    const char* const expected_host;
+    const uint16_t expected_port;
+  } cases[] = {
+      // IP Addresses
+      {"http://192.168.9.1/", "http", "192.168.9.1", 80},
+      {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80},
+
+      // Punycode
+      {"http://☃.net/", "http", "xn--n3h.net", 80},
+      {"blob:http://☃.net/", "http", "xn--n3h.net", 80},
+
+      // Generic URLs
+      {"http://example.com/", "http", "example.com", 80},
+      {"http://example.com:123/", "http", "example.com", 123},
+      {"https://example.com/", "https", "example.com", 443},
+      {"https://example.com:123/", "https", "example.com", 123},
+      {"http://user:pass@example.com/", "http", "example.com", 80},
+      {"http://example.com:123/?query", "http", "example.com", 123},
+      {"https://example.com/#1234", "https", "example.com", 443},
+      {"https://u:p@example.com:123/?query#1234", "https", "example.com", 123},
+
+      // Registered URLs
+      {"ftp://example.com/", "ftp", "example.com", 21},
+      {"gopher://example.com/", "gopher", "example.com", 70},
+      {"ws://example.com/", "ws", "example.com", 80},
+      {"wss://example.com/", "wss", "example.com", 443},
+
+      // file: URLs
+      {"file:///etc/passwd", "file", "", 0},
+      {"file://example.com/etc/passwd", "file", "example.com", 0},
+
+      // Filesystem:
+      {"filesystem:http://example.com/type/", "http", "example.com", 80},
+      {"filesystem:http://example.com:123/type/", "http", "example.com", 123},
+      {"filesystem:https://example.com/type/", "https", "example.com", 443},
+      {"filesystem:https://example.com:123/type/", "https", "example.com", 123},
+
+      // Blob:
+      {"blob:http://example.com/guid-goes-here", "http", "example.com", 80},
+      {"blob:http://example.com:123/guid-goes-here", "http", "example.com", 123},
+      {"blob:https://example.com/guid-goes-here", "https", "example.com", 443},
+      {"blob:http://u:p@example.com/guid-goes-here", "http", "example.com", 80},
+  };
+
+  for (const auto& test_case : cases) {
+    SCOPED_TRACE(test_case.url);
+    GURL url(test_case.url);
+    EXPECT_TRUE(url.is_valid());
+    url::Origin origin(url);
+    EXPECT_EQ(test_case.expected_scheme, origin.scheme());
+    EXPECT_EQ(test_case.expected_host, origin.host());
+    EXPECT_EQ(test_case.expected_port, origin.port());
+    EXPECT_FALSE(origin.unique());
+    EXPECT_TRUE(origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(different_origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(origin.IsSameOriginWith(different_origin));
+  }
 }
 
-TEST(OriginTest, constructValidOrigin) {
-  Origin origin("http://example.com:8080");
-  EXPECT_EQ("http://example.com:8080", origin.string());
+TEST(OriginTest, Serialization) {
+  struct TestCases {
+    const char* const url;
+    const char* const expected;
+  } cases[] = {
+      {"http://192.168.9.1/", "http://192.168.9.1"},
+      {"http://[2001:db8::1]/", "http://[2001:db8::1]"},
+      {"http://☃.net/", "http://xn--n3h.net"},
+      {"http://example.com/", "http://example.com"},
+      {"http://example.com:123/", "http://example.com:123"},
+      {"https://example.com/", "https://example.com"},
+      {"https://example.com:123/", "https://example.com:123"},
+      {"file:///etc/passwd", "file://"},
+      {"file://example.com/etc/passwd", "file://"},
+  };
+
+  for (const auto& test_case : cases) {
+    SCOPED_TRACE(test_case.url);
+    GURL url(test_case.url);
+    EXPECT_TRUE(url.is_valid());
+    url::Origin origin(url);
+    EXPECT_EQ(test_case.expected, origin.Serialize());
+
+    // The '<<' operator should produce the same serialization as Serialize().
+    std::stringstream out;
+    out << origin;
+    EXPECT_EQ(test_case.expected, out.str());
+  }
 }
 
-TEST(OriginTest, constructValidFileOrigin) {
-  Origin origin("file://");
-  EXPECT_EQ("file://", origin.string());
+TEST(OriginTest, Comparison) {
+  // These URLs are arranged in increasing order:
+  const char* const urls[] = {
+      "data:uniqueness",
+      "http://a:80",
+      "http://b:80",
+      "https://a:80",
+      "https://b:80",
+      "http://a:81",
+      "http://b:81",
+      "https://a:81",
+      "https://b:81",
+  };
+
+  for (size_t i = 0; i < arraysize(urls); i++) {
+    GURL current_url(urls[i]);
+    url::Origin current(current_url);
+    for (size_t j = i; j < arraysize(urls); j++) {
+      GURL compare_url(urls[j]);
+      url::Origin to_compare(compare_url);
+      EXPECT_EQ(i < j, current < to_compare) << i << " < " << j;
+      EXPECT_EQ(j < i, to_compare < current) << j << " < " << i;
+    }
+  }
 }
 
-TEST(OriginTest, constructValidOriginWithoutPort) {
-  Origin origin("wss://example2.com");
-  EXPECT_EQ("wss://example2.com", origin.string());
+TEST(OriginTest, UnsafelyCreate) {
+  struct TestCase {
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } cases[] = {
+      {"http", "example.com", 80},
+      {"http", "example.com", 123},
+      {"https", "example.com", 443},
+      {"https", "example.com", 123},
+      {"file", "", 0},
+      {"file", "example.com", 0},
+  };
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization(
+        test.scheme, test.host, test.port);
+    EXPECT_EQ(test.scheme, origin.scheme());
+    EXPECT_EQ(test.host, origin.host());
+    EXPECT_EQ(test.port, origin.port());
+    EXPECT_FALSE(origin.unique());
+    EXPECT_TRUE(origin.IsSameOriginWith(origin));
+  }
 }
 
-}  // namespace
+TEST(OriginTest, UnsafelyCreateUniqueOnInvalidInput) {
+  struct TestCases {
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } cases[] = {{"", "", 0},
+               {"data", "", 0},
+               {"blob", "", 0},
+               {"filesystem", "", 0},
+               {"data", "example.com", 80},
+               {"http", "☃.net", 80},
+               {"http\nmore", "example.com", 80},
+               {"http\rmore", "example.com", 80},
+               {"http\n", "example.com", 80},
+               {"http\r", "example.com", 80},
+               {"http", "example.com\nnot-example.com", 80},
+               {"http", "example.com\rnot-example.com", 80},
+               {"http", "example.com\n", 80},
+               {"http", "example.com\r", 80},
+               {"http", "example.com", 0},
+               {"file", "", 80}};
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization(
+        test.scheme, test.host, test.port);
+    EXPECT_EQ("", origin.scheme());
+    EXPECT_EQ("", origin.host());
+    EXPECT_EQ(0, origin.port());
+    EXPECT_TRUE(origin.unique());
+    EXPECT_FALSE(origin.IsSameOriginWith(origin));
+  }
+}
+
+TEST(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) {
+  struct TestCases {
+    const char* scheme;
+    size_t scheme_length;
+    const char* host;
+    size_t host_length;
+    uint16_t port;
+  } cases[] = {{"http\0more", 9, "example.com", 11, 80},
+               {"http\0", 5, "example.com", 11, 80},
+               {"\0http", 5, "example.com", 11, 80},
+               {"http", 4, "example.com\0not-example.com", 27, 80},
+               {"http", 4, "example.com\0", 12, 80},
+               {"http", 4, "\0example.com", 12, 80}};
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization(
+        std::string(test.scheme, test.scheme_length),
+        std::string(test.host, test.host_length), test.port);
+    EXPECT_EQ("", origin.scheme());
+    EXPECT_EQ("", origin.host());
+    EXPECT_EQ(0, origin.port());
+    EXPECT_TRUE(origin.unique());
+    EXPECT_FALSE(origin.IsSameOriginWith(origin));
+  }
+}
 
 }  // namespace url

diff --git a/src/url/scheme_host_port.cc b/src/url/scheme_host_port.cc
new file mode 100644
index 0000000..ebc5232
--- /dev/null
+++ b/src/url/scheme_host_port.cc

@@ -0,0 +1,182 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/scheme_host_port.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <tuple>
+
+#include "base/logging.h"
+#include "url/gurl.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_constants.h"
+#include "url/url_util.h"
+
+namespace url {
+
+namespace {
+
+// Returns true iff |host| is byte-for-byte identical to the output the host
+// canonicalizer produces for it. When canonicalization yields an empty host
+// or reports a BROKEN family, |host| is compared against the empty string.
+bool IsCanonicalHost(const base::StringPiece& host) {
+  std::string canonicalized;
+
+  // Run the whole of |host| through the canonicalizer (same approach as the
+  // equivalent code in net/base).
+  const Component whole_host(0, static_cast<int>(host.length()));
+  StdStringCanonOutput sink(&canonicalized);
+  CanonHostInfo info;
+  CanonicalizeHostVerbose(host.data(), whole_host, &sink, &info);
+
+  const bool canonicalized_ok =
+      info.out_host.is_nonempty() && info.family != CanonHostInfo::BROKEN;
+  if (!canonicalized_ok) {
+    // Empty host, or canonicalization failed.
+    canonicalized.clear();
+  } else {
+    // Success: finalize the output and assert that nothing beyond the
+    // canonical host was written.
+    sink.Complete();
+    DCHECK_EQ(info.out_host.len, static_cast<int>(canonicalized.length()));
+  }
+
+  return host == canonicalized;
+}
+
+// Decides whether (|scheme|, |host|, |port|) can be stored as a valid tuple.
+// The scheme must be a standard scheme other than kFileSystemScheme and
+// kBlobScheme, and the host and port must fit what the scheme's type allows.
+bool IsValidInput(const base::StringPiece& scheme,
+                  const base::StringPiece& host,
+                  uint16_t port) {
+  SchemeType type = SCHEME_WITH_PORT;
+  const Component scheme_range(0, static_cast<int>(scheme.length()));
+  if (!GetStandardSchemeType(scheme.data(), scheme_range, &type))
+    return false;
+
+  // These schemes do not follow the generic URL syntax, so they are treated
+  // as invalid (scheme, host, port) tuples. Such URLs' _Origin_ might have a
+  // (scheme, host, port) tuple, but the URLs themselves do not.
+  const bool lacks_generic_syntax =
+      scheme == kFileSystemScheme || scheme == kBlobScheme;
+  if (lacks_generic_syntax)
+    return false;
+
+  switch (type) {
+    case SCHEME_WITH_PORT:
+      // A URL with |scheme| must have both a host and a port (a serialization
+      // may omit the port when it equals the default). Missing either one, or
+      // a non-canonical host, makes the input invalid.
+      return !host.empty() && port != 0 && IsCanonicalHost(host);
+
+    case SCHEME_WITHOUT_PORT:
+      // A scheme that never carries port data must be given port 0, and the
+      // host must still be canonical.
+      return port == 0 && IsCanonicalHost(host);
+
+    case SCHEME_WITHOUT_AUTHORITY:
+      return false;
+
+    default:
+      DCHECK(false);  // NOTREACHED();
+      return false;
+  }
+}
+
+}  // namespace
+
+// Creates an invalid tuple: empty scheme, empty host, port 0.
+SchemeHostPort::SchemeHostPort() : port_(0) {}
+
+// Builds a tuple from explicit components. Input rejected by IsValidInput()
+// leaves the object in the invalid state (empty scheme, empty host, port 0).
+SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
+                               base::StringPiece host,
+                               uint16_t port)
+    : port_(0) {
+  if (IsValidInput(scheme, host, port)) {
+    scheme.CopyToString(&scheme_);
+    host.CopyToString(&host_);
+    port_ = port;
+  }
+}
+
+// Builds a tuple from |url|. An invalid URL, or one whose components are
+// rejected by IsValidInput(), leaves the object in the invalid state.
+SchemeHostPort::SchemeHostPort(const GURL& url) : port_(0) {
+  if (!url.is_valid())
+    return;
+
+  base::StringPiece scheme = url.scheme_piece();
+  base::StringPiece host = url.host_piece();
+
+  // A valid GURL never returns PORT_INVALID, so only PORT_UNSPECIFIED needs
+  // mapping; it becomes 0.
+  const int effective_port = url.EffectiveIntPort();
+  const int port = (effective_port == PORT_UNSPECIFIED) ? 0 : effective_port;
+
+  if (IsValidInput(scheme, host, port)) {
+    scheme.CopyToString(&scheme_);
+    host.CopyToString(&host_);
+    port_ = port;
+  }
+}
+
+// Nothing to release beyond what the members clean up themselves.
+SchemeHostPort::~SchemeHostPort() {}
+
+// A tuple is invalid when all three components are unset: port 0, empty
+// scheme and empty host.
+bool SchemeHostPort::IsInvalid() const {
+  return port_ == 0 && scheme_.empty() && host_.empty();
+}
+
+// Produces "<scheme><kStandardSchemeSeparator><host>[:<port>]". An invalid
+// tuple serializes to the empty string. The port is appended only when it is
+// non-zero, the scheme has a default port, and the port differs from it.
+std::string SchemeHostPort::Serialize() const {
+  if (IsInvalid())
+    return std::string();
+
+  std::string serialized(scheme_);
+  serialized.append(kStandardSchemeSeparator);
+  serialized.append(host_);
+
+  if (port_ != 0) {
+    // Look up the scheme's default port, if any, so a matching port can be
+    // left out of the serialization.
+    const int scheme_default_port = DefaultPortForScheme(
+        scheme_.data(), static_cast<int>(scheme_.length()));
+    if (scheme_default_port != PORT_UNSPECIFIED &&
+        port_ != scheme_default_port) {
+      serialized.push_back(':');
+      // Up to five digits for a uint16_t, plus the terminating NUL.
+      const int port_buf_len = 6;
+      char port_digits[port_buf_len];
+      _itoa_s(port_, port_digits, port_buf_len, 10);
+      serialized.append(port_digits);
+    }
+  }
+
+  return serialized;
+}
+
+// Exact component-wise equality of port, scheme and host.
+bool SchemeHostPort::Equals(const SchemeHostPort& other) const {
+  return std::tie(port_, scheme_, host_) ==
+         std::tie(other.port_, other.scheme_, other.host_);
+}
+
+// Strict weak ordering: compares by port first, then scheme, then host.
+bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
+  if (port_ != other.port_)
+    return port_ < other.port_;
+  if (scheme_ != other.scheme_)
+    return scheme_ < other.scheme_;
+  return host_ < other.host_;
+}
+
+}  // namespace url

diff --git a/src/url/scheme_host_port.h b/src/url/scheme_host_port.h
new file mode 100644
index 0000000..47a9041
--- /dev/null
+++ b/src/url/scheme_host_port.h

@@ -0,0 +1,134 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_SCHEME_HOST_PORT_H_
+#define URL_SCHEME_HOST_PORT_H_
+
+#include <stdint.h>
+
+#include <string>
+
+#include "base/strings/string_piece.h"
+#include "url/url_export.h"
+
+class GURL;
+
+namespace url {
+
+// This class represents a (scheme, host, port) tuple extracted from a URL.
+//
+// The primary purpose of this class is to represent relevant network-authority
+// information for a URL. It is _not_ an Origin, as described in RFC 6454. In
+// particular, it is generally NOT the right thing to use for security
+// decisions.
+//
+// Instead, this class is a mechanism for simplifying URLs with standard schemes
+// (that is, those which follow the generic syntax of RFC 3986) down to the
+// uniquely identifying information necessary for network fetches. This makes it
+// suitable as a cache key for a collection of active connections, for instance.
+// It may, however, be inappropriate to use as a cache key for persistent
+// storage associated with a host.
+//
+// In particular, note that:
+//
+// * SchemeHostPort can only represent schemes which follow the RFC 3986 syntax
+//   (e.g. those registered with GURL as "standard schemes"). Non-standard
+//   schemes such as "blob", "filesystem", "data", and "javascript" can only be
+//   represented as invalid SchemeHostPort objects.
+//
+// * For example, the "file" scheme follows the standard syntax, but it is
+//   important to note that the authority portion (host, port) is optional.
+//   URLs without an authority portion will be represented with an empty string
+//   for the host, and a port of 0 (e.g. "file:///etc/hosts" =>
+//   ("file", "", 0)), and URLs with a host-only authority portion will be
+//   represented with a port of 0 (e.g. "file://example.com/etc/hosts" =>
+//   ("file", "example.com", 0)). See Section 3 of RFC 3986 to better understand
+//   these constructs.
+//
+// * SchemeHostPort has no notion of the Origin concept (RFC 6454), and in
+//   particular, it has no notion of a "unique" Origin. If you need to take
+//   uniqueness into account (and, if you're making security-relevant decisions
+//   then you absolutely do), please use 'url::Origin' instead.
+//
+// Usage:
+//
+// * SchemeHostPort objects are commonly created from GURL objects:
+//
+//     GURL url("https://example.com/");
+//     url::SchemeHostPort tuple(url);
+//     tuple.scheme(); // "https"
+//     tuple.host(); // "example.com"
+//     tuple.port(); // 443
+//
+// * Objects may also be explicitly created and compared:
+//
+//     url::SchemeHostPort tuple(url::kHttpsScheme, "example.com", 443);
+//     tuple.scheme(); // "https"
+//     tuple.host(); // "example.com"
+//     tuple.port(); // 443
+//
+//     GURL url("https://example.com/");
+//     tuple.Equals(url::SchemeHostPort(url)); // true
+class URL_EXPORT SchemeHostPort {
+ public:
+  // Creates an invalid (scheme, host, port) tuple, which represents an invalid
+  // or non-standard URL.
+  SchemeHostPort();
+
+  // Creates a (scheme, host, port) tuple. |host| must be a canonicalized
+  // A-label (that is, '☃.net' must be provided as 'xn--n3h.net'). |scheme|
+  // must be a standard scheme. |port| must not be 0, unless |scheme| does not
+  // support ports (e.g. 'file'). In that case, |port| must be 0.
+  //
+  // If any of these requirements is not met, the result is an invalid
+  // SchemeHostPort object.
+  //
+  // Copies the data in |scheme| and |host|.
+  SchemeHostPort(base::StringPiece scheme,
+                 base::StringPiece host,
+                 uint16_t port);
+
+  // Creates a (scheme, host, port) tuple from |url|, as described at
+  // https://tools.ietf.org/html/rfc6454#section-4
+  //
+  // If |url| is invalid or non-standard, the result will be an invalid
+  // SchemeHostPort object.
+  explicit SchemeHostPort(const GURL& url);
+
+  ~SchemeHostPort();
+
+  // Returns the host component, in URL form. That is, all IDN domain names
+  // will be expressed as A-Labels ('☃.net' will be returned as 'xn--n3h.net'),
+  // and all IPv6 addresses will be enclosed in brackets ("[2001:db8::1]").
+  const std::string& host() const { return host_; }
+  // Returns the scheme component; empty for an invalid tuple.
+  const std::string& scheme() const { return scheme_; }
+  // Returns the port component; 0 for an invalid tuple or for a scheme that
+  // does not support ports.
+  uint16_t port() const { return port_; }
+  // Returns true when the scheme and host are both empty and the port is 0.
+  bool IsInvalid() const;
+
+  // Serializes the SchemeHostPort tuple to a canonical form.
+  //
+  // While this string form resembles the Origin serialization specified in
+  // Section 6.2 of RFC 6454, it is important to note that invalid
+  // SchemeHostPort tuples serialize to the empty string, rather than being
+  // serialized as a unique Origin.
+  std::string Serialize() const;
+
+  // Two SchemeHostPort objects are "equal" iff their schemes, hosts, and ports
+  // are exact matches.
+  //
+  // Note that this comparison is _not_ the same as an origin-based comparison.
+  // In particular, invalid SchemeHostPort objects match each other (and
+  // themselves). Unique origins, on the other hand, would not.
+  bool Equals(const SchemeHostPort& other) const;
+
+  // Allows SchemeHostPort to be used as a key in STL (for example, a std::set
+  // or std::map).
+  bool operator<(const SchemeHostPort& other) const;
+
+ private:
+  // All three members are left empty/zero for an invalid tuple.
+  std::string scheme_;
+  std::string host_;
+  uint16_t port_;
+};
+
+}  // namespace url
+
+#endif  // URL_SCHEME_HOST_PORT_H_

diff --git a/src/url/scheme_host_port_unittest.cc b/src/url/scheme_host_port_unittest.cc
new file mode 100644
index 0000000..790a5f1
--- /dev/null
+++ b/src/url/scheme_host_port_unittest.cc

@@ -0,0 +1,219 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "base/macros.h"
+#include "testing/base/public/gunit.h"
+#include "url/gurl.h"
+#include "url/scheme_host_port.h"
+
+namespace {
+
+// Checks that a default-constructed tuple is invalid, and that tuples built
+// from URLs SchemeHostPort cannot represent are invalid and equal to it.
+TEST(SchemeHostPortTest, Invalid) {
+  url::SchemeHostPort default_tuple;
+  EXPECT_EQ("", default_tuple.scheme());
+  EXPECT_EQ("", default_tuple.host());
+  EXPECT_EQ(0, default_tuple.port());
+  EXPECT_TRUE(default_tuple.IsInvalid());
+  EXPECT_TRUE(default_tuple.Equals(default_tuple));
+
+  const char* unrepresentable_urls[] = {
+      "data:text/html,Hello!",
+      "javascript:alert(1)",
+      "file://example.com:443/etc/passwd",
+      "blob:https://example.com/uuid-goes-here",
+      "filesystem:https://example.com/temporary/yay.png"};
+
+  for (const char* spec : unrepresentable_urls) {
+    SCOPED_TRACE(spec);
+    GURL parsed(spec);
+    url::SchemeHostPort tuple(parsed);
+    EXPECT_EQ("", tuple.scheme());
+    EXPECT_EQ("", tuple.host());
+    EXPECT_EQ(0, tuple.port());
+    EXPECT_TRUE(tuple.IsInvalid());
+    // Invalid tuples compare equal to themselves and to each other.
+    EXPECT_TRUE(tuple.Equals(tuple));
+    EXPECT_TRUE(tuple.Equals(default_tuple));
+    EXPECT_TRUE(default_tuple.Equals(tuple));
+  }
+}
+
+// Checks that well-formed (scheme, host, port) triples are stored verbatim
+// and produce valid tuples.
+TEST(SchemeHostPortTest, ExplicitConstruction) {
+  struct ValidInput {
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } valid_inputs[] = {
+      {"http", "example.com", 80},
+      {"http", "example.com", 123},
+      {"https", "example.com", 443},
+      {"https", "example.com", 123},
+      {"file", "", 0},
+      {"file", "example.com", 0},
+  };
+
+  for (const auto& input : valid_inputs) {
+    SCOPED_TRACE(testing::Message() << input.scheme << "://" << input.host
+                                    << ":" << input.port);
+    url::SchemeHostPort tuple(input.scheme, input.host, input.port);
+    EXPECT_FALSE(tuple.IsInvalid());
+    EXPECT_EQ(input.scheme, tuple.scheme());
+    EXPECT_EQ(input.host, tuple.host());
+    EXPECT_EQ(input.port, tuple.port());
+    EXPECT_TRUE(tuple.Equals(tuple));
+  }
+}
+
+// Checks that malformed (scheme, host, port) triples all produce invalid
+// tuples. The table covers non-standard schemes, a missing host, a
+// non-canonical host, CR/LF in the scheme or host, and a port that does not
+// fit the scheme.
+TEST(SchemeHostPortTest, InvalidConstruction) {
+  struct RejectedInput {
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } rejected_inputs[] = {{"", "", 0},
+                         {"data", "", 0},
+                         {"blob", "", 0},
+                         {"filesystem", "", 0},
+                         {"http", "", 80},
+                         {"data", "example.com", 80},
+                         {"http", "☃.net", 80},
+                         {"http\nmore", "example.com", 80},
+                         {"http\rmore", "example.com", 80},
+                         {"http\n", "example.com", 80},
+                         {"http\r", "example.com", 80},
+                         {"http", "example.com\nnot-example.com", 80},
+                         {"http", "example.com\rnot-example.com", 80},
+                         {"http", "example.com\n", 80},
+                         {"http", "example.com\r", 80},
+                         {"http", "example.com", 0},
+                         {"file", "", 80}};
+
+  for (const auto& input : rejected_inputs) {
+    SCOPED_TRACE(testing::Message() << input.scheme << "://" << input.host
+                                    << ":" << input.port);
+    url::SchemeHostPort tuple(input.scheme, input.host, input.port);
+    EXPECT_TRUE(tuple.IsInvalid());
+    EXPECT_EQ("", tuple.scheme());
+    EXPECT_EQ("", tuple.host());
+    EXPECT_EQ(0, tuple.port());
+    EXPECT_TRUE(tuple.Equals(tuple));
+  }
+}
+
+// Verifies that a scheme or host containing an embedded NUL byte produces an
+// invalid tuple.
+TEST(SchemeHostPortTest, InvalidConstructionWithEmbeddedNulls) {
+  // Every string carries an explicit length, because the literal would
+  // otherwise be cut off at its first NUL when converted to a std::string.
+  struct TestCases {
+    const char* scheme;
+    size_t scheme_length;
+    const char* host;
+    size_t host_length;
+    uint16_t port;
+  } cases[] = {{"http\0more", 9, "example.com", 11, 80},
+               {"http\0", 5, "example.com", 11, 80},
+               {"\0http", 5, "example.com", 11, 80},
+               {"http", 4, "example.com\0not-example.com", 27, 80},
+               {"http", 4, "example.com\0", 12, 80},
+               {"http", 4, "\0example.com", 12, 80}};
+
+  for (const auto& test : cases) {
+    // Build the full-length inputs once and use them for both the trace and
+    // the constructor call. Streaming the raw const char* into the trace
+    // would stop at the first NUL and make distinct cases look identical.
+    const std::string scheme(test.scheme, test.scheme_length);
+    const std::string host(test.host, test.host_length);
+    SCOPED_TRACE(testing::Message() << scheme << "://" << host << ":"
+                                    << test.port);
+    url::SchemeHostPort tuple(scheme, host, test.port);
+    EXPECT_EQ("", tuple.scheme());
+    EXPECT_EQ("", tuple.host());
+    EXPECT_EQ(0, tuple.port());
+    EXPECT_TRUE(tuple.IsInvalid());
+  }
+}
+
+// Checks that tuples built from valid GURLs expose the expected scheme, host
+// and port. The table covers IP literals, an IDN host, explicit ports, file
+// URLs, and URLs carrying userinfo, path, query and fragment.
+TEST(SchemeHostPortTest, GURLConstruction) {
+  struct Expectation {
+    const char* url;
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } expectations[] = {
+      {"http://192.168.9.1/", "http", "192.168.9.1", 80},
+      {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80},
+      {"http://☃.net/", "http", "xn--n3h.net", 80},
+      {"http://example.com/", "http", "example.com", 80},
+      {"http://example.com:123/", "http", "example.com", 123},
+      {"https://example.com/", "https", "example.com", 443},
+      {"https://example.com:123/", "https", "example.com", 123},
+      {"file:///etc/passwd", "file", "", 0},
+      {"file://example.com/etc/passwd", "file", "example.com", 0},
+      {"http://u:p@example.com/", "http", "example.com", 80},
+      {"http://u:p@example.com/path", "http", "example.com", 80},
+      {"http://u:p@example.com/path?123", "http", "example.com", 80},
+      {"http://u:p@example.com/path?123#hash", "http", "example.com", 80},
+  };
+
+  for (const auto& expected : expectations) {
+    SCOPED_TRACE(expected.url);
+    GURL parsed(expected.url);
+    EXPECT_TRUE(parsed.is_valid());
+    url::SchemeHostPort tuple(parsed);
+    EXPECT_EQ(expected.scheme, tuple.scheme());
+    EXPECT_EQ(expected.host, tuple.host());
+    EXPECT_EQ(expected.port, tuple.port());
+    EXPECT_FALSE(tuple.IsInvalid());
+    EXPECT_TRUE(tuple.Equals(tuple));
+  }
+}
+
+// Checks Serialize() output for tuples built from GURLs: path data is
+// dropped, and a port equal to the scheme's default is omitted.
+TEST(SchemeHostPortTest, Serialization) {
+  struct Expectation {
+    const char* url;
+    const char* serialized;
+  } expectations[] = {
+      {"http://192.168.9.1/", "http://192.168.9.1"},
+      {"http://[2001:db8::1]/", "http://[2001:db8::1]"},
+      {"http://☃.net/", "http://xn--n3h.net"},
+      {"http://example.com/", "http://example.com"},
+      {"http://example.com:123/", "http://example.com:123"},
+      {"https://example.com/", "https://example.com"},
+      {"https://example.com:123/", "https://example.com:123"},
+      {"file:///etc/passwd", "file://"},
+      {"file://example.com/etc/passwd", "file://example.com"},
+  };
+
+  for (const auto& expected : expectations) {
+    SCOPED_TRACE(expected.url);
+    GURL parsed(expected.url);
+    url::SchemeHostPort tuple(parsed);
+    EXPECT_EQ(expected.serialized, tuple.Serialize());
+  }
+}
+
+// Checks operator< against a table that is already sorted in increasing
+// order: an entry must compare less than every later entry, never the other
+// way round, and never less than itself.
+TEST(SchemeHostPortTest, Comparison) {
+  // Sorted by port first, then scheme, then host.
+  struct SchemeHostPorts {
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } tuples[] = {
+      {"http", "a", 80},
+      {"http", "b", 80},
+      {"https", "a", 80},
+      {"https", "b", 80},
+      {"http", "a", 81},
+      {"http", "b", 81},
+      {"https", "a", 81},
+      {"https", "b", 81},
+  };
+
+  const size_t tuple_count = arraysize(tuples);
+  for (size_t lhs = 0; lhs < tuple_count; ++lhs) {
+    url::SchemeHostPort current(tuples[lhs].scheme, tuples[lhs].host,
+                                tuples[lhs].port);
+    // Starting at |lhs| also covers the self-comparison case.
+    for (size_t rhs = lhs; rhs < tuple_count; ++rhs) {
+      url::SchemeHostPort to_compare(tuples[rhs].scheme, tuples[rhs].host,
+                                     tuples[rhs].port);
+      EXPECT_EQ(lhs < rhs, current < to_compare) << lhs << " < " << rhs;
+      EXPECT_EQ(rhs < lhs, to_compare < current) << rhs << " < " << lhs;
+    }
+  }
+}
+
+}  // namespace

diff --git a/src/url/third_party/mozilla/url_parse.h b/src/url/third_party/mozilla/url_parse.h
index 71dbb78..222d605 100644
--- a/src/url/third_party/mozilla/url_parse.h
+++ b/src/url/third_party/mozilla/url_parse.h

@@ -5,18 +5,11 @@
 #ifndef URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
 #define URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
 
-#include <string>
-
-#include "base/basictypes.h"
 #include "base/strings/string16.h"
 #include "url/url_export.h"
 
 namespace url {
 
-// Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and
-// KURLGoogle.cpp still rely on this type.
-typedef base::char16 UTF16Char;
-
 // Component ------------------------------------------------------------------
 
 // Represents a substring for URL parsing.

diff --git a/src/url/url_canon.h b/src/url/url_canon.h
index 89e3509..95d5345 100644
--- a/src/url/url_canon.h
+++ b/src/url/url_canon.h

@@ -9,8 +9,8 @@
 #include <string.h>
 
 #include "base/strings/string16.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_export.h"
-#include "url/url_parse.h"
 
 namespace url {
 
@@ -285,7 +285,7 @@
 // User info: username/password. If present, this will add the delimiters so
 // the output will be "<username>:<password>@" or "<username>@". Empty
 // username/password pairs, or empty passwords, will get converted to
-// nonexistant in the canonical version.
+// nonexistent in the canonical version.
 //
 // The components for the username and password refer to ranges in the
 // respective source strings. Usually, these will be the same string, which
@@ -317,13 +317,13 @@
 
   // This field summarizes how the input was classified by the canonicalizer.
   enum Family {
-    NEUTRAL,   // - Doesn't resemble an IP address.  As far as the IP
+    NEUTRAL,   // - Doesn't resemble an IP address. As far as the IP
                //   canonicalizer is concerned, it should be treated as a
                //   hostname.
-    BROKEN,    // - Almost an IP, but was not canonicalized.  This could be an
+    BROKEN,    // - Almost an IP, but was not canonicalized. This could be an
                //   IPv4 address where truncation occurred, or something
                //   containing the special characters :[] which did not parse
-               //   as an IPv6 address.  Never attempt to connect to this
+               //   as an IPv6 address. Never attempt to connect to this
                //   address, because it might actually succeed!
     IPV4,      // - Successfully canonicalized as an IPv4 address.
     IPV6,      // - Successfully canonicalized as an IPv6 address.
@@ -331,7 +331,7 @@
   Family family;
 
   // If |family| is IPV4, then this is the number of nonempty dot-separated
-  // components in the input text, from 1 to 4.  If |family| is not IPV4,
+  // components in the input text, from 1 to 4. If |family| is not IPV4,
   // this value is undefined.
   int num_ipv4_components;
 
@@ -355,7 +355,7 @@
 
 // Host.
 //
-// The 8-bit version requires UTF-8 encoding.  Use this version when you only
+// The 8-bit version requires UTF-8 encoding. Use this version when you only
 // need to know whether canonicalization succeeded.
 URL_EXPORT bool CanonicalizeHost(const char* spec,
                                  const Component& host,
@@ -368,7 +368,7 @@
 
 // Extended version of CanonicalizeHost, which returns additional information.
 // Use this when you need to know whether the hostname was an IP address.
-// A successful return is indicated by host_info->family != BROKEN.  See the
+// A successful return is indicated by host_info->family != BROKEN. See the
 // definition of CanonHostInfo above for details.
 URL_EXPORT void CanonicalizeHostVerbose(const char* spec,
                                         const Component& host,
@@ -554,7 +554,7 @@
                                     CanonOutput* output,
                                     Parsed* new_parsed);
 
-// Use for mailto URLs. This "canonicalizes" the url into a path and query
+// Use for mailto URLs. This "canonicalizes" the URL into a path and query
 // component. It does not attempt to merge "to" fields. It uses UTF-8 for
 // the query encoding if there is a query. This is because a mailto URL is
 // really intended for an external mail program, and the encoding of a page,
@@ -578,9 +578,9 @@
 // treated on the same code path as regular canonicalization (the same string
 // for each component).
 //
-// A Parsed structure usually goes along with this. Those
-// components identify offsets within these strings, so that they can all be
-// in the same string, or spread arbitrarily across different ones.
+// A Parsed structure usually goes along with this. Those components identify
+// offsets within these strings, so that they can all be in the same string,
+// or spread arbitrarily across different ones.
 //
 // This structures does not own any data. It is the caller's responsibility to
 // ensure that the data the pointers point to stays in scope and is not
@@ -725,7 +725,7 @@
   }
   bool IsRefOverridden() const { return sources_.ref != NULL; }
 
-  // Getters for the itnernal data. See the variables below for how the
+  // Getters for the internal data. See the variables below for how the
   // information is encoded.
   const URLComponentSource<CHAR>& sources() const { return sources_; }
   const Parsed& components() const { return components_; }
@@ -734,8 +734,8 @@
   // Returns a pointer to a static empty string that is used as a placeholder
   // to indicate a component should be deleted (see below).
   const CHAR* Placeholder() {
-    static const CHAR empty_string = 0;
-    return &empty_string;
+    static const CHAR empty_cstr = 0;
+    return &empty_cstr;
   }
 
   // We support three states:
@@ -863,7 +863,7 @@
 // The base URL should be canonical and have a host (may be empty for file
 // URLs) and a path. If it doesn't have these, we can't resolve relative
 // URLs off of it and will return the base as the output with an error flag.
-// Becausee it is canonical is should also be ASCII.
+// Because it is canonical it should also be ASCII.
 //
 // The query charset converter follows the same rules as CanonicalizeQuery.
 //

diff --git a/src/url/url_canon_etc.cc b/src/url/url_canon_etc.cc
index 7409efd..e9da94c 100644
--- a/src/url/url_canon_etc.cc
+++ b/src/url/url_canon_etc.cc

@@ -95,9 +95,9 @@
   // The output scheme starts from the current position.
   out_scheme->begin = output->length();
 
-  // Danger: it's important that this code does not strip any characters: it
-  // only emits the canonical version (be it valid or escaped) of each of
-  // the input characters. Stripping would put it out of sync with
+  // Danger: it's important that this code does not strip any characters;
+  // it only emits the canonical version (be it valid or escaped) for each
+  // of the input characters. Stripping would put it out of sync with
   // FindAndCompareScheme, which could cause some security checks on
   // schemes to be incorrect.
   bool success = true;
@@ -218,7 +218,7 @@
   char buf[buf_size];
   WritePortInt(buf, buf_size, port_num);
 
-  // Append the port number to the output, preceeded by a colon.
+  // Append the port number to the output, preceded by a colon.
   output->push_back(':');
   out_port->begin = output->length();
   for (int i = 0; i < buf_size && buf[i]; i++)

diff --git a/src/url/url_canon_host.cc b/src/url/url_canon_host.cc
index 513248a..d4cdfd5 100644
--- a/src/url/url_canon_host.cc
+++ b/src/url/url_canon_host.cc

@@ -34,7 +34,7 @@
 // NOTE: I didn't actually test all the control characters. Some may be
 // disallowed in the input, but they are all accepted escaped except for 0.
 // I also didn't test if characters affecting HTML parsing are allowed
-// unescaped, eg. (") or (#), which would indicate the beginning of the path.
+// unescaped, e.g. (") or (#), which would indicate the beginning of the path.
 // Surprisingly, space is accepted in the input and always escaped.
 
 // This table lists the canonical version of all characters we allow in the
@@ -165,6 +165,8 @@
 
 // Canonicalizes a host that requires IDN conversion. Returns true on success
 bool DoIDNHost(const base::char16* src, int src_len, CanonOutput* output) {
+  int original_output_len = output->length();  // So we can rewind below.
+
   // We need to escape URL before doing IDN conversion, since punicode strings
   // cannot be escaped after they are created.
   RawCanonOutputW<kTempHostBufferLen> url_escaped_host;
@@ -187,7 +189,26 @@
   bool success = DoSimpleHost(wide_output.data(),
                               wide_output.length(),
                               output, &has_non_ascii);
-  DCHECK(!has_non_ascii);
+  if (has_non_ascii) {
+    // ICU generated something that DoSimpleHost didn't think looked like
+    // ASCII. This is quite rare, but ICU might convert some characters to
+    // percent signs which might generate new escape sequences which might in
+    // turn be invalid. An example is U+FE6A "small percent" which ICU will
+    // name prep into an ASCII percent and then we can interpret the following
+    // characters as escaped characters.
+    //
+    // If DoSimpleHost didn't think the output was ASCII, just escape the
+    // thing we gave ICU and give up. DoSimpleHost will have handled a further
+    // level of escaping from ICU for simple ASCII cases (i.e. if ICU generates
+    // a new escaped ASCII sequence like "%41" we'll unescape it) but it won't
+    // do more (like handle escaped non-ASCII sequences). Handling the escaped
+    // ASCII isn't strictly necessary, but DoSimpleHost handles this case
+    // anyway so we handle it.
+    output->set_length(original_output_len);
+    AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(),
+                              output);
+    return false;
+  }
   return success;
 }
 
@@ -316,11 +337,11 @@
   }
 
   if (!success) {
-    // Canonicalization failed.  Set BROKEN to notify the caller.
+    // Canonicalization failed. Set BROKEN to notify the caller.
     host_info->family = CanonHostInfo::BROKEN;
   } else {
     // After all the other canonicalization, check if we ended up with an IP
-    // address.  IP addresses are small, so writing into this temporary buffer
+    // address. IP addresses are small, so writing into this temporary buffer
     // should not cause an allocation.
     RawCanonOutput<64> canon_ip;
     CanonicalizeIPAddress(output->data(),
@@ -328,7 +349,7 @@
                           &canon_ip, host_info);
 
     // If we got an IPv4/IPv6 address, copy the canonical form back to the
-    // real buffer.  Otherwise, it's a hostname or broken IP, in which case
+    // real buffer. Otherwise, it's a hostname or broken IP, in which case
     // we just leave it in place.
     if (host_info->IsIPAddress()) {
       output->set_length(output_begin);

diff --git a/src/url/url_canon_icu.cc b/src/url/url_canon_icu.cc
index 60bb004..70a2b27 100644
--- a/src/url/url_canon_icu.cc
+++ b/src/url/url_canon_icu.cc

@@ -4,6 +4,7 @@
 
 // ICU integration functions.
 
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -99,8 +100,10 @@
     // TODO(jungshik): Change options as different parties (browsers,
     // registrars, search engines) converge toward a consensus.
     value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
-    if (U_FAILURE(err))
+    if (U_FAILURE(err)) {
+      CHECK(false) << "failed to open UTS46 data with error: " << err;
       value = NULL;
+    }
   }
 
   UIDNA* value;

diff --git a/src/url/url_canon_icu_unittest.cc b/src/url/url_canon_icu_unittest.cc
index cfa4b49..f7ce199 100644
--- a/src/url/url_canon_icu_unittest.cc
+++ b/src/url/url_canon_icu_unittest.cc

@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <stddef.h>
+
 #include "base/macros.h"
 #include "testing/base/public/gunit.h"
 #include "third_party/icu/include/unicode/ucnv.h"

diff --git a/src/url/url_canon_internal.cc b/src/url/url_canon_internal.cc
index 1554814..a727ca2 100644
--- a/src/url/url_canon_internal.cc
+++ b/src/url/url_canon_internal.cc

@@ -5,6 +5,7 @@
 #include "url/url_canon_internal.h"
 
 #include <errno.h>
+#include <stddef.h>
 #include <stdlib.h>
 
 #include <cstdio>
@@ -249,9 +250,9 @@
 
 bool ReadUTFChar(const char* str, int* begin, int length,
                  unsigned* code_point_out) {
-  // This depends on ints and int32s being the same thing.  If they're not, it
+  // This depends on ints and int32s being the same thing. If they're not, it
   // will fail to compile.
-  // TODO(mmenke):  This should probably be fixed.
+  // TODO(mmenke): This should probably be fixed.
   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
       !base::IsValidCharacter(*code_point_out)) {
     *code_point_out = kUnicodeReplacementCharacter;
@@ -262,9 +263,9 @@
 
 bool ReadUTFChar(const base::char16* str, int* begin, int length,
                  unsigned* code_point_out) {
-  // This depends on ints and int32s being the same thing.  If they're not, it
+  // This depends on ints and int32s being the same thing. If they're not, it
   // will fail to compile.
-  // TODO(mmenke):  This should probably be fixed.
+  // TODO(mmenke): This should probably be fixed.
   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
       !base::IsValidCharacter(*code_point_out)) {
     *code_point_out = kUnicodeReplacementCharacter;

diff --git a/src/url/url_canon_internal.h b/src/url/url_canon_internal.h
index a66cd8d..ba915e8 100644
--- a/src/url/url_canon_internal.h
+++ b/src/url/url_canon_internal.h

@@ -7,9 +7,10 @@
 
 // This file is intended to be included in another C++ file where the character
 // types are defined. This allows us to write mostly generic code, but not have
-// templace bloat because everything is inlined when anybody calls any of our
+// template bloat because everything is inlined when anybody calls any of our
 // functions.
 
+#include <stddef.h>
 #include <stdlib.h>
 
 #include "base/logging.h"
@@ -41,7 +42,7 @@
   // Valid in an ASCII-representation of an octal digit.
   CHAR_OCT = 32,
 
-  // Characters that do not require escaping in encodeURIComponent.  Characters
+  // Characters that do not require escaping in encodeURIComponent. Characters
   // that do not have this flag will be escaped; see url_util.cc.
   CHAR_COMPONENT = 64,
 };
@@ -175,7 +176,7 @@
              output);
     Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
              output);
-  } else if (char_value <= 0x10FFFF) {  // Max unicode code point.
+  } else if (char_value <= 0x10FFFF) {  // Max Unicode code point.
     // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
              output);
@@ -199,7 +200,7 @@
 }
 
 // Writes the given character to the output as UTF-8. This does NO checking
-// of the validity of the unicode characters; the caller should ensure that
+// of the validity of the Unicode characters; the caller should ensure that
 // the value it is appending is valid to append.
 inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {
   DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output);
@@ -207,7 +208,7 @@
 
 // Writes the given character to the output as UTF-8, escaping ALL
 // characters (even when they are ASCII). This does NO checking of the
-// validity of the unicode characters; the caller should ensure that the value
+// validity of the Unicode characters; the caller should ensure that the value
 // it is appending is valid to append.
 inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {
   DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output);
@@ -260,7 +261,7 @@
 // that any following characters are.
 inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin,
                                   int length, CanonOutput* output) {
-  // UTF-16 input. Readchar16 will handle invalid characters for us and give
+  // UTF-16 input. ReadUTFChar will handle invalid characters for us and give
   // us the kUnicodeReplacementCharacter, so we don't have to do special
   // checking after failure, just pass through the failure to the caller.
   unsigned char_value;

diff --git a/src/url/url_canon_internal_file.h b/src/url/url_canon_internal_file.h
index 6903098..26a3eae 100644
--- a/src/url/url_canon_internal_file.h
+++ b/src/url/url_canon_internal_file.h

@@ -113,15 +113,15 @@
   new_parsed->path.begin = output->length();
   output->push_back('/');
 
-  // Copies and normalizes the "c:" at the beginning, if present.
+  // Copy and normalize the "c:" at the beginning, if present.
   int after_drive = FileDoDriveSpec(source.path, parsed.path.begin,
                                     parsed.path.end(), output);
 
-  // Copies the rest of the path
+  // Copy the rest of the path.
   FileDoPath<CHAR, UCHAR>(source.path, after_drive, parsed.path.end(), output);
   new_parsed->path.len = output->length() - new_parsed->path.begin;
 
-  // Things following the path we can use the standard canonicalizers for.
+  // For things following the path, we can use the standard canonicalizers.
   success &= URLCanonInternal<CHAR, UCHAR>::DoQuery(
       source.query, parsed.query, output, &new_parsed->query);
   success &= URLCanonInternal<CHAR, UCHAR>::DoRef(

diff --git a/src/url/url_canon_ip.cc b/src/url/url_canon_ip.cc
index 45f95de..87c30c7 100644
--- a/src/url/url_canon_ip.cc
+++ b/src/url/url_canon_ip.cc

@@ -4,9 +4,10 @@
 
 #include "url/url_canon_ip.h"
 
+#include <stdint.h>
 #include <stdlib.h>
+#include <limits>
 
-#include "base/basictypes.h"
 #include "base/logging.h"
 #include "url/url_canon_internal.h"
 
@@ -92,7 +93,7 @@
 template<typename CHAR>
 CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,
                                             const Component& component,
-                                            uint32* number) {
+                                            uint32_t* number) {
   // Figure out the base
   SharedCharTypes base;
   int base_prefix_len = 0;  // Size of the prefix for this base.
@@ -118,7 +119,7 @@
     base_prefix_len++;
 
   // Put the component, minus any base prefix, into a NULL-terminated buffer so
-  // we can call the standard library.  Because leading zeros have already been
+  // we can call the standard library. Because leading zeros have already been
   // discarded, filling the entire buffer is guaranteed to trigger the 32-bit
   // overflow check.
   const int kMaxComponentLen = 16;
@@ -133,7 +134,7 @@
     if (!IsCharOfType(input, base))
       return CanonHostInfo::NEUTRAL;
 
-    // Fill the buffer, if there's space remaining.  This check allows us to
+    // Fill the buffer, if there's space remaining. This check allows us to
     // verify that all characters are numeric, even those that don't fit.
     if (dest_i < kMaxComponentLen)
       buf[dest_i++] = input;
@@ -143,14 +144,14 @@
 
   // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal
   // number can overflow a 64-bit number in <= 16 characters).
-  uint64 num = _strtoui64(buf, NULL, BaseForType(base));
+  uint64_t num = _strtoui64(buf, NULL, BaseForType(base));
 
   // Check for 32-bit overflow.
-  if (num > kuint32max)
+  if (num > std::numeric_limits<uint32_t>::max())
     return CanonHostInfo::BROKEN;
 
-  // No overflow.  Success!
-  *number = static_cast<uint32>(num);
+  // No overflow. Success!
+  *number = static_cast<uint32_t>(num);
   return CanonHostInfo::IPV4;
 }
 
@@ -167,10 +168,10 @@
 
   // Convert existing components to digits. Values up to
   // |existing_components| will be valid.
-  uint32 component_values[4];
+  uint32_t component_values[4];
   int existing_components = 0;
 
-  // Set to true if one or more components are BROKEN.  BROKEN is only
+  // Set to true if one or more components are BROKEN. BROKEN is only
   // returned if all components are IPV4 or BROKEN, so, for example,
   // 12345678912345.de returns NEUTRAL rather than broken.
   bool broken = false;
@@ -198,7 +199,7 @@
   // First, process all components but the last, while making sure each fits
   // within an 8-bit field.
   for (int i = 0; i < existing_components - 1; i++) {
-    if (component_values[i] > kuint8max)
+    if (component_values[i] > std::numeric_limits<uint8_t>::max())
       return CanonHostInfo::BROKEN;
     address[i] = static_cast<unsigned char>(component_values[i]);
   }
@@ -209,7 +210,7 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"
 #endif
-  uint32 last_value = component_values[existing_components - 1];
+  uint32_t last_value = component_values[existing_components - 1];
 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)
 #pragma GCC diagnostic pop
 #endif
@@ -440,11 +441,12 @@
   return true;
 }
 
-// Converts a hex comonent into a number. This cannot fail since the caller has
+// Converts a hex component into a number. This cannot fail since the caller has
 // already verified that each character in the string was a hex digit, and
 // that there were no more than 4 characters.
-template<typename CHAR>
-uint16 IPv6HexComponentToNumber(const CHAR* spec, const Component& component) {
+template <typename CHAR>
+uint16_t IPv6HexComponentToNumber(const CHAR* spec,
+                                  const Component& component) {
   DCHECK(component.len <= 4);
 
   // Copy the hex string into a C-string.
@@ -455,7 +457,7 @@
 
   // Convert it to a number (overflow is not possible, since with 4 hex
   // characters we can at most have a 16 bit number).
-  return static_cast<uint16>(_strtoui64(buf, NULL, 16));
+  return static_cast<uint16_t>(_strtoui64(buf, NULL, 16));
 }
 
 // Converts an IPv6 address to a 128-bit number (network byte order), returning
@@ -497,7 +499,7 @@
     // Append the hex component's value.
     if (i != ipv6_parsed.num_hex_components) {
       // Get the 16-bit value for this hex component.
-      uint16 number = IPv6HexComponentToNumber<CHAR>(
+      uint16_t number = IPv6HexComponentToNumber<CHAR>(
           spec, ipv6_parsed.hex_components[i]);
       // Append to |address|, in network byte order.
       address[cur_index_in_address++] = (number & 0xFF00) >> 8;
@@ -576,7 +578,7 @@
       }
     }
 
-    // No invalid characters.  Could still be IPv4 or a hostname.
+    // No invalid characters. Could still be IPv4 or a hostname.
     host_info->family = CanonHostInfo::NEUTRAL;
     return false;
   }

diff --git a/src/url/url_canon_ip.h b/src/url/url_canon_ip.h
index 19ecfdb..937bd46 100644
--- a/src/url/url_canon_ip.h
+++ b/src/url/url_canon_ip.h

@@ -6,9 +6,9 @@
 #define URL_URL_CANON_IP_H_
 
 #include "base/strings/string16.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_export.h"
-#include "url/url_parse.h"
 
 namespace url {
 
@@ -30,14 +30,14 @@
 // Not all components may exist. If there are only 3 components, for example,
 // the last one will have a length of -1 or 0 to indicate it does not exist.
 //
-// Note that many platform's inet_addr will ignore everything after a space
-// in certain curcumstances if the stuff before the space looks like an IP
+// Note that many platforms' inet_addr will ignore everything after a space
+// in certain circumstances if the stuff before the space looks like an IP
 // address. IE6 is included in this. We do NOT handle this case. In many cases,
 // the browser's canonicalization will get run before this which converts
-// spaces to %20 (in the case of IE7) or rejects them (in the case of
-// Mozilla), so this code path never gets hit. Our host canonicalization will
-// notice these spaces and escape them, which will make IP address finding
-// fail. This seems like better behavior than stripping after a space.
+// spaces to %20 (in the case of IE7) or rejects them (in the case of Mozilla),
+// so this code path never gets hit. Our host canonicalization will notice
+// these spaces and escape them, which will make IP address finding fail. This
+// seems like better behavior than stripping after a space.
 URL_EXPORT bool FindIPv4Components(const char* spec,
                                    const Component& host,
                                    Component components[4]);

diff --git a/src/url/url_canon_mailtourl.cc b/src/url/url_canon_mailtourl.cc
index 7c48b95..fb6bc9a 100644
--- a/src/url/url_canon_mailtourl.cc
+++ b/src/url/url_canon_mailtourl.cc

@@ -55,7 +55,7 @@
     new_parsed->path.reset();
   }
 
-  // Query -- always use the default utf8 charset converter.
+  // Query -- always use the default UTF8 charset converter.
   CanonicalizeQuery(source.query, parsed.query, NULL,
                     output, &new_parsed->query);
 

diff --git a/src/url/url_canon_path.cc b/src/url/url_canon_path.cc
index ceff689..2e088a0 100644
--- a/src/url/url_canon_path.cc
+++ b/src/url/url_canon_path.cc

@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <limits.h>
+
 #include "base/logging.h"
 #include "url/url_canon.h"
 #include "url/url_canon_internal.h"
@@ -162,6 +164,76 @@
   output->set_length(i + 1);
 }
 
+// Looks for problematic nested escape sequences and escapes the output as
+// needed to ensure they can't be misinterpreted.
+//
+// Our concern is that an input escape sequence that's invalid because it
+// contains nested escape sequences might look valid once those are unescaped.
+// For example, "%%300" is not a valid escape sequence, but after unescaping the
+// inner "%30" this becomes "%00" which is valid.  Leaving this in the output
+// string can result in callers re-canonicalizing the string and unescaping this
+// sequence, thus resulting in something fundamentally different than the
+// original input here.  This can cause a variety of problems.
+//
+// This function is called after we've just unescaped a sequence that's within
+// two output characters of a previous '%' that we know didn't begin a valid
+// escape sequence in the input string.  We look for whether the output is going
+// to turn into a valid escape sequence, and if so, convert the initial '%' into
+// an escaped "%25" so the output can't be misinterpreted.
+//
+// |spec| is the input string we're canonicalizing.
+// |next_input_index| is the index of the next unprocessed character in |spec|.
+// |input_len| is the length of |spec|.
+// |last_invalid_percent_index| is the index in |output| of a previously-seen
+// '%' character.  The caller knows this '%' character isn't followed by a valid
+// escape sequence in the input string.
+// |output| is the canonicalized output thus far.  The caller guarantees this
+// ends with a '%' followed by one or two characters, and the '%' is the one
+// pointed to by |last_invalid_percent_index|.  The last character in the string
+// was just unescaped.
+template<typename CHAR>
+void CheckForNestedEscapes(const CHAR* spec,
+                           int next_input_index,
+                           int input_len,
+                           int last_invalid_percent_index,
+                           CanonOutput* output) {
+  const int length = output->length();  // Length on entry, before any append.
+  const char last_unescaped_char = output->at(length - 1);
+
+  // If |output| currently looks like "%c", we need to try appending the next
+  // input character to see if this will result in a problematic escape
+  // sequence.  Note that this won't trigger on the first nested escape of a
+  // two-escape sequence like "%%30%30" -- we'll allow the conversion to
+  // "%0%30" -- but the second nested escape will be caught by this function
+  // when it's called again in that case.
+  const bool append_next_char = last_invalid_percent_index == length - 2;
+  if (append_next_char) {
+    // If the input doesn't contain a 7-bit character next, this case won't be a
+    // problem.
+    if ((next_input_index == input_len) || (spec[next_input_index] >= 0x80))
+      return;
+    output->push_back(static_cast<char>(spec[next_input_index]));
+  }
+
+  // Now output ends like "%cc".  Try to unescape this.
+  int begin = last_invalid_percent_index;
+  unsigned char temp;  // Decoded byte is never read; only success matters.
+  if (DecodeEscaped(output->data(), &begin, output->length(), &temp)) {
+    // New escape sequence found.  Overwrite the characters following the '%'
+    // with "25", and push_back() the one or two characters that were following
+    // the '%' when we were called.
+    if (!append_next_char)
+      output->push_back(output->at(last_invalid_percent_index + 1));
+    output->set(last_invalid_percent_index + 1, '2');
+    output->set(last_invalid_percent_index + 2, '5');
+    output->push_back(last_unescaped_char);
+  } else if (append_next_char) {
+    // Not a valid escape sequence, but we still need to undo appending the next
+    // source character so the caller can process it normally.
+    output->set_length(length);
+  }
+}
+
 // Appends the given path to the output. It assumes that if the input path
 // starts with a slash, it should be copied to the output. If no path has
 // already been appended to the output (the case when not resolving
@@ -173,7 +245,7 @@
 // copied to the output.
 //
 // We do not collapse multiple slashes in a row to a single slash. It seems
-// no web browsers do this, and we don't want incompababilities, even though
+// no web browsers do this, and we don't want incompatibilities, even though
 // it would be correct for most systems.
 template<typename CHAR, typename UCHAR>
 bool DoPartialPath(const CHAR* spec,
@@ -182,10 +254,15 @@
                    CanonOutput* output) {
   int end = path.end();
 
+  // We use this variable to minimize the amount of work done when unescaping --
+  // we'll only call CheckForNestedEscapes() when this points at one of the last
+  // couple of characters in |output|.
+  int last_invalid_percent_index = INT_MIN;
+
   bool success = true;
   for (int i = path.begin; i < end; i++) {
     UCHAR uch = static_cast<UCHAR>(spec[i]);
-    if (sizeof(CHAR) > sizeof(char) && uch >= 0x80) {
+    if (sizeof(CHAR) > 1 && uch >= 0x80) {
       // We only need to test wide input for having non-ASCII characters. For
       // narrow input, we'll always just use the lookup table. We don't try to
       // do anything tricky with decoding/validating UTF-8. This function will
@@ -200,7 +277,7 @@
         // Needs special handling of some sort.
         int dotlen;
         if ((dotlen = IsDot(spec, i, end)) > 0) {
-          // See if this dot was preceeded by a slash in the output. We
+          // See if this dot was preceded by a slash in the output. We
           // assume that when canonicalizing paths, they will always
           // start with a slash and not a dot, so we don't have to
           // bounds check the output.
@@ -230,7 +307,7 @@
                 break;
             }
           } else {
-            // This dot is not preceeded by a slash, it is just part of some
+            // This dot is not preceded by a slash, it is just part of some
             // file name.
             output->push_back('.');
             i += dotlen - 1;
@@ -245,33 +322,40 @@
           unsigned char unescaped_value;
           if (DecodeEscaped(spec, &i, end, &unescaped_value)) {
             // Valid escape sequence, see if we keep, reject, or unescape it.
+            // Note that at this point DecodeEscaped() will have advanced |i| to
+            // the last character of the escape sequence.
             char unescaped_flags = kPathCharLookup[unescaped_value];
 
             if (unescaped_flags & UNESCAPE) {
-              // This escaped value shouldn't be escaped, copy it.
+              // This escaped value shouldn't be escaped.  Try to copy it.
               output->push_back(unescaped_value);
-            } else if (unescaped_flags & INVALID_BIT) {
-              // Invalid escaped character, copy it and remember the error.
-              output->push_back('%');
-              output->push_back(static_cast<char>(spec[i - 1]));
-              output->push_back(static_cast<char>(spec[i]));
-              success = false;
+              // If we just unescaped a value within 2 output characters of the
+              // '%' from a previously-detected invalid escape sequence, we
+              // might have an input string with problematic nested escape
+              // sequences; detect and fix them.
+              if (last_invalid_percent_index >= (output->length() - 3)) {
+                CheckForNestedEscapes(spec, i + 1, end,
+                                      last_invalid_percent_index, output);
+              }
             } else {
-              // Valid escaped character but we should keep it escaped. We
-              // don't want to change the case of any hex letters in case
-              // the server is sensitive to that, so we just copy the two
-              // characters without checking (DecodeEscape will have advanced
-              // to the last character of the pair).
+              // Either this is an invalid escaped character, or it's a valid
+              // escaped character we should keep escaped.  In the first case we
+              // should just copy it exactly and remember the error.  In the
+              // second we also copy exactly in case the server is sensitive to
+              // changing the case of any hex letters.
               output->push_back('%');
               output->push_back(static_cast<char>(spec[i - 1]));
               output->push_back(static_cast<char>(spec[i]));
+              if (unescaped_flags & INVALID_BIT)
+                success = false;
             }
           } else {
-            // Invalid escape sequence. IE7 rejects any URLs with such
-            // sequences, while Firefox, IE6, and Safari all pass it through
-            // unchanged. We are more permissive unlike IE7. I don't think this
-            // can cause significant problems, if it does, we should change
-            // to be more like IE7.
+            // Invalid escape sequence. IE7+ rejects any URLs with such
+            // sequences, while other browsers pass them through unchanged. We
+            // use the permissive behavior.
+            // TODO(brettw): Consider testing IE's strict behavior, which would
+            // allow removing the code to handle nested escapes above.
+            last_invalid_percent_index = output->length();
             output->push_back('%');
           }
 

diff --git a/src/url/url_canon_pathurl.cc b/src/url/url_canon_pathurl.cc
index 0d23ccb..494fbda 100644
--- a/src/url/url_canon_pathurl.cc
+++ b/src/url/url_canon_pathurl.cc

@@ -14,7 +14,7 @@
 namespace {
 
 // Canonicalize the given |component| from |source| into |output| and
-// |new_component|. If |separator| is non-zero, it is pre-pended to |ouput|
+// |new_component|. If |separator| is non-zero, it is pre-pended to |output|
 // prior to the canonicalized component; i.e. for the '?' or '#' characters.
 template<typename CHAR, typename UCHAR>
 bool DoCanonicalizePathComponent(const CHAR* source,

diff --git a/src/url/url_canon_query.cc b/src/url/url_canon_query.cc
index 5494ddf..bf59d10 100644
--- a/src/url/url_canon_query.cc
+++ b/src/url/url_canon_query.cc

@@ -80,7 +80,7 @@
 }
 
 // Runs the converter with the given UTF-16 input. We don't have to do
-// anything, but this overriddden function allows us to use the same code
+// anything, but this overridden function allows us to use the same code
 // for both UTF-8 and UTF-16 input.
 void RunConverter(const base::char16* spec,
                   const Component& query,

diff --git a/src/url/url_canon_relative.cc b/src/url/url_canon_relative.cc
index 9436245..e34ea2f 100644
--- a/src/url/url_canon_relative.cc
+++ b/src/url/url_canon_relative.cc

@@ -17,14 +17,14 @@
 namespace {
 
 // Firefox does a case-sensitive compare (which is probably wrong--Mozilla bug
-// 379034), whereas IE is case-insensetive.
+// 379034), whereas IE is case-insensitive.
 //
 // We choose to be more permissive like IE. We don't need to worry about
 // unescaping or anything here: neither IE or Firefox allow this. We also
 // don't have to worry about invalid scheme characters since we are comparing
 // against the canonical scheme of the base.
 //
-// The base URL should always be canonical, therefore is ASCII.
+// The base URL should always be canonical, therefore it should be ASCII.
 template<typename CHAR>
 bool AreSchemesEqual(const char* base,
                      const Component& base_scheme,
@@ -75,6 +75,10 @@
   TrimURL(url, &begin, &url_len);
   if (begin >= url_len) {
     // Empty URLs are relative, but do nothing.
+    if (!is_base_hierarchical) {
+      // Don't allow relative URLs if the base scheme doesn't support it.
+      return false;
+    }
     *relative_component = Component(begin, 0);
     *is_relative = true;
     return true;
@@ -82,7 +86,7 @@
 
 #ifdef WIN32
   // We special case paths like "C:\foo" so they can link directly to the
-  // file on Windows (IE compatability). The security domain stuff should
+  // file on Windows (IE compatibility). The security domain stuff should
   // prevent a link like this from actually being followed if its on a
   // web page.
   //
@@ -91,22 +95,22 @@
   // is a file and the answer will still be correct.
   //
   // We require strict backslashes when detecting UNC since two forward
-  // shashes should be treated a a relative URL with a hostname.
+  // slashes should be treated as a relative URL with a hostname.
   if (DoesBeginWindowsDriveSpec(url, begin, url_len) ||
       DoesBeginUNCPath(url, begin, url_len, true))
     return true;
 #endif  // WIN32
 
   // See if we've got a scheme, if not, we know this is a relative URL.
-  // BUT: Just because we have a scheme, doesn't make it absolute.
+  // BUT, just because we have a scheme, doesn't make it absolute.
   // "http:foo.html" is a relative URL with path "foo.html". If the scheme is
-  // empty, we treat it as relative (":foo") like IE does.
+  // empty, we treat it as relative (":foo"), like IE does.
   Component scheme;
   const bool scheme_is_empty =
       !ExtractScheme(url, url_len, &scheme) || scheme.len == 0;
   if (scheme_is_empty) {
     if (url[begin] == '#') {
-      // |url| is a bare fragement (e.g. "#foo"). This can be resolved against
+      // |url| is a bare fragment (e.g. "#foo"). This can be resolved against
       // any base. Fall-through.
     } else if (!is_base_hierarchical) {
       // Don't allow relative URLs if the base scheme doesn't support it.
@@ -145,7 +149,7 @@
   int colon_offset = scheme.end();
 
   // If it's a filesystem URL, the only valid way to make it relative is not to
-  // supply a scheme.  There's no equivalent to e.g. http:index.html.
+  // supply a scheme. There's no equivalent to e.g. http:index.html.
   if (CompareSchemeComponent(url, scheme, kFileSystemScheme))
     return true;
 
@@ -170,8 +174,8 @@
 // up until and including the last slash. There should be a slash in the
 // range, if not, nothing will be copied.
 //
-// The input is assumed to be canonical, so we search only for exact slashes
-// and not backslashes as well. We also know that it's ASCII.
+// For standard URLs the input should be canonical, but when resolving relative
+// URLs on a non-standard base (like "data:") the input can be anything.
 void CopyToLastSlash(const char* spec,
                      int begin,
                      int end,
@@ -179,7 +183,7 @@
   // Find the last slash.
   int last_slash = -1;
   for (int i = end - 1; i >= begin; i--) {
-    if (spec[i] == '/') {
+    if (spec[i] == '/' || spec[i] == '\\') {
       last_slash = i;
       break;
     }
@@ -394,7 +398,7 @@
                             query_converter, output, out_parsed);
 }
 
-// Resolves a relative URL that happens to be an absolute file path.  Examples
+// Resolves a relative URL that happens to be an absolute file path. Examples
 // include: "//hostname/path", "/c:/foo", and "//hostname/c:/foo".
 template<typename CHAR>
 bool DoResolveAbsoluteFile(const CHAR* relative_url,
@@ -460,7 +464,7 @@
   // how strict the UNC finder is).
   //
   // We also allow Windows absolute drive specs on any scheme (for example
-  // "c:\foo") like IE does. There must be no preceeding slashes in this
+  // "c:\foo") like IE does. There must be no preceding slashes in this
   // case (we reject anything like "/c:/foo") because that should be treated
   // as a path. For file URLs, we allow any number of slashes since that would
   // be setting the path.

diff --git a/src/url/url_canon_stdstring.h b/src/url/url_canon_stdstring.h
index f8a847d..aefc76a 100644
--- a/src/url/url_canon_stdstring.h
+++ b/src/url/url_canon_stdstring.h

@@ -11,6 +11,7 @@
 
 #include <string>
 
+#include "base/strings/string_piece.h"
 #include "url/url_canon.h"
 #include "url/url_export.h"
 
@@ -47,35 +48,35 @@
 };
 
 // An extension of the Replacements class that allows the setters to use
-// standard strings.
+// StringPieces (implicitly allowing strings or char*s).
 //
-// The strings passed as arguments are not copied and must remain valid until
-// this class goes out of scope.
+// The contents of the StringPieces are not copied and must remain valid until
+// the StringPieceReplacements object goes out of scope.
 template<typename STR>
-class StdStringReplacements : public Replacements<typename STR::value_type> {
+class StringPieceReplacements : public Replacements<typename STR::value_type> {
  public:
-  void SetSchemeStr(const STR& s) {
+  void SetSchemeStr(const base::BasicStringPiece<STR>& s) {
     this->SetScheme(s.data(), Component(0, static_cast<int>(s.length())));
   }
-  void SetUsernameStr(const STR& s) {
+  void SetUsernameStr(const base::BasicStringPiece<STR>& s) {
     this->SetUsername(s.data(), Component(0, static_cast<int>(s.length())));
   }
-  void SetPasswordStr(const STR& s) {
+  void SetPasswordStr(const base::BasicStringPiece<STR>& s) {
     this->SetPassword(s.data(), Component(0, static_cast<int>(s.length())));
   }
-  void SetHostStr(const STR& s) {
+  void SetHostStr(const base::BasicStringPiece<STR>& s) {
     this->SetHost(s.data(), Component(0, static_cast<int>(s.length())));
   }
-  void SetPortStr(const STR& s) {
+  void SetPortStr(const base::BasicStringPiece<STR>& s) {
     this->SetPort(s.data(), Component(0, static_cast<int>(s.length())));
   }
-  void SetPathStr(const STR& s) {
+  void SetPathStr(const base::BasicStringPiece<STR>& s) {
     this->SetPath(s.data(), Component(0, static_cast<int>(s.length())));
   }
-  void SetQueryStr(const STR& s) {
+  void SetQueryStr(const base::BasicStringPiece<STR>& s) {
     this->SetQuery(s.data(), Component(0, static_cast<int>(s.length())));
   }
-  void SetRefStr(const STR& s) {
+  void SetRefStr(const base::BasicStringPiece<STR>& s) {
     this->SetRef(s.data(), Component(0, static_cast<int>(s.length())));
   }
 };

diff --git a/src/url/url_canon_stdurl.cc b/src/url/url_canon_stdurl.cc
index 7a61de8..7d1758b 100644
--- a/src/url/url_canon_stdurl.cc
+++ b/src/url/url_canon_stdurl.cc

@@ -169,7 +169,7 @@
 }
 
 // For 16-bit replacements, we turn all the replacements into UTF-8 so the
-// regular codepath can be used.
+// regular code path can be used.
 bool ReplaceStandardURL(const char* base,
                         const Parsed& base_parsed,
                         const Replacements<base::char16>& replacements,

diff --git a/src/url/url_canon_unittest.cc b/src/url/url_canon_unittest.cc
index 1917cc9..3dd617d 100644
--- a/src/url/url_canon_unittest.cc
+++ b/src/url/url_canon_unittest.cc

@@ -3,13 +3,14 @@
 // found in the LICENSE file.
 
 #include <errno.h>
+#include <stddef.h>
 
 #include "base/macros.h"
 #include "testing/base/public/gunit.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_canon_internal.h"
 #include "url/url_canon_stdstring.h"
-#include "url/url_parse.h"
 #include "url/url_test_utils.h"
 
 namespace url {
@@ -38,7 +39,7 @@
   bool expected_success;
 };
 
-// Test cases for CanonicalizeIPAddress().  The inputs are identical to
+// Test cases for CanonicalizeIPAddress(). The inputs are identical to
 // DualComponentCase, but the output has extra CanonHostInfo fields.
 struct IPAddressCase {
   const char* input8;
@@ -127,7 +128,7 @@
 
 #if defined(GTEST_HAS_DEATH_TEST)
 // TODO(mattm): Can't run this in debug mode for now, since the DCHECK will
-// cause the Chromium stacktrace dialog to appear and hang the test.
+// cause the Chromium stack trace dialog to appear and hang the test.
 // See http://crbug.com/49580.
 #if defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON)
 #define MAYBE_DoAppendUTF8Invalid DoAppendUTF8Invalid
@@ -157,10 +158,10 @@
   } utf_cases[] = {
       // Valid canonical input should get passed through & escaped.
     {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"},
-      // Test a characer that takes > 16 bits (U+10300 = old italic letter A)
+      // Test a character that takes > 16 bits (U+10300 = old italic letter A)
     {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"},
-      // Non-shortest-form UTF-8 are invalid. The bad char should be replaced
-      // with the invalid character (EF BF DB in UTF-8).
+      // Non-shortest-form UTF-8 characters are invalid. The bad character
+      // should be replaced with the invalid character (EF BF BD in UTF-8).
     {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", NULL, false, "%EF%BF%BD%E5%A5%BD"},
       // Invalid UTF-8 sequences should be marked as invalid (the first
       // sequence is truncated).
@@ -259,7 +260,7 @@
     EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
     EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
 
-    // Now try the wide version
+    // Now try the wide version.
     out_str.clear();
     StdStringCanonOutput output2(&out_str);
 
@@ -275,7 +276,7 @@
     EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
   }
 
-  // Test the case where the scheme is declared nonexistant, it should be
+  // Test the case where the scheme is declared nonexistent, it should be
   // converted into an empty scheme.
   Component out_comp;
   out_str.clear();
@@ -321,6 +322,17 @@
       // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
     {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
     {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+      // ICU will convert weird percents into ASCII percents, but not unescape
+      // further. A weird percent is U+FE6A (EF B9 AA in UTF-8) which is a
+      // "small percent". At this point we should be within our rights to mark
+      // anything as invalid since the URL is corrupt or malicious. The code
+      // happens to allow ASCII characters (%41 = "A" -> 'a') to be unescaped
+      // and kept as valid, so we validate that behavior here, but this level
+      // of fixing the input shouldn't be seen as required. "%81" is invalid.
+    {"\xef\xb9\xaa" "41.com", L"\xfe6a" L"41.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+    {"%ef%b9%aa" "41.com", L"\xfe6a" L"41.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+    {"\xef\xb9\xaa" "81.com", L"\xfe6a" L"81.com", "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+    {"%ef%b9%aa" "81.com", L"\xfe6a" L"81.com", "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
       // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
     {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x597d", "xn--6qqa088eba", Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
       // See http://unicode.org/cldr/utility/idna.jsp for other
@@ -390,6 +402,13 @@
       // (added in Unicode 4.1). UTS 46 table 4 row (k)
     {"bc\xc8\xba.com", L"bc\x23a.com", "xn--bc-is1a.com",
       Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
+      // Maps U+FF43 (Full Width Small Letter C) to 'c'.
+    {"ab\xef\xbd\x83.xyz", L"ab\xff43.xyz", "abc.xyz",
+      Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""},
+      // Maps U+1D68C (Math Monospace Small C) to 'c'.
+      // U+1D68C = \xD835\xDE8C in UTF-16
+    {"ab\xf0\x9d\x9a\x8c.xyz", L"ab\xd835\xde8c.xyz", "abc.xyz",
+      Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""},
       // BiDi check test
       // "Divehi" in Divehi (Thaana script) ends with BidiClass=NSM.
       // Disallowed in IDNA 2003 but now allowed in UTS 46/IDNA 2008.
@@ -638,7 +657,7 @@
     {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"},
     {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"},
     {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"},
-      // Old trunctations tests.  They're all "BROKEN" now.
+      // Old truncation tests. They're all "BROKEN" now.
     {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", Component(), CanonHostInfo::BROKEN, -1, ""},
     {"192.168.0.257", L"192.168.0.257", "", Component(), CanonHostInfo::BROKEN, -1, ""},
     {"192.168.0xa20001", L"192.168.0xa20001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
@@ -754,16 +773,17 @@
 
     {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", Component(0,13), CanonHostInfo::IPV6, -1, "20010DB8000000000000000000000001"},
 
-      // Can only have one "::" contraction in an IPv6 string literal.
+    // Can only have one "::" contraction in an IPv6 string literal.
     {"[2001::db8::1]", L"[2001::db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-      // No more than 2 consecutive ':'s.
+    // No more than 2 consecutive ':'s.
     {"[2001:db8:::1]", L"[2001:db8:::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
     {"[:::]", L"[:::]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-      // Non-IP addresses due to invalid characters.
+    // Non-IP addresses due to invalid characters.
     {"[2001::.com]", L"[2001::.com]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-      // If there are not enough components, the last one should fill them out.
+    // If there are not enough components, the last one should fill them out.
     // ... omitted at this time ...
-      // Too many components means not an IP address.  Similarly with too few if using IPv4 compat or mapped addresses.
+    // Too many components means not an IP address. Similarly, with too few
+    // if using IPv4 compat or mapped addresses.
     {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
     {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
     {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
@@ -887,7 +907,7 @@
     {"http://user:pass@/", "user:pass@", Component(0, 4), Component(5, 4), true},
     {"http://%2540:bar@domain.com/", "%2540:bar@", Component(0, 5), Component(6, 3), true },
 
-      // IE7 compatability: old versions allowed backslashes in usernames, but
+      // IE7 compatibility: old versions allowed backslashes in usernames, but
       // IE7 does not. We disallow it as well.
     {"ftp://me\\mydomain:pass@foo.com/", "", Component(0, -1), Component(0, -1), true},
   };
@@ -943,7 +963,7 @@
   // buffer. The parser unit tests will test scanning the number correctly.
   //
   // Note that the CanonicalizePort will always prepend a colon to the output
-  // to separate it from the colon that it assumes preceeds it.
+  // to separate it from the colon that it assumes precedes it.
   struct PortCase {
     const char* input;
     int default_port;
@@ -1059,6 +1079,21 @@
     {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true},
       // @ should be passed through unchanged (escaped or unescaped).
     {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true},
+      // Nested escape sequences should result in escaping the leading '%' if
+      // unescaping would result in a new escape sequence.
+    {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true},
+    {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true},
+    {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true},
+      // Make sure truncated "nested" escapes don't result in reading off the
+      // string end.
+    {"/%%41", L"/%%41", "/%A", Component(0, 3), true},
+      // Don't unescape the leading '%' if unescaping doesn't result in a valid
+      // new escape sequence.
+    {"/%%470", L"/%%470", "/%G0", Component(0, 4), true},
+    {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true},
+      // Don't erroneously downcast a UTF-16 character in a way that makes it
+      // look like part of an escape sequence.
+    {NULL, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true},
 
     // ----- encoding tests -----
       // Basic conversions
@@ -1300,6 +1335,13 @@
     {"wss://foo:81/", "wss://foo:81/", true},
     {"wss://foo:443/", "wss://foo/", true},
     {"wss://foo:815/", "wss://foo:815/", true},
+
+      // This particular code path ends up "backing up" to replace an invalid
+      // host ICU generated with an escaped version. Test that in the context
+      // of a full URL to make sure the backing up doesn't mess up the non-host
+      // parts of the URL. "EF B9 AA" is U+FE6A which is a type of percent that
+      // ICU will convert to an ASCII one, generating "%81".
+    {"ws:)W\x1eW\xef\xb9\xaa""81:80/", "ws://%29w%1ew%81/", false},
   };
 
   for (size_t i = 0; i < arraysize(cases); i++) {
@@ -1329,7 +1371,7 @@
     {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw", "host.com", "99", "/path", "query", "ref", "https://me:pw@host.com:99/path?query#ref"},
       // Replace nothing
     {"http://a:b@google.com:22/foo?baz@cat", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "http://a:b@google.com:22/foo?baz@cat"},
-      // Replace scheme with filesystem.  The result is garbage, but you asked
+      // Replace scheme with filesystem. The result is garbage, but you asked
       // for it.
     {"http://a:b@google.com:22/foo?baz@cat", "filesystem", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem://a:b@google.com:22/foo?baz@cat"},
   };
@@ -1594,7 +1636,7 @@
     {"file:", "file:///", true, Component(), Component(7, 1)},
     {"file:UNChost/path", "file://unchost/path", true, Component(7, 7), Component(14, 5)},
       // CanonicalizeFileURL supports absolute Windows style paths for IE
-      // compatability. Note that the caller must decide that this is a file
+      // compatibility. Note that the caller must decide that this is a file
       // URL itself so it can call the file canonicalizer. This is usually
       // done automatically as part of relative URL resolving.
     {"c:\\foo\\bar", "file:///C:/foo/bar", true, Component(), Component(7, 11)},
@@ -1605,7 +1647,7 @@
     {"\\\\server\\file", "file://server/file", true, Component(7, 6), Component(13, 5)},
     {"/\\server/file", "file://server/file", true, Component(7, 6), Component(13, 5)},
       // We should preserve the number of slashes after the colon for IE
-      // compatability, except when there is none, in which case we should
+      // compatibility, except when there is none, in which case we should
       // add one.
     {"file:c:foo/bar.html", "file:///C:/foo/bar.html", true, Component(), Component(7, 16)},
     {"file:/\\/\\C:\\\\//foo\\bar.html", "file:///C:////foo/bar.html", true, Component(), Component(7, 19)},
@@ -1807,7 +1849,7 @@
 
 TEST(URLCanonTest, _itoa_s) {
   // We fill the buffer with 0xff to ensure that it's getting properly
-  // null-terminated.  We also allocate one byte more than what we tell
+  // null-terminated. We also allocate one byte more than what we tell
   // _itoa_s about, and ensure that the extra byte is untouched.
   char buf[6];
   memset(buf, 0xff, sizeof(buf));
@@ -1846,7 +1888,7 @@
 
 TEST(URLCanonTest, _itow_s) {
   // We fill the buffer with 0xff to ensure that it's getting properly
-  // null-terminated.  We also allocate one byte more than what we tell
+  // null-terminated. We also allocate one byte more than what we tell
   // _itoa_s about, and ensure that the extra byte is untouched.
   base::char16 buf[6];
   const char fill_mem = 0xff;
@@ -1956,6 +1998,8 @@
       // Non-hierarchical base: absolute input should succeed.
     {"data:foobar", false, false, "http://host/", true, false, false, NULL},
     {"data:foobar", false, false, "http:host", true, false, false, NULL},
+      // Non-hierarchical base: empty URL should give error.
+    {"data:foobar", false, false, "", false, false, false, NULL},
       // Invalid schemes should be treated as relative.
     {"http://foo/bar", true, false, "./asd:fgh", true, true, true, "http://foo/asd:fgh"},
     {"http://foo/bar", true, false, ":foo", true, true, true, "http://foo/:foo"},
@@ -2022,7 +2066,7 @@
       // which is what is required.
     {"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false, "file://host:80/bar.txt"},
       // Filesystem URL tests; filesystem URLs are only valid and relative if
-      // they have no scheme, e.g. "./index.html".  There's no valid equivalent
+      // they have no scheme, e.g. "./index.html". There's no valid equivalent
       // to http:index.html.
     {"filesystem:http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
     {"filesystem:http://host/t/path", true, false, "filesystem:https://host/t/path2", true, false, false, NULL},
@@ -2090,10 +2134,10 @@
   }
 }
 
-// It used to be when we did a replacement with a long buffer of UTF-16
-// characters, we would get invalid data in the URL. This is because the buffer
-// it used to hold the UTF-8 data was resized, while some pointers were still
-// kept to the old buffer that was removed.
+// It used to be the case that when we did a replacement with a long buffer of
+// UTF-16 characters, we would get invalid data in the URL. This is because the
+// buffer that it used to hold the UTF-8 data was resized, while some pointers
+// were still kept to the old buffer that was removed.
 TEST(URLCanonTest, ReplacementOverflow) {
   const char src[] = "file:///C:/foo/bar";
   int src_len = static_cast<int>(strlen(src));
@@ -2101,7 +2145,7 @@
   ParseFileURL(src, src_len, &parsed);
 
   // Override two components, the path with something short, and the query with
-  // sonething long enough to trigger the bug.
+  // something long enough to trigger the bug.
   Replacements<base::char16> repl;
   base::string16 new_query;
   for (int i = 0; i < 4800; i++)

diff --git a/src/url/url_constants.cc b/src/url/url_constants.cc
index 2dc1478..0388fbc 100644
--- a/src/url/url_constants.cc
+++ b/src/url/url_constants.cc

@@ -25,4 +25,6 @@
 
 const char kStandardSchemeSeparator[] = "://";
 
+const size_t kMaxURLChars = 2 * 1024 * 1024;
+
 }  // namespace url

diff --git a/src/url/url_constants.h b/src/url/url_constants.h
index c48dafc..fa71164 100644
--- a/src/url/url_constants.h
+++ b/src/url/url_constants.h

@@ -5,6 +5,8 @@
 #ifndef URL_URL_CONSTANTS_H_
 #define URL_URL_CONSTANTS_H_
 
+#include <stddef.h>
+
 #include "url/url_export.h"
 
 namespace url {
@@ -30,6 +32,8 @@
 // Used to separate a standard scheme and the hostname: "://".
 URL_EXPORT extern const char kStandardSchemeSeparator[];
 
+URL_EXPORT extern const size_t kMaxURLChars;
+
 }  // namespace url
 
 #endif  // URL_URL_CONSTANTS_H_

diff --git a/src/url/url_parse.h b/src/url/url_parse.h
deleted file mode 100644
index 3b9c546..0000000
--- a/src/url/url_parse.h
+++ /dev/null

@@ -1,11 +0,0 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef URL_URL_PARSE_H_
-#define URL_URL_PARSE_H_
-
-// TODO(tfarina): Remove this file when the callers are updated.
-#include "url/third_party/mozilla/url_parse.h"
-
-#endif  // URL_URL_PARSE_H_

diff --git a/src/url/url_parse_file.cc b/src/url/url_parse_file.cc
index c08ddc6..fcbb12d 100644
--- a/src/url/url_parse_file.cc
+++ b/src/url/url_parse_file.cc

@@ -3,8 +3,8 @@
 // found in the LICENSE file.
 
 #include "base/logging.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_file.h"
-#include "url/url_parse.h"
 #include "url/url_parse_internal.h"
 
 // Interesting IE file:isms...

diff --git a/src/url/url_parse_internal.h b/src/url/url_parse_internal.h
index 4070b7e..7630878 100644
--- a/src/url/url_parse_internal.h
+++ b/src/url/url_parse_internal.h

@@ -7,11 +7,11 @@
 
 // Contains common inline helper functions used by the URL parsing routines.
 
-#include "url/url_parse.h"
+#include "url/third_party/mozilla/url_parse.h"
 
 namespace url {
 
-// We treat slashes and backslashes the same for IE compatability.
+// We treat slashes and backslashes the same for IE compatibility.
 inline bool IsURLSlash(base::char16 ch) {
   return ch == '/' || ch == '\\';
 }

diff --git a/src/url/url_parse_unittest.cc b/src/url/url_parse_unittest.cc
index dedd663..c0d5960 100644
--- a/src/url/url_parse_unittest.cc
+++ b/src/url/url_parse_unittest.cc

@@ -2,11 +2,13 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include "url/url_parse.h"
+#include "url/third_party/mozilla/url_parse.h"
+
+#include <stddef.h>
 
 #include "base/macros.h"
 #include "testing/base/public/gunit.h"
-#include "url/url_parse.h"
+#include "url/third_party/mozilla/url_parse.h"
 
 // Interesting IE file:isms...
 //
@@ -90,13 +92,13 @@
 bool ComponentMatches(const char* input,
                       const char* reference,
                       const Component& component) {
-  // If the component is nonexistant (length == -1), it should begin at 0.
+  // If the component is nonexistent (length == -1), it should begin at 0.
   EXPECT_TRUE(component.len >= 0 || component.len == -1);
 
   // Begin should be valid.
   EXPECT_LE(0, component.begin);
 
-  // A NULL reference means the component should be nonexistant.
+  // A NULL reference means the component should be nonexistent.
   if (!reference)
     return component.len == -1;
   if (component.len < 0)
@@ -345,7 +347,7 @@
 
 TEST(URLParser, PathURL) {
   // Declared outside for loop to try to catch cases in init() where we forget
-  // to reset something that is reset by the construtor.
+  // to reset something that is reset by the constructor.
   Parsed parsed;
   for (size_t i = 0; i < arraysize(path_cases); i++) {
     const char* url = path_cases[i].input;
@@ -356,7 +358,7 @@
     EXPECT_TRUE(ComponentMatches(url, path_cases[i].path, parsed.GetContent()))
         << i;
 
-    // The remaining components are never used for path urls.
+    // The remaining components are never used for path URLs.
     ExpectInvalidComponent(parsed.username);
     ExpectInvalidComponent(parsed.password);
     ExpectInvalidComponent(parsed.host);
@@ -537,7 +539,7 @@
     Component key, value;
     if (!ExtractQueryKeyValue(url, &query, &key, &value)) {
       if (parameter >= i && !expected_key)
-        return true;  // Expected nonexistant key, got one.
+        return true;  // Expected nonexistent key, got one.
       return false;  // Not enough keys.
     }
 
@@ -613,7 +615,7 @@
 
 TEST(URLParser, MailtoUrl) {
   // Declared outside for loop to try to catch cases in init() where we forget
-  // to reset something that is reset by the construtor.
+  // to reset something that is reset by the constructor.
   Parsed parsed;
   for (size_t i = 0; i < arraysize(mailto_cases); ++i) {
     const char* url = mailto_cases[i].input;
@@ -625,7 +627,7 @@
     EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].query, parsed.query));
     EXPECT_EQ(PORT_UNSPECIFIED, port);
 
-    // The remaining components are never used for mailto urls.
+    // The remaining components are never used for mailto URLs.
     ExpectInvalidComponent(parsed.username);
     ExpectInvalidComponent(parsed.password);
     ExpectInvalidComponent(parsed.port);
@@ -645,7 +647,7 @@
 
 TEST(URLParser, FileSystemURL) {
   // Declared outside for loop to try to catch cases in init() where we forget
-  // to reset something that is reset by the construtor.
+  // to reset something that is reset by the constructor.
   Parsed parsed;
   for (size_t i = 0; i < arraysize(filesystem_cases); i++) {
     const FileSystemURLParseCase* parsecase = &filesystem_cases[i];
@@ -667,7 +669,7 @@
       int port = ParsePort(url, parsed.inner_parsed()->port);
       EXPECT_EQ(parsecase->inner_port, port);
 
-      // The remaining components are never used for filesystem urls.
+      // The remaining components are never used for filesystem URLs.
       ExpectInvalidComponent(parsed.inner_parsed()->query);
       ExpectInvalidComponent(parsed.inner_parsed()->ref);
     }
@@ -676,7 +678,7 @@
     EXPECT_TRUE(ComponentMatches(url, parsecase->query, parsed.query));
     EXPECT_TRUE(ComponentMatches(url, parsecase->ref, parsed.ref));
 
-    // The remaining components are never used for filesystem urls.
+    // The remaining components are never used for filesystem URLs.
     ExpectInvalidComponent(parsed.username);
     ExpectInvalidComponent(parsed.password);
     ExpectInvalidComponent(parsed.host);

diff --git a/src/url/url_test_utils.h b/src/url/url_test_utils.h
index 6e66e85..174e5e0 100644
--- a/src/url/url_test_utils.h
+++ b/src/url/url_test_utils.h

@@ -19,7 +19,7 @@
 namespace test_utils {
 
 // Converts a UTF-16 string from native wchar_t format to char16, by
-// truncating the high 32 bits.  This is not meant to handle true UTF-32
+// truncating the high 32 bits. This is not meant to handle true UTF-32
 // encoded strings.
 inline base::string16 WStringToUTF16(const wchar_t* src) {
   base::string16 str;
@@ -30,7 +30,7 @@
   return str;
 }
 
-// Converts a string from UTF-8 to UTF-16
+// Converts a string from UTF-8 to UTF-16.
 inline base::string16 ConvertUTF8ToUTF16(const std::string& src) {
   int length = static_cast<int>(src.length());
   EXPECT_LT(length, 1024);
@@ -39,7 +39,7 @@
   return base::string16(output.data(), output.length());
 }
 
-// Converts a string from UTF-16 to UTF-8
+// Converts a string from UTF-16 to UTF-8.
 inline std::string ConvertUTF16ToUTF8(const base::string16& src) {
   std::string str;
   StdStringCanonOutput output(&str);

diff --git a/src/url/url_util.cc b/src/url/url_util.cc
index f4246e9..bb43a4a 100644
--- a/src/url/url_util.cc
+++ b/src/url/url_util.cc

@@ -4,6 +4,7 @@
 
 #include "url/url_util.h"
 
+#include <stddef.h>
 #include <string.h>
 #include <vector>
 
@@ -11,6 +12,7 @@
 #include "base/heap-checker.h"
 #endif
 #include "base/logging.h"
+#include "base/strings/string_util.h"
 #include "url/url_canon_internal.h"
 #include "url/url_file.h"
 #include "url/url_util_internal.h"
@@ -19,51 +21,69 @@
 
 namespace {
 
-// ASCII-specific tolower.  The standard library's tolower is locale sensitive,
-// so we don't want to use it here.
-template<class Char>
-inline Char ToLowerASCII(Char c) {
-  return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
-}
-
-// Backend for LowerCaseEqualsASCII.
-template<typename Iter>
-inline bool DoLowerCaseEqualsASCII(Iter a_begin, Iter a_end, const char* b) {
-  for (Iter it = a_begin; it != a_end; ++it, ++b) {
-    if (!*b || ToLowerASCII(*it) != *b)
-      return false;
-  }
-  return *b == 0;
-}
-
 const int kNumStandardURLSchemes = 8;
-const char* kStandardURLSchemes[kNumStandardURLSchemes] = {
-  kHttpScheme,
-  kHttpsScheme,
-  kFileScheme,  // Yes, file urls can have a hostname!
-  kFtpScheme,
-  kGopherScheme,
-  kWsScheme,    // WebSocket.
-  kWssScheme,   // WebSocket secure.
-  kFileSystemScheme,
+const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {
+  {kHttpScheme, SCHEME_WITH_PORT},
+  {kHttpsScheme, SCHEME_WITH_PORT},
+  // Yes, file URLs can have a hostname, so file URLs should be handled as
+  // "standard". File URLs never have a port as specified by the SchemeType
+  // field.
+  {kFileScheme, SCHEME_WITHOUT_PORT},
+  {kFtpScheme, SCHEME_WITH_PORT},
+  {kGopherScheme, SCHEME_WITH_PORT},
+  {kWsScheme, SCHEME_WITH_PORT},    // WebSocket.
+  {kWssScheme, SCHEME_WITH_PORT},   // WebSocket secure.
+  {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY},
 };
 
-// List of the currently installed standard schemes. This list is lazily
-// initialized by InitStandardSchemes and is leaked on shutdown to prevent
-// any destructors from being called that will slow us down or cause problems.
-std::vector<const char*>* standard_schemes = NULL;
+const int kNumReferrerURLSchemes = 2;
+const SchemeWithType kReferrerURLSchemes[kNumReferrerURLSchemes] = {
+  {kHttpScheme, SCHEME_WITH_PORT},
+  {kHttpsScheme, SCHEME_WITH_PORT},
+};
 
-// See the LockStandardSchemes declaration in the header.
-bool standard_schemes_locked = false;
+// Lists of the currently installed standard and referrer schemes. These lists
+// are lazily initialized by InitStandardSchemes and InitReferrerSchemes and are
+// leaked on shutdown to prevent any destructors from being called that will
+// slow us down or cause problems.
+std::vector<SchemeWithType>* standard_schemes = nullptr;
+std::vector<SchemeWithType>* referrer_schemes = nullptr;
 
-// Ensures that the standard_schemes list is initialized, does nothing if it
-// already has values.
-void InitStandardSchemes() {
-  if (standard_schemes)
+// See the LockSchemeRegistries declaration in the header.
+bool scheme_registries_locked = false;
+
+// This template converts a given character type to the corresponding
+// StringPiece type.
+template<typename CHAR> struct CharToStringPiece {
+};
+template<> struct CharToStringPiece<char> {
+  typedef base::StringPiece Piece;
+};
+template<> struct CharToStringPiece<base::char16> {
+  typedef base::StringPiece16 Piece;
+};
+
+void InitSchemes(std::vector<SchemeWithType>** schemes,
+                 const SchemeWithType* initial_schemes,
+                 size_t size) {
+  if (*schemes)
     return;
-  standard_schemes = new std::vector<const char*>;
-  for (int i = 0; i < kNumStandardURLSchemes; i++)
-    standard_schemes->push_back(kStandardURLSchemes[i]);
+  // Range-construct so the list holds exactly |size| entries; presizing the
+  // vector and then push_back()ing left |size| empty entries in front.
+  *schemes = new std::vector<SchemeWithType>(initial_schemes,
+                                             initial_schemes + size);
+}
+
+// Ensures that the standard_schemes list is initialized, does nothing if
+// it already has values.
+void InitStandardSchemes() {
+  InitSchemes(&standard_schemes, kStandardURLSchemes, kNumStandardURLSchemes);
+}
+
+// Ensures that the referrer_schemes list is initialized, does nothing if
+// it already has values.
+void InitReferrerSchemes() {
+  InitSchemes(&referrer_schemes, kReferrerURLSchemes, kNumReferrerURLSchemes);
 }
 
 // Given a string and a range inside the string, compares it to the given
@@ -74,28 +94,41 @@
                                      const char* compare_to) {
   if (!component.is_nonempty())
     return compare_to[0] == 0;  // When component is empty, match empty scheme.
-  return LowerCaseEqualsASCII(&spec[component.begin],
-                              &spec[component.end()],
-                              compare_to);
+  return base::LowerCaseEqualsASCII(
+      typename CharToStringPiece<CHAR>::Piece(
+          &spec[component.begin], component.len),
+      compare_to);
 }
 
-// Returns true if the given scheme identified by |scheme| within |spec| is one
-// of the registered "standard" schemes.
+// Returns true and sets |type| to the SchemeType of the given scheme
+// identified by |scheme| within |spec| if in |schemes|.
 template<typename CHAR>
-bool DoIsStandard(const CHAR* spec, const Component& scheme) {
+bool DoIsInSchemes(const CHAR* spec,
+                   const Component& scheme,
+                   SchemeType* type,
+                   const std::vector<SchemeWithType>& schemes) {
   if (!scheme.is_nonempty())
     return false;  // Empty or invalid schemes are non-standard.
 
-  InitStandardSchemes();
-  for (size_t i = 0; i < standard_schemes->size(); i++) {
-    if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()],
-                             standard_schemes->at(i)))
+  for (const SchemeWithType& scheme_with_type : schemes) {
+    if (base::LowerCaseEqualsASCII(typename CharToStringPiece<CHAR>::Piece(
+                                       &spec[scheme.begin], scheme.len),
+                                   scheme_with_type.scheme)) {
+      *type = scheme_with_type.type;
       return true;
+    }
   }
   return false;
 }
 
 template<typename CHAR>
+bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) {
+  InitStandardSchemes();
+  return DoIsInSchemes(spec, scheme, type, *standard_schemes);
+}
+
+
+template<typename CHAR>
 bool DoFindAndCompareScheme(const CHAR* str,
                             int str_len,
                             const char* compare,
@@ -136,7 +169,7 @@
   Parsed parsed_input;
 #ifdef WIN32
   // For Windows, we allow things that look like absolute Windows paths to be
-  // fixed up magically to file URLs. This is done for IE compatability. For
+  // fixed up magically to file URLs. This is done for IE compatibility. For
   // example, this will change "c:/foo" into a file URL rather than treating
   // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").
   // There is similar logic in url_canon_relative.cc for
@@ -160,6 +193,7 @@
   // This is the parsed version of the input URL, we have to canonicalize it
   // before storing it in our object.
   bool success;
+  SchemeType unused_scheme_type = SCHEME_WITH_PORT;
   if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) {
     // File URLs are special.
     ParseFileURL(spec, spec_len, &parsed_input);
@@ -172,20 +206,21 @@
                                         charset_converter, output,
                                         output_parsed);
 
-  } else if (DoIsStandard(spec, scheme)) {
+  } else if (DoIsStandard(spec, scheme, &unused_scheme_type)) {
     // All "normal" URLs.
     ParseStandardURL(spec, spec_len, &parsed_input);
     success = CanonicalizeStandardURL(spec, spec_len, parsed_input,
                                       charset_converter, output, output_parsed);
 
   } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) {
-    // Mailto are treated like a standard url with only a scheme, path, query
+    // Mailto URLs are treated like standard URLs, with only a scheme, path,
+    // and query.
     ParseMailtoURL(spec, spec_len, &parsed_input);
     success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output,
                                     output_parsed);
 
   } else {
-    // "Weird" URLs like data: and javascript:
+    // "Weird" URLs like data: and javascript:.
     ParsePathURL(spec, spec_len, trim_path_end, &parsed_input);
     success = CanonicalizePathURL(spec, spec_len, parsed_input, output,
                                   output_parsed);
@@ -220,9 +255,10 @@
     base_is_hierarchical = num_slashes > 0;
   }
 
+  SchemeType unused_scheme_type = SCHEME_WITH_PORT;
   bool standard_base_scheme =
       base_parsed.scheme.is_nonempty() &&
-      DoIsStandard(base_spec, base_parsed.scheme);
+      DoIsStandard(base_spec, base_parsed.scheme, &unused_scheme_type);
 
   bool is_relative;
   Component relative_component;
@@ -275,7 +311,7 @@
                          CanonOutput* output,
                          Parsed* out_parsed) {
   // If the scheme is overridden, just do a simple string substitution and
-  // reparse the whole thing. There are lots of edge cases that we really don't
+  // re-parse the whole thing. There are lots of edge cases that we really don't
   // want to deal with. Like what happens if I replace "http://e:8080/foo"
   // with a file. Does it become "file:///E:/8080/foo" where the port number
   // becomes part of the path? Parsing that string as a file URL says "yes"
@@ -322,7 +358,7 @@
     // getting replaced here. If ReplaceComponents didn't re-check everything,
     // we wouldn't know if something *not* getting replaced is a problem.
     // If the scheme-specific replacers are made more intelligent so they don't
-    // re-check everything, we should instead recanonicalize the whole thing
+    // re-check everything, we should instead re-canonicalize the whole thing
     // after this call to check validity (this assumes replacing the scheme is
     // much much less common than other types of replacements, like clearing the
     // ref).
@@ -343,7 +379,8 @@
     return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter,
                                 output, out_parsed);
   }
-  if (DoIsStandard(spec, parsed.scheme)) {
+  SchemeType unused_scheme_type = SCHEME_WITH_PORT;
+  if (DoIsStandard(spec, parsed.scheme, &unused_scheme_type)) {
     return ReplaceStandardURL(spec, parsed, replacements, charset_converter,
                               output, out_parsed);
   }
@@ -355,36 +392,25 @@
   return ReplacePathURL(spec, parsed, replacements, output, out_parsed);
 }
 
-}  // namespace
-
-void Initialize() {
-  InitStandardSchemes();
-}
-
-void Shutdown() {
-  if (standard_schemes) {
-    delete standard_schemes;
-    standard_schemes = NULL;
-  }
-}
-
-void AddStandardScheme(const char* new_scheme) {
-  // If this assert triggers, it means you've called AddStandardScheme after
-  // LockStandardSchemes have been called (see the header file for
-  // LockStandardSchemes for more).
+void DoAddScheme(const char* new_scheme,
+                 SchemeType type,
+                 std::vector<SchemeWithType>* schemes) {
+  DCHECK(schemes);
+  // If this assert triggers, it means you've called Add*Scheme after
+  // LockSchemeRegistries has been called (see the header file for
+  // LockSchemeRegistries for more).
   //
-  // This normally means you're trying to set up a new standard scheme too late
-  // in your application's init process. Locate where your app does this
-  // initialization and calls LockStandardScheme, and add your new standard
-  // scheme there.
-  DCHECK(!standard_schemes_locked) <<
-      "Trying to add a standard scheme after the list has been locked.";
+  // This normally means you're trying to set up a new scheme too late in your
+  // application's init process. Locate where your app does this initialization
+  // and calls LockSchemeRegistries, and add your new scheme there.
+  DCHECK(!scheme_registries_locked)
+      << "Trying to add a scheme after the lists have been locked.";
 
   size_t scheme_len = strlen(new_scheme);
   if (scheme_len == 0)
     return;
 
-  // Dulicate the scheme into a new buffer and add it to the list of standard
+  // Duplicate the scheme into a new buffer and add it to the given list of
   // schemes. This pointer will be leaked on shutdown.
   char* dup_scheme = new char[scheme_len + 1];
 #ifdef GOOGLEURL_IN_GOOGLE3
@@ -392,20 +418,64 @@
 #endif
   memcpy(dup_scheme, new_scheme, scheme_len + 1);
 
-  InitStandardSchemes();
-  standard_schemes->push_back(dup_scheme);
+  SchemeWithType scheme_with_type;
+  scheme_with_type.scheme = dup_scheme;
+  scheme_with_type.type = type;
+  schemes->push_back(scheme_with_type);
 }
 
-void LockStandardSchemes() {
-  standard_schemes_locked = true;
+}  // namespace
+
+void Initialize() {
+  InitStandardSchemes();
+  InitReferrerSchemes();
+}
+
+void Shutdown() {
+  if (standard_schemes) {
+    delete standard_schemes;
+    standard_schemes = NULL;
+  }
+  if (referrer_schemes) {
+    delete referrer_schemes;
+    referrer_schemes = NULL;
+  }
+}
+
+void AddStandardScheme(const char* new_scheme, SchemeType type) {
+  InitStandardSchemes();
+  DoAddScheme(new_scheme, type, standard_schemes);
+}
+
+void AddReferrerScheme(const char* new_scheme, SchemeType type) {
+  InitReferrerSchemes();
+  DoAddScheme(new_scheme, type, referrer_schemes);
+}
+
+void LockSchemeRegistries() {
+  scheme_registries_locked = true;
 }
 
 bool IsStandard(const char* spec, const Component& scheme) {
-  return DoIsStandard(spec, scheme);
+  SchemeType unused_scheme_type;
+  return DoIsStandard(spec, scheme, &unused_scheme_type);
+}
+
+bool GetStandardSchemeType(const char* spec,
+                           const Component& scheme,
+                           SchemeType* type) {
+  return DoIsStandard(spec, scheme, type);
 }
 
 bool IsStandard(const base::char16* spec, const Component& scheme) {
-  return DoIsStandard(spec, scheme);
+  SchemeType unused_scheme_type;
+  return DoIsStandard(spec, scheme, &unused_scheme_type);
+}
+
+bool IsReferrerScheme(const char* spec, const Component& scheme) {
+  InitReferrerSchemes();
+  SchemeType unused_scheme_type;
+  return DoIsInSchemes(spec, scheme, &unused_scheme_type, *referrer_schemes);
 }
 
 bool FindAndCompareScheme(const char* str,
@@ -490,31 +560,6 @@
                              charset_converter, output, out_parsed);
 }
 
-// Front-ends for LowerCaseEqualsASCII.
-bool LowerCaseEqualsASCII(const char* a_begin,
-                          const char* a_end,
-                          const char* b) {
-  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
-}
-
-bool LowerCaseEqualsASCII(const char* a_begin,
-                          const char* a_end,
-                          const char* b_begin,
-                          const char* b_end) {
-  while (a_begin != a_end && b_begin != b_end &&
-         ToLowerASCII(*a_begin) == *b_begin) {
-    a_begin++;
-    b_begin++;
-  }
-  return a_begin == a_end && b_begin == b_end;
-}
-
-bool LowerCaseEqualsASCII(const base::char16* a_begin,
-                          const base::char16* a_end,
-                          const char* b) {
-  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
-}
-
 void DecodeURLEscapeSequences(const char* input,
                               int length,
                               CanonOutputW* output) {

diff --git a/src/url/url_util.h b/src/url/url_util.h
index 458d1e8..a209a61 100644
--- a/src/url/url_util.h
+++ b/src/url/url_util.h

@@ -8,10 +8,10 @@
 #include <string>
 
 #include "base/strings/string16.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_constants.h"
 #include "url/url_export.h"
-#include "url/url_parse.h"
 
 namespace url {
 
@@ -20,14 +20,13 @@
 // Initialization is NOT required, it will be implicitly initialized when first
 // used. However, this implicit initialization is NOT threadsafe. If you are
 // using this library in a threaded environment and don't have a consistent
-// "first call" (an example might be calling "AddStandardScheme" with your
-// special application-specific schemes) then you will want to call initialize
-// before spawning any threads.
+// "first call" (an example might be calling Add*Scheme with your special
+// application-specific schemes) then you will want to call initialize before
+// spawning any threads.
 //
-// It is OK to call this function more than once, subsequent calls will simply
-// "noop", unless Shutdown() was called in the mean time. This will also be a
-// "noop" if other calls to the library have forced an initialization
-// beforehand.
+// It is OK to call this function more than once; subsequent calls will be
+// no-ops, unless Shutdown was called in the meantime. This will also be a
+// no-op if other calls to the library have forced an initialization beforehand.
 URL_EXPORT void Initialize();
 
 // Cleanup is not required, except some strings may leak. For most user
@@ -38,25 +37,57 @@
 
 // Schemes --------------------------------------------------------------------
 
-// Adds an application-defined scheme to the internal list of "standard" URL
-// schemes. This function is not threadsafe and can not be called concurrently
-// with any other url_util function. It will assert if the list of standard
-// schemes has been locked (see LockStandardSchemes).
-URL_EXPORT void AddStandardScheme(const char* new_scheme);
+// Types of a scheme representing the requirements on the data represented by
+// the authority component of a URL with the scheme.
+enum SchemeType {
+  // The authority component of a URL with the scheme, if any, has a port
+  // (the default value may be omitted in a serialization).
+  SCHEME_WITH_PORT,
+  // The authority component of a URL with the scheme, if any, doesn't have a
+  // port.
+  SCHEME_WITHOUT_PORT,
+  // A URL with the scheme doesn't have the authority component.
+  SCHEME_WITHOUT_AUTHORITY,
+};
 
-// Sets a flag to prevent future calls to AddStandardScheme from succeeding.
+// A pair for representing a standard scheme name and the SchemeType for it.
+struct URL_EXPORT SchemeWithType {
+  const char* scheme;
+  SchemeType type;
+};
+
+// Adds an application-defined scheme to the internal list of "standard-format"
+// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
+// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
+//
+// This function is not threadsafe and cannot be called concurrently with any
+// other url_util function. It will assert if the lists of schemes have
+// been locked (see LockSchemeRegistries).
+URL_EXPORT void AddStandardScheme(const char* new_scheme,
+                                  SchemeType scheme_type);
+
+// Adds an application-defined scheme to the internal list of schemes allowed
+// for referrers.
+//
+// This function is not threadsafe and cannot be called concurrently with any
+// other url_util function. It will assert if the lists of schemes have
+// been locked (see LockSchemeRegistries).
+URL_EXPORT void AddReferrerScheme(const char* new_scheme,
+                                  SchemeType scheme_type);
+
+// Sets a flag to prevent future calls to Add*Scheme from succeeding.
 //
 // This is designed to help prevent errors for multithreaded applications.
-// Normal usage would be to call AddStandardScheme for your custom schemes at
-// the beginning of program initialization, and then LockStandardSchemes. This
-// prevents future callers from mistakenly calling AddStandardScheme when the
+// Normal usage would be to call Add*Scheme for your custom schemes at
+// the beginning of program initialization, and then LockSchemeRegistries. This
+// prevents future callers from mistakenly calling Add*Scheme when the
 // program is running with multiple threads, where such usage would be
 // dangerous.
 //
-// We could have had AddStandardScheme use a lock instead, but that would add
+// We could have had Add*Scheme use a lock instead, but that would add
 // some platform-specific dependencies we don't otherwise have now, and is
 // overkill considering the normal usage is so simple.
-URL_EXPORT void LockStandardSchemes();
+URL_EXPORT void LockSchemeRegistries();
 
 // Locates the scheme in the given string and places it into |found_scheme|,
 // which may be NULL to indicate the caller does not care about the range.
@@ -85,18 +116,21 @@
                               compare, found_scheme);
 }
 
-// Returns true if the given string represents a standard URL. This means that
-// either the scheme is in the list of known standard schemes.
+// Returns true if the given scheme identified by |scheme| within |spec| is in
+// the list of known standard-format schemes (see AddStandardScheme).
 URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);
 URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);
 
-// TODO(brettw) remove this. This is a temporary compatibility hack to avoid
-// breaking the WebKit build when this version is synced via Chrome.
-inline bool IsStandard(const char* spec,
-                       int spec_len,
-                       const Component& scheme) {
-  return IsStandard(spec, scheme);
-}
+// Returns true if the given scheme identified by |scheme| within |spec| is in
+// the list of allowed schemes for referrers (see AddReferrerScheme).
+URL_EXPORT bool IsReferrerScheme(const char* spec, const Component& scheme);
+
+// Returns true and sets |type| to the SchemeType of the given scheme
+// identified by |scheme| within |spec| if the scheme is in the list of known
+// standard-format schemes (see AddStandardScheme).
+URL_EXPORT bool GetStandardSchemeType(const char* spec,
+                                      const Component& scheme,
+                                      SchemeType* type);
 
 // URL library wrappers -------------------------------------------------------
 
@@ -150,7 +184,7 @@
                                 CanonOutput* output,
                                 Parsed* output_parsed);
 
-// Replaces components in the given VALID input url. The new canonical URL info
+// Replaces components in the given VALID input URL. The new canonical URL info
 // is written to output and out_parsed.
 //
 // Returns true if the resulting URL is valid.
@@ -172,29 +206,12 @@
 
 // String helper functions ----------------------------------------------------
 
-// Compare the lower-case form of the given string against the given ASCII
-// string.  This is useful for doing checking if an input string matches some
-// token, and it is optimized to avoid intermediate string copies.
-//
-// The versions of this function that don't take a b_end assume that the b
-// string is NULL terminated.
-URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
-                                     const char* a_end,
-                                     const char* b);
-URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
-                                     const char* a_end,
-                                     const char* b_begin,
-                                     const char* b_end);
-URL_EXPORT bool LowerCaseEqualsASCII(const base::char16* a_begin,
-                                     const base::char16* a_end,
-                                     const char* b);
-
 // Unescapes the given string using URL escaping rules.
 URL_EXPORT void DecodeURLEscapeSequences(const char* input,
                                          int length,
                                          CanonOutputW* output);
 
-// Escapes the given string as defined by the JS method encodeURIComponent.  See
+// Escapes the given string as defined by the JS method encodeURIComponent. See
 // https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
 URL_EXPORT void EncodeURIComponent(const char* input,
                                    int length,

diff --git a/src/url/url_util_internal.h b/src/url/url_util_internal.h
index c72598f..756c736 100644
--- a/src/url/url_util_internal.h
+++ b/src/url/url_util_internal.h

@@ -8,7 +8,7 @@
 #include <string>
 
 #include "base/strings/string16.h"
-#include "url/url_parse.h"
+#include "url/third_party/mozilla/url_parse.h"
 
 namespace url {
 

diff --git a/src/url/url_util_unittest.cc b/src/url/url_util_unittest.cc
index 2216252..74db9e5 100644
--- a/src/url/url_util_unittest.cc
+++ b/src/url/url_util_unittest.cc

@@ -2,11 +2,13 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <stddef.h>
+
 #include "base/macros.h"
 #include "testing/base/public/gunit.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_canon_stdstring.h"
-#include "url/url_parse.h"
 #include "url/url_test_utils.h"
 #include "url/url_util.h"
 
@@ -44,7 +46,7 @@
   EXPECT_FALSE(FindAndCompareScheme("", 0, "", &found_scheme));
   EXPECT_TRUE(found_scheme == Component());
 
-  // When there is a whitespace char in scheme, it should canonicalize the url
+  // When there is a whitespace char in scheme, it should canonicalize the URL
   // before comparison.
   const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)";
   EXPECT_TRUE(FindAndCompareScheme(whtspc_str,
@@ -61,6 +63,53 @@
   EXPECT_TRUE(found_scheme == Component(1, 11));
 }
 
+TEST(URLUtilTest, IsStandard) {
+  const char kHTTPScheme[] = "http";
+  EXPECT_TRUE(IsStandard(kHTTPScheme, Component(0, strlen(kHTTPScheme))));
+
+  const char kFooScheme[] = "foo";
+  EXPECT_FALSE(IsStandard(kFooScheme, Component(0, strlen(kFooScheme))));
+}
+
+TEST(URLUtilTest, IsReferrerScheme) {
+  const char kHTTPScheme[] = "http";
+  EXPECT_TRUE(IsReferrerScheme(kHTTPScheme, Component(0, strlen(kHTTPScheme))));
+
+  const char kFooScheme[] = "foo";
+  EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
+}
+
+TEST(URLUtilTest, AddReferrerScheme) {
+  const char kFooScheme[] = "foo";
+  EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
+  AddReferrerScheme(kFooScheme, url::SCHEME_WITHOUT_PORT);
+  EXPECT_TRUE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
+}
+
+TEST(URLUtilTest, GetStandardSchemeType) {
+  url::SchemeType scheme_type;
+
+  const char kHTTPScheme[] = "http";
+  scheme_type = url::SCHEME_WITHOUT_AUTHORITY;
+  EXPECT_TRUE(GetStandardSchemeType(kHTTPScheme,
+                                    Component(0, strlen(kHTTPScheme)),
+                                    &scheme_type));
+  EXPECT_EQ(url::SCHEME_WITH_PORT, scheme_type);
+
+  const char kFilesystemScheme[] = "filesystem";
+  scheme_type = url::SCHEME_WITH_PORT;
+  EXPECT_TRUE(GetStandardSchemeType(kFilesystemScheme,
+                                    Component(0, strlen(kFilesystemScheme)),
+                                    &scheme_type));
+  EXPECT_EQ(url::SCHEME_WITHOUT_AUTHORITY, scheme_type);
+
+  const char kFooScheme[] = "foo";
+  scheme_type = url::SCHEME_WITH_PORT;
+  EXPECT_FALSE(GetStandardSchemeType(kFooScheme,
+                                     Component(0, strlen(kFooScheme)),
+                                     &scheme_type));
+}
+
 TEST(URLUtilTest, ReplaceComponents) {
   Parsed parsed;
   RawCanonOutputT<char> output;
@@ -220,7 +269,7 @@
 }
 
 TEST(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
-  // This tests non-standard (in the sense that GIsStandard() == false)
+  // This tests non-standard (in the sense that IsStandard() == false)
   // hierarchical schemes.
   struct ResolveRelativeCase {
     const char* base;
@@ -273,6 +322,15 @@
       // any URL scheme is we might break javascript: URLs by doing so...
     {"javascript:alert('foo#bar')", "#badfrag", true,
       "javascript:alert('foo#badfrag" },
+      // In this case, the backslashes will not be canonicalized because it's a
+      // non-standard URL, but they will be treated as path separators,
+      // giving the base URL here a path of "\".
+      //
+      // The result here is somewhat arbitrary. One could argue it should be
+      // either "aaa://a\" or "aaa://a/" since the path is being replaced with
+      // the "current directory". But in the context of resolving on data URLs,
+      // adding the requested dot doesn't seem wrong either.
+    {"aaa://a\\", "aaa:.", true, "aaa://a\\." }
   };
 
   for (size_t i = 0; i < arraysize(resolve_non_standard_cases); i++) {
@@ -296,8 +354,8 @@
 }
 
 TEST(URLUtilTest, TestNoRefComponent) {
-  // The hash-mark must be ignored when mailto: scheme is
-  // parsed, even if the url has a base and relative part.
+  // The hash-mark must be ignored when mailto: scheme is parsed,
+  // even if the URL has a base and relative part.
   const char* base = "mailto://to/";
   const char* rel = "any#body";
commit	0edea97f03182e76ae24c25b1f63544c34e7ad6d	[log] [tgz]
author	Devany Sandoval <sandovad@google.com>	Thu Jun 02 09:34:23 2016 -0700
committer	sandovad <sandovad@google.com>	Tue Sep 03 12:52:33 2019 -0700
tree	266602de5d553a98a496e454c922719ae6494546
parent	20c50a19f25dc04563b2963c93b4c237b993132d [diff]