Internal change PiperOrigin-RevId: 152704883 Change-Id: I4d434c21f0a3116bb5923014d37402e9426db26e

commit: 2c4a471e13ed1114224bc2b8c6c901230aac0eaf [log] [tgz]
author: Devany Sandoval <sandovad@google.com> Mon Apr 10 10:41:58 2017 -0700
committer: sandovad <sandovad@google.com> Tue Sep 03 12:54:52 2019 -0700
tree: 4b68400bf7f8be9083903a78afec2363173e1c64
parent: 44ac360d844b6ab5e40070aa367443ff0a9d5ac4 [diff]
diff --git a/import.sh b/import.sh
index 8671302..1d95c38 100644
--- a/import.sh
+++ b/import.sh

@@ -4,7 +4,7 @@
 top=/tmp/chromium
 mkdir $top
 prefix=https://chromium.googlesource.com/chromium/src.git/+archive
-for version in 55.0.2883.87 56.0.2924.87
+for version in 56.0.2924.87 57.0.2987.133
 do
   mkdir $top/$version
   cd $top/$version

diff --git a/src/base/strings/utf_string_conversions.cc b/src/base/strings/utf_string_conversions.cc
index 944078f..ffbfa53 100644
--- a/src/base/strings/utf_string_conversions.cc
+++ b/src/base/strings/utf_string_conversions.cc

@@ -82,10 +82,6 @@
 }
 
 std::string UTF16ToUTF8(StringPiece16 utf16) {
-  if (IsStringASCII(utf16)) {
-    return std::string(utf16.begin(), utf16.end());
-  }
-
   std::string ret;
   // Ignore the success flag of this call, it will do the best it can for
   // invalid input, which is what we want here.

diff --git a/src/build/build_config.h b/src/build/build_config.h
index 5f99fc1..b84a9d3 100644
--- a/src/build/build_config.h
+++ b/src/build/build_config.h

@@ -6,6 +6,7 @@
 //  Operating System:
 //    OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX or LINUX) /
 //    OS_NACL (NACL_SFI or NACL_NONSFI) / OS_NACL_SFI / OS_NACL_NONSFI
+//    OS_CHROMEOS is set by the build system
 //  Compiler:
 //    COMPILER_MSVC / COMPILER_GCC
 //  Processor:

diff --git a/src/url/gurl.cc b/src/url/gurl.cc
index 1d6dfc6..606eedb 100644
--- a/src/url/gurl.cc
+++ b/src/url/gurl.cc

@@ -108,9 +108,6 @@
 template<typename STR>
 void GURL::InitCanonical(url::base::BasicStringPiece<STR> input_spec,
                          bool trim_path_end) {
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  spec_.reserve(input_spec.size() + 32);
   url::StdStringCanonOutput output(&spec_);
   is_valid_ = url::Canonicalize(
       input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
@@ -121,6 +118,8 @@
     inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
                               *parsed_.inner_parsed(), true));
   }
+  // Valid URLs always have non-empty specs.
+  DCHECK(!is_valid_ || !spec_.empty());
 }
 
 void GURL::InitializeFromCanonicalSpec() {
@@ -135,6 +134,7 @@
   // what we would have produced. Skip checking for invalid URLs: they have no
   // meaning and we can't always canonicalize them reproducibly.
   if (is_valid_) {
+    DCHECK(!spec_.empty());
     url::Component scheme;
     // We can't do this check on the inner_url of a filesystem URL, as
     // canonical_spec actually points to the start of the outer URL, so we'd
@@ -195,12 +195,7 @@
     return GURL();
 
   GURL result;
-
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
   url::StdStringCanonOutput output(&result.spec_);
-
   if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
                             parsed_, relative.data(),
                             static_cast<int>(relative.length()),
@@ -226,12 +221,7 @@
     return GURL();
 
   GURL result;
-
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
   url::StdStringCanonOutput output(&result.spec_);
-
   if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
                             parsed_, relative.data(),
                             static_cast<int>(relative.length()),
@@ -259,11 +249,7 @@
   if (!is_valid_)
     return GURL();
 
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
   url::StdStringCanonOutput output(&result.spec_);
-
   result.is_valid_ = url::ReplaceComponents(
       spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
       NULL, &output, &result.parsed_);
@@ -286,11 +272,7 @@
   if (!is_valid_)
     return GURL();
 
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
   url::StdStringCanonOutput output(&result.spec_);
-
   result.is_valid_ = url::ReplaceComponents(
       spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
       NULL, &output, &result.parsed_);
@@ -440,14 +422,7 @@
 }
 
 bool GURL::HostIsIPAddress() const {
-  if (!is_valid_ || spec_.empty())
-     return false;
-
-  url::RawCanonOutputT<char, 128> ignored_output;
-  url::CanonHostInfo host_info;
-  url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &ignored_output,
-                             &host_info);
-  return host_info.IsIPAddress();
+  return is_valid_ && url::HostIsIPAddress(host_piece());
 }
 
 #ifdef WIN32

diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc
index aae5048..5998b07 100644
--- a/src/url/gurl_unittest.cc
+++ b/src/url/gurl_unittest.cc

@@ -294,6 +294,7 @@
     {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
     {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
     {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
+    {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html", true, "http://images.google.com/foo.html"},
     {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
     {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
     {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},

diff --git a/src/url/origin.cc b/src/url/origin.cc
index fac78cf..1b0a9a6 100644
--- a/src/url/origin.cc
+++ b/src/url/origin.cc

@@ -84,11 +84,21 @@
                uint16_t port,
                base::StringPiece suborigin,
                SchemeHostPort::ConstructPolicy policy)
-    : tuple_(scheme, host, port, policy) {
+    : tuple_(scheme.as_string(), host.as_string(), port, policy) {
   unique_ = tuple_.IsInvalid();
   suborigin_ = suborigin.as_string();
 }
 
+Origin::Origin(std::string scheme,
+               std::string host,
+               uint16_t port,
+               std::string suborigin,
+               SchemeHostPort::ConstructPolicy policy)
+    : tuple_(std::move(scheme), std::move(host), port, policy) {
+  unique_ = tuple_.IsInvalid();
+  suborigin_ = std::move(suborigin);
+}
+
 Origin::~Origin() {
 }
 
@@ -100,18 +110,12 @@
   return Origin(scheme, host, port, "", SchemeHostPort::CHECK_CANONICALIZATION);
 }
 
-Origin Origin::CreateFromNormalizedTuple(base::StringPiece scheme,
-                                         base::StringPiece host,
-                                         uint16_t port) {
-  return CreateFromNormalizedTupleWithSuborigin(scheme, host, port, "");
-}
-
 Origin Origin::CreateFromNormalizedTupleWithSuborigin(
-    base::StringPiece scheme,
-    base::StringPiece host,
+    std::string scheme,
+    std::string host,
     uint16_t port,
-    base::StringPiece suborigin) {
-  return Origin(scheme, host, port, suborigin,
+    std::string suborigin) {
+  return Origin(std::move(scheme), std::move(host), port, std::move(suborigin),
                 SchemeHostPort::ALREADY_CANONICALIZED);
 }
 

diff --git a/src/url/origin.h b/src/url/origin.h
index 1c28588..4b838e4 100644
--- a/src/url/origin.h
+++ b/src/url/origin.h

@@ -104,18 +104,13 @@
 
   // Creates an origin without sanity checking that the host is canonicalized.
   // This should only be used when converting between already normalized types,
-  // and should NOT be used for IPC.
-  static Origin CreateFromNormalizedTuple(base::StringPiece scheme,
-                                          base::StringPiece host,
-                                          uint16_t port);
-
-  // Same as CreateFromNormalizedTuple() above, but adds a suborigin component
-  // as well.
+  // and should NOT be used for IPC. Takes std::strings by value so that callers
+  // can std::move their arguments in and avoid copies.
   static Origin CreateFromNormalizedTupleWithSuborigin(
-      base::StringPiece scheme,
-      base::StringPiece host,
+      std::string scheme,
+      std::string host,
       uint16_t port,
-      base::StringPiece suborigin);
+      std::string suborigin);
 
   ~Origin();
 
@@ -173,6 +168,11 @@
          uint16_t port,
          base::StringPiece suborigin,
          SchemeHostPort::ConstructPolicy policy);
+  Origin(std::string scheme,
+         std::string host,
+         uint16_t port,
+         std::string suborigin,
+         SchemeHostPort::ConstructPolicy policy);
 
   SchemeHostPort tuple_;
   bool unique_;

diff --git a/src/url/origin_unittest.cc b/src/url/origin_unittest.cc
index fee161b..412d03e 100644
--- a/src/url/origin_unittest.cc
+++ b/src/url/origin_unittest.cc

@@ -90,20 +90,11 @@
                     << test_case.port;
     }
     SCOPED_TRACE(scope_message);
-
-    url::Origin origin_without_suborigin =
-        url::Origin::CreateFromNormalizedTuple(test_case.scheme, test_case.host,
-                                               test_case.port);
-
     url::Origin origin_with_suborigin =
         url::Origin::CreateFromNormalizedTupleWithSuborigin(
             test_case.scheme, test_case.host, test_case.port,
             test_case.suborigin);
 
-    EXPECT_EQ(test_case.scheme, origin_without_suborigin.scheme());
-    EXPECT_EQ(test_case.host, origin_without_suborigin.host());
-    EXPECT_EQ(test_case.port, origin_without_suborigin.port());
-
     EXPECT_EQ(test_case.scheme, origin_with_suborigin.scheme());
     EXPECT_EQ(test_case.host, origin_with_suborigin.host());
     EXPECT_EQ(test_case.port, origin_with_suborigin.port());

diff --git a/src/url/scheme_host_port.cc b/src/url/scheme_host_port.cc
index b5de079..8d89b57 100644
--- a/src/url/scheme_host_port.cc
+++ b/src/url/scheme_host_port.cc

@@ -58,12 +58,6 @@
   if (!is_standard)
     return false;
 
-  // These schemes do not follow the generic URL syntax, so we treat them as
-  // invalid (scheme, host, port) tuples (even though such URLs' _Origin_ might
-  // have a (scheme, host, port) tuple, they themselves do not).
-  if (scheme == kFileSystemScheme || scheme == kBlobScheme)
-    return false;
-
   switch (scheme_type) {
     case SCHEME_WITH_PORT:
       // A URL with |scheme| is required to have the host and port (may be
@@ -115,24 +109,24 @@
 SchemeHostPort::SchemeHostPort() : port_(0) {
 }
 
-SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
-                               base::StringPiece host,
+SchemeHostPort::SchemeHostPort(std::string scheme,
+                               std::string host,
                                uint16_t port,
                                ConstructPolicy policy)
     : port_(0) {
   if (!IsValidInput(scheme, host, port, policy))
     return;
 
-  scheme.CopyToString(&scheme_);
-  host.CopyToString(&host_);
+  scheme_ = std::move(scheme);
+  host_ = std::move(host);
   port_ = port;
 }
 
 SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
                                base::StringPiece host,
                                uint16_t port)
-    : SchemeHostPort(scheme,
-                     host,
+    : SchemeHostPort(scheme.as_string(),
+                     host.as_string(),
                      port,
                      ConstructPolicy::CHECK_CANONICALIZATION) {}
 
@@ -201,6 +195,9 @@
   if (IsInvalid())
     return result;
 
+  // Reserve enough space for the "normal" case of scheme://host/.
+  result.reserve(scheme_.size() + host_.size() + 4);
+
   if (!scheme_.empty()) {
     parsed->scheme = Component(0, scheme_.length());
     result.append(scheme_);

diff --git a/src/url/scheme_host_port.h b/src/url/scheme_host_port.h
index 065e4aa..b2e030d 100644
--- a/src/url/scheme_host_port.h
+++ b/src/url/scheme_host_port.h

@@ -96,8 +96,8 @@
   // that the host and port are canonicalized. This should only be used when
   // converting between already normalized types, and should NOT be used for
   // IPC.
-  SchemeHostPort(base::StringPiece scheme,
-                 base::StringPiece host,
+  SchemeHostPort(std::string scheme,
+                 std::string host,
                  uint16_t port,
                  ConstructPolicy policy);
 

diff --git a/src/url/scheme_host_port_unittest.cc b/src/url/scheme_host_port_unittest.cc
index 9c59b9f..4a213ca 100644
--- a/src/url/scheme_host_port_unittest.cc
+++ b/src/url/scheme_host_port_unittest.cc

@@ -42,11 +42,19 @@
   EXPECT_TRUE(invalid.IsInvalid());
   EXPECT_TRUE(invalid.Equals(invalid));
 
-  const char* urls[] = {"data:text/html,Hello!",
-                        "javascript:alert(1)",
-                        "file://example.com:443/etc/passwd",
-                        "blob:https://example.com/uuid-goes-here",
-                        "filesystem:https://example.com/temporary/yay.png"};
+  const char* urls[] = {
+      "data:text/html,Hello!", "javascript:alert(1)",
+      "file://example.com:443/etc/passwd",
+
+      // These schemes do not follow the generic URL syntax, so make sure we
+      // treat them as invalid (scheme, host, port) tuples (even though such
+      // URLs' _Origin_ might have a (scheme, host, port) tuple, they themselves
+      // do not). This is only *implicitly* checked in the code, by means of
+      // blob schemes not being standard, and filesystem schemes having type
+      // SCHEME_WITHOUT_AUTHORITY. If conditions change such that the implicit
+      // checks no longer hold, this policy should be made explicit.
+      "blob:https://example.com/uuid-goes-here",
+      "filesystem:https://example.com/temporary/yay.png"};
 
   for (auto* test : urls) {
     SCOPED_TRACE(test);

diff --git a/src/url/third_party/mozilla/url_parse.cc b/src/url/third_party/mozilla/url_parse.cc
index 211043c..9977cae 100644
--- a/src/url/third_party/mozilla/url_parse.cc
+++ b/src/url/third_party/mozilla/url_parse.cc

@@ -175,6 +175,31 @@
   }
 }
 
+template <typename CHAR>
+inline void FindQueryAndRefParts(const CHAR* spec,
+                          const Component& path,
+                          int* query_separator,
+                          int* ref_separator) {
+  int path_end = path.begin + path.len;
+  for (int i = path.begin; i < path_end; i++) {
+    switch (spec[i]) {
+      case '?':
+        // Only match the query string if it precedes the reference fragment
+        // and when we haven't found one already.
+        if (*query_separator < 0)
+          *query_separator = i;
+        break;
+      case '#':
+        // Record the first # sign only.
+        if (*ref_separator < 0) {
+          *ref_separator = i;
+          return;
+        }
+        break;
+    }
+  }
+}
+
 template<typename CHAR>
 void ParsePath(const CHAR* spec,
                const Component& path,
@@ -193,25 +218,9 @@
   DCHECK(path.len > 0) << "We should never have 0 length paths";
 
   // Search for first occurrence of either ? or #.
-  int path_end = path.begin + path.len;
-
   int query_separator = -1;  // Index of the '?'
   int ref_separator = -1;    // Index of the '#'
-  for (int i = path.begin; i < path_end; i++) {
-    switch (spec[i]) {
-      case '?':
-        // Only match the query string if it precedes the reference fragment
-        // and when we haven't found one already.
-        if (ref_separator < 0 && query_separator < 0)
-          query_separator = i;
-        break;
-      case '#':
-        // Record the first # sign only.
-        if (ref_separator < 0)
-          ref_separator = i;
-        break;
-    }
-  }
+  FindQueryAndRefParts(spec, path, &query_separator, &ref_separator);
 
   // Markers pointing to the character after each of these corresponding
   // components. The code below works from the end back to the beginning,
@@ -219,6 +228,7 @@
   int file_end, query_end;
 
   // Ref fragment: from the # to the end of the path.
+  int path_end = path.begin + path.len;
   if (ref_separator >= 0) {
     file_end = query_end = ref_separator;
     *ref = MakeRange(ref_separator + 1, path_end);
@@ -682,8 +692,7 @@
 
 }  // namespace
 
-Parsed::Parsed() : inner_parsed_(NULL) {
-}
+Parsed::Parsed() : whitespace_removed(false), inner_parsed_(NULL) {}
 
 Parsed::Parsed(const Parsed& other) :
     scheme(other.scheme),

diff --git a/src/url/third_party/mozilla/url_parse.h b/src/url/third_party/mozilla/url_parse.h
index 222d605..968578b 100644
--- a/src/url/third_party/mozilla/url_parse.h
+++ b/src/url/third_party/mozilla/url_parse.h

@@ -177,6 +177,9 @@
   // the string with the scheme stripped off.
   Component GetContent() const;
 
+  // True if whitespace was removed from the URL during parsing.
+  bool whitespace_removed;
+
   // This is used for nested URL types, currently only filesystem.  If you
   // parse a filesystem URL, the resulting Parsed will have a nested
   // inner_parsed_ to hold the parsed inner URL's component information.

diff --git a/src/url/url_canon.h b/src/url/url_canon.h
index c4852e4..ff66c6e 100644
--- a/src/url/url_canon.h
+++ b/src/url/url_canon.h

@@ -117,6 +117,11 @@
     cur_len_ += str_len;
   }
 
+  void ReserveSizeIfNeeded(int estimated_size) {
+    if (estimated_size > buffer_len_)
+      Resize(estimated_size);
+  }
+
  protected:
   // Grows the given buffer so that it can fit at least |min_additional|
   // characters. Returns true if the buffer could be resized, false on OOM.

diff --git a/src/url/url_canon_relative.cc b/src/url/url_canon_relative.cc
index e34ea2f..8259056 100644
--- a/src/url/url_canon_relative.cc
+++ b/src/url/url_canon_relative.cc

@@ -4,6 +4,8 @@
 
 // Canonicalizer functions for working with and resolving relative URLs.
 
+#include <algorithm>
+
 #include "base/logging.h"
 #include "url/url_canon.h"
 #include "url/url_canon_internal.h"
@@ -264,7 +266,7 @@
 #endif  // WIN32
 
 // A subroutine of DoResolveRelativeURL, this resolves the URL knowing that
-// the input is a relative path or less (qyuery or ref).
+// the input is a relative path or less (query or ref).
 template<typename CHAR>
 bool DoResolveRelativePath(const char* base_url,
                            const Parsed& base_parsed,
@@ -280,7 +282,13 @@
   // also know we have a path so can copy up to there.
   Component path, query, ref;
   ParsePathInternal(relative_url, relative_component, &path, &query, &ref);
-  // Canonical URLs always have a path, so we can use that offset.
+
+  // Canonical URLs always have a path, so we can use that offset. Reserve
+  // enough room for the base URL, the new path, and some extra bytes for
+  // possible escaped characters.
+  output->ReserveSizeIfNeeded(
+      base_parsed.path.begin +
+      std::max(path.end(), std::max(query.end(), ref.end())) + 8);
   output->Append(base_url, base_parsed.path.begin);
 
   if (path.len > 0) {
@@ -394,6 +402,11 @@
   replacements.SetQuery(relative_url, relative_parsed.query);
   replacements.SetRef(relative_url, relative_parsed.ref);
 
+  // Length() does not include the old scheme, so make sure to add it from the
+  // base URL.
+  output->ReserveSizeIfNeeded(
+      replacements.components().Length() +
+      base_parsed.CountCharactersBefore(Parsed::USERNAME, false) + 8);
   return ReplaceStandardURL(base_url, base_parsed, replacements,
                             query_converter, output, out_parsed);
 }

diff --git a/src/url/url_util.cc b/src/url/url_util.cc
index ebe386f..97ebeff 100644
--- a/src/url/url_util.cc
+++ b/src/url/url_util.cc

@@ -6,7 +6,6 @@
 
 #include <stddef.h>
 #include <string.h>
-#include <vector>
 
 #ifdef GOOGLEURL_IN_GOOGLE3
 #include "base/heap-checker.h"
@@ -14,6 +13,7 @@
 #include "base/logging.h"
 #include "base/strings/string_util.h"
 #include "url/url_canon_internal.h"
+#include "url/url_constants.h"
 #include "url/url_file.h"
 #include "url/url_util_internal.h"
 
@@ -21,8 +21,14 @@
 
 namespace {
 
-const int kNumStandardURLSchemes = 10;
-const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {
+// Pass this enum through for methods which would like to know if whitespace
+// removal is necessary.
+enum WhitespaceRemovalPolicy {
+  REMOVE_WHITESPACE,
+  DO_NOT_REMOVE_WHITESPACE,
+};
+
+const SchemeWithType kStandardURLSchemes[] = {
     {kHttpScheme, SCHEME_WITH_PORT},
     {kHttpsScheme, SCHEME_WITH_PORT},
     // Yes, file URLs can have a hostname, so file URLs should be handled as
@@ -38,21 +44,50 @@
     {kHttpsSuboriginScheme, SCHEME_WITH_PORT},
 };
 
-const int kNumReferrerURLSchemes = 4;
-const SchemeWithType kReferrerURLSchemes[kNumReferrerURLSchemes] = {
+const SchemeWithType kReferrerURLSchemes[] = {
     {kHttpScheme, SCHEME_WITH_PORT},
     {kHttpsScheme, SCHEME_WITH_PORT},
     {kHttpSuboriginScheme, SCHEME_WITH_PORT},
     {kHttpsSuboriginScheme, SCHEME_WITH_PORT},
 };
 
+const char* kSecureSchemes[] = {
+  kHttpsScheme,
+  kAboutScheme,
+  kDataScheme,
+  kWssScheme,
+};
+
+const char* kLocalSchemes[] = {
+  kFileScheme,
+};
+
+const char* kNoAccessSchemes[] = {
+  kAboutScheme,
+  kJavaScriptScheme,
+  kDataScheme,
+};
+
+const char* kCORSEnabledSchemes[] = {
+  kHttpScheme,
+  kHttpsScheme,
+  kDataScheme,
+};
+
+bool initialized = false;
+
 // Lists of the currently installed standard and referrer schemes. These lists
-// are lazily initialized by InitStandardSchemes and InitReferrerSchemes and are
-// leaked on shutdown to prevent any destructors from being called that will
-// slow us down or cause problems.
+// are lazily initialized by Initialize and are leaked on shutdown to prevent
+// any destructors from being called that will slow us down or cause problems.
 std::vector<SchemeWithType>* standard_schemes = nullptr;
 std::vector<SchemeWithType>* referrer_schemes = nullptr;
 
+// Similar to above, initialized by the Init*Schemes methods.
+std::vector<std::string>* secure_schemes = nullptr;
+std::vector<std::string>* local_schemes = nullptr;
+std::vector<std::string>* no_access_schemes = nullptr;
+std::vector<std::string>* cors_enabled_schemes = nullptr;
+
 // See the LockSchemeRegistries declaration in the header.
 bool scheme_registries_locked = false;
 
@@ -67,27 +102,22 @@
   typedef base::StringPiece16 Piece;
 };
 
-void InitSchemes(std::vector<SchemeWithType>** schemes,
-                 const SchemeWithType* initial_schemes,
+void InitSchemes(std::vector<std::string>** schemes,
+                 const char** initial_schemes,
                  size_t size) {
-  if (*schemes)
-    return;
-  *schemes = new std::vector<SchemeWithType>(size);
+  *schemes = new std::vector<std::string>(size);
   for (size_t i = 0; i < size; i++) {
-    (*schemes)->push_back(initial_schemes[i]);
+    (*(*schemes))[i] = initial_schemes[i];
   }
 }
 
-// Ensures that the standard_schemes list is initialized, does nothing if
-// it already has values.
-void InitStandardSchemes() {
-  InitSchemes(&standard_schemes, kStandardURLSchemes, kNumStandardURLSchemes);
-}
-
-// Ensures that the referrer_schemes list is initialized, does nothing if
-// it already has values.
-void InitReferrerSchemes() {
-  InitSchemes(&referrer_schemes, kReferrerURLSchemes, kNumReferrerURLSchemes);
+void InitSchemesWithType(std::vector<SchemeWithType>** schemes,
+                         const SchemeWithType* initial_schemes,
+                         size_t size) {
+  *schemes = new std::vector<SchemeWithType>(size);
+  for (size_t i = 0; i < size; i++) {
+    (*(*schemes))[i] = initial_schemes[i];
+  }
 }
 
 // Given a string and a range inside the string, compares it to the given
@@ -127,7 +157,7 @@
 
 template<typename CHAR>
 bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) {
-  InitStandardSchemes();
+  Initialize();
   return DoIsInSchemes(spec, scheme, type, *standard_schemes);
 }
 
@@ -156,19 +186,28 @@
   return DoCompareSchemeComponent(spec, our_scheme, compare);
 }
 
-template<typename CHAR>
-bool DoCanonicalize(const CHAR* in_spec,
-                    int in_spec_len,
+template <typename CHAR>
+bool DoCanonicalize(const CHAR* spec,
+                    int spec_len,
                     bool trim_path_end,
+                    WhitespaceRemovalPolicy whitespace_policy,
                     CharsetConverter* charset_converter,
                     CanonOutput* output,
                     Parsed* output_parsed) {
-  // Remove any whitespace from the middle of the relative URL, possibly
-  // copying to the new buffer.
+  // Reserve enough room in the output for the input, plus some extra so that
+  // we have room if we have to escape a few things without reallocating.
+  output->ReserveSizeIfNeeded(spec_len + 8);
+
+  // Remove any whitespace from the middle of the relative URL if necessary.
+  // Possibly this will result in copying to the new buffer.
   RawCanonOutputT<CHAR> whitespace_buffer;
-  int spec_len;
-  const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,
-                                         &whitespace_buffer, &spec_len);
+  if (whitespace_policy == REMOVE_WHITESPACE) {
+    int original_len = spec_len;
+    spec =
+        RemoveURLWhitespace(spec, original_len, &whitespace_buffer, &spec_len);
+    if (spec_len != original_len)
+      output_parsed->whitespace_removed = true;
+  }
 
   Parsed parsed_input;
 #ifdef WIN32
@@ -248,6 +287,9 @@
   const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,
                                              &whitespace_buffer,
                                              &relative_length);
+  if (in_relative_length != relative_length)
+    output_parsed->whitespace_removed = true;
+
   bool base_is_authority_based = false;
   bool base_is_hierarchical = false;
   if (base_spec &&
@@ -273,6 +315,9 @@
     return false;
   }
 
+  // Don't reserve buffer space here. Instead, reserve in DoCanonicalize and
+  // ResolveRelativeURL, to enable more accurate buffer sizes.
+
   // Pretend for a moment that |base_spec| is a standard URL. Normally
   // non-standard URLs are treated as PathURLs, but if the base has an
   // authority we would like to preserve it.
@@ -289,7 +334,8 @@
       // based on base_parsed_authority instead of base_parsed) and needs to be
       // re-created.
       DoCanonicalize(temporary_output.data(), temporary_output.length(), true,
-                     charset_converter, output, output_parsed);
+                     REMOVE_WHITESPACE, charset_converter, output,
+                     output_parsed);
       return did_resolve_succeed;
     }
   } else if (is_relative) {
@@ -302,8 +348,9 @@
   }
 
   // Not relative, canonicalize the input.
-  return DoCanonicalize(relative, relative_length, true, charset_converter,
-                        output, output_parsed);
+  return DoCanonicalize(relative, relative_length, true,
+                        DO_NOT_REMOVE_WHITESPACE, charset_converter, output,
+                        output_parsed);
 }
 
 template<typename CHAR>
@@ -350,8 +397,8 @@
     RawCanonOutput<128> recanonicalized;
     Parsed recanonicalized_parsed;
     DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,
-                   charset_converter,
-                   &recanonicalized, &recanonicalized_parsed);
+                   REMOVE_WHITESPACE, charset_converter, &recanonicalized,
+                   &recanonicalized_parsed);
 
     // Recurse using the version with the scheme already replaced. This will now
     // use the replacement rules for the new scheme.
@@ -373,6 +420,12 @@
                                charset_converter, output, out_parsed);
   }
 
+  // TODO(csharrison): We could be smarter about size to reserve if this is done
+  // in callers below, and the code checks to see which components are being
+  // replaced, and with what length. If this ends up being a hot spot it should
+  // be changed.
+  output->ReserveSizeIfNeeded(spec_len + 8);
+
   // If we get here, then we know the scheme doesn't need to be replaced, so can
   // just key off the scheme in the spec to know how to do the replacements.
   if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) {
@@ -396,9 +449,7 @@
   return ReplacePathURL(spec, parsed, replacements, output, out_parsed);
 }
 
-void DoAddScheme(const char* new_scheme,
-                 SchemeType type,
-                 std::vector<SchemeWithType>* schemes) {
+void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) {
   DCHECK(schemes);
   // If this assert triggers, it means you've called Add*Scheme after
   // LockSchemeRegistries has been called (see the header file for
@@ -414,6 +465,29 @@
   if (scheme_len == 0)
     return;
 
+  DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);
+  schemes->push_back(std::string(new_scheme));
+}
+
+void DoAddSchemeWithType(const char* new_scheme,
+                         SchemeType type,
+                         std::vector<SchemeWithType>* schemes) {
+  DCHECK(schemes);
+  // If this assert triggers, it means you've called Add*Scheme after
+  // LockSchemeRegistries has been called (see the header file for
+  // LockSchemeRegistries for more).
+  //
+  // This normally means you're trying to set up a new scheme too late in your
+  // application's init process. Locate where your app does this initialization
+  // and calls LockSchemeRegistries, and add your new scheme there.
+  DCHECK(!scheme_registries_locked)
+      << "Trying to add a scheme after the lists have been locked.";
+
+  size_t scheme_len = strlen(new_scheme);
+  if (scheme_len == 0)
+    return;
+
+  DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);
   // Duplicate the scheme into a new buffer and add it to the list of standard
   // schemes. This pointer will be leaked on shutdown.
   char* dup_scheme = new char[scheme_len + 1];
@@ -431,29 +505,85 @@
 }  // namespace
 
 void Initialize() {
-  InitStandardSchemes();
-  InitReferrerSchemes();
+  if (initialized)
+    return;
+  InitSchemesWithType(&standard_schemes, kStandardURLSchemes,
+                      arraysize(kStandardURLSchemes));
+  InitSchemesWithType(&referrer_schemes, kReferrerURLSchemes,
+                      arraysize(kReferrerURLSchemes));
+  InitSchemes(&secure_schemes, kSecureSchemes, arraysize(kSecureSchemes));
+  InitSchemes(&local_schemes, kLocalSchemes, arraysize(kLocalSchemes));
+  InitSchemes(&no_access_schemes, kNoAccessSchemes,
+              arraysize(kNoAccessSchemes));
+  InitSchemes(&cors_enabled_schemes, kCORSEnabledSchemes,
+              arraysize(kCORSEnabledSchemes));
+  initialized = true;
 }
 
 void Shutdown() {
-  if (standard_schemes) {
-    delete standard_schemes;
-    standard_schemes = NULL;
-  }
-  if (referrer_schemes) {
-    delete referrer_schemes;
-    referrer_schemes = NULL;
-  }
+  initialized = false;
+  delete standard_schemes;
+  standard_schemes = nullptr;
+  delete referrer_schemes;
+  referrer_schemes = nullptr;
+  delete secure_schemes;
+  secure_schemes = nullptr;
+  delete local_schemes;
+  local_schemes = nullptr;
+  delete no_access_schemes;
+  no_access_schemes = nullptr;
+  delete cors_enabled_schemes;
+  cors_enabled_schemes = nullptr;
 }
 
 void AddStandardScheme(const char* new_scheme, SchemeType type) {
-  InitStandardSchemes();
-  DoAddScheme(new_scheme, type, standard_schemes);
+  Initialize();
+  DoAddSchemeWithType(new_scheme, type, standard_schemes);
 }
 
 void AddReferrerScheme(const char* new_scheme, SchemeType type) {
-  InitReferrerSchemes();
-  DoAddScheme(new_scheme, type, referrer_schemes);
+  Initialize();
+  DoAddSchemeWithType(new_scheme, type, referrer_schemes);
+}
+
+void AddSecureScheme(const char* new_scheme) {
+  Initialize();
+  DoAddScheme(new_scheme, secure_schemes);
+}
+
+const std::vector<std::string>& GetSecureSchemes() {
+  Initialize();
+  return *secure_schemes;
+}
+
+void AddLocalScheme(const char* new_scheme) {
+  Initialize();
+  DoAddScheme(new_scheme, local_schemes);
+}
+
+const std::vector<std::string>& GetLocalSchemes() {
+  Initialize();
+  return *local_schemes;
+}
+
+void AddNoAccessScheme(const char* new_scheme) {
+  Initialize();
+  DoAddScheme(new_scheme, no_access_schemes);
+}
+
+const std::vector<std::string>& GetNoAccessSchemes() {
+  Initialize();
+  return *no_access_schemes;
+}
+
+void AddCORSEnabledScheme(const char* new_scheme) {
+  Initialize();
+  DoAddScheme(new_scheme, cors_enabled_schemes);
+}
+
+const std::vector<std::string>& GetCORSEnabledSchemes() {
+  Initialize();
+  return *cors_enabled_schemes;
 }
 
 void LockSchemeRegistries() {
@@ -477,7 +607,7 @@
 }
 
 bool IsReferrerScheme(const char* spec, const Component& scheme) {
-  InitReferrerSchemes();
+  Initialize();
   SchemeType unused_scheme_type;
   return DoIsInSchemes(spec, scheme, &unused_scheme_type, *referrer_schemes);
 }
@@ -533,14 +663,22 @@
   return true;
 }
 
+bool HostIsIPAddress(base::StringPiece host) {
+  url::RawCanonOutputT<char, 128> ignored_output;
+  url::CanonHostInfo host_info;
+  url::CanonicalizeIPAddress(host.data(), Component(0, host.length()),
+                             &ignored_output, &host_info);
+  return host_info.IsIPAddress();
+}
+
 bool Canonicalize(const char* spec,
                   int spec_len,
                   bool trim_path_end,
                   CharsetConverter* charset_converter,
                   CanonOutput* output,
                   Parsed* output_parsed) {
-  return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,
-                        output, output_parsed);
+  return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
+                        charset_converter, output, output_parsed);
 }
 
 bool Canonicalize(const base::char16* spec,
@@ -549,8 +687,8 @@
                   CharsetConverter* charset_converter,
                   CanonOutput* output,
                   Parsed* output_parsed) {
-  return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,
-                        output, output_parsed);
+  return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
+                        charset_converter, output, output_parsed);
 }
 
 bool ResolveRelative(const char* base_spec,

diff --git a/src/url/url_util.h b/src/url/url_util.h
index 724ce95..a4b74b1 100644
--- a/src/url/url_util.h
+++ b/src/url/url_util.h

@@ -6,6 +6,7 @@
 #define URL_URL_UTIL_H_
 
 #include <string>
+#include <vector>
 
 #include "base/strings/string16.h"
 #include "base/strings/string_piece.h"
@@ -57,25 +58,44 @@
   SchemeType type;
 };
 
+// The following Add*Scheme methods are not threadsafe and cannot be called
+// concurrently with any other url_util function. They will assert if the lists
+// of schemes have been locked (see LockSchemeRegistries).
+
 // Adds an application-defined scheme to the internal list of "standard-format"
 // URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
 // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
-//
-// This function is not threadsafe and can not be called concurrently with any
-// other url_util function. It will assert if the lists of schemes have
-// been locked (see LockSchemeRegistries).
+
 URL_EXPORT void AddStandardScheme(const char* new_scheme,
                                   SchemeType scheme_type);
 
 // Adds an application-defined scheme to the internal list of schemes allowed
 // for referrers.
-//
-// This function is not threadsafe and can not be called concurrently with any
-// other url_util function. It will assert if the lists of schemes have
-// been locked (see LockSchemeRegistries).
 URL_EXPORT void AddReferrerScheme(const char* new_scheme,
                                   SchemeType scheme_type);
 
+// Adds an application-defined scheme to the list of schemes that do not trigger
+// mixed content warnings.
+URL_EXPORT void AddSecureScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetSecureSchemes();
+
+// Adds an application-defined scheme to the list of schemes that normal pages
+// cannot link to or access (i.e., with the same security rules as those applied
+// to "file" URLs).
+URL_EXPORT void AddLocalScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetLocalSchemes();
+
+// Adds an application-defined scheme to the list of schemes that cause pages
+// loaded with them to not have access to pages loaded with any other URL
+// scheme.
+URL_EXPORT void AddNoAccessScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetNoAccessSchemes();
+
+// Adds an application-defined scheme to the list of schemes that can be sent
+// CORS requests.
+URL_EXPORT void AddCORSEnabledScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetCORSEnabledSchemes();
+
 // Sets a flag to prevent future calls to Add*Scheme from succeeding.
 //
 // This is designed to help prevent errors for multithreaded applications.
@@ -133,7 +153,7 @@
                                       const Component& scheme,
                                       SchemeType* type);
 
-// Domains ---------------------------------------------------------------------
+// Hosts  ----------------------------------------------------------------------
 
 // Returns true if the |canonicalized_host| matches or is in the same domain as
 // the given |lower_ascii_domain| string. For example, if the canonicalized
@@ -146,6 +166,10 @@
 URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host,
                          base::StringPiece lower_ascii_domain);
 
+// Returns true if the hostname is an IP address. Note: this function isn't very
+// cheap, as it must re-parse the host to verify.
+URL_EXPORT bool HostIsIPAddress(base::StringPiece host);
+
 // URL library wrappers --------------------------------------------------------
 
 // Parses the given spec according to the extracted scheme type. Normal users
commit	2c4a471e13ed1114224bc2b8c6c901230aac0eaf	[log] [tgz]
author	Devany Sandoval <sandovad@google.com>	Mon Apr 10 10:41:58 2017 -0700
committer	sandovad <sandovad@google.com>	Tue Sep 03 12:54:52 2019 -0700
tree	4b68400bf7f8be9083903a78afec2363173e1c64
parent	44ac360d844b6ab5e40070aa367443ff0a9d5ac4 [diff]