Internal change
PiperOrigin-RevId: 152704883
Change-Id: I4d434c21f0a3116bb5923014d37402e9426db26e
diff --git a/import.sh b/import.sh
index 8671302..1d95c38 100644
--- a/import.sh
+++ b/import.sh
@@ -4,7 +4,7 @@
top=/tmp/chromium
mkdir $top
prefix=https://chromium.googlesource.com/chromium/src.git/+archive
-for version in 55.0.2883.87 56.0.2924.87
+for version in 56.0.2924.87 57.0.2987.133
do
mkdir $top/$version
cd $top/$version
diff --git a/src/base/strings/utf_string_conversions.cc b/src/base/strings/utf_string_conversions.cc
index 944078f..ffbfa53 100644
--- a/src/base/strings/utf_string_conversions.cc
+++ b/src/base/strings/utf_string_conversions.cc
@@ -82,10 +82,6 @@
}
std::string UTF16ToUTF8(StringPiece16 utf16) {
- if (IsStringASCII(utf16)) {
- return std::string(utf16.begin(), utf16.end());
- }
-
std::string ret;
// Ignore the success flag of this call, it will do the best it can for
// invalid input, which is what we want here.
diff --git a/src/build/build_config.h b/src/build/build_config.h
index 5f99fc1..b84a9d3 100644
--- a/src/build/build_config.h
+++ b/src/build/build_config.h
@@ -6,6 +6,7 @@
// Operating System:
// OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX or LINUX) /
// OS_NACL (NACL_SFI or NACL_NONSFI) / OS_NACL_SFI / OS_NACL_NONSFI
+// OS_CHROMEOS is set by the build system
// Compiler:
// COMPILER_MSVC / COMPILER_GCC
// Processor:
diff --git a/src/url/gurl.cc b/src/url/gurl.cc
index 1d6dfc6..606eedb 100644
--- a/src/url/gurl.cc
+++ b/src/url/gurl.cc
@@ -108,9 +108,6 @@
template<typename STR>
void GURL::InitCanonical(url::base::BasicStringPiece<STR> input_spec,
bool trim_path_end) {
- // Reserve enough room in the output for the input, plus some extra so that
- // we have room if we have to escape a few things without reallocating.
- spec_.reserve(input_spec.size() + 32);
url::StdStringCanonOutput output(&spec_);
is_valid_ = url::Canonicalize(
input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
@@ -121,6 +118,8 @@
inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
*parsed_.inner_parsed(), true));
}
+ // Valid URLs always have non-empty specs.
+ DCHECK(!is_valid_ || !spec_.empty());
}
void GURL::InitializeFromCanonicalSpec() {
@@ -135,6 +134,7 @@
// what we would have produced. Skip checking for invalid URLs have no meaning
// and we can't always canonicalize then reproducibly.
if (is_valid_) {
+ DCHECK(!spec_.empty());
url::Component scheme;
// We can't do this check on the inner_url of a filesystem URL, as
// canonical_spec actually points to the start of the outer URL, so we'd
@@ -195,12 +195,7 @@
return GURL();
GURL result;
-
- // Reserve enough room in the output for the input, plus some extra so that
- // we have room if we have to escape a few things without reallocating.
- result.spec_.reserve(spec_.size() + 32);
url::StdStringCanonOutput output(&result.spec_);
-
if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
parsed_, relative.data(),
static_cast<int>(relative.length()),
@@ -226,12 +221,7 @@
return GURL();
GURL result;
-
- // Reserve enough room in the output for the input, plus some extra so that
- // we have room if we have to escape a few things without reallocating.
- result.spec_.reserve(spec_.size() + 32);
url::StdStringCanonOutput output(&result.spec_);
-
if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
parsed_, relative.data(),
static_cast<int>(relative.length()),
@@ -259,11 +249,7 @@
if (!is_valid_)
return GURL();
- // Reserve enough room in the output for the input, plus some extra so that
- // we have room if we have to escape a few things without reallocating.
- result.spec_.reserve(spec_.size() + 32);
url::StdStringCanonOutput output(&result.spec_);
-
result.is_valid_ = url::ReplaceComponents(
spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
NULL, &output, &result.parsed_);
@@ -286,11 +272,7 @@
if (!is_valid_)
return GURL();
- // Reserve enough room in the output for the input, plus some extra so that
- // we have room if we have to escape a few things without reallocating.
- result.spec_.reserve(spec_.size() + 32);
url::StdStringCanonOutput output(&result.spec_);
-
result.is_valid_ = url::ReplaceComponents(
spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
NULL, &output, &result.parsed_);
@@ -440,14 +422,7 @@
}
bool GURL::HostIsIPAddress() const {
- if (!is_valid_ || spec_.empty())
- return false;
-
- url::RawCanonOutputT<char, 128> ignored_output;
- url::CanonHostInfo host_info;
- url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &ignored_output,
- &host_info);
- return host_info.IsIPAddress();
+ return is_valid_ && url::HostIsIPAddress(host_piece());
}
#ifdef WIN32
diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc
index aae5048..5998b07 100644
--- a/src/url/gurl_unittest.cc
+++ b/src/url/gurl_unittest.cc
@@ -294,6 +294,7 @@
{"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
{"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
{"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
+ {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html", true, "http://images.google.com/foo.html"},
{"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
{"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
{"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
diff --git a/src/url/origin.cc b/src/url/origin.cc
index fac78cf..1b0a9a6 100644
--- a/src/url/origin.cc
+++ b/src/url/origin.cc
@@ -84,11 +84,21 @@
uint16_t port,
base::StringPiece suborigin,
SchemeHostPort::ConstructPolicy policy)
- : tuple_(scheme, host, port, policy) {
+ : tuple_(scheme.as_string(), host.as_string(), port, policy) {
unique_ = tuple_.IsInvalid();
suborigin_ = suborigin.as_string();
}
+Origin::Origin(std::string scheme,
+ std::string host,
+ uint16_t port,
+ std::string suborigin,
+ SchemeHostPort::ConstructPolicy policy)
+ : tuple_(std::move(scheme), std::move(host), port, policy) {
+ unique_ = tuple_.IsInvalid();
+ suborigin_ = std::move(suborigin);
+}
+
Origin::~Origin() {
}
@@ -100,18 +110,12 @@
return Origin(scheme, host, port, "", SchemeHostPort::CHECK_CANONICALIZATION);
}
-Origin Origin::CreateFromNormalizedTuple(base::StringPiece scheme,
- base::StringPiece host,
- uint16_t port) {
- return CreateFromNormalizedTupleWithSuborigin(scheme, host, port, "");
-}
-
Origin Origin::CreateFromNormalizedTupleWithSuborigin(
- base::StringPiece scheme,
- base::StringPiece host,
+ std::string scheme,
+ std::string host,
uint16_t port,
- base::StringPiece suborigin) {
- return Origin(scheme, host, port, suborigin,
+ std::string suborigin) {
+ return Origin(std::move(scheme), std::move(host), port, std::move(suborigin),
SchemeHostPort::ALREADY_CANONICALIZED);
}
diff --git a/src/url/origin.h b/src/url/origin.h
index 1c28588..4b838e4 100644
--- a/src/url/origin.h
+++ b/src/url/origin.h
@@ -104,18 +104,13 @@
// Creates an origin without sanity checking that the host is canonicalized.
// This should only be used when converting between already normalized types,
- // and should NOT be used for IPC.
- static Origin CreateFromNormalizedTuple(base::StringPiece scheme,
- base::StringPiece host,
- uint16_t port);
-
- // Same as CreateFromNormalizedTuple() above, but adds a suborigin component
- // as well.
+ // and should NOT be used for IPC. Takes std::string parameters by value so
+ // that callers can std::move() their arguments in and avoid copies.
static Origin CreateFromNormalizedTupleWithSuborigin(
- base::StringPiece scheme,
- base::StringPiece host,
+ std::string scheme,
+ std::string host,
uint16_t port,
- base::StringPiece suborigin);
+ std::string suborigin);
~Origin();
@@ -173,6 +168,11 @@
uint16_t port,
base::StringPiece suborigin,
SchemeHostPort::ConstructPolicy policy);
+ Origin(std::string scheme,
+ std::string host,
+ uint16_t port,
+ std::string suborigin,
+ SchemeHostPort::ConstructPolicy policy);
SchemeHostPort tuple_;
bool unique_;
diff --git a/src/url/origin_unittest.cc b/src/url/origin_unittest.cc
index fee161b..412d03e 100644
--- a/src/url/origin_unittest.cc
+++ b/src/url/origin_unittest.cc
@@ -90,20 +90,11 @@
<< test_case.port;
}
SCOPED_TRACE(scope_message);
-
- url::Origin origin_without_suborigin =
- url::Origin::CreateFromNormalizedTuple(test_case.scheme, test_case.host,
- test_case.port);
-
url::Origin origin_with_suborigin =
url::Origin::CreateFromNormalizedTupleWithSuborigin(
test_case.scheme, test_case.host, test_case.port,
test_case.suborigin);
- EXPECT_EQ(test_case.scheme, origin_without_suborigin.scheme());
- EXPECT_EQ(test_case.host, origin_without_suborigin.host());
- EXPECT_EQ(test_case.port, origin_without_suborigin.port());
-
EXPECT_EQ(test_case.scheme, origin_with_suborigin.scheme());
EXPECT_EQ(test_case.host, origin_with_suborigin.host());
EXPECT_EQ(test_case.port, origin_with_suborigin.port());
diff --git a/src/url/scheme_host_port.cc b/src/url/scheme_host_port.cc
index b5de079..8d89b57 100644
--- a/src/url/scheme_host_port.cc
+++ b/src/url/scheme_host_port.cc
@@ -58,12 +58,6 @@
if (!is_standard)
return false;
- // These schemes do not follow the generic URL syntax, so we treat them as
- // invalid (scheme, host, port) tuples (even though such URLs' _Origin_ might
- // have a (scheme, host, port) tuple, they themselves do not).
- if (scheme == kFileSystemScheme || scheme == kBlobScheme)
- return false;
-
switch (scheme_type) {
case SCHEME_WITH_PORT:
// A URL with |scheme| is required to have the host and port (may be
@@ -115,24 +109,24 @@
SchemeHostPort::SchemeHostPort() : port_(0) {
}
-SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
- base::StringPiece host,
+SchemeHostPort::SchemeHostPort(std::string scheme,
+ std::string host,
uint16_t port,
ConstructPolicy policy)
: port_(0) {
if (!IsValidInput(scheme, host, port, policy))
return;
- scheme.CopyToString(&scheme_);
- host.CopyToString(&host_);
+ scheme_ = std::move(scheme);
+ host_ = std::move(host);
port_ = port;
}
SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
base::StringPiece host,
uint16_t port)
- : SchemeHostPort(scheme,
- host,
+ : SchemeHostPort(scheme.as_string(),
+ host.as_string(),
port,
ConstructPolicy::CHECK_CANONICALIZATION) {}
@@ -201,6 +195,9 @@
if (IsInvalid())
return result;
+ // Reserve enough space for the "normal" case of scheme://host/.
+ result.reserve(scheme_.size() + host_.size() + 4);
+
if (!scheme_.empty()) {
parsed->scheme = Component(0, scheme_.length());
result.append(scheme_);
diff --git a/src/url/scheme_host_port.h b/src/url/scheme_host_port.h
index 065e4aa..b2e030d 100644
--- a/src/url/scheme_host_port.h
+++ b/src/url/scheme_host_port.h
@@ -96,8 +96,8 @@
// that the host and port are canonicalized. This should only be used when
// converting between already normalized types, and should NOT be used for
// IPC.
- SchemeHostPort(base::StringPiece scheme,
- base::StringPiece host,
+ SchemeHostPort(std::string scheme,
+ std::string host,
uint16_t port,
ConstructPolicy policy);
diff --git a/src/url/scheme_host_port_unittest.cc b/src/url/scheme_host_port_unittest.cc
index 9c59b9f..4a213ca 100644
--- a/src/url/scheme_host_port_unittest.cc
+++ b/src/url/scheme_host_port_unittest.cc
@@ -42,11 +42,19 @@
EXPECT_TRUE(invalid.IsInvalid());
EXPECT_TRUE(invalid.Equals(invalid));
- const char* urls[] = {"data:text/html,Hello!",
- "javascript:alert(1)",
- "file://example.com:443/etc/passwd",
- "blob:https://example.com/uuid-goes-here",
- "filesystem:https://example.com/temporary/yay.png"};
+ const char* urls[] = {
+ "data:text/html,Hello!", "javascript:alert(1)",
+ "file://example.com:443/etc/passwd",
+
+ // These schemes do not follow the generic URL syntax, so make sure we
+ // treat them as invalid (scheme, host, port) tuples (even though such
+ // URLs' _Origin_ might have a (scheme, host, port) tuple, they themselves
+ // do not). This is only *implicitly* checked in the code, by means of
+ // blob schemes not being standard, and filesystem schemes having type
+ // SCHEME_WITHOUT_AUTHORITY. If conditions change such that the implicit
+ // checks no longer hold, this policy should be made explicit.
+ "blob:https://example.com/uuid-goes-here",
+ "filesystem:https://example.com/temporary/yay.png"};
for (auto* test : urls) {
SCOPED_TRACE(test);
diff --git a/src/url/third_party/mozilla/url_parse.cc b/src/url/third_party/mozilla/url_parse.cc
index 211043c..9977cae 100644
--- a/src/url/third_party/mozilla/url_parse.cc
+++ b/src/url/third_party/mozilla/url_parse.cc
@@ -175,6 +175,31 @@
}
}
+template <typename CHAR>
+inline void FindQueryAndRefParts(const CHAR* spec,
+ const Component& path,
+ int* query_separator,
+ int* ref_separator) {
+ int path_end = path.begin + path.len;
+ for (int i = path.begin; i < path_end; i++) {
+ switch (spec[i]) {
+ case '?':
+ // Only match the query string if it precedes the reference fragment
+ // and when we haven't found one already.
+ if (*query_separator < 0)
+ *query_separator = i;
+ break;
+ case '#':
+ // Record the first # sign only.
+ if (*ref_separator < 0) {
+ *ref_separator = i;
+ return;
+ }
+ break;
+ }
+ }
+}
+
template<typename CHAR>
void ParsePath(const CHAR* spec,
const Component& path,
@@ -193,25 +218,9 @@
DCHECK(path.len > 0) << "We should never have 0 length paths";
// Search for first occurrence of either ? or #.
- int path_end = path.begin + path.len;
-
int query_separator = -1; // Index of the '?'
int ref_separator = -1; // Index of the '#'
- for (int i = path.begin; i < path_end; i++) {
- switch (spec[i]) {
- case '?':
- // Only match the query string if it precedes the reference fragment
- // and when we haven't found one already.
- if (ref_separator < 0 && query_separator < 0)
- query_separator = i;
- break;
- case '#':
- // Record the first # sign only.
- if (ref_separator < 0)
- ref_separator = i;
- break;
- }
- }
+ FindQueryAndRefParts(spec, path, &query_separator, &ref_separator);
// Markers pointing to the character after each of these corresponding
// components. The code below words from the end back to the beginning,
@@ -219,6 +228,7 @@
int file_end, query_end;
// Ref fragment: from the # to the end of the path.
+ int path_end = path.begin + path.len;
if (ref_separator >= 0) {
file_end = query_end = ref_separator;
*ref = MakeRange(ref_separator + 1, path_end);
@@ -682,8 +692,7 @@
} // namespace
-Parsed::Parsed() : inner_parsed_(NULL) {
-}
+Parsed::Parsed() : whitespace_removed(false), inner_parsed_(NULL) {}
Parsed::Parsed(const Parsed& other) :
scheme(other.scheme),
diff --git a/src/url/third_party/mozilla/url_parse.h b/src/url/third_party/mozilla/url_parse.h
index 222d605..968578b 100644
--- a/src/url/third_party/mozilla/url_parse.h
+++ b/src/url/third_party/mozilla/url_parse.h
@@ -177,6 +177,9 @@
// the string with the scheme stripped off.
Component GetContent() const;
+ // True if whitespace was removed from the URL during parsing.
+ bool whitespace_removed;
+
// This is used for nested URL types, currently only filesystem. If you
// parse a filesystem URL, the resulting Parsed will have a nested
// inner_parsed_ to hold the parsed inner URL's component information.
diff --git a/src/url/url_canon.h b/src/url/url_canon.h
index c4852e4..ff66c6e 100644
--- a/src/url/url_canon.h
+++ b/src/url/url_canon.h
@@ -117,6 +117,11 @@
cur_len_ += str_len;
}
+ void ReserveSizeIfNeeded(int estimated_size) {
+ if (estimated_size > buffer_len_)
+ Resize(estimated_size);
+ }
+
protected:
// Grows the given buffer so that it can fit at least |min_additional|
// characters. Returns true if the buffer could be resized, false on OOM.
diff --git a/src/url/url_canon_relative.cc b/src/url/url_canon_relative.cc
index e34ea2f..8259056 100644
--- a/src/url/url_canon_relative.cc
+++ b/src/url/url_canon_relative.cc
@@ -4,6 +4,8 @@
// Canonicalizer functions for working with and resolving relative URLs.
+#include <algorithm>
+
#include "base/logging.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
@@ -264,7 +266,7 @@
#endif // WIN32
// A subroutine of DoResolveRelativeURL, this resolves the URL knowning that
-// the input is a relative path or less (qyuery or ref).
+// the input is a relative path or less (query or ref).
template<typename CHAR>
bool DoResolveRelativePath(const char* base_url,
const Parsed& base_parsed,
@@ -280,7 +282,13 @@
// also know we have a path so can copy up to there.
Component path, query, ref;
ParsePathInternal(relative_url, relative_component, &path, &query, &ref);
- // Canonical URLs always have a path, so we can use that offset.
+
+ // Canonical URLs always have a path, so we can use that offset. Reserve
+ // enough room for the base URL, the new path, and some extra bytes for
+ // possible escaped characters.
+ output->ReserveSizeIfNeeded(
+ base_parsed.path.begin +
+ std::max(path.end(), std::max(query.end(), ref.end())) + 8);
output->Append(base_url, base_parsed.path.begin);
if (path.len > 0) {
@@ -394,6 +402,11 @@
replacements.SetQuery(relative_url, relative_parsed.query);
replacements.SetRef(relative_url, relative_parsed.ref);
+ // Length() does not include the old scheme, so make sure to add it from the
+ // base URL.
+ output->ReserveSizeIfNeeded(
+ replacements.components().Length() +
+ base_parsed.CountCharactersBefore(Parsed::USERNAME, false) + 8);
return ReplaceStandardURL(base_url, base_parsed, replacements,
query_converter, output, out_parsed);
}
diff --git a/src/url/url_util.cc b/src/url/url_util.cc
index ebe386f..97ebeff 100644
--- a/src/url/url_util.cc
+++ b/src/url/url_util.cc
@@ -6,7 +6,6 @@
#include <stddef.h>
#include <string.h>
-#include <vector>
#ifdef GOOGLEURL_IN_GOOGLE3
#include "base/heap-checker.h"
@@ -14,6 +13,7 @@
#include "base/logging.h"
#include "base/strings/string_util.h"
#include "url/url_canon_internal.h"
+#include "url/url_constants.h"
#include "url/url_file.h"
#include "url/url_util_internal.h"
@@ -21,8 +21,14 @@
namespace {
-const int kNumStandardURLSchemes = 10;
-const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {
+// Pass this enum through for methods which would like to know if whitespace
+// removal is necessary.
+enum WhitespaceRemovalPolicy {
+ REMOVE_WHITESPACE,
+ DO_NOT_REMOVE_WHITESPACE,
+};
+
+const SchemeWithType kStandardURLSchemes[] = {
{kHttpScheme, SCHEME_WITH_PORT},
{kHttpsScheme, SCHEME_WITH_PORT},
// Yes, file URLs can have a hostname, so file URLs should be handled as
@@ -38,21 +44,50 @@
{kHttpsSuboriginScheme, SCHEME_WITH_PORT},
};
-const int kNumReferrerURLSchemes = 4;
-const SchemeWithType kReferrerURLSchemes[kNumReferrerURLSchemes] = {
+const SchemeWithType kReferrerURLSchemes[] = {
{kHttpScheme, SCHEME_WITH_PORT},
{kHttpsScheme, SCHEME_WITH_PORT},
{kHttpSuboriginScheme, SCHEME_WITH_PORT},
{kHttpsSuboriginScheme, SCHEME_WITH_PORT},
};
+const char* kSecureSchemes[] = {
+ kHttpsScheme,
+ kAboutScheme,
+ kDataScheme,
+ kWssScheme,
+};
+
+const char* kLocalSchemes[] = {
+ kFileScheme,
+};
+
+const char* kNoAccessSchemes[] = {
+ kAboutScheme,
+ kJavaScriptScheme,
+ kDataScheme,
+};
+
+const char* kCORSEnabledSchemes[] = {
+ kHttpScheme,
+ kHttpsScheme,
+ kDataScheme,
+};
+
+bool initialized = false;
+
// Lists of the currently installed standard and referrer schemes. These lists
-// are lazily initialized by InitStandardSchemes and InitReferrerSchemes and are
-// leaked on shutdown to prevent any destructors from being called that will
-// slow us down or cause problems.
+// are lazily initialized by Initialize and are leaked on shutdown to prevent
+// any destructors from being called that will slow us down or cause problems.
std::vector<SchemeWithType>* standard_schemes = nullptr;
std::vector<SchemeWithType>* referrer_schemes = nullptr;
+// Similar to above, initialized by the Init*Schemes methods.
+std::vector<std::string>* secure_schemes = nullptr;
+std::vector<std::string>* local_schemes = nullptr;
+std::vector<std::string>* no_access_schemes = nullptr;
+std::vector<std::string>* cors_enabled_schemes = nullptr;
+
// See the LockSchemeRegistries declaration in the header.
bool scheme_registries_locked = false;
@@ -67,27 +102,22 @@
typedef base::StringPiece16 Piece;
};
-void InitSchemes(std::vector<SchemeWithType>** schemes,
- const SchemeWithType* initial_schemes,
+void InitSchemes(std::vector<std::string>** schemes,
+ const char** initial_schemes,
size_t size) {
- if (*schemes)
- return;
- *schemes = new std::vector<SchemeWithType>(size);
+ *schemes = new std::vector<std::string>(size);
for (size_t i = 0; i < size; i++) {
- (*schemes)->push_back(initial_schemes[i]);
+ (*(*schemes))[i] = initial_schemes[i];
}
}
-// Ensures that the standard_schemes list is initialized, does nothing if
-// it already has values.
-void InitStandardSchemes() {
- InitSchemes(&standard_schemes, kStandardURLSchemes, kNumStandardURLSchemes);
-}
-
-// Ensures that the referrer_schemes list is initialized, does nothing if
-// it already has values.
-void InitReferrerSchemes() {
- InitSchemes(&referrer_schemes, kReferrerURLSchemes, kNumReferrerURLSchemes);
+void InitSchemesWithType(std::vector<SchemeWithType>** schemes,
+ const SchemeWithType* initial_schemes,
+ size_t size) {
+ *schemes = new std::vector<SchemeWithType>(size);
+ for (size_t i = 0; i < size; i++) {
+ (*(*schemes))[i] = initial_schemes[i];
+ }
}
// Given a string and a range inside the string, compares it to the given
@@ -127,7 +157,7 @@
template<typename CHAR>
bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) {
- InitStandardSchemes();
+ Initialize();
return DoIsInSchemes(spec, scheme, type, *standard_schemes);
}
@@ -156,19 +186,28 @@
return DoCompareSchemeComponent(spec, our_scheme, compare);
}
-template<typename CHAR>
-bool DoCanonicalize(const CHAR* in_spec,
- int in_spec_len,
+template <typename CHAR>
+bool DoCanonicalize(const CHAR* spec,
+ int spec_len,
bool trim_path_end,
+ WhitespaceRemovalPolicy whitespace_policy,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* output_parsed) {
- // Remove any whitespace from the middle of the relative URL, possibly
- // copying to the new buffer.
+ // Reserve enough room in the output for the input, plus some extra so that
+ // we have room if we have to escape a few things without reallocating.
+ output->ReserveSizeIfNeeded(spec_len + 8);
+
+ // Remove any whitespace from the middle of the relative URL if necessary.
+ // Possibly this will result in copying to the new buffer.
RawCanonOutputT<CHAR> whitespace_buffer;
- int spec_len;
- const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,
- &whitespace_buffer, &spec_len);
+ if (whitespace_policy == REMOVE_WHITESPACE) {
+ int original_len = spec_len;
+ spec =
+ RemoveURLWhitespace(spec, original_len, &whitespace_buffer, &spec_len);
+ if (spec_len != original_len)
+ output_parsed->whitespace_removed = true;
+ }
Parsed parsed_input;
#ifdef WIN32
@@ -248,6 +287,9 @@
const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,
&whitespace_buffer,
&relative_length);
+ if (in_relative_length != relative_length)
+ output_parsed->whitespace_removed = true;
+
bool base_is_authority_based = false;
bool base_is_hierarchical = false;
if (base_spec &&
@@ -273,6 +315,9 @@
return false;
}
+ // Don't reserve buffer space here. Instead, reserve in DoCanonicalize and
+ // ResolveRelativeURL, to enable more accurate buffer sizes.
+
// Pretend for a moment that |base_spec| is a standard URL. Normally
// non-standard URLs are treated as PathURLs, but if the base has an
// authority we would like to preserve it.
@@ -289,7 +334,8 @@
// based on base_parsed_authority instead of base_parsed) and needs to be
// re-created.
DoCanonicalize(temporary_output.data(), temporary_output.length(), true,
- charset_converter, output, output_parsed);
+ REMOVE_WHITESPACE, charset_converter, output,
+ output_parsed);
return did_resolve_succeed;
}
} else if (is_relative) {
@@ -302,8 +348,9 @@
}
// Not relative, canonicalize the input.
- return DoCanonicalize(relative, relative_length, true, charset_converter,
- output, output_parsed);
+ return DoCanonicalize(relative, relative_length, true,
+ DO_NOT_REMOVE_WHITESPACE, charset_converter, output,
+ output_parsed);
}
template<typename CHAR>
@@ -350,8 +397,8 @@
RawCanonOutput<128> recanonicalized;
Parsed recanonicalized_parsed;
DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,
- charset_converter,
- &recanonicalized, &recanonicalized_parsed);
+ REMOVE_WHITESPACE, charset_converter, &recanonicalized,
+ &recanonicalized_parsed);
// Recurse using the version with the scheme already replaced. This will now
// use the replacement rules for the new scheme.
@@ -373,6 +420,12 @@
charset_converter, output, out_parsed);
}
+ // TODO(csharrison): We could be smarter about size to reserve if this is done
+ // in callers below, and the code checks to see which components are being
+ // replaced, and with what length. If this ends up being a hot spot it should
+ // be changed.
+ output->ReserveSizeIfNeeded(spec_len + 8);
+
// If we get here, then we know the scheme doesn't need to be replaced, so can
// just key off the scheme in the spec to know how to do the replacements.
if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) {
@@ -396,9 +449,7 @@
return ReplacePathURL(spec, parsed, replacements, output, out_parsed);
}
-void DoAddScheme(const char* new_scheme,
- SchemeType type,
- std::vector<SchemeWithType>* schemes) {
+void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) {
DCHECK(schemes);
// If this assert triggers, it means you've called Add*Scheme after
// LockSchemeRegistries has been called (see the header file for
@@ -414,6 +465,29 @@
if (scheme_len == 0)
return;
+ DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);
+ schemes->push_back(std::string(new_scheme));
+}
+
+void DoAddSchemeWithType(const char* new_scheme,
+ SchemeType type,
+ std::vector<SchemeWithType>* schemes) {
+ DCHECK(schemes);
+ // If this assert triggers, it means you've called Add*Scheme after
+ // LockSchemeRegistries has been called (see the header file for
+ // LockSchemeRegistries for more).
+ //
+ // This normally means you're trying to set up a new scheme too late in your
+ // application's init process. Locate where your app does this initialization
+ // and calls LockSchemeRegistries, and add your new scheme there.
+ DCHECK(!scheme_registries_locked)
+ << "Trying to add a scheme after the lists have been locked.";
+
+ size_t scheme_len = strlen(new_scheme);
+ if (scheme_len == 0)
+ return;
+
+ DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);
// Duplicate the scheme into a new buffer and add it to the list of standard
// schemes. This pointer will be leaked on shutdown.
char* dup_scheme = new char[scheme_len + 1];
@@ -431,29 +505,85 @@
} // namespace
void Initialize() {
- InitStandardSchemes();
- InitReferrerSchemes();
+ if (initialized)
+ return;
+ InitSchemesWithType(&standard_schemes, kStandardURLSchemes,
+ arraysize(kStandardURLSchemes));
+ InitSchemesWithType(&referrer_schemes, kReferrerURLSchemes,
+ arraysize(kReferrerURLSchemes));
+ InitSchemes(&secure_schemes, kSecureSchemes, arraysize(kSecureSchemes));
+ InitSchemes(&local_schemes, kLocalSchemes, arraysize(kLocalSchemes));
+ InitSchemes(&no_access_schemes, kNoAccessSchemes,
+ arraysize(kNoAccessSchemes));
+ InitSchemes(&cors_enabled_schemes, kCORSEnabledSchemes,
+ arraysize(kCORSEnabledSchemes));
+ initialized = true;
}
void Shutdown() {
- if (standard_schemes) {
- delete standard_schemes;
- standard_schemes = NULL;
- }
- if (referrer_schemes) {
- delete referrer_schemes;
- referrer_schemes = NULL;
- }
+ initialized = false;
+ delete standard_schemes;
+ standard_schemes = nullptr;
+ delete referrer_schemes;
+ referrer_schemes = nullptr;
+ delete secure_schemes;
+ secure_schemes = nullptr;
+ delete local_schemes;
+ local_schemes = nullptr;
+ delete no_access_schemes;
+ no_access_schemes = nullptr;
+ delete cors_enabled_schemes;
+ cors_enabled_schemes = nullptr;
}
void AddStandardScheme(const char* new_scheme, SchemeType type) {
- InitStandardSchemes();
- DoAddScheme(new_scheme, type, standard_schemes);
+ Initialize();
+ DoAddSchemeWithType(new_scheme, type, standard_schemes);
}
void AddReferrerScheme(const char* new_scheme, SchemeType type) {
- InitReferrerSchemes();
- DoAddScheme(new_scheme, type, referrer_schemes);
+ Initialize();
+ DoAddSchemeWithType(new_scheme, type, referrer_schemes);
+}
+
+void AddSecureScheme(const char* new_scheme) {
+ Initialize();
+ DoAddScheme(new_scheme, secure_schemes);
+}
+
+const std::vector<std::string>& GetSecureSchemes() {
+ Initialize();
+ return *secure_schemes;
+}
+
+void AddLocalScheme(const char* new_scheme) {
+ Initialize();
+ DoAddScheme(new_scheme, local_schemes);
+}
+
+const std::vector<std::string>& GetLocalSchemes() {
+ Initialize();
+ return *local_schemes;
+}
+
+void AddNoAccessScheme(const char* new_scheme) {
+ Initialize();
+ DoAddScheme(new_scheme, no_access_schemes);
+}
+
+const std::vector<std::string>& GetNoAccessSchemes() {
+ Initialize();
+ return *no_access_schemes;
+}
+
+void AddCORSEnabledScheme(const char* new_scheme) {
+ Initialize();
+ DoAddScheme(new_scheme, cors_enabled_schemes);
+}
+
+const std::vector<std::string>& GetCORSEnabledSchemes() {
+ Initialize();
+ return *cors_enabled_schemes;
}
void LockSchemeRegistries() {
@@ -477,7 +607,7 @@
}
bool IsReferrerScheme(const char* spec, const Component& scheme) {
- InitReferrerSchemes();
+ Initialize();
SchemeType unused_scheme_type;
return DoIsInSchemes(spec, scheme, &unused_scheme_type, *referrer_schemes);
}
@@ -533,14 +663,22 @@
return true;
}
+bool HostIsIPAddress(base::StringPiece host) {
+ url::RawCanonOutputT<char, 128> ignored_output;
+ url::CanonHostInfo host_info;
+ url::CanonicalizeIPAddress(host.data(), Component(0, host.length()),
+ &ignored_output, &host_info);
+ return host_info.IsIPAddress();
+}
+
bool Canonicalize(const char* spec,
int spec_len,
bool trim_path_end,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* output_parsed) {
- return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,
- output, output_parsed);
+ return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
+ charset_converter, output, output_parsed);
}
bool Canonicalize(const base::char16* spec,
@@ -549,8 +687,8 @@
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* output_parsed) {
- return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,
- output, output_parsed);
+ return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
+ charset_converter, output, output_parsed);
}
bool ResolveRelative(const char* base_spec,
diff --git a/src/url/url_util.h b/src/url/url_util.h
index 724ce95..a4b74b1 100644
--- a/src/url/url_util.h
+++ b/src/url/url_util.h
@@ -6,6 +6,7 @@
#define URL_URL_UTIL_H_
#include <string>
+#include <vector>
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
@@ -57,25 +58,44 @@
SchemeType type;
};
+// The following Add*Scheme functions are not threadsafe and cannot be called
+// concurrently with any other url_util function. They will assert if the lists
+// of schemes have been locked (see LockSchemeRegistries).
+
// Adds an application-defined scheme to the internal list of "standard-format"
// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
-//
-// This function is not threadsafe and can not be called concurrently with any
-// other url_util function. It will assert if the lists of schemes have
-// been locked (see LockSchemeRegistries).
+
URL_EXPORT void AddStandardScheme(const char* new_scheme,
SchemeType scheme_type);
// Adds an application-defined scheme to the internal list of schemes allowed
// for referrers.
-//
-// This function is not threadsafe and can not be called concurrently with any
-// other url_util function. It will assert if the lists of schemes have
-// been locked (see LockSchemeRegistries).
URL_EXPORT void AddReferrerScheme(const char* new_scheme,
SchemeType scheme_type);
+// Adds an application-defined scheme to the list of schemes that do not trigger
+// mixed content warnings.
+URL_EXPORT void AddSecureScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetSecureSchemes();
+
+// Adds an application-defined scheme to the list of schemes that normal pages
+// cannot link to or access (i.e., with the same security rules as those applied
+// to "file" URLs).
+URL_EXPORT void AddLocalScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetLocalSchemes();
+
+// Adds an application-defined scheme to the list of schemes that cause pages
+// loaded with them to not have access to pages loaded with any other URL
+// scheme.
+URL_EXPORT void AddNoAccessScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetNoAccessSchemes();
+
+// Adds an application-defined scheme to the list of schemes that can be sent
+// CORS requests.
+URL_EXPORT void AddCORSEnabledScheme(const char* new_scheme);
+URL_EXPORT const std::vector<std::string>& GetCORSEnabledSchemes();
+
// Sets a flag to prevent future calls to Add*Scheme from succeeding.
//
// This is designed to help prevent errors for multithreaded applications.
@@ -133,7 +153,7 @@
const Component& scheme,
SchemeType* type);
-// Domains ---------------------------------------------------------------------
+// Hosts ----------------------------------------------------------------------
// Returns true if the |canonicalized_host| matches or is in the same domain as
// the given |lower_ascii_domain| string. For example, if the canonicalized
@@ -146,6 +166,10 @@
URL_EXPORT bool DomainIs(base::StringPiece canonicalized_host,
base::StringPiece lower_ascii_domain);
+// Returns true if the hostname is an IP address. Note: this function isn't very
+// cheap, as it must re-parse the host to verify.
+URL_EXPORT bool HostIsIPAddress(base::StringPiece host);
+
// URL library wrappers --------------------------------------------------------
// Parses the given spec according to the extracted scheme type. Normal users