| // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "extensions/common/url_pattern.h" |
| |
| #include <stddef.h> |
| |
| #include <ostream> |
| |
| #include "base/stl_util.h" |
| #include "base/strings/pattern.h" |
| #include "base/strings/strcat.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_split.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/stringprintf.h" |
| #include "content/public/common/url_constants.h" |
| #include "extensions/common/constants.h" |
| #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| #include "net/base/url_util.h" |
| #include "url/gurl.h" |
| #include "url/url_util.h" |
| |
| const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; |
| |
| namespace { |
| |
| // TODO(aa): What about more obscure schemes like javascript: ? |
| // Note: keep this array in sync with kValidSchemeMasks. |
| const char* const kValidSchemes[] = { |
| url::kHttpScheme, url::kHttpsScheme, |
| url::kFileScheme, url::kFtpScheme, |
| content::kChromeUIScheme, extensions::kExtensionScheme, |
| url::kFileSystemScheme, url::kWsScheme, |
| url::kWssScheme, url::kDataScheme, |
| url::kQrcScheme, |
| }; |
| |
| const int kValidSchemeMasks[] = { |
| URLPattern::SCHEME_HTTP, URLPattern::SCHEME_HTTPS, |
| URLPattern::SCHEME_FILE, URLPattern::SCHEME_FTP, |
| URLPattern::SCHEME_CHROMEUI, URLPattern::SCHEME_EXTENSION, |
| URLPattern::SCHEME_FILESYSTEM, URLPattern::SCHEME_WS, |
| URLPattern::SCHEME_WSS, URLPattern::SCHEME_DATA, |
| URLPattern::SCHEME_QRC, |
| }; |
| |
| static_assert(base::size(kValidSchemes) == base::size(kValidSchemeMasks), |
| "must keep these arrays in sync"); |
| |
| const char kParseSuccess[] = "Success."; |
| const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator."; |
| const char kParseErrorInvalidScheme[] = "Invalid scheme."; |
| const char kParseErrorWrongSchemeType[] = "Wrong scheme type."; |
| const char kParseErrorEmptyHost[] = "Host can not be empty."; |
| const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard."; |
| const char kParseErrorEmptyPath[] = "Empty path."; |
| const char kParseErrorInvalidPort[] = "Invalid port."; |
| const char kParseErrorInvalidHost[] = "Invalid host."; |
| |
| // Message explaining each URLPattern::ParseResult. |
| const char* const kParseResultMessages[] = { |
| kParseSuccess, |
| kParseErrorMissingSchemeSeparator, |
| kParseErrorInvalidScheme, |
| kParseErrorWrongSchemeType, |
| kParseErrorEmptyHost, |
| kParseErrorInvalidHostWildcard, |
| kParseErrorEmptyPath, |
| kParseErrorInvalidPort, |
| kParseErrorInvalidHost, |
| }; |
| |
| static_assert(static_cast<int>(URLPattern::ParseResult::kNumParseResults) == |
| base::size(kParseResultMessages), |
| "must add message for each parse result"); |
| |
| const char kPathSeparator[] = "/"; |
| |
| bool IsStandardScheme(base::StringPiece scheme) { |
| // "*" gets the same treatment as a standard scheme. |
| if (scheme == "*") |
| return true; |
| |
| return url::IsStandard(scheme.data(), |
| url::Component(0, static_cast<int>(scheme.length()))); |
| } |
| |
| bool IsValidPortForScheme(base::StringPiece scheme, base::StringPiece port) { |
| if (port == "*") |
| return true; |
| |
| // Only accept non-wildcard ports if the scheme uses ports. |
| if (url::DefaultPortForScheme(scheme.data(), scheme.length()) == |
| url::PORT_UNSPECIFIED) { |
| return false; |
| } |
| |
| int parsed_port = url::PORT_UNSPECIFIED; |
| if (!base::StringToInt(port, &parsed_port)) |
| return false; |
| return (parsed_port >= 0) && (parsed_port < 65536); |
| } |
| |
| // Returns |path| with the trailing wildcard stripped if one existed. |
| // |
| // The functions that rely on this (OverlapsWith and Contains) are only |
| // called for the patterns inside URLPatternSet. In those cases, we know that |
| // the path will have only a single wildcard at the end. This makes figuring |
| // out overlap much easier. It seems like there is probably a computer-sciency |
| // way to solve the general case, but we don't need that yet. |
| base::StringPiece StripTrailingWildcard(base::StringPiece path) { |
| if (path.ends_with("*")) |
| path.remove_suffix(1); |
| return path; |
| } |
| |
| // Removes trailing dot from |host_piece| if any. |
| base::StringPiece CanonicalizeHostForMatching(base::StringPiece host_piece) { |
| if (host_piece.ends_with(".")) |
| host_piece.remove_suffix(1); |
| return host_piece; |
| } |
| |
| } // namespace |
| |
| // static |
| bool URLPattern::IsValidSchemeForExtensions(base::StringPiece scheme) { |
| for (size_t i = 0; i < base::size(kValidSchemes); ++i) { |
| if (scheme == kValidSchemes[i]) |
| return true; |
| } |
| return false; |
| } |
| |
| // static |
| int URLPattern::GetValidSchemeMaskForExtensions() { |
| int result = 0; |
| for (size_t i = 0; i < base::size(kValidSchemeMasks); ++i) |
| result |= kValidSchemeMasks[i]; |
| return result; |
| } |
| |
| URLPattern::URLPattern() |
| : valid_schemes_(SCHEME_NONE), |
| match_all_urls_(false), |
| match_subdomains_(false), |
| port_("*") {} |
| |
| URLPattern::URLPattern(int valid_schemes) |
| : valid_schemes_(valid_schemes), |
| match_all_urls_(false), |
| match_subdomains_(false), |
| port_("*") {} |
| |
| URLPattern::URLPattern(int valid_schemes, base::StringPiece pattern) |
| // Strict error checking is used, because this constructor is only |
| // appropriate when we know |pattern| is valid. |
| : valid_schemes_(valid_schemes), |
| match_all_urls_(false), |
| match_subdomains_(false), |
| port_("*") { |
| ParseResult result = Parse(pattern); |
| if (result != ParseResult::kSuccess) { |
| const char* error_string = GetParseResultString(result); |
| // Temporarily add more logging to investigate why this code path is |
| // reached. For http://crbug.com/856948 |
| LOG(ERROR) << "Invalid pattern was given " << pattern << " result " |
| << error_string; |
| NOTREACHED() << "URLPattern invalid: '" << pattern |
| << "'; error: " << error_string; |
| } |
| } |
| |
| URLPattern::URLPattern(const URLPattern& other) = default; |
| |
| URLPattern::URLPattern(URLPattern&& other) = default; |
| |
| URLPattern::~URLPattern() { |
| } |
| |
| URLPattern& URLPattern::operator=(const URLPattern& other) = default; |
| |
| URLPattern& URLPattern::operator=(URLPattern&& other) = default; |
| |
| bool URLPattern::operator<(const URLPattern& other) const { |
| return GetAsString() < other.GetAsString(); |
| } |
| |
| bool URLPattern::operator>(const URLPattern& other) const { |
| return GetAsString() > other.GetAsString(); |
| } |
| |
| bool URLPattern::operator==(const URLPattern& other) const { |
| return GetAsString() == other.GetAsString(); |
| } |
| |
| std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern) { |
| return out << '"' << url_pattern.GetAsString() << '"'; |
| } |
| |
| URLPattern::ParseResult URLPattern::Parse(base::StringPiece pattern) { |
| spec_.clear(); |
| SetMatchAllURLs(false); |
| SetMatchSubdomains(false); |
| SetPort("*"); |
| |
| // Special case pattern to match every valid URL. |
| if (pattern == kAllUrlsPattern) { |
| SetMatchAllURLs(true); |
| return ParseResult::kSuccess; |
| } |
| |
| // Parse out the scheme. |
| size_t scheme_end_pos = pattern.find(url::kStandardSchemeSeparator); |
| bool has_standard_scheme_separator = true; |
| |
| // Some urls also use ':' alone as the scheme separator. |
| if (scheme_end_pos == base::StringPiece::npos) { |
| scheme_end_pos = pattern.find(':'); |
| has_standard_scheme_separator = false; |
| } |
| |
| if (scheme_end_pos == base::StringPiece::npos) |
| return ParseResult::kMissingSchemeSeparator; |
| |
| if (!SetScheme(pattern.substr(0, scheme_end_pos))) |
| return ParseResult::kInvalidScheme; |
| |
| bool standard_scheme = IsStandardScheme(scheme_); |
| if (standard_scheme != has_standard_scheme_separator) |
| return ParseResult::kWrongSchemeSeparator; |
| |
| // Advance past the scheme separator. |
| scheme_end_pos += |
| (standard_scheme ? strlen(url::kStandardSchemeSeparator) : 1); |
| if (scheme_end_pos >= pattern.size()) |
| return ParseResult::kEmptyHost; |
| |
| // Parse out the host and path. |
| size_t host_start_pos = scheme_end_pos; |
| size_t path_start_pos = 0; |
| |
| if (!standard_scheme) { |
| path_start_pos = host_start_pos; |
| } else if (scheme_ == url::kFileScheme) { |
| size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); |
| if (host_end_pos == base::StringPiece::npos) { |
| // Allow hostname omission. |
| // e.g. file://* is interpreted as file:///*, |
| // file://foo* is interpreted as file:///foo*. |
| path_start_pos = host_start_pos - 1; |
| } else { |
| // Ignore hostname if scheme is file://. |
| // e.g. file://localhost/foo is equal to file:///foo. |
| path_start_pos = host_end_pos; |
| } |
| } else { |
| size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); |
| |
| // Host is required. |
| if (host_start_pos == host_end_pos) |
| return ParseResult::kEmptyHost; |
| |
| if (host_end_pos == base::StringPiece::npos) |
| return ParseResult::kEmptyPath; |
| |
| base::StringPiece host_and_port = |
| pattern.substr(host_start_pos, host_end_pos - host_start_pos); |
| |
| size_t port_separator_pos = base::StringPiece::npos; |
| if (host_and_port[0] != '[') { |
| // Not IPv6 (either IPv4 or just a normal address). |
| port_separator_pos = host_and_port.find(':'); |
| } else { // IPv6. |
| size_t host_end_pos = host_and_port.find(']'); |
| if (host_end_pos == base::StringPiece::npos) |
| return ParseResult::kInvalidHost; |
| if (host_end_pos == 1) |
| return ParseResult::kEmptyHost; |
| |
| if (host_end_pos < host_and_port.length() - 1) { |
| // The host isn't the only component. Check for a port. This would |
| // require a ':' to follow the closing ']' from the host. |
| if (host_and_port[host_end_pos + 1] != ':') |
| return ParseResult::kInvalidHost; |
| |
| port_separator_pos = host_end_pos + 1; |
| } |
| } |
| |
| if (port_separator_pos != base::StringPiece::npos && |
| !SetPort(host_and_port.substr(port_separator_pos + 1))) { |
| return ParseResult::kInvalidPort; |
| } |
| |
| // Note: this substr() will be the entire string if the port position |
| // wasn't found. |
| base::StringPiece host_piece = host_and_port.substr(0, port_separator_pos); |
| |
| if (host_piece.empty()) |
| return ParseResult::kEmptyHost; |
| |
| if (host_piece == "*") { |
| match_subdomains_ = true; |
| host_piece.clear(); |
| } else if (host_piece.starts_with("*.")) { |
| if (host_piece.length() == 2) { |
| // We don't allow just '*.' as a host. |
| return ParseResult::kEmptyHost; |
| } |
| match_subdomains_ = true; |
| host_piece = host_piece.substr(2); |
| } |
| |
| host_ = host_piece.as_string(); |
| |
| path_start_pos = host_end_pos; |
| } |
| |
| SetPath(pattern.substr(path_start_pos)); |
| |
| // No other '*' can occur in the host, though. This isn't necessary, but is |
| // done as a convenience to developers who might otherwise be confused and |
| // think '*' works as a glob in the host. |
| if (host_.find('*') != std::string::npos) |
| return ParseResult::kInvalidHostWildcard; |
| |
| if (!host_.empty()) { |
| // If |host_| is present (i.e., isn't a wildcard), we need to canonicalize |
| // it. |
| url::CanonHostInfo host_info; |
| host_ = net::CanonicalizeHost(host_, &host_info); |
| // net::CanonicalizeHost() returns an empty string on failure. |
| if (host_.empty()) |
| return ParseResult::kInvalidHost; |
| } |
| |
| // Null characters are not allowed in hosts. |
| if (host_.find('\0') != std::string::npos) |
| return ParseResult::kInvalidHost; |
| |
| return ParseResult::kSuccess; |
| } |
| |
| void URLPattern::SetValidSchemes(int valid_schemes) { |
| // TODO(devlin): Should we check that valid_schemes agrees with |scheme_| |
| // here? Otherwise, valid_schemes_ and schemes_ may stop agreeing with each |
| // other (e.g., in the case of `*://*/*`, where the scheme should only be |
| // http or https). |
| spec_.clear(); |
| valid_schemes_ = valid_schemes; |
| } |
| |
| void URLPattern::SetHost(base::StringPiece host) { |
| spec_.clear(); |
| host.CopyToString(&host_); |
| } |
| |
| void URLPattern::SetMatchAllURLs(bool val) { |
| spec_.clear(); |
| match_all_urls_ = val; |
| |
| if (val) { |
| match_subdomains_ = true; |
| scheme_ = "*"; |
| host_.clear(); |
| SetPath("/*"); |
| } |
| } |
| |
| void URLPattern::SetMatchSubdomains(bool val) { |
| spec_.clear(); |
| match_subdomains_ = val; |
| } |
| |
| bool URLPattern::SetScheme(base::StringPiece scheme) { |
| spec_.clear(); |
| scheme.CopyToString(&scheme_); |
| if (scheme_ == "*") { |
| valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); |
| } else if (!IsValidScheme(scheme_)) { |
| return false; |
| } |
| return true; |
| } |
| |
| bool URLPattern::IsValidScheme(base::StringPiece scheme) const { |
| if (valid_schemes_ == SCHEME_ALL) |
| return true; |
| |
| for (size_t i = 0; i < base::size(kValidSchemes); ++i) { |
| if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i])) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| void URLPattern::SetPath(base::StringPiece path) { |
| spec_.clear(); |
| path.CopyToString(&path_); |
| path_escaped_ = path_; |
| base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); |
| base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); |
| } |
| |
| bool URLPattern::SetPort(base::StringPiece port) { |
| spec_.clear(); |
| if (IsValidPortForScheme(scheme_, port)) { |
| port.CopyToString(&port_); |
| return true; |
| } |
| return false; |
| } |
| |
| bool URLPattern::MatchesURL(const GURL& test) const { |
| const GURL* test_url = &test; |
| bool has_inner_url = test.inner_url() != nullptr; |
| |
| if (has_inner_url) { |
| if (!test.SchemeIsFileSystem()) |
| return false; // The only nested URLs we handle are filesystem URLs. |
| test_url = test.inner_url(); |
| } |
| |
| // Ensure the scheme matches first, since <all_urls> may not match this URL if |
| // the scheme is excluded. |
| if (!MatchesScheme(test_url->scheme_piece())) |
| return false; |
| |
| if (match_all_urls_) |
| return true; |
| |
| // Unless |match_all_urls_| is true, the grammar only permits matching |
| // URLs with nonempty paths. |
| if (!test.has_path()) |
| return false; |
| |
| std::string path_for_request = test.PathForRequest(); |
| if (has_inner_url) { |
| path_for_request = base::StringPrintf("%s%s", test_url->path_piece().data(), |
| path_for_request.c_str()); |
| } |
| |
| return MatchesSecurityOriginHelper(*test_url) && |
| MatchesPath(path_for_request); |
| } |
| |
| bool URLPattern::MatchesSecurityOrigin(const GURL& test) const { |
| const GURL* test_url = &test; |
| bool has_inner_url = test.inner_url() != NULL; |
| |
| if (has_inner_url) { |
| if (!test.SchemeIsFileSystem()) |
| return false; // The only nested URLs we handle are filesystem URLs. |
| test_url = test.inner_url(); |
| } |
| |
| if (!MatchesScheme(test_url->scheme())) |
| return false; |
| |
| if (match_all_urls_) |
| return true; |
| |
| return MatchesSecurityOriginHelper(*test_url); |
| } |
| |
| bool URLPattern::MatchesScheme(base::StringPiece test) const { |
| if (!IsValidScheme(test)) |
| return false; |
| |
| return scheme_ == "*" || test == scheme_; |
| } |
| |
| bool URLPattern::MatchesHost(base::StringPiece host) const { |
| // TODO(devlin): This is a bit sad. Parsing urls is expensive. However, it's |
| // important that we do this conversion to a GURL in order to canonicalize the |
| // host (the pattern's host_ already is canonicalized from Parse()). We can't |
| // just do string comparison. |
| return MatchesHost( |
| GURL(base::StringPrintf("%s%s%s/", url::kHttpScheme, |
| url::kStandardSchemeSeparator, host.data()))); |
| } |
| |
| bool URLPattern::MatchesHost(const GURL& test) const { |
| base::StringPiece test_host(CanonicalizeHostForMatching(test.host_piece())); |
| const base::StringPiece pattern_host(CanonicalizeHostForMatching(host_)); |
| |
| // If the hosts are exactly equal, we have a match. |
| if (test_host == pattern_host) |
| return true; |
| |
| // If we're matching subdomains, and we have no host in the match pattern, |
| // that means that we're matching all hosts, which means we have a match no |
| // matter what the test host is. |
| if (match_subdomains_ && pattern_host.empty()) |
| return true; |
| |
| // Otherwise, we can only match if our match pattern matches subdomains. |
| if (!match_subdomains_) |
| return false; |
| |
| // We don't do subdomain matching against IP addresses, so we can give up now |
| // if the test host is an IP address. |
| if (test.HostIsIPAddress()) |
| return false; |
| |
| // Check if the test host is a subdomain of our host. |
| if (test_host.length() <= (pattern_host.length() + 1)) |
| return false; |
| |
| if (!test_host.ends_with(pattern_host)) |
| return false; |
| |
| return test_host[test_host.length() - pattern_host.length() - 1] == '.'; |
| } |
| |
| bool URLPattern::MatchesEffectiveTld( |
| net::registry_controlled_domains::PrivateRegistryFilter private_filter, |
| net::registry_controlled_domains::UnknownRegistryFilter unknown_filter) |
| const { |
| // Check if it matches all urls or is a pattern like http://*/*. |
| if (match_all_urls_ || (match_subdomains_ && host_.empty())) |
| return true; |
| |
| // If this doesn't even match subdomains, it can't possibly be a TLD wildcard. |
| if (!match_subdomains_) |
| return false; |
| |
| // If there was more than just a TLD in the host (e.g., *.foobar.com), it |
| // doesn't match all hosts in an effective TLD. |
| if (net::registry_controlled_domains::HostHasRegistryControlledDomain( |
| host_, unknown_filter, private_filter)) { |
| return false; |
| } |
| |
| // At this point the host could either be just a TLD ("com") or some unknown |
| // TLD-like string ("notatld"). To disambiguate between them construct a |
| // fake URL, and check the registry. |
| // |
| // If we recognized this TLD, then this is a pattern like *.com, and it |
| // matches an effective TLD. |
| return net::registry_controlled_domains::HostHasRegistryControlledDomain( |
| "notatld." + host_, unknown_filter, private_filter); |
| } |
| |
| bool URLPattern::MatchesSingleOrigin() const { |
| // Strictly speaking, the port is part of the origin, but in URLPattern it |
| // defaults to *. It's not very interesting anyway, so leave it out. |
| return !MatchesEffectiveTld() && scheme_ != "*" && !match_subdomains_; |
| } |
| |
| bool URLPattern::MatchesPath(base::StringPiece test) const { |
| // Make the behaviour of OverlapsWith consistent with MatchesURL, which is |
| // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'. |
| // The below if is a no-copy way of doing (test + "/*" == path_escaped_). |
| if (path_escaped_.length() == test.length() + 2 && |
| base::StartsWith(path_escaped_.c_str(), test, |
| base::CompareCase::SENSITIVE) && |
| base::EndsWith(path_escaped_, "/*", base::CompareCase::SENSITIVE)) { |
| return true; |
| } |
| |
| return base::MatchPattern(test, path_escaped_); |
| } |
| |
| const std::string& URLPattern::GetAsString() const { |
| if (!spec_.empty()) |
| return spec_; |
| |
| if (match_all_urls_) { |
| spec_ = kAllUrlsPattern; |
| return spec_; |
| } |
| |
| bool standard_scheme = IsStandardScheme(scheme_); |
| |
| std::string spec = scheme_ + |
| (standard_scheme ? url::kStandardSchemeSeparator : ":"); |
| |
| if (scheme_ != url::kFileScheme && standard_scheme) { |
| if (match_subdomains_) { |
| spec += "*"; |
| if (!host_.empty()) |
| spec += "."; |
| } |
| |
| if (!host_.empty()) |
| spec += host_; |
| |
| if (port_ != "*") { |
| spec += ":"; |
| spec += port_; |
| } |
| } |
| |
| if (!path_.empty()) |
| spec += path_; |
| |
| spec_ = std::move(spec); |
| return spec_; |
| } |
| |
| bool URLPattern::OverlapsWith(const URLPattern& other) const { |
| if (match_all_urls() || other.match_all_urls()) |
| return true; |
| return (MatchesAnyScheme(other.GetExplicitSchemes()) || |
| other.MatchesAnyScheme(GetExplicitSchemes())) |
| && (MatchesHost(other.host()) || other.MatchesHost(host())) |
| && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port())) |
| && (MatchesPath(StripTrailingWildcard(other.path())) || |
| other.MatchesPath(StripTrailingWildcard(path()))); |
| } |
| |
| bool URLPattern::Contains(const URLPattern& other) const { |
| // Important: it's not enough to just check match_all_urls(); we also need to |
| // make sure that the schemes in this pattern are a superset of those in |
| // |other|. |
| if (match_all_urls() && |
| (valid_schemes_ & other.valid_schemes_) == other.valid_schemes_) { |
| return true; |
| } |
| |
| return MatchesAllSchemes(other.GetExplicitSchemes()) && |
| MatchesHost(other.host()) && |
| (!other.match_subdomains_ || match_subdomains_) && |
| MatchesPortPattern(other.port()) && |
| MatchesPath(StripTrailingWildcard(other.path())); |
| } |
| |
| base::Optional<URLPattern> URLPattern::CreateIntersection( |
| const URLPattern& other) const { |
| // Easy case: Schemes don't overlap. Return nullopt. |
| int intersection_schemes = URLPattern::SCHEME_NONE; |
| if (valid_schemes_ == URLPattern::SCHEME_ALL) |
| intersection_schemes = other.valid_schemes_; |
| else if (other.valid_schemes_ == URLPattern::SCHEME_ALL) |
| intersection_schemes = valid_schemes_; |
| else |
| intersection_schemes = valid_schemes_ & other.valid_schemes_; |
| |
| if (intersection_schemes == URLPattern::SCHEME_NONE) |
| return base::nullopt; |
| |
| { |
| // In a few cases, we can (mostly) return a copy of one of the patterns. |
| // This can happen when either: |
| // - The URLPattern's are identical (possibly excluding valid_schemes_) |
| // - One of the patterns has match_all_urls() equal to true. |
| // NOTE(devlin): Theoretically, we could use Contains() instead of |
| // match_all_urls() here. However, Contains() strips the trailing wildcard |
| // from the path, which could yield the incorrect result. |
| const URLPattern* copy_source = nullptr; |
| if (*this == other || other.match_all_urls()) |
| copy_source = this; |
| else if (match_all_urls()) |
| copy_source = &other; |
| |
| if (copy_source) { |
| // NOTE: equality checks don't take into account valid_schemes_, and |
| // schemes can be different in the case of match_all_urls() as well, so |
| // we can't always just return *copy_source. |
| if (intersection_schemes == copy_source->valid_schemes_) |
| return *copy_source; |
| URLPattern result(intersection_schemes); |
| ParseResult parse_result = result.Parse(copy_source->GetAsString()); |
| CHECK_EQ(ParseResult::kSuccess, parse_result); |
| return result; |
| } |
| } |
| |
| // No more easy cases. Go through component by component to find the patterns |
| // that intersect. |
| |
| // Note: Alias the function type (rather than using auto) because |
| // MatchesHost() is overloaded. |
| using match_function_type = bool (URLPattern::*)(base::StringPiece) const; |
| |
| auto get_intersection = [this, &other](base::StringPiece own_str, |
| base::StringPiece other_str, |
| match_function_type match_function, |
| base::StringPiece* out) { |
| if ((this->*match_function)(other_str)) { |
| *out = other_str; |
| return true; |
| } |
| if ((other.*match_function)(own_str)) { |
| *out = own_str; |
| return true; |
| } |
| return false; |
| }; |
| |
| base::StringPiece scheme; |
| base::StringPiece host; |
| base::StringPiece port; |
| base::StringPiece path; |
| // If any pieces fail to overlap, then there is no intersection. |
| if (!get_intersection(scheme_, other.scheme_, &URLPattern::MatchesScheme, |
| &scheme) || |
| !get_intersection(host_, other.host_, &URLPattern::MatchesHost, &host) || |
| !get_intersection(port_, other.port_, &URLPattern::MatchesPortPattern, |
| &port) || |
| !get_intersection(path_, other.path_, &URLPattern::MatchesPath, &path)) { |
| return base::nullopt; |
| } |
| |
| // Only match subdomains if both patterns match subdomains. |
| base::StringPiece subdomains; |
| if (match_subdomains_ && other.match_subdomains_) { |
| // The host may be empty (e.g., in the case of *://*/* - in that case, only |
| // append '*' instead of '*.'. |
| subdomains = host.empty() ? "*" : "*."; |
| } |
| |
| base::StringPiece scheme_separator = |
| IsStandardScheme(scheme) ? url::kStandardSchemeSeparator : ":"; |
| |
| std::string pattern_str = base::StrCat( |
| {scheme, scheme_separator, subdomains, host, ":", port, path}); |
| |
| URLPattern pattern(intersection_schemes); |
| ParseResult result = pattern.Parse(pattern_str); |
| // TODO(devlin): I don't think there's any way this should ever fail, but |
| // use a CHECK() to flush any cases out. If nothing crops up, downgrade this |
| // to a DCHECK in M72. |
| CHECK_EQ(ParseResult::kSuccess, result); |
| |
| return pattern; |
| } |
| |
| bool URLPattern::MatchesAnyScheme( |
| const std::vector<std::string>& schemes) const { |
| for (auto i = schemes.cbegin(); i != schemes.cend(); ++i) { |
| if (MatchesScheme(*i)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool URLPattern::MatchesAllSchemes( |
| const std::vector<std::string>& schemes) const { |
| for (auto i = schemes.cbegin(); i != schemes.cend(); ++i) { |
| if (!MatchesScheme(*i)) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const { |
| // Ignore hostname if scheme is file://. |
| if (scheme_ != url::kFileScheme && !MatchesHost(test)) |
| return false; |
| |
| if (!MatchesPortPattern(base::NumberToString(test.EffectiveIntPort()))) |
| return false; |
| |
| return true; |
| } |
| |
| bool URLPattern::MatchesPortPattern(base::StringPiece port) const { |
| return port_ == "*" || port_ == port; |
| } |
| |
| std::vector<std::string> URLPattern::GetExplicitSchemes() const { |
| std::vector<std::string> result; |
| |
| if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) { |
| result.push_back(scheme_); |
| return result; |
| } |
| |
| for (size_t i = 0; i < base::size(kValidSchemes); ++i) { |
| if (MatchesScheme(kValidSchemes[i])) { |
| result.push_back(kValidSchemes[i]); |
| } |
| } |
| |
| return result; |
| } |
| |
| std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const { |
| std::vector<std::string> explicit_schemes = GetExplicitSchemes(); |
| std::vector<URLPattern> result; |
| |
| for (std::vector<std::string>::const_iterator i = explicit_schemes.begin(); |
| i != explicit_schemes.end(); ++i) { |
| URLPattern temp = *this; |
| temp.SetScheme(*i); |
| temp.SetMatchAllURLs(false); |
| result.push_back(temp); |
| } |
| |
| return result; |
| } |
| |
| // static |
| const char* URLPattern::GetParseResultString( |
| URLPattern::ParseResult parse_result) { |
| return kParseResultMessages[static_cast<int>(parse_result)]; |
| } |