| // Copyright 2015 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/url_formatter/url_formatter.h" |
| |
| #include <stddef.h> |
| #include <string.h> |
| |
| #include <vector> |
| |
| #include "base/stl_util.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_piece.h" |
| #include "base/strings/stringprintf.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| #include "url/gurl.h" |
| |
| namespace url_formatter { |
| |
| namespace { |
| |
| using base::WideToUTF16; |
| using base::ASCIIToUTF16; |
| |
| const size_t kNpos = base::string16::npos; |
| |
| struct AdjustOffsetCase { |
| size_t input_offset; |
| size_t output_offset; |
| }; |
| |
| struct UrlTestData { |
| const char* const description; |
| const char* const input; |
| FormatUrlTypes format_types; |
| net::UnescapeRule::Type escape_rules; |
| const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. |
| size_t prefix_len; |
| }; |
| |
| // A pair of helpers for the FormatUrlWithOffsets() test. |
| void VerboseExpect(size_t expected, |
| size_t actual, |
| const std::string& original_url, |
| size_t position, |
| const base::string16& formatted_url) { |
| EXPECT_EQ(expected, actual) << "Original URL: " << original_url |
| << " (at char " << position << ")\nFormatted URL: " << formatted_url; |
| } |
| |
| void CheckAdjustedOffsets(const std::string& url_string, |
| FormatUrlTypes format_types, |
| net::UnescapeRule::Type unescape_rules, |
| const size_t* output_offsets) { |
| GURL url(url_string); |
| size_t url_length = url_string.length(); |
| std::vector<size_t> offsets; |
| for (size_t i = 0; i <= url_length + 1; ++i) |
| offsets.push_back(i); |
| offsets.push_back(500000); // Something larger than any input length. |
| offsets.push_back(std::string::npos); |
| base::string16 formatted_url = FormatUrlWithOffsets(url, format_types, |
| unescape_rules, nullptr, nullptr, &offsets); |
| for (size_t i = 0; i < url_length; ++i) |
| VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); |
| VerboseExpect(formatted_url.length(), offsets[url_length], url_string, |
| url_length, formatted_url); |
| VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, |
| 500000, formatted_url); |
| VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, |
| std::string::npos, formatted_url); |
| } |
| |
| } // namespace |
| |
| TEST(UrlFormatterTest, FormatUrl) { |
| FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; |
| // clang-format off |
| const UrlTestData tests[] = { |
| {"Empty URL", "", default_format_type, net::UnescapeRule::NORMAL, L"", 0}, |
| |
| {"Simple URL", "http://www.google.com/", default_format_type, |
| net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, |
| |
| {"With a port number and a reference", |
| "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type, |
| net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, |
| |
| // -------- IDN tests -------- |
| {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", |
| default_format_type, net::UnescapeRule::NORMAL, |
| L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, |
| |
| {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", |
| default_format_type, net::UnescapeRule::NORMAL, |
| // GURL doesn't assume an email address's domain part as a host name. |
| L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, |
| |
| {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", |
| default_format_type, net::UnescapeRule::NORMAL, |
| L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, |
| |
| {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", |
| default_format_type, net::UnescapeRule::NORMAL, |
| L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, |
| |
| // -------- omit_username_password flag tests -------- |
| {"With username and password, omit_username_password=false", |
| "http://user:passwd@example.com/foo", kFormatUrlOmitNothing, |
| net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, |
| |
| {"With username and password, omit_username_password=true", |
| "http://user:passwd@example.com/foo", default_format_type, |
| net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, |
| |
| {"With username and no password", "http://user@example.com/foo", |
| default_format_type, net::UnescapeRule::NORMAL, |
| L"http://example.com/foo", 7}, |
| |
| {"Just '@' without username and password", "http://@example.com/foo", |
| default_format_type, net::UnescapeRule::NORMAL, |
| L"http://example.com/foo", 7}, |
| |
| // GURL doesn't think local-part of an email address is username for URL. |
| {"mailto:, omit_username_password=true", "mailto:foo@example.com", |
| default_format_type, net::UnescapeRule::NORMAL, |
| L"mailto:foo@example.com", 7}, |
| |
| // -------- unescape flag tests -------- |
| {"Do not unescape", |
| "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" |
| "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
| "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
| default_format_type, net::UnescapeRule::NONE, |
| // GURL parses %-encoded hostnames into Punycode. |
| L"http://\x30B0\x30FC\x30B0\x30EB.jp/" |
| L"%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
| L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
| 7}, |
| |
| {"Unescape normally", |
| "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" |
| "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
| "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
| default_format_type, net::UnescapeRule::NORMAL, |
| L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB" |
| L"?q=\x30B0\x30FC\x30B0\x30EB", |
| 7}, |
| |
| {"Unescape normally with BiDi control character", |
| "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", default_format_type, |
| net::UnescapeRule::NORMAL, |
| L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, |
| |
| {"Unescape normally including unescape spaces", |
| "http://www.google.com/search?q=Hello%20World", default_format_type, |
| net::UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", |
| 7}, |
| |
| /* |
| {"unescape=true with some special characters", |
| "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", |
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
| L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, |
| */ |
| // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". |
| |
| // -------- omit http: -------- |
| {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP, |
| net::UnescapeRule::NORMAL, L"www.google.com/", 0}, |
| |
| {"omit http on bare scheme", "http://", kFormatUrlOmitDefaults, |
| net::UnescapeRule::NORMAL, L"", 0}, |
| |
| {"omit http with user name", "http://user@example.com/foo", |
| kFormatUrlOmitDefaults, net::UnescapeRule::NORMAL, L"example.com/foo", |
| 0}, |
| |
| {"omit http with https", "https://www.google.com/", kFormatUrlOmitHTTP, |
| net::UnescapeRule::NORMAL, L"https://www.google.com/", 8}, |
| |
| {"omit http starts with ftp.", "http://ftp.google.com/", |
| kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", |
| 7}, |
| |
| // -------- omit file: -------- |
| #if defined(OS_WIN) |
| {"omit file on Windows", "file:///C:/Users/homedirname/folder/file.pdf/", |
| kFormatUrlOmitFileScheme, net::UnescapeRule::NORMAL, |
| L"C:/Users/homedirname/folder/file.pdf/", -1}, |
| #else |
| {"omit file", "file:///Users/homedirname/folder/file.pdf/", |
| kFormatUrlOmitFileScheme, net::UnescapeRule::NORMAL, |
| L"/Users/homedirname/folder/file.pdf/", 0}, |
| #endif |
| // -------- omit mailto: -------- |
| { "omit mailto", "mailto:foo@bar.com", |
| kFormatUrlOmitMailToScheme, net::UnescapeRule::NORMAL, |
| L"foo@bar.com", 0 }, |
| |
| // -------- omit trailing slash on bare hostname -------- |
| {"omit slash when it's the entire path", "http://www.google.com/", |
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
| L"http://www.google.com", 7}, |
| {"omit slash when there's a ref", "http://www.google.com/#ref", |
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
| L"http://www.google.com/#ref", 7}, |
| {"omit slash when there's a query", "http://www.google.com/?", |
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
| L"http://www.google.com/?", 7}, |
| {"omit slash when it's not the entire path", "http://www.google.com/foo", |
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
| L"http://www.google.com/foo", 7}, |
| {"omit slash for nonstandard URLs", "data:/", |
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
| L"data:/", 5}, |
| {"omit slash for file URLs", "file:///", |
| kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
| L"file:///", 7}, |
| |
| // -------- view-source: -------- |
| {"view-source", "view-source:http://xn--qcka1pmc.jp/", |
| default_format_type, net::UnescapeRule::NORMAL, |
| L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, |
| |
| {"view-source of view-source", |
| "view-source:view-source:http://xn--qcka1pmc.jp/", default_format_type, |
| net::UnescapeRule::NORMAL, |
| L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, |
| |
| // view-source should omit http and trailing slash where non-view-source |
| // would. |
| {"view-source omit http", "view-source:http://a.b/c", |
| kFormatUrlOmitDefaults, net::UnescapeRule::NORMAL, L"view-source:a.b/c", |
| 12}, |
| {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", |
| kFormatUrlOmitDefaults, net::UnescapeRule::NORMAL, |
| L"view-source:http://ftp.b/c", 19}, |
| {"view-source omit slash when it's the entire path", |
| "view-source:http://a.b/", kFormatUrlOmitDefaults, |
| net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, |
| {"view-source never applies destructive elisions to its inner URL", |
| "view-source:https://www.google.com/foo", |
| kFormatUrlOmitDefaults | kFormatUrlOmitHTTPS | |
| kFormatUrlOmitTrivialSubdomains | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"view-source:https://www.google.com/foo", |
| 20}, |
| |
| // -------- omit https -------- |
| {"omit https", "https://www.google.com/", kFormatUrlOmitHTTPS, |
| net::UnescapeRule::NORMAL, L"www.google.com/", 0}, |
| {"omit https but do not omit http", "http://www.google.com/", |
| kFormatUrlOmitHTTPS, net::UnescapeRule::NORMAL, |
| L"http://www.google.com/", 7}, |
| {"omit https, username, and password", |
| "https://user:password@example.com/foo", |
| kFormatUrlOmitDefaults | kFormatUrlOmitHTTPS, net::UnescapeRule::NORMAL, |
| L"example.com/foo", 0}, |
| {"omit https, but preserve user name and password", |
| "https://user:password@example.com/foo", kFormatUrlOmitHTTPS, |
| net::UnescapeRule::NORMAL, L"user:password@example.com/foo", 14}, |
| {"omit https should not affect hosts starting with ftp.", |
| "https://ftp.google.com/", kFormatUrlOmitHTTP | kFormatUrlOmitHTTPS, |
| net::UnescapeRule::NORMAL, L"https://ftp.google.com/", 8}, |
| |
| // -------- omit trivial subdomains -------- |
| {"omit trivial subdomains - trim leading www", |
| "http://www.wikipedia.org/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://wikipedia.org/", 7}, |
| {"omit trivial subdomains - don't trim leading m", |
| "http://m.google.com/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://m.google.com/", 7}, |
| {"omit trivial subdomains - don't trim www after a leading m", |
| "http://m.www.google.com/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://m.www.google.com/", 7}, |
| {"omit trivial subdomains - trim first www only", |
| "http://www.www.www.wikipedia.org/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://www.www.wikipedia.org/", 7}, |
| {"omit trivial subdomains - don't trim www from middle", |
| "http://en.www.wikipedia.org/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://en.www.wikipedia.org/", 7}, |
| {"omit trivial subdomains - don't do blind substring matches for www", |
| "http://foowww.google.com/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://foowww.google.com/", 7}, |
| {"omit trivial subdomains - don't crash on multiple delimiters", |
| "http://www....foobar...google.com/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://...foobar...google.com/", 7}, |
| |
| {"omit trivial subdomains - sanity check for ordinary subdomains", |
| "http://mail.yahoo.com/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://mail.yahoo.com/", 7}, |
| {"omit trivial subdomains - sanity check for auth", |
| "http://www:m@google.com/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://www:m@google.com/", 13}, |
| {"omit trivial subdomains - sanity check for path", |
| "http://google.com/www.m.foobar", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://google.com/www.m.foobar", 7}, |
| {"omit trivial subdomains - sanity check for IDN", |
| "http://www.xn--cy2a840a.www.xn--cy2a840a.com", |
| kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL, |
| L"http://\x89c6\x9891.www.\x89c6\x9891.com/", 7}, |
| |
| {"omit trivial subdomains but leave registry and domain alone - trivial", |
| "http://google.com/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://google.com/", 7}, |
| {"omit trivial subdomains but leave registry and domain alone - www", |
| "http://www.com/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://www.com/", 7}, |
| {"omit trivial subdomains but leave registry and domain alone - co.uk", |
| "http://m.co.uk/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://m.co.uk/", 7}, |
| {"omit trivial subdomains but leave eTLD (effective TLD) alone", |
| "http://www.appspot.com/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://www.appspot.com/", 7}, |
| |
| |
| {"omit trivial subdomains but leave intranet hostnames alone", |
| "http://router/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://router/", 7}, |
| {"omit trivial subdomains but leave alone if host itself is a registry", |
| "http://co.uk/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, L"http://co.uk/", 7}, |
| |
| // -------- trim after host -------- |
| {"omit the trailing slash when ommitting the path", "http://google.com/", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"google.com", 0}, |
| {"omit the simple file path when ommitting the path", |
| "http://google.com/foo", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"google.com", 0}, |
| {"omit the file and folder path when ommitting the path", |
| "http://google.com/ab/cd", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"google.com", 0}, |
| {"omit everything after host with query only", |
| "http://google.com/?foo=bar", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"google.com", 0}, |
| {"omit everything after host with ref only", "http://google.com/#foobar", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"google.com", 0}, |
| {"omit everything after host with path and query only", |
| "http://google.com/foo?a=b", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"google.com", 0}, |
| {"omit everything after host with path and ref only", |
| "http://google.com/foo#c", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"google.com", 0}, |
| {"omit everything after host with query and ref only", |
| "http://google.com/?a=b#c", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"google.com", 0}, |
| {"omit everything after host with path, query and ref", |
| "http://google.com/foo?a=b#c", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"google.com", 0}, |
| {"omit everything after host with repeated delimiters (sanity check)", |
| "http://google.com////???####", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, L"google.com", 0}, |
| {"never trim file paths", "file:///Users/homedirname/folder/file.pdf/", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, |
| L"file:///Users/homedirname/folder/file.pdf/", 7}, |
| }; |
| // clang-format on |
| |
| for (size_t i = 0; i < base::size(tests); ++i) { |
| size_t prefix_len; |
| base::string16 formatted = FormatUrl( |
| GURL(tests[i].input), tests[i].format_types, tests[i].escape_rules, |
| nullptr, &prefix_len, nullptr); |
| EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; |
| EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; |
| } |
| } |
| |
| TEST(UrlFormatterTest, FormatUrlParsed) { |
| // No unescape case. |
| url::Parsed parsed; |
| base::string16 formatted = |
| FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" |
| "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), |
| kFormatUrlOmitNothing, net::UnescapeRule::NONE, |
| &parsed, nullptr, nullptr); |
| EXPECT_EQ( |
| WideToUTF16(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" |
| L"/%E3%82%B0/?q=%E3%82%B0#%E3%82%B0"), |
| formatted); |
| EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), |
| formatted.substr(parsed.username.begin, parsed.username.len)); |
| EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), |
| formatted.substr(parsed.password.begin, parsed.password.len)); |
| EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), |
| formatted.substr(parsed.host.begin, parsed.host.len)); |
| EXPECT_EQ(WideToUTF16(L"8080"), |
| formatted.substr(parsed.port.begin, parsed.port.len)); |
| EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"), |
| formatted.substr(parsed.path.begin, parsed.path.len)); |
| EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"), |
| formatted.substr(parsed.query.begin, parsed.query.len)); |
| EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), |
| formatted.substr(parsed.ref.begin, parsed.ref.len)); |
| |
| // Unescape case. |
| formatted = |
| FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" |
| "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), |
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, &parsed, |
| nullptr, nullptr); |
| EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" |
| L"/\x30B0/?q=\x30B0#\x30B0"), |
| formatted); |
| EXPECT_EQ(WideToUTF16(L"\x30B0"), |
| formatted.substr(parsed.username.begin, parsed.username.len)); |
| EXPECT_EQ(WideToUTF16(L"\x30FC"), |
| formatted.substr(parsed.password.begin, parsed.password.len)); |
| EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), |
| formatted.substr(parsed.host.begin, parsed.host.len)); |
| EXPECT_EQ(WideToUTF16(L"8080"), |
| formatted.substr(parsed.port.begin, parsed.port.len)); |
| EXPECT_EQ(WideToUTF16(L"/\x30B0/"), |
| formatted.substr(parsed.path.begin, parsed.path.len)); |
| EXPECT_EQ(WideToUTF16(L"q=\x30B0"), |
| formatted.substr(parsed.query.begin, parsed.query.len)); |
| EXPECT_EQ(WideToUTF16(L"\x30B0"), |
| formatted.substr(parsed.ref.begin, parsed.ref.len)); |
| |
| // Omit_username_password + unescape case. |
| formatted = |
| FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" |
| "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), |
| kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL, |
| &parsed, nullptr, nullptr); |
| EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" |
| L"/\x30B0/?q=\x30B0#\x30B0"), |
| formatted); |
| EXPECT_FALSE(parsed.username.is_valid()); |
| EXPECT_FALSE(parsed.password.is_valid()); |
| EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), |
| formatted.substr(parsed.host.begin, parsed.host.len)); |
| EXPECT_EQ(WideToUTF16(L"8080"), |
| formatted.substr(parsed.port.begin, parsed.port.len)); |
| EXPECT_EQ(WideToUTF16(L"/\x30B0/"), |
| formatted.substr(parsed.path.begin, parsed.path.len)); |
| EXPECT_EQ(WideToUTF16(L"q=\x30B0"), |
| formatted.substr(parsed.query.begin, parsed.query.len)); |
| EXPECT_EQ(WideToUTF16(L"\x30B0"), |
| formatted.substr(parsed.ref.begin, parsed.ref.len)); |
| |
| // View-source case. |
| formatted = |
| FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"), |
| kFormatUrlOmitUsernamePassword, net::UnescapeRule::NORMAL, |
| &parsed, nullptr, nullptr); |
| EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"), |
| formatted); |
| EXPECT_EQ(WideToUTF16(L"view-source:http"), |
| formatted.substr(parsed.scheme.begin, parsed.scheme.len)); |
| EXPECT_FALSE(parsed.username.is_valid()); |
| EXPECT_FALSE(parsed.password.is_valid()); |
| EXPECT_EQ(WideToUTF16(L"host"), |
| formatted.substr(parsed.host.begin, parsed.host.len)); |
| EXPECT_EQ(WideToUTF16(L"81"), |
| formatted.substr(parsed.port.begin, parsed.port.len)); |
| EXPECT_EQ(WideToUTF16(L"/path"), |
| formatted.substr(parsed.path.begin, parsed.path.len)); |
| EXPECT_EQ(WideToUTF16(L"query"), |
| formatted.substr(parsed.query.begin, parsed.query.len)); |
| EXPECT_EQ(WideToUTF16(L"ref"), |
| formatted.substr(parsed.ref.begin, parsed.ref.len)); |
| |
| // omit http case. |
| formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"), kFormatUrlOmitHTTP, |
| net::UnescapeRule::NORMAL, &parsed, nullptr, nullptr); |
| EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted); |
| EXPECT_FALSE(parsed.scheme.is_valid()); |
| EXPECT_FALSE(parsed.username.is_valid()); |
| EXPECT_FALSE(parsed.password.is_valid()); |
| EXPECT_EQ(WideToUTF16(L"host"), |
| formatted.substr(parsed.host.begin, parsed.host.len)); |
| EXPECT_EQ(WideToUTF16(L"8000"), |
| formatted.substr(parsed.port.begin, parsed.port.len)); |
| EXPECT_EQ(WideToUTF16(L"/a"), |
| formatted.substr(parsed.path.begin, parsed.path.len)); |
| EXPECT_EQ(WideToUTF16(L"b=c"), |
| formatted.substr(parsed.query.begin, parsed.query.len)); |
| EXPECT_EQ(WideToUTF16(L"d"), |
| formatted.substr(parsed.ref.begin, parsed.ref.len)); |
| |
| // omit http starts with ftp case. |
| formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"), |
| kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, &parsed, |
| nullptr, nullptr); |
| EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted); |
| EXPECT_TRUE(parsed.scheme.is_valid()); |
| EXPECT_FALSE(parsed.username.is_valid()); |
| EXPECT_FALSE(parsed.password.is_valid()); |
| EXPECT_EQ(WideToUTF16(L"http"), |
| formatted.substr(parsed.scheme.begin, parsed.scheme.len)); |
| EXPECT_EQ(WideToUTF16(L"ftp.host"), |
| formatted.substr(parsed.host.begin, parsed.host.len)); |
| EXPECT_EQ(WideToUTF16(L"8000"), |
| formatted.substr(parsed.port.begin, parsed.port.len)); |
| EXPECT_EQ(WideToUTF16(L"/a"), |
| formatted.substr(parsed.path.begin, parsed.path.len)); |
| EXPECT_EQ(WideToUTF16(L"b=c"), |
| formatted.substr(parsed.query.begin, parsed.query.len)); |
| EXPECT_EQ(WideToUTF16(L"d"), |
| formatted.substr(parsed.ref.begin, parsed.ref.len)); |
| |
| // omit http starts with 'f' case. |
| formatted = FormatUrl(GURL("http://f/"), kFormatUrlOmitHTTP, |
| net::UnescapeRule::NORMAL, &parsed, nullptr, nullptr); |
| EXPECT_EQ(WideToUTF16(L"f/"), formatted); |
| EXPECT_FALSE(parsed.scheme.is_valid()); |
| EXPECT_FALSE(parsed.username.is_valid()); |
| EXPECT_FALSE(parsed.password.is_valid()); |
| EXPECT_FALSE(parsed.port.is_valid()); |
| EXPECT_TRUE(parsed.path.is_valid()); |
| EXPECT_FALSE(parsed.query.is_valid()); |
| EXPECT_FALSE(parsed.ref.is_valid()); |
| EXPECT_EQ(WideToUTF16(L"f"), |
| formatted.substr(parsed.host.begin, parsed.host.len)); |
| EXPECT_EQ(WideToUTF16(L"/"), |
| formatted.substr(parsed.path.begin, parsed.path.len)); |
| } |
| |
| // Make sure that calling FormatUrl on a GURL and then converting back to a GURL |
| // results in the original GURL, for each ASCII character in the path. |
| TEST(UrlFormatterTest, FormatUrlRoundTripPathASCII) { |
| for (unsigned char test_char = 32; test_char < 128; ++test_char) { |
| GURL url(std::string("http://www.google.com/") + |
| static_cast<char>(test_char)); |
| size_t prefix_len; |
| base::string16 formatted = |
| FormatUrl(url, kFormatUrlOmitUsernamePassword, |
| net::UnescapeRule::NORMAL, nullptr, &prefix_len, nullptr); |
| EXPECT_EQ(url.spec(), GURL(formatted).spec()); |
| } |
| } |
| |
| // Make sure that calling FormatUrl on a GURL and then converting back to a GURL |
| // results in the original GURL, for each escaped ASCII character in the path. |
| TEST(UrlFormatterTest, FormatUrlRoundTripPathEscaped) { |
| for (unsigned char test_char = 32; test_char < 128; ++test_char) { |
| std::string original_url("http://www.google.com/"); |
| original_url.push_back('%'); |
| original_url.append(base::HexEncode(&test_char, 1)); |
| |
| GURL url(original_url); |
| size_t prefix_len; |
| base::string16 formatted = FormatUrl(url, kFormatUrlOmitUsernamePassword, |
| net::UnescapeRule::NORMAL, nullptr, &prefix_len, nullptr); |
| EXPECT_EQ(url.spec(), GURL(formatted).spec()); |
| } |
| } |
| |
| // Make sure that calling FormatUrl on a GURL and then converting back to a GURL |
| // results in the original GURL, for each ASCII character in the query. |
| TEST(UrlFormatterTest, FormatUrlRoundTripQueryASCII) { |
| for (unsigned char test_char = 32; test_char < 128; ++test_char) { |
| GURL url(std::string("http://www.google.com/?") + |
| static_cast<char>(test_char)); |
| size_t prefix_len; |
| base::string16 formatted = |
| FormatUrl(url, kFormatUrlOmitUsernamePassword, |
| net::UnescapeRule::NORMAL, nullptr, &prefix_len, nullptr); |
| EXPECT_EQ(url.spec(), GURL(formatted).spec()); |
| } |
| } |
| |
| // Make sure that calling FormatUrl on a GURL and then converting back to a GURL |
| // only results in a different GURL for certain characters. |
| TEST(UrlFormatterTest, FormatUrlRoundTripQueryEscaped) { |
| // A full list of characters which FormatURL should unescape and GURL should |
| // not escape again, when they appear in a query string. |
| const char kUnescapedCharacters[] = |
| "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~"; |
| for (unsigned char test_char = 0; test_char < 128; ++test_char) { |
| std::string original_url("http://www.google.com/?"); |
| original_url.push_back('%'); |
| original_url.append(base::HexEncode(&test_char, 1)); |
| |
| GURL url(original_url); |
| size_t prefix_len; |
| base::string16 formatted = |
| FormatUrl(url, kFormatUrlOmitUsernamePassword, |
| net::UnescapeRule::NORMAL, nullptr, &prefix_len, nullptr); |
| |
| if (test_char && |
| strchr(kUnescapedCharacters, static_cast<char>(test_char))) { |
| EXPECT_NE(url.spec(), GURL(formatted).spec()); |
| } else { |
| EXPECT_EQ(url.spec(), GURL(formatted).spec()); |
| } |
| } |
| } |
| |
| TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
| CheckAdjustedOffsets(std::string(), kFormatUrlOmitNothing, |
| net::UnescapeRule::NORMAL, nullptr); |
| |
| const size_t basic_offsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, |
| 21, 22, 23, 24, 25 |
| }; |
| CheckAdjustedOffsets("http://www.google.com/foo/", |
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
| basic_offsets); |
| |
| const size_t omit_auth_offsets_1[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, |
| 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 |
| }; |
| CheckAdjustedOffsets("http://foo:bar@www.google.com/", |
| kFormatUrlOmitUsernamePassword, |
| net::UnescapeRule::NORMAL, omit_auth_offsets_1); |
| |
| const size_t omit_auth_offsets_2[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, |
| 15, 16, 17, 18, 19, 20, 21 |
| }; |
| CheckAdjustedOffsets("http://foo@www.google.com/", |
| kFormatUrlOmitUsernamePassword, |
| net::UnescapeRule::NORMAL, omit_auth_offsets_2); |
| |
| const size_t dont_omit_auth_offsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
| kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
| kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, |
| 30, 31 |
| }; |
| // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". |
| CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", |
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
| dont_omit_auth_offsets); |
| |
| const size_t view_source_offsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, |
| kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 |
| }; |
| CheckAdjustedOffsets("view-source:http://foo@www.google.com/", |
| kFormatUrlOmitUsernamePassword, |
| net::UnescapeRule::NORMAL, view_source_offsets); |
| |
| const size_t idn_hostname_offsets_1[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
| kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, |
| 13, 14, 15, 16, 17, 18, 19 |
| }; |
| // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". |
| CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", |
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
| idn_hostname_offsets_1); |
| |
| const size_t idn_hostname_offsets_2[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, |
| kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, |
| kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
| kNpos, 19, 20, 21, 22, 23, 24 |
| }; |
| // Convert punycode to |
| // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". |
| CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", |
| kFormatUrlOmitNothing, |
| net::UnescapeRule::NORMAL, idn_hostname_offsets_2); |
| |
| const size_t unescape_offsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, |
| 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, |
| kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, |
| kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
| kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos |
| }; |
| // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". |
| CheckAdjustedOffsets( |
| "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
| kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); |
| |
| const size_t ref_offsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, |
| 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, |
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, |
| 30, 31, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
| 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 33}; |
| |
| // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". |
| CheckAdjustedOffsets("http://www.google.com/foo.html#%E3%82%B0%E3%82%B0z", |
| kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
| ref_offsets); |
| |
| const size_t omit_http_offsets[] = { |
| 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, |
| 10, 11, 12, 13, 14 |
| }; |
| CheckAdjustedOffsets("http://www.google.com/", kFormatUrlOmitHTTP, |
| net::UnescapeRule::NORMAL, omit_http_offsets); |
| |
| const size_t omit_http_start_with_ftp_offsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 |
| }; |
| CheckAdjustedOffsets("http://ftp.google.com/", kFormatUrlOmitHTTP, |
| net::UnescapeRule::NORMAL, |
| omit_http_start_with_ftp_offsets); |
| |
| const size_t omit_all_offsets[] = { |
| 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, |
| 0, 1, 2, 3, 4, 5, 6, 7 |
| }; |
| CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitDefaults, |
| net::UnescapeRule::NORMAL, omit_all_offsets); |
| |
| const size_t trim_after_host_offsets[] = { |
| 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, |
| 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 9}; |
| CheckAdjustedOffsets("http://foo.com/abcdefg", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, trim_after_host_offsets); |
| CheckAdjustedOffsets("http://foo.com/abc/def", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, trim_after_host_offsets); |
| CheckAdjustedOffsets("http://foo.com/abc?a=b", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, trim_after_host_offsets); |
| CheckAdjustedOffsets("http://foo.com/abc#def", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, trim_after_host_offsets); |
| CheckAdjustedOffsets("http://foo.com/a?a=b#f", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, trim_after_host_offsets); |
| CheckAdjustedOffsets("http://foo.com//??###", |
| kFormatUrlOmitDefaults | kFormatUrlTrimAfterHost, |
| net::UnescapeRule::NORMAL, trim_after_host_offsets); |
| |
| const size_t omit_https_offsets[] = { |
| 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, |
| 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}; |
| CheckAdjustedOffsets("https://www.google.com/", kFormatUrlOmitHTTPS, |
| net::UnescapeRule::NORMAL, omit_https_offsets); |
| |
| const size_t omit_https_with_auth_offsets[] = { |
| 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, |
| kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, |
| 6, 7, 8, 9, 10, 11, 12, 13, 14}; |
| CheckAdjustedOffsets("https://u:p@www.google.com/", |
| kFormatUrlOmitDefaults | kFormatUrlOmitHTTPS, |
| net::UnescapeRule::NORMAL, omit_https_with_auth_offsets); |
| |
| const size_t strip_trivial_subdomains_offsets_1[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, |
| 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}; |
| CheckAdjustedOffsets( |
| "http://www.google.com/foo/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, strip_trivial_subdomains_offsets_1); |
| |
| const size_t strip_trivial_subdomains_from_idn_offsets[] = { |
| 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, |
| kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
| kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, |
| 13, 14, 15, 16, 17, 18, 19}; |
| CheckAdjustedOffsets( |
| "http://www.xn--l8jvb1ey91xtjb.jp/foo/", kFormatUrlOmitTrivialSubdomains, |
| net::UnescapeRule::NORMAL, strip_trivial_subdomains_from_idn_offsets); |
| } |
| |
| } // namespace url_formatter |