Internal change PiperOrigin-RevId: 16353969 Change-Id: I360e3d80f35a82ead1515ce54e60a9528cd4f383
diff --git a/googleurl/src/url_parse.cc b/googleurl/src/url_parse.cc index dbc1394..762bc25 100644 --- a/googleurl/src/url_parse.cc +++ b/googleurl/src/url_parse.cc
@@ -62,54 +62,6 @@ return spec_len; // Not found. } -// Fills in all members of the Parsed structure except for the scheme. -// -// |spec| is the full spec being parsed, of length |spec_len|. -// |after_scheme| is the character immediately following the scheme (after the -// colon) where we'll begin parsing. -// -// Compatability data points. I list "host", "path" extracted: -// Input IE6 Firefox Us -// ----- -------------- -------------- -------------- -// http://foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" -// http:foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" -// http:/foo.com/ fail(*) "foo.com", "/" "foo.com", "/" -// http:\foo.com/ fail(*) "\foo.com", "/"(fail) "foo.com", "/" -// http:////foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" -// -// (*) Interestingly, although IE fails to load these URLs, its history -// canonicalizer handles them, meaning if you've been to the corresponding -// "http://foo.com/" link, it will be colored. -template <typename CHAR> -void DoParseAfterScheme(const CHAR* spec, - int spec_len, - int after_scheme, - Parsed* parsed) { - int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); - int after_slashes = after_scheme + num_slashes; - - // First split into two main parts, the authority (username, password, host, - // and port) and the full path (path, query, and reference). - Component authority; - Component full_path; - - // Found "//<some data>", looks like an authority section. Treat everything - // from there to the next slash (or end of spec) to be the authority. Note - // that we ignore the number of slashes and treat it as the authority. - int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len); - authority = Component(after_slashes, end_auth - after_slashes); - - if (end_auth == spec_len) // No beginning of path found. - full_path = Component(); - else // Everything starting from the slash to the end is the path. 
- full_path = Component(end_auth, spec_len - end_auth); - - // Now parse those two sub-parts. - ParseAuthority(spec, authority, &parsed->username, &parsed->password, - &parsed->host, &parsed->port); - ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref); -} - template<typename CHAR> void ParseUserInfo(const CHAR* spec, const Component& user, @@ -281,6 +233,54 @@ *filepath = Component(); } +// Fills in all members of the Parsed structure except for the scheme. +// +// |spec| is the full spec being parsed, of length |spec_len|. +// |after_scheme| is the character immediately following the scheme (after the +// colon) where we'll begin parsing. +// +// Compatibility data points. I list "host", "path" extracted: +// Input IE6 Firefox Us +// ----- -------------- -------------- -------------- +// http://foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" +// http:foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" +// http:/foo.com/ fail(*) "foo.com", "/" "foo.com", "/" +// http:\foo.com/ fail(*) "\foo.com", "/"(fail) "foo.com", "/" +// http:////foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/" +// +// (*) Interestingly, although IE fails to load these URLs, its history +// canonicalizer handles them, meaning if you've been to the corresponding +// "http://foo.com/" link, it will be colored. +template <typename CHAR> +void DoParseAfterScheme(const CHAR* spec, + int spec_len, + int after_scheme, + Parsed* parsed) { + int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); + int after_slashes = after_scheme + num_slashes; + + // First split into two main parts, the authority (username, password, host, + // and port) and the full path (path, query, and reference). + Component authority; + Component full_path; + + // Found "//<some data>", looks like an authority section. Treat everything + // from there to the next slash (or end of spec) to be the authority. 
Note + // that we ignore the number of slashes and treat it as the authority. + int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len); + authority = Component(after_slashes, end_auth - after_slashes); + + if (end_auth == spec_len) // No beginning of path found. + full_path = Component(); + else // Everything starting from the slash to the end is the path. + full_path = Component(end_auth, spec_len - end_auth); + + // Now parse those two sub-parts. + ParseAuthority(spec, authority, &parsed->username, &parsed->password, + &parsed->host, &parsed->port); + ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref); +} + template<typename CHAR> bool DoExtractScheme(const CHAR* url, int url_len,