Internal change PiperOrigin-RevId: 96791655 Change-Id: I1232926a48b9fa8ba7ca739ba16294d17da1dd6a

commit: 8013a334f29538d9383baf4b5eb252feaabfe9d4 [log] [tgz]
author: Devany Sandoval <sandovad@google.com> Wed Jun 24 11:07:13 2015 -0700
committer: sandovad <sandovad@google.com> Tue Sep 03 12:52:03 2019 -0700
tree: a6727d72f2ad4b873a719a86936fda4263739079
parent: c53c811b6f3515682094bc554115b764da7fccca [diff]
diff --git a/LICENSE b/LICENSE
index ac40837..4917789 100644
--- a/LICENSE
+++ b/LICENSE

@@ -63,3 +63,40 @@
 and other provisions required by the GPL or the LGPL. If you do not delete
 the provisions above, a recipient may use your version of this file under
 the terms of any one of the MPL, the GPL or the LGPL.
+
+-------------------------------------------------------------------------------
+
+The file icu_utf.cc is from IBM. This file is licensed separately as follows:
+
+ICU License - ICU 1.8.1 and later
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2009 International Business Machines Corporation and others
+
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, and/or sell copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies of
+the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
+SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
+RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
+CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale, use
+or other dealings in this Software without prior written authorization
+of the copyright holder.

diff --git a/README.google b/README.google
index b7346d5..eab9cf3 100644
--- a/README.google
+++ b/README.google

@@ -1,32 +1,51 @@
-URL: http://google-url.googlecode.com/svn/trunk/
-Version: Snapshot of Subversion trunk, revision [139]
-License: BSD and MPL (one source file under MPL)
-License File: googleurl/LICENSE.txt
+URL: https://chromium.googlesource.com/chromium/src/+archive/6e0744b15b09421eac6634fb3fb7fe0a03427d56/url.tar.gz
+Version: 6e0744b15b09421eac6634fb3fb7fe0a03427d56 (matching Chromium 41.0.2272.118)
+License: BSD, MPL, ICU (one source file under MPL, one source file under ICU)
+License File: LICENSE
 
 Description:
 A small library for parsing and canonicalizing URLs
 
 Local Modifications:
-We use our google3 versions of //base (which is a superset of the base/
-functionality in googleurl), and ICU.  These are both injected through BUILD
-and don't require source modifications.
+1. src/base directory:
+- Remove BASE_EXPORT macros.
+- Wrap namespace base with namespace url to distinguish from google3 base.
+- src/base/strings/string16.*
+  * Include src/build/build_config.h to detect wchar_t size.
+  * PrintTo function and operator << are removed to eliminate dependency on
+    src/base/strings/utf_string_conversion.
+- src/base/strings/string_util.*
+  * Only one MatchPattern function is kept for src/url/origin.cc.
+  * Change the argument type from StringPiece to std::string to remove
+    dependency on google3 StringPiece.
+- src/base/third_party/icu/icu_utf.cc
+  * Add FALLTHROUGH_INTENDED for fall-through switch cases.
 
-We use //depot/google3/third_party/breakpad/import_to_p4_from_svn.py
-to ease synchronization of this Perforce mirror with the authoritative
-Subversion repository.  When using this script, don't forget to update this
-file, README.google, to reflect the Subversion revision being imported.
+2. src/url directory:
+- Use google3 version of //base, //util/gtl/lazy_static_ptr.h,
+  //third_party/icu and //testing/base/public:gunit_main. Some users don't want
+  googleurl to be dependent on google3 (e.g. geo/render/mirth/net:googleurl),
+  so we try our best to minimize such dependencies.
+- src/url/gurl.cc
+  * Replace scoped_ptr with std::unique_ptr to eliminate dependency on google3
+    scoped_ptr.
+- src/url/url_canon_icu.cc
+  * Replace LazyInstance with google3 LazyStaticPtr, modify initialization
+    and access methods accordingly.
+- src/url/url_util.cc
+  * Replace ANNOTATE_LEAKING_OBJECT_PTR() with google3
+    HeapLeakChecker::IgnoreObject(), and only use it when GOOGLEURL_IN_GOOGLE3
+    is defined.
+- src/url/url_canon_internal.h
+  * Expand NOT_REACHED() as DCHECK(false).
+- src/url/url_canon_icu.h and src/url/url_canon_stdstring.h
+  * Remove the include of src/base/compiler_specific.h.
+- src/url/third_party/mozilla/url_parse.cc
+  * Compile filesystemurl related function only when NO_FILESYSTEMURL_SUPPORT
+    is not defined, so that
+    wireless/android/icing/lib/core:liburl_parse_icing_static doesn't need to
+    depend on other googleurl srcs as well as third_party/icu.
 
-Because googleurl uses include paths like "googleurl/src/header.h", the source
-is located in a googleurl subdirectory under this directory.  This allows
-the paths to work correctly without adding //third_party to the include path.
-
-2010-01-22: the upstream code uses an open-source version of gunit and the
-google3 code uses a google3 version of gunit.  When importing, be careful
-to use the current google3 names: testing/base/public/googletest.h and
-testing/base/public/gunit.h .  -- mec
-
+3. google3_additions directory:
 2014-07-30: added google3_additions/googleurl_init.cc, which properly
 initializes googleurl during InitGoogle().
-
-2014-09-29: Adjusted googleurl/src/url_canon_unittest.cc for C++11
-compatibility.

diff --git a/google3_additions/googleurl_init.cc b/google3_additions/googleurl_init.cc
index e0af8e6..03470f0 100644
--- a/google3_additions/googleurl_init.cc
+++ b/google3_additions/googleurl_init.cc

@@ -4,12 +4,12 @@
 // InitGoogle() at startup.
 
 #include "base/googleinit.h"
-#include "third_party/googleurl/googleurl/src/url_util.h"
+#include "third_party/googleurl/src/url/url_util.h"
 
 namespace {
 
 void InitGoogleUrl() {
-  url_util::Initialize();
+  url::Initialize();
 }
 
 }  // namespace

diff --git a/googleurl/LICENSE.txt b/googleurl/LICENSE.txt
deleted file mode 100644
index ac40837..0000000
--- a/googleurl/LICENSE.txt
+++ /dev/null

@@ -1,65 +0,0 @@
-Copyright 2007, Google Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-    * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-    * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
--------------------------------------------------------------------------------
-
-The file url_parse.cc is based on nsURLParsers.cc from Mozilla. This file is
-licensed separately as follows:
-
-The contents of this file are subject to the Mozilla Public License Version
-1.1 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-http://www.mozilla.org/MPL/
-
-Software distributed under the License is distributed on an "AS IS" basis,
-WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
-for the specific language governing rights and limitations under the
-License.
-
-The Original Code is mozilla.org code.
-
-The Initial Developer of the Original Code is
-Netscape Communications Corporation.
-Portions created by the Initial Developer are Copyright (C) 1998
-the Initial Developer. All Rights Reserved.
-
-Contributor(s):
-  Darin Fisher (original author)
-
-Alternatively, the contents of this file may be used under the terms of
-either the GNU General Public License Version 2 or later (the "GPL"), or
-the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
-in which case the provisions of the GPL or the LGPL are applicable instead
-of those above. If you wish to allow use of your version of this file only
-under the terms of either the GPL or the LGPL, and not to allow others to
-use your version of this file under the terms of the MPL, indicate your
-decision by deleting the provisions above and replace them with the notice
-and other provisions required by the GPL or the LGPL. If you do not delete
-the provisions above, a recipient may use your version of this file under
-the terms of any one of the MPL, the GPL or the LGPL.

diff --git a/googleurl/README.txt b/googleurl/README.txt
deleted file mode 100644
index b28fd04..0000000
--- a/googleurl/README.txt
+++ /dev/null

@@ -1,180 +0,0 @@
-                       ==============================
-                       The Google URL Parsing Library
-                       ==============================
-
-This is the Google URL Parsing Library which parses and canonicalizes URLs.
-Please see the LICENSE.txt file for licensing information.
-
-Features
-========
-
-   * Easily embeddable: This library was written for a variety of client and
-     server programs in mind, so unlike most implementations of URL parsing
-     and canonicalization, it can be easily emdedded.
-
-   * Fast: hundreds of thousands of typical URLs can be parsed and
-     canonicalized per second on a modern CPU. It is much faster than, for
-     example, calling WinInet's corresponding functions.
-
-   * Compatible: When possible, this library has strived for IE7 compatability
-     for both general web compatability, and so IE addons or other applications
-     that communicate with or embed IE will work properly.
-
-     It supports Unix-style file URLs, as well as the more complex rules for
-     Window file URLs. Note that total compatability is not possible (for
-     example, IE6 and IE7 disagree about how to parse certain IP addresses),
-     and that this is more strict about certain illegal, rarely used, and
-     potentially dangerous constructs such as escaped control characters in
-     host names that IE will allow. It is typically a little less strict than
-     Firefox.
-
-
-Example
-=======
-
-An example implementation of a URL object that uses this library is provided
-in src/gurl.*. This implementation uses the "application integration" layer
-discussed below to interface with the low-level parsing and canonicalization
-functions.
-
-
-Building
-========
-
-The canonicalization files require ICU for some UTF-8 and UTF-16 conversion
-macros. If your project does not use ICU, it should be straightforward to
-factor out the macros and functions used in ICU, there are only a few well-
-isolated things that are used.
-
-TODO(brettw) ADD INSTRUCTIONS FOR GETTING ICU HERE!
-
-logging.h and logging.cc are Windows-only because the corresponding Unix
-logging system has many dependencies. This library uses few of the logging
-macros, and a dummy header can easily be written that defines the
-appropriate things for Unix.
-
-
-Definitions
-===========
-
-"Standard URL": A URL with an "authority", which is a hostname and optionally
-   a port, username, and password. Most URLs are standard such as HTTP and FTP.
-
-"File URL": A URL that references a file on disk. There are special rules for
-   this type of URL. Note that it may have a hostname! "localhost" is allowed,
-   for example "file://localhost/foo" is the same as "file:///foo".
-
-"Path URL": This is everything else. There is no standard on how to treat these
-   URLs, or even what they are called. This library decomposes them into a
-   scheme and a path. The path is everything following the scheme. This type of
-   URL includes "javascript", "data", and even "mailto" (although "mailto"
-   might look like a standard scheme in some respects, it is not).
-
-
-Design
-======
-
-The library is divided into four layers. They are listed here from the lowest
-to the highest; you can use any portion of the library as long as you embed the
-layers below it.
-
-1. Parsing
-----------
-At the lowest level is the parsing code. The files encompasing this are
-url_parse.* and the main include file is src/url_parse.h. This code will, given
-an input string, parse it into the most likely form of a URL.
-
-Parsing can not fail and does no validation. The exception is the port number,
-which it currently validates, but this is a bug. Given crazy input, the parser
-will do its best to find the various URL components according to its rules (see
-url_parse_unittest.cc for some examples).
-
-To use this, an application will typically use ExtractScheme to determine the
-type of a given input URL, and then call one of the initialization functions:
-"ParseStandardURL", "ParsePathURL", or "ParseFileURL". This will result in
-a "Parsed" structure which identifies the substrings of each identified
-component.
-
-2. Canonicalization
--------------------
-At the next highest level is canonicalization. The files encompasing this are
-url_canon.* and the main include file is src/url_canon.h. This code will
-validate an already-parsed URL, and will convert it to a canonical form. For
-example, this will convert host names to lowercase, convert IP addresses
-into dotted-decimal notation, handle encoding issues, etc.
-
-This layer will always do its best to produce a reasonable output string, but
-it may return that the string is invalid. For example, if there are invalid
-characters in the host name, it will escape them or replace them with the
-Unicode "invalid character" character, but will fail. This way, the program can
-display error messages to the user with the output, log it, etc.  and the
-string will have some meaning.
-
-Canonicalized output is written to a CanonOutput object which is a simple
-wrapper around an expanding buffer. An implementation called RawCanonOutput is
-proivided that writes to a raw buffer with a fixed amount statically allocated
-(for performance). Applications using STL can use StdStringCanonOutput defined
-in url_canon_stdstring.h which writes into a std::string.
-
-A normal application would call one of the three high-level functions
-"CanonicalizeStandardURL", "CanonicalizeFileURL", and CanonicalizePathURL"
-depending on the type of URL in question. Lower-level functions are also
-provided which will canonicalize individual parts of a URL (for example,
-"CanonicalizeHost").
-
-Part of this layer is the integration with the host system for IDN and encoding
-conversion. An implementation that provides integration with the ICU
-(http://www-306.ibm.com/software/globalization/icu/index.jsp) is provided in
-src/url_canon_icu.cc. The embedder may wish to replace this file with
-implementations of the functions for their own IDN library if they do not use
-ICU.
-
-3. Application integration
---------------------------
-The canonicalization and parsing layers do not know anything about the URI
-schemes supported by your application. The parsing and canonicalization
-functions are very low-level, and you must call the correct function to do the
-work (for example, "CanonicalizeFileURL").
-
-The application integration in url_util.* provides wrappers around the
-low-level parsing and canonicalization to call the correct versions for
-different identified schemes.  Embedders will want to modify this file if
-necessary to suit the needs of their application.
-
-4. URL object
--------------
-The highest level is the "URL" object that a C++ application would use to
-to encapsulate a URL. Embedders will typically want to provide their own URL
-object that meets the requirements of their system. A reasonably complete
-example implemnetation is provided in src/gurl.*. You may wish to use this
-object, extend or modify it, or write your own.
-
-Whitespace
-----------
-Sometimes, you may want to remove linefeeds and tabs from the content of a URL.
-Some web pages, for example, expect that a URL spanning two lines should be
-treated as one with the newline removed. Depending on the source of the URLs
-you are canonicalizing, these newlines may or may not be trimmed off.
-
-If you want this behavior, call RemoveURLWhitespace before parsing. This will
-remove CR, LF and TAB from the input. Note that it preserves spaces. On typical
-URLs, this function produces a 10-15% speed reduction, so it is optional and
-not done automatically. The example GURL object and the url_util wrapper does
-this for you.
-
-Tests
-=====
-
-There are a number of *_unittest.cc and *_perftest.cc files. These files are
-not currently compilable as they rely on a not-included unit testing framework
-Tests are declared like this:
-  TEST(TestCaseName, TestName) {
-    ASSERT_TRUE(a);
-    EXPECT_EQ(a, b);
-  }
-If you would like to compile them, it should be straightforward to define
-the TEST macro (which would declare a function by combining the two arguments)
-and the other macros whose behavior should be self-explanatory (EXPECT is like
-an ASSERT, but does not stop the test, if you are doing this, you probably
-don't care about this difference). Then you would define a .cc file that
-calls all of these functions.

diff --git a/googleurl/base/README.txt b/googleurl/base/README.txt
deleted file mode 100644
index 311faa0..0000000
--- a/googleurl/base/README.txt
+++ /dev/null

@@ -1,2 +0,0 @@
-These files contain some shared code. You can define your own assertion macros
-to eliminate the dependency on logging.h.

diff --git a/googleurl/base/string16.cc b/googleurl/base/string16.cc
deleted file mode 100644
index fc25809..0000000
--- a/googleurl/base/string16.cc
+++ /dev/null

@@ -1,94 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//    * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//    * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//    * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "base/string16.h"
-
-#ifdef WIN32
-
-#error This file should not be used on 2-byte wchar_t systems
-// If this winds up being needed on 2-byte wchar_t systems, either the
-// definitions below can be used, or the host system's wide character
-// functions like wmemcmp can be wrapped.
-
-#else  // !WIN32
-
-namespace base {
-
-int c16memcmp(const char16* s1, const char16* s2, size_t n) {
-  // We cannot call memcmp because that changes the semantics.
-  while (n-- > 0) {
-    if (*s1 != *s2) {
-      // We cannot use (*s1 - *s2) because char16 is unsigned.
-      return ((*s1 < *s2) ? -1 : 1);
-    }
-    ++s1;
-    ++s2;
-  }
-  return 0;
-}
-
-size_t c16len(const char16* s) {
-  const char16 *s_orig = s;
-  while (*s) {
-    ++s;
-  }
-  return s - s_orig;
-}
-
-const char16* c16memchr(const char16* s, char16 c, size_t n) {
-  while (n-- > 0) {
-    if (*s == c) {
-      return s;
-    }
-    ++s;
-  }
-  return 0;
-}
-
-char16* c16memmove(char16* s1, const char16* s2, size_t n) {
-  return reinterpret_cast<char16*>(memmove(s1, s2, n * sizeof(char16)));
-}
-
-char16* c16memcpy(char16* s1, const char16* s2, size_t n) {
-  return reinterpret_cast<char16*>(memcpy(s1, s2, n * sizeof(char16)));
-}
-
-char16* c16memset(char16* s, char16 c, size_t n) {
-  char16 *s_orig = s;
-  while (n-- > 0) {
-    *s = c;
-    ++s;
-  }
-  return s_orig;
-}
-
-}  // namespace base
-
-template class std::basic_string<char16, base::string16_char_traits>;
-
-#endif  // WIN32

diff --git a/googleurl/build/README.txt b/googleurl/build/README.txt
deleted file mode 100644
index eab011a..0000000
--- a/googleurl/build/README.txt
+++ /dev/null

@@ -1,4 +0,0 @@
-This directory includes solution and project files for compiling with
-Visual Studio 2005 on Windows.
-
-The base checkout directory must be named 'googleurl'.

diff --git a/googleurl/build/base.vcproj b/googleurl/build/base.vcproj
deleted file mode 100644
index 0e923cf..0000000
--- a/googleurl/build/base.vcproj
+++ /dev/null

@@ -1,151 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
-	ProjectType="Visual C++"
-	Version="8.00"
-	Name="base"
-	ProjectGUID="{ACAC8E18-F003-4881-9BA0-C7718AC5FFD5}"
-	RootNamespace="base"
-	Keyword="Win32Proj"
-	>
-	<Platforms>
-		<Platform
-			Name="Win32"
-		/>
-	</Platforms>
-	<ToolFiles>
-	</ToolFiles>
-	<Configurations>
-		<Configuration
-			Name="Debug|Win32"
-			ConfigurationType="4"
-			InheritedPropertySheets="$(SolutionDir)..\build\common.vsprops;$(SolutionDir)..\build\debug.vsprops;$(SolutionDir)../third_party/icu/build/using_icu.vsprops"
-			>
-			<Tool
-				Name="VCPreBuildEventTool"
-			/>
-			<Tool
-				Name="VCCustomBuildTool"
-			/>
-			<Tool
-				Name="VCXMLDataGeneratorTool"
-			/>
-			<Tool
-				Name="VCWebServiceProxyGeneratorTool"
-			/>
-			<Tool
-				Name="VCMIDLTool"
-			/>
-			<Tool
-				Name="VCCLCompilerTool"
-			/>
-			<Tool
-				Name="VCManagedResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCPreLinkEventTool"
-			/>
-			<Tool
-				Name="VCLibrarianTool"
-			/>
-			<Tool
-				Name="VCALinkTool"
-			/>
-			<Tool
-				Name="VCXDCMakeTool"
-			/>
-			<Tool
-				Name="VCBscMakeTool"
-			/>
-			<Tool
-				Name="VCFxCopTool"
-			/>
-			<Tool
-				Name="VCPostBuildEventTool"
-			/>
-		</Configuration>
-		<Configuration
-			Name="Release|Win32"
-			ConfigurationType="4"
-			InheritedPropertySheets="$(SolutionDir)..\build\common.vsprops;$(SolutionDir)..\build\release.vsprops;$(SolutionDir)../third_party/icu/build/using_icu.vsprops"
-			>
-			<Tool
-				Name="VCPreBuildEventTool"
-			/>
-			<Tool
-				Name="VCCustomBuildTool"
-			/>
-			<Tool
-				Name="VCXMLDataGeneratorTool"
-			/>
-			<Tool
-				Name="VCWebServiceProxyGeneratorTool"
-			/>
-			<Tool
-				Name="VCMIDLTool"
-			/>
-			<Tool
-				Name="VCCLCompilerTool"
-			/>
-			<Tool
-				Name="VCManagedResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCPreLinkEventTool"
-			/>
-			<Tool
-				Name="VCLibrarianTool"
-			/>
-			<Tool
-				Name="VCALinkTool"
-			/>
-			<Tool
-				Name="VCXDCMakeTool"
-			/>
-			<Tool
-				Name="VCBscMakeTool"
-			/>
-			<Tool
-				Name="VCFxCopTool"
-			/>
-			<Tool
-				Name="VCPostBuildEventTool"
-			/>
-		</Configuration>
-	</Configurations>
-	<References>
-	</References>
-	<Files>
-		<File
-			RelativePath="..\base\basictypes.h"
-			>
-		</File>
-		<File
-			RelativePath="..\base\logging.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\base\logging.h"
-			>
-		</File>
-		<File
-			RelativePath="..\base\README.txt"
-			>
-		</File>
-		<File
-			RelativePath="..\base\scoped_ptr.h"
-			>
-		</File>
-		<File
-			RelativePath="..\base\string16.h"
-			>
-		</File>
-	</Files>
-	<Globals>
-	</Globals>
-</VisualStudioProject>

diff --git a/googleurl/build/common.vsprops b/googleurl/build/common.vsprops
deleted file mode 100644
index ede28e9..0000000
--- a/googleurl/build/common.vsprops
+++ /dev/null

@@ -1,22 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioPropertySheet
-	ProjectType="Visual C++"
-	Version="8.00"
-	Name="common"
-	OutputDirectory="$(SolutionDir)$(ConfigurationName)"
-	IntermediateDirectory="$(SolutionDir)$(ConfigurationName)\obj\$(ProjectName)"
-	CharacterSet="1"
-	>
-	<Tool
-		Name="VCCLCompilerTool"
-		AdditionalIncludeDirectories="$(SolutionDir)..\..;$(SolutionDir).."
-		PreprocessorDefinitions="_WIN32_WINNT=0x0501;WINVER=0x0501;WIN32;_WINDOWS"
-		MinimalRebuild="false"
-		BufferSecurityCheck="true"
-		EnableFunctionLevelLinking="true"
-		WarningLevel="3"
-		WarnAsError="true"
-		Detect64BitPortabilityProblems="true"
-		DebugInformationFormat="3"
-	/>
-</VisualStudioPropertySheet>

diff --git a/googleurl/build/debug.vsprops b/googleurl/build/debug.vsprops
deleted file mode 100644
index d2aa43f..0000000
--- a/googleurl/build/debug.vsprops
+++ /dev/null

@@ -1,18 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioPropertySheet
-	ProjectType="Visual C++"
-	Version="8.00"
-	Name="debug"
-	>
-	<Tool
-		Name="VCCLCompilerTool"
-		Optimization="0"
-		PreprocessorDefinitions="_DEBUG"
-		BasicRuntimeChecks="3"
-		RuntimeLibrary="1"
-	/>
-	<Tool
-		Name="VCLinkerTool"
-		LinkIncremental="2"
-	/>
-</VisualStudioPropertySheet>

diff --git a/googleurl/build/googleurl.sln b/googleurl/build/googleurl.sln
deleted file mode 100644
index 347810d..0000000
--- a/googleurl/build/googleurl.sln
+++ /dev/null

@@ -1,32 +0,0 @@
-
-Microsoft Visual Studio Solution File, Format Version 9.00
-# Visual Studio 2005
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "googleurl", "googleurl.vcproj", "{EF5E94AB-B646-4E5B-A058-52EF07B8351C}"
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "base", "base.vcproj", "{ACAC8E18-F003-4881-9BA0-C7718AC5FFD5}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{D8E84C85-89D3-4B8D-9A3A-C44B63C3383A}"
-	ProjectSection(SolutionItems) = preProject
-		..\LICENSE.txt = ..\LICENSE.txt
-		..\README.txt = ..\README.txt
-	EndProjectSection
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Win32 = Debug|Win32
-		Release|Win32 = Release|Win32
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{EF5E94AB-B646-4E5B-A058-52EF07B8351C}.Debug|Win32.ActiveCfg = Debug|Win32
-		{EF5E94AB-B646-4E5B-A058-52EF07B8351C}.Debug|Win32.Build.0 = Debug|Win32
-		{EF5E94AB-B646-4E5B-A058-52EF07B8351C}.Release|Win32.ActiveCfg = Release|Win32
-		{EF5E94AB-B646-4E5B-A058-52EF07B8351C}.Release|Win32.Build.0 = Release|Win32
-		{ACAC8E18-F003-4881-9BA0-C7718AC5FFD5}.Debug|Win32.ActiveCfg = Debug|Win32
-		{ACAC8E18-F003-4881-9BA0-C7718AC5FFD5}.Debug|Win32.Build.0 = Debug|Win32
-		{ACAC8E18-F003-4881-9BA0-C7718AC5FFD5}.Release|Win32.ActiveCfg = Release|Win32
-		{ACAC8E18-F003-4881-9BA0-C7718AC5FFD5}.Release|Win32.Build.0 = Release|Win32
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal

diff --git a/googleurl/build/googleurl.vcproj b/googleurl/build/googleurl.vcproj
deleted file mode 100644
index 71b3123..0000000
--- a/googleurl/build/googleurl.vcproj
+++ /dev/null

@@ -1,239 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
-	ProjectType="Visual C++"
-	Version="8.00"
-	Name="googleurl"
-	ProjectGUID="{EF5E94AB-B646-4E5B-A058-52EF07B8351C}"
-	RootNamespace="googleurl"
-	Keyword="Win32Proj"
-	>
-	<Platforms>
-		<Platform
-			Name="Win32"
-		/>
-	</Platforms>
-	<ToolFiles>
-	</ToolFiles>
-	<Configurations>
-		<Configuration
-			Name="Debug|Win32"
-			ConfigurationType="4"
-			InheritedPropertySheets="$(SolutionDir)..\build\common.vsprops;$(SolutionDir)..\build\debug.vsprops;$(SolutionDir)../third_party/icu/build/using_icu.vsprops"
-			>
-			<Tool
-				Name="VCPreBuildEventTool"
-			/>
-			<Tool
-				Name="VCCustomBuildTool"
-			/>
-			<Tool
-				Name="VCXMLDataGeneratorTool"
-			/>
-			<Tool
-				Name="VCWebServiceProxyGeneratorTool"
-			/>
-			<Tool
-				Name="VCMIDLTool"
-			/>
-			<Tool
-				Name="VCCLCompilerTool"
-			/>
-			<Tool
-				Name="VCManagedResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCPreLinkEventTool"
-			/>
-			<Tool
-				Name="VCLibrarianTool"
-			/>
-			<Tool
-				Name="VCALinkTool"
-			/>
-			<Tool
-				Name="VCXDCMakeTool"
-			/>
-			<Tool
-				Name="VCBscMakeTool"
-			/>
-			<Tool
-				Name="VCFxCopTool"
-			/>
-			<Tool
-				Name="VCPostBuildEventTool"
-			/>
-		</Configuration>
-		<Configuration
-			Name="Release|Win32"
-			ConfigurationType="4"
-			InheritedPropertySheets="$(SolutionDir)..\build\common.vsprops;$(SolutionDir)..\build\release.vsprops;$(SolutionDir)../third_party/icu/build/using_icu.vsprops"
-			>
-			<Tool
-				Name="VCPreBuildEventTool"
-			/>
-			<Tool
-				Name="VCCustomBuildTool"
-			/>
-			<Tool
-				Name="VCXMLDataGeneratorTool"
-			/>
-			<Tool
-				Name="VCWebServiceProxyGeneratorTool"
-			/>
-			<Tool
-				Name="VCMIDLTool"
-			/>
-			<Tool
-				Name="VCCLCompilerTool"
-			/>
-			<Tool
-				Name="VCManagedResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCPreLinkEventTool"
-			/>
-			<Tool
-				Name="VCLibrarianTool"
-			/>
-			<Tool
-				Name="VCALinkTool"
-			/>
-			<Tool
-				Name="VCXDCMakeTool"
-			/>
-			<Tool
-				Name="VCBscMakeTool"
-			/>
-			<Tool
-				Name="VCFxCopTool"
-			/>
-			<Tool
-				Name="VCPostBuildEventTool"
-			/>
-		</Configuration>
-	</Configurations>
-	<References>
-	</References>
-	<Files>
-		<File
-			RelativePath="..\src\gurl.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\gurl.h"
-			>
-		</File>
-		<File
-			RelativePath=".\README.txt"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon.h"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_etc.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_fileurl.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_host.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_icu.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_icu.h"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_internal.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_internal.h"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_internal_file.h"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_ip.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_ip.h"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_mailtourl.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_path.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_pathurl.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_query.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_relative.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_stdstring.h"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_canon_stdurl.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_file.h"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_parse.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_parse.h"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_parse_file.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_parse_internal.h"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_util.cc"
-			>
-		</File>
-		<File
-			RelativePath="..\src\url_util.h"
-			>
-		</File>
-	</Files>
-	<Globals>
-	</Globals>
-</VisualStudioProject>

diff --git a/googleurl/build/release.vsprops b/googleurl/build/release.vsprops
deleted file mode 100644
index 2e59356..0000000
--- a/googleurl/build/release.vsprops
+++ /dev/null

@@ -1,23 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioPropertySheet
-	ProjectType="Visual C++"
-	Version="8.00"
-	Name="release"
-	>
-	<Tool
-		Name="VCCLCompilerTool"
-		WholeProgramOptimization="true"
-		PreprocessorDefinitions="NDEBUG"
-	/>
-	<Tool
-		Name="VCLibrarianTool"
-		AdditionalOptions="/ltcg"
-	/>
-	<Tool
-		Name="VCLinkerTool"
-		LinkIncremental="1"
-		OptimizeReferences="2"
-		EnableCOMDATFolding="2"
-		LinkTimeCodeGeneration="1"
-	/>
-</VisualStudioPropertySheet>

diff --git a/googleurl/src/gurl.cc b/googleurl/src/gurl.cc
deleted file mode 100644
index a0bfd26..0000000
--- a/googleurl/src/gurl.cc
+++ /dev/null

@@ -1,449 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifdef WIN32
-#include <windows.h>
-#else
-#include <pthread.h>
-#endif
-
-#include <algorithm>
-
-#include "googleurl/src/gurl.h"
-
-#include "base/logging.h"
-#include "googleurl/src/url_canon_stdstring.h"
-#include "googleurl/src/url_util.h"
-
-namespace {
-
-// External template that can handle initialization of either character type.
-// The input spec is given, and the canonical version will be placed in
-// |*canonical|, along with the parsing of the canonical spec in |*parsed|.
-template<typename STR>
-bool InitCanonical(const STR& input_spec,
-                   std::string* canonical,
-                   url_parse::Parsed* parsed) {
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  canonical->reserve(input_spec.size() + 32);
-  url_canon::StdStringCanonOutput output(canonical);
-  bool success = url_util::Canonicalize(
-      input_spec.data(), static_cast<int>(input_spec.length()),
-      NULL, &output, parsed);
-
-  output.Complete();  // Must be done before using string.
-  return success;
-}
-
-static std::string* empty_string = NULL;
-static GURL* empty_gurl = NULL;
-
-#ifdef WIN32
-
-// Returns a static reference to an empty string for returning a reference
-// when there is no underlying string.
-const std::string& EmptyStringForGURL() {
-  // Avoid static object construction/destruction on startup/shutdown.
-  if (!empty_string) {
-    // Create the string. Be careful that we don't break in the case that this
-    // is being called from multiple threads. Statics are not threadsafe.
-    std::string* new_empty_string = new std::string;
-    if (InterlockedCompareExchangePointer(
-        reinterpret_cast<PVOID*>(&empty_string), new_empty_string, NULL)) {
-      // The old value was non-NULL, so no replacement was done. Another
-      // thread did the initialization out from under us.
-      delete new_empty_string;
-    }
-  }
-  return *empty_string;
-}
-
-#else
-
-static pthread_once_t empty_string_once = PTHREAD_ONCE_INIT;
-static pthread_once_t empty_gurl_once = PTHREAD_ONCE_INIT;
-
-void EmptyStringForGURLOnce(void) {
-  empty_string = new std::string;
-}
-
-const std::string& EmptyStringForGURL() {
-  // Avoid static object construction/destruction on startup/shutdown.
-  pthread_once(&empty_string_once, EmptyStringForGURLOnce);
-  return *empty_string;
-}
-
-#endif  // WIN32
-
-} // namespace
-
-GURL::GURL() : is_valid_(false) {
-}
-
-GURL::GURL(const GURL& other)
-    : spec_(other.spec_),
-      is_valid_(other.is_valid_),
-      parsed_(other.parsed_) {
-}
-
-GURL::GURL(const std::string& url_string) {
-  is_valid_ = InitCanonical(url_string, &spec_, &parsed_);
-}
-
-GURL::GURL(const string16& url_string) {
-  is_valid_ = InitCanonical(url_string, &spec_, &parsed_);
-}
-
-GURL::GURL(const char* canonical_spec, size_t canonical_spec_len,
-           const url_parse::Parsed& parsed, bool is_valid)
-    : spec_(canonical_spec, canonical_spec_len),
-      is_valid_(is_valid),
-      parsed_(parsed) {
-#ifndef NDEBUG
-  // For testing purposes, check that the parsed canonical URL is identical to
-  // what we would have produced. Skip checking for invalid URLs have no meaning
-  // and we can't always canonicalize then reproducabely.
-  if (is_valid_) {
-    GURL test_url(spec_);
-
-    DCHECK(test_url.is_valid_ == is_valid_);
-    DCHECK(test_url.spec_ == spec_);
-
-    DCHECK(test_url.parsed_.scheme == parsed_.scheme);
-    DCHECK(test_url.parsed_.username == parsed_.username);
-    DCHECK(test_url.parsed_.password == parsed_.password);
-    DCHECK(test_url.parsed_.host == parsed_.host);
-    DCHECK(test_url.parsed_.port == parsed_.port);
-    DCHECK(test_url.parsed_.path == parsed_.path);
-    DCHECK(test_url.parsed_.query == parsed_.query);
-    DCHECK(test_url.parsed_.ref == parsed_.ref);
-  }
-#endif
-}
-
-const std::string& GURL::spec() const {
-  if (is_valid_ || spec_.empty())
-    return spec_;
-
-  DCHECK(false) << "Trying to get the spec of an invalid URL!";
-  return EmptyStringForGURL();
-}
-
-GURL GURL::Resolve(const std::string& relative) const {
-  return ResolveWithCharsetConverter(relative, NULL);
-}
-GURL GURL::Resolve(const string16& relative) const {
-  return ResolveWithCharsetConverter(relative, NULL);
-}
-
-// Note: code duplicated below (it's inconvenient to use a template here).
-GURL GURL::ResolveWithCharsetConverter(
-    const std::string& relative,
-    url_canon::CharsetConverter* charset_converter) const {
-  // Not allowed for invalid URLs.
-  if (!is_valid_)
-    return GURL();
-
-  GURL result;
-
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
-  url_canon::StdStringCanonOutput output(&result.spec_);
-
-  if (!url_util::ResolveRelative(
-          spec_.data(), static_cast<int>(spec_.length()), parsed_,
-          relative.data(), static_cast<int>(relative.length()),
-          charset_converter, &output, &result.parsed_)) {
-    // Error resolving, return an empty URL.
-    return GURL();
-  }
-
-  output.Complete();
-  result.is_valid_ = true;
-  return result;
-}
-
-// Note: code duplicated above (it's inconvenient to use a template here).
-GURL GURL::ResolveWithCharsetConverter(
-    const string16& relative,
-    url_canon::CharsetConverter* charset_converter) const {
-  // Not allowed for invalid URLs.
-  if (!is_valid_)
-    return GURL();
-
-  GURL result;
-
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
-  url_canon::StdStringCanonOutput output(&result.spec_);
-
-  if (!url_util::ResolveRelative(
-          spec_.data(), static_cast<int>(spec_.length()), parsed_,
-          relative.data(), static_cast<int>(relative.length()),
-          charset_converter, &output, &result.parsed_)) {
-    // Error resolving, return an empty URL.
-    return GURL();
-  }
-
-  output.Complete();
-  result.is_valid_ = true;
-  return result;
-}
-
-// Note: code duplicated below (it's inconvenient to use a template here).
-GURL GURL::ReplaceComponents(
-    const url_canon::Replacements<char>& replacements) const {
-  GURL result;
-
-  // Not allowed for invalid URLs.
-  if (!is_valid_)
-    return GURL();
-
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
-  url_canon::StdStringCanonOutput output(&result.spec_);
-
-  result.is_valid_ = url_util::ReplaceComponents(
-      spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
-      NULL, &output, &result.parsed_);
-
-  output.Complete();
-  return result;
-}
-
-// Note: code duplicated above (it's inconvenient to use a template here).
-GURL GURL::ReplaceComponents(
-    const url_canon::Replacements<char16>& replacements) const {
-  GURL result;
-
-  // Not allowed for invalid URLs.
-  if (!is_valid_)
-    return GURL();
-
-  // Reserve enough room in the output for the input, plus some extra so that
-  // we have room if we have to escape a few things without reallocating.
-  result.spec_.reserve(spec_.size() + 32);
-  url_canon::StdStringCanonOutput output(&result.spec_);
-
-  result.is_valid_ = url_util::ReplaceComponents(
-      spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
-      NULL, &output, &result.parsed_);
-
-  output.Complete();
-  return result;
-}
-
-GURL GURL::GetOrigin() const {
-  // This doesn't make sense for invalid or nonstandard URLs, so return
-  // the empty URL
-  if (!is_valid_ || !IsStandard())
-    return GURL();
-
-  url_canon::Replacements<char> replacements;
-  replacements.ClearUsername();
-  replacements.ClearPassword();
-  replacements.ClearPath();
-  replacements.ClearQuery();
-  replacements.ClearRef();
-
-  return ReplaceComponents(replacements);
-}
-
-GURL GURL::GetWithEmptyPath() const {
-  // This doesn't make sense for invalid or nonstandard URLs, so return
-  // the empty URL.
-  if (!is_valid_ || !IsStandard())
-    return GURL();
-
-  // We could optimize this since we know that the URL is canonical, and we are
-  // appending a canonical path, so avoiding re-parsing.
-  GURL other(*this);
-  if (parsed_.path.len == 0)
-    return other;
-
-  // Clear everything after the path.
-  other.parsed_.query.reset();
-  other.parsed_.ref.reset();
-
-  // Set the path, since the path is longer than one, we can just set the
-  // first character and resize.
-  other.spec_[other.parsed_.path.begin] = '/';
-  other.parsed_.path.len = 1;
-  other.spec_.resize(other.parsed_.path.begin + 1);
-  return other;
-}
-
-bool GURL::IsStandard() const {
-  return url_util::IsStandard(spec_.data(), parsed_.scheme);
-}
-
-bool GURL::SchemeIs(const char* lower_ascii_scheme) const {
-  if (parsed_.scheme.len <= 0)
-    return lower_ascii_scheme == NULL;
-  return url_util::LowerCaseEqualsASCII(spec_.data() + parsed_.scheme.begin,
-                                        spec_.data() + parsed_.scheme.end(),
-                                        lower_ascii_scheme);
-}
-
-int GURL::IntPort() const {
-  if (parsed_.port.is_nonempty())
-    return url_parse::ParsePort(spec_.data(), parsed_.port);
-  return url_parse::PORT_UNSPECIFIED;
-}
-
-int GURL::EffectiveIntPort() const {
-  int int_port = IntPort();
-  if (int_port == url_parse::PORT_UNSPECIFIED && IsStandard())
-    return url_canon::DefaultPortForScheme(spec_.data() + parsed_.scheme.begin,
-                                           parsed_.scheme.len);
-  return int_port;
-}
-
-std::string GURL::ExtractFileName() const {
-  url_parse::Component file_component;
-  url_parse::ExtractFileName(spec_.data(), parsed_.path, &file_component);
-  return ComponentString(file_component);
-}
-
-std::string GURL::PathForRequest() const {
-  DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty";
-  if (parsed_.ref.len >= 0) {
-    // Clip off the reference when it exists. The reference starts after the #
-    // sign, so we have to subtract one to also remove it.
-    return std::string(spec_, parsed_.path.begin,
-                       parsed_.ref.begin - parsed_.path.begin - 1);
-  }
-
-  // Use everything form the path to the end.
-  return std::string(spec_, parsed_.path.begin);
-}
-
-std::string GURL::HostNoBrackets() const {
-  // If host looks like an IPv6 literal, strip the square brackets.
-  url_parse::Component h(parsed_.host);
-  if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') {
-    h.begin++;
-    h.len -= 2;
-  }
-  return ComponentString(h);
-}
-
-bool GURL::HostIsIPAddress() const {
-  if (!is_valid_ || spec_.empty())
-     return false;
-
-  url_canon::RawCanonOutputT<char, 128> ignored_output;
-  url_canon::CanonHostInfo host_info;
-  url_canon::CanonicalizeIPAddress(spec_.c_str(), parsed_.host,
-                                   &ignored_output, &host_info);
-  return host_info.IsIPAddress();
-}
-
-#ifdef WIN32
-
-const GURL& GURL::EmptyGURL() {
-  // Avoid static object construction/destruction on startup/shutdown.
-  if (!empty_gurl) {
-    // Create the string. Be careful that we don't break in the case that this
-    // is being called from multiple threads.
-    GURL* new_empty_gurl = new GURL;
-    if (InterlockedCompareExchangePointer(
-        reinterpret_cast<PVOID*>(&empty_gurl), new_empty_gurl, NULL)) {
-      // The old value was non-NULL, so no replacement was done. Another
-      // thread did the initialization out from under us.
-      delete new_empty_gurl;
-    }
-  }
-  return *empty_gurl;
-}
-
-#else
-
-void EmptyGURLOnce(void) {
-  empty_gurl = new GURL;
-}
-
-const GURL& GURL::EmptyGURL() {
-  // Avoid static object construction/destruction on startup/shutdown.
-  pthread_once(&empty_gurl_once, EmptyGURLOnce);
-  return *empty_gurl;
-}
-
-#endif  // WIN32
-
-bool GURL::DomainIs(const char* lower_ascii_domain,
-                    int domain_len) const {
-  // Return false if this URL is not valid or domain is empty.
-  if (!is_valid_ || !parsed_.host.is_nonempty() || !domain_len)
-    return false;
-
-  // Check whether the host name is end with a dot. If yes, treat it
-  // the same as no-dot unless the input comparison domain is end
-  // with dot.
-  const char* last_pos = spec_.data() + parsed_.host.end() - 1;
-  int host_len = parsed_.host.len;
-  if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
-    last_pos--;
-    host_len--;
-  }
-
-  // Return false if host's length is less than domain's length.
-  if (host_len < domain_len)
-    return false;
-
-  // Compare this url whether belong specific domain.
-  const char* start_pos = spec_.data() + parsed_.host.begin +
-                          host_len - domain_len;
-
-  if (!url_util::LowerCaseEqualsASCII(start_pos,
-                                      last_pos + 1,
-                                      lower_ascii_domain,
-                                      lower_ascii_domain + domain_len))
-    return false;
-
-  // Check whether host has right domain start with dot, make sure we got
-  // right domain range. For example www.google.com has domain
-  // "google.com" but www.iamnotgoogle.com does not.
-  if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
-      '.' != *(start_pos - 1))
-    return false;
-
-  return true;
-}
-
-void GURL::Swap(GURL* other) {
-  spec_.swap(other->spec_);
-  std::swap(is_valid_, other->is_valid_);
-  std::swap(parsed_, other->parsed_);
-}
-

diff --git a/googleurl/src/gurl_test_main.cc b/googleurl/src/gurl_test_main.cc
deleted file mode 100644
index 9a7c9f4..0000000
--- a/googleurl/src/gurl_test_main.cc
+++ /dev/null

@@ -1,97 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "build/build_config.h"
-
-#if defined(OS_WIN)
-#include <windows.h>
-#endif
-
-#include <string>
-
-#include "testing/gtest/include/gtest/gtest.h"
-#include "unicode/putil.h"
-#include "unicode/udata.h"
-
-#define ICU_UTIL_DATA_SHARED 1
-#define ICU_UTIL_DATA_STATIC 2
-
-#ifndef ICU_UTIL_DATA_IMPL
-
-#if defined(OS_WIN)
-#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_SHARED
-#elif defined(OS_MACOSX)
-#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_STATIC
-#elif defined(OS_LINUX)
-#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_FILE
-#endif
-
-#endif  // ICU_UTIL_DATA_IMPL
-
-#if defined(OS_WIN)
-#define ICU_UTIL_DATA_SYMBOL "icudt" U_ICU_VERSION_SHORT "_dat"
-#define ICU_UTIL_DATA_SHARED_MODULE_NAME "icudt" U_ICU_VERSION_SHORT ".dll"
-#endif
-
-bool InitializeICU() {
-#if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_SHARED)
-  // We expect to find the ICU data module alongside the current module.
-  // Because the module name is ASCII-only, "A" API should be safe.
-  HMODULE module = LoadLibraryA(ICU_UTIL_DATA_SHARED_MODULE_NAME);
-  if (!module)
-    return false;
-
-  FARPROC addr = GetProcAddress(module, ICU_UTIL_DATA_SYMBOL);
-  if (!addr)
-    return false;
-
-  UErrorCode err = U_ZERO_ERROR;
-  udata_setCommonData(reinterpret_cast<void*>(addr), &err);
-  return err == U_ZERO_ERROR;
-#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
-  // Mac bundles the ICU data in.
-  return true;
-#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
-  // We expect to find the ICU data module alongside the current module.
-  u_setDataDirectory(".");
-  // Only look for the packaged data file;
-  // the default behavior is to look for individual files.
-  UErrorCode err = U_ZERO_ERROR;
-  udata_setFileAccess(UDATA_ONLY_PACKAGES, &err);
-  return err == U_ZERO_ERROR;
-#endif
-}
-
-int main(int argc, char **argv) {
-  ::testing::InitGoogleTest(&argc, argv);
-
-  InitializeICU();
-
-  return RUN_ALL_TESTS();
-}

diff --git a/googleurl/src/gurl_unittest.cc b/googleurl/src/gurl_unittest.cc
deleted file mode 100644
index b548cc2..0000000
--- a/googleurl/src/gurl_unittest.cc
+++ /dev/null

@@ -1,432 +0,0 @@
-// Copyright 2007 Google Inc. All Rights Reserved.
-// Author: brettw@google.com (Brett Wilson)
-
-#include "googleurl/src/gurl.h"
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_test_utils.h"
-#include "testing/base/public/gunit.h"
-
-// Some implementations of base/basictypes.h may define ARRAYSIZE.
-// If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro
-// which is in our version of basictypes.h.
-#ifndef ARRAYSIZE
-#define ARRAYSIZE ARRAYSIZE_UNSAFE
-#endif
-
-using url_test_utils::WStringToUTF16;
-using url_test_utils::ConvertUTF8ToUTF16;
-
-namespace {
-
-template<typename CHAR>
-void SetupReplacement(void (url_canon::Replacements<CHAR>::*func)(const CHAR*,
-                          const url_parse::Component&),
-                      url_canon::Replacements<CHAR>* replacements,
-                      const CHAR* str) {
-  if (str) {
-    url_parse::Component comp;
-    if (str[0])
-      comp.len = static_cast<int>(strlen(str));
-    (replacements->*func)(str, comp);
-  }
-}
-
-// Returns the canonicalized string for the given URL string for the
-// GURLTest.Types test.
-std::string TypesTestCase(const char* src) {
-  GURL gurl(src);
-  return gurl.possibly_invalid_spec();
-}
-
-}  // namespace
-
-// Different types of URLs should be handled differently by url_util, and
-// handed off to different canonicalizers.
-TEST(GURLTest, Types) {
-  // URLs with unknown schemes should be treated as path URLs, even when they
-  // have things like "://".
-  EXPECT_EQ("something:///HOSTNAME.com/",
-            TypesTestCase("something:///HOSTNAME.com/"));
-
-  // In the reverse, known schemes should always trigger standard URL handling.
-  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
-  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
-  EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
-  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
-
-#ifdef WIN32
-  // URLs that look like absolute Windows drive specs.
-  EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
-  EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
-  EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
-  EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt"));
-#endif
-}
-
-// Test the basic creation and querying of components in a GURL. We assume
-// the parser is already tested and works, so we are mostly interested if the
-// object does the right thing with the results.
-TEST(GURLTest, Components) {
-  GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
-  EXPECT_TRUE(url.is_valid());
-  EXPECT_TRUE(url.SchemeIs("http"));
-  EXPECT_FALSE(url.SchemeIsFile());
-
-  // This is the narrow version of the URL, which should match the wide input.
-  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec());
-
-  EXPECT_EQ("http", url.scheme());
-  EXPECT_EQ("user", url.username());
-  EXPECT_EQ("pass", url.password());
-  EXPECT_EQ("google.com", url.host());
-  EXPECT_EQ("99", url.port());
-  EXPECT_EQ(99, url.IntPort());
-  EXPECT_EQ("/foo;bar", url.path());
-  EXPECT_EQ("q=a", url.query());
-  EXPECT_EQ("ref", url.ref());
-}
-
-TEST(GURLTest, Empty) {
-  GURL url;
-  EXPECT_FALSE(url.is_valid());
-  EXPECT_EQ("", url.spec());
-
-  EXPECT_EQ("", url.scheme());
-  EXPECT_EQ("", url.username());
-  EXPECT_EQ("", url.password());
-  EXPECT_EQ("", url.host());
-  EXPECT_EQ("", url.port());
-  EXPECT_EQ(url_parse::PORT_UNSPECIFIED, url.IntPort());
-  EXPECT_EQ("", url.path());
-  EXPECT_EQ("", url.query());
-  EXPECT_EQ("", url.ref());
-}
-
-TEST(GURLTest, Copy) {
-  GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
-
-  GURL url2(url);
-  EXPECT_TRUE(url2.is_valid());
-
-  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
-  EXPECT_EQ("http", url2.scheme());
-  EXPECT_EQ("user", url2.username());
-  EXPECT_EQ("pass", url2.password());
-  EXPECT_EQ("google.com", url2.host());
-  EXPECT_EQ("99", url2.port());
-  EXPECT_EQ(99, url2.IntPort());
-  EXPECT_EQ("/foo;bar", url2.path());
-  EXPECT_EQ("q=a", url2.query());
-  EXPECT_EQ("ref", url2.ref());
-
-  // Copying of invalid URL should be invalid
-  GURL invalid;
-  GURL invalid2(invalid);
-  EXPECT_FALSE(invalid2.is_valid());
-  EXPECT_EQ("", invalid2.spec());
-  EXPECT_EQ("", invalid2.scheme());
-  EXPECT_EQ("", invalid2.username());
-  EXPECT_EQ("", invalid2.password());
-  EXPECT_EQ("", invalid2.host());
-  EXPECT_EQ("", invalid2.port());
-  EXPECT_EQ(url_parse::PORT_UNSPECIFIED, invalid2.IntPort());
-  EXPECT_EQ("", invalid2.path());
-  EXPECT_EQ("", invalid2.query());
-  EXPECT_EQ("", invalid2.ref());
-}
-
-// Given an invalid URL, we should still get most of the components.
-TEST(GURLTest, Invalid) {
-  GURL url("http:google.com:foo");
-  EXPECT_FALSE(url.is_valid());
-  EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec());
-
-  EXPECT_EQ("http", url.scheme());
-  EXPECT_EQ("", url.username());
-  EXPECT_EQ("", url.password());
-  EXPECT_EQ("google.com", url.host());
-  EXPECT_EQ("foo", url.port());
-  EXPECT_EQ(url_parse::PORT_INVALID, url.IntPort());
-  EXPECT_EQ("/", url.path());
-  EXPECT_EQ("", url.query());
-  EXPECT_EQ("", url.ref());
-}
-
-TEST(GURLTest, Resolve) {
-  // The tricky cases for relative URL resolving are tested in the
-  // canonicalizer unit test. Here, we just test that the GURL integration
-  // works properly.
-  struct ResolveCase {
-    const char* base;
-    const char* relative;
-    bool expected_valid;
-    const char* expected;
-  } resolve_cases[] = {
-    {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"},
-    {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
-    {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
-    {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
-    {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
-      // Unknown schemes are not standard.
-    {"data:blahblah", "http://google.com/", true, "http://google.com/"},
-    {"data:blahblah", "http:google.com", true, "http://google.com/"},
-    {"data:/blahblah", "file.html", false, ""},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(resolve_cases); i++) {
-    // 8-bit code path.
-    GURL input(resolve_cases[i].base);
-    GURL output = input.Resolve(resolve_cases[i].relative);
-    EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i;
-    EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i;
-
-    // Wide code path.
-    GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base));
-    GURL outputw =
-        input.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative));
-    EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
-    EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
-  }
-}
-
-TEST(GURLTest, GetOrigin) {
-  struct TestCase {
-    const char* input;
-    const char* expected;
-  } cases[] = {
-    {"http://www.google.com", "http://www.google.com/"},
-    {"javascript:window.alert(\"hello,world\");", ""},
-    {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"},
-    {"http://user@www.google.com", "http://www.google.com/"},
-    {"http://:pass@www.google.com", "http://www.google.com/"},
-    {"http://:@www.google.com", "http://www.google.com/"},
-  };
-  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
-    GURL url(cases[i].input);
-    GURL origin = url.GetOrigin();
-    EXPECT_EQ(cases[i].expected, origin.spec());
-  }
-}
-
-TEST(GURLTest, GetWithEmptyPath) {
-  struct TestCase {
-    const char* input;
-    const char* expected;
-  } cases[] = {
-    {"http://www.google.com", "http://www.google.com/"},
-    {"javascript:window.alert(\"hello, world\");", ""},
-    {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
-    GURL url(cases[i].input);
-    GURL empty_path = url.GetWithEmptyPath();
-    EXPECT_EQ(cases[i].expected, empty_path.spec());
-  }
-}
-
-TEST(GURLTest, Replacements) {
-  // The url canonicalizer replacement test will handle most of these case.
-  // The most important thing to do here is to check that the proper
-  // canonicalizer gets called based on the scheme of the input.
-  struct ReplaceCase {
-    const char* base;
-    const char* scheme;
-    const char* username;
-    const char* password;
-    const char* host;
-    const char* port;
-    const char* path;
-    const char* query;
-    const char* ref;
-    const char* expected;
-  } replace_cases[] = {
-    {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"},
-    {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"},
-    {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"},
-#ifdef WIN32
-    {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"},
-#endif
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(replace_cases); i++) {
-    const ReplaceCase& cur = replace_cases[i];
-    GURL url(cur.base);
-    GURL::Replacements repl;
-    SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme);
-    SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username);
-    SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password);
-    SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host);
-    SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port);
-    SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path);
-    SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query);
-    SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref);
-    GURL output = url.ReplaceComponents(repl);
-
-    EXPECT_EQ(replace_cases[i].expected, output.spec());
-  }
-}
-
-TEST(GURLTest, PathForRequest) {
-  struct TestCase {
-    const char* input;
-    const char* expected;
-  } cases[] = {
-    {"http://www.google.com", "/"},
-    {"http://www.google.com/", "/"},
-    {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22"},
-    {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html"},
-    {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query"},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
-    GURL url(cases[i].input);
-    std::string path_request = url.PathForRequest();
-    EXPECT_EQ(cases[i].expected, path_request);
-  }
-}
-
-TEST(GURLTest, EffectiveIntPort) {
-  struct PortTest {
-    const char* spec;
-    int expected_int_port;
-  } port_tests[] = {
-    // http
-    {"http://www.google.com/", 80},
-    {"http://www.google.com:80/", 80},
-    {"http://www.google.com:443/", 443},
-
-    // https
-    {"https://www.google.com/", 443},
-    {"https://www.google.com:443/", 443},
-    {"https://www.google.com:80/", 80},
-
-    // ftp
-    {"ftp://www.google.com/", 21},
-    {"ftp://www.google.com:21/", 21},
-    {"ftp://www.google.com:80/", 80},
-
-    // gopher
-    {"gopher://www.google.com/", 70},
-    {"gopher://www.google.com:70/", 70},
-    {"gopher://www.google.com:80/", 80},
-
-    // file - no port
-    {"file://www.google.com/", url_parse::PORT_UNSPECIFIED},
-    {"file://www.google.com:443/", url_parse::PORT_UNSPECIFIED},
-
-    // data - no port
-    {"data:www.google.com:90", url_parse::PORT_UNSPECIFIED},
-    {"data:www.google.com", url_parse::PORT_UNSPECIFIED},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(port_tests); i++) {
-    GURL url(port_tests[i].spec);
-    EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort());
-  }
-}
-
-TEST(GURLTest, IPAddress) {
-  struct IPTest {
-    const char* spec;
-    bool expected_ip;
-  } ip_tests[] = {
-    {"http://www.google.com/", false},
-    {"http://192.168.9.1/", true},
-    {"http://192.168.9.1.2/", false},
-    {"http://192.168.m.1/", false},
-    {"http://2001:db8::1/", false},
-    {"http://[2001:db8::1]/", true},
-    {"", false},
-    {"some random input!", false},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(ip_tests); i++) {
-    GURL url(ip_tests[i].spec);
-    EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress());
-  }
-}
-
-TEST(GURLTest, HostNoBrackets) {
-  struct TestCase {
-    const char* input;
-    const char* expected_host;
-    const char* expected_plainhost;
-  } cases[] = {
-    {"http://www.google.com", "www.google.com", "www.google.com"},
-    {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"},
-    {"http://[::]/", "[::]", "::"},
-
-    // Don't require a valid URL, but don't crash either.
-    {"http://[]/", "[]", ""},
-    {"http://[x]/", "[x]", "x"},
-    {"http://[x/", "[x", "[x"},
-    {"http://x]/", "x]", "x]"},
-    {"http://[/", "[", "["},
-    {"http://]/", "]", "]"},
-    {"", "", ""},
-  };
-  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
-    GURL url(cases[i].input);
-    EXPECT_EQ(cases[i].expected_host, url.host());
-    EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets());
-  }
-}
-
-TEST(GURLTest, DomainIs) {
-  const char google_domain[] = "google.com";
-
-  GURL url_1("http://www.google.com:99/foo");
-  EXPECT_TRUE(url_1.DomainIs(google_domain));
-
-  GURL url_2("http://google.com:99/foo");
-  EXPECT_TRUE(url_2.DomainIs(google_domain));
-
-  GURL url_3("http://google.com./foo");
-  EXPECT_TRUE(url_3.DomainIs(google_domain));
-
-  GURL url_4("http://google.com/foo");
-  EXPECT_FALSE(url_4.DomainIs("google.com."));
-
-  GURL url_5("http://google.com./foo");
-  EXPECT_TRUE(url_5.DomainIs("google.com."));
-
-  GURL url_6("http://www.google.com./foo");
-  EXPECT_TRUE(url_6.DomainIs(".com."));
-
-  GURL url_7("http://www.balabala.com/foo");
-  EXPECT_FALSE(url_7.DomainIs(google_domain));
-
-  GURL url_8("http://www.google.com.cn/foo");
-  EXPECT_FALSE(url_8.DomainIs(google_domain));
-
-  GURL url_9("http://www.iamnotgoogle.com/foo");
-  EXPECT_FALSE(url_9.DomainIs(google_domain));
-
-  GURL url_10("http://www.iamnotgoogle.com../foo");
-  EXPECT_FALSE(url_10.DomainIs(".com"));
-}
-
-// Newlines should be stripped from inputs.
-TEST(GURLTest, Newlines) {
-  // Constructor.
-  GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
-  EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
-
-  // Relative path resolver.
-  GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
-  EXPECT_EQ("http://www.google.com/foo", url_2.spec());
-
-  // Note that newlines are NOT stripped from ReplaceComponents.
-}
-
-TEST(GURLTest, IsStandard) {
-  GURL a("http:foo/bar");
-  EXPECT_TRUE(a.IsStandard());
-
-  GURL b("foo:bar/baz");
-  EXPECT_FALSE(b.IsStandard());
-
-  GURL c("foo://bar/baz");
-  EXPECT_FALSE(c.IsStandard());
-}

diff --git a/googleurl/src/url_canon_fileurl.cc b/googleurl/src/url_canon_fileurl.cc
deleted file mode 100644
index 97023eb..0000000
--- a/googleurl/src/url_canon_fileurl.cc
+++ /dev/null

@@ -1,215 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Functions for canonicalizing "file:" URLs.
-
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_internal.h"
-#include "googleurl/src/url_file.h"
-#include "googleurl/src/url_parse_internal.h"
-
-namespace url_canon {
-
-namespace {
-
-#ifdef WIN32
-
-// Given a pointer into the spec, this copies and canonicalizes the drive
-// letter and colon to the output, if one is found. If there is not a drive
-// spec, it won't do anything. The index of the next character in the input
-// spec is returned (after the colon when a drive spec is found, the begin
-// offset if one is not).
-template<typename CHAR>
-int FileDoDriveSpec(const CHAR* spec, int begin, int end,
-                    CanonOutput* output) {
-  // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
-  // (with backslashes instead of slashes as well).
-  int num_slashes = url_parse::CountConsecutiveSlashes(spec, begin, end);
-  int after_slashes = begin + num_slashes;
-
-  if (!url_parse::DoesBeginWindowsDriveSpec(spec, after_slashes, end))
-    return begin;  // Haven't consumed any characters
-
-  // A drive spec is the start of a path, so we need to add a slash for the
-  // authority terminator (typically the third slash).
-  output->push_back('/');
-
-  // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
-  // and that it is followed by a colon/pipe.
-
-  // Normalize Windows drive letters to uppercase
-  if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z')
-    output->push_back(spec[after_slashes] - 'a' + 'A');
-  else
-    output->push_back(static_cast<char>(spec[after_slashes]));
-
-  // Normalize the character following it to a colon rather than pipe.
-  output->push_back(':');
-  return after_slashes + 2;
-}
-
-#endif  // WIN32
-
-template<typename CHAR, typename UCHAR>
-bool DoFileCanonicalizePath(const CHAR* spec,
-                            const url_parse::Component& path,
-                            CanonOutput* output,
-                            url_parse::Component* out_path) {
-  // Copies and normalizes the "c:" at the beginning, if present.
-  out_path->begin = output->length();
-  int after_drive;
-#ifdef WIN32
-  after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
-#else
-  after_drive = path.begin;
-#endif
-
-  // Copies the rest of the path, starting from the slash following the
-  // drive colon (if any, Windows only), or the first slash of the path.
-  bool success = true;
-  if (after_drive < path.end()) {
-    // Use the regular path canonicalizer to canonicalize the rest of the
-    // path. Give it a fake output component to write into. DoCanonicalizeFile
-    // will compute the full path component.
-    url_parse::Component sub_path =
-        url_parse::MakeRange(after_drive, path.end());
-    url_parse::Component fake_output_path;
-    success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
-  } else {
-    // No input path, canonicalize to a slash.
-    output->push_back('/');
-  }
-
-  out_path->len = output->length() - out_path->begin;
-  return success;
-}
-
-template<typename CHAR, typename UCHAR>
-bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
-                           const url_parse::Parsed& parsed,
-                           CharsetConverter* query_converter,
-                           CanonOutput* output,
-                           url_parse::Parsed* new_parsed) {
-  // Things we don't set in file: URLs.
-  new_parsed->username = url_parse::Component();
-  new_parsed->password = url_parse::Component();
-  new_parsed->port = url_parse::Component();
-
-  // Scheme (known, so we don't bother running it through the more
-  // complicated scheme canonicalizer).
-  new_parsed->scheme.begin = output->length();
-  output->Append("file://", 7);
-  new_parsed->scheme.len = 4;
-
-  // Append the host. For many file URLs, this will be empty. For UNC, this
-  // will be present.
-  // TODO(brettw) This doesn't do any checking for host name validity. We
-  // should probably handle validity checking of UNC hosts differently than
-  // for regular IP hosts.
-  bool success = CanonicalizeHost(source.host, parsed.host,
-                                  output, &new_parsed->host);
-  success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
-                                    output, &new_parsed->path);
-  CanonicalizeQuery(source.query, parsed.query, query_converter,
-                    output, &new_parsed->query);
-
-  // Ignore failure for refs since the URL can probably still be loaded.
-  CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
-
-  return success;
-}
-
-} // namespace
-
-bool CanonicalizeFileURL(const char* spec,
-                         int spec_len,
-                         const url_parse::Parsed& parsed,
-                         CharsetConverter* query_converter,
-                         CanonOutput* output,
-                         url_parse::Parsed* new_parsed) {
-  return DoCanonicalizeFileURL<char, unsigned char>(
-      URLComponentSource<char>(spec), parsed, query_converter,
-      output, new_parsed);
-}
-
-bool CanonicalizeFileURL(const char16* spec,
-                         int spec_len,
-                         const url_parse::Parsed& parsed,
-                         CharsetConverter* query_converter,
-                         CanonOutput* output,
-                         url_parse::Parsed* new_parsed) {
-  return DoCanonicalizeFileURL<char16, char16>(
-      URLComponentSource<char16>(spec), parsed, query_converter,
-      output, new_parsed);
-}
-
-bool FileCanonicalizePath(const char* spec,
-                          const url_parse::Component& path,
-                          CanonOutput* output,
-                          url_parse::Component* out_path) {
-  return DoFileCanonicalizePath<char, unsigned char>(spec, path,
-                                                     output, out_path);
-}
-
-bool FileCanonicalizePath(const char16* spec,
-                          const url_parse::Component& path,
-                          CanonOutput* output,
-                          url_parse::Component* out_path) {
-  return DoFileCanonicalizePath<char16, char16>(spec, path,
-                                                output, out_path);
-}
-
-bool ReplaceFileURL(const char* base,
-                    const url_parse::Parsed& base_parsed,
-                    const Replacements<char>& replacements,
-                    CharsetConverter* query_converter,
-                    CanonOutput* output,
-                    url_parse::Parsed* new_parsed) {
-  URLComponentSource<char> source(base);
-  url_parse::Parsed parsed(base_parsed);
-  SetupOverrideComponents(base, replacements, &source, &parsed);
-  return DoCanonicalizeFileURL<char, unsigned char>(
-      source, parsed, query_converter, output, new_parsed);
-}
-
-bool ReplaceFileURL(const char* base,
-                    const url_parse::Parsed& base_parsed,
-                    const Replacements<char16>& replacements,
-                    CharsetConverter* query_converter,
-                    CanonOutput* output,
-                    url_parse::Parsed* new_parsed) {
-  RawCanonOutput<1024> utf8;
-  URLComponentSource<char> source(base);
-  url_parse::Parsed parsed(base_parsed);
-  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
-  return DoCanonicalizeFileURL<char, unsigned char>(
-      source, parsed, query_converter, output, new_parsed);
-}
-
-}  // namespace url_canon

diff --git a/googleurl/src/url_canon_icu.cc b/googleurl/src/url_canon_icu.cc
deleted file mode 100644
index b06808c..0000000
--- a/googleurl/src/url_canon_icu.cc
+++ /dev/null

@@ -1,207 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ICU integration functions.
-
-#include <stdlib.h>
-#include <string.h>
-#include <unicode/ucnv.h>
-#include <unicode/ucnv_cb.h>
-#include <unicode/uidna.h>
-
-#include "googleurl/src/url_canon_icu.h"
-#include "googleurl/src/url_canon_internal.h"  // for _itoa_s
-
-#include "base/logging.h"
-
-namespace url_canon {
-
-namespace {
-
-// Called when converting a character that can not be represented, this will
-// append an escaped version of the numerical character reference for that code
-// point. It is of the form "&#1234;" and we will escape the non-digits to
-// "%26%231234%3B". Why? This is what Netscape did back in the olden days.
-void appendURLEscapedChar(const void* context,
-                          UConverterFromUnicodeArgs* from_args,
-                          const UChar* code_units,
-                          int32_t length,
-                          UChar32 code_point,
-                          UConverterCallbackReason reason,
-                          UErrorCode* err) {
-  if (reason == UCNV_UNASSIGNED) {
-    *err = U_ZERO_ERROR;
-
-    const static int prefix_len = 6;
-    const static char prefix[prefix_len + 1] = "%26%23";  // "&#" percent-escaped
-    ucnv_cbFromUWriteBytes(from_args, prefix, prefix_len, 0, err);
-
-    DCHECK(code_point < 0x110000);
-    char number[8];  // Max Unicode code point is 7 digits.
-    _itoa_s(code_point, number, 10);
-    int number_len = static_cast<int>(strlen(number));
-    ucnv_cbFromUWriteBytes(from_args, number, number_len, 0, err);
-
-    const static int postfix_len = 3;
-    const static char postfix[postfix_len + 1] = "%3B";   // ";" percent-escaped
-    ucnv_cbFromUWriteBytes(from_args, postfix, postfix_len, 0, err);
-  }
-}
-
-// A class for scoping the installation of the invalid character callback.
-class AppendHandlerInstaller {
- public:
-  // The owner of this object must ensure that the converter is alive for the
-  // duration of this object's lifetime.
-  AppendHandlerInstaller(UConverter* converter) : converter_(converter) {
-    UErrorCode err = U_ZERO_ERROR;
-    ucnv_setFromUCallBack(converter_, appendURLEscapedChar, 0,
-                          &old_callback_, &old_context_, &err);
-  }
-
-  ~AppendHandlerInstaller() {
-    UErrorCode err = U_ZERO_ERROR;
-    ucnv_setFromUCallBack(converter_, old_callback_, old_context_, 0, 0, &err);
-  }
-
- private:
-  UConverter* converter_;
-
-  UConverterFromUCallback old_callback_;
-  const void* old_context_;
-};
-
-}  // namespace
-
-ICUCharsetConverter::ICUCharsetConverter(UConverter* converter)
-    : converter_(converter) {
-}
-
-void ICUCharsetConverter::ConvertFromUTF16(const char16* input,
-                                           int input_len,
-                                           CanonOutput* output) {
-  // Install our error handler. It will be called for character that can not
-  // be represented in the destination character set.
-  AppendHandlerInstaller handler(converter_);
-
-  int begin_offset = output->length();
-  int dest_capacity = output->capacity() - begin_offset;
-  output->set_length(output->length());
-
-  do {
-    UErrorCode err = U_ZERO_ERROR;
-    char* dest = &output->data()[begin_offset];
-    int required_capacity = ucnv_fromUChars(converter_, dest, dest_capacity,
-                                            input, input_len, &err);
-    if (err != U_BUFFER_OVERFLOW_ERROR) {
-      output->set_length(begin_offset + required_capacity);
-      return;
-    }
-
-    // Output didn't fit, expand
-    dest_capacity = required_capacity;
-    output->Resize(begin_offset + dest_capacity);
-  } while (true);
-}
-
-// Converts the Unicode input representing a hostname to ASCII using IDN rules.
-// The output must be ASCII, but is represented as wide characters.
-//
-// On success, the output will be filled with the ASCII host name and it will
-// return true. Unlike most other canonicalization functions, this assumes that
-// the output is empty. The beginning of the host will be at offset 0, and
-// the length of the output will be set to the length of the new host name.
-//
-// On error, this will return false. The output in this case is undefined.
-bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output) {
-  DCHECK(output->length() == 0);  // Output buffer is assumed empty.
-  while (true) {
-    // Use ALLOW_UNASSIGNED to be more tolerant of hostnames that violate
-    // the spec (which do exist). This does not present any risk and is a
-    // little more future proof.
-    UErrorCode err = U_ZERO_ERROR;
-    int num_converted = uidna_IDNToASCII(src, src_len, output->data(),
-                                         output->capacity(),
-                                         UIDNA_ALLOW_UNASSIGNED, NULL, &err);
-    if (err == U_ZERO_ERROR) {
-      output->set_length(num_converted);
-      return true;
-    }
-    if (err != U_BUFFER_OVERFLOW_ERROR)
-      return false;  // Unknown error, give up.
-
-    // Not enough room in our buffer, expand.
-    output->Resize(output->capacity() * 2);
-  }
-}
-
-bool ReadUTFChar(const char* str, int* begin, int length,
-                 unsigned* code_point_out) {
-  int code_point;  // Avoids warning when U8_NEXT writes -1 to it.
-  U8_NEXT(str, *begin, length, code_point);
-  *code_point_out = static_cast<unsigned>(code_point);
-
-  // The ICU macro above moves to the next char, we want to point to the last
-  // char consumed.
-  (*begin)--;
-
-  // Validate the decoded value.
-  if (U_IS_UNICODE_CHAR(code_point))
-    return true;
-  *code_point_out = kUnicodeReplacementCharacter;
-  return false;
-}
-
-bool ReadUTFChar(const char16* str, int* begin, int length,
-                 unsigned* code_point) {
-  if (U16_IS_SURROGATE(str[*begin])) {
-    if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length ||
-        !U16_IS_TRAIL(str[*begin + 1])) {
-      // Invalid surrogate pair.
-      *code_point = kUnicodeReplacementCharacter;
-      return false;
-    } else {
-      // Valid surrogate pair.
-      *code_point = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]);
-      (*begin)++;
-    }
-  } else {
-    // Not a surrogate, just one 16-bit word.
-    *code_point = str[*begin];
-  }
-
-  if (U_IS_UNICODE_CHAR(*code_point))
-    return true;
-
-  // Invalid code point.
-  *code_point = kUnicodeReplacementCharacter;
-  return false;
-}
-
-}  // namespace url_canon

diff --git a/googleurl/src/url_canon_icu.h b/googleurl/src/url_canon_icu.h
deleted file mode 100644
index 6bc52c3..0000000
--- a/googleurl/src/url_canon_icu.h
+++ /dev/null

@@ -1,63 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ICU integration functions.
-
-#ifndef GOOGLEURL_SRC_URL_CANON_ICU_H__
-#define GOOGLEURL_SRC_URL_CANON_ICU_H__
-
-#include "googleurl/src/url_canon.h"
-
-typedef struct UConverter UConverter;
-
-namespace url_canon {
-
-// An implementation of CharsetConverter that implementations can use to
-// interface the canonicalizer with ICU's conversion routines.
-class ICUCharsetConverter : public CharsetConverter {
- public:
-  // Constructs a converter using an already-existing ICU character set
-  // converter. This converter is NOT owned by this object; the lifetime must
-  // be managed by the creator such that it is alive as long as this is.
-  GURL_API ICUCharsetConverter(UConverter* converter);
-
-  GURL_API virtual ~ICUCharsetConverter() {}
-
-  GURL_API virtual void ConvertFromUTF16(const char16* input,
-                                         int input_len,
-                                         CanonOutput* output);
-
- private:
-  // The ICU converter, not owned by this class.
-  UConverter* converter_;
-};
-
-}  // namespace url_canon
-
-#endif  // GOOGLEURL_SRC_URL_CANON_ICU_H__

diff --git a/googleurl/src/url_canon_ip.h b/googleurl/src/url_canon_ip.h
deleted file mode 100644
index 0a01c9f..0000000
--- a/googleurl/src/url_canon_ip.h
+++ /dev/null

@@ -1,101 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef GOOGLEURL_SRC_URL_CANON_IP_H__
-#define GOOGLEURL_SRC_URL_CANON_IP_H__
-
-#include "base/string16.h"
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_common.h"
-#include "googleurl/src/url_parse.h"
-
-namespace url_canon {
-
-// Searches the host name for the portions of the IPv4 address. On success,
-// each component will be placed into |components| and it will return true.
-// It will return false if the host can not be separated as an IPv4 address
-// or if there are any non-7-bit characters or other characters that can not
-// be in an IP address. (This is important so we fail as early as possible for
-// common non-IP hostnames.)
-//
-// Not all components may exist. If there are only 3 components, for example,
-// the last one will have a length of -1 or 0 to indicate it does not exist.
-//
-// Note that many platform's inet_addr will ignore everything after a space
-// in certain curcumstances if the stuff before the space looks like an IP
-// address. IE6 is included in this. We do NOT handle this case. In many cases,
-// the browser's canonicalization will get run before this which converts
-// spaces to %20 (in the case of IE7) or rejects them (in the case of
-// Mozilla), so this code path never gets hit. Our host canonicalization will
-// notice these spaces and escape them, which will make IP address finding
-// fail. This seems like better behavior than stripping after a space.
-GURL_API bool FindIPv4Components(const char* spec,
-                                 const url_parse::Component& host,
-                                 url_parse::Component components[4]);
-GURL_API bool FindIPv4Components(const char16* spec,
-                                 const url_parse::Component& host,
-                                 url_parse::Component components[4]);
-
-// Converts an IPv4 address to a 32-bit number (network byte order).
-//
-// Possible return values:
-//   IPV4    - IPv4 address was successfully parsed.
-//   BROKEN  - Input was formatted like an IPv4 address, but overflow occurred
-//             during parsing.
-//   NEUTRAL - Input couldn't possibly be interpreted as an IPv4 address.
-//             It might be an IPv6 address, or a hostname.
-//
-// On success, |num_ipv4_components| will be populated with the number of
-// components in the IPv4 address.
-GURL_API CanonHostInfo::Family IPv4AddressToNumber(
-    const char* spec,
-    const url_parse::Component& host,
-    unsigned char address[4],
-    int* num_ipv4_components);
-GURL_API CanonHostInfo::Family IPv4AddressToNumber(
-    const char16* spec,
-    const url_parse::Component& host,
-    unsigned char address[4],
-    int* num_ipv4_components);
-
-// Converts an IPv6 address to a 128-bit number (network byte order), returning
-// true on success. False means that the input was not a valid IPv6 address.
-//
-// NOTE that |host| is expected to be surrounded by square brackets.
-// i.e. "[::1]" rather than "::1".
-GURL_API bool IPv6AddressToNumber(const char* spec,
-                                  const url_parse::Component& host,
-                                  unsigned char address[16]);
-GURL_API bool IPv6AddressToNumber(const char16* spec,
-                                  const url_parse::Component& host,
-                                  unsigned char address[16]);
-
-}  // namespace url_canon
-
-#endif  // GOOGLEURL_SRC_URL_CANON_IP_H__

diff --git a/googleurl/src/url_canon_mailtourl.cc b/googleurl/src/url_canon_mailtourl.cc
deleted file mode 100644
index 97868b8..0000000
--- a/googleurl/src/url_canon_mailtourl.cc
+++ /dev/null

@@ -1,137 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Functions for canonicalizing "mailto:" URLs.
-
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_internal.h"
-#include "googleurl/src/url_file.h"
-#include "googleurl/src/url_parse_internal.h"
-
-namespace url_canon {
-
-namespace {
-
-
-template<typename CHAR, typename UCHAR>
-bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
-                             const url_parse::Parsed& parsed,
-                             CanonOutput* output,
-                             url_parse::Parsed* new_parsed) {
-
-  // mailto: only uses {scheme, path, query} -- clear the rest.
-  new_parsed->username = url_parse::Component();
-  new_parsed->password = url_parse::Component();
-  new_parsed->host = url_parse::Component();
-  new_parsed->port = url_parse::Component();
-  new_parsed->ref = url_parse::Component();
-
-  // Scheme (known, so we don't bother running it through the more
-  // complicated scheme canonicalizer).
-  new_parsed->scheme.begin = output->length();
-  output->Append("mailto:", 7);
-  new_parsed->scheme.len = 6;
-
-  bool success = true;
-
-  // Path
-  if (parsed.path.is_valid()) {
-    new_parsed->path.begin = output->length();
-
-    // Copy the path using path URL's more lax escaping rules.
-    // We convert to UTF-8 and escape non-ASCII, but leave all
-    // ASCII characters alone.
-    int end = parsed.path.end();
-    for (int i = parsed.path.begin; i < end; ++i) {
-      UCHAR uch = static_cast<UCHAR>(source.path[i]);
-      if (uch < 0x20 || uch >= 0x80)
-        success &= AppendUTF8EscapedChar(source.path, &i, end, output);
-      else
-        output->push_back(static_cast<char>(uch));
-    }
-
-    new_parsed->path.len = output->length() - new_parsed->path.begin;
-  } else {
-    // No path at all
-    new_parsed->path.reset();
-  }
-
-  // Query -- always use the default utf8 charset converter.
-  CanonicalizeQuery(source.query, parsed.query, NULL,
-                    output, &new_parsed->query);
-
-  return success;
-}
-
-} // namespace
-
-bool CanonicalizeMailtoURL(const char* spec,
-                          int spec_len,
-                          const url_parse::Parsed& parsed,
-                          CanonOutput* output,
-                          url_parse::Parsed* new_parsed) {
-  return DoCanonicalizeMailtoURL<char, unsigned char>(
-      URLComponentSource<char>(spec), parsed, output, new_parsed);
-}
-
-bool CanonicalizeMailtoURL(const char16* spec,
-                           int spec_len,
-                           const url_parse::Parsed& parsed,
-                           CanonOutput* output,
-                           url_parse::Parsed* new_parsed) {
-  return DoCanonicalizeMailtoURL<char16, char16>(
-      URLComponentSource<char16>(spec), parsed, output, new_parsed);
-}
-
-bool ReplaceMailtoURL(const char* base,
-                      const url_parse::Parsed& base_parsed,
-                      const Replacements<char>& replacements,
-                      CanonOutput* output,
-                      url_parse::Parsed* new_parsed) {
-  URLComponentSource<char> source(base);
-  url_parse::Parsed parsed(base_parsed);
-  SetupOverrideComponents(base, replacements, &source, &parsed);
-  return DoCanonicalizeMailtoURL<char, unsigned char>(
-      source, parsed, output, new_parsed);
-}
-
-bool ReplaceMailtoURL(const char* base,
-                      const url_parse::Parsed& base_parsed,
-                      const Replacements<char16>& replacements,
-                      CanonOutput* output,
-                      url_parse::Parsed* new_parsed) {
-  RawCanonOutput<1024> utf8;
-  URLComponentSource<char> source(base);
-  url_parse::Parsed parsed(base_parsed);
-  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
-  return DoCanonicalizeMailtoURL<char, unsigned char>(
-      source, parsed, output, new_parsed);
-}
-
-}  // namespace url_canon

diff --git a/googleurl/src/url_canon_pathurl.cc b/googleurl/src/url_canon_pathurl.cc
deleted file mode 100644
index 4a990c7..0000000
--- a/googleurl/src/url_canon_pathurl.cc
+++ /dev/null

@@ -1,128 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Functions for canonicalizing "path" URLs. Not to be confused with the path
-// of a URL, these are URLs that have no authority section, only a path. For
-// example, "javascript:" and "data:".
-
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_internal.h"
-
-namespace url_canon {
-
-namespace {
-
-template<typename CHAR, typename UCHAR>
-bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
-                           const url_parse::Parsed& parsed,
-                           CanonOutput* output,
-                           url_parse::Parsed* new_parsed) {
-  // Scheme: this will append the colon.
-  bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
-                                    output, &new_parsed->scheme);
-
-  // We assume there's no authority for path URLs. Note that hosts should never
-  // have -1 length.
-  new_parsed->username.reset();
-  new_parsed->password.reset();
-  new_parsed->host.reset();
-  new_parsed->port.reset();
-
-  if (parsed.path.is_valid()) {
-    // Copy the path using path URL's more lax escaping rules (think for
-    // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all
-    // ASCII characters alone. This helps readability of JavaStript.
-    new_parsed->path.begin = output->length();
-    int end = parsed.path.end();
-    for (int i = parsed.path.begin; i < end; i++) {
-      UCHAR uch = static_cast<UCHAR>(source.path[i]);
-      if (uch < 0x20 || uch >= 0x80)
-        success &= AppendUTF8EscapedChar(source.path, &i, end, output);
-      else
-        output->push_back(static_cast<char>(uch));
-    }
-    new_parsed->path.len = output->length() - new_parsed->path.begin;
-  } else {
-    // Empty path.
-    new_parsed->path.reset();
-  }
-
-  // Assume there's no query or ref.
-  new_parsed->query.reset();
-  new_parsed->ref.reset();
-
-  return success;
-}
-
-}  // namespace
-
-bool CanonicalizePathURL(const char* spec,
-                         int spec_len,
-                         const url_parse::Parsed& parsed,
-                         CanonOutput* output,
-                         url_parse::Parsed* new_parsed) {
-  return DoCanonicalizePathURL<char, unsigned char>(
-      URLComponentSource<char>(spec), parsed, output, new_parsed);
-}
-
-bool CanonicalizePathURL(const char16* spec,
-                         int spec_len,
-                         const url_parse::Parsed& parsed,
-                         CanonOutput* output,
-                         url_parse::Parsed* new_parsed) {
-  return DoCanonicalizePathURL<char16, char16>(
-      URLComponentSource<char16>(spec), parsed, output, new_parsed);
-}
-
-bool ReplacePathURL(const char* base,
-                    const url_parse::Parsed& base_parsed,
-                    const Replacements<char>& replacements,
-                    CanonOutput* output,
-                    url_parse::Parsed* new_parsed) {
-  URLComponentSource<char> source(base);
-  url_parse::Parsed parsed(base_parsed);
-  SetupOverrideComponents(base, replacements, &source, &parsed);
-  return DoCanonicalizePathURL<char, unsigned char>(
-      source, parsed, output, new_parsed);
-}
-
-bool ReplacePathURL(const char* base,
-                    const url_parse::Parsed& base_parsed,
-                    const Replacements<char16>& replacements,
-                    CanonOutput* output,
-                    url_parse::Parsed* new_parsed) {
-  RawCanonOutput<1024> utf8;
-  URLComponentSource<char> source(base);
-  url_parse::Parsed parsed(base_parsed);
-  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
-  return DoCanonicalizePathURL<char, unsigned char>(
-      source, parsed, output, new_parsed);
-}
-
-}  // namespace url_canon

diff --git a/googleurl/src/url_canon_stdstring.h b/googleurl/src/url_canon_stdstring.h
deleted file mode 100644
index c43b777..0000000
--- a/googleurl/src/url_canon_stdstring.h
+++ /dev/null

@@ -1,134 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// This header file defines a canonicalizer output method class for STL
-// strings. Because the canonicalizer tries not to be dependent on the STL,
-// we have segregated it here.
-
-#ifndef GOOGLEURL_SRC_URL_CANON_STDSTRING_H__
-#define GOOGLEURL_SRC_URL_CANON_STDSTRING_H__
-
-#include <string>
-#include "googleurl/src/url_canon.h"
-
-namespace url_canon {
-
-// Write into a std::string given in the constructor. This object does not own
-// the string itself, and the user must ensure that the string stays alive
-// throughout the lifetime of this object.
-//
-// The given string will be appended to; any existing data in the string will
-// be preserved. The caller should reserve() the amount of data in the string
-// they expect to be written. We will resize if necessary, but that's slow.
-//
-// Note that when canonicalization is complete, the string will likely have
-// unused space at the end because we make the string very big to start out
-// with (by |initial_size|). This ends up being important because resize
-// operations are slow, and because the base class needs to write directly
-// into the buffer.
-//
-// Therefore, the user should call Complete() before using the string that
-// this class wrote into.
-class StdStringCanonOutput : public CanonOutput {
- public:
-  StdStringCanonOutput(std::string* str)
-      : CanonOutput(),
-        str_(str) {
-    cur_len_ = static_cast<int>(str_->size());  // Append to existing data.
-    str_->resize(str_->capacity());
-    buffer_ = &(*str_)[0];
-    buffer_len_ = static_cast<int>(str_->size());
-  }
-  virtual ~StdStringCanonOutput() {
-    // Nothing to do, we don't own the string.
-  }
-
-  // Must be called after writing has completed but before the string is used.
-  void Complete() {
-    str_->resize(cur_len_);
-    buffer_len_ = cur_len_;
-  }
-
-  virtual void Resize(int sz) {
-    str_->resize(sz);
-    buffer_ = &(*str_)[0];
-    buffer_len_ = sz;
-  }
-
- protected:
-  std::string* str_;
-};
-
-// An extension of the Replacements class that allows the setters to use
-// standard strings.
-//
-// The strings passed as arguments are not copied and must remain valid until
-// this class goes out of scope.
-template<typename STR>
-class StdStringReplacements :
-    public url_canon::Replacements<typename STR::value_type> {
- public:
-  void SetSchemeStr(const STR& s) {
-    this->SetScheme(s.data(),
-                    url_parse::Component(0, static_cast<int>(s.length())));
-  }
-  void SetUsernameStr(const STR& s) {
-    this->SetUsername(s.data(),
-                      url_parse::Component(0, static_cast<int>(s.length())));
-  }
-  void SetPasswordStr(const STR& s) {
-    this->SetPassword(s.data(),
-                      url_parse::Component(0, static_cast<int>(s.length())));
-  }
-  void SetHostStr(const STR& s) {
-    this->SetHost(s.data(),
-                  url_parse::Component(0, static_cast<int>(s.length())));
-  }
-  void SetPortStr(const STR& s) {
-    this->SetPort(s.data(),
-                  url_parse::Component(0, static_cast<int>(s.length())));
-  }
-  void SetPathStr(const STR& s) {
-    this->SetPath(s.data(),
-                  url_parse::Component(0, static_cast<int>(s.length())));
-  }
-  void SetQueryStr(const STR& s) {
-    this->SetQuery(s.data(),
-                   url_parse::Component(0, static_cast<int>(s.length())));
-  }
-  void SetRefStr(const STR& s) {
-    this->SetRef(s.data(),
-                 url_parse::Component(0, static_cast<int>(s.length())));
-  }
-};
-
-}  // namespace url_canon
-
-#endif  // GOOGLEURL_SRC_URL_CANON_STDSTRING_H__
-

diff --git a/googleurl/src/url_canon_unittest.cc b/googleurl/src/url_canon_unittest.cc
deleted file mode 100644
index 731d82f..0000000
--- a/googleurl/src/url_canon_unittest.cc
+++ /dev/null

@@ -1,1955 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <errno.h>
-#include <unicode/ucnv.h>
-
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_icu.h"
-#include "googleurl/src/url_canon_internal.h"
-#include "googleurl/src/url_canon_stdstring.h"
-#include "googleurl/src/url_parse.h"
-#include "googleurl/src/url_test_utils.h"
-#include "testing/base/public/gunit.h"
-
-// Some implementations of base/basictypes.h may define ARRAYSIZE.
-// If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro
-// which is in our version of basictypes.h.
-#ifndef ARRAYSIZE
-#define ARRAYSIZE ARRAYSIZE_UNSAFE
-#endif
-
-using url_test_utils::WStringToUTF16;
-using url_test_utils::ConvertUTF8ToUTF16;
-using url_test_utils::ConvertUTF16ToUTF8;
-using url_canon::CanonHostInfo;
-
-namespace {
-
-struct ComponentCase {
-  const char* input;
-  const char* expected;
-  url_parse::Component expected_component;
-  bool expected_success;
-};
-
-// ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests
-// treat each input as optional, and will only try processing if non-NULL.
-// The output is always 8-bit.
-struct DualComponentCase {
-  const char* input8;
-  const wchar_t* input16;
-  const char* expected;
-  url_parse::Component expected_component;
-  bool expected_success;
-};
-
-// Test cases for CanonicalizeIPAddress().  The inputs are identical to
-// DualComponentCase, but the output has extra CanonHostInfo fields.
-struct IPAddressCase {
-  const char* input8;
-  const wchar_t* input16;
-  const char* expected;
-  url_parse::Component expected_component;
-
-  // CanonHostInfo fields, for verbose output.
-  CanonHostInfo::Family expected_family;
-  int expected_num_ipv4_components;
-};
-
-struct ReplaceCase {
-  const char* base;
-  const char* scheme;
-  const char* username;
-  const char* password;
-  const char* host;
-  const char* port;
-  const char* path;
-  const char* query;
-  const char* ref;
-  const char* expected;
-};
-
-// Wrapper around a UConverter object that managers creation and destruction.
-class UConvScoper {
- public:
-  explicit UConvScoper(const char* charset_name) {
-    UErrorCode err = U_ZERO_ERROR;
-    converter_ = ucnv_open(charset_name, &err);
-  }
-
-  ~UConvScoper() {
-    if (converter_)
-      ucnv_close(converter_);
-  }
-
-  // Returns the converter object, may be NULL.
-  UConverter* converter() const { return converter_; }
-
- private:
-  UConverter* converter_;
-};
-
-// Magic string used in the replacements code that tells SetupReplComp to
-// call the clear function.
-const char kDeleteComp[] = "|";
-
-// Sets up a replacement for a single component. This is given pointers to
-// the set and clear function for the component being replaced, and will
-// either set the component (if it exists) or clear it (if the replacement
-// string matches kDeleteComp).
-//
-// This template is currently used only for the 8-bit case, and the strlen
-// causes it to fail in other cases. It is left a template in case we have
-// tests for wide replacements.
-template<typename CHAR>
-void SetupReplComp(
-    void (url_canon::Replacements<CHAR>::*set)(const CHAR*,
-                                               const url_parse::Component&),
-    void (url_canon::Replacements<CHAR>::*clear)(),
-    url_canon::Replacements<CHAR>* rep,
-    const CHAR* str) {
-  if (str && str[0] == kDeleteComp[0]) {
-    (rep->*clear)();
-  } else if (str) {
-    (rep->*set)(str, url_parse::Component(0, static_cast<int>(strlen(str))));
-  }
-}
-
-}  // namespace
-
-TEST(URLCanonTest, UTF) {
-  // Low-level test that we handle reading, canonicalization, and writing
-  // UTF-8/UTF-16 strings properly.
-  struct UTFCase {
-    const char* input8;
-    const wchar_t* input16;
-    bool expected_success;
-    const char* output;
-  } utf_cases[] = {
-      // Valid canonical input should get passed through & escaped.
-    {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"},
-      // Test a characer that takes > 16 bits (U+10300 = old italic letter A)
-    {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"},
-      // Non-shortest-form UTF-8 are invalid. The bad char should be replaced
-      // with the invalid character (EF BF DB in UTF-8).
-    {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", NULL, false, "%EF%BF%BD%E5%A5%BD"},
-      // Invalid UTF-8 sequences should be marked as invalid (the first
-      // sequence is truncated).
-    {"\xe4\xa0\xe5\xa5\xbd", L"\xd800\x597d", false, "%EF%BF%BD%E5%A5%BD"},
-      // Character going off the end.
-    {"\xe4\xbd\xa0\xe5\xa5", L"\x4f60\xd800", false, "%E4%BD%A0%EF%BF%BD"},
-      // ...same with low surrogates with no high surrogate.
-    {"\xed\xb0\x80", L"\xdc00", false, "%EF%BF%BD"},
-      // Test a UTF-8 encoded surrogate value is marked as invalid.
-      // ED A0 80 = U+D800
-    {"\xed\xa0\x80", NULL, false, "%EF%BF%BD"},
-  };
-
-  std::string out_str;
-  for (size_t i = 0; i < ARRAYSIZE(utf_cases); i++) {
-    if (utf_cases[i].input8) {
-      out_str.clear();
-      url_canon::StdStringCanonOutput output(&out_str);
-
-      int input_len = static_cast<int>(strlen(utf_cases[i].input8));
-      bool success = true;
-      for (int ch = 0; ch < input_len; ch++) {
-        success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len,
-                                         &output);
-      }
-      output.Complete();
-      EXPECT_EQ(utf_cases[i].expected_success, success);
-      EXPECT_EQ(std::string(utf_cases[i].output), out_str);
-    }
-    if (utf_cases[i].input16) {
-      out_str.clear();
-      url_canon::StdStringCanonOutput output(&out_str);
-
-      string16 input_str(WStringToUTF16(utf_cases[i].input16));
-      int input_len = static_cast<int>(input_str.length());
-      bool success = true;
-      for (int ch = 0; ch < input_len; ch++) {
-        success &= AppendUTF8EscapedChar(input_str.c_str(), &ch, input_len,
-                                         &output);
-      }
-      output.Complete();
-      EXPECT_EQ(utf_cases[i].expected_success, success);
-      EXPECT_EQ(std::string(utf_cases[i].output), out_str);
-    }
-
-    if (utf_cases[i].input8 && utf_cases[i].input16 &&
-        utf_cases[i].expected_success) {
-      // Check that the UTF-8 and UTF-16 inputs are equivalent.
-
-      // UTF-16 -> UTF-8
-      std::string input8_str(utf_cases[i].input8);
-      string16 input16_str(WStringToUTF16(utf_cases[i].input16));
-      EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str));
-
-      // UTF-8 -> UTF-16
-      EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str));
-    }
-  }
-}
-
-TEST(URLCanonTest, ICUCharsetConverter) {
-  struct ICUCase {
-    const wchar_t* input;
-    const char* encoding;
-    const char* expected;
-  } icu_cases[] = {
-      // UTF-8.
-    {L"Hello, world", "utf-8", "Hello, world"},
-    {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},
-      // Non-BMP UTF-8.
-    {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},
-      // Big5
-    {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},
-      // Unrepresentable character in the destination set.
-    {L"hello\x4f60\x06de\x597dworld", "big5", "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(icu_cases); i++) {
-    UConvScoper conv(icu_cases[i].encoding);
-    ASSERT_TRUE(conv.converter() != NULL);
-    url_canon::ICUCharsetConverter converter(conv.converter());
-
-    std::string str;
-    url_canon::StdStringCanonOutput output(&str);
-
-    string16 input_str(WStringToUTF16(icu_cases[i].input));
-    int input_len = static_cast<int>(input_str.length());
-    converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
-    output.Complete();
-
-    EXPECT_STREQ(icu_cases[i].expected, str.c_str());
-  }
-
-  // Test string sizes around the resize boundary for the output to make sure
-  // the converter resizes as needed.
-  const int static_size = 16;
-  UConvScoper conv("utf-8");
-  ASSERT_TRUE(conv.converter());
-  url_canon::ICUCharsetConverter converter(conv.converter());
-  for (int i = static_size - 2; i <= static_size + 2; i++) {
-    // Make a string with the appropriate length.
-    string16 input;
-    for (int ch = 0; ch < i; ch++)
-      input.push_back('a');
-
-    url_canon::RawCanonOutput<static_size> output;
-    converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()),
-                               &output);
-    EXPECT_EQ(input.length(), static_cast<size_t>(output.length()));
-  }
-}
-
-TEST(URLCanonTest, Scheme) {
-  // Here, we're mostly testing that unusual characters are handled properly.
-  // The canonicalizer doesn't do any parsing or whitespace detection. It will
-  // also do its best on error, and will escape funny sequences (these won't be
-  // valid schemes and it will return error).
-  //
-  // Note that the canonicalizer will append a colon to the output to separate
-  // out the rest of the URL, which is not present in the input. We check,
-  // however, that the output range includes everything but the colon.
-  ComponentCase scheme_cases[] = {
-    {"http", "http:", url_parse::Component(0, 4), true},
-    {"HTTP", "http:", url_parse::Component(0, 4), true},
-    {" HTTP ", "%20http%20:", url_parse::Component(0, 10),false},
-    {"htt: ", "htt%3A%20:", url_parse::Component(0, 9), false},
-    {"\xe4\xbd\xa0\xe5\xa5\xbdhttp", "%E4%BD%A0%E5%A5%BDhttp:", url_parse::Component(0, 22), false},
-      // Don't re-escape something already escaped. Note that it will
-      // "canonicalize" the 'A' to 'a', but that's OK.
-    {"ht%3Atp", "ht%3atp:", url_parse::Component(0, 7), false},
-  };
-
-  std::string out_str;
-
-  for (size_t i = 0; i < arraysize(scheme_cases); i++) {
-    int url_len = static_cast<int>(strlen(scheme_cases[i].input));
-    url_parse::Component in_comp(0, url_len);
-    url_parse::Component out_comp;
-
-    out_str.clear();
-    url_canon::StdStringCanonOutput output1(&out_str);
-    bool success = url_canon::CanonicalizeScheme(scheme_cases[i].input,
-                                                 in_comp, &output1, &out_comp);
-    output1.Complete();
-
-    EXPECT_EQ(scheme_cases[i].expected_success, success);
-    EXPECT_EQ(std::string(scheme_cases[i].expected), out_str);
-    EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
-    EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
-
-    // Now try the wide version
-    out_str.clear();
-    url_canon::StdStringCanonOutput output2(&out_str);
-
-    string16 wide_input(ConvertUTF8ToUTF16(scheme_cases[i].input));
-    in_comp.len = static_cast<int>(wide_input.length());
-    success = url_canon::CanonicalizeScheme(wide_input.c_str(), in_comp,
-                                            &output2, &out_comp);
-    output2.Complete();
-
-    EXPECT_EQ(scheme_cases[i].expected_success, success);
-    EXPECT_EQ(std::string(scheme_cases[i].expected), out_str);
-    EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
-    EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
-  }
-
-  // Test the case where the scheme is declared nonexistant, it should be
-  // converted into an empty scheme.
-  url_parse::Component out_comp;
-  out_str.clear();
-  url_canon::StdStringCanonOutput output(&out_str);
-
-  EXPECT_TRUE(url_canon::CanonicalizeScheme("", url_parse::Component(0, -1),
-                                            &output, &out_comp));
-  output.Complete();
-
-  EXPECT_EQ(std::string(":"), out_str);
-  EXPECT_EQ(0, out_comp.begin);
-  EXPECT_EQ(0, out_comp.len);
-}
-
-TEST(URLCanonTest, Host) {
-  IPAddressCase host_cases[] = {
-       // Basic canonicalization, uppercase should be converted to lowercase.
-    {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", url_parse::Component(0, 10), CanonHostInfo::NEUTRAL, -1},
-      // Spaces and some other characters should be escaped.
-    {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", url_parse::Component(0, 22), CanonHostInfo::NEUTRAL, -1},
-      // Exciting different types of spaces!
-    {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", url_parse::Component(0, 16), CanonHostInfo::NEUTRAL, -1},
-      // Other types of space (no-break, zero-width, zero-width-no-break) are
-      // name-prepped away to nothing.
-    {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", url_parse::Component(0, 10), CanonHostInfo::NEUTRAL, -1},
-      // Ideographic full stop (full-width period for Chinese, etc.) should be
-      // treated as a dot.
-    {NULL, L"www.foo\x3002" L"bar.com", "www.foo.bar.com", url_parse::Component(0, 15), CanonHostInfo::NEUTRAL, -1},
-      // Invalid unicode characters should fail...
-      // ...In wide input, ICU will barf and we'll end up with the input as
-      //    escaped UTF-8 (the invalid character should be replaced with the
-      //    replacement character).
-    {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", url_parse::Component(0, 16), CanonHostInfo::BROKEN, -1},
-      // ...This is the same as previous but with with escaped.
-    {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", url_parse::Component(0, 16), CanonHostInfo::BROKEN, -1},
-      // Test name prepping, fullwidth input should be converted to ASCII and NOT
-      // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.
-    {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", url_parse::Component(0, 6), CanonHostInfo::NEUTRAL, -1},
-      // Test that fullwidth escaped values are properly name-prepped,
-      // then converted or rejected.
-      // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
-    {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.com", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1},
-    {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com", url_parse::Component(0, 5), CanonHostInfo::NEUTRAL, -1},
-      // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
-    {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00.com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1},
-    {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com", url_parse::Component(0, 7), CanonHostInfo::BROKEN, -1},
-      // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
-    {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x597d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1},
-      // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
-      // UTF-8 (wide case). The output should be equivalent to the true wide
-      // character input above).
-    {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", L"%E4%BD%A0%E5%A5%BD\x4f60\x597d", "xn--6qqa088eba", url_parse::Component(0, 14), CanonHostInfo::NEUTRAL, -1},
-      // Invalid escaped characters should fail and the percents should be
-      // escaped.
-    {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", url_parse::Component(0, 10), CanonHostInfo::BROKEN, -1},
-      // If we get an invalid character that has been escaped.
-    {"%25", L"%25", "%25", url_parse::Component(0, 3), CanonHostInfo::BROKEN, -1},
-    {"hello%00", L"hello%00", "hello%00", url_parse::Component(0, 8), CanonHostInfo::BROKEN, -1},
-      // Escaped numbers should be treated like IP addresses if they are.
-    {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3},
-    {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3},
-      // Invalid escaping should trigger the regular host error handling.
-    {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", url_parse::Component(0, 17), CanonHostInfo::BROKEN, -1},
-      // Something that isn't exactly an IP should get treated as a host and
-      // spaces escaped.
-    {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", url_parse::Component(0, 19), CanonHostInfo::NEUTRAL, -1},
-      // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
-      // These are "0Xc0.0250.01" in fullwidth.
-    {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3},
-      // Broken IP addresses get marked as such.
-    {"192.168.0.257", L"192.168.0.257", "192.168.0.257", url_parse::Component(0, 13), CanonHostInfo::BROKEN, -1},
-    {"[google.com]", L"[google.com]", "[google.com]", url_parse::Component(0, 12), CanonHostInfo::BROKEN, -1},
-      // Cyrillic letter followed buy ( should return punicode for ( escaped before punicode string was created. I.e.
-      // if ( is escaped after punicode is created we would get xn--%28-8tb (incorrect).
-    {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", url_parse::Component(0, 11), CanonHostInfo::NEUTRAL, -1},
-  };
-
-  // CanonicalizeHost() non-verbose.
-  std::string out_str;
-  for (size_t i = 0; i < arraysize(host_cases); i++) {
-    // Narrow version.
-    if (host_cases[i].input8) {
-      int host_len = static_cast<int>(strlen(host_cases[i].input8));
-      url_parse::Component in_comp(0, host_len);
-      url_parse::Component out_comp;
-
-      out_str.clear();
-      url_canon::StdStringCanonOutput output(&out_str);
-
-      bool success = url_canon::CanonicalizeHost(host_cases[i].input8, in_comp,
-                                                 &output, &out_comp);
-      output.Complete();
-
-      EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
-                success);
-      EXPECT_EQ(std::string(host_cases[i].expected), out_str);
-      EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);
-      EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);
-    }
-
-    // Wide version.
-    if (host_cases[i].input16) {
-      string16 input16(WStringToUTF16(host_cases[i].input16));
-      int host_len = static_cast<int>(input16.length());
-      url_parse::Component in_comp(0, host_len);
-      url_parse::Component out_comp;
-
-      out_str.clear();
-      url_canon::StdStringCanonOutput output(&out_str);
-
-      bool success = url_canon::CanonicalizeHost(input16.c_str(), in_comp,
-                                                 &output, &out_comp);
-      output.Complete();
-
-      EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
-                success);
-      EXPECT_EQ(std::string(host_cases[i].expected), out_str);
-      EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);
-      EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);
-    }
-  }
-
-  // CanonicalizeHostVerbose()
-  for (size_t i = 0; i < arraysize(host_cases); i++) {
-    // Narrow version.
-    if (host_cases[i].input8) {
-      int host_len = static_cast<int>(strlen(host_cases[i].input8));
-      url_parse::Component in_comp(0, host_len);
-
-      out_str.clear();
-      url_canon::StdStringCanonOutput output(&out_str);
-      CanonHostInfo host_info;
-
-      url_canon::CanonicalizeHostVerbose(host_cases[i].input8, in_comp,
-                                         &output, &host_info);
-      output.Complete();
-
-      EXPECT_EQ(host_cases[i].expected_family, host_info.family);
-      EXPECT_EQ(std::string(host_cases[i].expected), out_str);
-      EXPECT_EQ(host_cases[i].expected_component.begin,
-                host_info.out_host.begin);
-      EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
-      if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
-        EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
-                  host_info.num_ipv4_components);
-      }
-    }
-
-    // Wide version.
-    if (host_cases[i].input16) {
-      string16 input16(WStringToUTF16(host_cases[i].input16));
-      int host_len = static_cast<int>(input16.length());
-      url_parse::Component in_comp(0, host_len);
-
-      out_str.clear();
-      url_canon::StdStringCanonOutput output(&out_str);
-      CanonHostInfo host_info;
-
-      url_canon::CanonicalizeHostVerbose(input16.c_str(), in_comp,
-                                         &output, &host_info);
-      output.Complete();
-
-      EXPECT_EQ(host_cases[i].expected_family, host_info.family);
-      EXPECT_EQ(std::string(host_cases[i].expected), out_str);
-      EXPECT_EQ(host_cases[i].expected_component.begin,
-                host_info.out_host.begin);
-      EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
-      if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
-        EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
-                  host_info.num_ipv4_components);
-      }
-    }
-  }
-}
-
-TEST(URLCanonTest, IPv4) {
-  IPAddressCase cases[] = {
-      // Empty is not an IP address.
-    {"", L"", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-    {".", L".", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-      // Regular IP addresses in different bases.
-    {"192.168.0.1", L"192.168.0.1", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 4},
-    {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 4},
-    {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 4},
-      // Non-IP addresses due to invalid characters.
-    {"192.168.9.com", L"192.168.9.com", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-      // Invalid characters for the base should be rejected.
-    {"19a.168.0.1", L"19a.168.0.1", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-    {"0308.0250.00.01", L"0308.0250.00.01", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-    {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-      // If there are not enough components, the last one should fill them out.
-    {"192", L"192", "0.0.0.192", url_parse::Component(0, 9), CanonHostInfo::IPV4, 1},
-    {"0xC0a80001", L"0xC0a80001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 1},
-    {"030052000001", L"030052000001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 1},
-    {"000030052000001", L"000030052000001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 1},
-    {"192.168", L"192.168", "192.0.0.168", url_parse::Component(0, 11), CanonHostInfo::IPV4, 2},
-    {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 2},
-    {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 2},
-    {"192.168.1", L"192.168.1", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3},
-      // Too many components means not an IP address.
-    {"192.168.0.0.1", L"192.168.0.0.1", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-      // We allow a single trailing dot.
-    {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 4},
-    {"192.168.0.1. hello", L"192.168.0.1. hello", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-    {"192.168.0.1..", L"192.168.0.1..", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-      // Two dots in a row means not an IP address.
-    {"192.168..1", L"192.168..1", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-      // Any numerical overflow should be marked as BROKEN.
-    {"0x100.0", L"0x100.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"0x100.0.0", L"0x100.0.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"0x100.0.0.0", L"0x100.0.0.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"0.0x100.0.0", L"0.0x100.0.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"0.0.0x100.0", L"0.0.0x100.0", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"0.0.0.0x100", L"0.0.0.0x100", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"0.0.0x10000", L"0.0.0x10000", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"0.0x1000000", L"0.0x1000000", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"0x100000000", L"0x100000000", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-      // Repeat the previous tests, minus 1, to verify boundaries.
-    {"0xFF.0", L"0xFF.0", "255.0.0.0", url_parse::Component(0, 9), CanonHostInfo::IPV4, 2},
-    {"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", url_parse::Component(0, 9), CanonHostInfo::IPV4, 3},
-    {"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", url_parse::Component(0, 9), CanonHostInfo::IPV4, 4},
-    {"0.0xFF.0.0", L"0.0xFF.0.0", "0.255.0.0", url_parse::Component(0, 9), CanonHostInfo::IPV4, 4},
-    {"0.0.0xFF.0", L"0.0.0xFF.0", "0.0.255.0", url_parse::Component(0, 9), CanonHostInfo::IPV4, 4},
-    {"0.0.0.0xFF", L"0.0.0.0xFF", "0.0.0.255", url_parse::Component(0, 9), CanonHostInfo::IPV4, 4},
-    {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3},
-    {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", url_parse::Component(0, 13), CanonHostInfo::IPV4, 2},
-    {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", url_parse::Component(0, 15), CanonHostInfo::IPV4, 1},
-      // Old trunctations tests.  They're all "BROKEN" now.
-    {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"192.168.0.257", L"192.168.0.257", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"192.168.0xa20001", L"192.168.0xa20001", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"192.015052000001", L"192.015052000001", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"0X12C0a80001", L"0X12C0a80001", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"276.1.2", L"276.1.2", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-      // Spaces should be rejected.
-    {"192.168.0.1 hello", L"192.168.0.1 hello", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-      // Very large numbers.
-    {"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300.0x00000000000000fF.00000000000000001", "192.255.0.1", url_parse::Component(0, 11), CanonHostInfo::IPV4, 3},
-    {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", "", url_parse::Component(0, 11), CanonHostInfo::BROKEN, -1},
-      // A number has no length limit, but long numbers can still overflow.
-    {"00000000000000000001", L"00000000000000000001", "0.0.0.1", url_parse::Component(0, 7), CanonHostInfo::IPV4, 1},
-    {"0000000000000000100000000000000001", L"0000000000000000100000000000000001", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-      // If a long component is non-numeric, it's a hostname, *not* a broken IP.
-    {"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-    {"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-      // Truncation of all zeros should still result in 0.
-    {"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", url_parse::Component(0, 7), CanonHostInfo::IPV4, 4},
-  };
-
-  for (size_t i = 0; i < arraysize(cases); i++) {
-    // 8-bit version.
-    url_parse::Component component(0,
-                                   static_cast<int>(strlen(cases[i].input8)));
-
-    std::string out_str1;
-    url_canon::StdStringCanonOutput output1(&out_str1);
-    url_canon::CanonHostInfo host_info;
-    url_canon::CanonicalizeIPAddress(cases[i].input8, component, &output1,
-                                     &host_info);
-    output1.Complete();
-
-    EXPECT_EQ(cases[i].expected_family, host_info.family);
-    if (host_info.family == CanonHostInfo::IPV4) {
-      EXPECT_STREQ(cases[i].expected, out_str1.c_str());
-      EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
-      EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
-      EXPECT_EQ(cases[i].expected_num_ipv4_components,
-                host_info.num_ipv4_components);
-    }
-
-    // 16-bit version.
-    string16 input16(WStringToUTF16(cases[i].input16));
-    component = url_parse::Component(0, static_cast<int>(input16.length()));
-
-    std::string out_str2;
-    url_canon::StdStringCanonOutput output2(&out_str2);
-    url_canon::CanonicalizeIPAddress(input16.c_str(), component, &output2,
-                                     &host_info);
-    output2.Complete();
-
-    EXPECT_EQ(cases[i].expected_family, host_info.family);
-    if (host_info.family == CanonHostInfo::IPV4) {
-      EXPECT_STREQ(cases[i].expected, out_str2.c_str());
-      EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
-      EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
-      EXPECT_EQ(cases[i].expected_num_ipv4_components,
-                host_info.num_ipv4_components);
-    }
-  }
-}
-
-TEST(URLCanonTest, IPv6) {
-  IPAddressCase cases[] = {
-      // Empty is not an IP address.
-    {"", L"", "", url_parse::Component(), CanonHostInfo::NEUTRAL, -1},
-      // Non-IPs with [:] characters are marked BROKEN.
-    {":", L":", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[", L"[", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[:", L"[:", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"]", L"]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {":]", L":]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[]", L"[]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[:]", L"[:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-      // Regular IP address is invalid without bounding '[' and ']'.
-    {"2001:db8::1", L"2001:db8::1", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[2001:db8::1", L"[2001:db8::1", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"2001:db8::1]", L"2001:db8::1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-      // Regular IP addresses.
-    {"[::]", L"[::]", "[::]", url_parse::Component(0,4), CanonHostInfo::IPV6, -1},
-    {"[::1]", L"[::1]", "[::1]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1},
-    {"[1::]", L"[1::]", "[1::]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1},
-    {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", url_parse::Component(0,10), CanonHostInfo::IPV6, -1},
-    {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1},
-
-    // Leading zeros should be stripped.
-    {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4:5:6:7]", url_parse::Component(0,17), CanonHostInfo::IPV6, -1},
-
-    // Upper case letters should be lowercased.
-    {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", url_parse::Component(0,20), CanonHostInfo::IPV6, -1},
-
-    // The same address can be written with different contractions, but should
-    // get canonicalized to the same thing.
-    {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", url_parse::Component(0,14), CanonHostInfo::IPV6, -1},
-    {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", url_parse::Component(0,14), CanonHostInfo::IPV6, -1},
-
-    // IPv4 addresses
-    // Only mapped and compat addresses can have IPv4 syntax embedded.
-    {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-
-    // IPv4 with last component missing.
-    {"[::ffff:192.1.2]", L"[::ffff:192.1.2]", "[::ffff:c001:2]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1},
-
-    // IPv4 using hex.
-    // TODO(eroman): Should this format be disallowed?
-    {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8:1]", url_parse::Component(0,15), CanonHostInfo::IPV6, -1},
-
-    // There may be zeros surrounding the "::" contraction.
-    {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", url_parse::Component(0,5), CanonHostInfo::IPV6, -1},
-
-    {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", url_parse::Component(0,13), CanonHostInfo::IPV6, -1},
-
-      // Can only have one "::" contraction in an IPv6 string literal.
-    {"[2001::db8::1]", L"[2001::db8::1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-      // No more than 2 consecutive ':'s.
-    {"[2001:db8:::1]", L"[2001:db8:::1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[:::]", L"[:::]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-      // Non-IP addresses due to invalid characters.
-    {"[2001::.com]", L"[2001::.com]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-      // If there are not enough components, the last one should fill them out.
-    // ... omitted at this time ...
-      // Too many components means not an IP address.  Similarly with too few if using IPv4 compat or mapped addresses.
-    {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    // Too many bits (even though 8 comonents, the last one holds 32 bits).
-    {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-
-    // Too many bits specified -- the contraction would have to be zero-length
-    // to not exceed 128 bits.
-    {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-
-    // The contraction is for 16 bits of zero.
-    {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", url_parse::Component(0,17), CanonHostInfo::IPV6, -1},
-
-    // Cannot have a trailing colon.
-    {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-
-    // Cannot have negative numbers.
-    {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-
-    // Scope ID -- the URL may contain an optional ["%" <scope_id>] section.
-    // The scope_id should be included in the canonicalized URL, and is an
-    // unsigned decimal number.
-
-    // Invalid because no ID was given after the percent.
-
-    // Don't allow scope-id
-    {"[1::%1]", L"[1::%1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[1::%eth0]", L"[1::%eth0]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[1::%]", L"[1::%]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[%]", L"[%]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[::%:]", L"[::%:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-
-    // Don't allow leading or trailing colons.
-    {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-    {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-
-      // We allow a single trailing dot.
-    // ... omitted at this time ...
-      // Two dots in a row means not an IP address.
-    {"[::192.168..1]", L"[::192.168..1]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-      // Any non-first components get truncated to one byte.
-    // ... omitted at this time ...
-      // Spaces should be rejected.
-    {"[::1 hello]", L"[::1 hello]", "", url_parse::Component(), CanonHostInfo::BROKEN, -1},
-  };
-
-  for (size_t i = 0; i < arraysize(cases); i++) {
-    // 8-bit version.
-    url_parse::Component component(0,
-                                   static_cast<int>(strlen(cases[i].input8)));
-
-    std::string out_str1;
-    url_canon::StdStringCanonOutput output1(&out_str1);
-    url_canon::CanonHostInfo host_info;
-    url_canon::CanonicalizeIPAddress(cases[i].input8, component, &output1,
-                                     &host_info);
-    output1.Complete();
-
-    EXPECT_EQ(cases[i].expected_family, host_info.family);
-    if (host_info.family == CanonHostInfo::IPV6) {
-      EXPECT_STREQ(cases[i].expected, out_str1.c_str());
-      EXPECT_EQ(cases[i].expected_component.begin,
-                host_info.out_host.begin);
-      EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
-    }
-
-    // 16-bit version.
-    string16 input16(WStringToUTF16(cases[i].input16));
-    component = url_parse::Component(0, static_cast<int>(input16.length()));
-
-    std::string out_str2;
-    url_canon::StdStringCanonOutput output2(&out_str2);
-    url_canon::CanonicalizeIPAddress(input16.c_str(), component, &output2,
-                                     &host_info);
-    output2.Complete();
-
-    EXPECT_EQ(cases[i].expected_family, host_info.family);
-    if (host_info.family == CanonHostInfo::IPV6) {
-      EXPECT_STREQ(cases[i].expected, out_str2.c_str());
-      EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
-      EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
-    }
-  }
-}
-
-TEST(URLCanonTest, IPEmpty) {
-  std::string out_str1;
-  url_canon::StdStringCanonOutput output1(&out_str1);
-  url_canon::CanonHostInfo host_info;
-
-  // This tests tests.
-  const char spec[] = "192.168.0.1";
-  url_canon::CanonicalizeIPAddress(spec, url_parse::Component(),
-                                   &output1, &host_info);
-  EXPECT_FALSE(host_info.IsIPAddress());
-
-  url_canon::CanonicalizeIPAddress(spec, url_parse::Component(0, 0),
-                                   &output1, &host_info);
-  EXPECT_FALSE(host_info.IsIPAddress());
-}
-
-TEST(URLCanonTest, UserInfo) {
-  // Note that the canonicalizer should escape and treat empty components as
-  // not being there.
-
-  // We actually parse a full input URL so we can get the initial components.
-  struct UserComponentCase {
-    const char* input;
-    const char* expected;
-    url_parse::Component expected_username;
-    url_parse::Component expected_password;
-    bool expected_success;
-  } user_info_cases[] = {
-    {"http://user:pass@host.com/", "user:pass@", url_parse::Component(0, 4), url_parse::Component(5, 4), true},
-    {"http://@host.com/", "", url_parse::Component(0, -1), url_parse::Component(0, -1), true},
-    {"http://:@host.com/", "", url_parse::Component(0, -1), url_parse::Component(0, -1), true},
-    {"http://foo:@host.com/", "foo@", url_parse::Component(0, 3), url_parse::Component(0, -1), true},
-    {"http://:foo@host.com/", ":foo@", url_parse::Component(0, 0), url_parse::Component(1, 3), true},
-    {"http://^ :$\t@host.com/", "%5E%20:$%09@", url_parse::Component(0, 6), url_parse::Component(7, 4), true},
-    {"http://user:pass@/", "user:pass@", url_parse::Component(0, 4), url_parse::Component(5, 4), true},
-    {"http://%2540:bar@domain.com/", "%2540:bar@", url_parse::Component(0, 5), url_parse::Component(6, 3), true },
-
-      // IE7 compatability: old versions allowed backslashes in usernames, but
-      // IE7 does not. We disallow it as well.
-    {"ftp://me\\mydomain:pass@foo.com/", "", url_parse::Component(0, -1), url_parse::Component(0, -1), true},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(user_info_cases); i++) {
-    int url_len = static_cast<int>(strlen(user_info_cases[i].input));
-    url_parse::Parsed parsed;
-    url_parse::ParseStandardURL(user_info_cases[i].input, url_len, &parsed);
-    url_parse::Component out_user, out_pass;
-    std::string out_str;
-    url_canon::StdStringCanonOutput output1(&out_str);
-
-    bool success = url_canon::CanonicalizeUserInfo(user_info_cases[i].input,
-                                                   parsed.username,
-                                                   user_info_cases[i].input,
-                                                   parsed.password,
-                                                   &output1, &out_user,
-                                                   &out_pass);
-    output1.Complete();
-
-    EXPECT_EQ(user_info_cases[i].expected_success, success);
-    EXPECT_EQ(std::string(user_info_cases[i].expected), out_str);
-    EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin);
-    EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len);
-    EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin);
-    EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len);
-
-    // Now try the wide version
-    out_str.clear();
-    url_canon::StdStringCanonOutput output2(&out_str);
-    string16 wide_input(ConvertUTF8ToUTF16(user_info_cases[i].input));
-    success = url_canon::CanonicalizeUserInfo(wide_input.c_str(),
-                                              parsed.username,
-                                              wide_input.c_str(),
-                                              parsed.password,
-                                              &output2, &out_user, &out_pass);
-    output2.Complete();
-
-    EXPECT_EQ(user_info_cases[i].expected_success, success);
-    EXPECT_EQ(std::string(user_info_cases[i].expected), out_str);
-    EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin);
-    EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len);
-    EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin);
-    EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len);
-  }
-}
-
-TEST(URLCanonTest, Port) {
-  // We only need to test that the number gets properly put into the output
-  // buffer. The parser unit tests will test scanning the number correctly.
-  //
-  // Note that the CanonicalizePort will always prepend a colon to the output
-  // to separate it from the colon that it assumes preceeds it.
-  struct PortCase {
-    const char* input;
-    int default_port;
-    const char* expected;
-    url_parse::Component expected_component;
-    bool expected_success;
-  } port_cases[] = {
-      // Invalid input should be copied w/ failure.
-    {"as df", 80, ":as%20df", url_parse::Component(1, 7), false},
-    {"-2", 80, ":-2", url_parse::Component(1, 2), false},
-      // Default port should be omitted.
-    {"80", 80, "", url_parse::Component(0, -1), true},
-    {"8080", 80, ":8080", url_parse::Component(1, 4), true},
-      // PORT_UNSPECIFIED should mean always keep the port.
-    {"80", url_parse::PORT_UNSPECIFIED, ":80", url_parse::Component(1, 2), true},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(port_cases); i++) {
-    int url_len = static_cast<int>(strlen(port_cases[i].input));
-    url_parse::Component in_comp(0, url_len);
-    url_parse::Component out_comp;
-    std::string out_str;
-    url_canon::StdStringCanonOutput output1(&out_str);
-    bool success = url_canon::CanonicalizePort(port_cases[i].input, in_comp,
-                                               port_cases[i].default_port,
-                                               &output1, &out_comp);
-    output1.Complete();
-
-    EXPECT_EQ(port_cases[i].expected_success, success);
-    EXPECT_EQ(std::string(port_cases[i].expected), out_str);
-    EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin);
-    EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len);
-
-    // Now try the wide version
-    out_str.clear();
-    url_canon::StdStringCanonOutput output2(&out_str);
-    string16 wide_input(ConvertUTF8ToUTF16(port_cases[i].input));
-    success = url_canon::CanonicalizePort(wide_input.c_str(), in_comp,
-                                          port_cases[i].default_port,
-                                          &output2, &out_comp);
-    output2.Complete();
-
-    EXPECT_EQ(port_cases[i].expected_success, success);
-    EXPECT_EQ(std::string(port_cases[i].expected), out_str);
-    EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin);
-    EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len);
-  }
-}
-
-TEST(URLCanonTest, Path) {
-  DualComponentCase path_cases[] = {
-    // ----- path collapsing tests -----
-    {"/././foo", L"/././foo", "/foo", url_parse::Component(0, 4), true},
-    {"/./.foo", L"/./.foo", "/.foo", url_parse::Component(0, 5), true},
-    {"/foo/.", L"/foo/.", "/foo/", url_parse::Component(0, 5), true},
-    {"/foo/./", L"/foo/./", "/foo/", url_parse::Component(0, 5), true},
-      // double dots followed by a slash or the end of the string count
-    {"/foo/bar/..", L"/foo/bar/..", "/foo/", url_parse::Component(0, 5), true},
-    {"/foo/bar/../", L"/foo/bar/../", "/foo/", url_parse::Component(0, 5), true},
-      // don't count double dots when they aren't followed by a slash
-    {"/foo/..bar", L"/foo/..bar", "/foo/..bar", url_parse::Component(0, 10), true},
-      // some in the middle
-    {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", url_parse::Component(0, 8), true},
-    {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", url_parse::Component(0, 2), true},
-      // we should not be able to go above the root
-    {"/foo/../../..", L"/foo/../../..", "/", url_parse::Component(0, 1), true},
-    {"/foo/../../../ton", L"/foo/../../../ton", "/ton", url_parse::Component(0, 4), true},
-      // escaped dots should be unescaped and treated the same as dots
-    {"/foo/%2e", L"/foo/%2e", "/foo/", url_parse::Component(0, 5), true},
-    {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", url_parse::Component(0, 8), true},
-    {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", "/..bar", url_parse::Component(0, 6), true},
-      // Multiple slashes in a row should be preserved and treated like empty
-      // directory names.
-    {"////../..", L"////../..", "//", url_parse::Component(0, 2), true},
-
-    // ----- escaping tests -----
-    {"/foo", L"/foo", "/foo", url_parse::Component(0, 4), true},
-      // Valid escape sequence
-    {"/%20foo", L"/%20foo", "/%20foo", url_parse::Component(0, 7), true},
-      // Invalid escape sequence we should pass through unchanged.
-    {"/foo%", L"/foo%", "/foo%", url_parse::Component(0, 5), true},
-    {"/foo%2", L"/foo%2", "/foo%2", url_parse::Component(0, 6), true},
-      // Invalid escape sequence: bad characters should be treated the same as
-      // the sourrounding text, not as escaped (in this case, UTF-8).
-    {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", url_parse::Component(0, 10), true},
-    {"/foo%2\xc2\xa9zbar", NULL, "/foo%2%C2%A9zbar", url_parse::Component(0, 16), true},
-    {NULL, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", url_parse::Component(0, 22), true},
-      // Regular characters that are escaped should be unescaped
-    {"/foo%41%7a", L"/foo%41%7a", "/fooAz", url_parse::Component(0, 6), true},
-      // Funny characters that are unescaped should be escaped
-    {"/foo\x09\x91%91", NULL, "/foo%09%91%91", url_parse::Component(0, 13), true},
-    {NULL, L"/foo\x09\x91%91", "/foo%09%C2%91%91", url_parse::Component(0, 16), true},
-      // Invalid characters that are escaped should cause a failure.
-    {"/foo%00%51", L"/foo%00%51", "/foo%00Q", url_parse::Component(0, 8), false},
-      // Some characters should be passed through unchanged regardless of esc.
-    {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", url_parse::Component(0, 13), true},
-      // Characters that are properly escaped should not have the case changed
-      // of hex letters.
-    {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", url_parse::Component(0, 13), true},
-      // Funny characters that are unescaped should be escaped
-    {"/foo\tbar", L"/foo\tbar", "/foo%09bar", url_parse::Component(0, 10), true},
-      // Backslashes should get converted to forward slashes
-    {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", url_parse::Component(0, 8), true},
-      // Hashes found in paths (possibly only when the caller explicitly sets
-      // the path on an already-parsed URL) should be escaped.
-    {"/foo#bar", L"/foo#bar", "/foo%23bar", url_parse::Component(0, 10), true},
-      // %7f should be allowed and %3D should not be unescaped (these were wrong
-      // in a previous version).
-    {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", "/%7Ffp3%3Eju%3Dduvgw%3Dd", url_parse::Component(0, 24), true},
-      // @ should be passed through unchanged (escaped or unescaped).
-    {"/@asdf%40", L"/@asdf%40", "/@asdf%40", url_parse::Component(0, 9), true},
-
-    // ----- encoding tests -----
-      // Basic conversions
-    {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", url_parse::Component(0, 37), true},
-      // Invalid unicode characters should fail. We only do validation on
-      // UTF-16 input, so this doesn't happen on 8-bit.
-    {"/\xef\xb7\x90zyx", NULL, "/%EF%B7%90zyx", url_parse::Component(0, 13), true},
-    {NULL, L"/\xfdd0zyx", "/%EF%BF%BDzyx", url_parse::Component(0, 13), false},
-  };
-
-  for (size_t i = 0; i < arraysize(path_cases); i++) {
-    if (path_cases[i].input8) {
-      int len = static_cast<int>(strlen(path_cases[i].input8));
-      url_parse::Component in_comp(0, len);
-      url_parse::Component out_comp;
-      std::string out_str;
-      url_canon::StdStringCanonOutput output(&out_str);
-      bool success = url_canon::CanonicalizePath(path_cases[i].input8, in_comp,
-                                                 &output, &out_comp);
-      output.Complete();
-
-      EXPECT_EQ(path_cases[i].expected_success, success);
-      EXPECT_EQ(path_cases[i].expected_component.begin, out_comp.begin);
-      EXPECT_EQ(path_cases[i].expected_component.len, out_comp.len);
-      EXPECT_EQ(path_cases[i].expected, out_str);
-    }
-
-    if (path_cases[i].input16) {
-      string16 input16(WStringToUTF16(path_cases[i].input16));
-      int len = static_cast<int>(input16.length());
-      url_parse::Component in_comp(0, len);
-      url_parse::Component out_comp;
-      std::string out_str;
-      url_canon::StdStringCanonOutput output(&out_str);
-
-      bool success = url_canon::CanonicalizePath(input16.c_str(), in_comp,
-                                                 &output, &out_comp);
-      output.Complete();
-
-      EXPECT_EQ(path_cases[i].expected_success, success);
-      EXPECT_EQ(path_cases[i].expected_component.begin, out_comp.begin);
-      EXPECT_EQ(path_cases[i].expected_component.len, out_comp.len);
-      EXPECT_EQ(path_cases[i].expected, out_str);
-    }
-  }
-
-  // Manual test: embedded NULLs should be escaped and the URL should be marked
-  // as invalid.
-  const char path_with_null[] = "/ab\0c";
-  url_parse::Component in_comp(0, 5);
-  url_parse::Component out_comp;
-
-  std::string out_str;
-  url_canon::StdStringCanonOutput output(&out_str);
-  bool success = url_canon::CanonicalizePath(path_with_null, in_comp,
-                                             &output, &out_comp);
-  output.Complete();
-  EXPECT_FALSE(success);
-  EXPECT_EQ("/ab%00c", out_str);
-}
-
-TEST(URLCanonTest, Query) {
-  struct QueryCase {
-    const char* input8;
-    const wchar_t* input16;
-    const char* encoding;
-    const char* expected;
-  } query_cases[] = {
-      // Regular ASCII case in some different encodings.
-    {"foo=bar", L"foo=bar", NULL, "?foo=bar"},
-    {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},
-    {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},
-    {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},
-      // Allow question marks in the query without escaping
-    {"as?df", L"as?df", NULL, "?as?df"},
-      // Always escape '#' since it would mark the ref.
-    {"as#df", L"as#df", NULL, "?as%23df"},
-      // Escape some questionable 8-bit characters, but never unescape.
-    {"\x02hello\x7f bye", L"\x02hello\x7f bye", NULL, "?%02hello%7F%20bye"},
-    {"%40%41123", L"%40%41123", NULL, "?%40%41123"},
-      // Chinese input/output
-    {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", NULL, "?q=%E4%BD%A0%E5%A5%BD"},
-    {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", "?q=%C4%E3%BA%C3"},
-    {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},
-      // Unencodable character in the destination character set should be
-      // escaped. The escape sequence unescapes to be the entity name:
-      // "?q=&#20320;"
-    {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", "?q=Chinese%26%2365319%3B"},
-      // Invalid UTF-8/16 input should be replaced with invalid characters.
-    {"q=\xed\xed", L"q=\xd800\xd800", NULL, "?q=%EF%BF%BD%EF%BF%BD"},
-      // Don't allow < or > because sometimes they are used for XSS if the
-      // URL is echoed in content. Firefox does this, IE doesn't.
-    {"q=<asdf>", L"q=<asdf>", NULL, "?q=%3Casdf%3E"},
-      // Escape double quotemarks in the query.
-    {"q=\"asdf\"", L"q=\"asdf\"", NULL, "?q=%22asdf%22"},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(query_cases); i++) {
-    url_parse::Component out_comp;
-
-    UConvScoper conv(query_cases[i].encoding);
-    ASSERT_TRUE(!query_cases[i].encoding || conv.converter());
-    url_canon::ICUCharsetConverter converter(conv.converter());
-
-    // Map NULL to a NULL converter pointer.
-    url_canon::ICUCharsetConverter* conv_pointer = &converter;
-    if (!query_cases[i].encoding)
-      conv_pointer = NULL;
-
-    if (query_cases[i].input8) {
-      int len = static_cast<int>(strlen(query_cases[i].input8));
-      url_parse::Component in_comp(0, len);
-      std::string out_str;
-
-      url_canon::StdStringCanonOutput output(&out_str);
-      url_canon::CanonicalizeQuery(query_cases[i].input8, in_comp,
-                                   conv_pointer, &output, &out_comp);
-      output.Complete();
-
-      EXPECT_EQ(query_cases[i].expected, out_str);
-    }
-
-    if (query_cases[i].input16) {
-      string16 input16(WStringToUTF16(query_cases[i].input16));
-      int len = static_cast<int>(input16.length());
-      url_parse::Component in_comp(0, len);
-      std::string out_str;
-
-      url_canon::StdStringCanonOutput output(&out_str);
-      url_canon::CanonicalizeQuery(input16.c_str(), in_comp,
-                                   conv_pointer, &output, &out_comp);
-      output.Complete();
-
-      EXPECT_EQ(query_cases[i].expected, out_str);
-    }
-  }
-
-  // Extra test for input with embedded NULL;
-  std::string out_str;
-  url_canon::StdStringCanonOutput output(&out_str);
-  url_parse::Component out_comp;
-  url_canon::CanonicalizeQuery("a \x00z\x01", url_parse::Component(0, 5), NULL,
-                               &output, &out_comp);
-  output.Complete();
-  EXPECT_EQ("?a%20%00z%01", out_str);
-}
-
-TEST(URLCanonTest, Ref) {
-  // Refs are trivial, it just checks the encoding.
-  DualComponentCase ref_cases[] = {
-      // Regular one, we shouldn't escape spaces, et al.
-    {"hello, world", L"hello, world", "#hello, world", url_parse::Component(1, 12), true},
-      // UTF-8/wide input should be preserved
-    {"\xc2\xa9", L"\xa9", "#\xc2\xa9", url_parse::Component(1, 2), true},
-      // Test a characer that takes > 16 bits (U+10300 = old italic letter A)
-    {"\xF0\x90\x8C\x80ss", L"\xd800\xdf00ss", "#\xF0\x90\x8C\x80ss", url_parse::Component(1, 6), true},
-      // Escaping should be preserved unchanged, even invalid ones
-    {"%41%a", L"%41%a", "#%41%a", url_parse::Component(1, 5), true},
-      // Invalid UTF-8/16 input should be flagged and the input made valid
-    {"\xc2", NULL, "#\xef\xbf\xbd", url_parse::Component(1, 3), true},
-    {NULL, L"\xd800\x597d", "#\xef\xbf\xbd\xe5\xa5\xbd", url_parse::Component(1, 6), true},
-      // Test a Unicode invalid character.
-    {"a\xef\xb7\x90", L"a\xfdd0", "#a\xef\xbf\xbd", url_parse::Component(1, 4), true},
-      // Refs can have # signs and we should preserve them.
-    {"asdf#qwer", L"asdf#qwer", "#asdf#qwer", url_parse::Component(1, 9), true},
-    {"#asdf", L"#asdf", "##asdf", url_parse::Component(1, 5), true},
-  };
-
-  for (size_t i = 0; i < arraysize(ref_cases); i++) {
-    // 8-bit input
-    if (ref_cases[i].input8) {
-      int len = static_cast<int>(strlen(ref_cases[i].input8));
-      url_parse::Component in_comp(0, len);
-      url_parse::Component out_comp;
-
-      std::string out_str;
-      url_canon::StdStringCanonOutput output(&out_str);
-      url_canon::CanonicalizeRef(ref_cases[i].input8, in_comp,
-                                                &output, &out_comp);
-      output.Complete();
-
-      EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin);
-      EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len);
-      EXPECT_EQ(ref_cases[i].expected, out_str);
-    }
-
-    // 16-bit input
-    if (ref_cases[i].input16) {
-      string16 input16(WStringToUTF16(ref_cases[i].input16));
-      int len = static_cast<int>(input16.length());
-      url_parse::Component in_comp(0, len);
-      url_parse::Component out_comp;
-
-      std::string out_str;
-      url_canon::StdStringCanonOutput output(&out_str);
-      url_canon::CanonicalizeRef(input16.c_str(), in_comp, &output, &out_comp);
-      output.Complete();
-
-      EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin);
-      EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len);
-      EXPECT_EQ(ref_cases[i].expected, out_str);
-    }
-  }
-
-  // Try one with an embedded NULL. It should be stripped.
-  const char null_input[5] = "ab\x00z";
-  url_parse::Component null_input_component(0, 4);
-  url_parse::Component out_comp;
-
-  std::string out_str;
-  url_canon::StdStringCanonOutput output(&out_str);
-  url_canon::CanonicalizeRef(null_input, null_input_component,
-                             &output, &out_comp);
-  output.Complete();
-
-  EXPECT_EQ(1, out_comp.begin);
-  EXPECT_EQ(3, out_comp.len);
-  EXPECT_EQ("#abz", out_str);
-}
-
-TEST(URLCanonTest, CanonicalizeStandardURL) {
-  // The individual component canonicalize tests should have caught the cases
-  // for each of those components. Here, we just need to test that the various
-  // parts are included or excluded properly, and have the correct separators.
-  struct URLCase {
-    const char* input;
-    const char* expected;
-    bool expected_success;
-  } cases[] = {
-    {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#", true},
-    {"http://[www.google.com]/", "http://[www.google.com]/", false},
-    {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false},
-    {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", true},
-    {"www.google.com", ":www.google.com/", true},
-    {"http://192.0x00A80001", "http://192.168.0.1/", true},
-    {"http://www/foo%2Ehtml", "http://www/foo.html", true},
-    {"http://user:pass@/", "http://user:pass@/", false},
-    {"http://%25DOMAIN:foobar@foodomain.com/", "http://%25DOMAIN:foobar@foodomain.com/", true},
-
-      // Backslashes should get converted to forward slashes.
-    {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true},
-
-      // Busted refs shouldn't make the whole thing fail.
-    {"http://www.google.com/asdf#\xc2", "http://www.google.com/asdf#\xef\xbf\xbd", true},
-
-      // Basic port tests.
-    {"http://foo:80/", "http://foo/", true},
-    {"http://foo:81/", "http://foo:81/", true},
-    {"httpa://foo:80/", "httpa://foo:80/", true},
-    {"http://foo:-80/", "http://foo:-80/", false},
-
-    {"https://foo:443/", "https://foo/", true},
-    {"https://foo:80/", "https://foo:80/", true},
-    {"ftp://foo:21/", "ftp://foo/", true},
-    {"ftp://foo:80/", "ftp://foo:80/", true},
-    {"gopher://foo:70/", "gopher://foo/", true},
-    {"gopher://foo:443/", "gopher://foo:443/", true},
-    {"ws://foo:80/", "ws://foo/", true},
-    {"ws://foo:81/", "ws://foo:81/", true},
-    {"ws://foo:443/", "ws://foo:443/", true},
-    {"ws://foo:815/", "ws://foo:815/", true},
-    {"wss://foo:80/", "wss://foo:80/", true},
-    {"wss://foo:81/", "wss://foo:81/", true},
-    {"wss://foo:443/", "wss://foo/", true},
-    {"wss://foo:815/", "wss://foo:815/", true},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
-    int url_len = static_cast<int>(strlen(cases[i].input));
-    url_parse::Parsed parsed;
-    url_parse::ParseStandardURL(cases[i].input, url_len, &parsed);
-
-    url_parse::Parsed out_parsed;
-    std::string out_str;
-    url_canon::StdStringCanonOutput output(&out_str);
-    bool success = url_canon::CanonicalizeStandardURL(
-        cases[i].input, url_len, parsed, NULL, &output, &out_parsed);
-    output.Complete();
-
-    EXPECT_EQ(cases[i].expected_success, success);
-    EXPECT_EQ(cases[i].expected, out_str);
-  }
-}
-
-// The codepath here is the same as for regular canonicalization, so we just
-// need to test that things are replaced or not correctly.
-TEST(URLCanonTest, ReplaceStandardURL) {
-  ReplaceCase replace_cases[] = {
-      // Common case of truncating the path.
-    {"http://www.google.com/foo?bar=baz#ref", NULL, NULL, NULL, NULL, NULL, "/", kDeleteComp, kDeleteComp, "http://www.google.com/"},
-      // Replace everything
-    {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw", "host.com", "99", "/path", "query", "ref", "https://me:pw@host.com:99/path?query#ref"},
-      // Replace nothing
-    {"http://a:b@google.com:22/foo?baz@cat", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "http://a:b@google.com:22/foo?baz@cat"},
-  };
-
-  for (size_t i = 0; i < arraysize(replace_cases); i++) {
-    const ReplaceCase& cur = replace_cases[i];
-    int base_len = static_cast<int>(strlen(cur.base));
-    url_parse::Parsed parsed;
-    url_parse::ParseStandardURL(cur.base, base_len, &parsed);
-
-    url_canon::Replacements<char> r;
-    typedef url_canon::Replacements<char> R;  // Clean up syntax.
-
-    // Note that for the scheme we pass in a different clear function since
-    // there is no function to clear the scheme.
-    SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme);
-    SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username);
-    SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password);
-    SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host);
-    SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port);
-    SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path);
-    SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query);
-    SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref);
-
-    std::string out_str;
-    url_canon::StdStringCanonOutput output(&out_str);
-    url_parse::Parsed out_parsed;
-    url_canon::ReplaceStandardURL(replace_cases[i].base, parsed,
-                                  r, NULL, &output, &out_parsed);
-    output.Complete();
-
-    EXPECT_EQ(replace_cases[i].expected, out_str);
-  }
-
-  // The path pointer should be ignored if the address is invalid.
-  {
-    const char src[] = "http://www.google.com/here_is_the_path";
-    int src_len = static_cast<int>(strlen(src));
-
-    url_parse::Parsed parsed;
-    url_parse::ParseStandardURL(src, src_len, &parsed);
-
-    // Replace the path to 0 length string. By using 1 as the string address,
-    // the test should get an access violation if it tries to dereference it.
-    url_canon::Replacements<char> r;
-    r.SetPath(reinterpret_cast<char*>(0x00000001), url_parse::Component(0, 0));
-    std::string out_str1;
-    url_canon::StdStringCanonOutput output1(&out_str1);
-    url_parse::Parsed new_parsed;
-    url_canon::ReplaceStandardURL(src, parsed, r, NULL, &output1, &new_parsed);
-    output1.Complete();
-    EXPECT_STREQ("http://www.google.com/", out_str1.c_str());
-
-    // Same with an "invalid" path.
-    r.SetPath(reinterpret_cast<char*>(0x00000001), url_parse::Component());
-    std::string out_str2;
-    url_canon::StdStringCanonOutput output2(&out_str2);
-    url_canon::ReplaceStandardURL(src, parsed, r, NULL, &output2, &new_parsed);
-    output2.Complete();
-    EXPECT_STREQ("http://www.google.com/", out_str2.c_str());
-  }
-}
-
-TEST(URLCanonTest, ReplaceFileURL) {
-  ReplaceCase replace_cases[] = {
-      // Replace everything
-    {"file:///C:/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"},
-      // Replace nothing
-    {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"},
-      // Clear non-path components (common)
-    {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///C:/gaba"},
-      // Replace path with something that doesn't begin with a slash and make
-      // sure it get added properly.
-    {"file:///C:/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"},
-    {"file:///home/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"},
-    {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///home/gaba?query#ref"},
-    {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///home/gaba"},
-    {"file:///home/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"},
-  };
-
-  for (size_t i = 0; i < arraysize(replace_cases); i++) {
-    const ReplaceCase& cur = replace_cases[i];
-    int base_len = static_cast<int>(strlen(cur.base));
-    url_parse::Parsed parsed;
-    url_parse::ParseFileURL(cur.base, base_len, &parsed);
-
-    url_canon::Replacements<char> r;
-    typedef url_canon::Replacements<char> R;  // Clean up syntax.
-    SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme);
-    SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username);
-    SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password);
-    SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host);
-    SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port);
-    SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path);
-    SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query);
-    SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref);
-
-    std::string out_str;
-    url_canon::StdStringCanonOutput output(&out_str);
-    url_parse::Parsed out_parsed;
-    url_canon::ReplaceFileURL(cur.base, parsed,
-                              r, NULL, &output, &out_parsed);
-    output.Complete();
-
-    EXPECT_EQ(replace_cases[i].expected, out_str);
-  }
-}
-
-TEST(URLCanonTest, ReplacePathURL) {
-  ReplaceCase replace_cases[] = {
-      // Replace everything
-    {"data:foo", "javascript", NULL, NULL, NULL, NULL, "alert('foo?');", NULL, NULL, "javascript:alert('foo?');"},
-      // Replace nothing
-    {"data:foo", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "data:foo"},
-      // Replace one or the other
-    {"data:foo", "javascript", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "javascript:foo"},
-    {"data:foo", NULL, NULL, NULL, NULL, NULL, "bar", NULL, NULL, "data:bar"},
-    {"data:foo", NULL, NULL, NULL, NULL, NULL, kDeleteComp, NULL, NULL, "data:"},
-  };
-
-  for (size_t i = 0; i < arraysize(replace_cases); i++) {
-    const ReplaceCase& cur = replace_cases[i];
-    int base_len = static_cast<int>(strlen(cur.base));
-    url_parse::Parsed parsed;
-    url_parse::ParsePathURL(cur.base, base_len, &parsed);
-
-    url_canon::Replacements<char> r;
-    typedef url_canon::Replacements<char> R;  // Clean up syntax.
-    SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme);
-    SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username);
-    SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password);
-    SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host);
-    SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port);
-    SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path);
-    SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query);
-    SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref);
-
-    std::string out_str;
-    url_canon::StdStringCanonOutput output(&out_str);
-    url_parse::Parsed out_parsed;
-    url_canon::ReplacePathURL(cur.base, parsed,
-                              r, &output, &out_parsed);
-    output.Complete();
-
-    EXPECT_EQ(replace_cases[i].expected, out_str);
-  }
-}
-
-TEST(URLCanonTest, ReplaceMailtoURL) {
-  ReplaceCase replace_cases[] = {
-      // Replace everything
-    {"mailto:jon@foo.com?body=sup", "mailto", NULL, NULL, NULL, NULL, "addr1", "to=tony", NULL, "mailto:addr1?to=tony"},
-      // Replace nothing
-    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mailto:jon@foo.com?body=sup"},
-      // Replace the path
-    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", NULL, NULL, "mailto:jason?body=sup"},
-      // Replace the query
-    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "custom=1", NULL, "mailto:jon@foo.com?custom=1"},
-      // Replace the path and query
-    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", "custom=1", NULL, "mailto:jason?custom=1"},
-      // Set the query to empty (should leave trailing question mark)
-    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "", NULL, "mailto:jon@foo.com?"},
-      // Clear the query
-    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "|", NULL, "mailto:jon@foo.com"},
-      // Clear the path
-    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "|", NULL, NULL, "mailto:?body=sup"},
-      // Clear the path + query
-    {"mailto:", NULL, NULL, NULL, NULL, NULL, "|", "|", NULL, "mailto:"},
-      // Setting the ref should have no effect
-    {"mailto:addr1", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "BLAH", "mailto:addr1"},
-  };
-
-  for (size_t i = 0; i < arraysize(replace_cases); i++) {
-    const ReplaceCase& cur = replace_cases[i];
-    int base_len = static_cast<int>(strlen(cur.base));
-    url_parse::Parsed parsed;
-    url_parse::ParseMailtoURL(cur.base, base_len, &parsed);
-
-    url_canon::Replacements<char> r;
-    typedef url_canon::Replacements<char> R;
-    SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme);
-    SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username);
-    SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password);
-    SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host);
-    SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port);
-    SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path);
-    SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query);
-    SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref);
-
-    std::string out_str;
-    url_canon::StdStringCanonOutput output(&out_str);
-    url_parse::Parsed out_parsed;
-    url_canon::ReplaceMailtoURL(cur.base, parsed,
-                                r, &output, &out_parsed);
-    output.Complete();
-
-    EXPECT_EQ(replace_cases[i].expected, out_str);
-  }
-}
-
-TEST(URLCanonTest, CanonicalizeFileURL) {
-  struct URLCase {
-    const char* input;
-    const char* expected;
-    bool expected_success;
-    url_parse::Component expected_host;
-    url_parse::Component expected_path;
-  } cases[] = {
-#ifdef _WIN32
-      // Windows-style paths
-    {"file:c:\\foo\\bar.html", "file:///C:/foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 16)},
-    {"  File:c|////foo\\bar.html", "file:///C:////foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 19)},
-    {"file:", "file:///", true, url_parse::Component(), url_parse::Component(7, 1)},
-    {"file:UNChost/path", "file://unchost/path", true, url_parse::Component(7, 7), url_parse::Component(14, 5)},
-      // CanonicalizeFileURL supports absolute Windows style paths for IE
-      // compatability. Note that the caller must decide that this is a file
-      // URL itself so it can call the file canonicalizer. This is usually
-      // done automatically as part of relative URL resolving.
-    {"c:\\foo\\bar", "file:///C:/foo/bar", true, url_parse::Component(), url_parse::Component(7, 11)},
-    {"C|/foo/bar", "file:///C:/foo/bar", true, url_parse::Component(), url_parse::Component(7, 11)},
-    {"/C|\\foo\\bar", "file:///C:/foo/bar", true, url_parse::Component(), url_parse::Component(7, 11)},
-    {"//C|/foo/bar", "file:///C:/foo/bar", true, url_parse::Component(), url_parse::Component(7, 11)},
-    {"//server/file", "file://server/file", true, url_parse::Component(7, 6), url_parse::Component(13, 5)},
-    {"\\\\server\\file", "file://server/file", true, url_parse::Component(7, 6), url_parse::Component(13, 5)},
-    {"/\\server/file", "file://server/file", true, url_parse::Component(7, 6), url_parse::Component(13, 5)},
-      // We should preserve the number of slashes after the colon for IE
-      // compatability, except when there is none, in which case we should
-      // add one.
-    {"file:c:foo/bar.html", "file:///C:/foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 16)},
-    {"file:/\\/\\C:\\\\//foo\\bar.html", "file:///C:////foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 19)},
-      // Three slashes should be non-UNC, even if there is no drive spec (IE
-      // does this, which makes the resulting request invalid).
-    {"file:///foo/bar.txt", "file:///foo/bar.txt", true, url_parse::Component(), url_parse::Component(7, 12)},
-      // TODO(brettw) we should probably fail for invalid host names, which
-      // would change the expected result on this test. We also currently allow
-      // colon even though it's probably invalid, because its currently the
-      // "natural" result of the way the canonicalizer is written. There doesn't
-      // seem to be a strong argument for why allowing it here would be bad, so
-      // we just tolerate it and the load will fail later.
-    {"FILE:/\\/\\7:\\\\//foo\\bar.html", "file://7:////foo/bar.html", false, url_parse::Component(7, 2), url_parse::Component(9, 16)},
-    {"file:filer/home\\me", "file://filer/home/me", true, url_parse::Component(7, 5), url_parse::Component(12, 8)},
-      // Make sure relative paths can't go above the "C:"
-    {"file:///C:/foo/../../../bar.html", "file:///C:/bar.html", true, url_parse::Component(), url_parse::Component(7, 12)},
-      // Busted refs shouldn't make the whole thing fail.
-    {"file:///C:/asdf#\xc2", "file:///C:/asdf#\xef\xbf\xbd", true, url_parse::Component(), url_parse::Component(7, 8)},
-#else
-      // Unix-style paths
-    {"file:///home/me", "file:///home/me", true, url_parse::Component(), url_parse::Component(7, 8)},
-      // Windowsy ones should get still treated as Unix-style.
-    {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 16)},
-    {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true, url_parse::Component(), url_parse::Component(7, 19)},
-      // file: tests from WebKit (LayoutTests/fast/loader/url-parse-1.html)
-    {"//", "file:///", true, url_parse::Component(), url_parse::Component(7, 1)},
-    {"///", "file:///", true, url_parse::Component(), url_parse::Component(7, 1)},
-    {"///test", "file:///test", true, url_parse::Component(), url_parse::Component(7, 5)},
-    {"file://test", "file://test/", true, url_parse::Component(7, 4), url_parse::Component(11, 1)},
-    {"file://localhost",  "file://localhost/", true, url_parse::Component(7, 9), url_parse::Component(16, 1)},
-    {"file://localhost/", "file://localhost/", true, url_parse::Component(7, 9), url_parse::Component(16, 1)},
-    {"file://localhost/test", "file://localhost/test", true, url_parse::Component(7, 9), url_parse::Component(16, 5)},
-#endif  // _WIN32
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
-    int url_len = static_cast<int>(strlen(cases[i].input));
-    url_parse::Parsed parsed;
-    url_parse::ParseFileURL(cases[i].input, url_len, &parsed);
-
-    url_parse::Parsed out_parsed;
-    std::string out_str;
-    url_canon::StdStringCanonOutput output(&out_str);
-    bool success = url_canon::CanonicalizeFileURL(cases[i].input, url_len,
-                                                  parsed, NULL, &output,
-                                                  &out_parsed);
-    output.Complete();
-
-    EXPECT_EQ(cases[i].expected_success, success);
-    EXPECT_EQ(cases[i].expected, out_str);
-
-    // Make sure the spec was properly identified, the file canonicalizer has
-    // different code for writing the spec.
-    EXPECT_EQ(0, out_parsed.scheme.begin);
-    EXPECT_EQ(4, out_parsed.scheme.len);
-
-    EXPECT_EQ(cases[i].expected_host.begin, out_parsed.host.begin);
-    EXPECT_EQ(cases[i].expected_host.len, out_parsed.host.len);
-
-    EXPECT_EQ(cases[i].expected_path.begin, out_parsed.path.begin);
-    EXPECT_EQ(cases[i].expected_path.len, out_parsed.path.len);
-  }
-}
-
-TEST(URLCanonTest, CanonicalizePathURL) {
-  // Path URLs should get canonicalized schemes but nothing else.
-  struct PathCase {
-    const char* input;
-    const char* expected;
-  } path_cases[] = {
-    {"javascript:", "javascript:"},
-    {"JavaScript:Foo", "javascript:Foo"},
-    {":\":This /is interesting;?#", ":\":This /is interesting;?#"},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(path_cases); i++) {
-    int url_len = static_cast<int>(strlen(path_cases[i].input));
-    url_parse::Parsed parsed;
-    url_parse::ParsePathURL(path_cases[i].input, url_len, &parsed);
-
-    url_parse::Parsed out_parsed;
-    std::string out_str;
-    url_canon::StdStringCanonOutput output(&out_str);
-    bool success = url_canon::CanonicalizePathURL(path_cases[i].input, url_len,
-                                                  parsed, &output,
-                                                  &out_parsed);
-    output.Complete();
-
-    EXPECT_TRUE(success);
-    EXPECT_EQ(path_cases[i].expected, out_str);
-
-    EXPECT_EQ(0, out_parsed.host.begin);
-    EXPECT_EQ(-1, out_parsed.host.len);
-
-    // When we end with a colon at the end, there should be no path.
-    if (path_cases[i].input[url_len - 1] == ':') {
-      EXPECT_EQ(0, out_parsed.path.begin);
-      EXPECT_EQ(-1, out_parsed.path.len);
-    }
-  }
-}
-
-TEST(URLCanonTest, CanonicalizeMailtoURL) {
-  struct URLCase {
-    const char* input;
-    const char* expected;
-    bool expected_success;
-    url_parse::Component expected_path;
-    url_parse::Component expected_query;
-  } cases[] = {
-    {"mailto:addr1", "mailto:addr1", true, url_parse::Component(7, 5), url_parse::Component()},
-    {"mailto:addr1@foo.com", "mailto:addr1@foo.com", true, url_parse::Component(7, 13), url_parse::Component()},
-    // Trailing whitespace is stripped.
-    {"MaIlTo:addr1 \t ", "mailto:addr1", true, url_parse::Component(7, 5), url_parse::Component()},
-    {"MaIlTo:addr1?to=jon", "mailto:addr1?to=jon", true, url_parse::Component(7, 5), url_parse::Component(13,6)},
-    {"mailto:addr1,addr2", "mailto:addr1,addr2", true, url_parse::Component(7, 11), url_parse::Component()},
-    {"mailto:addr1, addr2", "mailto:addr1, addr2", true, url_parse::Component(7, 12), url_parse::Component()},
-    {"mailto:addr1%2caddr2", "mailto:addr1%2caddr2", true, url_parse::Component(7, 13), url_parse::Component()},
-    {"mailto:\xF0\x90\x8C\x80", "mailto:%F0%90%8C%80", true, url_parse::Component(7, 12), url_parse::Component()},
-    // Null character should be escaped to %00
-    {"mailto:addr1\0addr2?foo", "mailto:addr1%00addr2?foo", true, url_parse::Component(7, 13), url_parse::Component(21, 3)},
-    // Invalid -- UTF-8 encoded surrogate value.
-    {"mailto:\xed\xa0\x80", "mailto:%EF%BF%BD", false, url_parse::Component(7, 9), url_parse::Component()},
-    {"mailto:addr1?", "mailto:addr1?", true, url_parse::Component(7, 5), url_parse::Component(13, 0)},
-  };
-
-  // Define outside of loop to catch bugs where components aren't reset
-  url_parse::Parsed parsed;
-  url_parse::Parsed out_parsed;
-
-  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
-    int url_len = static_cast<int>(strlen(cases[i].input));
-    if (i == 8) {
-      // The 9th test case purposely has a '\0' in it -- don't count it
-      // as the string terminator.
-      url_len = 22;
-    }
-    url_parse::ParseMailtoURL(cases[i].input, url_len, &parsed);
-
-    std::string out_str;
-    url_canon::StdStringCanonOutput output(&out_str);
-    bool success = url_canon::CanonicalizeMailtoURL(cases[i].input, url_len,
-                                                    parsed, &output,
-                                                    &out_parsed);
-    output.Complete();
-
-    EXPECT_EQ(cases[i].expected_success, success);
-    EXPECT_EQ(cases[i].expected, out_str);
-
-    // Make sure the spec was properly identified
-    EXPECT_EQ(0, out_parsed.scheme.begin);
-    EXPECT_EQ(6, out_parsed.scheme.len);
-
-    EXPECT_EQ(cases[i].expected_path.begin, out_parsed.path.begin);
-    EXPECT_EQ(cases[i].expected_path.len, out_parsed.path.len);
-
-    EXPECT_EQ(cases[i].expected_query.begin, out_parsed.query.begin);
-    EXPECT_EQ(cases[i].expected_query.len, out_parsed.query.len);
-  }
-}
-
-#ifndef WIN32
-
-TEST(URLCanonTest, _itoa_s) {
-  // We fill the buffer with 0xff to ensure that it's getting properly
-  // null-terminated.  We also allocate one byte more than what we tell
-  // _itoa_s about, and ensure that the extra byte is untouched.
-  char buf[6];
-  memset(buf, 0xff, sizeof(buf));
-  EXPECT_EQ(0, url_canon::_itoa_s(12, buf, sizeof(buf) - 1, 10));
-  EXPECT_STREQ("12", buf);
-  EXPECT_EQ('\xFF', buf[3]);
-
-  // Test the edge cases - exactly the buffer size and one over
-  memset(buf, 0xff, sizeof(buf));
-  EXPECT_EQ(0, url_canon::_itoa_s(1234, buf, sizeof(buf) - 1, 10));
-  EXPECT_STREQ("1234", buf);
-  EXPECT_EQ('\xFF', buf[5]);
-
-  memset(buf, 0xff, sizeof(buf));
-  EXPECT_EQ(EINVAL, url_canon::_itoa_s(12345, buf, sizeof(buf) - 1, 10));
-  EXPECT_EQ('\xFF', buf[5]);  // should never write to this location
-
-  // Test the template overload (note that this will see the full buffer)
-  memset(buf, 0xff, sizeof(buf));
-  EXPECT_EQ(0, url_canon::_itoa_s(12, buf, 10));
-  EXPECT_STREQ("12", buf);
-  EXPECT_EQ('\xFF', buf[3]);
-
-  memset(buf, 0xff, sizeof(buf));
-  EXPECT_EQ(0, url_canon::_itoa_s(12345, buf, 10));
-  EXPECT_STREQ("12345", buf);
-
-  EXPECT_EQ(EINVAL, url_canon::_itoa_s(123456, buf, 10));
-
-  // Test that radix 16 is supported.
-  memset(buf, 0xff, sizeof(buf));
-  EXPECT_EQ(0, url_canon::_itoa_s(1234, buf, sizeof(buf) - 1, 16));
-  EXPECT_STREQ("4d2", buf);
-  EXPECT_EQ('\xFF', buf[5]);
-}
-
-TEST(URLCanonTest, _itow_s) {
-  // We fill the buffer with 0xff to ensure that it's getting properly
-  // null-terminated.  We also allocate one character more than what we tell
-  // _itow_s about, and ensure that the extra character is untouched.
-  char16 buf[6];
-  const char fill_mem = 0xff;
-  const char16 fill_char = 0xffff;
-  memset(buf, fill_mem, sizeof(buf));
-  EXPECT_EQ(0, url_canon::_itow_s(12, buf, sizeof(buf) / 2 - 1, 10));
-  EXPECT_EQ(WStringToUTF16(L"12"), string16(buf));
-  EXPECT_EQ(fill_char, buf[3]);
-
-  // Test the edge cases - exactly the buffer size and one over
-  EXPECT_EQ(0, url_canon::_itow_s(1234, buf, sizeof(buf) / 2 - 1, 10));
-  EXPECT_EQ(WStringToUTF16(L"1234"), string16(buf));
-  EXPECT_EQ(fill_char, buf[5]);
-
-  memset(buf, fill_mem, sizeof(buf));
-  EXPECT_EQ(EINVAL, url_canon::_itow_s(12345, buf, sizeof(buf) / 2 - 1, 10));
-  EXPECT_EQ(fill_char, buf[5]);  // should never write to this location
-
-  // Test the template overload (note that this will see the full buffer)
-  memset(buf, fill_mem, sizeof(buf));
-  EXPECT_EQ(0, url_canon::_itow_s(12, buf, 10));
-  EXPECT_EQ(WStringToUTF16(L"12"), string16(buf));
-  EXPECT_EQ(fill_char, buf[3]);
-
-  memset(buf, fill_mem, sizeof(buf));
-  EXPECT_EQ(0, url_canon::_itow_s(12345, buf, 10));
-  EXPECT_EQ(WStringToUTF16(L"12345"), string16(buf));
-
-  EXPECT_EQ(EINVAL, url_canon::_itow_s(123456, buf, 10));
-}
-
-#endif  // !WIN32
-
-// Returns true if the given two structures are the same.
-static bool ParsedIsEqual(const url_parse::Parsed& a,
-                          const url_parse::Parsed& b) {
-  return a.scheme.begin == b.scheme.begin && a.scheme.len == b.scheme.len &&
-         a.username.begin == b.username.begin && a.username.len == b.username.len &&
-         a.password.begin == b.password.begin && a.password.len == b.password.len &&
-         a.host.begin == b.host.begin && a.host.len == b.host.len &&
-         a.port.begin == b.port.begin && a.port.len == b.port.len &&
-         a.path.begin == b.path.begin && a.path.len == b.path.len &&
-         a.query.begin == b.query.begin && a.query.len == b.query.len &&
-         a.ref.begin == b.ref.begin && a.ref.len == b.ref.len;
-}
-
-TEST(URLCanonTest, ResolveRelativeURL) {
-  struct RelativeCase {
-    const char* base;      // Input base URL: MUST BE CANONICAL
-    bool is_base_hier;     // Is the base URL hierarchical
-    bool is_base_file;     // Tells us if the base is a file URL.
-    const char* test;      // Input URL to test against.
-    bool succeed_relative; // Whether we expect IsRelativeURL to succeed
-    bool is_rel;           // Whether we expect |test| to be relative or not.
-    bool succeed_resolve;  // Whether we expect ResolveRelativeURL to succeed.
-    const char* resolved;  // What we expect in the result when resolving.
-  } rel_cases[] = {
-      // Basic absolute input.
-    {"http://host/a", true, false, "http://another/", true, false, false, NULL},
-    {"http://host/a", true, false, "http:////another/", true, false, false, NULL},
-      // Empty relative URLs should only remove the ref part of the URL,
-      // leaving the rest unchanged.
-    {"http://foo/bar", true, false, "", true, true, true, "http://foo/bar"},
-    {"http://foo/bar#ref", true, false, "", true, true, true, "http://foo/bar"},
-    {"http://foo/bar#", true, false, "", true, true, true, "http://foo/bar"},
-      // Spaces at the ends of the relative path should be ignored.
-    {"http://foo/bar", true, false, "  another  ", true, true, true, "http://foo/another"},
-    {"http://foo/bar", true, false, "  .  ", true, true, true, "http://foo/"},
-    {"http://foo/bar", true, false, " \t ", true, true, true, "http://foo/bar"},
-      // Matching schemes without two slashes are treated as relative.
-    {"http://host/a", true, false, "http:path", true, true, true, "http://host/path"},
-    {"http://host/a/", true, false, "http:path", true, true, true, "http://host/a/path"},
-    {"http://host/a", true, false, "http:/path", true, true, true, "http://host/path"},
-    {"http://host/a", true, false, "HTTP:/path", true, true, true, "http://host/path"},
-      // Nonmatching schemes are absolute.
-    {"http://host/a", true, false, "https:host2", true, false, false, NULL},
-    {"http://host/a", true, false, "htto:/host2", true, false, false, NULL},
-      // Absolute path input
-    {"http://host/a", true, false, "/b/c/d", true, true, true, "http://host/b/c/d"},
-    {"http://host/a", true, false, "\\b\\c\\d", true, true, true, "http://host/b/c/d"},
-    {"http://host/a", true, false, "/b/../c", true, true, true, "http://host/c"},
-    {"http://host/a?b#c", true, false, "/b/../c", true, true, true, "http://host/c"},
-    {"http://host/a", true, false, "\\b/../c?x#y", true, true, true, "http://host/c?x#y"},
-    {"http://host/a?b#c", true, false, "/b/../c?x#y", true, true, true, "http://host/c?x#y"},
-      // Relative path input
-    {"http://host/a", true, false, "b", true, true, true, "http://host/b"},
-    {"http://host/a", true, false, "bc/de", true, true, true, "http://host/bc/de"},
-    {"http://host/a/", true, false, "bc/de?query#ref", true, true, true, "http://host/a/bc/de?query#ref"},
-    {"http://host/a/", true, false, ".", true, true, true, "http://host/a/"},
-    {"http://host/a/", true, false, "..", true, true, true, "http://host/"},
-    {"http://host/a/", true, false, "./..", true, true, true, "http://host/"},
-    {"http://host/a/", true, false, "../.", true, true, true, "http://host/"},
-    {"http://host/a/", true, false, "././.", true, true, true, "http://host/a/"},
-    {"http://host/a?query#ref", true, false, "../../../foo", true, true, true, "http://host/foo"},
-      // Query input
-    {"http://host/a", true, false, "?foo=bar", true, true, true, "http://host/a?foo=bar"},
-    {"http://host/a?x=y#z", true, false, "?", true, true, true, "http://host/a?"},
-    {"http://host/a?x=y#z", true, false, "?foo=bar#com", true, true, true, "http://host/a?foo=bar#com"},
-      // Ref input
-    {"http://host/a", true, false, "#ref", true, true, true, "http://host/a#ref"},
-    {"http://host/a#b", true, false, "#", true, true, true, "http://host/a#"},
-    {"http://host/a?foo=bar#hello", true, false, "#bye", true, true, true, "http://host/a?foo=bar#bye"},
-      // Non-hierarchical base: no relative handling. Relative input should
-      // error, and if a scheme is present, it should be treated as absolute.
-    {"data:foobar", false, false, "baz.html", false, false, false, NULL},
-    {"data:foobar", false, false, "data:baz", true, false, false, NULL},
-    {"data:foobar", false, false, "data:/base", true, false, false, NULL},
-      // Non-hierarchical base: absolute input should succeed.
-    {"data:foobar", false, false, "http://host/", true, false, false, NULL},
-    {"data:foobar", false, false, "http:host", true, false, false, NULL},
-      // Invalid schemes should be treated as relative.
-    {"http://foo/bar", true, false, "./asd:fgh", true, true, true, "http://foo/asd:fgh"},
-    {"http://foo/bar", true, false, ":foo", true, true, true, "http://foo/:foo"},
-    {"http://foo/bar", true, false, " hello world", true, true, true, "http://foo/hello%20world"},
-    {"data:asdf", false, false, ":foo", false, false, false, NULL},
-      // We should treat semicolons like any other character in URL resolving
-    {"http://host/a", true, false, ";foo", true, true, true, "http://host/;foo"},
-    {"http://host/a;", true, false, ";foo", true, true, true, "http://host/;foo"},
-    {"http://host/a", true, false, ";/../bar", true, true, true, "http://host/bar"},
-      // Relative URLs can also be written as "//foo/bar" which is relative to
-      // the scheme. In this case, it would take the old scheme, so for http
-      // the example would resolve to "http://foo/bar".
-    {"http://host/a", true, false, "//another", true, true, true, "http://another/"},
-    {"http://host/a", true, false, "//another/path?query#ref", true, true, true, "http://another/path?query#ref"},
-    {"http://host/a", true, false, "///another/path", true, true, true, "http://another/path"},
-    {"http://host/a", true, false, "//Another\\path", true, true, true, "http://another/path"},
-    {"http://host/a", true, false, "//", true, true, false, "http:"},
-      // IE will also allow one or the other to be a backslash to get the same
-      // behavior.
-    {"http://host/a", true, false, "\\/another/path", true, true, true, "http://another/path"},
-    {"http://host/a", true, false, "/\\Another\\path", true, true, true, "http://another/path"},
-#ifdef WIN32
-      // Resolving against Windows file base URLs.
-    {"file:///C:/foo", true, true, "http://host/", true, false, false, NULL},
-    {"file:///C:/foo", true, true, "bar", true, true, true, "file:///C:/bar"},
-    {"file:///C:/foo", true, true, "../../../bar.html", true, true, true, "file:///C:/bar.html"},
-    {"file:///C:/foo", true, true, "/../bar.html", true, true, true, "file:///C:/bar.html"},
-      // But two backslashes on Windows should be UNC so should be treated
-      // as absolute.
-    {"http://host/a", true, false, "\\\\another\\path", true, false, false, NULL},
-      // IE doesn't support drive specs starting with two slashes. It fails
-      // immediately and doesn't even try to load. We fix it up to either
-      // an absolute path or UNC depending on what it looks like.
-    {"file:///C:/something", true, true, "//c:/foo", true, true, true, "file:///C:/foo"},
-    {"file:///C:/something", true, true, "//localhost/c:/foo", true, true, true, "file:///C:/foo"},
-      // Windows drive specs should be allowed and treated as absolute.
-    {"file:///C:/foo", true, true, "c:", true, false, false, NULL},
-    {"file:///C:/foo", true, true, "c:/foo", true, false, false, NULL},
-    {"http://host/a", true, false, "c:\\foo", true, false, false, NULL},
-      // Relative paths with drive letters should be allowed when the base is
-      // also a file.
-    {"file:///C:/foo", true, true, "/z:/bar", true, true, true, "file:///Z:/bar"},
-      // Treat absolute paths as being off of the drive.
-    {"file:///C:/foo", true, true, "/bar", true, true, true, "file:///C:/bar"},
-    {"file://localhost/C:/foo", true, true, "/bar", true, true, true, "file://localhost/C:/bar"},
-    {"file:///C:/foo/com/", true, true, "/bar", true, true, true, "file:///C:/bar"},
-      // On Windows, two slashes without a drive letter when the base is a file
-      // means that the path is UNC.
-    {"file:///C:/something", true, true, "//somehost/path", true, true, true, "file://somehost/path"},
-    {"file:///C:/something", true, true, "/\\//somehost/path", true, true, true, "file://somehost/path"},
-#else
-      // On Unix we fall back to relative behavior since there's nothing else
-      // reasonable to do.
-    {"http://host/a", true, false, "\\\\Another\\path", true, true, true, "http://another/path"},
-#endif
-      // Even on Windows, we don't allow relative drive specs when the base
-      // is not file.
-    {"http://host/a", true, false, "/c:\\foo", true, true, true, "http://host/c:/foo"},
-    {"http://host/a", true, false, "//c:\\foo", true, true, true, "http://c/foo"},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE(rel_cases); i++) {
-    const RelativeCase& cur_case = rel_cases[i];
-
-    url_parse::Parsed parsed;
-    int base_len = static_cast<int>(strlen(cur_case.base));
-    if (cur_case.is_base_file)
-      url_parse::ParseFileURL(cur_case.base, base_len, &parsed);
-    else if (cur_case.is_base_hier)
-      url_parse::ParseStandardURL(cur_case.base, base_len, &parsed);
-    else
-      url_parse::ParsePathURL(cur_case.base, base_len, &parsed);
-
-    // First see if it is relative.
-    int test_len = static_cast<int>(strlen(cur_case.test));
-    bool is_relative;
-    url_parse::Component relative_component;
-    bool succeed_is_rel = url_canon::IsRelativeURL(
-        cur_case.base, parsed, cur_case.test, test_len, cur_case.is_base_hier,
-        &is_relative, &relative_component);
-
-    EXPECT_EQ(cur_case.succeed_relative, succeed_is_rel) <<
-        "succeed is rel failure on " << cur_case.test;
-    EXPECT_EQ(cur_case.is_rel, is_relative) <<
-        "is rel failure on " << cur_case.test;
-    // Now resolve it.
-    if (succeed_is_rel && is_relative && cur_case.is_rel) {
-      std::string resolved;
-      url_canon::StdStringCanonOutput output(&resolved);
-      url_parse::Parsed resolved_parsed;
-
-      bool succeed_resolve = url_canon::ResolveRelativeURL(
-          cur_case.base, parsed, cur_case.is_base_file,
-          cur_case.test, relative_component, NULL, &output, &resolved_parsed);
-      output.Complete();
-
-      EXPECT_EQ(cur_case.succeed_resolve, succeed_resolve);
-      EXPECT_EQ(cur_case.resolved, resolved) << " on " << cur_case.test;
-
-      // Verify that the output parsed structure is the same as parsing
-      // the URL freshly.
-      url_parse::Parsed ref_parsed;
-      int resolved_len = static_cast<int>(resolved.size());
-      if (cur_case.is_base_file)
-        url_parse::ParseFileURL(resolved.c_str(), resolved_len, &ref_parsed);
-      else if (cur_case.is_base_hier)
-        url_parse::ParseStandardURL(resolved.c_str(), resolved_len, &ref_parsed);
-      else
-        url_parse::ParsePathURL(resolved.c_str(), resolved_len, &ref_parsed);
-      EXPECT_TRUE(ParsedIsEqual(ref_parsed, resolved_parsed));
-    }
-  }
-}
-
-// It used to be when we did a replacement with a long buffer of UTF-16
-// characters, we would get invalid data in the URL. This is because the buffer
-// it used to hold the UTF-8 data was resized, while some pointers were still
-// kept to the old buffer that was removed.
-TEST(URLCanonTest, ReplacementOverflow) {
-  const char src[] = "file:///C:/foo/bar";
-  int src_len = static_cast<int>(strlen(src));
-  url_parse::Parsed parsed;
-  url_parse::ParseFileURL(src, src_len, &parsed);
-
-  // Override two components, the path with something short, and the query with
-  // something long enough to trigger the bug.
-  url_canon::Replacements<char16> repl;
-  string16 new_query;
-  for (int i = 0; i < 4800; i++)
-    new_query.push_back('a');
-
-  string16 new_path(WStringToUTF16(L"/foo"));
-  repl.SetPath(new_path.c_str(), url_parse::Component(0, 4));
-  repl.SetQuery(new_query.c_str(),
-                url_parse::Component(0, static_cast<int>(new_query.length())));
-
-  // Call ReplaceComponents on the string. It doesn't matter if we call it for
-  // standard URLs, file URLs, etc, since they will go to the same replacement
-  // function that was buggy.
-  url_parse::Parsed repl_parsed;
-  std::string repl_str;
-  url_canon::StdStringCanonOutput repl_output(&repl_str);
-  url_canon::ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed);
-  repl_output.Complete();
-
-  // Generate the expected string and check.
-  std::string expected("file:///foo?");
-  for (size_t i = 0; i < new_query.length(); i++)
-    expected.push_back('a');
-  EXPECT_TRUE(expected == repl_str);
-}

diff --git a/googleurl/src/url_common.h b/googleurl/src/url_common.h
deleted file mode 100644
index 7e7e27a..0000000
--- a/googleurl/src/url_common.h
+++ /dev/null

@@ -1,48 +0,0 @@
-// Copyright 2010, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef GOOGLEURL_SRC_URL_COMMON_H__
-#define GOOGLEURL_SRC_URL_COMMON_H__
-
-#if !defined(GURL_IMPLEMENTATION)
-#define GURL_IMPLEMENTATION 0
-#endif
-
-#if defined(WIN32) && defined(GURL_DLL)
-#if GURL_IMPLEMENTATION
-#define GURL_API __declspec(dllexport)
-#else
-#define GURL_API __declspec(dllimport)
-#endif
-#else
-#define GURL_API
-#endif
-
-#endif  // GOOGLEURL_SRC_URL_COMMON_H__
-

diff --git a/googleurl/src/url_file.h b/googleurl/src/url_file.h
deleted file mode 100644
index c1b8ac9..0000000
--- a/googleurl/src/url_file.h
+++ /dev/null

@@ -1,108 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Provides shared functions used by the internals of the parser and
-// canonicalizer for file URLs. Do not use outside of these modules.
-
-#ifndef GOOGLEURL_SRC_URL_FILE_H__
-#define GOOGLEURL_SRC_URL_FILE_H__
-
-#include "googleurl/src/url_parse_internal.h"
-
-namespace url_parse {
-
-#ifdef WIN32
-
-// We allow both "c:" and "c|" as drive identifiers.
-inline bool IsWindowsDriveSeparator(char16 ch) {
-  return ch == ':' || ch == '|';
-}
-inline bool IsWindowsDriveLetter(char16 ch) {
-  return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
-}
-
-#endif  // WIN32
-
-// Returns the index of the next slash in the input at or after the given
-// index, or spec_len if the end of the input is reached.
-template<typename CHAR>
-inline int FindNextSlash(const CHAR* spec, int begin_index, int spec_len) {
-  int idx = begin_index;
-  while (idx < spec_len && !IsURLSlash(spec[idx]))
-    idx++;
-  return idx;
-}
-
-#ifdef WIN32
-
-// Returns true if the start_offset in the given spec looks like it begins a
-// drive spec, for example "c:". This function explicitly handles start_offset
-// values that are equal to or larger than the spec_len to simplify callers.
-//
-// If this returns true, the spec is guaranteed to have a valid drive letter
-// plus a drive separator (':' or '|') starting at |start_offset|.
-template<typename CHAR>
-inline bool DoesBeginWindowsDriveSpec(const CHAR* spec, int start_offset,
-                                      int spec_len) {
-  int remaining_len = spec_len - start_offset;
-  if (remaining_len < 2)
-    return false;  // Not enough room.
-  if (!IsWindowsDriveLetter(spec[start_offset]))
-    return false;  // Doesn't start with a valid drive letter.
-  if (!IsWindowsDriveSeparator(spec[start_offset + 1]))
-    return false;  // Isn't followed with a drive separator.
-  return true;
-}
-
-// Returns true if the start_offset in the given text looks like it begins a
-// UNC path, for example "\\". This function explicitly handles start_offset
-// values that are equal to or larger than the spec_len to simplify callers.
-//
-// When strict_slashes is set, this function will only accept backslashes as is
-// standard for Windows. Otherwise, it will accept forward slashes as well
-// which we use for a lot of URL handling.
-template<typename CHAR>
-inline bool DoesBeginUNCPath(const CHAR* text,
-                             int start_offset,
-                             int len,
-                             bool strict_slashes) {
-  int remaining_len = len - start_offset;
-  if (remaining_len < 2)
-    return false;
-
-  if (strict_slashes)
-    return text[start_offset] == '\\' && text[start_offset + 1] == '\\';
-  return IsURLSlash(text[start_offset]) && IsURLSlash(text[start_offset + 1]);
-}
-
-#endif  // WIN32
-
-}  // namespace url_parse
-
-#endif  // GOOGLEURL_SRC_URL_FILE_H__

diff --git a/googleurl/src/url_parse_internal.h b/googleurl/src/url_parse_internal.h
deleted file mode 100644
index 61bd068..0000000
--- a/googleurl/src/url_parse_internal.h
+++ /dev/null

@@ -1,112 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Contains common inline helper functions used by the URL parsing routines.
-
-#ifndef GOOGLEURL_SRC_URL_PARSE_INTERNAL_H__
-#define GOOGLEURL_SRC_URL_PARSE_INTERNAL_H__
-
-#include "googleurl/src/url_parse.h"
-
-namespace url_parse {
-
-// We treat slashes and backslashes the same for IE compatibility.
-inline bool IsURLSlash(char16 ch) {
-  return ch == '/' || ch == '\\';
-}
-
-// Returns true if we should trim this character from the URL because it is a
-// space or a control character.
-inline bool ShouldTrimFromURL(char16 ch) {
-  return ch <= ' ';
-}
-
-// Given an already-initialized begin index and length, this shrinks the range
-// to eliminate "should-be-trimmed" characters. Note that the length does *not*
-// indicate the length of untrimmed data from |*begin|, but rather the position
-// in the input string (so the string starts at character |*begin| in the spec,
-// and goes until |*len|).
-template<typename CHAR>
-inline void TrimURL(const CHAR* spec, int* begin, int* len) {
-  // Strip leading whitespace and control characters.
-  while (*begin < *len && ShouldTrimFromURL(spec[*begin]))
-    (*begin)++;
-
-  // Strip trailing whitespace and control characters. The *len > *begin test
-  // handles an all-blank input; we don't want to back up past its start.
-  while (*len > *begin && ShouldTrimFromURL(spec[*len - 1]))
-    (*len)--;
-}
-
-// Counts the number of consecutive slashes starting at the given offset
-// in the given string of the given length.
-template<typename CHAR>
-inline int CountConsecutiveSlashes(const CHAR *str,
-                                   int begin_offset, int str_len) {
-  int count = 0;
-  while (begin_offset + count < str_len &&
-         IsURLSlash(str[begin_offset + count]))
-    ++count;
-  return count;
-}
-
-// Internal functions in url_parse.cc that parse the path, that is, everything
-// following the authority section. The input is the range of everything
-// following the authority section, and the output is the identified ranges.
-//
-// This is designed for the file URL parser or other consumers who may do
-// special stuff at the beginning, but want regular path parsing; it just
-// maps to the internal parsing function for paths.
-void ParsePathInternal(const char* spec,
-                       const Component& path,
-                       Component* filepath,
-                       Component* query,
-                       Component* ref);
-void ParsePathInternal(const char16* spec,
-                       const Component& path,
-                       Component* filepath,
-                       Component* query,
-                       Component* ref);
-
-
-// Given a spec and the index of the character after the colon following the
-// scheme, this parses it and fills in the structure. Every item in the parsed
-// structure is filled EXCEPT for the scheme, which is untouched.
-void ParseAfterScheme(const char* spec,
-                      int spec_len,
-                      int after_scheme,
-                      Parsed* parsed);
-void ParseAfterScheme(const char16* spec,
-                      int spec_len,
-                      int after_scheme,
-                      Parsed* parsed);
-
-}  // namespace url_parse
-
-#endif  // GOOGLEURL_SRC_URL_PARSE_INTERNAL_H__

diff --git a/googleurl/src/url_test_utils.h b/googleurl/src/url_test_utils.h
deleted file mode 100644
index fdadf7f..0000000
--- a/googleurl/src/url_test_utils.h
+++ /dev/null

@@ -1,85 +0,0 @@
-// Copyright 2007 Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Convenience functions for string conversions.
-// These are mostly intended for use in unit tests.
-
-#ifndef GOOGLEURL_SRC_URL_TEST_UTILS_H__
-#define GOOGLEURL_SRC_URL_TEST_UTILS_H__
-
-#include <string>
-
-#include "base/string16.h"
-#include "googleurl/src/url_canon_internal.h"
-#include "testing/base/public/gunit.h"
-
-namespace url_test_utils {
-
-// Converts a UTF-16 string from native wchar_t format to char16, by
-// truncating each character to char16.  This is not meant to handle true
-// UTF-32 encoded strings.
-inline string16 WStringToUTF16(const wchar_t* src) {
-  string16 str;
-  int length = static_cast<int>(wcslen(src));
-  for (int i = 0; i < length; ++i) {
-    str.push_back(static_cast<char16>(src[i]));
-  }
-  return str;
-}
-
-// Converts a string from UTF-8 to UTF-16
-inline string16 ConvertUTF8ToUTF16(const std::string& src) {
-  int length = static_cast<int>(src.length());
-  EXPECT_LT(length, 1024);
-  url_canon::RawCanonOutputW<1024> output;
-  EXPECT_TRUE(url_canon::ConvertUTF8ToUTF16(src.data(), length, &output));
-  return string16(output.data(), output.length());
-}
-
-// Converts a string from UTF-16 to UTF-8
-inline std::string ConvertUTF16ToUTF8(const string16& src) {
-  std::string str;
-  url_canon::StdStringCanonOutput output(&str);
-  EXPECT_TRUE(url_canon::ConvertUTF16ToUTF8(src.data(),
-                                            static_cast<int>(src.length()),
-                                            &output));
-  output.Complete();
-  return str;
-}
-
-}  // namespace url_test_utils
-
-// This operator allows EXPECT_EQ(astring16, anotherstring16); to work.
-inline std::ostream& operator<<(std::ostream& os,
-                                const string16& str) {
-  // Convert to UTF-8 and print the string
-  return os << url_test_utils::ConvertUTF16ToUTF8(str);
-}
-
-#endif  // GOOGLEURL_SRC_URL_TEST_UTILS_H__

diff --git a/googleurl/src/url_util.cc b/googleurl/src/url_util.cc
deleted file mode 100644
index 7e100aa..0000000
--- a/googleurl/src/url_util.cc
+++ /dev/null

@@ -1,553 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <string.h>
-#include <vector>
-
-#include "googleurl/src/url_util.h"
-
-#include "base/logging.h"
-#include "googleurl/src/url_canon_internal.h"
-#include "googleurl/src/url_file.h"
-
-namespace url_util {
-
-namespace {
-
-// ASCII-specific tolower.  The standard library's tolower is locale sensitive,
-// so we don't want to use it here.
-template <class Char> inline Char ToLowerASCII(Char c) {
-  return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
-}
-
-// Backend for LowerCaseEqualsASCII.
-template<typename Iter>
-inline bool DoLowerCaseEqualsASCII(Iter a_begin, Iter a_end, const char* b) {
-  for (Iter it = a_begin; it != a_end; ++it, ++b) {
-    if (!*b || ToLowerASCII(*it) != *b)
-      return false;
-  }
-  return *b == 0;
-}
-
-const char kFileScheme[] = "file";  // Used in a number of places.
-const char kMailtoScheme[] = "mailto";
-
-const int kNumStandardURLSchemes = 7;
-const char* kStandardURLSchemes[kNumStandardURLSchemes] = {
-  "http",
-  "https",
-  kFileScheme,  // Yes, file urls can have a hostname!
-  "ftp",
-  "gopher",
-  "ws",  // WebSocket.
-  "wss",  // WebSocket secure.
-};
-
-// List of the currently installed standard schemes. This list is lazily
-// initialized by InitStandardSchemes and, unless Shutdown() is called, is
-// leaked at exit to prevent any destructors from being called that will slow
-// us down or cause problems.
-std::vector<const char*>* standard_schemes = NULL;
-
-// See the LockStandardSchemes declaration in the header.
-bool standard_schemes_locked = false;
-
-// Ensures that the standard_schemes list is initialized, does nothing if it
-// already has values.
-void InitStandardSchemes() {
-  if (standard_schemes)
-    return;
-  standard_schemes = new std::vector<const char*>;
-  for (int i = 0; i < kNumStandardURLSchemes; i++)
-    standard_schemes->push_back(kStandardURLSchemes[i]);
-}
-
-// Given a string and a range inside the string, compares it to the given
-// lower-case |compare_to| buffer.
-template<typename CHAR>
-inline bool CompareSchemeComponent(const CHAR* spec,
-                                   const url_parse::Component& component,
-                                   const char* compare_to) {
-  if (!component.is_nonempty())
-    return compare_to[0] == 0;  // When component is empty, match empty scheme.
-  return LowerCaseEqualsASCII(&spec[component.begin],
-                              &spec[component.end()],
-                              compare_to);
-}
-
-// Returns true if the given scheme identified by |scheme| within |spec| is one
-// of the registered "standard" schemes.
-template<typename CHAR>
-bool DoIsStandard(const CHAR* spec, const url_parse::Component& scheme) {
-  if (!scheme.is_nonempty())
-    return false;  // Empty or invalid schemes are non-standard.
-
-  InitStandardSchemes();
-  for (size_t i = 0; i < standard_schemes->size(); i++) {
-    if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()],
-                             standard_schemes->at(i)))
-      return true;
-  }
-  return false;
-}
-
-template<typename CHAR>
-bool DoFindAndCompareScheme(const CHAR* str,
-                            int str_len,
-                            const char* compare,
-                            url_parse::Component* found_scheme) {
-  // Before extracting scheme, canonicalize the URL to remove any whitespace.
-  // This matches the canonicalization done in DoCanonicalize function.
-  url_canon::RawCanonOutputT<CHAR> whitespace_buffer;
-  int spec_len;
-  const CHAR* spec = RemoveURLWhitespace(str, str_len,
-                                         &whitespace_buffer, &spec_len);
-
-  url_parse::Component our_scheme;
-  if (!url_parse::ExtractScheme(spec, spec_len, &our_scheme)) {
-    // No scheme.
-    if (found_scheme)
-      *found_scheme = url_parse::Component();
-    return false;
-  }
-  if (found_scheme)
-    *found_scheme = our_scheme;
-  return CompareSchemeComponent(spec, our_scheme, compare);
-}
-
-template<typename CHAR>
-bool DoCanonicalize(const CHAR* in_spec, int in_spec_len,
-                    url_canon::CharsetConverter* charset_converter,
-                    url_canon::CanonOutput* output,
-                    url_parse::Parsed* output_parsed) {
-  // Remove any whitespace from the middle of the input URL, possibly
-  // copying to the new buffer.
-  url_canon::RawCanonOutputT<CHAR> whitespace_buffer;
-  int spec_len;
-  const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,
-                                         &whitespace_buffer, &spec_len);
-
-  url_parse::Parsed parsed_input;
-#ifdef WIN32
-  // For Windows, we allow things that look like absolute Windows paths to be
-  // fixed up magically to file URLs. This is done for IE compatibility. For
-  // example, this will change "c:/foo" into a file URL rather than treating
-  // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").
-  // There is similar logic in url_canon_relative.cc for relative URLs.
-  //
-  // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar", which
-  // has no meaning as an absolute path name). This is because browsers on Mac
-  // & Unix don't generally do this, so there is no compatibility reason for
-  // doing so.
-  if (url_parse::DoesBeginUNCPath(spec, 0, spec_len, false) ||
-      url_parse::DoesBeginWindowsDriveSpec(spec, 0, spec_len)) {
-    url_parse::ParseFileURL(spec, spec_len, &parsed_input);
-    return url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input,
-                                           charset_converter,
-                                           output, output_parsed);
-  }
-#endif
-
-  url_parse::Component scheme;
-  if (!url_parse::ExtractScheme(spec, spec_len, &scheme))
-    return false;
-
-  // This is the parsed version of the input URL, we have to canonicalize it
-  // before storing it in our object.
-  bool success;
-  if (CompareSchemeComponent(spec, scheme, kFileScheme)) {
-    // File URLs are special.
-    url_parse::ParseFileURL(spec, spec_len, &parsed_input);
-    success = url_canon::CanonicalizeFileURL(spec, spec_len, parsed_input,
-                                             charset_converter,
-                                             output, output_parsed);
-
-  } else if (DoIsStandard(spec, scheme)) {
-    // All "normal" URLs.
-    url_parse::ParseStandardURL(spec, spec_len, &parsed_input);
-    success = url_canon::CanonicalizeStandardURL(spec, spec_len, parsed_input,
-                                                 charset_converter,
-                                                 output, output_parsed);
-
-  } else if (CompareSchemeComponent(spec, scheme, kMailtoScheme)) {
-    // Mailto URLs are treated like a standard URL with only a scheme, path,
-    // and query.
-    url_parse::ParseMailtoURL(spec, spec_len, &parsed_input);
-    success = url_canon::CanonicalizeMailtoURL(spec, spec_len, parsed_input,
-                                               output, output_parsed);
-
-  } else {
-    // "Weird" URLs like data: and javascript:
-    url_parse::ParsePathURL(spec, spec_len, &parsed_input);
-    success = url_canon::CanonicalizePathURL(spec, spec_len, parsed_input,
-                                             output, output_parsed);
-  }
-  return success;
-}
-
-template<typename CHAR>
-bool DoResolveRelative(const char* base_spec,
-                       int base_spec_len,
-                       const url_parse::Parsed& base_parsed,
-                       const CHAR* in_relative,
-                       int in_relative_length,
-                       url_canon::CharsetConverter* charset_converter,
-                       url_canon::CanonOutput* output,
-                       url_parse::Parsed* output_parsed) {
-  // Remove any whitespace from the middle of the relative URL, possibly
-  // copying to the new buffer.
-  url_canon::RawCanonOutputT<CHAR> whitespace_buffer;
-  int relative_length;
-  const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,
-                                             &whitespace_buffer,
-                                             &relative_length);
-
-  // See if our base URL should be treated as "standard".
-  bool standard_base_scheme =
-      base_parsed.scheme.is_nonempty() &&
-      DoIsStandard(base_spec, base_parsed.scheme);
-
-  bool is_relative;
-  url_parse::Component relative_component;
-  if (!url_canon::IsRelativeURL(base_spec, base_parsed,
-                                relative, relative_length,
-                                standard_base_scheme,
-                                &is_relative,
-                                &relative_component)) {
-    // Error resolving.
-    return false;
-  }
-
-  if (is_relative) {
-    // Relative, resolve and canonicalize.
-    bool file_base_scheme = base_parsed.scheme.is_nonempty() &&
-        CompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);
-    return url_canon::ResolveRelativeURL(base_spec, base_parsed,
-                                         file_base_scheme, relative,
-                                         relative_component, charset_converter,
-                                         output, output_parsed);
-  }
-
-  // Not relative, canonicalize the input.
-  return DoCanonicalize(relative, relative_length, charset_converter,
-                        output, output_parsed);
-}
-
-template<typename CHAR>
-bool DoReplaceComponents(const char* spec,
-                         int spec_len,
-                         const url_parse::Parsed& parsed,
-                         const url_canon::Replacements<CHAR>& replacements,
-                         url_canon::CharsetConverter* charset_converter,
-                         url_canon::CanonOutput* output,
-                         url_parse::Parsed* out_parsed) {
-  // If the scheme is overridden, just do a simple string substitution and
-  // reparse the whole thing. There are lots of edge cases that we really don't
-  // want to deal with. Like what happens if I replace "http://e:8080/foo"
-  // with a file. Does it become "file:///E:/8080/foo" where the port number
-  // becomes part of the path? Parsing that string as a file URL says "yes"
-  // but almost no sane rule for dealing with the components individually would
-  // come up with that.
-  //
-  // Why allow these crazy cases at all? Programmatically, there is almost no
-  // case for replacing the scheme. The most common case for hitting this is
-  // in JS when building up a URL using the location object. In this case, the
-  // JS code expects the string substitution behavior:
-  //   http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3
-  if (replacements.IsSchemeOverridden()) {
-    // Canonicalize the new scheme so it is 8-bit and can be concatenated with
-    // the existing spec.
-    url_canon::RawCanonOutput<128> scheme_replaced;
-    url_parse::Component scheme_replaced_parsed;
-    url_canon::CanonicalizeScheme(
-        replacements.sources().scheme,
-        replacements.components().scheme,
-        &scheme_replaced, &scheme_replaced_parsed);
-
-    // We can assume that the input is canonicalized, which means it always has
-    // a colon after the scheme (or where the scheme would be).
-    int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1
-                                                    : 1;
-    if (spec_len - spec_after_colon > 0) {
-      scheme_replaced.Append(&spec[spec_after_colon],
-                             spec_len - spec_after_colon);
-    }
-
-    // We now need to completely re-parse the resulting string since its meaning
-    // may have changed with the different scheme.
-    url_canon::RawCanonOutput<128> recanonicalized;
-    url_parse::Parsed recanonicalized_parsed;
-    DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(),
-                   charset_converter,
-                   &recanonicalized, &recanonicalized_parsed);
-
-    // Recurse using the version with the scheme already replaced. This will now
-    // use the replacement rules for the new scheme.
-    //
-    // Warning: this code assumes that ReplaceComponents will re-check all
-    // components for validity. This is because we can't fail if DoCanonicalize
-    // failed above since theoretically the thing making it fail could be
-    // getting replaced here. If ReplaceComponents didn't re-check everything,
-    // we wouldn't know if something *not* getting replaced is a problem.
-    // If the scheme-specific replacers are made more intelligent so they don't
-    // re-check everything, we should instead recanonicalize the whole thing
-    // after this call to check validity (this assumes replacing the scheme is
-    // much much less common than other types of replacements, like clearing the
-    // ref).
-    url_canon::Replacements<CHAR> replacements_no_scheme = replacements;
-    replacements_no_scheme.SetScheme(NULL, url_parse::Component());
-    return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),
-                               recanonicalized_parsed, replacements_no_scheme,
-                               charset_converter, output, out_parsed);
-  }
-
-  // If we get here, then we know the scheme doesn't need to be replaced, so can
-  // just key off the scheme in the spec to know how to do the replacements.
-  if (CompareSchemeComponent(spec, parsed.scheme, kFileScheme)) {
-    return url_canon::ReplaceFileURL(spec, parsed, replacements,
-                                     charset_converter, output, out_parsed);
-  }
-  if (DoIsStandard(spec, parsed.scheme)) {
-    return url_canon::ReplaceStandardURL(spec, parsed, replacements,
-                                         charset_converter, output, out_parsed);
-  }
-  if (CompareSchemeComponent(spec, parsed.scheme, kMailtoScheme)) {
-     return url_canon::ReplaceMailtoURL(spec, parsed, replacements,
-                                        output, out_parsed);
-  }
-
-  // Default is a path URL.
-  return url_canon::ReplacePathURL(spec, parsed, replacements,
-                                   output, out_parsed);
-}
-
-}  // namespace
-
-void Initialize() {
-  InitStandardSchemes();
-}
-
-void Shutdown() {
-  if (standard_schemes) {
-    delete standard_schemes;
-    standard_schemes = NULL;
-  }
-}
-
-void AddStandardScheme(const char* new_scheme) {
-  // If this assert triggers, it means you've called AddStandardScheme after
-  // LockStandardSchemes has been called (see the header file for
-  // LockStandardSchemes for more).
-  //
-  // This normally means you're trying to set up a new standard scheme too late
-  // in your application's init process. Locate where your app does this
-  // initialization and calls LockStandardSchemes, and add your new standard
-  // scheme there.
-  DCHECK(!standard_schemes_locked) <<
-      "Trying to add a standard scheme after the list has been locked.";
-
-  size_t scheme_len = strlen(new_scheme);
-  if (scheme_len == 0)
-    return;
-
-  // Duplicate the scheme into a new buffer and add it to the list of standard
-  // schemes. This pointer will be leaked on shutdown.
-  char* dup_scheme = new char[scheme_len + 1];
-  memcpy(dup_scheme, new_scheme, scheme_len + 1);
-
-  InitStandardSchemes();
-  standard_schemes->push_back(dup_scheme);
-}
-
-void LockStandardSchemes() {
-  standard_schemes_locked = true;
-}
-
-bool IsStandard(const char* spec, const url_parse::Component& scheme) {
-  return DoIsStandard(spec, scheme);
-}
-
-bool IsStandard(const char16* spec, const url_parse::Component& scheme) {
-  return DoIsStandard(spec, scheme);
-}
-
-bool FindAndCompareScheme(const char* str,
-                          int str_len,
-                          const char* compare,
-                          url_parse::Component* found_scheme) {
-  return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
-}
-
-bool FindAndCompareScheme(const char16* str,
-                          int str_len,
-                          const char* compare,
-                          url_parse::Component* found_scheme) {
-  return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
-}
-
-bool Canonicalize(const char* spec,
-                  int spec_len,
-                  url_canon::CharsetConverter* charset_converter,
-                  url_canon::CanonOutput* output,
-                  url_parse::Parsed* output_parsed) {
-  return DoCanonicalize(spec, spec_len, charset_converter,
-                        output, output_parsed);
-}
-
-bool Canonicalize(const char16* spec,
-                  int spec_len,
-                  url_canon::CharsetConverter* charset_converter,
-                  url_canon::CanonOutput* output,
-                  url_parse::Parsed* output_parsed) {
-  return DoCanonicalize(spec, spec_len, charset_converter,
-                        output, output_parsed);
-}
-
-bool ResolveRelative(const char* base_spec,
-                     int base_spec_len,
-                     const url_parse::Parsed& base_parsed,
-                     const char* relative,
-                     int relative_length,
-                     url_canon::CharsetConverter* charset_converter,
-                     url_canon::CanonOutput* output,
-                     url_parse::Parsed* output_parsed) {
-  return DoResolveRelative(base_spec, base_spec_len, base_parsed,
-                           relative, relative_length,
-                           charset_converter, output, output_parsed);
-}
-
-bool ResolveRelative(const char* base_spec,
-                     int base_spec_len,
-                     const url_parse::Parsed& base_parsed,
-                     const char16* relative,
-                     int relative_length,
-                     url_canon::CharsetConverter* charset_converter,
-                     url_canon::CanonOutput* output,
-                     url_parse::Parsed* output_parsed) {
-  return DoResolveRelative(base_spec, base_spec_len, base_parsed,
-                           relative, relative_length,
-                           charset_converter, output, output_parsed);
-}
-
-bool ReplaceComponents(const char* spec,
-                       int spec_len,
-                       const url_parse::Parsed& parsed,
-                       const url_canon::Replacements<char>& replacements,
-                       url_canon::CharsetConverter* charset_converter,
-                       url_canon::CanonOutput* output,
-                       url_parse::Parsed* out_parsed) {
-  return DoReplaceComponents(spec, spec_len, parsed, replacements,
-                             charset_converter, output, out_parsed);
-}
-
-bool ReplaceComponents(const char* spec,
-                       int spec_len,
-                       const url_parse::Parsed& parsed,
-                       const url_canon::Replacements<char16>& replacements,
-                       url_canon::CharsetConverter* charset_converter,
-                       url_canon::CanonOutput* output,
-                       url_parse::Parsed* out_parsed) {
-  return DoReplaceComponents(spec, spec_len, parsed, replacements,
-                             charset_converter, output, out_parsed);
-}
-
-// Front-ends for LowerCaseEqualsASCII.
-bool LowerCaseEqualsASCII(const char* a_begin,
-                          const char* a_end,
-                          const char* b) {
-  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
-}
-
-bool LowerCaseEqualsASCII(const char* a_begin,
-                          const char* a_end,
-                          const char* b_begin,
-                          const char* b_end) {
-  while (a_begin != a_end && b_begin != b_end &&
-         ToLowerASCII(*a_begin) == *b_begin) {
-    a_begin++;
-    b_begin++;
-  }
-  return a_begin == a_end && b_begin == b_end;
-}
-
-bool LowerCaseEqualsASCII(const char16* a_begin,
-                          const char16* a_end,
-                          const char* b) {
-  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
-}
-
-void DecodeURLEscapeSequences(const char* input, int length,
-                              url_canon::CanonOutputW* output) {
-  url_canon::RawCanonOutputT<char> unescaped_chars;
-  for (int i = 0; i < length; i++) {
-    if (input[i] == '%') {
-      unsigned char ch;
-      if (url_canon::DecodeEscaped(input, &i, length, &ch)) {
-        unescaped_chars.push_back(ch);
-      } else {
-        // Invalid escape sequence, copy the percent literal.
-        unescaped_chars.push_back('%');
-      }
-    } else {
-      // Regular non-escaped 8-bit character.
-      unescaped_chars.push_back(input[i]);
-    }
-  }
-
-  // Convert that 8-bit to UTF-16. It's not clear IE does this at all to
-  // JavaScript URLs, but Firefox and Safari do.
-  for (int i = 0; i < unescaped_chars.length(); i++) {
-    unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));
-    if (uch < 0x80) {
-      // ASCII (single byte), just append directly
-      output->push_back(uch);
-    } else {
-      // next_character will point to the last byte of the decoded
-      // character.
-      int next_character = i;
-      unsigned code_point;
-      if (url_canon::ReadUTFChar(unescaped_chars.data(), &next_character,
-                                 unescaped_chars.length(), &code_point)) {
-        // Valid UTF-8 character, convert to UTF-16.
-        url_canon::AppendUTF16Value(code_point, output);
-        i = next_character;
-      } else {
-        // If there are any sequences that are not valid UTF-8, we keep
-        // invalid code points and promote to UTF-16. We copy all characters
-        // from the current position to the end of the identified sequence.
-        while (i < next_character) {
-          output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));
-          i++;
-        }
-        output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));
-      }
-    }
-  }
-}
-
-}  // namespace url_util

diff --git a/googleurl/src/url_util.h b/googleurl/src/url_util.h
deleted file mode 100644
index ec4cf9e..0000000
--- a/googleurl/src/url_util.h
+++ /dev/null

@@ -1,222 +0,0 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef GOOGLEURL_SRC_URL_UTIL_H__
-#define GOOGLEURL_SRC_URL_UTIL_H__
-
-#include <string>
-
-#include "base/string16.h"
-#include "googleurl/src/url_common.h"
-#include "googleurl/src/url_parse.h"
-#include "googleurl/src/url_canon.h"
-
-namespace url_util {
-
-// Init ------------------------------------------------------------------------
-
-// Initialization is NOT required, it will be implicitly initialized when first
-// used. However, this implicit initialization is NOT threadsafe. If you are
-// using this library in a threaded environment and don't have a consistent
-// "first call" (an example might be calling "AddStandardScheme" with your
-// special application-specific schemes) then you will want to call initialize
-// before spawning any threads.
-//
-// It is OK to call this function more than once, subsequent calls will simply
-// "noop", unless Shutdown() was called in the mean time. This will also be a
-// "noop" if other calls to the library have forced an initialization
-// beforehand.
-GURL_API void Initialize();
-
-// Cleanup is not required, except some strings may leak. For most user
-// applications, this is fine. If you're using it in a library that may get
-// loaded and unloaded, you'll want to call Shutdown() to properly clean up
-// your library.
-GURL_API void Shutdown();
-
-// Schemes --------------------------------------------------------------------
-
-// Adds an application-defined scheme to the internal list of "standard" URL
-// schemes. This function is not threadsafe and can not be called concurrently
-// with any other url_util function. It will assert if the list of standard
-// schemes has been locked (see LockStandardSchemes).
-GURL_API void AddStandardScheme(const char* new_scheme);
-
-// Sets a flag to prevent future calls to AddStandardScheme from succeeding.
-//
-// This is designed to help prevent errors for multithreaded applications.
-// Normal usage would be to call AddStandardScheme for your custom schemes at
-// the beginning of program initialization, and then LockStandardSchemes. This
-// prevents future callers from mistakenly calling AddStandardScheme when the
-// program is running with multiple threads, where such usage would be
-// dangerous.
-//
-// We could have had AddStandardScheme use a lock instead, but that would add
-// some platform-specific dependencies we don't otherwise have now, and is
-// overkill considering the normal usage is so simple.
-GURL_API void LockStandardSchemes();
-
-// Locates the scheme in the given string and places it into |found_scheme|,
-// which may be NULL to indicate the caller does not care about the range.
-//
-// Returns whether the given |compare| scheme matches the scheme found in the
-// input (if any). The |compare| scheme must be a valid canonical scheme or
-// the result of the comparison is undefined.
-GURL_API bool FindAndCompareScheme(const char* str,
-                                   int str_len,
-                                   const char* compare,
-                                   url_parse::Component* found_scheme);
-GURL_API bool FindAndCompareScheme(const char16* str,
-                                   int str_len,
-                                   const char* compare,
-                                   url_parse::Component* found_scheme);
-inline bool FindAndCompareScheme(const std::string& str,
-                                 const char* compare,
-                                 url_parse::Component* found_scheme) {
-  return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
-                              compare, found_scheme);
-}
-inline bool FindAndCompareScheme(const string16& str,
-                                 const char* compare,
-                                 url_parse::Component* found_scheme) {
-  return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
-                              compare, found_scheme);
-}
-
-// Returns true if the given string represents a standard URL. This means that
-// the scheme is in the list of known standard schemes.
-GURL_API bool IsStandard(const char* spec,
-                         const url_parse::Component& scheme);
-GURL_API bool IsStandard(const char16* spec,
-                         const url_parse::Component& scheme);
-
-// TODO(brettw) remove this. This is a temporary compatibility hack to avoid
-// breaking the WebKit build when this version is synced via Chrome.
-inline bool IsStandard(const char* spec, int spec_len,
-                       const url_parse::Component& scheme) {
-  return IsStandard(spec, scheme);
-}
-
-// URL library wrappers -------------------------------------------------------
-
-// Parses the given spec according to the extracted scheme type. Normal users
-// should use the URL object, although this may be useful if performance is
-// critical and you don't want to do the heap allocation for the std::string.
-//
-// As with the url_canon::Canonicalize* functions, the charset converter can
-// be NULL to use UTF-8 (it will be faster in this case).
-//
-// Returns true if a valid URL was produced, false if not. On failure, the
-// output and parsed structures will still be filled and will be consistent,
-// but they will not represent a loadable URL.
-GURL_API bool Canonicalize(const char* spec,
-                           int spec_len,
-                           url_canon::CharsetConverter* charset_converter,
-                           url_canon::CanonOutput* output,
-                           url_parse::Parsed* output_parsed);
-GURL_API bool Canonicalize(const char16* spec,
-                           int spec_len,
-                           url_canon::CharsetConverter* charset_converter,
-                           url_canon::CanonOutput* output,
-                           url_parse::Parsed* output_parsed);
-
-// Resolves a potentially relative URL relative to the given parsed base URL.
-// The base MUST be valid. The resulting canonical URL and parsed information
-// will be placed into the given out variables.
-//
-// The relative need not be relative. If we discover that it's absolute, this
-// will produce a canonical version of that URL. See Canonicalize() for more
-// about the charset_converter.
-//
-// Returns true if the output is valid, false if the input could not produce
-// a valid URL.
-GURL_API bool ResolveRelative(const char* base_spec,
-                              int base_spec_len,
-                              const url_parse::Parsed& base_parsed,
-                              const char* relative,
-                              int relative_length,
-                              url_canon::CharsetConverter* charset_converter,
-                              url_canon::CanonOutput* output,
-                              url_parse::Parsed* output_parsed);
-GURL_API bool ResolveRelative(const char* base_spec,
-                              int base_spec_len,
-                              const url_parse::Parsed& base_parsed,
-                              const char16* relative,
-                              int relative_length,
-                              url_canon::CharsetConverter* charset_converter,
-                              url_canon::CanonOutput* output,
-                              url_parse::Parsed* output_parsed);
-
-// Replaces components in the given VALID input url. The new canonical URL info
-// is written to output and out_parsed.
-//
-// Returns true if the resulting URL is valid.
-GURL_API bool ReplaceComponents(
-    const char* spec,
-    int spec_len,
-    const url_parse::Parsed& parsed,
-    const url_canon::Replacements<char>& replacements,
-    url_canon::CharsetConverter* charset_converter,
-    url_canon::CanonOutput* output,
-    url_parse::Parsed* out_parsed);
-GURL_API bool ReplaceComponents(
-    const char* spec,
-    int spec_len,
-    const url_parse::Parsed& parsed,
-    const url_canon::Replacements<char16>& replacements,
-    url_canon::CharsetConverter* charset_converter,
-    url_canon::CanonOutput* output,
-    url_parse::Parsed* out_parsed);
-
-// String helper functions ----------------------------------------------------
-
-// Compare the lower-case form of the given string against the given ASCII
-// string.  This is useful for checking whether an input string matches some
-// token, and it is optimized to avoid intermediate string copies.
-//
-// The versions of this function that don't take a b_end assume that the b
-// string is NULL terminated.
-GURL_API bool LowerCaseEqualsASCII(const char* a_begin,
-                                   const char* a_end,
-                                   const char* b);
-GURL_API bool LowerCaseEqualsASCII(const char* a_begin,
-                                   const char* a_end,
-                                   const char* b_begin,
-                                   const char* b_end);
-GURL_API bool LowerCaseEqualsASCII(const char16* a_begin,
-                                   const char16* a_end,
-                                   const char* b);
-
-// Unescapes the given string using URL escaping rules.
-GURL_API void DecodeURLEscapeSequences(const char* input, int length,
-                                       url_canon::CanonOutputW* output);
-
-}  // namespace url_util
-
-#endif  // GOOGLEURL_SRC_URL_UTIL_H__

diff --git a/googleurl/src/url_util_unittest.cc b/googleurl/src/url_util_unittest.cc
deleted file mode 100644
index bb04905..0000000
--- a/googleurl/src/url_util_unittest.cc
+++ /dev/null

@@ -1,222 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_stdstring.h"
-#include "googleurl/src/url_parse.h"
-#include "googleurl/src/url_test_utils.h"
-#include "googleurl/src/url_util.h"
-#include "testing/base/public/gunit.h"
-
-// From googleurl/base/basictypes.h
-#define ARRAYSIZE_UNSAFE(a) \
-  ((sizeof(a) / sizeof(*(a))) / \
-   static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
-
-TEST(URLUtilTest, FindAndCompareScheme) {
-  url_parse::Component found_scheme;
-
-  // Simple case where the scheme is found and matches.
-  const char kStr1[] = "http://www.com/";
-  EXPECT_TRUE(url_util::FindAndCompareScheme(
-      kStr1, static_cast<int>(strlen(kStr1)), "http", NULL));
-  EXPECT_TRUE(url_util::FindAndCompareScheme(
-      kStr1, static_cast<int>(strlen(kStr1)), "http", &found_scheme));
-  EXPECT_TRUE(found_scheme == url_parse::Component(0, 4));
-
-  // A case where the scheme is found and doesn't match.
-  EXPECT_FALSE(url_util::FindAndCompareScheme(
-      kStr1, static_cast<int>(strlen(kStr1)), "https", &found_scheme));
-  EXPECT_TRUE(found_scheme == url_parse::Component(0, 4));
-
-  // A case where there is no scheme.
-  const char kStr2[] = "httpfoobar";
-  EXPECT_FALSE(url_util::FindAndCompareScheme(
-      kStr2, static_cast<int>(strlen(kStr2)), "http", &found_scheme));
-  EXPECT_TRUE(found_scheme == url_parse::Component());
-
-  // When there is an empty scheme, it should match the empty scheme.
-  const char kStr3[] = ":foo.com/";
-  EXPECT_TRUE(url_util::FindAndCompareScheme(
-      kStr3, static_cast<int>(strlen(kStr3)), "", &found_scheme));
-  EXPECT_TRUE(found_scheme == url_parse::Component(0, 0));
-
-  // But when there is no scheme, it should fail.
-  EXPECT_FALSE(url_util::FindAndCompareScheme("", 0, "", &found_scheme));
-  EXPECT_TRUE(found_scheme == url_parse::Component());
-
-  // When there is a whitespace char in scheme, it should canonicalize the url
-  // before comparison.
-  const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)";
-  EXPECT_TRUE(url_util::FindAndCompareScheme(
-      whtspc_str, static_cast<int>(strlen(whtspc_str)), "javascript",
-      &found_scheme));
-  EXPECT_TRUE(found_scheme == url_parse::Component(1, 10));
-
-  // Control characters should be stripped out on the ends, and kept in the
-  // middle.
-  const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)";
-  EXPECT_FALSE(url_util::FindAndCompareScheme(
-      ctrl_str, static_cast<int>(strlen(ctrl_str)), "javascript",
-      &found_scheme));
-  EXPECT_TRUE(found_scheme == url_parse::Component(1, 11));
-}
-
-TEST(URLUtilTest, ReplaceComponents) {
-  url_parse::Parsed parsed;
-  url_canon::RawCanonOutputT<char> output;
-  url_parse::Parsed new_parsed;
-
-  // Check that the following calls do not cause crash
-  url_canon::Replacements<char> replacements;
-  replacements.SetRef("test", url_parse::Component(0, 4));
-  url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output,
-                              &new_parsed);
-  url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output,
-                              &new_parsed);
-  replacements.ClearRef();
-  replacements.SetHost("test", url_parse::Component(0, 4));
-  url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output,
-                              &new_parsed);
-  url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output,
-                              &new_parsed);
-
-  replacements.ClearHost();
-  url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output,
-                              &new_parsed);
-  url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output,
-                              &new_parsed);
-  url_util::ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output,
-                              &new_parsed);
-  url_util::ReplaceComponents("", 0, parsed, replacements, NULL, &output,
-                              &new_parsed);
-}
-
-static std::string CheckReplaceScheme(const char* base_url,
-                                      const char* scheme) {
-  // Make sure the input is canonicalized.
-  url_canon::RawCanonOutput<32> original;
-  url_parse::Parsed original_parsed;
-  url_util::Canonicalize(base_url, strlen(base_url), NULL,
-                         &original, &original_parsed);
-
-  url_canon::Replacements<char> replacements;
-  replacements.SetScheme(scheme, url_parse::Component(0, strlen(scheme)));
-
-  std::string output_string;
-  url_canon::StdStringCanonOutput output(&output_string);
-  url_parse::Parsed output_parsed;
-  url_util::ReplaceComponents(original.data(), original.length(),
-                              original_parsed, replacements, NULL,
-                              &output, &output_parsed);
-
-  output.Complete();
-  return output_string;
-}
-
-TEST(URLUtilTest, ReplaceScheme) {
-  EXPECT_EQ("https://google.com/",
-            CheckReplaceScheme("http://google.com/", "https"));
-  EXPECT_EQ("file://google.com/",
-            CheckReplaceScheme("http://google.com/", "file"));
-  EXPECT_EQ("http://home/Build",
-            CheckReplaceScheme("file:///Home/Build", "http"));
-  EXPECT_EQ("javascript:foo",
-            CheckReplaceScheme("about:foo", "javascript"));
-  EXPECT_EQ("://google.com/",
-            CheckReplaceScheme("http://google.com/", ""));
-  EXPECT_EQ("http://google.com/",
-            CheckReplaceScheme("about:google.com", "http"));
-  EXPECT_EQ("http:", CheckReplaceScheme("", "http"));
-
-#ifdef WIN32
-  // Magic Windows drive letter behavior when converting to a file URL.
-  EXPECT_EQ("file:///E:/foo/",
-            CheckReplaceScheme("http://localhost/e:foo/", "file"));
-#endif
-
-  // This will probably change to "about://google.com/" when we fix
-  // http://crbug.com/160 which should also be an acceptable result.
-  EXPECT_EQ("about://google.com/",
-            CheckReplaceScheme("http://google.com/", "about"));
-}
-
-TEST(URLUtilTest, DecodeURLEscapeSequences) {
-  struct DecodeCase {
-    const char* input;
-    const char* output;
-  } decode_cases[] = {
-    {"hello, world", "hello, world"},
-    {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/",
-     "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"},
-    {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/",
-     "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"},
-    {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/",
-     " !\"#$%&'()*+,-.//"},
-    {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/",
-     "0123456789:;<=>?/"},
-    {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/",
-     "@ABCDEFGHIJKLMNO/"},
-    {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/",
-     "PQRSTUVWXYZ[\\]^_/"},
-    {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/",
-     "`abcdefghijklmno/"},
-    {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/",
-     "pqrstuvwxyz{|}~\x7f/"},
-    // Test un-UTF-8-ization.
-    {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"},
-  };
-
-  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(decode_cases); i++) {
-    const char* input = decode_cases[i].input;
-    url_canon::RawCanonOutputT<char16> output;
-    url_util::DecodeURLEscapeSequences(input, strlen(input), &output);
-    EXPECT_EQ(decode_cases[i].output,
-              url_test_utils::ConvertUTF16ToUTF8(
-                string16(output.data(), output.length())));
-  }
-
-  // Our decode should decode %00
-  const char zero_input[] = "%00";
-  url_canon::RawCanonOutputT<char16> zero_output;
-  url_util::DecodeURLEscapeSequences(zero_input, strlen(zero_input),
-                                     &zero_output);
-  EXPECT_NE("%00",
-            url_test_utils::ConvertUTF16ToUTF8(
-              string16(zero_output.data(), zero_output.length())));
-
-  // Test the error behavior for invalid UTF-8.
-  const char invalid_input[] = "%e4%a0%e5%a5%bd";
-  const char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0};
-  url_canon::RawCanonOutputT<char16> invalid_output;
-  url_util::DecodeURLEscapeSequences(invalid_input, strlen(invalid_input),
-                                     &invalid_output);
-  EXPECT_EQ(string16(invalid_expected),
-            string16(invalid_output.data(), invalid_output.length()));
-}

diff --git a/googleurl/third_party/icu/build/using_icu.vsprops b/googleurl/third_party/icu/build/using_icu.vsprops
deleted file mode 100644
index a3989ef..0000000
--- a/googleurl/third_party/icu/build/using_icu.vsprops
+++ /dev/null

@@ -1,11 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioPropertySheet
-	ProjectType="Visual C++"
-	Version="8.00"
-	Name="using_icu"
-	>
-	<Tool
-		Name="VCCLCompilerTool"
-		AdditionalIncludeDirectories="&quot;$(SolutionDir)..\..\third_party\icu\public\common&quot;;&quot;$(SolutionDir)..\..\third_party\icu\public\i18n&quot;"
-	/>
-</VisualStudioPropertySheet>

diff --git a/src/base/strings/string16.cc b/src/base/strings/string16.cc
new file mode 100644
index 0000000..2e749a3
--- /dev/null
+++ b/src/base/strings/string16.cc

@@ -0,0 +1,74 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string16.h"
+
+#if defined(WCHAR_T_IS_UTF16)
+
+#error This file should not be used on 2-byte wchar_t systems
+// If this winds up being needed on 2-byte wchar_t systems, either the
+// definitions below can be used, or the host system's wide character
+// functions like wmemcmp can be wrapped.
+
+#elif defined(WCHAR_T_IS_UTF32)
+
+#include <ostream>
+
+namespace url {
+namespace base {
+
+// Compares the first |n| char16 units of |s1| and |s2|. Returns 0 when they
+// are all equal, otherwise -1 or 1 according to the first differing unit.
+int c16memcmp(const char16* s1, const char16* s2, size_t n) {
+  // memcmp is deliberately not used: it would change the semantics.
+  for (size_t i = 0; i < n; ++i) {
+    if (s1[i] == s2[i])
+      continue;
+    // char16 is unsigned, so the sign of (s1[i] - s2[i]) cannot be used.
+    return (s1[i] < s2[i]) ? -1 : 1;
+  }
+  return 0;
+}
+
+// Returns the number of char16 units in |s| before the first zero unit.
+size_t c16len(const char16* s) {
+  size_t length = 0;
+  while (s[length] != 0)
+    ++length;
+  return length;
+}
+
+// Scans the first |n| units of |s| for |c|. Returns a pointer to the first
+// occurrence, or a null pointer when |c| does not occur in that range.
+const char16* c16memchr(const char16* s, char16 c, size_t n) {
+  for (size_t i = 0; i < n; ++i) {
+    if (s[i] == c)
+      return s + i;
+  }
+  return 0;
+}
+
+// memmove/memcpy for char16 buffers; |n| counts char16 units, not bytes.
+char16* c16memmove(char16* s1, const char16* s2, size_t n) {
+  return static_cast<char16*>(memmove(s1, s2, n * sizeof(char16)));
+}
+char16* c16memcpy(char16* s1, const char16* s2, size_t n) {
+  return static_cast<char16*>(memcpy(s1, s2, n * sizeof(char16)));
+}
+
+// Stores |c| into each of the first |n| char16 units of |s| and returns |s|.
+// Written as a loop because memset can only replicate a single byte value.
+char16* c16memset(char16* s, char16 c, size_t n) {
+  for (size_t i = 0; i < n; ++i) {
+    s[i] = c;
+  }
+  return s;
+}
+
+}  // namespace base
+}  // namespace url
+
+template class std::basic_string<url::base::char16, url::base::string16_char_traits>;
+
+#endif  // WCHAR_T_IS_UTF32

diff --git a/googleurl/base/string16.h b/src/base/strings/string16.h
similarity index 74%
rename from googleurl/base/string16.h
rename to src/base/strings/string16.h
index deedaf6..be488c3 100644
--- a/googleurl/base/string16.h
+++ b/src/base/strings/string16.h

@@ -1,33 +1,9 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//    * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//    * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//    * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
-#ifndef BASE_STRING16_H_
-#define BASE_STRING16_H_
+#ifndef BASE_STRINGS_STRING16_H_
+#define BASE_STRINGS_STRING16_H_
 
 // WHAT:
 // A version of std::basic_string that provides 2-byte characters even when
@@ -54,18 +30,27 @@
 #include <string>
 
 #include "base/basictypes.h"
+#include "build/build_config.h"
 
-#ifdef WIN32
+#if defined(WCHAR_T_IS_UTF16)
+
+namespace url {
+namespace base {
 
 typedef wchar_t char16;
 typedef std::wstring string16;
+typedef std::char_traits<wchar_t> string16_char_traits;
 
-#else  // !WIN32
+}  // namespace base
+}  // namespace url
+
+#elif defined(WCHAR_T_IS_UTF32)
+
+namespace url {
+namespace base {
 
 typedef uint16 char16;
 
-namespace base {
-
 // char16 versions of the functions required by string16_char_traits; these
 // are based on the wide character functions of similar names ("w" or "wcs"
 // instead of "c16").
@@ -80,6 +65,10 @@
   typedef char16 char_type;
   typedef int int_type;
 
+  // int_type needs to be able to hold each possible value of char_type, and in
+  // addition, the distinct value of eof().
+  COMPILE_ASSERT(sizeof(int_type) > sizeof(char_type), unexpected_type_width);
+
   typedef std::streamoff off_type;
   typedef mbstate_t state_type;
   typedef std::fpos<state_type> pos_type;
@@ -141,7 +130,10 @@
   }
 };
 
+typedef std::basic_string<char16, url::base::string16_char_traits> string16;
+
 }  // namespace base
+}  // namespace url
 
 // The string class will be explicitly instantiated only once, in string16.cc.
 //
@@ -182,12 +174,9 @@
 //
 // TODO(mark): File this bug with Apple and update this note with a bug number.
 
-extern template class std::basic_string<char16, base::string16_char_traits>;
+extern template
+class std::basic_string<url::base::char16, url::base::string16_char_traits>;
 
-typedef std::basic_string<char16, base::string16_char_traits> string16;
+#endif  // WCHAR_T_IS_UTF32
 
-extern std::ostream& operator<<(std::ostream& out, const string16& str);
-
-#endif  // !WIN32
-
-#endif  // BASE_STRING16_H_
+#endif  // BASE_STRINGS_STRING16_H_

diff --git a/src/base/strings/string_util.cc b/src/base/strings/string_util.cc
new file mode 100644
index 0000000..8b2e068
--- /dev/null
+++ b/src/base/strings/string_util.cc

@@ -0,0 +1,147 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string_util.h"
+
+#include "base/basictypes.h"
+#include "base/third_party/icu/icu_utf.h"
+
+static bool IsWildcard(base_icu::UChar32 character) {
+  return character == '*' || character == '?';  // pattern metacharacters
+}
+
+// Advances both cursors past matching chars; stops at an unescaped wildcard.
+template <typename CHAR, typename NEXT>
+static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
+                         const CHAR** string, const CHAR* string_end,
+                         NEXT next) {
+  const CHAR* escape = NULL;
+  while (*pattern != pattern_end && *string != string_end) {
+    if (!escape && IsWildcard(**pattern)) {
+      // An unescaped wildcard is never consumed here; the caller handles it.
+      return;
+    }
+
+    // A backslash that is not itself escaped starts an escape: remember where
+    // it was, step over it and compare the following character literally.
+    if (!escape && **pattern == '\\') {
+      escape = *pattern;
+      next(pattern, pattern_end);
+      continue;
+    }
+
+    // Decode one char from each side; advance both only when they match.
+    const CHAR* pattern_next = *pattern;
+    const CHAR* string_next = *string;
+    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
+    if (pattern_char == next(&string_next, string_end) &&
+        pattern_char != CBU_SENTINEL) {
+      *pattern = pattern_next;
+      *string = string_next;
+    } else {
+      // Mismatch, or |next| produced CBU_SENTINEL: nothing more to eat. If a
+      // backslash was skipped just before, advancing past it was premature,
+      // so |*pattern| is moved back onto it. The original authors note that
+      // MatchPattern then returns false, because an escaped character that
+      // fails to match here cannot match anywhere else.
+      if (escape) {
+        *pattern = escape;
+      }
+      return;
+    }
+
+    escape = NULL;
+  }
+}
+
+// Advances |*pattern| over a run of consecutive wildcards ('*' or '?'),
+// stopping at the first other character or at |end|.
+template <typename CHAR, typename NEXT>
+static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
+  while (*pattern != end && IsWildcard(**pattern)) {
+    next(pattern, end);
+  }
+}
+
+template <typename CHAR, typename NEXT>
+static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
+                          const CHAR* pattern, const CHAR* pattern_end,
+                          int depth,  // Nesting level of this recursive call.
+                          NEXT next) {  // Reads one char and advances past it.
+  const int kMaxDepth = 16;  // Deeper recursion is reported as "no match".
+  if (depth > kMaxDepth)
+    return false;
+
+  // Consume the literal prefix shared by |pattern| and |eval|.
+  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
+
+  // The input is used up: it matches only if what remains of the pattern is
+  // empty or consists solely of wildcards.
+  if (eval == eval_end) {
+    EatWildcard(&pattern, pattern_end, next);
+    return pattern == pattern_end;
+  }
+
+  // Input remains but the pattern is used up: no match.
+  if (pattern == pattern_end)
+    return false;
+
+  // '?' matches zero or one character, so try the rest of the pattern against
+  // the input as-is and then against the input with one character removed.
+  const CHAR* next_pattern = pattern;
+  next(&next_pattern, pattern_end);
+  if (pattern[0] == '?') {
+    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
+                      depth + 1, next))
+      return true;
+    const CHAR* next_eval = eval;
+    next(&next_eval, eval_end);
+    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
+                      depth + 1, next))
+      return true;
+  }
+
+  // '*' matches any run of characters: try the rest of the pattern at every
+  // remaining position of the input.
+  if (pattern[0] == '*') {
+    // Skip every wildcard that directly follows ('?' included) so that runs
+    // such as ********** do not multiply the recursion. http://crbug.com/52839
+    EatWildcard(&next_pattern, pattern_end, next);
+
+    while (eval != eval_end) {
+      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
+                        depth + 1, next))
+        return true;
+      eval++;  // NOTE(review): steps one CHAR unit, not via |next|.
+    }
+
+    // The input is exhausted at this point; it is a match only when the
+    // pattern holds nothing but wildcards.
+    if (eval == eval_end) {
+      EatWildcard(&pattern, pattern_end, next);
+      if (pattern != pattern_end)
+        return false;
+      return true;
+    }
+  }
+
+  return false;
+}
+
+struct NextCharUTF8 {  // Functor: decodes one UTF-8 char and advances |*p|.
+  base_icu::UChar32 operator()(const char** p, const char* end) {
+    base_icu::UChar32 c;
+    int offset = 0;
+    CBU8_NEXT(*p, offset, end - *p, c);  // Fills |c| and moves |offset|.
+    *p += offset;
+    return c;
+  }
+};
+
+// UTF-8 entry point: starts the recursive matcher at depth 0.
+bool MatchPattern(const std::string& eval, const std::string& pattern) {
+  const char* const text = eval.data();
+  return MatchPatternT(text, text + eval.size(), pattern.data(),
+                       pattern.data() + pattern.size(), 0, NextCharUTF8());
+}

diff --git a/src/base/strings/string_util.h b/src/base/strings/string_util.h
new file mode 100644
index 0000000..ffc1579
--- /dev/null
+++ b/src/base/strings/string_util.h

@@ -0,0 +1,20 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// This file defines utility functions for working with strings.
+
+#ifndef BASE_STRINGS_STRING_UTIL_H_
+#define BASE_STRINGS_STRING_UTIL_H_
+
+#include "base/basictypes.h"
+
+// Returns true if the string passed in matches the pattern. The pattern
+// string can contain wildcards like * and ?
+// The backslash character (\) is an escape character for * and ?
+// We limit the patterns to having a max of 16 * or ? characters.
+// ? matches 0 or 1 character, while * matches 0 or more characters.
+bool MatchPattern(const std::string& string,
+                  const std::string& pattern);
+
+#endif  // BASE_STRINGS_STRING_UTIL_H_

diff --git a/src/base/strings/utf_string_conversion_utils.cc b/src/base/strings/utf_string_conversion_utils.cc
new file mode 100644
index 0000000..e71605b
--- /dev/null
+++ b/src/base/strings/utf_string_conversion_utils.cc

@@ -0,0 +1,150 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/utf_string_conversion_utils.h"
+
+#include "base/third_party/icu/icu_utf.h"
+
+namespace url {
+namespace base {
+
+// ReadUnicodeCharacter --------------------------------------------------------
+
+bool ReadUnicodeCharacter(const char* src,
+                          int32 src_len,
+                          int32* char_index,
+                          uint32* code_point_out) {
+  // CBU8_NEXT reports a decoding error as -1, so the value it fills in must
+  // be signed. Callers learn about errors from the return value instead,
+  // which is why |code_point_out| can be unsigned.
+  int32 code_point;
+  CBU8_NEXT(src, *char_index, src_len, code_point);
+  *code_point_out = static_cast<uint32>(code_point);
+
+  // CBU8_NEXT leaves |*char_index| just past the character; back up so that
+  // it indexes the last code unit consumed.
+  (*char_index)--;
+
+  // Reject surrogates and out-of-range values; -1 converts to 0xFFFFFFFF.
+  return IsValidCodepoint(code_point);
+}
+
+// Decodes the UTF-16 character at |src[*char_index]| into |*code_point|. A
+// surrogate pair is combined and |*char_index| is left on its trail unit.
+bool ReadUnicodeCharacter(const char16* src,
+                          int32 src_len,
+                          int32* char_index,
+                          uint32* code_point) {
+  const int32 index = *char_index;
+  const char16 unit = src[index];
+  if (!CBU16_IS_SURROGATE(unit)) {
+    // An ordinary 16-bit unit stands for itself.
+    *code_point = unit;
+    return IsValidCodepoint(*code_point);
+  }
+
+  // A surrogate is only acceptable as a lead that has a trail right after it.
+  if (!CBU16_IS_SURROGATE_LEAD(unit) || index + 1 >= src_len ||
+      !CBU16_IS_TRAIL(src[index + 1])) {
+    return false;
+  }
+  *code_point = CBU16_GET_SUPPLEMENTARY(unit, src[index + 1]);
+  *char_index = index + 1;
+  return IsValidCodepoint(*code_point);
+}
+
+#if defined(WCHAR_T_IS_UTF32)
+bool ReadUnicodeCharacter(const wchar_t* src,
+                          int32 src_len,
+                          int32* char_index,
+                          uint32* code_point) {
+  // wchar_t is 32 bits wide here, so each unit already is a code point.
+  *code_point = src[*char_index];
+
+  // |src_len| is not consulted; only the value's range is checked.
+  return IsValidCodepoint(*code_point);
+}
+#endif  // defined(WCHAR_T_IS_UTF32)
+
+// WriteUnicodeCharacter -------------------------------------------------------
+
+// Appends |code_point| to |*output| as UTF-8; returns the bytes appended.
+size_t WriteUnicodeCharacter(uint32 code_point, std::string* output) {
+  // Single-byte (ASCII) values are the common case and need no encoding.
+  if (code_point <= 0x7f) {
+    output->push_back(static_cast<char>(code_point));
+    return 1;
+  }
+
+  // Grow the string by the longest possible sequence (CBU8_APPEND_UNSAFE can
+  // append up to 4 bytes) so that the macro can write in place.
+  const size_t old_length = output->length();
+  size_t write_pos = old_length;
+  output->resize(old_length + CBU8_MAX_LENGTH);
+  CBU8_APPEND_UNSAFE(&(*output)[0], write_pos, code_point);
+
+  // |write_pos| was advanced past the bytes written, so it is now the real
+  // length of the string; drop the unused tail.
+  output->resize(write_pos);
+  return write_pos - old_length;
+}
+
+size_t WriteUnicodeCharacter(uint32 code_point, string16* output) {
+  if (CBU16_LENGTH(code_point) == 1) {
+    // This code point is in the Basic Multilingual Plane (BMP).
+    output->push_back(static_cast<char16>(code_point));
+    return 1;
+  }
+  // Non-BMP characters are written as a surrogate pair (two char16 units).
+  size_t char_offset = output->length();
+  output->resize(char_offset + CBU16_MAX_LENGTH);
+  CBU16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
+  return CBU16_MAX_LENGTH;
+}
+
+// Generalized Unicode converter -----------------------------------------------
+
+// Empties |*output| and reserves a capacity guessed from the first unit of
+// |src|: 1 byte per unit if it is ASCII, 3 bytes per unit otherwise.
+template<typename CHAR>
+void PrepareForUTF8Output(const CHAR* src,
+                          size_t src_len,
+                          std::string* output) {
+  output->clear();
+  if (src_len == 0)
+    return;
+
+  // The whole input is assumed to look like its first unit.
+  const bool starts_ascii = src[0] < 0x80;
+  const size_t bytes_per_unit = starts_ascii ? 1 : 3;
+  output->reserve(src_len * bytes_per_unit);
+}
+
+// Instantiate versions we know callers will need.
+template void PrepareForUTF8Output(const wchar_t*, size_t, std::string*);
+template void PrepareForUTF8Output(const char16*, size_t, std::string*);
+
+// Empties |*output| and reserves a capacity guessed from the first byte of
+// |src|: one unit per byte if it is ASCII, one unit per two bytes otherwise.
+template<typename STRING>
+void PrepareForUTF16Or32Output(const char* src,
+                               size_t src_len,
+                               STRING* output) {
+  output->clear();
+  if (src_len == 0)
+    return;
+
+  // char may be signed, so the lead byte is inspected as unsigned char.
+  const unsigned char first_byte = static_cast<unsigned char>(src[0]);
+  const bool starts_ascii = first_byte < 0x80;
+  const size_t bytes_per_unit = starts_ascii ? 1 : 2;
+  output->reserve(src_len / bytes_per_unit);
+}
+
+// Instantiate versions we know callers will need.
+template void PrepareForUTF16Or32Output(const char*, size_t, std::wstring*);
+template void PrepareForUTF16Or32Output(const char*, size_t, string16*);
+
+}  // namespace base
+}  // namespace url

diff --git a/src/base/strings/utf_string_conversion_utils.h b/src/base/strings/utf_string_conversion_utils.h
new file mode 100644
index 0000000..b24f03b
--- /dev/null
+++ b/src/base/strings/utf_string_conversion_utils.h

@@ -0,0 +1,98 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_
+#define BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_
+
+// This should only be used by the various UTF string conversion files.
+
+#include "base/strings/string16.h"
+
+namespace url {
+namespace base {
+
+// Returns true for any Unicode scalar value: surrogates ([0xD800, 0xDFFF])
+// and values above 0x10FFFF are rejected; non-characters and unassigned
+// code points are accepted.
+inline bool IsValidCodepoint(uint32 code_point) {
+  const bool is_surrogate = code_point >= 0xD800u && code_point < 0xE000u;
+  return !is_surrogate && code_point <= 0x10FFFFu;
+}
+
+// Valid code points minus the non-characters (U+FDD0..U+FDEF, U+xxFFFE/F).
+inline bool IsValidCharacter(uint32 code_point) {
+  const bool surrogate = code_point >= 0xD800u && code_point < 0xE000u;
+  const bool fdd0_block = code_point >= 0xFDD0u && code_point <= 0xFDEFu;
+  const bool fffe_or_ffff = (code_point & 0xFFFEu) == 0xFFFEu;
+  return !(surrogate || fdd0_block || fffe_or_ffff) && code_point <= 0x10FFFFu;
+}
+
+// ReadUnicodeCharacter --------------------------------------------------------
+
+// Reads a UTF-8 stream, placing the next code point into the given output
+// |*code_point|. |src| represents the entire string to read, and |*char_index|
+// is the character offset within the string to start reading at. |*char_index|
+// will be updated to index the last character read, such that incrementing it
+// (as in a for loop) will take the reader to the next character.
+//
+// Returns true on success. On failure, |*code_point| will be invalid.
+bool ReadUnicodeCharacter(const char* src,
+                          int32 src_len,
+                          int32* char_index,
+                          uint32* code_point_out);
+
+// Reads a UTF-16 character. The usage is the same as the 8-bit version above.
+bool ReadUnicodeCharacter(const char16* src,
+                          int32 src_len,
+                          int32* char_index,
+                          uint32* code_point);
+
+#if defined(WCHAR_T_IS_UTF32)
+// Reads UTF-32 character. The usage is the same as the 8-bit version above.
+bool ReadUnicodeCharacter(const wchar_t* src,
+                          int32 src_len,
+                          int32* char_index,
+                          uint32* code_point);
+#endif  // defined(WCHAR_T_IS_UTF32)
+
+// WriteUnicodeCharacter -------------------------------------------------------
+
+// Appends a UTF-8 character to the given 8-bit string.  Returns the number of
+// bytes written.
+// TODO(brettw) Bug 79631: This function should not be exposed.
+size_t WriteUnicodeCharacter(uint32 code_point,
+                             std::string* output);
+
+// Appends the given code point as a UTF-16 character to the given 16-bit
+// string.  Returns the number of 16-bit values written.
+size_t WriteUnicodeCharacter(uint32 code_point, string16* output);
+
+#if defined(WCHAR_T_IS_UTF32)
+// Appends the given UTF-32 character to the given 32-bit string.  Returns the
+// number of 32-bit values written.
+inline size_t WriteUnicodeCharacter(uint32 code_point, std::wstring* output) {
+  // One 32-bit wchar_t holds any code point, so no encoding step is needed.
+  output->push_back(code_point);
+  return 1;
+}
+#endif  // defined(WCHAR_T_IS_UTF32)
+
+// Generalized Unicode converter -----------------------------------------------
+
+// Guesses the length of the output in UTF-8 in bytes, clears that output
+// string, and reserves that amount of space.  We assume that the input
+// character types are unsigned, which will be true for UTF-16 and -32 on our
+// systems.
+template<typename CHAR>
+void PrepareForUTF8Output(const CHAR* src, size_t src_len, std::string* output);
+
+// Prepares an output buffer (containing either UTF-16 or -32 data) given some
+// UTF-8 input that will be converted to it.  See PrepareForUTF8Output().
+template<typename STRING>
+void PrepareForUTF16Or32Output(const char* src, size_t src_len, STRING* output);
+
+}  // namespace base
+}  // namespace url
+
+#endif  // BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_

diff --git a/src/base/third_party/icu/LICENSE b/src/base/third_party/icu/LICENSE
new file mode 100644
index 0000000..40282f4
--- /dev/null
+++ b/src/base/third_party/icu/LICENSE

@@ -0,0 +1,32 @@
+ICU License - ICU 1.8.1 and later
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1995-2009 International Business Machines Corporation and others
+
+All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, and/or sell copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies of
+the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
+SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
+RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
+CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale, use
+or other dealings in this Software without prior written authorization
+of the copyright holder.

diff --git a/src/base/third_party/icu/icu_utf.cc b/src/base/third_party/icu/icu_utf.cc
new file mode 100644
index 0000000..55edce1
--- /dev/null
+++ b/src/base/third_party/icu/icu_utf.cc

@@ -0,0 +1,230 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  utf_impl.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep13
+*   created by: Markus W. Scherer
+*
+*   This file provides implementation functions for macros in the utfXX.h
+*   that would otherwise be too long as macros.
+*/
+
+#include "base/third_party/icu/icu_utf.h"
+
+namespace base_icu {
+
+/**
+ * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
+ * which need 1 or 2 bytes in UTF-8:
+ * \code
+ * U+0015 = NAK = Negative Acknowledge, C0 control character
+ * U+009f = highest C1 control character
+ * \endcode
+ *
+ * These are used by UTF8_..._SAFE macros so that they can return an error value
+ * that needs the same number of code units (bytes) as were seen by
+ * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define CBUTF8_ERROR_VALUE_1 0x15
+
+/**
+ * See documentation on UTF8_ERROR_VALUE_1 for details.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define CBUTF8_ERROR_VALUE_2 0x9f
+
+
+/**
+ * Error value for all UTFs. This code point value will be set by macros with
+ * error checking if an error is detected.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define CBUTF_ERROR_VALUE 0xffff
+
+/*
+ * This table could be replaced on many machines by
+ * a few lines of assembler code using an
+ * "index of first 0-bit from msb" instruction and
+ * one or two more integer instructions.
+ *
+ * For example, on an i386, do something like
+ * - MOV AL, leadByte
+ * - NOT AL         (8-bit, leave b15..b8==0..0, reverse only b7..b0)
+ * - MOV AH, 0
+ * - BSR BX, AX     (16-bit)
+ * - MOV AX, 6      (result)
+ * - JZ finish      (ZF==1 if leadByte==0xff)
+ * - SUB AX, BX (result)
+ * -finish:
+ * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB)
+ *
+ * In Unicode, all UTF-8 byte sequences with more than 4 bytes are illegal;
+ * lead bytes above 0xf4 are illegal.
+ * We keep them in this table for skipping long ISO 10646-UTF-8 sequences.
+ */
+const uint8
+utf8_countTrailBytes[256]={
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3,
+    3, 3, 3,    /* illegal in Unicode */
+    4, 4, 4, 4, /* illegal in Unicode */
+    5, 5,       /* illegal in Unicode */
+    0, 0        /* illegal bytes 0xfe and 0xff */
+};
+
+static const UChar32
+utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
+
+static const UChar32
+utf8_errorValue[6]={
+    CBUTF8_ERROR_VALUE_1, CBUTF8_ERROR_VALUE_2, CBUTF_ERROR_VALUE, 0x10ffff,
+    0x3ffffff, 0x7fffffff
+};
+
+/*
+ * Handle the non-inline part of the U8_NEXT() macro and its obsolete sibling
+ * UTF8_NEXT_CHAR_SAFE().
+ *
+ * The "strict" parameter controls the error behavior:
+ * <0  "Safe" behavior of U8_NEXT(): All illegal byte sequences yield a negative
+ *     code point result.
+ *  0  Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE):
+ *     All illegal byte sequences yield a positive code point such that this
+ *     result code point would be encoded with the same number of bytes as
+ *     the illegal sequence.
+ * >0  Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE):
+ *     Same as the obsolete "safe" behavior, but non-characters are also treated
+ *     like illegal sequences.
+ *
+ * The special negative (<0) value -2 is used for lenient treatment of surrogate
+ * code points as legal. Some implementations use this for roundtripping of
+ * Unicode 16-bit strings that are not well-formed UTF-16, that is, they
+ * contain unpaired surrogates.
+ *
+ * Note that a UBool is the same as an int8_t.
+ */
+UChar32
+utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, UBool strict) {
+    int32 i=*pi;
+    uint8 count=CBU8_COUNT_TRAIL_BYTES(c);
+    if((i)+count<=(length)) {
+        uint8 trail, illegal=0;
+
+        CBU8_MASK_LEAD_BYTE((c), count);
+        /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
+        switch(count) {
+        /* cases 3, 2, 1 fall through (one trail byte each); 5/4 break, 0 returns */
+        case 5:
+        case 4:
+            /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
+            illegal=1;
+            break;
+        case 3:
+            trail=s[(i)++];
+            (c)=((c)<<6)|(trail&0x3f);
+            if(c<0x110) {
+                illegal|=(trail&0xc0)^0x80;
+            } else {
+                /* code point>0x10ffff, outside Unicode */
+                illegal=1;
+                break;
+            }
+            FALLTHROUGH_INTENDED;
+        case 2:
+            trail=s[(i)++];
+            (c)=((c)<<6)|(trail&0x3f);
+            illegal|=(trail&0xc0)^0x80;
+            FALLTHROUGH_INTENDED;
+        case 1:
+            trail=s[(i)++];
+            (c)=((c)<<6)|(trail&0x3f);
+            illegal|=(trail&0xc0)^0x80;
+            break;
+        case 0:
+            if(strict>=0) {
+                return CBUTF8_ERROR_VALUE_1;
+            } else {
+                return CBU_SENTINEL;
+            }
+        /* no default branch to optimize switch()  - all values are covered */
+        }
+
+        /*
+         * All the error handling should return a value
+         * that needs count bytes so that UTF8_GET_CHAR_SAFE() works right.
+         *
+         * Starting with Unicode 3.0.1, non-shortest forms are illegal.
+         * Starting with Unicode 3.2, surrogate code points must not be
+         * encoded in UTF-8, and there are no irregular sequences any more.
+         *
+         * U8_ macros (new in ICU 2.4) return negative values for error conditions.
+         */
+
+        /* correct sequence - all trail bytes have (b7..b6)==(10)? */
+        /* illegal is also set if count>=4 */
+        if(illegal || (c)<utf8_minLegal[count] || (CBU_IS_SURROGATE(c) && strict!=-2)) {
+            /* error handling */
+            uint8 errorCount=count;
+            /* don't go beyond this sequence */
+            i=*pi;
+            while(count>0 && CBU8_IS_TRAIL(s[i])) {
+                ++(i);
+                --count;
+            }
+            if(strict>=0) {
+                c=utf8_errorValue[errorCount-count];
+            } else {
+                c=CBU_SENTINEL;
+            }
+        } else if((strict)>0 && CBU_IS_UNICODE_NONCHAR(c)) {
+            /* strict: forbid non-characters like U+fffe */
+            c=utf8_errorValue[count];
+        }
+    } else /* too few bytes left */ {
+        /* error handling */
+        int32 i0=i;
+        /* don't just set (i)=(length) in case there is an illegal sequence */
+        while((i)<(length) && CBU8_IS_TRAIL(s[i])) {
+            ++(i);
+        }
+        if(strict>=0) {
+            c=utf8_errorValue[i-i0];
+        } else {
+            c=CBU_SENTINEL;
+        }
+    }
+    *pi=i;
+    return c;
+}
+
+}  // namespace base_icu

diff --git a/src/base/third_party/icu/icu_utf.h b/src/base/third_party/icu/icu_utf.h
new file mode 100644
index 0000000..2b993b0
--- /dev/null
+++ b/src/base/third_party/icu/icu_utf.h

@@ -0,0 +1,391 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2004, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep09
+*   created by: Markus W. Scherer
+*/
+
+#ifndef BASE_THIRD_PARTY_ICU_ICU_UTF_H_
+#define BASE_THIRD_PARTY_ICU_ICU_UTF_H_
+
+#include "base/basictypes.h"
+
+namespace base_icu {
+
+typedef int32 UChar32;
+typedef uint16 UChar;
+typedef int8 UBool;
+
+// General ---------------------------------------------------------------------
+// from utf.h
+
+/**
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ *
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with CBU_SENTINEL.
+ *
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * @return -1
+ * @see UChar32
+ * @stable ICU 2.4
+ */
+#define CBU_SENTINEL (-1)
+
+/**
+ * Is this code point a Unicode noncharacter?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU_IS_UNICODE_NONCHAR(c) \
+    ((c)>=0xfdd0 && \
+     ((uint32)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+     (uint32)(c)<=0x10ffff)
+
+/**
+ * Is c a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU_IS_UNICODE_CHAR(c) \
+    ((uint32)(c)<0xd800 || \
+        ((uint32)(c)>0xdfff && \
+         (uint32)(c)<=0x10ffff && \
+         !CBU_IS_UNICODE_NONCHAR(c)))
+
+/**
+ * Is this code point a surrogate (U+d800..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+
+// UTF-8 macros ----------------------------------------------------------------
+// from utf8.h
+
+extern const uint8 utf8_countTrailBytes[256];
+
+/**
+ * Count the trail bytes for a UTF-8 lead byte (the argument is cast to uint8 first).
+ * @internal
+ */
+#define CBU8_COUNT_TRAIL_BYTES(leadByte) (base_icu::utf8_countTrailBytes[(uint8)(leadByte)])
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @internal
+ */
+#define CBU8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/**
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU8_IS_SINGLE(c) (((c)&0x80)==0)
+
+/**
+ * Is this code unit (byte) a UTF-8 lead byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU8_IS_LEAD(c) ((uint8)((c)-0xc0)<0x3e)
+
+/**
+ * Is this code unit (byte) a UTF-8 trail byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU8_IS_TRAIL(c) (((c)&0xc0)==0x80)
+
+/**
+ * How many code units (bytes) are used for the UTF-8 encoding
+ * of this Unicode code point?
+ * @param c 32-bit code point
+ * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+ * @stable ICU 2.4
+ */
+#define CBU8_LENGTH(c) \
+    ((uint32)(c)<=0x7f ? 1 : \
+        ((uint32)(c)<=0x7ff ? 2 : \
+            ((uint32)(c)<=0xd7ff ? 3 : \
+                ((uint32)(c)<=0xdfff || (uint32)(c)>0x10ffff ? 0 : \
+                    ((uint32)(c)<=0xffff ? 3 : 4)\
+                ) \
+            ) \
+        ) \
+    )
+
+/**
+ * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
+ * @return 4
+ * @stable ICU 2.4
+ */
+#define CBU8_MAX_LENGTH 4
+
+/**
+ * Function for handling "next code point" with error-checking.
+ * @internal
+ */
+UChar32 utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, UBool strict);
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to a negative value.
+ *
+ * @param s const uint8 * string (may be any pointer expression)
+ * @param i string offset, i<length
+ * @param length string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see CBU8_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define CBU8_NEXT(s, i, length, c) { \
+    (c)=(s)[(i)++]; \
+    if(((uint8)(c))>=0x80) { \
+        if(CBU8_IS_LEAD(c)) { \
+            (c)=base_icu::utf8_nextCharSafeBody((const uint8 *)(s), &(i), (int32)(length), (c), -1); \
+        } else { \
+            (c)=CBU_SENTINEL; \
+        } \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const uint8 * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see CBU8_APPEND
+ * @stable ICU 2.4
+ */
+#define CBU8_APPEND_UNSAFE(s, i, c) { \
+    if((uint32)(c)<=0x7f) { \
+        (s)[(i)++]=(uint8)(c); \
+    } else { \
+        if((uint32)(c)<=0x7ff) { \
+            (s)[(i)++]=(uint8)(((c)>>6)|0xc0); \
+        } else { \
+            if((uint32)(c)<=0xffff) { \
+                (s)[(i)++]=(uint8)(((c)>>12)|0xe0); \
+            } else { \
+                (s)[(i)++]=(uint8)(((c)>>18)|0xf0); \
+                (s)[(i)++]=(uint8)((((c)>>12)&0x3f)|0x80); \
+            } \
+            (s)[(i)++]=(uint8)((((c)>>6)&0x3f)|0x80); \
+        } \
+        (s)[(i)++]=(uint8)(((c)&0x3f)|0x80); \
+    } \
+}
+
+// UTF-16 macros ---------------------------------------------------------------
+// from utf16.h
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU16_IS_SINGLE(c) !CBU_IS_SURROGATE(c)
+
+/**
+ * Is this code unit a lead surrogate (U+d800..U+dbff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code unit a trail surrogate (U+dc00..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code unit a surrogate (U+d800..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU16_IS_SURROGATE(c) CBU_IS_SURROGATE(c)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define CBU16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Helper constant for CBU16_GET_SUPPLEMENTARY.
+ * @internal
+ */
+#define CBU16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/**
+ * Get a supplementary code point value (U+10000..U+10ffff)
+ * from its lead and trail surrogates.
+ * The result is undefined if the input values are not
+ * lead and trail surrogates.
+ *
+ * @param lead lead surrogate (U+d800..U+dbff)
+ * @param trail trail surrogate (U+dc00..U+dfff)
+ * @return supplementary code point (U+10000..U+10ffff)
+ * @stable ICU 2.4
+ */
+#define CBU16_GET_SUPPLEMENTARY(lead, trail) \
+    (((base_icu::UChar32)(lead)<<10UL)+(base_icu::UChar32)(trail)-CBU16_SURROGATE_OFFSET)
+
+
+/**
+ * Get the lead surrogate (0xd800..0xdbff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return lead surrogate (U+d800..U+dbff) for supplementary
+ * @stable ICU 2.4
+ */
+#define CBU16_LEAD(supplementary) \
+    (base_icu::UChar)(((supplementary)>>10)+0xd7c0)
+
+/**
+ * Get the trail surrogate (0xdc00..0xdfff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return trail surrogate (U+dc00..U+dfff) for supplementary
+ * @stable ICU 2.4
+ */
+#define CBU16_TRAIL(supplementary) \
+    (base_icu::UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/**
+ * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
+ * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
+ * @param c 32-bit code point
+ * @return 1 or 2
+ * @stable ICU 2.4
+ */
+#define CBU16_LENGTH(c) ((uint32)(c)<=0xffff ? 1 : 2)
+
+/**
+ * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+ * @return 2
+ * @stable ICU 2.4
+ */
+#define CBU16_MAX_LENGTH 2
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param i string offset, i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @stable ICU 2.4
+ */
+#define CBU16_NEXT(s, i, length, c) { \
+    (c)=(s)[(i)++]; \
+    if(CBU16_IS_LEAD(c)) { \
+        uint16 __c2; \
+        if((i)<(length) && CBU16_IS_TRAIL(__c2=(s)[(i)])) { \
+            ++(i); \
+            (c)=CBU16_GET_SUPPLEMENTARY((c), __c2); \
+        } \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see CBU16_APPEND
+ * @stable ICU 2.4
+ */
+#define CBU16_APPEND_UNSAFE(s, i, c) { \
+    if((uint32)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16)(c); \
+    } else { \
+        (s)[(i)++]=(uint16)(((c)>>10)+0xd7c0); \
+        (s)[(i)++]=(uint16)(((c)&0x3ff)|0xdc00); \
+    } \
+}
+
+}  // namespace base_icu
+
+#endif  // BASE_THIRD_PARTY_ICU_ICU_UTF_H_

diff --git a/src/build/build_config.h b/src/build/build_config.h
new file mode 100644
index 0000000..b07660d
--- /dev/null
+++ b/src/build/build_config.h

@@ -0,0 +1,168 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This file adds defines about the platform we're currently building on.
+//  Operating System:
+//    OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX or LINUX) /
+//    OS_NACL (NACL_SFI or NACL_NONSFI) / OS_NACL_SFI / OS_NACL_NONSFI
+//  Compiler:
+//    COMPILER_MSVC / COMPILER_GCC
+//  Processor:
+//    ARCH_CPU_X86 / ARCH_CPU_X86_64 / ARCH_CPU_X86_FAMILY (X86 or X86_64)
+//    ARCH_CPU_32_BITS / ARCH_CPU_64_BITS
+
+#ifndef BUILD_BUILD_CONFIG_H_
+#define BUILD_BUILD_CONFIG_H_
+
+// A set of macros to use for platform detection.
+#if defined(__native_client__)
+// __native_client__ must be first, so that other OS_ defines are not set.
+#define OS_NACL 1
+// OS_NACL comes in two sandboxing technology flavors, SFI or Non-SFI.
+// PNaCl toolchain defines __native_client_nonsfi__ macro in Non-SFI build
+// mode, while it does not in SFI build mode.
+#if defined(__native_client_nonsfi__)
+#define OS_NACL_NONSFI
+#else
+#define OS_NACL_SFI
+#endif
+#elif defined(ANDROID)
+#define OS_ANDROID 1
+#elif defined(__APPLE__)
+// only include TargetConditionals.h after testing ANDROID, as some android
+// builds on mac don't have this header available and it's not needed unless
+// the target is really mac/ios.
+#include <TargetConditionals.h>
+#define OS_MACOSX 1
+#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
+#define OS_IOS 1
+#endif  // defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
+#elif defined(__linux__)
+#define OS_LINUX 1
+// include a system header to pull in features.h for glibc/uclibc macros.
+#include <unistd.h>
+#if defined(__GLIBC__) && !defined(__UCLIBC__)
+// we really are using glibc, not uClibc pretending to be glibc
+#define LIBC_GLIBC 1
+#endif
+#elif defined(_WIN32)
+#define OS_WIN 1
+#define TOOLKIT_VIEWS 1
+#elif defined(__FreeBSD__)
+#define OS_FREEBSD 1
+#elif defined(__OpenBSD__)
+#define OS_OPENBSD 1
+#elif defined(__sun)
+#define OS_SOLARIS 1
+#elif defined(__QNXNTO__)
+#define OS_QNX 1
+#else
+#error Please add support for your platform in build/build_config.h
+#endif
+
+#if defined(USE_OPENSSL) && defined(USE_NSS)
+#error Cannot use both OpenSSL and NSS
+#endif
+
+// For access to standard BSD features, use OS_BSD instead of a
+// more specific macro.
+#if defined(OS_FREEBSD) || defined(OS_OPENBSD)
+#define OS_BSD 1
+#endif
+
+// For access to standard POSIXish features, use OS_POSIX instead of a
+// more specific macro.
+#if defined(OS_MACOSX) || defined(OS_LINUX) || defined(OS_FREEBSD) ||     \
+    defined(OS_OPENBSD) || defined(OS_SOLARIS) || defined(OS_ANDROID) ||  \
+    defined(OS_NACL) || defined(OS_QNX)
+#define OS_POSIX 1
+#endif
+
+// Use tcmalloc
+#if (defined(OS_WIN) || defined(OS_LINUX) || defined(OS_ANDROID)) && \
+    !defined(NO_TCMALLOC)
+#define USE_TCMALLOC 1
+#endif
+
+// Compiler detection.
+#if defined(__GNUC__)
+#define COMPILER_GCC 1
+#elif defined(_MSC_VER)
+#define COMPILER_MSVC 1
+#else
+#error Please add support for your compiler in build/build_config.h
+#endif
+
+// Processor architecture detection.  For more info on what's defined, see:
+//   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
+//   http://www.agner.org/optimize/calling_conventions.pdf
+//   or with gcc, run: "echo | gcc -E -dM -"
+#if defined(_M_X64) || defined(__x86_64__)
+#define ARCH_CPU_X86_FAMILY 1
+#define ARCH_CPU_X86_64 1
+#define ARCH_CPU_64_BITS 1
+#define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(_M_IX86) || defined(__i386__)
+#define ARCH_CPU_X86_FAMILY 1
+#define ARCH_CPU_X86 1
+#define ARCH_CPU_32_BITS 1
+#define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__ARMEL__)
+#define ARCH_CPU_ARM_FAMILY 1
+#define ARCH_CPU_ARMEL 1
+#define ARCH_CPU_32_BITS 1
+#define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__aarch64__)
+#define ARCH_CPU_ARM_FAMILY 1
+#define ARCH_CPU_ARM64 1
+#define ARCH_CPU_64_BITS 1
+#define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__pnacl__)
+#define ARCH_CPU_32_BITS 1
+#define ARCH_CPU_LITTLE_ENDIAN 1
+#elif defined(__MIPSEL__)
+#if defined(__LP64__)
+#define ARCH_CPU_MIPS64_FAMILY 1
+#define ARCH_CPU_MIPS64EL 1
+#define ARCH_CPU_64_BITS 1
+#define ARCH_CPU_LITTLE_ENDIAN 1
+#else
+#define ARCH_CPU_MIPS_FAMILY 1
+#define ARCH_CPU_MIPSEL 1
+#define ARCH_CPU_32_BITS 1
+#define ARCH_CPU_LITTLE_ENDIAN 1
+#endif
+#else
+#error Please add support for your architecture in build/build_config.h
+#endif
+
+// Type detection for wchar_t.
+#if defined(OS_WIN)
+#define WCHAR_T_IS_UTF16
+#elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
+    defined(__WCHAR_MAX__) && \
+    (__WCHAR_MAX__ == 0x7fffffff || __WCHAR_MAX__ == 0xffffffff)
+#define WCHAR_T_IS_UTF32
+#elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
+    defined(__WCHAR_MAX__) && \
+    (__WCHAR_MAX__ == 0x7fff || __WCHAR_MAX__ == 0xffff)
+// On Posix, we'll detect short wchar_t, but projects aren't guaranteed to
+// compile in this mode (in particular, Chrome doesn't). This is intended for
+// other projects using base who manage their own dependencies and make sure
+// short wchar works for them.
+#define WCHAR_T_IS_UTF16
+#else
+#error Please add support for your compiler in build/build_config.h
+#endif
+
+#if defined(OS_ANDROID)
+// The compiler thinks std::string::const_iterator and "const char*" are
+// equivalent types.
+#define STD_STRING_ITERATOR_IS_CHAR_POINTER
+// The compiler thinks base::string16::const_iterator and "char16*" are
+// equivalent types.
+#define BASE_STRING16_ITERATOR_IS_CHAR16_POINTER
+#endif
+
+#endif  // BUILD_BUILD_CONFIG_H_

diff --git a/src/url/gurl.cc b/src/url/gurl.cc
new file mode 100644
index 0000000..6801dda
--- /dev/null
+++ b/src/url/gurl.cc

@@ -0,0 +1,549 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifdef WIN32
+#include <windows.h>
+#else
+#include <pthread.h>
+#endif
+
+#include <algorithm>
+#include <ostream>
+
+#include "url/gurl.h"
+
+#include "base/logging.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_util.h"
+
+namespace {
+
+static std::string* empty_string = NULL;
+static GURL* empty_gurl = NULL;
+
+#ifdef WIN32
+
+// Hands out a reference to a shared empty string, for callers that must
+// return a std::string reference but have no underlying string. The string
+// is heap-allocated on first use so that no static object needs to be
+// constructed at startup or destroyed at shutdown.
+const std::string& EmptyStringForGURL() {
+  if (empty_string)
+    return *empty_string;
+
+  // Several threads may arrive here together. Each one builds a candidate and
+  // the compare-exchange stores it only if |empty_string| is still NULL.
+  std::string* candidate = new std::string;
+  PVOID previous = InterlockedCompareExchangePointer(
+      reinterpret_cast<PVOID*>(&empty_string), candidate, NULL);
+  if (previous != NULL) {
+    // A non-NULL previous value means another thread already published its
+    // string and ours was not stored, so release it.
+    delete candidate;
+  }
+  return *empty_string;
+}
+
+#else
+
+static pthread_once_t empty_string_once = PTHREAD_ONCE_INIT;
+static pthread_once_t empty_gurl_once = PTHREAD_ONCE_INIT;
+
+// Initializer handed to pthread_once(): allocates the shared empty string.
+void EmptyStringForGURLOnce(void) {
+  empty_string = new std::string();
+}
+
+// Hands out a reference to a shared empty string, for callers that must
+// return a std::string reference but have no underlying string.
+const std::string& EmptyStringForGURL() {
+  // Allocating on first use (guarded by pthread_once) avoids static object
+  // construction/destruction on startup/shutdown.
+  pthread_once(&empty_string_once, EmptyStringForGURLOnce);
+  const std::string& shared_empty = *empty_string;
+  return shared_empty;
+}
+
+#endif  // WIN32
+
+} // namespace
+
+// Creates an empty URL, which is marked as not valid.
+GURL::GURL()
+    : is_valid_(false) {}
+
+// Copies the spec, validity flag and parsed components from |other|. The
+// inner URL, when |other| has one, is deep-copied so the two objects never
+// share it.
+GURL::GURL(const GURL& other)
+    : spec_(other.spec_),
+      is_valid_(other.is_valid_),
+      parsed_(other.parsed_) {
+  const GURL* source_inner = other.inner_url_.get();
+  if (source_inner != NULL)
+    inner_url_.reset(new GURL(*source_inner));
+
+  // Valid filesystem urls should always have an inner_url_.
+  DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
+}
+
+// Builds a GURL by canonicalizing a narrow |url_string|.
+GURL::GURL(const std::string& url_string) {
+  const bool trim_path_end = true;
+  InitCanonical(url_string, trim_path_end);
+}
+
+// Builds a GURL by canonicalizing a string16 |url_string|.
+GURL::GURL(const url::base::string16& url_string) {
+  const bool trim_path_end = true;
+  InitCanonical(url_string, trim_path_end);
+}
+
+// Same as the narrow-string constructor, except that InitCanonical() is
+// called with trim_path_end == false. The selector argument only picks this
+// overload; its value is not used.
+GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) {
+  const bool trim_path_end = false;
+  InitCanonical(url_string, trim_path_end);
+}
+
+// Builds a GURL from a spec the caller states is already canonical: the first
+// |canonical_spec_len| bytes of |canonical_spec| are copied, and |parsed| and
+// |is_valid| are stored as given. InitializeFromCanonicalSpec() then sets up
+// the inner URL for valid filesystem URLs and, in debug builds, cross-checks
+// the supplied data.
+GURL::GURL(const char* canonical_spec,
+           size_t canonical_spec_len,
+           const url::Parsed& parsed,
+           bool is_valid)
+    : spec_(canonical_spec, canonical_spec_len),
+      is_valid_(is_valid),
+      parsed_(parsed) {
+  InitializeFromCanonicalSpec();
+}
+
+// Same as above, but takes the spec as a std::string by value and swaps its
+// contents into |spec_| instead of copying the characters.
+GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid)
+    : is_valid_(is_valid),
+      parsed_(parsed) {
+  spec_.swap(canonical_spec);
+  InitializeFromCanonicalSpec();
+}
+
+// Canonicalizes |input_spec| into |spec_|, filling in |parsed_| and
+// |is_valid_|. |trim_path_end| is forwarded unchanged to url::Canonicalize().
+// For a valid filesystem URL the inner URL is created as well.
+template<typename STR>
+void GURL::InitCanonical(const STR& input_spec, bool trim_path_end) {
+  // Leave headroom beyond the input size so that escaping a few characters
+  // does not force a reallocation of the output string.
+  spec_.reserve(input_spec.size() + 32);
+
+  url::StdStringCanonOutput canon_output(&spec_);
+  const int input_len = static_cast<int>(input_spec.length());
+  is_valid_ = url::Canonicalize(input_spec.data(), input_len, trim_path_end,
+                                NULL, &canon_output, &parsed_);
+  canon_output.Complete();  // Must be done before using string.
+
+  if (!is_valid_ || !SchemeIsFileSystem())
+    return;
+
+  inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
+                            *parsed_.inner_parsed(), true));
+}
+
+// Finishes construction from a caller-supplied canonical spec: creates the
+// inner URL for valid filesystem URLs and, unless NDEBUG is defined,
+// re-canonicalizes the spec and DCHECKs that the result matches the supplied
+// spec and parsed components.
+void GURL::InitializeFromCanonicalSpec() {
+  if (is_valid_ && SchemeIsFileSystem()) {
+    inner_url_.reset(
+        new GURL(spec_.data(), parsed_.Length(),
+                 *parsed_.inner_parsed(), true));
+  }
+
+#ifndef NDEBUG
+  // For testing purposes, check that the parsed canonical URL is identical to
+  // what we would have produced. Skip the check for invalid URLs: it has no
+  // meaning for them and we can't always canonicalize them reproducibly.
+  if (is_valid_) {
+    url::Component scheme;
+    // We can't do this check on the inner_url of a filesystem URL, as
+    // canonical_spec actually points to the start of the outer URL, so we'd
+    // end up with infinite recursion in this constructor.
+    if (!url::FindAndCompareScheme(spec_.data(), spec_.length(),
+                                   url::kFileSystemScheme, &scheme) ||
+        scheme.begin == parsed_.scheme.begin) {
+      // We need to retain trailing whitespace on path URLs, as the |parsed_|
+      // spec we originally received may legitimately contain trailing
+      // whitespace on the path or other components, e.g. if the #ref has been
+      // removed from a "foo:hello #ref" URL (see http://crbug.com/291747).
+      GURL test_url(spec_, RETAIN_TRAILING_PATH_WHITEPACE);
+
+      DCHECK(test_url.is_valid_ == is_valid_);
+      DCHECK(test_url.spec_ == spec_);
+
+      DCHECK(test_url.parsed_.scheme == parsed_.scheme);
+      DCHECK(test_url.parsed_.username == parsed_.username);
+      DCHECK(test_url.parsed_.password == parsed_.password);
+      DCHECK(test_url.parsed_.host == parsed_.host);
+      DCHECK(test_url.parsed_.port == parsed_.port);
+      DCHECK(test_url.parsed_.path == parsed_.path);
+      DCHECK(test_url.parsed_.query == parsed_.query);
+      DCHECK(test_url.parsed_.ref == parsed_.ref);
+    }
+  }
+#endif
+}
+
+// Nothing to release by hand; the members clean up after themselves.
+GURL::~GURL() {}
+
+// Assignment: |other| is received by value (so it is already a private copy
+// of the right-hand side) and its contents are exchanged with this object's.
+GURL& GURL::operator=(GURL other) {
+  this->Swap(&other);
+  return *this;
+}
+
+// Returns the canonical spec. The stored spec is returned when the URL is
+// valid or the spec is empty; otherwise this DCHECKs and returns a shared
+// empty string so that an invalid spec is not handed out.
+const std::string& GURL::spec() const {
+  const bool may_expose_spec = is_valid_ || spec_.empty();
+  if (!may_expose_spec) {
+    DCHECK(false) << "Trying to get the spec of an invalid URL!";
+    return EmptyStringForGURL();
+  }
+  return spec_;
+}
+
+// All comparisons are plain comparisons of the stored spec strings; the
+// validity flag and parsed components are not consulted.
+bool GURL::operator==(const GURL& other) const {
+  return spec_.compare(other.spec_) == 0;
+}
+
+bool GURL::operator!=(const GURL& other) const {
+  return spec_.compare(other.spec_) != 0;
+}
+
+bool GURL::operator<(const GURL& other) const {
+  return spec_.compare(other.spec_) < 0;
+}
+
+bool GURL::operator>(const GURL& other) const {
+  return spec_.compare(other.spec_) > 0;
+}
+
+// Resolves |relative| against this URL without a charset converter.
+GURL GURL::Resolve(const std::string& relative) const {
+  url::CharsetConverter* const no_converter = NULL;
+  return ResolveWithCharsetConverter(relative, no_converter);
+}
+
+// string16 flavour of the above.
+GURL GURL::Resolve(const url::base::string16& relative) const {
+  url::CharsetConverter* const no_converter = NULL;
+  return ResolveWithCharsetConverter(relative, no_converter);
+}
+
+// Note: code duplicated below (it's inconvenient to use a template here).
+//
+// Resolves the narrow string |relative| against this URL, passing
+// |charset_converter| through to url::ResolveRelative(). Returns the empty
+// URL when this URL is invalid or when resolution fails.
+GURL GURL::ResolveWithCharsetConverter(
+    const std::string& relative,
+    url::CharsetConverter* charset_converter) const {
+  // Not allowed for invalid URLs.
+  if (!is_valid_)
+    return GURL();
+
+  GURL resolved;
+
+  // Leave headroom beyond our own spec size so that escaping a few characters
+  // does not force a reallocation of the output string.
+  resolved.spec_.reserve(spec_.size() + 32);
+  url::StdStringCanonOutput canon_output(&resolved.spec_);
+
+  const int base_len = static_cast<int>(spec_.length());
+  const int relative_len = static_cast<int>(relative.length());
+  const bool resolved_ok = url::ResolveRelative(
+      spec_.data(), base_len, parsed_, relative.data(), relative_len,
+      charset_converter, &canon_output, &resolved.parsed_);
+  if (!resolved_ok) {
+    // Error resolving, return an empty URL.
+    return GURL();
+  }
+
+  canon_output.Complete();
+  resolved.is_valid_ = true;
+  if (!resolved.SchemeIsFileSystem())
+    return resolved;
+
+  // Filesystem URLs also carry an inner URL, cut from the new spec.
+  resolved.inner_url_.reset(
+      new GURL(resolved.spec_.data(), resolved.parsed_.Length(),
+               *resolved.parsed_.inner_parsed(), true));
+  return resolved;
+}
+
+// Note: code duplicated above (it's inconvenient to use a template here).
+//
+// Resolves the string16 |relative| against this URL, passing
+// |charset_converter| through to url::ResolveRelative(). Returns the empty
+// URL when this URL is invalid or when resolution fails.
+GURL GURL::ResolveWithCharsetConverter(
+    const url::base::string16& relative,
+    url::CharsetConverter* charset_converter) const {
+  // Not allowed for invalid URLs.
+  if (!is_valid_)
+    return GURL();
+
+  GURL resolved;
+
+  // Leave headroom beyond our own spec size so that escaping a few characters
+  // does not force a reallocation of the output string.
+  resolved.spec_.reserve(spec_.size() + 32);
+  url::StdStringCanonOutput canon_output(&resolved.spec_);
+
+  const int base_len = static_cast<int>(spec_.length());
+  const int relative_len = static_cast<int>(relative.length());
+  const bool resolved_ok = url::ResolveRelative(
+      spec_.data(), base_len, parsed_, relative.data(), relative_len,
+      charset_converter, &canon_output, &resolved.parsed_);
+  if (!resolved_ok) {
+    // Error resolving, return an empty URL.
+    return GURL();
+  }
+
+  canon_output.Complete();
+  resolved.is_valid_ = true;
+  if (!resolved.SchemeIsFileSystem())
+    return resolved;
+
+  // Filesystem URLs also carry an inner URL, cut from the new spec.
+  resolved.inner_url_.reset(
+      new GURL(resolved.spec_.data(), resolved.parsed_.Length(),
+               *resolved.parsed_.inner_parsed(), true));
+  return resolved;
+}
+
+// Note: code duplicated below (it's inconvenient to use a template here).
+//
+// Returns a new GURL produced by url::ReplaceComponents() from this URL's
+// spec and |replacements|. The result's validity is whatever that call
+// reports. Replacing components of an invalid URL yields the empty URL.
+GURL GURL::ReplaceComponents(
+    const url::Replacements<char>& replacements) const {
+  GURL result;
+
+  // Not allowed for invalid URLs.
+  if (!is_valid_)
+    return GURL();
+
+  // Reserve enough room in the output for the input, plus some extra so that
+  // we have room if we have to escape a few things without reallocating.
+  result.spec_.reserve(spec_.size() + 32);
+  url::StdStringCanonOutput output(&result.spec_);
+
+  result.is_valid_ = url::ReplaceComponents(
+      spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
+      NULL, &output, &result.parsed_);
+
+  output.Complete();
+  if (result.is_valid_ && result.SchemeIsFileSystem()) {
+    // The inner URL must be cut from the NEW spec: |result.parsed_| holds
+    // offsets and a length that refer to |result.spec_|. Reading them out of
+    // the original |spec_| would copy stale text and could run past the end
+    // of that buffer when the replacement made the URL longer.
+    result.inner_url_.reset(
+        new GURL(result.spec_.data(), result.parsed_.Length(),
+                 *result.parsed_.inner_parsed(), true));
+  }
+  return result;
+}
+
+// Note: code duplicated above (it's inconvenient to use a template here).
+//
+// Returns a new GURL produced by url::ReplaceComponents() from this URL's
+// spec and the char16 |replacements|. The result's validity is whatever that
+// call reports. Replacing components of an invalid URL yields the empty URL.
+GURL GURL::ReplaceComponents(
+    const url::Replacements<url::base::char16>& replacements) const {
+  GURL result;
+
+  // Not allowed for invalid URLs.
+  if (!is_valid_)
+    return GURL();
+
+  // Reserve enough room in the output for the input, plus some extra so that
+  // we have room if we have to escape a few things without reallocating.
+  result.spec_.reserve(spec_.size() + 32);
+  url::StdStringCanonOutput output(&result.spec_);
+
+  result.is_valid_ = url::ReplaceComponents(
+      spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
+      NULL, &output, &result.parsed_);
+
+  output.Complete();
+  if (result.is_valid_ && result.SchemeIsFileSystem()) {
+    // The inner URL must be cut from the NEW spec: |result.parsed_| holds
+    // offsets and a length that refer to |result.spec_|. Reading them out of
+    // the original |spec_| would copy stale text and could run past the end
+    // of that buffer when the replacement made the URL longer.
+    result.inner_url_.reset(
+        new GURL(result.spec_.data(), result.parsed_.Length(),
+                 *result.parsed_.inner_parsed(), true));
+  }
+  return result;
+}
+
+// Returns this URL with username, password, path, query and ref cleared.
+// Filesystem URLs delegate to their inner URL. Invalid or nonstandard URLs
+// produce the empty URL, since an origin makes no sense for them.
+GURL GURL::GetOrigin() const {
+  const bool origin_is_meaningful = is_valid_ && IsStandard();
+  if (!origin_is_meaningful)
+    return GURL();
+
+  if (SchemeIsFileSystem())
+    return inner_url_->GetOrigin();
+
+  url::Replacements<char> strip_to_origin;
+  strip_to_origin.ClearUsername();
+  strip_to_origin.ClearPassword();
+  strip_to_origin.ClearPath();
+  strip_to_origin.ClearQuery();
+  strip_to_origin.ClearRef();
+  return ReplaceComponents(strip_to_origin);
+}
+
+// Returns this URL with the ref, username and password removed. URLs that
+// are invalid or whose scheme is neither http nor https yield the empty URL.
+GURL GURL::GetAsReferrer() const {
+  const bool can_be_referrer = is_valid_ && SchemeIsHTTPOrHTTPS();
+  if (!can_be_referrer)
+    return GURL();
+
+  // Nothing to strip: hand back a plain copy.
+  const bool needs_stripping = has_ref() || has_username() || has_password();
+  if (!needs_stripping)
+    return GURL(*this);
+
+  url::Replacements<char> strip;
+  strip.ClearRef();
+  strip.ClearUsername();
+  strip.ClearPassword();
+  return ReplaceComponents(strip);
+}
+
+// Returns a copy of this URL whose path is "/" and which has no query or
+// ref. Invalid or nonstandard URLs yield the empty URL; a URL whose path has
+// length zero is returned as an unmodified copy.
+GURL GURL::GetWithEmptyPath() const {
+  const bool applicable = is_valid_ && IsStandard();
+  if (!applicable)
+    return GURL();
+
+  // We could optimize this since we know that the URL is canonical, and we are
+  // appending a canonical path, so avoiding re-parsing.
+  GURL trimmed(*this);
+  if (parsed_.path.len == 0)
+    return trimmed;
+
+  // The path holds at least one character here, so it can be rewritten in
+  // place: overwrite its first character and cut the spec right after it.
+  trimmed.spec_[trimmed.parsed_.path.begin] = '/';
+  trimmed.spec_.resize(trimmed.parsed_.path.begin + 1);
+  trimmed.parsed_.path.len = 1;
+
+  // Everything after the path is gone from the spec; drop it from the parse.
+  trimmed.parsed_.query.reset();
+  trimmed.parsed_.ref.reset();
+  return trimmed;
+}
+
+// Asks url::IsStandard() whether this URL's scheme component is standard.
+bool GURL::IsStandard() const {
+  const bool standard = url::IsStandard(spec_.data(), parsed_.scheme);
+  return standard;
+}
+
+// Returns true when this URL's scheme equals |lower_ascii_scheme| under
+// url::LowerCaseEqualsASCII(). A URL without a scheme matches only a NULL
+// |lower_ascii_scheme|.
+bool GURL::SchemeIs(const char* lower_ascii_scheme) const {
+  const bool has_scheme = parsed_.scheme.len > 0;
+  if (!has_scheme)
+    return lower_ascii_scheme == NULL;
+
+  const char* scheme_begin = spec_.data() + parsed_.scheme.begin;
+  const char* scheme_end = spec_.data() + parsed_.scheme.end();
+  return url::LowerCaseEqualsASCII(scheme_begin, scheme_end,
+                                   lower_ascii_scheme);
+}
+
+// True when the scheme is url::kHttpScheme or url::kHttpsScheme.
+bool GURL::SchemeIsHTTPOrHTTPS() const {
+  if (SchemeIs(url::kHttpScheme))
+    return true;
+  return SchemeIs(url::kHttpsScheme);
+}
+
+// True when the scheme is url::kWsScheme or url::kWssScheme.
+bool GURL::SchemeIsWSOrWSS() const {
+  if (SchemeIs(url::kWsScheme))
+    return true;
+  return SchemeIs(url::kWssScheme);
+}
+
+// Returns the result of url::ParsePort() on the port component, or
+// url::PORT_UNSPECIFIED when the port component is not nonempty.
+int GURL::IntPort() const {
+  if (!parsed_.port.is_nonempty())
+    return url::PORT_UNSPECIFIED;
+  return url::ParsePort(spec_.data(), parsed_.port);
+}
+
+// Returns IntPort(), except that a standard URL with an unspecified port
+// gets the value of url::DefaultPortForScheme() for its scheme instead.
+int GURL::EffectiveIntPort() const {
+  const int explicit_port = IntPort();
+  if (explicit_port != url::PORT_UNSPECIFIED || !IsStandard())
+    return explicit_port;
+
+  const char* scheme_begin = spec_.data() + parsed_.scheme.begin;
+  return url::DefaultPortForScheme(scheme_begin, parsed_.scheme.len);
+}
+
+// Returns the text of the component that url::ExtractFileName() identifies
+// within this URL's path.
+std::string GURL::ExtractFileName() const {
+  url::Component file_name;
+  url::ExtractFileName(spec_.data(), parsed_.path, &file_name);
+  return ComponentString(file_name);
+}
+
+// Returns the part of the spec that starts at the path and stops before the
+// '#' of the ref (when a ref exists), otherwise at the end of the query (when
+// a query exists), otherwise at the end of the path.
+std::string GURL::PathForRequest() const {
+  DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty";
+
+  // The length is always computed from the parsed components rather than
+  // from the spec's terminator: if we're an inner_url, our spec continues on
+  // into our outer url's path/query/ref.
+  int request_len;
+  if (parsed_.ref.len >= 0) {
+    // The ref begins right after the '#', so subtract one more to drop the
+    // '#' itself as well.
+    request_len = parsed_.ref.begin - parsed_.path.begin - 1;
+  } else if (parsed_.query.is_valid()) {
+    request_len = parsed_.query.end() - parsed_.path.begin;
+  } else {
+    request_len = parsed_.path.len;
+  }
+  return std::string(spec_, parsed_.path.begin, request_len);
+}
+
+// Returns the host text, minus a leading '[' and trailing ']' when the host
+// is wrapped in both (as an IPv6 literal is).
+std::string GURL::HostNoBrackets() const {
+  url::Component host(parsed_.host);
+  if (host.len >= 2) {
+    const bool opens_with_bracket = spec_[host.begin] == '[';
+    if (opens_with_bracket && spec_[host.end() - 1] == ']') {
+      // Shrink the component by one character on each side.
+      host.begin += 1;
+      host.len -= 2;
+    }
+  }
+  return ComponentString(host);
+}
+
+// Returns the text of the component reported by Parsed::GetContent(), or an
+// empty string for an invalid URL.
+std::string GURL::GetContent() const {
+  if (!is_valid_)
+    return std::string();
+  return ComponentString(parsed_.GetContent());
+}
+
+// Runs url::CanonicalizeIPAddress() on the host component and reports
+// whether the resulting host info says it is an IP address. Invalid URLs and
+// URLs with an empty spec return false without canonicalizing anything.
+bool GURL::HostIsIPAddress() const {
+  const bool has_usable_spec = is_valid_ && !spec_.empty();
+  if (!has_usable_spec)
+    return false;
+
+  // The canonical text is not needed; only |info| is inspected.
+  url::RawCanonOutputT<char, 128> discarded_text;
+  url::CanonHostInfo info;
+  url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &discarded_text,
+                             &info);
+  return info.IsIPAddress();
+}
+
+#ifdef WIN32
+
+// Hands out a reference to a shared empty GURL. The object is heap-allocated
+// on first use so that no static object needs to be constructed at startup
+// or destroyed at shutdown.
+const GURL& GURL::EmptyGURL() {
+  if (empty_gurl)
+    return *empty_gurl;
+
+  // Several threads may arrive here together. Each one builds a candidate and
+  // the compare-exchange stores it only if |empty_gurl| is still NULL.
+  GURL* candidate = new GURL;
+  PVOID previous = InterlockedCompareExchangePointer(
+      reinterpret_cast<PVOID*>(&empty_gurl), candidate, NULL);
+  if (previous != NULL) {
+    // A non-NULL previous value means another thread already published its
+    // GURL and ours was not stored, so release it.
+    delete candidate;
+  }
+  return *empty_gurl;
+}
+
+#else
+
+// Initializer handed to pthread_once(): allocates the shared empty GURL.
+void EmptyGURLOnce(void) {
+  empty_gurl = new GURL();
+}
+
+// Hands out a reference to a shared empty GURL.
+const GURL& GURL::EmptyGURL() {
+  // Allocating on first use (guarded by pthread_once) avoids static object
+  // construction/destruction on startup/shutdown.
+  pthread_once(&empty_gurl_once, EmptyGURLOnce);
+  const GURL& shared_empty = *empty_gurl;
+  return shared_empty;
+}
+
+#endif  // WIN32
+
+// Returns true when this URL's host equals, or is a subdomain of, the first
+// |domain_len| characters of |lower_ascii_domain| (which need not be
+// NUL-terminated). Filesystem URLs are answered by their inner URL. Returns
+// false for invalid URLs, for an empty host, and for a non-positive
+// |domain_len|.
+bool GURL::DomainIs(const char* lower_ascii_domain,
+                    int domain_len) const {
+  // Return false if this URL is not valid or domain is empty. A negative
+  // length is rejected too: it would otherwise index before the start of
+  // |lower_ascii_domain| below.
+  if (!is_valid_ || domain_len <= 0)
+    return false;
+
+  // FileSystem URLs have empty parsed_.host, so check this first.
+  if (SchemeIsFileSystem() && inner_url_)
+    return inner_url_->DomainIs(lower_ascii_domain, domain_len);
+
+  if (!parsed_.host.is_nonempty())
+    return false;
+
+  // Check whether the host name ends with a dot. If it does, treat it the
+  // same as the dot-less form, unless the domain being compared against also
+  // ends with a dot.
+  const char* last_pos = spec_.data() + parsed_.host.end() - 1;
+  int host_len = parsed_.host.len;
+  if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
+    last_pos--;
+    host_len--;
+  }
+
+  // Return false if host's length is less than domain's length.
+  if (host_len < domain_len)
+    return false;
+
+  // Compare the last |domain_len| characters of the host with the domain.
+  const char* start_pos = spec_.data() + parsed_.host.begin +
+                          host_len - domain_len;
+
+  if (!url::LowerCaseEqualsASCII(start_pos,
+                                 last_pos + 1,
+                                 lower_ascii_domain,
+                                 lower_ascii_domain + domain_len))
+    return false;
+
+  // When the host is longer than the domain, the match must start at a label
+  // boundary: either the domain itself starts with a dot or the character
+  // just before the match is a dot. For example www.google.com has domain
+  // "google.com" but www.iamnotgoogle.com does not.
+  if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
+      '.' != *(start_pos - 1))
+    return false;
+
+  return true;
+}
+
+// Exchanges every member of this object with the corresponding member of
+// |*other|.
+void GURL::Swap(GURL* other) {
+  GURL& that = *other;
+  inner_url_.swap(that.inner_url_);
+  std::swap(parsed_, that.parsed_);
+  std::swap(is_valid_, that.is_valid_);
+  spec_.swap(that.spec_);
+}
+
+// Streams the URL's possibly-invalid spec.
+std::ostream& operator<<(std::ostream& out, const GURL& url) {
+  out << url.possibly_invalid_spec();
+  return out;
+}

diff --git a/googleurl/src/gurl.h b/src/url/gurl.h
similarity index 69%
rename from googleurl/src/gurl.h
rename to src/url/gurl.h
index 29fea81..16d9a2a 100644
--- a/googleurl/src/gurl.h
+++ b/src/url/gurl.h

@@ -1,55 +1,32 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
-#ifndef GOOGLEURL_SRC_GURL_H__
-#define GOOGLEURL_SRC_GURL_H__
+#ifndef URL_GURL_H_
+#define URL_GURL_H_
 
-#include <iostream>
+#include <iosfwd>
+#include <memory>
 #include <string>
 
-#include "base/string16.h"
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_stdstring.h"
-#include "googleurl/src/url_common.h"
-#include "googleurl/src/url_parse.h"
+#include "base/strings/string16.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_constants.h"
+#include "url/url_export.h"
+#include "url/url_parse.h"
 
-class GURL {
+class URL_EXPORT GURL {
  public:
-  typedef url_canon::StdStringReplacements<std::string> Replacements;
-  typedef url_canon::StdStringReplacements<string16> ReplacementsW;
+  typedef url::StdStringReplacements<std::string> Replacements;
+  typedef url::StdStringReplacements<url::base::string16> ReplacementsW;
 
   // Creates an empty, invalid URL.
-  GURL_API GURL();
+  GURL();
 
   // Copy construction is relatively inexpensive, with most of the time going
   // to reallocating the string. It does not re-parse.
-  GURL_API GURL(const GURL& other);
+  GURL(const GURL& other);
 
   // The narrow version requires the input be UTF-8. Invalid UTF-8 input will
   // result in an invalid URL.
@@ -58,16 +35,27 @@
   // encode the query parameters. It is probably sufficient for the narrow
   // version to assume the query parameter encoding should be the same as the
   // input encoding.
-  GURL_API explicit GURL(const std::string& url_string
-                         /*, output_param_encoding*/);
-  GURL_API explicit GURL(const string16& url_string
-                         /*, output_param_encoding*/);
+  explicit GURL(const std::string& url_string /*, output_param_encoding*/);
+  explicit GURL(const url::base::string16& url_string /*, output_param_encoding*/);
 
   // Constructor for URLs that have already been parsed and canonicalized. This
   // is used for conversions from KURL, for example. The caller must supply all
   // information associated with the URL, which must be correct and consistent.
-  GURL_API GURL(const char* canonical_spec, size_t canonical_spec_len,
-                const url_parse::Parsed& parsed, bool is_valid);
+  GURL(const char* canonical_spec,
+       size_t canonical_spec_len,
+       const url::Parsed& parsed,
+       bool is_valid);
+  // Notice that we take the canonical_spec by value so that we can convert
+  // from WebURL without copying the string. When we call this constructor
+  // we pass in a temporary std::string, which lets the compiler skip the
+  // copy and just move the std::string into the function argument. In the
+  // implementation, we use swap to move the data into the GURL itself,
+  // which means we end up with zero copies.
+  GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid);
+
+  ~GURL();
+
+  GURL& operator=(GURL other);
 
   // Returns true when this object represents a valid parsed URL. When not
   // valid, other functions will still succeed, but you will not get canonical
@@ -99,7 +87,7 @@
   // Used invalid_spec() below to get the unusable spec of an invalid URL. This
   // separation is designed to prevent errors that may cause security problems
   // that could result from the mistaken use of an invalid URL.
-  GURL_API const std::string& spec() const;
+  const std::string& spec() const;
 
   // Returns the potentially invalid spec for a the URL. This spec MUST NOT be
   // modified or sent over the network. It is designed to be displayed in error
@@ -119,22 +107,17 @@
   // or may not be valid. If you are using this to index into the spec, BE
   // SURE YOU ARE USING possibly_invalid_spec() to get the spec, and that you
   // don't do anything "important" with invalid specs.
-  const url_parse::Parsed& parsed_for_possibly_invalid_spec() const {
+  const url::Parsed& parsed_for_possibly_invalid_spec() const {
     return parsed_;
   }
 
   // Defiant equality operator!
-  bool operator==(const GURL& other) const {
-    return spec_ == other.spec_;
-  }
-  bool operator!=(const GURL& other) const {
-    return spec_ != other.spec_;
-  }
+  bool operator==(const GURL& other) const;
+  bool operator!=(const GURL& other) const;
 
   // Allows GURL to used as a key in STL (for example, a std::set or std::map).
-  bool operator<(const GURL& other) const {
-    return spec_ < other.spec_;
-  }
+  bool operator<(const GURL& other) const;
+  bool operator>(const GURL& other) const;
 
   // Resolves a URL that's possibly relative to this object's URL, and returns
   // it. Absolute URLs are also handled according to the rules of URLs on web
@@ -151,8 +134,8 @@
   //
   // It is an error to resolve a URL relative to an invalid URL. The result
   // will be the empty URL.
-  GURL_API GURL Resolve(const std::string& relative) const;
-  GURL_API GURL Resolve(const string16& relative) const;
+  GURL Resolve(const std::string& relative) const;
+  GURL Resolve(const url::base::string16& relative) const;
 
   // Like Resolve() above but takes a character set encoder which will be used
   // for any query text specified in the input. The charset converter parameter
@@ -161,12 +144,12 @@
   // TODO(brettw): These should be replaced with versions that take something
   // more friendly than a raw CharsetConverter (maybe like an ICU character set
   // name).
-  GURL_API GURL ResolveWithCharsetConverter(
+  GURL ResolveWithCharsetConverter(
       const std::string& relative,
-      url_canon::CharsetConverter* charset_converter) const;
-  GURL_API GURL ResolveWithCharsetConverter(
-      const string16& relative,
-      url_canon::CharsetConverter* charset_converter) const;
+      url::CharsetConverter* charset_converter) const;
+  GURL ResolveWithCharsetConverter(
+      const url::base::string16& relative,
+      url::CharsetConverter* charset_converter) const;
 
   // Creates a new GURL by replacing the current URL's components with the
   // supplied versions. See the Replacements class in url_canon.h for more.
@@ -177,12 +160,11 @@
   // It is an error to replace components of an invalid URL. The result will
   // be the empty URL.
   //
-  // Note that we use the more general url_canon::Replacements type to give
+  // Note that we use the more general url::Replacements type to give
   // callers extra flexibility rather than our override.
-  GURL_API GURL ReplaceComponents(
-      const url_canon::Replacements<char>& replacements) const;
-  GURL_API GURL ReplaceComponents(
-      const url_canon::Replacements<char16>& replacements) const;
+  GURL ReplaceComponents(const url::Replacements<char>& replacements) const;
+  GURL ReplaceComponents(
+      const url::Replacements<url::base::char16>& replacements) const;
 
   // A helper function that is equivalent to replacing the path with a slash
   // and clearing out everything after that. We sometimes need to know just the
@@ -193,7 +175,7 @@
   //
   // It is an error to get an empty path on an invalid URL. The result
   // will be the empty URL.
-  GURL_API GURL GetWithEmptyPath() const;
+  GURL GetWithEmptyPath() const;
 
   // A helper function to return a GURL containing just the scheme, host,
   // and port from a URL. Equivalent to clearing any username and password,
@@ -204,35 +186,63 @@
   //
   // It is an error to get the origin of an invalid URL. The result
   // will be the empty URL.
-  GURL_API GURL GetOrigin() const;
+  GURL GetOrigin() const;
+
+  // A helper function to return a GURL stripped from the elements that are not
+  // supposed to be sent as HTTP referrer: username, password and ref fragment.
+  // For invalid URLs or URLs that are not valid referrers, an empty URL will be
+  // returned.
+  GURL GetAsReferrer() const;
 
   // Returns true if the scheme for the current URL is a known "standard"
   // scheme. Standard schemes have an authority and a path section. This
-  // includes file:, which some callers may want to filter out explicitly by
-  // calling SchemeIsFile.
-  GURL_API bool IsStandard() const;
+  // includes file: and filesystem:, which some callers may want to filter out
+  // explicitly by calling SchemeIsFile[System].
+  bool IsStandard() const;
 
   // Returns true if the given parameter (should be lower-case ASCII to match
   // the canonicalized scheme) is the scheme for this URL. This call is more
   // efficient than getting the scheme and comparing it because no copies or
   // object constructions are done.
-  GURL_API bool SchemeIs(const char* lower_ascii_scheme) const;
+  bool SchemeIs(const char* lower_ascii_scheme) const;
+
+  // Returns true if the scheme is "http" or "https".
+  bool SchemeIsHTTPOrHTTPS() const;
+
+  // Returns true if the scheme is "ws" or "wss".
+  bool SchemeIsWSOrWSS() const;
 
   // We often need to know if this is a file URL. File URLs are "standard", but
   // are often treated separately by some programs.
   bool SchemeIsFile() const {
-    return SchemeIs("file");
+    return SchemeIs(url::kFileScheme);
+  }
+
+  // FileSystem URLs need to be treated differently in some cases.
+  bool SchemeIsFileSystem() const {
+    return SchemeIs(url::kFileSystemScheme);
   }
 
   // If the scheme indicates a secure connection
   bool SchemeIsSecure() const {
-    return SchemeIs("https");
+    return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme) ||
+        (SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure());
   }
 
+  // Returns true if the scheme is "blob".
+  bool SchemeIsBlob() const {
+    return SchemeIs(url::kBlobScheme);
+  }
+
+  // The "content" of the URL is everything after the scheme (skipping the
+  // scheme delimiting colon). It is an error to get the content of an invalid
+  // URL. The result will be an empty string.
+  std::string GetContent() const;
+
   // Returns true if the hostname is an IP address. Note: this function isn't
   // as cheap as a simple getter because it re-parses the hostname to verify.
   // This currently identifies only IPv4 addresses (bug 822685).
-  GURL_API bool HostIsIPAddress() const;
+  bool HostIsIPAddress() const;
 
   // Getters for various components of the URL. The returned string will be
   // empty if the component is empty or is not present.
@@ -298,24 +308,24 @@
 
   // Returns a parsed version of the port. Can also be any of the special
   // values defined in Parsed for ExtractPort.
-  GURL_API int IntPort() const;
+  int IntPort() const;
 
   // Returns the port number of the url, or the default port number.
   // If the scheme has no concept of port (or unknown default) returns
   // PORT_UNSPECIFIED.
-  GURL_API int EffectiveIntPort() const;
+  int EffectiveIntPort() const;
 
   // Extracts the filename portion of the path and returns it. The filename
   // is everything after the last slash in the path. This may be empty.
-  GURL_API std::string ExtractFileName() const;
+  std::string ExtractFileName() const;
 
   // Returns the path that should be sent to the server. This is the path,
   // parameter, and query portions of the URL. It is guaranteed to be ASCII.
-  GURL_API std::string PathForRequest() const;
+  std::string PathForRequest() const;
 
   // Returns the host, excluding the square brackets surrounding IPv6 address
   // literals.  This can be useful for passing to getaddrinfo().
-  GURL_API std::string HostNoBrackets() const;
+  std::string HostNoBrackets() const;
 
   // Returns true if this URL's host matches or is in the same domain as
   // the given input string. For example if this URL was "www.google.com",
@@ -327,7 +337,7 @@
   //
   // If function DomainIs has parameter domain_len, which means the parameter
   // lower_ascii_domain does not gurantee to terminate with NULL character.
-  GURL_API bool DomainIs(const char* lower_ascii_domain, int domain_len) const;
+  bool DomainIs(const char* lower_ascii_domain, int domain_len) const;
 
   // If function DomainIs only has parameter lower_ascii_domain, which means
   // domain string should be terminate with NULL character.
@@ -338,16 +348,35 @@
 
   // Swaps the contents of this GURL object with the argument without doing
   // any memory allocations.
-  GURL_API void Swap(GURL* other);
+  void Swap(GURL* other);
 
   // Returns a reference to a singleton empty GURL. This object is for callers
   // who return references but don't have anything to return in some cases.
   // This function may be called from any thread.
-  GURL_API static const GURL& EmptyGURL();
+  static const GURL& EmptyGURL();
+
+  // Returns the inner URL of a nested URL [currently only non-null for
+  // filesystem: URLs].
+  const GURL* inner_url() const {
+    return inner_url_.get();
+  }
 
  private:
+  // Variant of the string parsing constructor that lets the caller elect to
+  // retain trailing whitespace, if any, on the passed URL spec, but only if the
+  // scheme is one that allows trailing whitespace. The primary use-case is
+  // for data: URLs. In most cases, you want to use the single parameter
+  // constructor above.
+  enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };
+  GURL(const std::string& url_string, RetainWhiteSpaceSelector);
+
+  template<typename STR>
+  void InitCanonical(const STR& input_spec, bool trim_path_end);
+
+  void InitializeFromCanonicalSpec();
+
   // Returns the substring of the input identified by the given component.
-  std::string ComponentString(const url_parse::Component& comp) const {
+  std::string ComponentString(const url::Component& comp) const {
     if (comp.len <= 0)
       return std::string();
     return std::string(spec_, comp.begin, comp.len);
@@ -362,14 +391,15 @@
   bool is_valid_;
 
   // Identified components of the canonical spec.
-  url_parse::Parsed parsed_;
+  url::Parsed parsed_;
+
+  // Used for nested schemes [currently only filesystem:].
+  std::unique_ptr<GURL> inner_url_;
 
   // TODO bug 684583: Add encoding for query params.
 };
 
 // Stream operator so GURL can be used in assertion statements.
-inline std::ostream& operator<<(std::ostream& out, const GURL& url) {
-  return out << url.possibly_invalid_spec();
-}
+URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url);
 
-#endif  // GOOGLEURL_SRC_GURL_H__
+#endif  // URL_GURL_H_

diff --git a/src/url/gurl_unittest.cc b/src/url/gurl_unittest.cc
new file mode 100644
index 0000000..112ee5f
--- /dev/null
+++ b/src/url/gurl_unittest.cc

@@ -0,0 +1,642 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/macros.h"
+#include "testing/base/public/gunit.h"
+#include "url/gurl.h"
+#include "url/url_canon.h"
+#include "url/url_test_utils.h"
+
+namespace url {
+
+using test_utils::WStringToUTF16;
+using test_utils::ConvertUTF8ToUTF16;
+
+namespace {
+
+template<typename CHAR>
+void SetupReplacement(
+    void (Replacements<CHAR>::*func)(const CHAR*, const Component&),
+    Replacements<CHAR>* replacements,
+    const CHAR* str) {
+  if (str) {
+    Component comp;
+    if (str[0])
+      comp.len = static_cast<int>(strlen(str));
+    (replacements->*func)(str, comp);
+  }
+}
+
+// Returns the canonicalized string for the given URL string for the
+// GURLTest.Types test.
+std::string TypesTestCase(const char* src) {
+  GURL gurl(src);
+  return gurl.possibly_invalid_spec();
+}
+
+}  // namespace
+
+// Different types of URLs should be handled differently, and handed off to
+// different canonicalizers.
+TEST(GURLTest, Types) {
+  // URLs with unknown schemes should be treated as path URLs, even when they
+  // have things like "://".
+  EXPECT_EQ("something:///HOSTNAME.com/",
+            TypesTestCase("something:///HOSTNAME.com/"));
+
+  // In the reverse, known schemes should always trigger standard URL handling.
+  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
+  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
+  EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
+  EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
+
+#ifdef WIN32
+  // URLs that look like absolute Windows drive specs.
+  EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
+  EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
+  EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
+  EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt"));
+#endif
+}
+
+// Test the basic creation and querying of components in a GURL. We assume
+// the parser is already tested and works, so we are mostly interested if the
+// object does the right thing with the results.
+TEST(GURLTest, Components) {
+  GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
+  EXPECT_TRUE(url.is_valid());
+  EXPECT_TRUE(url.SchemeIs("http"));
+  EXPECT_FALSE(url.SchemeIsFile());
+
+  // This is the narrow version of the URL, which should match the wide input.
+  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec());
+
+  EXPECT_EQ("http", url.scheme());
+  EXPECT_EQ("user", url.username());
+  EXPECT_EQ("pass", url.password());
+  EXPECT_EQ("google.com", url.host());
+  EXPECT_EQ("99", url.port());
+  EXPECT_EQ(99, url.IntPort());
+  EXPECT_EQ("/foo;bar", url.path());
+  EXPECT_EQ("q=a", url.query());
+  EXPECT_EQ("ref", url.ref());
+
+  // Test parsing userinfo with special characters.
+  GURL url_special_pass("http://user:%40!$&'()*+,;=:@google.com:12345");
+  EXPECT_TRUE(url_special_pass.is_valid());
+  // GURL canonicalizes some delimiters.
+  EXPECT_EQ("%40!$&%27()*+,%3B%3D%3A", url_special_pass.password());
+  EXPECT_EQ("google.com", url_special_pass.host());
+  EXPECT_EQ("12345", url_special_pass.port());
+}
+
+TEST(GURLTest, Empty) {
+  GURL url;
+  EXPECT_FALSE(url.is_valid());
+  EXPECT_EQ("", url.spec());
+
+  EXPECT_EQ("", url.scheme());
+  EXPECT_EQ("", url.username());
+  EXPECT_EQ("", url.password());
+  EXPECT_EQ("", url.host());
+  EXPECT_EQ("", url.port());
+  EXPECT_EQ(PORT_UNSPECIFIED, url.IntPort());
+  EXPECT_EQ("", url.path());
+  EXPECT_EQ("", url.query());
+  EXPECT_EQ("", url.ref());
+}
+
+TEST(GURLTest, Copy) {
+  GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
+
+  GURL url2(url);
+  EXPECT_TRUE(url2.is_valid());
+
+  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
+  EXPECT_EQ("http", url2.scheme());
+  EXPECT_EQ("user", url2.username());
+  EXPECT_EQ("pass", url2.password());
+  EXPECT_EQ("google.com", url2.host());
+  EXPECT_EQ("99", url2.port());
+  EXPECT_EQ(99, url2.IntPort());
+  EXPECT_EQ("/foo;bar", url2.path());
+  EXPECT_EQ("q=a", url2.query());
+  EXPECT_EQ("ref", url2.ref());
+
+  // Copying of invalid URL should be invalid
+  GURL invalid;
+  GURL invalid2(invalid);
+  EXPECT_FALSE(invalid2.is_valid());
+  EXPECT_EQ("", invalid2.spec());
+  EXPECT_EQ("", invalid2.scheme());
+  EXPECT_EQ("", invalid2.username());
+  EXPECT_EQ("", invalid2.password());
+  EXPECT_EQ("", invalid2.host());
+  EXPECT_EQ("", invalid2.port());
+  EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort());
+  EXPECT_EQ("", invalid2.path());
+  EXPECT_EQ("", invalid2.query());
+  EXPECT_EQ("", invalid2.ref());
+}
+
+TEST(GURLTest, Assign) {
+  GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
+
+  GURL url2;
+  url2 = url;
+  EXPECT_TRUE(url2.is_valid());
+
+  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
+  EXPECT_EQ("http", url2.scheme());
+  EXPECT_EQ("user", url2.username());
+  EXPECT_EQ("pass", url2.password());
+  EXPECT_EQ("google.com", url2.host());
+  EXPECT_EQ("99", url2.port());
+  EXPECT_EQ(99, url2.IntPort());
+  EXPECT_EQ("/foo;bar", url2.path());
+  EXPECT_EQ("q=a", url2.query());
+  EXPECT_EQ("ref", url2.ref());
+
+  // Assignment of invalid URL should be invalid
+  GURL invalid;
+  GURL invalid2;
+  invalid2 = invalid;
+  EXPECT_FALSE(invalid2.is_valid());
+  EXPECT_EQ("", invalid2.spec());
+  EXPECT_EQ("", invalid2.scheme());
+  EXPECT_EQ("", invalid2.username());
+  EXPECT_EQ("", invalid2.password());
+  EXPECT_EQ("", invalid2.host());
+  EXPECT_EQ("", invalid2.port());
+  EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort());
+  EXPECT_EQ("", invalid2.path());
+  EXPECT_EQ("", invalid2.query());
+  EXPECT_EQ("", invalid2.ref());
+}
+
+// This is a regression test for http://crbug.com/309975 .
+TEST(GURLTest, SelfAssign) {
+  GURL a("filesystem:http://example.com/temporary/");
+  // This should not crash.
+  a = a;
+}
+
+TEST(GURLTest, CopyFileSystem) {
+  GURL url(WStringToUTF16(L"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref"));
+
+  GURL url2(url);
+  EXPECT_TRUE(url2.is_valid());
+
+  EXPECT_EQ("filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref", url2.spec());
+  EXPECT_EQ("filesystem", url2.scheme());
+  EXPECT_EQ("", url2.username());
+  EXPECT_EQ("", url2.password());
+  EXPECT_EQ("", url2.host());
+  EXPECT_EQ("", url2.port());
+  EXPECT_EQ(PORT_UNSPECIFIED, url2.IntPort());
+  EXPECT_EQ("/foo;bar", url2.path());
+  EXPECT_EQ("q=a", url2.query());
+  EXPECT_EQ("ref", url2.ref());
+
+  const GURL* inner = url2.inner_url();
+  ASSERT_TRUE(inner);
+  EXPECT_EQ("https", inner->scheme());
+  EXPECT_EQ("user", inner->username());
+  EXPECT_EQ("pass", inner->password());
+  EXPECT_EQ("google.com", inner->host());
+  EXPECT_EQ("99", inner->port());
+  EXPECT_EQ(99, inner->IntPort());
+  EXPECT_EQ("/t", inner->path());
+  EXPECT_EQ("", inner->query());
+  EXPECT_EQ("", inner->ref());
+}
+
+TEST(GURLTest, IsValid) {
+  const char* valid_cases[] = {
+    "http://google.com",
+    "unknown://google.com",
+    "http://user:pass@google.com",
+    "http://google.com:12345",
+    "http://google.com/path",
+    "http://google.com//path",
+    "http://google.com?k=v#fragment",
+    "http://user:pass@google.com:12345/path?k=v#fragment",
+    "http:/path",
+    "http:path",
+    "://google.com",
+  };
+  for (size_t i = 0; i < arraysize(valid_cases); i++) {
+    EXPECT_TRUE(GURL(valid_cases[i]).is_valid())
+        << "Case: " << valid_cases[i];
+  }
+
+  const char* invalid_cases[] = {
+    "http://?k=v",
+    "http:://google.com",
+    "http//google.com",
+    "http://google.com:12three45",
+    "path",
+  };
+  for (size_t i = 0; i < arraysize(invalid_cases); i++) {
+    EXPECT_FALSE(GURL(invalid_cases[i]).is_valid())
+        << "Case: " << invalid_cases[i];
+  }
+}
+
+TEST(GURLTest, ExtraSlashesBeforeAuthority) {
+  // According to RFC 3986, the hier-part of a URI with an authority must use
+  // exactly two slashes. GURL intentionally ignores any extra slashes and
+  // parses the following part as an authority.
+  GURL url("http:///host");
+  EXPECT_EQ("host", url.host());
+  EXPECT_EQ("/", url.path());
+}
+
+// Given an invalid URL, we should still get most of the components.
+TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) {
+  GURL url("http:google.com:foo");
+  EXPECT_FALSE(url.is_valid());
+  EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec());
+
+  EXPECT_EQ("http", url.scheme());
+  EXPECT_EQ("", url.username());
+  EXPECT_EQ("", url.password());
+  EXPECT_EQ("google.com", url.host());
+  EXPECT_EQ("foo", url.port());
+  EXPECT_EQ(PORT_INVALID, url.IntPort());
+  EXPECT_EQ("/", url.path());
+  EXPECT_EQ("", url.query());
+  EXPECT_EQ("", url.ref());
+}
+
+TEST(GURLTest, Resolve) {
+  // The tricky cases for relative URL resolving are tested in the
+  // canonicalizer unit test. Here, we just test that the GURL integration
+  // works properly for both the 8-bit and the UTF-16 entry points.
+  struct ResolveCase {
+    const char* base;
+    const char* relative;
+    bool expected_valid;
+    const char* expected;
+  } resolve_cases[] = {
+    {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"},
+    {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
+    {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
+    {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
+    {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
+    // A non-standard base can be replaced with a standard absolute URL.
+    {"data:blahblah", "http://google.com/", true, "http://google.com/"},
+    {"data:blahblah", "http:google.com", true, "http://google.com/"},
+    // Filesystem URLs have different paths to test.
+    {"filesystem:http://www.google.com/type/", "foo.html", true, "filesystem:http://www.google.com/type/foo.html"},
+    {"filesystem:http://www.google.com/type/", "../foo.html", true, "filesystem:http://www.google.com/type/foo.html"},
+  };
+
+  for (size_t i = 0; i < arraysize(resolve_cases); i++) {
+    // 8-bit code path.
+    GURL input(resolve_cases[i].base);
+    GURL output = input.Resolve(resolve_cases[i].relative);
+    EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i;
+    EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i;
+    EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL) << i;
+
+    // Wide code path: the base is built from UTF-16 and must be the receiver.
+    GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base));
+    GURL outputw =
+        inputw.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative));
+    EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
+    EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
+    EXPECT_EQ(outputw.SchemeIsFileSystem(), outputw.inner_url() != NULL) << i;
+  }
+}
+
+TEST(GURLTest, GetOrigin) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+    {"http://www.google.com", "http://www.google.com/"},
+    {"javascript:window.alert(\"hello,world\");", ""},
+    {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"},
+    {"http://user@www.google.com", "http://www.google.com/"},
+    {"http://:pass@www.google.com", "http://www.google.com/"},
+    {"http://:@www.google.com", "http://www.google.com/"},
+    {"filesystem:http://www.google.com/temp/foo?q#b", "http://www.google.com/"},
+    {"filesystem:http://user:pass@google.com:21/blah#baz", "http://google.com:21/"},
+  };
+  for (size_t i = 0; i < arraysize(cases); i++) {
+    GURL url(cases[i].input);
+    GURL origin = url.GetOrigin();
+    EXPECT_EQ(cases[i].expected, origin.spec());
+  }
+}
+
+TEST(GURLTest, GetAsReferrer) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+    {"http://www.google.com", "http://www.google.com/"},
+    {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/blah"},
+    {"http://user@www.google.com", "http://www.google.com/"},
+    {"http://:pass@www.google.com", "http://www.google.com/"},
+    {"http://:@www.google.com", "http://www.google.com/"},
+    {"http://www.google.com/temp/foo?q#b", "http://www.google.com/temp/foo?q"},
+    {"not a url", ""},
+    {"unknown-scheme://foo.html", ""},
+    {"file:///tmp/test.html", ""},
+    {"https://www.google.com", "https://www.google.com/"},
+  };
+  for (size_t i = 0; i < arraysize(cases); i++) {
+    GURL url(cases[i].input);
+    GURL origin = url.GetAsReferrer();
+    EXPECT_EQ(cases[i].expected, origin.spec());
+  }
+}
+
+TEST(GURLTest, GetWithEmptyPath) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+    {"http://www.google.com", "http://www.google.com/"},
+    {"javascript:window.alert(\"hello, world\");", ""},
+    {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"},
+    {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"},
+    {"filesystem:file:///temporary/bar.html?baz=22", "filesystem:file:///temporary/"},
+  };
+
+  for (size_t i = 0; i < arraysize(cases); i++) {
+    GURL url(cases[i].input);
+    GURL empty_path = url.GetWithEmptyPath();
+    EXPECT_EQ(cases[i].expected, empty_path.spec());
+  }
+}
+
+TEST(GURLTest, Replacements) {
+  // The url canonicalizer replacement test will handle most of these cases.
+  // The most important thing to do here is to check that the proper
+  // canonicalizer gets called based on the scheme of the input.
+  struct ReplaceCase {
+    const char* base;
+    const char* scheme;
+    const char* username;
+    const char* password;
+    const char* host;
+    const char* port;
+    const char* path;
+    const char* query;
+    const char* ref;
+    const char* expected;
+  } replace_cases[] = {
+    {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"},
+    {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"},
+    {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"},
+#ifdef WIN32
+    {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"},
+#endif
+    {"filesystem:http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "filesystem:http://www.google.com/foo/"},
+  };
+
+  for (size_t i = 0; i < arraysize(replace_cases); i++) {
+    const ReplaceCase& cur = replace_cases[i];
+    GURL url(cur.base);
+    GURL::Replacements repl;
+    SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme);
+    SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username);
+    SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password);
+    SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host);
+    SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port);
+    SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path);
+    SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query);
+    SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref);
+    GURL output = url.ReplaceComponents(repl);
+
+    EXPECT_EQ(replace_cases[i].expected, output.spec());
+    EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL);
+  }
+}
+
+TEST(GURLTest, ClearFragmentOnDataUrl) {
+  // http://crbug.com/291747 - a data URL may legitimately have trailing
+  // whitespace in the spec after the ref is cleared. Test this does not trigger
+  // the Parsed importing validation DCHECK in GURL.
+  GURL url(" data: one ? two # three ");
+
+  // By default the trailing whitespace will have been stripped.
+  EXPECT_EQ("data: one ? two # three", url.spec());
+  GURL::Replacements repl;
+  repl.ClearRef();
+  GURL url_no_ref = url.ReplaceComponents(repl);
+
+  EXPECT_EQ("data: one ? two ", url_no_ref.spec());
+
+  // Importing a parsed url via this constructor overload will retain trailing
+  // whitespace.
+  GURL import_url(url_no_ref.spec(),
+                  url_no_ref.parsed_for_possibly_invalid_spec(),
+                  url_no_ref.is_valid());
+  EXPECT_EQ(url_no_ref, import_url);
+  EXPECT_EQ(" two ", import_url.query());
+}
+
+TEST(GURLTest, PathForRequest) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+    const char* inner_expected;
+  } cases[] = {
+    {"http://www.google.com", "/", NULL},
+    {"http://www.google.com/", "/", NULL},
+    {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22", NULL},
+    {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", NULL},
+    {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query", NULL},
+    {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref", "/foo/bar.html?query", "/temporary"},
+    {"filesystem:http://www.google.com/temporary/foo/bar.html?query", "/foo/bar.html?query", "/temporary"},
+  };
+
+  for (size_t i = 0; i < arraysize(cases); i++) {
+    GURL url(cases[i].input);
+    std::string path_request = url.PathForRequest();
+    EXPECT_EQ(cases[i].expected, path_request);
+    EXPECT_EQ(cases[i].inner_expected == NULL, url.inner_url() == NULL);
+    if (url.inner_url() && cases[i].inner_expected)
+      EXPECT_EQ(cases[i].inner_expected, url.inner_url()->PathForRequest());
+  }
+}
+
+TEST(GURLTest, EffectiveIntPort) {
+  struct PortTest {
+    const char* spec;
+    int expected_int_port;
+  } port_tests[] = {
+    // http
+    {"http://www.google.com/", 80},
+    {"http://www.google.com:80/", 80},
+    {"http://www.google.com:443/", 443},
+
+    // https
+    {"https://www.google.com/", 443},
+    {"https://www.google.com:443/", 443},
+    {"https://www.google.com:80/", 80},
+
+    // ftp
+    {"ftp://www.google.com/", 21},
+    {"ftp://www.google.com:21/", 21},
+    {"ftp://www.google.com:80/", 80},
+
+    // gopher
+    {"gopher://www.google.com/", 70},
+    {"gopher://www.google.com:70/", 70},
+    {"gopher://www.google.com:80/", 80},
+
+    // file - no port
+    {"file://www.google.com/", PORT_UNSPECIFIED},
+    {"file://www.google.com:443/", PORT_UNSPECIFIED},
+
+    // data - no port
+    {"data:www.google.com:90", PORT_UNSPECIFIED},
+    {"data:www.google.com", PORT_UNSPECIFIED},
+
+    // filesystem - no port
+    {"filesystem:http://www.google.com:90/t/foo", PORT_UNSPECIFIED},
+    {"filesystem:file:///t/foo", PORT_UNSPECIFIED},
+  };
+
+  for (size_t i = 0; i < arraysize(port_tests); i++) {
+    GURL url(port_tests[i].spec);
+    EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort());
+  }
+}
+
+TEST(GURLTest, IPAddress) {
+  struct IPTest {
+    const char* spec;
+    bool expected_ip;
+  } ip_tests[] = {
+    {"http://www.google.com/", false},
+    {"http://192.168.9.1/", true},
+    {"http://192.168.9.1.2/", false},
+    {"http://192.168.m.1/", false},
+    {"http://2001:db8::1/", false},
+    {"http://[2001:db8::1]/", true},
+    {"", false},
+    {"some random input!", false},
+  };
+
+  for (size_t i = 0; i < arraysize(ip_tests); i++) {
+    GURL url(ip_tests[i].spec);
+    EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress());
+  }
+}
+
+TEST(GURLTest, HostNoBrackets) {
+  struct TestCase {
+    const char* input;
+    const char* expected_host;
+    const char* expected_plainhost;
+  } cases[] = {
+    {"http://www.google.com", "www.google.com", "www.google.com"},
+    {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"},
+    {"http://[::]/", "[::]", "::"},
+
+    // Don't require a valid URL, but don't crash either.
+    {"http://[]/", "[]", ""},
+    {"http://[x]/", "[x]", "x"},
+    {"http://[x/", "[x", "[x"},
+    {"http://x]/", "x]", "x]"},
+    {"http://[/", "[", "["},
+    {"http://]/", "]", "]"},
+    {"", "", ""},
+  };
+  for (size_t i = 0; i < arraysize(cases); i++) {
+    GURL url(cases[i].input);
+    EXPECT_EQ(cases[i].expected_host, url.host());
+    EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets());
+  }
+}
+
+TEST(GURLTest, DomainIs) {
+  const char google_domain[] = "google.com";
+
+  GURL url_1("http://www.google.com:99/foo");
+  EXPECT_TRUE(url_1.DomainIs(google_domain));
+
+  GURL url_2("http://google.com:99/foo");
+  EXPECT_TRUE(url_2.DomainIs(google_domain));
+
+  GURL url_3("http://google.com./foo");
+  EXPECT_TRUE(url_3.DomainIs(google_domain));
+
+  GURL url_4("http://google.com/foo");
+  EXPECT_FALSE(url_4.DomainIs("google.com."));
+
+  GURL url_5("http://google.com./foo");
+  EXPECT_TRUE(url_5.DomainIs("google.com."));
+
+  GURL url_6("http://www.google.com./foo");
+  EXPECT_TRUE(url_6.DomainIs(".com."));
+
+  GURL url_7("http://www.balabala.com/foo");
+  EXPECT_FALSE(url_7.DomainIs(google_domain));
+
+  GURL url_8("http://www.google.com.cn/foo");
+  EXPECT_FALSE(url_8.DomainIs(google_domain));
+
+  GURL url_9("http://www.iamnotgoogle.com/foo");
+  EXPECT_FALSE(url_9.DomainIs(google_domain));
+
+  GURL url_10("http://www.iamnotgoogle.com../foo");
+  EXPECT_FALSE(url_10.DomainIs(".com"));
+
+  GURL url_11("filesystem:http://www.google.com:99/foo/");
+  EXPECT_TRUE(url_11.DomainIs(google_domain));
+
+  GURL url_12("filesystem:http://www.iamnotgoogle.com/foo/");
+  EXPECT_FALSE(url_12.DomainIs(google_domain));
+}
+
+// Newlines should be stripped from inputs.
+TEST(GURLTest, Newlines) {
+  // Constructor.
+  GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
+  EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
+
+  // Relative path resolver.
+  GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
+  EXPECT_EQ("http://www.google.com/foo", url_2.spec());
+
+  // Note that newlines are NOT stripped from ReplaceComponents.
+}
+
+TEST(GURLTest, IsStandard) {
+  GURL a("http:foo/bar");
+  EXPECT_TRUE(a.IsStandard());
+
+  GURL b("foo:bar/baz");
+  EXPECT_FALSE(b.IsStandard());
+
+  GURL c("foo://bar/baz");
+  EXPECT_FALSE(c.IsStandard());
+}
+
+TEST(GURLTest, SchemeIsHTTPOrHTTPS) {
+  EXPECT_TRUE(GURL("http://bar/").SchemeIsHTTPOrHTTPS());
+  EXPECT_TRUE(GURL("HTTPS://BAR").SchemeIsHTTPOrHTTPS());
+  EXPECT_FALSE(GURL("ftp://bar/").SchemeIsHTTPOrHTTPS());
+}
+
+TEST(GURLTest, SchemeIsWSOrWSS) {
+  EXPECT_TRUE(GURL("WS://BAR/").SchemeIsWSOrWSS());
+  EXPECT_TRUE(GURL("wss://bar/").SchemeIsWSOrWSS());
+  EXPECT_FALSE(GURL("http://bar/").SchemeIsWSOrWSS());
+}
+
+TEST(GURLTest, SchemeIsBlob) {
+  EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsBlob());
+  EXPECT_TRUE(GURL("blob://bar/").SchemeIsBlob());
+  EXPECT_FALSE(GURL("http://bar/").SchemeIsBlob());
+}
+
+}  // namespace url

diff --git a/src/url/origin.cc b/src/url/origin.cc
new file mode 100644
index 0000000..fdb8913
--- /dev/null
+++ b/src/url/origin.cc

@@ -0,0 +1,20 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/origin.h"
+
+#include "base/logging.h"
+#include "base/strings/string_util.h"
+
+namespace url {
+
+Origin::Origin() : string_("null") {}
+
+Origin::Origin(const std::string& origin) : string_(origin) {
+  DCHECK(origin == "null" || MatchPattern(origin, "?*://?*"));
+  DCHECK_GT(origin.size(), 0u);
+  DCHECK(origin == "file://" || origin[origin.size() - 1] != '/');
+}
+
+}  // namespace url

diff --git a/src/url/origin.h b/src/url/origin.h
new file mode 100644
index 0000000..777e4e1
--- /dev/null
+++ b/src/url/origin.h

@@ -0,0 +1,33 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ORIGIN_H_
+#define URL_ORIGIN_H_
+
+#include <string>
+
+#include "url/url_export.h"
+
+namespace url {
+
+// Origin represents a Web Origin serialized to a string.
+// See RFC6454 for details.
+class URL_EXPORT Origin {
+ public:
+  Origin();
+  explicit Origin(const std::string& origin);
+
+  const std::string& string() const { return string_; }
+
+  bool IsSameAs(const Origin& that) const {
+    return string_ == that.string_;
+  }
+
+ private:
+  std::string string_;
+};
+
+}  // namespace url
+
+#endif  // URL_ORIGIN_H_

diff --git a/src/url/origin_unittest.cc b/src/url/origin_unittest.cc
new file mode 100644
index 0000000..910a1cf
--- /dev/null
+++ b/src/url/origin_unittest.cc

@@ -0,0 +1,41 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "testing/base/public/gunit.h"
+#include "url/origin.h"
+
+namespace url {
+
+namespace {
+
+// Each test verifies that an Origin is constructed correctly without
+// violating DCHECKs.
+TEST(OriginTest, constructEmpty) {
+  Origin origin;
+  EXPECT_EQ("null", origin.string());
+}
+
+TEST(OriginTest, constructNull) {
+  Origin origin("null");
+  EXPECT_EQ("null", origin.string());
+}
+
+TEST(OriginTest, constructValidOrigin) {
+  Origin origin("http://example.com:8080");
+  EXPECT_EQ("http://example.com:8080", origin.string());
+}
+
+TEST(OriginTest, constructValidFileOrigin) {
+  Origin origin("file://");
+  EXPECT_EQ("file://", origin.string());
+}
+
+TEST(OriginTest, constructValidOriginWithoutPort) {
+  Origin origin("wss://example2.com");
+  EXPECT_EQ("wss://example2.com", origin.string());
+}
+
+}  // namespace
+
+}  // namespace url

diff --git a/googleurl/src/url_parse.cc b/src/url/third_party/mozilla/url_parse.cc
similarity index 73%
rename from googleurl/src/url_parse.cc
rename to src/url/third_party/mozilla/url_parse.cc
index a08c4da..211043c 100644
--- a/googleurl/src/url_parse.cc
+++ b/src/url/third_party/mozilla/url_parse.cc

@@ -34,19 +34,21 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
-#include "googleurl/src/url_parse.h"
+#include "url/third_party/mozilla/url_parse.h"
 
 #include <stdlib.h>
 
 #include "base/logging.h"
-#include "googleurl/src/url_parse_internal.h"
+#include "url/url_parse_internal.h"
+#include "url/url_util.h"
+#include "url/url_util_internal.h"
 
-namespace url_parse {
+namespace url {
 
 namespace {
 
 // Returns true if the given character is a valid digit to use in a port.
-inline bool IsPortDigit(char16 ch) {
+inline bool IsPortDigit(base::char16 ch) {
   return ch >= '0' && ch <= '9';
 }
 
@@ -324,7 +326,7 @@
   if (DoExtractScheme(spec, spec_len, &parsed->scheme)) {
     after_scheme = parsed->scheme.end() + 1;  // Skip past the colon.
   } else {
-    // Say there's no scheme when there is a colon. We could also say that
+    // Say there's no scheme when there is no colon. We could also say that
     // everything is the scheme. Both would produce an invalid URL, but this way
     // seems less wrong in more cases.
     parsed->scheme.reset();
@@ -333,18 +335,20 @@
   DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
 }
 
-// Initializes a path URL which is merely a scheme followed by a path. Examples
-// include "about:foo" and "javascript:alert('bar');"
+#ifndef NO_FILESYSTEMURL_SUPPORT
 template<typename CHAR>
-void DoParsePathURL(const CHAR* spec, int spec_len, Parsed* parsed) {
-  // Get the non-path and non-scheme parts of the URL out of the way, we never
-  // use them.
+void DoParseFileSystemURL(const CHAR* spec, int spec_len, Parsed* parsed) {
+  DCHECK(spec_len >= 0);
+
+  // Get the unused parts of the URL out of the way.
   parsed->username.reset();
   parsed->password.reset();
   parsed->host.reset();
   parsed->port.reset();
-  parsed->query.reset();
-  parsed->ref.reset();
+  parsed->path.reset();   // May use this; reset for convenience.
+  parsed->ref.reset();    // May use this; reset for convenience.
+  parsed->query.reset();  // May use this; reset for convenience.
+  parsed->clear_inner_parsed();  // May use this; reset for convenience.
 
   // Strip leading & trailing spaces and control characters.
   int begin = 0;
@@ -353,28 +357,151 @@
   // Handle empty specs or ones that contain only whitespace or control chars.
   if (begin == spec_len) {
     parsed->scheme.reset();
+    return;
+  }
+
+  int inner_start = -1;
+
+  // Extract the scheme.  We also handle the case where there is no scheme.
+  if (DoExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
+    // Offset the results since we gave ExtractScheme a substring.
+    parsed->scheme.begin += begin;
+
+    if (parsed->scheme.end() == spec_len - 1)
+      return;
+
+    inner_start = parsed->scheme.end() + 1;
+  } else {
+    // No scheme found; that's not valid for filesystem URLs.
+    parsed->scheme.reset();
+    return;
+  }
+
+  Component inner_scheme;
+  const CHAR* inner_spec = &spec[inner_start];
+  int inner_spec_len = spec_len - inner_start;
+
+  if (DoExtractScheme(inner_spec, inner_spec_len, &inner_scheme)) {
+    // Offset the results since we gave ExtractScheme a substring.
+    inner_scheme.begin += inner_start;
+
+    if (inner_scheme.end() == spec_len - 1)
+      return;
+  } else {
+    // No scheme found; that's not valid for filesystem URLs.
+    // The best we can do is return "filesystem://".
+    return;
+  }
+
+  Parsed inner_parsed;
+
+  if (CompareSchemeComponent(spec, inner_scheme, kFileScheme)) {
+    // File URLs are special.
+    ParseFileURL(inner_spec, inner_spec_len, &inner_parsed);
+  } else if (CompareSchemeComponent(spec, inner_scheme, kFileSystemScheme)) {
+    // Filesystem URLs don't nest.
+    return;
+  } else if (IsStandard(spec, inner_scheme)) {
+    // All "normal" URLs.
+    DoParseStandardURL(inner_spec, inner_spec_len, &inner_parsed);
+  } else {
+    return;
+  }
+
+  // All members of inner_parsed need to be offset by inner_start.
+  // If we had any scheme that supported nesting more than one level deep,
+  // we'd have to recurse into the inner_parsed's inner_parsed when
+  // adjusting by inner_start.
+  inner_parsed.scheme.begin += inner_start;
+  inner_parsed.username.begin += inner_start;
+  inner_parsed.password.begin += inner_start;
+  inner_parsed.host.begin += inner_start;
+  inner_parsed.port.begin += inner_start;
+  inner_parsed.query.begin += inner_start;
+  inner_parsed.ref.begin += inner_start;
+  inner_parsed.path.begin += inner_start;
+
+  // Query and ref move from inner_parsed to parsed.
+  parsed->query = inner_parsed.query;
+  inner_parsed.query.reset();
+  parsed->ref = inner_parsed.ref;
+  inner_parsed.ref.reset();
+
+  parsed->set_inner_parsed(inner_parsed);
+  if (!inner_parsed.scheme.is_valid() || !inner_parsed.path.is_valid() ||
+      inner_parsed.inner_parsed()) {
+    return;
+  }
+
+  // The path in inner_parsed should start with a slash, then have a filesystem
+  // type followed by a slash.  From the first slash up to but excluding the
+  // second should be what it keeps; the rest goes to parsed.  If the path ends
+  // before the second slash, it's still pretty clear what the user meant, so
+  // we'll let that through.
+  if (!IsURLSlash(spec[inner_parsed.path.begin])) {
+    return;
+  }
+  int inner_path_end = inner_parsed.path.begin + 1;  // skip the leading slash
+  while (inner_path_end < spec_len &&
+      !IsURLSlash(spec[inner_path_end]))
+    ++inner_path_end;
+  parsed->path.begin = inner_path_end;
+  int new_inner_path_length = inner_path_end - inner_parsed.path.begin;
+  parsed->path.len = inner_parsed.path.len - new_inner_path_length;
+  parsed->inner_parsed()->path.len = new_inner_path_length;
+}
+#endif
+
+// Initializes a path URL which is merely a scheme followed by a path. Examples
+// include "about:foo" and "javascript:alert('bar');"
+template<typename CHAR>
+void DoParsePathURL(const CHAR* spec, int spec_len,
+                    bool trim_path_end,
+                    Parsed* parsed) {
+  // Get the non-path and non-scheme parts of the URL out of the way, we never
+  // use them.
+  parsed->username.reset();
+  parsed->password.reset();
+  parsed->host.reset();
+  parsed->port.reset();
+  parsed->path.reset();
+  parsed->query.reset();
+  parsed->ref.reset();
+
+  // Strip leading & trailing spaces and control characters.
+  int scheme_begin = 0;
+  TrimURL(spec, &scheme_begin, &spec_len, trim_path_end);
+
+  // Handle empty specs or ones that contain only whitespace or control chars.
+  if (scheme_begin == spec_len) {
+    parsed->scheme.reset();
     parsed->path.reset();
     return;
   }
 
+  int path_begin;
   // Extract the scheme, with the path being everything following. We also
   // handle the case where there is no scheme.
-  if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
+  if (ExtractScheme(&spec[scheme_begin], spec_len - scheme_begin,
+                    &parsed->scheme)) {
     // Offset the results since we gave ExtractScheme a substring.
-    parsed->scheme.begin += begin;
-
-    // For compatability with the standard URL parser, we treat no path as
-    // -1, rather than having a length of 0 (we normally wouldn't care so
-    // much for these non-standard URLs).
-    if (parsed->scheme.end() == spec_len - 1)
-      parsed->path.reset();
-    else
-      parsed->path = MakeRange(parsed->scheme.end() + 1, spec_len);
+    parsed->scheme.begin += scheme_begin;
+    path_begin = parsed->scheme.end() + 1;
   } else {
-    // No scheme found, just path.
+    // No scheme case.
     parsed->scheme.reset();
-    parsed->path = MakeRange(begin, spec_len);
+    path_begin = scheme_begin;
   }
+
+  if (path_begin == spec_len)
+    return;
+  DCHECK_LT(path_begin, spec_len);
+
+  ParsePath(spec,
+            MakeRange(path_begin, spec_len),
+            &parsed->path,
+            &parsed->query,
+            &parsed->ref);
 }
 
 template<typename CHAR>
@@ -496,23 +623,13 @@
     return;
   }
 
-  // Search backwards for a parameter, which is a normally unused field in a
-  // URL delimited by a semicolon. We parse the parameter as part of the
-  // path, but here, we don't want to count it. The last semicolon is the
-  // parameter. The path should start with a slash, so we don't need to check
-  // the first one.
+  // Extract the filename range from the path which is between
+  // the last slash and the following semicolon.
   int file_end = path.end();
-  for (int i = path.end() - 1; i > path.begin; i--) {
+  for (int i = path.end() - 1; i >= path.begin; i--) {
     if (spec[i] == ';') {
       file_end = i;
-      break;
-    }
-  }
-
-  // Now search backwards from the filename end to the previous slash
-  // to find the beginning of the filename.
-  for (int i = file_end - 1; i >= path.begin; i--) {
-    if (IsURLSlash(spec[i])) {
+    } else if (IsURLSlash(spec[i])) {
       // File name is everything following this character to the end
       *file_name = MakeRange(i + 1, file_end);
       return;
@@ -559,12 +676,51 @@
     cur++;
 
   // Save the new query
-  *query = url_parse::MakeRange(cur, end);
+  *query = MakeRange(cur, end);
   return true;
 }
 
 }  // namespace
 
+Parsed::Parsed() : inner_parsed_(NULL) {
+}
+
+Parsed::Parsed(const Parsed& other) :
+    scheme(other.scheme),
+    username(other.username),
+    password(other.password),
+    host(other.host),
+    port(other.port),
+    path(other.path),
+    query(other.query),
+    ref(other.ref),
+    inner_parsed_(NULL) {
+  if (other.inner_parsed_)
+    set_inner_parsed(*other.inner_parsed_);
+}
+
+Parsed& Parsed::operator=(const Parsed& other) {
+  if (this != &other) {
+    scheme = other.scheme;
+    username = other.username;
+    password = other.password;
+    host = other.host;
+    port = other.port;
+    path = other.path;
+    query = other.query;
+    ref = other.ref;
+    if (other.inner_parsed_)
+      set_inner_parsed(*other.inner_parsed_);
+    else
+      clear_inner_parsed();
+  }
+  return *this;
+}
+
+Parsed::~Parsed() {
+  delete inner_parsed_;
+}
+
 int Parsed::Length() const {
   if (ref.is_valid())
     return ref.end();
@@ -634,18 +790,27 @@
   return cur;
 }
 
+Component Parsed::GetContent() const {
+  const int begin = CountCharactersBefore(USERNAME, false);
+  const int len = Length() - begin;
+  // For compatibility with the standard URL parser, we treat no content as
+  // -1, rather than having a length of 0 (we normally wouldn't care so
+  // much for these non-standard URLs).
+  return len ? Component(begin, len) : Component();
+}
+
 bool ExtractScheme(const char* url, int url_len, Component* scheme) {
   return DoExtractScheme(url, url_len, scheme);
 }
 
-bool ExtractScheme(const char16* url, int url_len, Component* scheme) {
+bool ExtractScheme(const base::char16* url, int url_len, Component* scheme) {
   return DoExtractScheme(url, url_len, scheme);
 }
 
 // This handles everything that may be an authority terminator, including
 // backslash. For special backslash handling see DoParseAfterScheme.
-bool IsAuthorityTerminator(char16 ch) {
-  return IsURLSlash(ch) || ch == '?' || ch == '#' || ch == ';';
+bool IsAuthorityTerminator(base::char16 ch) {
+  return IsURLSlash(ch) || ch == '?' || ch == '#';
 }
 
 void ExtractFileName(const char* url,
@@ -654,7 +819,7 @@
   DoExtractFileName(url, path, file_name);
 }
 
-void ExtractFileName(const char16* url,
+void ExtractFileName(const base::char16* url,
                      const Component& path,
                      Component* file_name) {
   DoExtractFileName(url, path, file_name);
@@ -667,7 +832,7 @@
   return DoExtractQueryKeyValue(url, query, key, value);
 }
 
-bool ExtractQueryKeyValue(const char16* url,
+bool ExtractQueryKeyValue(const base::char16* url,
                           Component* query,
                           Component* key,
                           Component* value) {
@@ -683,7 +848,7 @@
   DoParseAuthority(spec, auth, username, password, hostname, port_num);
 }
 
-void ParseAuthority(const char16* spec,
+void ParseAuthority(const base::char16* spec,
                     const Component& auth,
                     Component* username,
                     Component* password,
@@ -696,7 +861,7 @@
   return DoParsePort(url, port);
 }
 
-int ParsePort(const char16* url, const Component& port) {
+int ParsePort(const base::char16* url, const Component& port) {
   return DoParsePort(url, port);
 }
 
@@ -704,23 +869,49 @@
   DoParseStandardURL(url, url_len, parsed);
 }
 
-void ParseStandardURL(const char16* url, int url_len, Parsed* parsed) {
+void ParseStandardURL(const base::char16* url, int url_len, Parsed* parsed) {
   DoParseStandardURL(url, url_len, parsed);
 }
 
-void ParsePathURL(const char* url, int url_len, Parsed* parsed) {
-  DoParsePathURL(url, url_len, parsed);
+void ParsePathURL(const char* url,
+                  int url_len,
+                  bool trim_path_end,
+                  Parsed* parsed) {
+  DoParsePathURL(url, url_len, trim_path_end, parsed);
 }
 
-void ParsePathURL(const char16* url, int url_len, Parsed* parsed) {
-  DoParsePathURL(url, url_len, parsed);
+void ParsePathURL(const base::char16* url,
+                  int url_len,
+                  bool trim_path_end,
+                  Parsed* parsed) {
+  DoParsePathURL(url, url_len, trim_path_end, parsed);
+}
+
+void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) {
+#ifndef NO_FILESYSTEMURL_SUPPORT
+  DoParseFileSystemURL(url, url_len, parsed);
+#else
+  // Should not reach here if the client doesn't want to support file system
+  // URL.
+  DCHECK(false);
+#endif
+}
+
+void ParseFileSystemURL(const base::char16* url, int url_len, Parsed* parsed) {
+#ifndef NO_FILESYSTEMURL_SUPPORT
+  DoParseFileSystemURL(url, url_len, parsed);
+#else
+  // Should not reach here if the client doesn't want to support file system
+  // URL.
+  DCHECK(false);
+#endif
 }
 
 void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {
   DoParseMailtoURL(url, url_len, parsed);
 }
 
-void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed) {
+void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed) {
   DoParseMailtoURL(url, url_len, parsed);
 }
 
@@ -732,7 +923,7 @@
   ParsePath(spec, path, filepath, query, ref);
 }
 
-void ParsePathInternal(const char16* spec,
+void ParsePathInternal(const base::char16* spec,
                        const Component& path,
                        Component* filepath,
                        Component* query,
@@ -747,11 +938,11 @@
   DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
 }
 
-void ParseAfterScheme(const char16* spec,
+void ParseAfterScheme(const base::char16* spec,
                       int spec_len,
                       int after_scheme,
                       Parsed* parsed) {
   DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
 }
 
-}  // namespace url_parse
+}  // namespace url

diff --git a/googleurl/src/url_parse.h b/src/url/third_party/mozilla/url_parse.h
similarity index 61%
rename from googleurl/src/url_parse.h
rename to src/url/third_party/mozilla/url_parse.h
index 134b445..71dbb78 100644
--- a/googleurl/src/url_parse.h
+++ b/src/url/third_party/mozilla/url_parse.h

@@ -1,46 +1,21 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
-#ifndef GOOGLEURL_SRC_URL_PARSE_H__
-#define GOOGLEURL_SRC_URL_PARSE_H__
+#ifndef URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
+#define URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
 
 #include <string>
 
 #include "base/basictypes.h"
-#include "base/string16.h"
-#include "googleurl/src/url_common.h"
+#include "base/strings/string16.h"
+#include "url/url_export.h"
 
-namespace url_parse {
+namespace url {
 
 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and
 // KURLGoogle.cpp still rely on this type.
-typedef char16 UTF16Char;
+typedef base::char16 UTF16Char;
 
 // Component ------------------------------------------------------------------
 
@@ -94,19 +69,19 @@
 //
 // Typical usage would be:
 //
-//    url_parse::Parsed parsed;
-//    url_parse::Component scheme;
-//    if (!url_parse::ExtractScheme(url, url_len, &scheme))
+//    Parsed parsed;
+//    Component scheme;
+//    if (!ExtractScheme(url, url_len, &scheme))
 //      return I_CAN_NOT_FIND_THE_SCHEME_DUDE;
 //
 //    if (IsStandardScheme(url, scheme))  // Not provided by this component
-//      url_parseParseStandardURL(url, url_len, &parsed);
+//      ParseStandardURL(url, url_len, &parsed);
 //    else if (IsFileURL(url, scheme))    // Not provided by this component
-//      url_parse::ParseFileURL(url, url_len, &parsed);
+//      ParseFileURL(url, url_len, &parsed);
 //    else
-//      url_parse::ParsePathURL(url, url_len, &parsed);
+//      ParsePathURL(url, url_len, &parsed);
 //
-struct Parsed {
+struct URL_EXPORT Parsed {
   // Identifies different components.
   enum ComponentType {
     SCHEME,
@@ -119,8 +94,12 @@
     REF,
   };
 
-  // The default constructor is sufficient for the components.
-  Parsed() {}
+  // The default constructor is sufficient for the components, but inner_parsed_
+  // requires special handling.
+  Parsed();
+  Parsed(const Parsed&);
+  Parsed& operator=(const Parsed&);
+  ~Parsed();
 
   // Returns the length of the URL (the end of the last component).
   //
@@ -128,7 +107,7 @@
   // of the string. For example "http://": the parsed structure will only
   // contain an entry for the four-character scheme, and it doesn't know about
   // the "://". For all other last-components, it will return the real length.
-  GURL_API int Length() const;
+  int Length() const;
 
   // Returns the number of characters before the given component if it exists,
   // or where the component would be if it did exist. This will return the
@@ -156,8 +135,7 @@
   //      *QUERY: 14                   15 <-
   //        *REF: 20                   20
   //
-  GURL_API int CountCharactersBefore(ComponentType type,
-                                     bool include_delimiter) const;
+  int CountCharactersBefore(ComponentType type, bool include_delimiter) const;
 
   // Scheme without the colon: "http://foo"/ would have a scheme of "http".
   // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there
@@ -181,10 +159,11 @@
   // Port number.
   Component port;
 
-  // Path, this is everything following the host name. Length will be -1 if
-  // unspecified. This includes the preceeding slash, so the path on
-  // http://www.google.com/asdf" is "/asdf". As a result, it is impossible to
-  // have a 0 length path, it will be -1 in cases like "http://host?foo".
+  // Path, this is everything following the host name, stopping at the query or
+  // ref delimiter (if any). Length will be -1 if unspecified. This includes
+  // the preceding slash, so the path on "http://www.google.com/asdf" is
+  // "/asdf". As a result, it is impossible to have a 0 length path, it will
+  // be -1 in cases like "http://host?foo".
   // Note that we treat backslashes the same as slashes.
   Component path;
 
@@ -198,6 +177,37 @@
   // Length will be -1 if there is no hash sign, or 0 if there is one but
   // nothing follows it.
   Component ref;
+
+  // The URL spec from the character after the scheme: until the end of the
+  // URL, regardless of the scheme. This is mostly useful for 'opaque' non-
+  // hierarchical schemes like data: and javascript: as a convenient way to get
+  // the string with the scheme stripped off.
+  Component GetContent() const;
+
+  // This is used for nested URL types, currently only filesystem.  If you
+  // parse a filesystem URL, the resulting Parsed will have a nested
+  // inner_parsed_ to hold the parsed inner URL's component information.
+  // For all other url types [including the inner URL], it will be NULL.
+  Parsed* inner_parsed() const {
+    return inner_parsed_;
+  }
+
+  void set_inner_parsed(const Parsed& inner_parsed) {
+    if (!inner_parsed_)
+      inner_parsed_ = new Parsed(inner_parsed);
+    else
+      *inner_parsed_ = inner_parsed;
+  }
+
+  void clear_inner_parsed() {
+    if (inner_parsed_) {
+      delete inner_parsed_;
+      inner_parsed_ = NULL;
+    }
+  }
+
+ private:
+  Parsed* inner_parsed_;  // This object is owned and managed by this struct.
 };
 
 // Initialization functions ---------------------------------------------------
@@ -217,24 +227,46 @@
 // StandardURL is for when the scheme is known to be one that has an
 // authority (host) like "http". This function will not handle weird ones
 // like "about:" and "javascript:", or do the right thing for "file:" URLs.
-GURL_API void ParseStandardURL(const char* url, int url_len, Parsed* parsed);
-GURL_API void ParseStandardURL(const char16* url, int url_len, Parsed* parsed);
+URL_EXPORT void ParseStandardURL(const char* url,
+                                 int url_len,
+                                 Parsed* parsed);
+URL_EXPORT void ParseStandardURL(const base::char16* url,
+                                 int url_len,
+                                 Parsed* parsed);
 
 // PathURL is for when the scheme is known not to have an authority (host)
 // section but that aren't file URLs either. The scheme is parsed, and
 // everything after the scheme is considered as the path. This is used for
 // things like "about:" and "javascript:"
-GURL_API void ParsePathURL(const char* url, int url_len, Parsed* parsed);
-GURL_API void ParsePathURL(const char16* url, int url_len, Parsed* parsed);
+URL_EXPORT void ParsePathURL(const char* url,
+                             int url_len,
+                             bool trim_path_end,
+                             Parsed* parsed);
+URL_EXPORT void ParsePathURL(const base::char16* url,
+                             int url_len,
+                             bool trim_path_end,
+                             Parsed* parsed);
 
 // FileURL is for file URLs. There are some special rules for interpreting
 // these.
-GURL_API void ParseFileURL(const char* url, int url_len, Parsed* parsed);
-GURL_API void ParseFileURL(const char16* url, int url_len, Parsed* parsed);
+URL_EXPORT void ParseFileURL(const char* url, int url_len, Parsed* parsed);
+URL_EXPORT void ParseFileURL(const base::char16* url,
+                             int url_len,
+                             Parsed* parsed);
+
+// Filesystem URLs are structured differently than other URLs.
+URL_EXPORT void ParseFileSystemURL(const char* url,
+                                   int url_len,
+                                   Parsed* parsed);
+URL_EXPORT void ParseFileSystemURL(const base::char16* url,
+                                   int url_len,
+                                   Parsed* parsed);
 
 // MailtoURL is for mailto: urls. They are made up scheme,path,query
-GURL_API void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
-GURL_API void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed);
+URL_EXPORT void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
+URL_EXPORT void ParseMailtoURL(const base::char16* url,
+                               int url_len,
+                               Parsed* parsed);
 
 // Helper functions -----------------------------------------------------------
 
@@ -258,27 +290,31 @@
 // end of the string).
 //
 // The 8-bit version requires UTF-8 encoding.
-GURL_API bool ExtractScheme(const char* url, int url_len, Component* scheme);
-GURL_API bool ExtractScheme(const char16* url, int url_len, Component* scheme);
+URL_EXPORT bool ExtractScheme(const char* url,
+                              int url_len,
+                              Component* scheme);
+URL_EXPORT bool ExtractScheme(const base::char16* url,
+                              int url_len,
+                              Component* scheme);
 
 // Returns true if ch is a character that terminates the authority segment
 // of a URL.
-GURL_API bool IsAuthorityTerminator(char16 ch);
+URL_EXPORT bool IsAuthorityTerminator(base::char16 ch);
 
 // Does a best effort parse of input |spec|, in range |auth|. If a particular
 // component is not found, it will be set to invalid.
-GURL_API void ParseAuthority(const char* spec,
-                             const Component& auth,
-                             Component* username,
-                             Component* password,
-                             Component* hostname,
-                             Component* port_num);
-GURL_API void ParseAuthority(const char16* spec,
-                             const Component& auth,
-                             Component* username,
-                             Component* password,
-                             Component* hostname,
-                             Component* port_num);
+URL_EXPORT void ParseAuthority(const char* spec,
+                               const Component& auth,
+                               Component* username,
+                               Component* password,
+                               Component* hostname,
+                               Component* port_num);
+URL_EXPORT void ParseAuthority(const base::char16* spec,
+                               const Component& auth,
+                               Component* username,
+                               Component* password,
+                               Component* hostname,
+                               Component* port_num);
 
 // Computes the integer port value from the given port component. The port
 // component should have been identified by one of the init functions on
@@ -287,8 +323,8 @@
 // The return value will be a positive integer between 0 and 64K, or one of
 // the two special values below.
 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 };
-GURL_API int ParsePort(const char* url, const Component& port);
-GURL_API int ParsePort(const char16* url, const Component& port);
+URL_EXPORT int ParsePort(const char* url, const Component& port);
+URL_EXPORT int ParsePort(const base::char16* url, const Component& port);
 
 // Extracts the range of the file name in the given url. The path must
 // already have been computed by the parse function, and the matching URL
@@ -300,12 +336,12 @@
 // following the last slash.
 //
 // The 8-bit version requires UTF-8 encoding.
-GURL_API void ExtractFileName(const char* url,
-                              const Component& path,
-                              Component* file_name);
-GURL_API void ExtractFileName(const char16* url,
-                              const Component& path,
-                              Component* file_name);
+URL_EXPORT void ExtractFileName(const char* url,
+                                const Component& path,
+                                Component* file_name);
+URL_EXPORT void ExtractFileName(const base::char16* url,
+                                const Component& path,
+                                Component* file_name);
 
 // Extract the first key/value from the range defined by |*query|. Updates
 // |*query| to start at the end of the extracted key/value pair. This is
@@ -322,15 +358,15 @@
 //
 // If no key/value are found |*key| and |*value| will be unchanged and it will
 // return false.
-GURL_API bool ExtractQueryKeyValue(const char* url,
-                                   Component* query,
-                                   Component* key,
-                                   Component* value);
-GURL_API bool ExtractQueryKeyValue(const char16* url,
-                                   Component* query,
-                                   Component* key,
-                                   Component* value);
+URL_EXPORT bool ExtractQueryKeyValue(const char* url,
+                                     Component* query,
+                                     Component* key,
+                                     Component* value);
+URL_EXPORT bool ExtractQueryKeyValue(const base::char16* url,
+                                     Component* query,
+                                     Component* key,
+                                     Component* value);
 
-}  // namespace url_parse
+}  // namespace url
 
-#endif  // GOOGLEURL_SRC_URL_PARSE_H__
+#endif  // URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_

diff --git a/googleurl/src/url_canon.h b/src/url/url_canon.h
similarity index 62%
rename from googleurl/src/url_canon.h
rename to src/url/url_canon.h
index e2cfb55..89e3509 100644
--- a/googleurl/src/url_canon.h
+++ b/src/url/url_canon.h

@@ -1,42 +1,18 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#ifndef GOOGLEURL_SRC_URL_CANON_H__
-#define GOOGLEURL_SRC_URL_CANON_H__
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
-#include <memory.h>
+#ifndef URL_URL_CANON_H_
+#define URL_URL_CANON_H_
+
 #include <stdlib.h>
+#include <string.h>
 
-#include "base/string16.h"
-#include "googleurl/src/url_common.h"
-#include "googleurl/src/url_parse.h"
+#include "base/strings/string16.h"
+#include "url/url_export.h"
+#include "url/url_parse.h"
 
-namespace url_canon {
+namespace url {
 
 // Canonicalizer output -------------------------------------------------------
 
@@ -65,13 +41,13 @@
 
   // Accessor for returning a character at a given position. The input offset
   // must be in the valid range.
-  inline char at(int offset) const {
+  inline T at(int offset) const {
     return buffer_[offset];
   }
 
   // Sets the character at the given position. The given position MUST be less
   // than the length().
-  inline void set(int offset, int ch) {
+  inline void set(int offset, T ch) {
     buffer_[offset] = ch;
   }
 
@@ -178,7 +154,7 @@
       delete[] this->buffer_;
   }
 
-  virtual void Resize(int sz) {
+  void Resize(int sz) override {
     T* new_buf = new T[sz];
     memcpy(new_buf, this->buffer_,
            sizeof(T) * (this->cur_len_ < sz ? this->cur_len_ : sz));
@@ -196,12 +172,12 @@
 // the templates so it can also be used internally if a wide buffer is
 // required.
 typedef CanonOutputT<char> CanonOutput;
-typedef CanonOutputT<char16> CanonOutputW;
+typedef CanonOutputT<base::char16> CanonOutputW;
 
 template<int fixed_capacity>
 class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};
 template<int fixed_capacity>
-class RawCanonOutputW : public RawCanonOutputT<char16, fixed_capacity> {};
+class RawCanonOutputW : public RawCanonOutputT<base::char16, fixed_capacity> {};
 
 // Character set converter ----------------------------------------------------
 //
@@ -211,7 +187,7 @@
 //
 // Embedders will want to see the unit test for the ICU version.
 
-class CharsetConverter {
+class URL_EXPORT CharsetConverter {
  public:
   CharsetConverter() {}
   virtual ~CharsetConverter() {}
@@ -227,7 +203,7 @@
   // decimal, (such as "&#20320;") with escaping of the ampersand, number
   // sign, and semicolon (in the previous example it would be
   // "%26%2320320%3B"). This rule is based on what IE does in this situation.
-  virtual void ConvertFromUTF16(const char16* input,
+  virtual void ConvertFromUTF16(const base::char16* input,
                                 int input_len,
                                 CanonOutput* output) = 0;
 };
@@ -247,14 +223,16 @@
 // required, the given |buffer| will be used and the returned pointer will
 // point to the beginning of the buffer.
 //
-// Therefore, callers should not use the buffer, since it may actuall be empty,
+// Therefore, callers should not use the buffer, since it may actually be empty;
 // use the computed pointer and |*output_len| instead.
-GURL_API const char* RemoveURLWhitespace(const char* input, int input_len,
-                                         CanonOutputT<char>* buffer,
-                                         int* output_len);
-GURL_API const char16* RemoveURLWhitespace(const char16* input, int input_len,
-                                           CanonOutputT<char16>* buffer,
+URL_EXPORT const char* RemoveURLWhitespace(const char* input, int input_len,
+                                           CanonOutputT<char>* buffer,
                                            int* output_len);
+URL_EXPORT const base::char16* RemoveURLWhitespace(
+    const base::char16* input,
+    int input_len,
+    CanonOutputT<base::char16>* buffer,
+    int* output_len);
 
 // IDN ------------------------------------------------------------------------
 
@@ -267,7 +245,9 @@
 // the length of the output will be set to the length of the new host name.
 //
 // On error, returns false. The output in this case is undefined.
-GURL_API bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output);
+URL_EXPORT bool IDNToASCII(const base::char16* src,
+                           int src_len,
+                           CanonOutputW* output);
 
 // Piece-by-piece canonicalizers ----------------------------------------------
 //
@@ -293,14 +273,14 @@
 // URLs.
 //
 // The 8-bit version requires UTF-8 encoding.
-GURL_API bool CanonicalizeScheme(const char* spec,
-                                 const url_parse::Component& scheme,
-                                 CanonOutput* output,
-                                 url_parse::Component* out_scheme);
-GURL_API bool CanonicalizeScheme(const char16* spec,
-                                 const url_parse::Component& scheme,
-                                 CanonOutput* output,
-                                 url_parse::Component* out_scheme);
+URL_EXPORT bool CanonicalizeScheme(const char* spec,
+                                   const Component& scheme,
+                                   CanonOutput* output,
+                                   Component* out_scheme);
+URL_EXPORT bool CanonicalizeScheme(const base::char16* spec,
+                                   const Component& scheme,
+                                   CanonOutput* output,
+                                   Component* out_scheme);
 
 // User info: username/password. If present, this will add the delimiters so
 // the output will be "<username>:<password>@" or "<username>@". Empty
@@ -312,21 +292,20 @@
 // is legal as long as the two components don't overlap.
 //
 // The 8-bit version requires UTF-8 encoding.
-GURL_API bool CanonicalizeUserInfo(const char* username_source,
-                                   const url_parse::Component& username,
-                                   const char* password_source,
-                                   const url_parse::Component& password,
-                                   CanonOutput* output,
-                                   url_parse::Component* out_username,
-                                   url_parse::Component* out_password);
-GURL_API bool CanonicalizeUserInfo(const char16* username_source,
-                                   const url_parse::Component& username,
-                                   const char16* password_source,
-                                   const url_parse::Component& password,
-                                   CanonOutput* output,
-                                   url_parse::Component* out_username,
-                                   url_parse::Component* out_password);
-
+URL_EXPORT bool CanonicalizeUserInfo(const char* username_source,
+                                     const Component& username,
+                                     const char* password_source,
+                                     const Component& password,
+                                     CanonOutput* output,
+                                     Component* out_username,
+                                     Component* out_password);
+URL_EXPORT bool CanonicalizeUserInfo(const base::char16* username_source,
+                                     const Component& username,
+                                     const base::char16* password_source,
+                                     const Component& password,
+                                     CanonOutput* output,
+                                     Component* out_username,
+                                     Component* out_password);
 
 // This structure holds detailed state exported from the IP/Host canonicalizers.
 // Additional fields may be added as callers require them.
@@ -359,7 +338,18 @@
   // Location of host within the canonicalized output.
   // CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6.
   // CanonicalizeHostVerbose() always sets it.
-  url_parse::Component out_host;
+  Component out_host;
+
+  // |address| contains the parsed IP address (if any) in its first
+  // AddressLength() bytes, in network order. If IsIPAddress() is false,
+  // AddressLength() will return zero and the content of |address| is undefined.
+  unsigned char address[16];
+
+  // Convenience function to calculate the length of an IP address corresponding
+  // to the current IP version in |family|, if any. For use with |address|.
+  int AddressLength() const {
+    return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);
+  }
 };
 
 
@@ -367,28 +357,27 @@
 //
 // The 8-bit version requires UTF-8 encoding.  Use this version when you only
 // need to know whether canonicalization succeeded.
-GURL_API bool CanonicalizeHost(const char* spec,
-                               const url_parse::Component& host,
-                               CanonOutput* output,
-                               url_parse::Component* out_host);
-GURL_API bool CanonicalizeHost(const char16* spec,
-                               const url_parse::Component& host,
-                               CanonOutput* output,
-                               url_parse::Component* out_host);
+URL_EXPORT bool CanonicalizeHost(const char* spec,
+                                 const Component& host,
+                                 CanonOutput* output,
+                                 Component* out_host);
+URL_EXPORT bool CanonicalizeHost(const base::char16* spec,
+                                 const Component& host,
+                                 CanonOutput* output,
+                                 Component* out_host);
 
 // Extended version of CanonicalizeHost, which returns additional information.
 // Use this when you need to know whether the hostname was an IP address.
 // A successful return is indicated by host_info->family != BROKEN.  See the
 // definition of CanonHostInfo above for details.
-GURL_API void CanonicalizeHostVerbose(const char* spec,
-                                      const url_parse::Component& host,
-                                      CanonOutput* output,
-                                      CanonHostInfo* host_info);
-GURL_API void CanonicalizeHostVerbose(const char16* spec,
-                                      const url_parse::Component& host,
-                                      CanonOutput* output,
-                                      CanonHostInfo* host_info);
-
+URL_EXPORT void CanonicalizeHostVerbose(const char* spec,
+                                        const Component& host,
+                                        CanonOutput* output,
+                                        CanonHostInfo* host_info);
+URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec,
+                                        const Component& host,
+                                        CanonOutput* output,
+                                        CanonHostInfo* host_info);
 
 // IP addresses.
 //
@@ -400,34 +389,34 @@
 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that
 // the input is unescaped and name-prepped, etc. It should not normally be
 // necessary or wise to call this directly.
-GURL_API void CanonicalizeIPAddress(const char* spec,
-                                    const url_parse::Component& host,
-                                    CanonOutput* output,
-                                    CanonHostInfo* host_info);
-GURL_API void CanonicalizeIPAddress(const char16* spec,
-                                    const url_parse::Component& host,
-                                    CanonOutput* output,
-                                    CanonHostInfo* host_info);
+URL_EXPORT void CanonicalizeIPAddress(const char* spec,
+                                      const Component& host,
+                                      CanonOutput* output,
+                                      CanonHostInfo* host_info);
+URL_EXPORT void CanonicalizeIPAddress(const base::char16* spec,
+                                      const Component& host,
+                                      CanonOutput* output,
+                                      CanonHostInfo* host_info);
 
 // Port: this function will add the colon for the port if a port is present.
-// The caller can pass url_parse::PORT_UNSPECIFIED as the
+// The caller can pass PORT_UNSPECIFIED as the
 // default_port_for_scheme argument if there is no default port.
 //
 // The 8-bit version requires UTF-8 encoding.
-GURL_API bool CanonicalizePort(const char* spec,
-                               const url_parse::Component& port,
-                               int default_port_for_scheme,
-                               CanonOutput* output,
-                               url_parse::Component* out_port);
-GURL_API bool CanonicalizePort(const char16* spec,
-                               const url_parse::Component& port,
-                               int default_port_for_scheme,
-                               CanonOutput* output,
-                               url_parse::Component* out_port);
+URL_EXPORT bool CanonicalizePort(const char* spec,
+                                 const Component& port,
+                                 int default_port_for_scheme,
+                                 CanonOutput* output,
+                                 Component* out_port);
+URL_EXPORT bool CanonicalizePort(const base::char16* spec,
+                                 const Component& port,
+                                 int default_port_for_scheme,
+                                 CanonOutput* output,
+                                 Component* out_port);
 
 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
 // if the scheme is unknown.
-GURL_API int DefaultPortForScheme(const char* scheme, int scheme_len);
+URL_EXPORT int DefaultPortForScheme(const char* scheme, int scheme_len);
 
 // Path. If the input does not begin in a slash (including if the input is
 // empty), we'll prepend a slash to the path to make it canonical.
@@ -438,14 +427,14 @@
 // an issue. Somebody giving us an 8-bit path is responsible for generating
 // the path that the server expects (we'll escape high-bit characters), so
 // if something is invalid, it's their problem.
-GURL_API bool CanonicalizePath(const char* spec,
-                               const url_parse::Component& path,
-                               CanonOutput* output,
-                               url_parse::Component* out_path);
-GURL_API bool CanonicalizePath(const char16* spec,
-                               const url_parse::Component& path,
-                               CanonOutput* output,
-                               url_parse::Component* out_path);
+URL_EXPORT bool CanonicalizePath(const char* spec,
+                                 const Component& path,
+                                 CanonOutput* output,
+                                 Component* out_path);
+URL_EXPORT bool CanonicalizePath(const base::char16* spec,
+                                 const Component& path,
+                                 CanonOutput* output,
+                                 Component* out_path);
 
 // Canonicalizes the input as a file path. This is like CanonicalizePath except
 // that it also handles Windows drive specs. For example, the path can begin
@@ -453,14 +442,14 @@
 // The string will be appended to |*output| and |*out_path| will be updated.
 //
 // The 8-bit version requires UTF-8 encoding.
-GURL_API bool FileCanonicalizePath(const char* spec,
-                                   const url_parse::Component& path,
-                                   CanonOutput* output,
-                                   url_parse::Component* out_path);
-GURL_API bool FileCanonicalizePath(const char16* spec,
-                                   const url_parse::Component& path,
-                                   CanonOutput* output,
-                                   url_parse::Component* out_path);
+URL_EXPORT bool FileCanonicalizePath(const char* spec,
+                                     const Component& path,
+                                     CanonOutput* output,
+                                     Component* out_path);
+URL_EXPORT bool FileCanonicalizePath(const base::char16* spec,
+                                     const Component& path,
+                                     CanonOutput* output,
+                                     Component* out_path);
 
 // Query: Prepends the ? if needed.
 //
@@ -474,16 +463,16 @@
 // if necessary, for ASCII input, no conversions are necessary.
 //
 // The converter can be NULL. In this case, the output encoding will be UTF-8.
-GURL_API void CanonicalizeQuery(const char* spec,
-                                const url_parse::Component& query,
-                                CharsetConverter* converter,
-                                CanonOutput* output,
-                                url_parse::Component* out_query);
-GURL_API void CanonicalizeQuery(const char16* spec,
-                                const url_parse::Component& query,
-                                CharsetConverter* converter,
-                                CanonOutput* output,
-                                url_parse::Component* out_query);
+URL_EXPORT void CanonicalizeQuery(const char* spec,
+                                  const Component& query,
+                                  CharsetConverter* converter,
+                                  CanonOutput* output,
+                                  Component* out_query);
+URL_EXPORT void CanonicalizeQuery(const base::char16* spec,
+                                  const Component& query,
+                                  CharsetConverter* converter,
+                                  CanonOutput* output,
+                                  Component* out_query);
 
 // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only
 // canonicalizer that does not produce ASCII output). The output is
@@ -491,14 +480,14 @@
 //
 // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use
 // the "Unicode replacement character" for the confusing bits and copy the rest.
-GURL_API void CanonicalizeRef(const char* spec,
-                              const url_parse::Component& path,
-                              CanonOutput* output,
-                              url_parse::Component* out_path);
-GURL_API void CanonicalizeRef(const char16* spec,
-                              const url_parse::Component& path,
-                              CanonOutput* output,
-                              url_parse::Component* out_path);
+URL_EXPORT void CanonicalizeRef(const char* spec,
+                                const Component& path,
+                                CanonOutput* output,
+                                Component* out_path);
+URL_EXPORT void CanonicalizeRef(const base::char16* spec,
+                                const Component& path,
+                                CanonOutput* output,
+                                Component* out_path);
 
 // Full canonicalizer ---------------------------------------------------------
 //
@@ -511,71 +500,85 @@
 // The 8-bit versions require UTF-8 encoding.
 
 // Use for standard URLs with authorities and paths.
-GURL_API bool CanonicalizeStandardURL(const char* spec,
-                                      int spec_len,
-                                      const url_parse::Parsed& parsed,
-                                      CharsetConverter* query_converter,
-                                      CanonOutput* output,
-                                      url_parse::Parsed* new_parsed);
-GURL_API bool CanonicalizeStandardURL(const char16* spec,
-                                      int spec_len,
-                                      const url_parse::Parsed& parsed,
-                                      CharsetConverter* query_converter,
-                                      CanonOutput* output,
-                                      url_parse::Parsed* new_parsed);
+URL_EXPORT bool CanonicalizeStandardURL(const char* spec,
+                                        int spec_len,
+                                        const Parsed& parsed,
+                                        CharsetConverter* query_converter,
+                                        CanonOutput* output,
+                                        Parsed* new_parsed);
+URL_EXPORT bool CanonicalizeStandardURL(const base::char16* spec,
+                                        int spec_len,
+                                        const Parsed& parsed,
+                                        CharsetConverter* query_converter,
+                                        CanonOutput* output,
+                                        Parsed* new_parsed);
 
 // Use for file URLs.
-GURL_API bool CanonicalizeFileURL(const char* spec,
-                                  int spec_len,
-                                  const url_parse::Parsed& parsed,
-                                  CharsetConverter* query_converter,
-                                  CanonOutput* output,
-                                  url_parse::Parsed* new_parsed);
-GURL_API bool CanonicalizeFileURL(const char16* spec,
-                                  int spec_len,
-                                  const url_parse::Parsed& parsed,
-                                  CharsetConverter* query_converter,
-                                  CanonOutput* output,
-                                  url_parse::Parsed* new_parsed);
+URL_EXPORT bool CanonicalizeFileURL(const char* spec,
+                                    int spec_len,
+                                    const Parsed& parsed,
+                                    CharsetConverter* query_converter,
+                                    CanonOutput* output,
+                                    Parsed* new_parsed);
+URL_EXPORT bool CanonicalizeFileURL(const base::char16* spec,
+                                    int spec_len,
+                                    const Parsed& parsed,
+                                    CharsetConverter* query_converter,
+                                    CanonOutput* output,
+                                    Parsed* new_parsed);
+
+// Use for filesystem URLs.
+URL_EXPORT bool CanonicalizeFileSystemURL(const char* spec,
+                                          int spec_len,
+                                          const Parsed& parsed,
+                                          CharsetConverter* query_converter,
+                                          CanonOutput* output,
+                                          Parsed* new_parsed);
+URL_EXPORT bool CanonicalizeFileSystemURL(const base::char16* spec,
+                                          int spec_len,
+                                          const Parsed& parsed,
+                                          CharsetConverter* query_converter,
+                                          CanonOutput* output,
+                                          Parsed* new_parsed);
 
 // Use for path URLs such as javascript. This does not modify the path in any
 // way, for example, by escaping it.
-GURL_API bool CanonicalizePathURL(const char* spec,
-                                  int spec_len,
-                                  const url_parse::Parsed& parsed,
-                                  CanonOutput* output,
-                                  url_parse::Parsed* new_parsed);
-GURL_API bool CanonicalizePathURL(const char16* spec,
-                                  int spec_len,
-                                  const url_parse::Parsed& parsed,
-                                  CanonOutput* output,
-                                  url_parse::Parsed* new_parsed);
+URL_EXPORT bool CanonicalizePathURL(const char* spec,
+                                    int spec_len,
+                                    const Parsed& parsed,
+                                    CanonOutput* output,
+                                    Parsed* new_parsed);
+URL_EXPORT bool CanonicalizePathURL(const base::char16* spec,
+                                    int spec_len,
+                                    const Parsed& parsed,
+                                    CanonOutput* output,
+                                    Parsed* new_parsed);
 
 // Use for mailto URLs. This "canonicalizes" the url into a path and query
 // component. It does not attempt to merge "to" fields. It uses UTF-8 for
 // the query encoding if there is a query. This is because a mailto URL is
 // really intended for an external mail program, and the encoding of a page,
 // etc. which would influence a query encoding normally are irrelevant.
-GURL_API bool CanonicalizeMailtoURL(const char* spec,
-                                    int spec_len,
-                                    const url_parse::Parsed& parsed,
-                                    CanonOutput* output,
-                                    url_parse::Parsed* new_parsed);
-GURL_API bool CanonicalizeMailtoURL(const char16* spec,
-                                    int spec_len,
-                                    const url_parse::Parsed& parsed,
-                                    CanonOutput* output,
-                                    url_parse::Parsed* new_parsed);
+URL_EXPORT bool CanonicalizeMailtoURL(const char* spec,
+                                      int spec_len,
+                                      const Parsed& parsed,
+                                      CanonOutput* output,
+                                      Parsed* new_parsed);
+URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec,
+                                      int spec_len,
+                                      const Parsed& parsed,
+                                      CanonOutput* output,
+                                      Parsed* new_parsed);
 
 // Part replacer --------------------------------------------------------------
 
 // Internal structure used for storing separate strings for each component.
 // The basic canonicalization functions use this structure internally so that
-// component remplacement (different strings for different components) can be
+// component replacement (different strings for different components) can be
 // treated on the same code path as regular canonicalization (the same string
 // for each component).
 //
-// A url_parse::Parsed structure usually goes along with this. Those
+// A Parsed structure usually goes along with this. Those
 // components identify offsets within these strings, so that they can all be
 // in the same string, or spread arbitrarily across different ones.
 //
@@ -638,7 +641,7 @@
   }
 
   // Scheme
-  void SetScheme(const CHAR* s, const url_parse::Component& comp) {
+  void SetScheme(const CHAR* s, const Component& comp) {
     sources_.scheme = s;
     components_.scheme = comp;
   }
@@ -646,86 +649,86 @@
   bool IsSchemeOverridden() const { return sources_.scheme != NULL; }
 
   // Username
-  void SetUsername(const CHAR* s, const url_parse::Component& comp) {
+  void SetUsername(const CHAR* s, const Component& comp) {
     sources_.username = s;
     components_.username = comp;
   }
   void ClearUsername() {
     sources_.username = Placeholder();
-    components_.username = url_parse::Component();
+    components_.username = Component();
   }
   bool IsUsernameOverridden() const { return sources_.username != NULL; }
 
   // Password
-  void SetPassword(const CHAR* s, const url_parse::Component& comp) {
+  void SetPassword(const CHAR* s, const Component& comp) {
     sources_.password = s;
     components_.password = comp;
   }
   void ClearPassword() {
     sources_.password = Placeholder();
-    components_.password = url_parse::Component();
+    components_.password = Component();
   }
   bool IsPasswordOverridden() const { return sources_.password != NULL; }
 
   // Host
-  void SetHost(const CHAR* s, const url_parse::Component& comp) {
+  void SetHost(const CHAR* s, const Component& comp) {
     sources_.host = s;
     components_.host = comp;
   }
   void ClearHost() {
     sources_.host = Placeholder();
-    components_.host = url_parse::Component();
+    components_.host = Component();
   }
   bool IsHostOverridden() const { return sources_.host != NULL; }
 
   // Port
-  void SetPort(const CHAR* s, const url_parse::Component& comp) {
+  void SetPort(const CHAR* s, const Component& comp) {
     sources_.port = s;
     components_.port = comp;
   }
   void ClearPort() {
     sources_.port = Placeholder();
-    components_.port = url_parse::Component();
+    components_.port = Component();
   }
   bool IsPortOverridden() const { return sources_.port != NULL; }
 
   // Path
-  void SetPath(const CHAR* s, const url_parse::Component& comp) {
+  void SetPath(const CHAR* s, const Component& comp) {
     sources_.path = s;
     components_.path = comp;
   }
   void ClearPath() {
     sources_.path = Placeholder();
-    components_.path = url_parse::Component();
+    components_.path = Component();
   }
   bool IsPathOverridden() const { return sources_.path != NULL; }
 
   // Query
-  void SetQuery(const CHAR* s, const url_parse::Component& comp) {
+  void SetQuery(const CHAR* s, const Component& comp) {
     sources_.query = s;
     components_.query = comp;
   }
   void ClearQuery() {
     sources_.query = Placeholder();
-    components_.query = url_parse::Component();
+    components_.query = Component();
   }
   bool IsQueryOverridden() const { return sources_.query != NULL; }
 
   // Ref
-  void SetRef(const CHAR* s, const url_parse::Component& comp) {
+  void SetRef(const CHAR* s, const Component& comp) {
     sources_.ref = s;
     components_.ref = comp;
   }
   void ClearRef() {
     sources_.ref = Placeholder();
-    components_.ref = url_parse::Component();
+    components_.ref = Component();
   }
   bool IsRefOverridden() const { return sources_.ref != NULL; }
 
-  // Getters for the itnernal data. See the variables below for how the
+  // Getters for the internal data. See the variables below for how the
   // information is encoded.
   const URLComponentSource<CHAR>& sources() const { return sources_; }
-  const url_parse::Parsed& components() const { return components_; }
+  const Parsed& components() const { return components_; }
 
  private:
   // Returns a pointer to a static empty string that is used as a placeholder
@@ -746,63 +749,80 @@
   // We use a pointer to the empty string for the source when the component
   // should be deleted.
   URLComponentSource<CHAR> sources_;
-  url_parse::Parsed components_;
+  Parsed components_;
 };
 
 // The base must be an 8-bit canonical URL.
-GURL_API bool ReplaceStandardURL(const char* base,
-                                 const url_parse::Parsed& base_parsed,
-                                 const Replacements<char>& replacements,
-                                 CharsetConverter* query_converter,
-                                 CanonOutput* output,
-                                 url_parse::Parsed* new_parsed);
-GURL_API bool ReplaceStandardURL(const char* base,
-                                 const url_parse::Parsed& base_parsed,
-                                 const Replacements<char16>& replacements,
-                                 CharsetConverter* query_converter,
-                                 CanonOutput* output,
-                                 url_parse::Parsed* new_parsed);
+URL_EXPORT bool ReplaceStandardURL(const char* base,
+                                   const Parsed& base_parsed,
+                                   const Replacements<char>& replacements,
+                                   CharsetConverter* query_converter,
+                                   CanonOutput* output,
+                                   Parsed* new_parsed);
+URL_EXPORT bool ReplaceStandardURL(
+    const char* base,
+    const Parsed& base_parsed,
+    const Replacements<base::char16>& replacements,
+    CharsetConverter* query_converter,
+    CanonOutput* output,
+    Parsed* new_parsed);
+
+// Filesystem URLs can only have the path, query, or ref replaced.
+// All other components will be ignored.
+URL_EXPORT bool ReplaceFileSystemURL(const char* base,
+                                     const Parsed& base_parsed,
+                                     const Replacements<char>& replacements,
+                                     CharsetConverter* query_converter,
+                                     CanonOutput* output,
+                                     Parsed* new_parsed);
+URL_EXPORT bool ReplaceFileSystemURL(
+    const char* base,
+    const Parsed& base_parsed,
+    const Replacements<base::char16>& replacements,
+    CharsetConverter* query_converter,
+    CanonOutput* output,
+    Parsed* new_parsed);
 
 // Replacing some parts of a file URL is not permitted. Everything except
 // the host, path, query, and ref will be ignored.
-GURL_API bool ReplaceFileURL(const char* base,
-                             const url_parse::Parsed& base_parsed,
-                             const Replacements<char>& replacements,
-                             CharsetConverter* query_converter,
-                             CanonOutput* output,
-                             url_parse::Parsed* new_parsed);
-GURL_API bool ReplaceFileURL(const char* base,
-                             const url_parse::Parsed& base_parsed,
-                             const Replacements<char16>& replacements,
-                             CharsetConverter* query_converter,
-                             CanonOutput* output,
-                             url_parse::Parsed* new_parsed);
+URL_EXPORT bool ReplaceFileURL(const char* base,
+                               const Parsed& base_parsed,
+                               const Replacements<char>& replacements,
+                               CharsetConverter* query_converter,
+                               CanonOutput* output,
+                               Parsed* new_parsed);
+URL_EXPORT bool ReplaceFileURL(const char* base,
+                               const Parsed& base_parsed,
+                               const Replacements<base::char16>& replacements,
+                               CharsetConverter* query_converter,
+                               CanonOutput* output,
+                               Parsed* new_parsed);
 
 // Path URLs can only have the scheme and path replaced. All other components
 // will be ignored.
-GURL_API bool ReplacePathURL(const char* base,
-                             const url_parse::Parsed& base_parsed,
-                             const Replacements<char>& replacements,
-                             CanonOutput* output,
-                             url_parse::Parsed* new_parsed);
-GURL_API bool ReplacePathURL(const char* base,
-                             const url_parse::Parsed& base_parsed,
-                             const Replacements<char16>& replacements,
-                             CanonOutput* output,
-                             url_parse::Parsed* new_parsed);
+URL_EXPORT bool ReplacePathURL(const char* base,
+                               const Parsed& base_parsed,
+                               const Replacements<char>& replacements,
+                               CanonOutput* output,
+                               Parsed* new_parsed);
+URL_EXPORT bool ReplacePathURL(const char* base,
+                               const Parsed& base_parsed,
+                               const Replacements<base::char16>& replacements,
+                               CanonOutput* output,
+                               Parsed* new_parsed);
 
 // Mailto URLs can only have the scheme, path, and query replaced.
 // All other components will be ignored.
-GURL_API bool ReplaceMailtoURL(const char* base,
-                               const url_parse::Parsed& base_parsed,
-                               const Replacements<char>& replacements,
-                               CanonOutput* output,
-                               url_parse::Parsed* new_parsed);
-GURL_API bool ReplaceMailtoURL(const char* base,
-                               const url_parse::Parsed& base_parsed,
-                               const Replacements<char16>& replacements,
-                               CanonOutput* output,
-                               url_parse::Parsed* new_parsed);
+URL_EXPORT bool ReplaceMailtoURL(const char* base,
+                                 const Parsed& base_parsed,
+                                 const Replacements<char>& replacements,
+                                 CanonOutput* output,
+                                 Parsed* new_parsed);
+URL_EXPORT bool ReplaceMailtoURL(const char* base,
+                                 const Parsed& base_parsed,
+                                 const Replacements<base::char16>& replacements,
+                                 CanonOutput* output,
+                                 Parsed* new_parsed);
 
 // Relative URL ---------------------------------------------------------------
 
@@ -811,26 +831,26 @@
 // relative, the relevant portion of the URL will be placed into
 // |*relative_component| (there may have been trimmed whitespace, for example).
 // This value is passed to ResolveRelativeURL. If the input is not relative,
-// this value is UNDEFINED (it may be changed by the functin).
+// this value is UNDEFINED (it may be changed by the function).
 //
 // Returns true on success (we successfully determined the URL is relative or
 // not). Failure means that the combination of URLs doesn't make any sense.
 //
 // The base URL should always be canonical, therefore is ASCII.
-GURL_API bool IsRelativeURL(const char* base,
-                            const url_parse::Parsed& base_parsed,
-                            const char* fragment,
-                            int fragment_len,
-                            bool is_base_hierarchical,
-                            bool* is_relative,
-                            url_parse::Component* relative_component);
-GURL_API bool IsRelativeURL(const char* base,
-                            const url_parse::Parsed& base_parsed,
-                            const char16* fragment,
-                            int fragment_len,
-                            bool is_base_hierarchical,
-                            bool* is_relative,
-                            url_parse::Component* relative_component);
+URL_EXPORT bool IsRelativeURL(const char* base,
+                              const Parsed& base_parsed,
+                              const char* fragment,
+                              int fragment_len,
+                              bool is_base_hierarchical,
+                              bool* is_relative,
+                              Component* relative_component);
+URL_EXPORT bool IsRelativeURL(const char* base,
+                              const Parsed& base_parsed,
+                              const base::char16* fragment,
+                              int fragment_len,
+                              bool is_base_hierarchical,
+                              bool* is_relative,
+                              Component* relative_component);
 
 // Given a canonical parsed source URL, a URL fragment known to be relative,
 // and the identified relevant portion of the relative URL (computed by
@@ -850,23 +870,23 @@
 // Returns true on success. On failure, the output will be "something
 // reasonable" that will be consistent and valid, just probably not what
 // was intended by the web page author or caller.
-GURL_API bool ResolveRelativeURL(const char* base_url,
-                                 const url_parse::Parsed& base_parsed,
-                                 bool base_is_file,
-                                 const char* relative_url,
-                                 const url_parse::Component& relative_component,
-                                 CharsetConverter* query_converter,
-                                 CanonOutput* output,
-                                 url_parse::Parsed* out_parsed);
-GURL_API bool ResolveRelativeURL(const char* base_url,
-                                 const url_parse::Parsed& base_parsed,
-                                 bool base_is_file,
-                                 const char16* relative_url,
-                                 const url_parse::Component& relative_component,
-                                 CharsetConverter* query_converter,
-                                 CanonOutput* output,
-                                 url_parse::Parsed* out_parsed);
+URL_EXPORT bool ResolveRelativeURL(const char* base_url,
+                                   const Parsed& base_parsed,
+                                   bool base_is_file,
+                                   const char* relative_url,
+                                   const Component& relative_component,
+                                   CharsetConverter* query_converter,
+                                   CanonOutput* output,
+                                   Parsed* out_parsed);
+URL_EXPORT bool ResolveRelativeURL(const char* base_url,
+                                   const Parsed& base_parsed,
+                                   bool base_is_file,
+                                   const base::char16* relative_url,
+                                   const Component& relative_component,
+                                   CharsetConverter* query_converter,
+                                   CanonOutput* output,
+                                   Parsed* out_parsed);
 
-}  // namespace url_canon
+}  // namespace url
 
-#endif  // GOOGLEURL_SRC_URL_CANON_H__
+#endif  // URL_URL_CANON_H_

diff --git a/googleurl/src/url_canon_etc.cc b/src/url/url_canon_etc.cc
similarity index 68%
rename from googleurl/src/url_canon_etc.cc
rename to src/url/url_canon_etc.cc
index aea181a..7409efd 100644
--- a/googleurl/src/url_canon_etc.cc
+++ b/src/url/url_canon_etc.cc

@@ -1,40 +1,15 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
 // Canonicalizers for random bits that aren't big enough for their own files.
 
 #include <string.h>
 
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_internal.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
 
-namespace url_canon {
+namespace url {
 
 namespace {
 
@@ -107,12 +82,12 @@
 
 template<typename CHAR, typename UCHAR>
 bool DoScheme(const CHAR* spec,
-              const url_parse::Component& scheme,
+              const Component& scheme,
               CanonOutput* output,
-              url_parse::Component* out_scheme) {
+              Component* out_scheme) {
   if (scheme.len <= 0) {
     // Scheme is unspecified or empty, convert to empty by appending a colon.
-    *out_scheme = url_parse::Component(output->length(), 0);
+    *out_scheme = Component(output->length(), 0);
     output->push_back(':');
     return true;
   }
@@ -123,7 +98,7 @@
   // Danger: it's important that this code does not strip any characters: it
   // only emits the canonical version (be it valid or escaped) of each of
   // the input characters. Stripping would put it out of sync with
-  // url_util::FindAndCompareScheme, which could cause some security checks on
+  // FindAndCompareScheme, which could cause some security checks on
   // schemes to be incorrect.
   bool success = true;
   int end = scheme.end();
@@ -171,16 +146,16 @@
 // replacing components.
 template<typename CHAR, typename UCHAR>
 bool DoUserInfo(const CHAR* username_spec,
-                const url_parse::Component& username,
+                const Component& username,
                 const CHAR* password_spec,
-                const url_parse::Component& password,
+                const Component& password,
                 CanonOutput* output,
-                url_parse::Component* out_username,
-                url_parse::Component* out_password) {
+                Component* out_username,
+                Component* out_password) {
   if (username.len <= 0 && password.len <= 0) {
     // Common case: no user info. We strip empty username/passwords.
-    *out_username = url_parse::Component();
-    *out_password = url_parse::Component();
+    *out_username = Component();
+    *out_password = Component();
     return true;
   }
 
@@ -202,7 +177,7 @@
                        CHAR_USERINFO, output);
     out_password->len = output->length() - out_password->begin;
   } else {
-    *out_password = url_parse::Component();
+    *out_password = Component();
   }
 
   output->push_back('@');
@@ -213,25 +188,21 @@
 inline void WritePortInt(char* output, int output_len, int port) {
   _itoa_s(port, output, output_len, 10);
 }
-inline void WritePortInt(char16* output, int output_len, int port) {
-  _itow_s(port, output, output_len, 10);
-}
 
 // This function will prepend the colon if there will be a port.
 template<typename CHAR, typename UCHAR>
 bool DoPort(const CHAR* spec,
-            const url_parse::Component& port,
+            const Component& port,
             int default_port_for_scheme,
             CanonOutput* output,
-            url_parse::Component* out_port) {
-  int port_num = url_parse::ParsePort(spec, port);
-  if (port_num == url_parse::PORT_UNSPECIFIED ||
-      port_num == default_port_for_scheme) {
-    *out_port = url_parse::Component();
+            Component* out_port) {
+  int port_num = ParsePort(spec, port);
+  if (port_num == PORT_UNSPECIFIED || port_num == default_port_for_scheme) {
+    *out_port = Component();
     return true;  // Leave port empty.
   }
 
-  if (port_num == url_parse::PORT_INVALID) {
+  if (port_num == PORT_INVALID) {
     // Invalid port: We'll copy the text from the input so the user can see
     // what the error was, and mark the URL as invalid by returning false.
     output->push_back(':');
@@ -259,12 +230,12 @@
 
 template<typename CHAR, typename UCHAR>
 void DoCanonicalizeRef(const CHAR* spec,
-                       const url_parse::Component& ref,
+                       const Component& ref,
                        CanonOutput* output,
-                       url_parse::Component* out_ref) {
+                       Component* out_ref) {
   if (ref.len < 0) {
     // Common case of no ref.
-    *out_ref = url_parse::Component();
+    *out_ref = Component();
     return;
   }
 
@@ -290,12 +261,11 @@
     } else {
       // Non-ASCII characters are appended unescaped, but only when they are
       // valid. Invalid Unicode characters are replaced with the "invalid
-      // character" as IE seems to.
+      // character" as IE seems to (ReadUTFChar puts the unicode replacement
+      // character in the output on failure for us).
       unsigned code_point;
-      if (!ReadUTFChar(spec, &i, end, &code_point))
-        AppendUTF8Value(kUnicodeReplacementCharacter, output);
-      else
-        AppendUTF8Value(code_point, output);
+      ReadUTFChar(spec, &i, end, &code_point);
+      AppendUTF8Value(code_point, output);
     }
   }
 
@@ -310,87 +280,88 @@
   return DoRemoveURLWhitespace(input, input_len, buffer, output_len);
 }
 
-const char16* RemoveURLWhitespace(const char16* input, int input_len,
-                                  CanonOutputT<char16>* buffer,
-                                  int* output_len) {
+const base::char16* RemoveURLWhitespace(const base::char16* input,
+                                        int input_len,
+                                        CanonOutputT<base::char16>* buffer,
+                                        int* output_len) {
   return DoRemoveURLWhitespace(input, input_len, buffer, output_len);
 }
 
-char CanonicalSchemeChar(char16 ch) {
+char CanonicalSchemeChar(base::char16 ch) {
   if (ch >= 0x80)
     return 0;  // Non-ASCII is not supported by schemes.
   return kSchemeCanonical[ch];
 }
 
 bool CanonicalizeScheme(const char* spec,
-                        const url_parse::Component& scheme,
+                        const Component& scheme,
                         CanonOutput* output,
-                        url_parse::Component* out_scheme) {
+                        Component* out_scheme) {
   return DoScheme<char, unsigned char>(spec, scheme, output, out_scheme);
 }
 
-bool CanonicalizeScheme(const char16* spec,
-                        const url_parse::Component& scheme,
+bool CanonicalizeScheme(const base::char16* spec,
+                        const Component& scheme,
                         CanonOutput* output,
-                        url_parse::Component* out_scheme) {
-  return DoScheme<char16, char16>(spec, scheme, output, out_scheme);
+                        Component* out_scheme) {
+  return DoScheme<base::char16, base::char16>(spec, scheme, output, out_scheme);
 }
 
 bool CanonicalizeUserInfo(const char* username_source,
-                          const url_parse::Component& username,
+                          const Component& username,
                           const char* password_source,
-                          const url_parse::Component& password,
+                          const Component& password,
                           CanonOutput* output,
-                          url_parse::Component* out_username,
-                          url_parse::Component* out_password) {
+                          Component* out_username,
+                          Component* out_password) {
   return DoUserInfo<char, unsigned char>(
       username_source, username, password_source, password,
       output, out_username, out_password);
 }
 
-bool CanonicalizeUserInfo(const char16* username_source,
-                          const url_parse::Component& username,
-                          const char16* password_source,
-                          const url_parse::Component& password,
+bool CanonicalizeUserInfo(const base::char16* username_source,
+                          const Component& username,
+                          const base::char16* password_source,
+                          const Component& password,
                           CanonOutput* output,
-                          url_parse::Component* out_username,
-                          url_parse::Component* out_password) {
-  return DoUserInfo<char16, char16>(
+                          Component* out_username,
+                          Component* out_password) {
+  return DoUserInfo<base::char16, base::char16>(
       username_source, username, password_source, password,
       output, out_username, out_password);
 }
 
 bool CanonicalizePort(const char* spec,
-                      const url_parse::Component& port,
+                      const Component& port,
                       int default_port_for_scheme,
                       CanonOutput* output,
-                      url_parse::Component* out_port) {
+                      Component* out_port) {
   return DoPort<char, unsigned char>(spec, port,
                                      default_port_for_scheme,
                                      output, out_port);
 }
 
-bool CanonicalizePort(const char16* spec,
-                      const url_parse::Component& port,
+bool CanonicalizePort(const base::char16* spec,
+                      const Component& port,
                       int default_port_for_scheme,
                       CanonOutput* output,
-                      url_parse::Component* out_port) {
-  return DoPort<char16, char16>(spec, port, default_port_for_scheme,
-                                      output, out_port);
+                      Component* out_port) {
+  return DoPort<base::char16, base::char16>(spec, port, default_port_for_scheme,
+                                            output, out_port);
 }
 
 void CanonicalizeRef(const char* spec,
-                     const url_parse::Component& ref,
+                     const Component& ref,
                      CanonOutput* output,
-                     url_parse::Component* out_ref) {
+                     Component* out_ref) {
   DoCanonicalizeRef<char, unsigned char>(spec, ref, output, out_ref);
 }
 
-void CanonicalizeRef(const char16* spec,
-                     const url_parse::Component& ref,
+void CanonicalizeRef(const base::char16* spec,
+                     const Component& ref,
                      CanonOutput* output,
-                     url_parse::Component* out_ref) {
-  DoCanonicalizeRef<char16, char16>(spec, ref, output, out_ref);
+                     Component* out_ref) {
+  DoCanonicalizeRef<base::char16, base::char16>(spec, ref, output, out_ref);
 }
 
-}  // namespace url_canon
+}  // namespace url

diff --git a/src/url/url_canon_filesystemurl.cc b/src/url/url_canon_filesystemurl.cc
new file mode 100644
index 0000000..18e9055
--- /dev/null
+++ b/src/url/url_canon_filesystemurl.cc

@@ -0,0 +1,129 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Functions for canonicalizing "filesystem:file:" URLs.
+
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
+#include "url/url_util.h"
+#include "url/url_util_internal.h"
+
+namespace url {
+
+namespace {
+
+// Canonicalizes a filesystem: URL into |output|, filling in |new_parsed|. The
+// outer URL reads |source| (it can have replacements); the inner uses |spec|.
+template<typename CHAR, typename UCHAR>
+bool DoCanonicalizeFileSystemURL(const CHAR* spec,
+                                 const URLComponentSource<CHAR>& source,
+                                 const Parsed& parsed,
+                                 CharsetConverter* charset_converter,
+                                 CanonOutput* output,
+                                 Parsed* new_parsed) {
+  // filesystem only uses {scheme, path, query, ref} -- clear the rest.
+  new_parsed->username.reset();
+  new_parsed->password.reset();
+  new_parsed->host.reset();
+  new_parsed->port.reset();
+
+  const Parsed* inner_parsed = parsed.inner_parsed();
+  Parsed new_inner_parsed;  // Filled in while the inner URL is written out.
+
+  // Scheme (known, so we don't bother running it through the more
+  // complicated scheme canonicalizer).
+  new_parsed->scheme.begin = output->length();
+  output->Append("filesystem:", 11);
+  new_parsed->scheme.len = 10;  // "filesystem" only; the colon is excluded.
+
+  if (!parsed.inner_parsed() || !parsed.inner_parsed()->scheme.is_valid())
+    return false;  // Inner URL unusable; only "filesystem:" was written.
+
+  bool success = true;
+  if (CompareSchemeComponent(spec, inner_parsed->scheme, url::kFileScheme)) {
+    new_inner_parsed.scheme.begin = output->length();
+    output->Append("file://", 7);
+    new_inner_parsed.scheme.len = 4;  // "file" only; "://" is excluded.
+    success &= CanonicalizePath(spec, inner_parsed->path, output,
+                                &new_inner_parsed.path);
+  } else if (IsStandard(spec, inner_parsed->scheme)) {
+    success = CanonicalizeStandardURL(spec, parsed.inner_parsed()->Length(),
+                                      *parsed.inner_parsed(), charset_converter,
+                                      output, &new_inner_parsed);
+  } else {
+    // TODO(ericu): The URL is wrong, but should we try to output more of what
+    // we were given?  Echoing back filesystem:mailto etc. doesn't seem all that
+    // useful.
+    return false;
+  }
+  // The filesystem type must be more than just a leading slash for validity.
+  success &= parsed.inner_parsed()->path.len > 1;
+
+  success &= CanonicalizePath(source.path, parsed.path, output,
+                              &new_parsed->path);
+
+  // Ignore failures for query/ref since the URL can probably still be loaded.
+  CanonicalizeQuery(source.query, parsed.query, charset_converter,
+                    output, &new_parsed->query);
+  CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
+  if (success)  // Attach the inner Parsed only when everything succeeded.
+    new_parsed->set_inner_parsed(new_inner_parsed);
+
+  return success;  // False when the inner URL or the outer path was bad.
+}
+
+}  // namespace
+
+bool CanonicalizeFileSystemURL(const char* spec,  // 8-bit URL text.
+                               int spec_len,  // Not read by this overload.
+                               const Parsed& parsed,
+                               CharsetConverter* charset_converter,
+                               CanonOutput* output,
+                               Parsed* new_parsed) {
+  return DoCanonicalizeFileSystemURL<char, unsigned char>(
+      spec, URLComponentSource<char>(spec), parsed, charset_converter, output,
+      new_parsed);  // No replacements: the source is built from |spec| alone.
+}
+
+bool CanonicalizeFileSystemURL(const base::char16* spec,  // 16-bit URL text.
+                               int spec_len,  // Not read by this overload.
+                               const Parsed& parsed,
+                               CharsetConverter* charset_converter,
+                               CanonOutput* output,
+                               Parsed* new_parsed) {
+  return DoCanonicalizeFileSystemURL<base::char16, base::char16>(
+      spec, URLComponentSource<base::char16>(spec), parsed, charset_converter,
+      output, new_parsed);  // No replacements: source is built from |spec|.
+}
+
+bool ReplaceFileSystemURL(const char* base,  // Text of the URL to modify.
+                          const Parsed& base_parsed,
+                          const Replacements<char>& replacements,
+                          CharsetConverter* charset_converter,
+                          CanonOutput* output,
+                          Parsed* new_parsed) {
+  URLComponentSource<char> source(base);  // Starts out backed by |base|.
+  Parsed parsed(base_parsed);  // Working copy; |base_parsed| is const.
+  SetupOverrideComponents(base, replacements, &source, &parsed);
+  return DoCanonicalizeFileSystemURL<char, unsigned char>(
+      base, source, parsed, charset_converter, output, new_parsed);
+}
+
+bool ReplaceFileSystemURL(const char* base,  // Text of the URL to modify.
+                          const Parsed& base_parsed,
+                          const Replacements<base::char16>& replacements,
+                          CharsetConverter* charset_converter,
+                          CanonOutput* output,
+                          Parsed* new_parsed) {
+  RawCanonOutput<1024> utf8;  // Presumably holds the replacements as UTF-8.
+  URLComponentSource<char> source(base);  // Starts out backed by |base|.
+  Parsed parsed(base_parsed);  // Working copy; |base_parsed| is const.
+  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
+  return DoCanonicalizeFileSystemURL<char, unsigned char>(
+      base, source, parsed, charset_converter, output, new_parsed);
+}
+
+}  // namespace url

diff --git a/src/url/url_canon_fileurl.cc b/src/url/url_canon_fileurl.cc
new file mode 100644
index 0000000..6191f8f
--- /dev/null
+++ b/src/url/url_canon_fileurl.cc

@@ -0,0 +1,189 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Functions for canonicalizing "file:" URLs.
+
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
+
+namespace url {
+
+namespace {
+
+#ifdef WIN32
+
+// Looks for a Windows drive spec at |begin| (after any leading slashes). When
+// one is found, appends "/X:" to |output| (drive letter uppercased, a pipe
+// normalized to a colon) and returns the index just past the drive separator.
+// When none is found, nothing is written and |begin| is returned so the
+// caller can canonicalize the whole range as a regular path.
+template<typename CHAR>
+int FileDoDriveSpec(const CHAR* spec, int begin, int end,
+                    CanonOutput* output) {
+  // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
+  // (with backslashes instead of slashes as well).
+  int num_slashes = CountConsecutiveSlashes(spec, begin, end);
+  int after_slashes = begin + num_slashes;  // Candidate drive-letter index.
+
+  if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end))
+    return begin;  // Haven't consumed any characters
+
+  // A drive spec is the start of a path, so we need to add a slash for the
+  // authority terminator (typically the third slash).
+  output->push_back('/');
+
+  // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
+  // and that it is followed by a colon/pipe.
+
+  // Normalize Windows drive letters to uppercase
+  if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z')
+    output->push_back(static_cast<char>(spec[after_slashes] - 'a' + 'A'));
+  else
+    output->push_back(static_cast<char>(spec[after_slashes]));
+
+  // Normalize the character following it to a colon rather than pipe.
+  output->push_back(':');
+  return after_slashes + 2;  // Skip the drive letter and its separator.
+}
+
+#endif  // WIN32
+
+template<typename CHAR, typename UCHAR>
+bool DoFileCanonicalizePath(const CHAR* spec,
+                            const Component& path,
+                            CanonOutput* output,
+                            Component* out_path) {
+  // Note the path start, then copy/normalize a leading "c:" (Windows only).
+  out_path->begin = output->length();
+  int after_drive;  // Index in |spec| where the remaining path text begins.
+#ifdef WIN32
+  after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
+#else
+  after_drive = path.begin;
+#endif
+
+  // Copies the rest of the path, starting from the slash following the
+  // drive colon (if any, Windows only), or the first slash of the path.
+  bool success = true;
+  if (after_drive < path.end()) {
+    // Use the regular path canonicalizer to canonicalize the rest of the
+    // path. Give it a fake output component to write into; the real
+    // |out_path| length is computed from the output length further down.
+    Component sub_path = MakeRange(after_drive, path.end());
+    Component fake_output_path;
+    success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
+  } else {
+    // No input path, canonicalize to a slash.
+    output->push_back('/');
+  }
+
+  out_path->len = output->length() - out_path->begin;  // Drive spec + rest.
+  return success;
+}
+
+template<typename CHAR, typename UCHAR>
+bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
+                           const Parsed& parsed,
+                           CharsetConverter* query_converter,
+                           CanonOutput* output,
+                           Parsed* new_parsed) {
+  // Things we don't set in file: URLs; reset them to empty Components.
+  new_parsed->username = Component();
+  new_parsed->password = Component();
+  new_parsed->port = Component();
+
+  // Scheme (known, so we don't bother running it through the more
+  // complicated scheme canonicalizer).
+  new_parsed->scheme.begin = output->length();
+  output->Append("file://", 7);
+  new_parsed->scheme.len = 4;  // "file" only; "://" is excluded.
+
+  // Append the host. For many file URLs, this will be empty. For UNC, this
+  // will be present.
+  // TODO(brettw) This doesn't do any checking for host name validity. We
+  // should probably handle validity checking of UNC hosts differently than
+  // for regular IP hosts.
+  bool success = CanonicalizeHost(source.host, parsed.host,
+                                  output, &new_parsed->host);
+  success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
+                                    output, &new_parsed->path);
+  CanonicalizeQuery(source.query, parsed.query, query_converter,
+                    output, &new_parsed->query);
+
+  // Ignore failure for refs since the URL can probably still be loaded.
+  CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
+
+  return success;  // Reflects only the host and path results.
+}
+
+}  // namespace
+
+bool CanonicalizeFileURL(const char* spec,  // 8-bit URL text.
+                         int spec_len,  // Not read by this overload.
+                         const Parsed& parsed,
+                         CharsetConverter* query_converter,
+                         CanonOutput* output,
+                         Parsed* new_parsed) {
+  return DoCanonicalizeFileURL<char, unsigned char>(
+      URLComponentSource<char>(spec), parsed, query_converter,
+      output, new_parsed);  // No replacements: source is built from |spec|.
+}
+
+bool CanonicalizeFileURL(const base::char16* spec,  // 16-bit URL text.
+                         int spec_len,  // Not read by this overload.
+                         const Parsed& parsed,
+                         CharsetConverter* query_converter,
+                         CanonOutput* output,
+                         Parsed* new_parsed) {
+  return DoCanonicalizeFileURL<base::char16, base::char16>(
+      URLComponentSource<base::char16>(spec), parsed, query_converter,
+      output, new_parsed);  // No replacements: source is built from |spec|.
+}
+
+bool FileCanonicalizePath(const char* spec,  // 8-bit text holding |path|.
+                          const Component& path,
+                          CanonOutput* output,
+                          Component* out_path) {
+  return DoFileCanonicalizePath<char, unsigned char>(spec, path,
+                                                     output, out_path);
+}
+
+bool FileCanonicalizePath(const base::char16* spec,  // 16-bit text for |path|.
+                          const Component& path,
+                          CanonOutput* output,
+                          Component* out_path) {
+  return DoFileCanonicalizePath<base::char16, base::char16>(spec, path,
+                                                            output, out_path);
+}
+
+bool ReplaceFileURL(const char* base,  // Text of the URL to modify.
+                    const Parsed& base_parsed,
+                    const Replacements<char>& replacements,
+                    CharsetConverter* query_converter,
+                    CanonOutput* output,
+                    Parsed* new_parsed) {
+  URLComponentSource<char> source(base);  // Starts out backed by |base|.
+  Parsed parsed(base_parsed);  // Working copy; |base_parsed| is const.
+  SetupOverrideComponents(base, replacements, &source, &parsed);
+  return DoCanonicalizeFileURL<char, unsigned char>(
+      source, parsed, query_converter, output, new_parsed);
+}
+
+bool ReplaceFileURL(const char* base,  // Text of the URL to modify.
+                    const Parsed& base_parsed,
+                    const Replacements<base::char16>& replacements,
+                    CharsetConverter* query_converter,
+                    CanonOutput* output,
+                    Parsed* new_parsed) {
+  RawCanonOutput<1024> utf8;  // Presumably holds the replacements as UTF-8.
+  URLComponentSource<char> source(base);  // Starts out backed by |base|.
+  Parsed parsed(base_parsed);  // Working copy; |base_parsed| is const.
+  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
+  return DoCanonicalizeFileURL<char, unsigned char>(
+      source, parsed, query_converter, output, new_parsed);
+}
+
+}  // namespace url

diff --git a/googleurl/src/url_canon_host.cc b/src/url/url_canon_host.cc
similarity index 83%
rename from googleurl/src/url_canon_host.cc
rename to src/url/url_canon_host.cc
index 6642004..513248a 100644
--- a/googleurl/src/url_canon_host.cc
+++ b/src/url/url_canon_host.cc

@@ -1,37 +1,12 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
 #include "base/logging.h"
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_internal.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
 
-namespace url_canon {
+namespace url {
 
 namespace {
 
@@ -94,14 +69,16 @@
 
 const int kTempHostBufferLen = 1024;
 typedef RawCanonOutputT<char, kTempHostBufferLen> StackBuffer;
-typedef RawCanonOutputT<char16, kTempHostBufferLen> StackBufferW;
+typedef RawCanonOutputT<base::char16, kTempHostBufferLen> StackBufferW;
 
 // Scans a host name and fills in the output flags according to what we find.
 // |has_non_ascii| will be true if there are any non-7-bit characters, and
 // |has_escaped| will be true if there is a percent sign.
 template<typename CHAR, typename UCHAR>
-void ScanHostname(const CHAR* spec, const url_parse::Component& host,
-                  bool* has_non_ascii, bool* has_escaped) {
+void ScanHostname(const CHAR* spec,
+                  const Component& host,
+                  bool* has_non_ascii,
+                  bool* has_escaped) {
   int end = host.end();
   *has_non_ascii = false;
   *has_escaped = false;
@@ -187,7 +164,7 @@
 }
 
 // Canonicalizes a host that requires IDN conversion. Returns true on success
-bool DoIDNHost(const char16* src, int src_len, CanonOutput* output) {
+bool DoIDNHost(const base::char16* src, int src_len, CanonOutput* output) {
   // We need to escape URL before doing IDN conversion, since punicode strings
   // cannot be escaped after they are created.
   RawCanonOutputW<kTempHostBufferLen> url_escaped_host;
@@ -280,7 +257,7 @@
 // UTF-16 convert host to its ASCII version. The set up is already ready for
 // the backend, so we just pass through. The has_escaped flag should be set if
 // the input string requires unescaping.
-bool DoComplexHost(const char16* host, int host_len,
+bool DoComplexHost(const base::char16* host, int host_len,
                    bool has_non_ascii, bool has_escaped, CanonOutput* output) {
   if (has_escaped) {
     // Yikes, we have escaped characters with wide input. The escaped
@@ -312,13 +289,13 @@
 
 template<typename CHAR, typename UCHAR>
 void DoHost(const CHAR* spec,
-            const url_parse::Component& host,
+            const Component& host,
             CanonOutput* output,
             CanonHostInfo* host_info) {
   if (host.len <= 0) {
     // Empty hosts don't need anything.
     host_info->family = CanonHostInfo::NEUTRAL;
-    host_info->out_host = url_parse::Component();
+    host_info->out_host = Component();
     return;
   }
 
@@ -347,7 +324,7 @@
     // should not cause an allocation.
     RawCanonOutput<64> canon_ip;
     CanonicalizeIPAddress(output->data(),
-                          url_parse::MakeRange(output_begin, output->length()),
+                          MakeRange(output_begin, output->length()),
                           &canon_ip, host_info);
 
     // If we got an IPv4/IPv6 address, copy the canonical form back to the
@@ -359,43 +336,43 @@
     }
   }
 
-  host_info->out_host = url_parse::MakeRange(output_begin, output->length());
+  host_info->out_host = MakeRange(output_begin, output->length());
 }
 
 }  // namespace
 
 bool CanonicalizeHost(const char* spec,
-                      const url_parse::Component& host,
+                      const Component& host,
                       CanonOutput* output,
-                      url_parse::Component* out_host) {
+                      Component* out_host) {
   CanonHostInfo host_info;
   DoHost<char, unsigned char>(spec, host, output, &host_info);
   *out_host = host_info.out_host;
   return (host_info.family != CanonHostInfo::BROKEN);
 }
 
-bool CanonicalizeHost(const char16* spec,
-                      const url_parse::Component& host,
+bool CanonicalizeHost(const base::char16* spec,
+                      const Component& host,
                       CanonOutput* output,
-                      url_parse::Component* out_host) {
+                      Component* out_host) {
   CanonHostInfo host_info;
-  DoHost<char16, char16>(spec, host, output, &host_info);
+  DoHost<base::char16, base::char16>(spec, host, output, &host_info);
   *out_host = host_info.out_host;
   return (host_info.family != CanonHostInfo::BROKEN);
 }
 
 void CanonicalizeHostVerbose(const char* spec,
-                             const url_parse::Component& host,
+                             const Component& host,
                              CanonOutput* output,
-                             CanonHostInfo *host_info) {
+                             CanonHostInfo* host_info) {
   DoHost<char, unsigned char>(spec, host, output, host_info);
 }
 
-void CanonicalizeHostVerbose(const char16* spec,
-                             const url_parse::Component& host,
+void CanonicalizeHostVerbose(const base::char16* spec,
+                             const Component& host,
                              CanonOutput* output,
-                             CanonHostInfo *host_info) {
-  DoHost<char16, char16>(spec, host, output, host_info);
+                             CanonHostInfo* host_info) {
+  DoHost<base::char16, base::char16>(spec, host, output, host_info);
 }
 
-}  // namespace url_canon
+}  // namespace url

diff --git a/src/url/url_canon_icu.cc b/src/url/url_canon_icu.cc
new file mode 100644
index 0000000..743ff00
--- /dev/null
+++ b/src/url/url_canon_icu.cc

@@ -0,0 +1,186 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// ICU integration functions.
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "base/logging.h"
+#include "third_party/icu/include/unicode/ucnv.h"
+#include "third_party/icu/include/unicode/ucnv_cb.h"
+#include "third_party/icu/include/unicode/uidna.h"
+#include "url/url_canon_icu.h"
+#include "url/url_canon_internal.h"  // for _itoa_s
+#include "util/gtl/lazy_static_ptr.h"
+
+namespace url {
+
+namespace {
+
+// ICU from-Unicode error callback. For a code point that can not be
+// represented (reason == UCNV_UNASSIGNED) it clears the error and appends the
+// percent-escaped numeric character reference: "&#1234;" is written as
+// "%26%231234%3B". Why? This is what Netscape did back in the olden days.
+void appendURLEscapedChar(const void* context,
+                          UConverterFromUnicodeArgs* from_args,
+                          const UChar* code_units,
+                          int32_t length,
+                          UChar32 code_point,
+                          UConverterCallbackReason reason,
+                          UErrorCode* err) {
+  if (reason == UCNV_UNASSIGNED) {  // Any other reason is left untouched.
+    *err = U_ZERO_ERROR;
+
+    const static int prefix_len = 6;
+    const static char prefix[prefix_len + 1] = "%26%23";  // "&#" percent-escaped
+    ucnv_cbFromUWriteBytes(from_args, prefix, prefix_len, 0, err);
+
+    DCHECK(code_point < 0x110000);
+    char number[8];  // At most 7 decimal digits (0x10FFFF) plus terminator.
+    _itoa_s(code_point, number, 10);  // Decimal, as in "&#1234;".
+    int number_len = static_cast<int>(strlen(number));
+    ucnv_cbFromUWriteBytes(from_args, number, number_len, 0, err);
+
+    const static int postfix_len = 3;
+    const static char postfix[postfix_len + 1] = "%3B";   // ";" percent-escaped
+    ucnv_cbFromUWriteBytes(from_args, postfix, postfix_len, 0, err);
+  }
+}
+
+// Installs appendURLEscapedChar on a converter for the lifetime of the object.
+class AppendHandlerInstaller {
+ public:
+  // The owner of this object must ensure that the converter is alive for the
+  // duration of this object's lifetime.
+  AppendHandlerInstaller(UConverter* converter) : converter_(converter) {
+    UErrorCode err = U_ZERO_ERROR;  // Not inspected afterwards.
+    ucnv_setFromUCallBack(converter_, appendURLEscapedChar, 0,
+                          &old_callback_, &old_context_, &err);
+  }
+
+  ~AppendHandlerInstaller() {
+    UErrorCode err = U_ZERO_ERROR;  // Not inspected afterwards.
+    ucnv_setFromUCallBack(converter_, old_callback_, old_context_, 0, 0, &err);
+  }
+
+ private:
+  UConverter* converter_;  // Not owned.
+
+  UConverterFromUCallback old_callback_;  // Restored by the destructor.
+  const void* old_context_;  // Context saved together with old_callback_.
+};
+
+// A wrapper to use util::gtl::LazyStaticPtr<> with ICU's UIDNA, a C pointer to
+// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().
+//
+// We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned code
+// points allowed) to IDNA 2008 with backward compatibility in mind. What it
+// does:
+//
+// 1. Use the up-to-date Unicode data.
+// 2. Define a case folding/mapping with the up-to-date Unicode data as
+//    in IDNA 2003.
+// 3. Use transitional mechanism for 4 deviation characters (sharp-s,
+//    final sigma, ZWJ and ZWNJ) for now.
+// 4. Continue to allow symbols and punctuations.
+// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDI rules.
+// 6. Do not apply STD3 rules
+// 7. Do not allow unassigned code points.
+//
+// It also closely matches what IE 10 does except for the BiDi check (
+// http://goo.gl/3XBhqw ).
+// See http://unicode.org/reports/tr46/ and references therein
+// for more details.
+struct UIDNAWrapper {
+  UIDNAWrapper() {
+    UErrorCode err = U_ZERO_ERROR;
+    // TODO(jungshik): Change options as different parties (browsers,
+    // registrars, search engines) converge toward a consensus.
+    value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
+    if (U_FAILURE(err))
+      value = NULL;
+  }
+
+  UIDNA* value;  // NULL if uidna_openUTS46() reported a failure.
+};
+
+}  // namespace
+
+ICUCharsetConverter::ICUCharsetConverter(UConverter* converter)
+    : converter_(converter) {  // Stored as-is; the pointer is not owned.
+}
+
+ICUCharsetConverter::~ICUCharsetConverter() {  // Does not close converter_.
+}
+
+void ICUCharsetConverter::ConvertFromUTF16(const base::char16* input,
+                                           int input_len,
+                                           CanonOutput* output) {
+  // Install our error handler. It will be called for characters that can not
+  // be represented in the destination character set.
+  AppendHandlerInstaller handler(converter_);
+
+  int begin_offset = output->length();  // Append after existing output.
+  int dest_capacity = output->capacity() - begin_offset;
+  output->set_length(output->length());  // NOTE(review): looks like a no-op.
+
+  do {
+    UErrorCode err = U_ZERO_ERROR;
+    char* dest = &output->data()[begin_offset];
+    int required_capacity = ucnv_fromUChars(converter_, dest, dest_capacity,
+                                            input, input_len, &err);
+    if (err != U_BUFFER_OVERFLOW_ERROR) {  // Converted, or failed otherwise.
+      output->set_length(begin_offset + required_capacity);
+      return;
+    }
+
+    // Output didn't fit; grow to the size ICU reported and try again.
+    dest_capacity = required_capacity;
+    output->Resize(begin_offset + dest_capacity);
+  } while (true);
+}
+
+static util::gtl::LazyStaticPtr<UIDNAWrapper> g_uidna;  // See IDNToASCII().
+
+// Converts the Unicode input representing a hostname to ASCII using IDN rules.
+// The output must be ASCII, but is represented as wide characters.
+//
+// On success, the output will be filled with the ASCII host name and it will
+// return true. Unlike most other canonicalization functions, this assumes that
+// the output is empty. The beginning of the host will be at offset 0, and
+// the length of the output will be set to the length of the new host name.
+//
+// On error, or if UIDNA is unavailable, returns false; output is undefined.
+// TODO(jungshik): use UTF-8/ASCII version of nameToASCII.
+// Change the function signature and callers accordingly to avoid unnecessary
+// conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII
+// version with StringByteSink. That way, we can avoid C wrappers and additional
+// string conversion.
+bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {
+  DCHECK(output->length() == 0);  // Output buffer is assumed empty.
+
+  UIDNA* uidna = g_uidna->value;
+  DCHECK(uidna != NULL);
+  if (uidna == NULL)
+    return false;  // uidna_openUTS46() failed; don't hand NULL to ICU.
+  while (true) {
+    UErrorCode err = U_ZERO_ERROR;
+    UIDNAInfo info = UIDNA_INFO_INITIALIZER;
+    int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(),
+                                          output->capacity(), &info, &err);
+    if (U_SUCCESS(err) && info.errors == 0) {
+      output->set_length(output_length);
+      return true;
+    }
+    // TODO(jungshik): Handle info.errors case by case if necessary.
+    if (err != U_BUFFER_OVERFLOW_ERROR || info.errors != 0)
+      return false;  // Unknown error, give up.
+
+    // Not enough room in our buffer, expand.
+    output->Resize(output_length);
+  }
+}
+
+}  // namespace url

diff --git a/src/url/url_canon_icu.h b/src/url/url_canon_icu.h
new file mode 100644
index 0000000..c3c1f01
--- /dev/null
+++ b/src/url/url_canon_icu.h

@@ -0,0 +1,39 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_ICU_H_
+#define URL_URL_CANON_ICU_H_
+
+// ICU integration functions.
+
+#include "url/url_canon.h"
+#include "url/url_export.h"
+
+typedef struct UConverter UConverter;
+
+namespace url {
+
+// A CharsetConverter implementation that callers can use to interface the
+// canonicalizer with ICU's conversion routines.
+class URL_EXPORT ICUCharsetConverter : public CharsetConverter {
+ public:
+  // Constructs a converter using an already-existing ICU character set
+  // converter. This converter is NOT owned by this object; the lifetime must
+  // be managed by the creator such that it is alive as long as this is.
+  ICUCharsetConverter(UConverter* converter);
+
+  ~ICUCharsetConverter() override;
+  // Converts |input| with the ICU converter and appends it to |output|.
+  void ConvertFromUTF16(const base::char16* input,
+                        int input_len,
+                        CanonOutput* output) override;
+
+ private:
+  // The ICU converter, not owned by this class.
+  UConverter* converter_;
+};
+
+}  // namespace url
+
+#endif  // URL_URL_CANON_ICU_H_

diff --git a/src/url/url_canon_icu_unittest.cc b/src/url/url_canon_icu_unittest.cc
new file mode 100644
index 0000000..cfa4b49
--- /dev/null
+++ b/src/url/url_canon_icu_unittest.cc

@@ -0,0 +1,160 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/macros.h"
+#include "testing/base/public/gunit.h"
+#include "third_party/icu/include/unicode/ucnv.h"
+#include "url/url_canon.h"
+#include "url/url_canon_icu.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_test_utils.h"
+
+namespace url {
+
+using test_utils::WStringToUTF16;
+
+namespace {
+
+// Wrapper around a UConverter object that manages creation and destruction.
+class UConvScoper {
+ public:
+  explicit UConvScoper(const char* charset_name) {
+    UErrorCode err = U_ZERO_ERROR;  // Not inspected; tests check converter().
+    converter_ = ucnv_open(charset_name, &err);
+  }
+
+  ~UConvScoper() {
+    if (converter_)
+      ucnv_close(converter_);
+  }
+
+  // Returns the converter object, may be NULL.
+  UConverter* converter() const { return converter_; }
+
+ private:
+  UConverter* converter_;
+};
+
+TEST(URLCanonIcuTest, ICUCharsetConverter) {
+  struct ICUCase {
+    const wchar_t* input;
+    const char* encoding;
+    const char* expected;
+  } icu_cases[] = {
+      // UTF-8.
+    {L"Hello, world", "utf-8", "Hello, world"},
+    {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},
+      // Non-BMP UTF-8 (the input is a UTF-16 surrogate pair).
+    {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},
+      // Big5.
+    {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},
+      // U+06DE is unrepresentable in Big5; expect the escaped "&#1758;".
+    {L"hello\x4f60\x06de\x597dworld", "big5",
+      "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},
+  };
+
+  for (size_t i = 0; i < arraysize(icu_cases); i++) {
+    UConvScoper conv(icu_cases[i].encoding);
+    ASSERT_TRUE(conv.converter() != NULL);
+    ICUCharsetConverter converter(conv.converter());
+
+    std::string str;
+    StdStringCanonOutput output(&str);
+
+    base::string16 input_str(WStringToUTF16(icu_cases[i].input));
+    int input_len = static_cast<int>(input_str.length());
+    converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
+    output.Complete();
+
+    EXPECT_STREQ(icu_cases[i].expected, str.c_str());
+  }
+
+  // Test string sizes around the resize boundary for the output to make sure
+  // the converter resizes as needed.
+  const int static_size = 16;
+  UConvScoper conv("utf-8");
+  ASSERT_TRUE(conv.converter());
+  ICUCharsetConverter converter(conv.converter());
+  for (int i = static_size - 2; i <= static_size + 2; i++) {
+    // Make an all-'a' string of length |i|.
+    base::string16 input;
+    for (int ch = 0; ch < i; ch++)
+      input.push_back('a');
+
+    RawCanonOutput<static_size> output;
+    converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()),
+                               &output);
+    EXPECT_EQ(input.length(), static_cast<size_t>(output.length()));
+  }
+}
+
+TEST(URLCanonIcuTest, QueryWithConverter) {
+  struct QueryCase {
+    const char* input8;
+    const wchar_t* input16;
+    const char* encoding;
+    const char* expected;
+  } query_cases[] = {
+      // Regular ASCII case in some different encodings.
+    {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},
+    {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},
+    {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},
+      // Chinese input/output
+    {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312",
+      "?q=%C4%E3%BA%C3"},
+    {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},
+      // Unencodable character in the destination character set should be
+      // escaped. The escape sequence unescapes to be the entity name:
+      // "?q=Chinese&#65319;"
+    {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1",
+      "?q=Chinese%26%2365319%3B"},
+  };
+
+  for (size_t i = 0; i < arraysize(query_cases); i++) {
+    Component out_comp;
+
+    UConvScoper conv(query_cases[i].encoding);
+    ASSERT_TRUE(!query_cases[i].encoding || conv.converter());
+    ICUCharsetConverter converter(conv.converter());
+
+    if (query_cases[i].input8) {  // 8-bit input variant.
+      int len = static_cast<int>(strlen(query_cases[i].input8));
+      Component in_comp(0, len);
+      std::string out_str;
+
+      StdStringCanonOutput output(&out_str);
+      CanonicalizeQuery(query_cases[i].input8, in_comp, &converter, &output,
+                        &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(query_cases[i].expected, out_str);
+    }
+
+    if (query_cases[i].input16) {  // UTF-16 input variant, same expectation.
+      base::string16 input16(WStringToUTF16(query_cases[i].input16));
+      int len = static_cast<int>(input16.length());
+      Component in_comp(0, len);
+      std::string out_str;
+
+      StdStringCanonOutput output(&out_str);
+      CanonicalizeQuery(input16.c_str(), in_comp, &converter, &output,
+                        &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(query_cases[i].expected, out_str);
+    }
+  }
+
+  // Extra test for input with an embedded NUL, run with a NULL converter.
+  std::string out_str;
+  StdStringCanonOutput output(&out_str);
+  Component out_comp;
+  CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp);
+  output.Complete();
+  EXPECT_EQ("?a%20%00z%01", out_str);
+}
+
+}  // namespace
+
+}  // namespace url

diff --git a/googleurl/src/url_canon_internal.cc b/src/url/url_canon_internal.cc
similarity index 61%
rename from googleurl/src/url_canon_internal.cc
rename to src/url/url_canon_internal.cc
index 6b776bc..1554814 100644
--- a/googleurl/src/url_canon_internal.cc
+++ b/src/url/url_canon_internal.cc

@@ -1,40 +1,18 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
-#include <cstdio>
+#include "url/url_canon_internal.h"
+
 #include <errno.h>
 #include <stdlib.h>
+
+#include <cstdio>
 #include <string>
 
-#include "googleurl/src/url_canon_internal.h"
+#include "base/strings/utf_string_conversion_utils.h"
 
-namespace url_canon {
+namespace url {
 
 namespace {
 
@@ -82,12 +60,12 @@
   }
 }
 
-// Overrides one component, see the url_canon::Replacements structure for
+// Overrides one component, see the Replacements structure for
 // what the various combionations of source pointer and component mean.
 void DoOverrideComponent(const char* override_source,
-                         const url_parse::Component& override_component,
+                         const Component& override_component,
                          const char** dest,
-                         url_parse::Component* dest_component) {
+                         Component* dest_component) {
   if (override_source) {
     *dest = override_source;
     *dest_component = override_component;
@@ -98,7 +76,7 @@
 // not actually set the output character pointer.
 //
 // The input is converted to UTF-8 at the end of the given buffer as a temporary
-// holding place. The component indentifying the portion of the buffer used in
+// holding place. The component identifying the portion of the buffer used in
 // the |utf8_buffer| will be specified in |*dest_component|.
 //
 // This will not actually set any |dest| pointer like DoOverrideComponent
@@ -106,16 +84,15 @@
 // may get resized while we're overriding a subsequent component. Instead, the
 // caller should use the beginning of the |utf8_buffer| as the string pointer
 // for all components once all overrides have been prepared.
-bool PrepareUTF16OverrideComponent(
-    const char16* override_source,
-    const url_parse::Component& override_component,
-    CanonOutput* utf8_buffer,
-    url_parse::Component* dest_component) {
+bool PrepareUTF16OverrideComponent(const base::char16* override_source,
+                                   const Component& override_component,
+                                   CanonOutput* utf8_buffer,
+                                   Component* dest_component) {
   bool success = true;
   if (override_source) {
     if (!override_component.is_valid()) {
       // Non-"valid" component (means delete), so we need to preserve that.
-      *dest_component = url_parse::Component();
+      *dest_component = Component();
     } else {
       // Convert to UTF-8.
       dest_component->begin = utf8_buffer->length();
@@ -134,31 +111,31 @@
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0f
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1f
     0,                           // 0x20  ' ' (escape spaces in queries)
-    CHAR_QUERY | CHAR_USERINFO,  // 0x21  !
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x21  !
     0,                           // 0x22  "
     0,                           // 0x23  #  (invalid in query since it marks the ref)
     CHAR_QUERY | CHAR_USERINFO,  // 0x24  $
     CHAR_QUERY | CHAR_USERINFO,  // 0x25  %
     CHAR_QUERY | CHAR_USERINFO,  // 0x26  &
-    CHAR_QUERY | CHAR_USERINFO,  // 0x27  '
-    CHAR_QUERY | CHAR_USERINFO,  // 0x28  (
-    CHAR_QUERY | CHAR_USERINFO,  // 0x29  )
-    CHAR_QUERY | CHAR_USERINFO,  // 0x2a  *
+    0,                           // 0x27  '  (Try to prevent XSS.)
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x28  (
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x29  )
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2a  *
     CHAR_QUERY | CHAR_USERINFO,  // 0x2b  +
     CHAR_QUERY | CHAR_USERINFO,  // 0x2c  ,
-    CHAR_QUERY | CHAR_USERINFO,  // 0x2d  -
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4,  // 0x2e  .
-    CHAR_QUERY,                              // 0x2f  /
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x30  0
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x31  1
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x32  2
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x33  3
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x34  4
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x35  5
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x36  6
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x37  7
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC,             // 0x38  8
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC,             // 0x39  9
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2d  -
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x2e  .
+    CHAR_QUERY,                  // 0x2f  /
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x30  0
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x31  1
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x32  2
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x33  3
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x34  4
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x35  5
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x36  6
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x37  7
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x38  8
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x39  9
     CHAR_QUERY,  // 0x3a  :
     CHAR_QUERY,  // 0x3b  ;
     0,           // 0x3c  <  (Try to prevent certain types of XSS.)
@@ -166,68 +143,68 @@
     0,           // 0x3e  >  (Try to prevent certain types of XSS.)
     CHAR_QUERY,  // 0x3f  ?
     CHAR_QUERY,  // 0x40  @
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x41  A
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x42  B
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x43  C
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x44  D
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x45  E
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x46  F
-    CHAR_QUERY | CHAR_USERINFO,  // 0x47  G
-    CHAR_QUERY | CHAR_USERINFO,  // 0x48  H
-    CHAR_QUERY | CHAR_USERINFO,  // 0x49  I
-    CHAR_QUERY | CHAR_USERINFO,  // 0x4a  J
-    CHAR_QUERY | CHAR_USERINFO,  // 0x4b  K
-    CHAR_QUERY | CHAR_USERINFO,  // 0x4c  L
-    CHAR_QUERY | CHAR_USERINFO,  // 0x4d  M
-    CHAR_QUERY | CHAR_USERINFO,  // 0x4e  N
-    CHAR_QUERY | CHAR_USERINFO,  // 0x4f  O
-    CHAR_QUERY | CHAR_USERINFO,  // 0x50  P
-    CHAR_QUERY | CHAR_USERINFO,  // 0x51  Q
-    CHAR_QUERY | CHAR_USERINFO,  // 0x52  R
-    CHAR_QUERY | CHAR_USERINFO,  // 0x53  S
-    CHAR_QUERY | CHAR_USERINFO,  // 0x54  T
-    CHAR_QUERY | CHAR_USERINFO,  // 0x55  U
-    CHAR_QUERY | CHAR_USERINFO,  // 0x56  V
-    CHAR_QUERY | CHAR_USERINFO,  // 0x57  W
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4, // 0x58  X
-    CHAR_QUERY | CHAR_USERINFO,  // 0x59  Y
-    CHAR_QUERY | CHAR_USERINFO,  // 0x5a  Z
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x41  A
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x42  B
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x43  C
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x44  D
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x45  E
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x46  F
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x47  G
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x48  H
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x49  I
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4a  J
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4b  K
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4c  L
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4d  M
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4e  N
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4f  O
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x50  P
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x51  Q
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x52  R
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x53  S
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x54  T
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x55  U
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x56  V
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x57  W
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT, // 0x58  X
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x59  Y
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5a  Z
     CHAR_QUERY,  // 0x5b  [
     CHAR_QUERY,  // 0x5c  '\'
     CHAR_QUERY,  // 0x5d  ]
     CHAR_QUERY,  // 0x5e  ^
-    CHAR_QUERY | CHAR_USERINFO,  // 0x5f  _
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5f  _
     CHAR_QUERY,  // 0x60  `
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x61  a
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x62  b
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x63  c
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x64  d
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x65  e
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x66  f
-    CHAR_QUERY | CHAR_USERINFO,  // 0x67  g
-    CHAR_QUERY | CHAR_USERINFO,  // 0x68  h
-    CHAR_QUERY | CHAR_USERINFO,  // 0x69  i
-    CHAR_QUERY | CHAR_USERINFO,  // 0x6a  j
-    CHAR_QUERY | CHAR_USERINFO,  // 0x6b  k
-    CHAR_QUERY | CHAR_USERINFO,  // 0x6c  l
-    CHAR_QUERY | CHAR_USERINFO,  // 0x6d  m
-    CHAR_QUERY | CHAR_USERINFO,  // 0x6e  n
-    CHAR_QUERY | CHAR_USERINFO,  // 0x6f  o
-    CHAR_QUERY | CHAR_USERINFO,  // 0x70  p
-    CHAR_QUERY | CHAR_USERINFO,  // 0x71  q
-    CHAR_QUERY | CHAR_USERINFO,  // 0x72  r
-    CHAR_QUERY | CHAR_USERINFO,  // 0x73  s
-    CHAR_QUERY | CHAR_USERINFO,  // 0x74  t
-    CHAR_QUERY | CHAR_USERINFO,  // 0x75  u
-    CHAR_QUERY | CHAR_USERINFO,  // 0x76  v
-    CHAR_QUERY | CHAR_USERINFO,  // 0x77  w
-    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4,  // 0x78  x
-    CHAR_QUERY | CHAR_USERINFO,  // 0x79  y
-    CHAR_QUERY | CHAR_USERINFO,  // 0x7a  z
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x61  a
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x62  b
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x63  c
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x64  d
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x65  e
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x66  f
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x67  g
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x68  h
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x69  i
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6a  j
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6b  k
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6c  l
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6d  m
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6e  n
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6f  o
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x70  p
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x71  q
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x72  r
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x73  s
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x74  t
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x75  u
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x76  v
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x77  w
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x78  x
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x79  y
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7a  z
     CHAR_QUERY,  // 0x7b  {
     CHAR_QUERY,  // 0x7c  |
     CHAR_QUERY,  // 0x7d  }
-    CHAR_QUERY | CHAR_USERINFO,  // 0x7e  ~
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7e  ~
     0,           // 0x7f
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x8f
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90 - 0x9f
@@ -255,7 +232,7 @@
     0,         // 0xE0 - 0xFF
 };
 
-const char16 kUnicodeReplacementCharacter = 0xfffd;
+const base::char16 kUnicodeReplacementCharacter = 0xfffd;
 
 void AppendStringOfType(const char* source, int length,
                         SharedCharTypes type,
@@ -263,10 +240,37 @@
   DoAppendStringOfType<char, unsigned char>(source, length, type, output);
 }
 
-void AppendStringOfType(const char16* source, int length,
+void AppendStringOfType(const base::char16* source, int length,
                         SharedCharTypes type,
                         CanonOutput* output) {
-  DoAppendStringOfType<char16, char16>(source, length, type, output);
+  DoAppendStringOfType<base::char16, base::char16>(
+      source, length, type, output);
+}
+
+bool ReadUTFChar(const char* str, int* begin, int length,
+                 unsigned* code_point_out) {
+  // This depends on ints and int32s being the same thing.  If they're not, it
+  // will fail to compile.
+  // TODO(mmenke):  This should probably be fixed.
+  if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
+      !base::IsValidCharacter(*code_point_out)) {
+    *code_point_out = kUnicodeReplacementCharacter;
+    return false;
+  }
+  return true;
+}
+
+bool ReadUTFChar(const base::char16* str, int* begin, int length,
+                 unsigned* code_point_out) {
+  // This depends on ints and int32s being the same thing.  If they're not, it
+  // will fail to compile.
+  // TODO(mmenke):  This should probably be fixed.
+  if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
+      !base::IsValidCharacter(*code_point_out)) {
+    *code_point_out = kUnicodeReplacementCharacter;
+    return false;
+  }
+  return true;
 }
 
 void AppendInvalidNarrowString(const char* spec, int begin, int end,
@@ -274,12 +278,13 @@
   DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
 }
 
-void AppendInvalidNarrowString(const char16* spec, int begin, int end,
+void AppendInvalidNarrowString(const base::char16* spec, int begin, int end,
                                CanonOutput* output) {
-  DoAppendInvalidNarrowString<char16, char16>(spec, begin, end, output);
+  DoAppendInvalidNarrowString<base::char16, base::char16>(
+      spec, begin, end, output);
 }
 
-bool ConvertUTF16ToUTF8(const char16* input, int input_len,
+bool ConvertUTF16ToUTF8(const base::char16* input, int input_len,
                         CanonOutput* output) {
   bool success = true;
   for (int i = 0; i < input_len; i++) {
@@ -291,7 +296,7 @@
 }
 
 bool ConvertUTF8ToUTF16(const char* input, int input_len,
-                        CanonOutputT<char16>* output) {
+                        CanonOutputT<base::char16>* output) {
   bool success = true;
   for (int i = 0; i < input_len; i++) {
     unsigned code_point;
@@ -304,10 +309,10 @@
 void SetupOverrideComponents(const char* base,
                              const Replacements<char>& repl,
                              URLComponentSource<char>* source,
-                             url_parse::Parsed* parsed) {
+                             Parsed* parsed) {
   // Get the source and parsed structures of the things we are replacing.
   const URLComponentSource<char>& repl_source = repl.sources();
-  const url_parse::Parsed& repl_parsed = repl.components();
+  const Parsed& repl_parsed = repl.components();
 
   DoOverrideComponent(repl_source.scheme, repl_parsed.scheme,
                       &source->scheme, &parsed->scheme);
@@ -333,15 +338,15 @@
 }
 
 bool SetupUTF16OverrideComponents(const char* base,
-                                  const Replacements<char16>& repl,
+                                  const Replacements<base::char16>& repl,
                                   CanonOutput* utf8_buffer,
                                   URLComponentSource<char>* source,
-                                  url_parse::Parsed* parsed) {
+                                  Parsed* parsed) {
   bool success = true;
 
   // Get the source and parsed structures of the things we are replacing.
-  const URLComponentSource<char16>& repl_source = repl.sources();
-  const url_parse::Parsed& repl_parsed = repl.components();
+  const URLComponentSource<base::char16>& repl_source = repl.sources();
+  const Parsed& repl_parsed = repl.components();
 
   success &= PrepareUTF16OverrideComponent(
       repl_source.scheme, repl_parsed.scheme,
@@ -402,7 +407,7 @@
   return 0;
 }
 
-int _itow_s(int value, char16* buffer, size_t size_in_chars, int radix) {
+int _itow_s(int value, base::char16* buffer, size_t size_in_chars, int radix) {
   if (radix != 10)
     return EINVAL;
 
@@ -416,7 +421,7 @@
   }
 
   for (int i = 0; i < written; ++i) {
-    buffer[i] = static_cast<char16>(temp[i]);
+    buffer[i] = static_cast<base::char16>(temp[i]);
   }
   buffer[written] = '\0';
   return 0;
@@ -424,4 +429,4 @@
 
 #endif  // !WIN32
 
-}  // namespace url_canon
+}  // namespace url

diff --git a/googleurl/src/url_canon_internal.h b/src/url/url_canon_internal.h
similarity index 77%
rename from googleurl/src/url_canon_internal.h
rename to src/url/url_canon_internal.h
index 4b1e45a..a66cd8d 100644
--- a/googleurl/src/url_canon_internal.h
+++ b/src/url/url_canon_internal.h

@@ -1,45 +1,21 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_INTERNAL_H_
+#define URL_URL_CANON_INTERNAL_H_
 
 // This file is intended to be included in another C++ file where the character
 // types are defined. This allows us to write mostly generic code, but not have
 // templace bloat because everything is inlined when anybody calls any of our
 // functions.
 
-#ifndef GOOGLEURL_SRC_URL_CANON_INTERNAL_H__
-#define GOOGLEURL_SRC_URL_CANON_INTERNAL_H__
-
 #include <stdlib.h>
 
-#include "googleurl/src/url_canon.h"
+#include "base/logging.h"
+#include "url/url_canon.h"
 
-namespace url_canon {
+namespace url {
 
 // Character type handling -----------------------------------------------------
 
@@ -47,7 +23,7 @@
 // bits that are set for each 8-bit character in the kSharedCharTypeTable.
 enum SharedCharTypes {
   // Characters that do not require escaping in queries. Characters that do
-  // not have this flag will be escaped, see url_canon_query.cc
+  // not have this flag will be escaped; see url_canon_query.cc
   CHAR_QUERY = 1,
 
   // Valid in the username/password field.
@@ -64,6 +40,10 @@
 
   // Valid in an ASCII-representation of an octal digit.
   CHAR_OCT = 32,
+
+  // Characters that do not require escaping in encodeURIComponent.  Characters
+  // that do not have this flag will be escaped; see url_util.cc.
+  CHAR_COMPONENT = 64,
 };
 
 // This table contains the flags in SharedCharTypes for each 8-bit character.
@@ -88,19 +68,22 @@
 inline bool IsHexChar(unsigned char c) {
   return IsCharOfType(c, CHAR_HEX);
 }
+inline bool IsComponentChar(unsigned char c) {
+  return IsCharOfType(c, CHAR_COMPONENT);
+}
 
 // Appends the given string to the output, escaping characters that do not
 // match the given |type| in SharedCharTypes.
 void AppendStringOfType(const char* source, int length,
                         SharedCharTypes type,
                         CanonOutput* output);
-void AppendStringOfType(const char16* source, int length,
+void AppendStringOfType(const base::char16* source, int length,
                         SharedCharTypes type,
                         CanonOutput* output);
 
 // Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit
 // that will be used to represent it.
-extern const char kHexCharLookup[0x10];
+URL_EXPORT extern const char kHexCharLookup[0x10];
 
 // This lookup table allows fast conversion between ASCII hex letters and their
 // corresponding numerical value. The 8-bit range is divided up into 8
@@ -138,7 +121,7 @@
 // required for relative URL resolving to test for scheme equality.
 //
 // Returns 0 if the input character is not a valid scheme character.
-char CanonicalSchemeChar(char16 ch);
+char CanonicalSchemeChar(base::char16 ch);
 
 // Write a single character, escaped, to the output. This always escapes: it
 // does no checking that thee character requires escaping.
@@ -148,12 +131,12 @@
 inline void AppendEscapedChar(UINCHAR ch,
                               CanonOutputT<OUTCHAR>* output) {
   output->push_back('%');
-  output->push_back(kHexCharLookup[ch >> 4]);
+  output->push_back(kHexCharLookup[(ch >> 4) & 0xf]);
   output->push_back(kHexCharLookup[ch & 0xf]);
 }
 
 // The character we'll substitute for undecodable or invalid characters.
-extern const char16 kUnicodeReplacementCharacter;
+extern const base::char16 kUnicodeReplacementCharacter;
 
 // UTF-8 functions ------------------------------------------------------------
 
@@ -165,14 +148,15 @@
 // |*begin| will be updated to point to the last character consumed so it
 // can be incremented in a loop and will be ready for the next character.
 // (for a single-byte ASCII character, it will not be changed).
-//
-// Implementation is in url_canon_icu.cc.
-bool ReadUTFChar(const char* str, int* begin, int length,
-                 unsigned* code_point_out);
+URL_EXPORT bool ReadUTFChar(const char* str, int* begin, int length,
+                            unsigned* code_point_out);
 
 // Generic To-UTF-8 converter. This will call the given append method for each
 // character that should be appended, with the given output method. Wrappers
 // are provided below for escaped and non-escaped versions of this.
+//
+// The caller must already have verified that char_value is a valid Unicode
+// character.
 template<class Output, void Appender(unsigned char, Output*)>
 inline void DoAppendUTF8(unsigned char_value, Output* output) {
   if (char_value <= 0x7f) {
@@ -191,7 +175,7 @@
              output);
     Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
              output);
-  } else if (char_value <= 0x1fffff) {
+  } else if (char_value <= 0x10FFFF) {  // Max unicode code point.
     // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
              output);
@@ -201,20 +185,9 @@
              output);
     Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
              output);
-  } else if (char_value <= 0x10FFFF) {  // Max unicode code point.
-    // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-    Appender(static_cast<unsigned char>(0xf8 | (char_value >> 24)),
-             output);
-    Appender(static_cast<unsigned char>(0x80 | ((char_value >> 18) & 0x3f)),
-             output);
-    Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)),
-             output);
-    Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
-             output);
-    Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
-             output);
   } else {
-    // Invalid UTF-8 character (>20 bits)
+    // Invalid code point: above 0x10FFFF, the Unicode maximum.
+    DCHECK(false);  // NOTREACHED();
   }
 }
 
@@ -250,19 +223,17 @@
 // |*begin| will be updated to point to the last character consumed so it
 // can be incremented in a loop and will be ready for the next character.
 // (for a single-16-bit-word character, it will not be changed).
-//
-// Implementation is in url_canon_icu.cc.
-bool ReadUTFChar(const char16* str, int* begin, int length,
-                 unsigned* code_point);
+URL_EXPORT bool ReadUTFChar(const base::char16* str, int* begin, int length,
+                            unsigned* code_point_out);
 
 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
 inline void AppendUTF16Value(unsigned code_point,
-                             CanonOutputT<char16>* output) {
+                             CanonOutputT<base::char16>* output) {
   if (code_point > 0xffff) {
-    output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0));
-    output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00));
+    output->push_back(static_cast<base::char16>((code_point >> 10) + 0xd7c0));
+    output->push_back(static_cast<base::char16>((code_point & 0x3ff) | 0xdc00));
   } else {
-    output->push_back(static_cast<char16>(code_point));
+    output->push_back(static_cast<base::char16>(code_point));
   }
 }
 
@@ -287,8 +258,8 @@
 //
 // Assumes that ch[begin] is within range in the array, but does not assume
 // that any following characters are.
-inline bool AppendUTF8EscapedChar(const char16* str, int* begin, int length,
-                                  CanonOutput* output) {
+inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin,
+                                  int length, CanonOutput* output) {
   // UTF-16 input. Readchar16 will handle invalid characters for us and give
   // us the kUnicodeReplacementCharacter, so we don't have to do special
   // checking after failure, just pass through the failure to the caller.
@@ -322,7 +293,7 @@
 inline bool Is8BitChar(char c) {
   return true;  // this case is specialized to avoid a warning
 }
-inline bool Is8BitChar(char16 c) {
+inline bool Is8BitChar(base::char16 c) {
   return c <= 255;
 }
 
@@ -358,7 +329,7 @@
 // the escaping rules are not guaranteed!
 void AppendInvalidNarrowString(const char* spec, int begin, int end,
                                CanonOutput* output);
-void AppendInvalidNarrowString(const char16* spec, int begin, int end,
+void AppendInvalidNarrowString(const base::char16* spec, int begin, int end,
                                CanonOutput* output);
 
 // Misc canonicalization helpers ----------------------------------------------
@@ -371,15 +342,15 @@
 // replacing the invalid characters with the "invalid character". It will
 // return false in the failure case, and the caller should not continue as
 // normal.
-bool ConvertUTF16ToUTF8(const char16* input, int input_len,
-                        CanonOutput* output);
-bool ConvertUTF8ToUTF16(const char* input, int input_len,
-                        CanonOutputT<char16>* output);
+URL_EXPORT bool ConvertUTF16ToUTF8(const base::char16* input, int input_len,
+                                   CanonOutput* output);
+URL_EXPORT bool ConvertUTF8ToUTF16(const char* input, int input_len,
+                                   CanonOutputT<base::char16>* output);
 
 // Converts from UTF-16 to 8-bit using the character set converter. If the
 // converter is NULL, this will use UTF-8.
-void ConvertUTF16ToQueryEncoding(const char16* input,
-                                 const url_parse::Component& query,
+void ConvertUTF16ToQueryEncoding(const base::char16* input,
+                                 const Component& query,
                                  CharsetConverter* converter,
                                  CanonOutput* output);
 
@@ -395,7 +366,7 @@
 void SetupOverrideComponents(const char* base,
                              const Replacements<char>& repl,
                              URLComponentSource<char>* source,
-                             url_parse::Parsed* parsed);
+                             Parsed* parsed);
 
 // Like the above 8-bit version, except that it additionally converts the
 // UTF-16 input to UTF-8 before doing the overrides.
@@ -410,31 +381,33 @@
 // |source| will point into this buffer, which could be invalidated if
 // additional data is added and the CanonOutput resizes its buffer.
 //
-// Returns true on success. Fales means that the input was not valid UTF-16,
+// Returns true on success. False means that the input was not valid UTF-16,
 // although we will have still done the override with "invalid characters" in
 // place of errors.
 bool SetupUTF16OverrideComponents(const char* base,
-                                  const Replacements<char16>& repl,
+                                  const Replacements<base::char16>& repl,
                                   CanonOutput* utf8_buffer,
                                   URLComponentSource<char>* source,
-                                  url_parse::Parsed* parsed);
+                                  Parsed* parsed);
 
 // Implemented in url_canon_path.cc, these are required by the relative URL
 // resolver as well, so we declare them here.
 bool CanonicalizePartialPath(const char* spec,
-                             const url_parse::Component& path,
+                             const Component& path,
                              int path_begin_in_output,
                              CanonOutput* output);
-bool CanonicalizePartialPath(const char16* spec,
-                             const url_parse::Component& path,
+bool CanonicalizePartialPath(const base::char16* spec,
+                             const Component& path,
                              int path_begin_in_output,
                              CanonOutput* output);
 
 #ifndef WIN32
 
 // Implementations of Windows' int-to-string conversions
-int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix);
-int _itow_s(int value, char16* buffer, size_t size_in_chars, int radix);
+URL_EXPORT int _itoa_s(int value, char* buffer, size_t size_in_chars,
+                       int radix);
+URL_EXPORT int _itow_s(int value, base::char16* buffer, size_t size_in_chars,
+                       int radix);
 
 // Secure template overloads for these functions
 template<size_t N>
@@ -443,7 +416,7 @@
 }
 
 template<size_t N>
-inline int _itow_s(int value, char16 (&buffer)[N], int radix) {
+inline int _itow_s(int value, base::char16 (&buffer)[N], int radix) {
   return _itow_s(value, buffer, N, radix);
 }
 
@@ -455,6 +428,6 @@
 
 #endif  // WIN32
 
-}  // namespace url_canon
+}  // namespace url
 
-#endif  // GOOGLEURL_SRC_URL_CANON_INTERNAL_H__
+#endif  // URL_URL_CANON_INTERNAL_H_

diff --git a/googleurl/src/url_canon_internal_file.h b/src/url/url_canon_internal_file.h
similarity index 70%
rename from googleurl/src/url_canon_internal_file.h
rename to src/url/url_canon_internal_file.h
index 63a9c5b..6903098 100644
--- a/googleurl/src/url_canon_internal_file.h
+++ b/src/url/url_canon_internal_file.h

@@ -1,31 +1,9 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_INTERNAL_FILE_H_
+#define URL_URL_CANON_INTERNAL_FILE_H_
 
 // As with url_canon_internal.h, this file is intended to be included in
 // another C++ file where the template types are defined. This allows the
@@ -36,13 +14,11 @@
 // *** This file must be included after url_canon_internal as we depend on some
 // functions in it. ***
 
-#ifndef GOOGLEURL_SRC_URL_CANON_INTERNAL_FILE_H__
-#define GOOGLEURL_SRC_URL_CANON_INTERNAL_FILE_H__
 
-#include "googleurl/src/url_file.h"
-#include "googleurl/src/url_parse_internal.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
 
-using namespace url_canon;
+namespace url {
 
 // Given a pointer into the spec, this copies and canonicalizes the drive
 // letter and colon to the output, if one is found. If there is not a drive
@@ -90,11 +66,11 @@
   // path. We supply it with the path following the slashes. It won't prepend
   // a slash because it assumes any nonempty path already starts with one.
   // We explicitly filter out calls with no path here to prevent that case.
-  ParsedURL::Component sub_path(after_slashes, end - after_slashes);
+  ParsedComponent sub_path(after_slashes, end - after_slashes);
   if (sub_path.len > 0) {
     // Give it a fake output component to write into. DoCanonicalizeFile will
     // compute the full path component.
-    ParsedURL::Component fake_output_path;
+    ParsedComponent fake_output_path;
     URLCanonInternal<CHAR, UCHAR>::DoPath(
         spec, sub_path, output, &fake_output_path);
   }
@@ -106,9 +82,9 @@
                                   CanonOutput* output,
                                   ParsedURL* new_parsed) {
   // Things we don't set in file: URLs.
-  new_parsed->username = ParsedURL::Component(0, -1);
-  new_parsed->password = ParsedURL::Component(0, -1);
-  new_parsed->port = ParsedURL::Component(0, -1);
+  new_parsed->username = ParsedComponent(0, -1);
+  new_parsed->password = ParsedComponent(0, -1);
+  new_parsed->port = ParsedComponent(0, -1);
 
   // Scheme (known, so we don't bother running it through the more
   // complicated scheme canonicalizer).
@@ -154,4 +130,6 @@
   return success;
 }
 
-#endif  // GOOGLEURL_SRC_URL_CANON_INTERNAL_FILE_H__
+}  // namespace url
+
+#endif  // URL_URL_CANON_INTERNAL_FILE_H_

diff --git a/googleurl/src/url_canon_ip.cc b/src/url/url_canon_ip.cc
similarity index 78%
rename from googleurl/src/url_canon_ip.cc
rename to src/url/url_canon_ip.cc
index 86f7c9c..45f95de 100644
--- a/googleurl/src/url_canon_ip.cc
+++ b/src/url/url_canon_ip.cc

@@ -1,41 +1,16 @@
-// Copyright 2009, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
-#include "googleurl/src/url_canon_ip.h"
+#include "url/url_canon_ip.h"
 
 #include <stdlib.h>
 
 #include "base/basictypes.h"
 #include "base/logging.h"
-#include "googleurl/src/url_canon_internal.h"
+#include "url/url_canon_internal.h"
 
-namespace url_canon {
+namespace url {
 
 namespace {
 
@@ -56,8 +31,8 @@
 
 template<typename CHAR, typename UCHAR>
 bool DoFindIPv4Components(const CHAR* spec,
-                          const url_parse::Component& host,
-                          url_parse::Component components[4]) {
+                          const Component& host,
+                          Component components[4]) {
   if (!host.is_nonempty())
     return false;
 
@@ -68,8 +43,7 @@
     if (i >= end || spec[i] == '.') {
       // Found the end of the current component.
       int component_len = i - cur_component_begin;
-      components[cur_component] =
-          url_parse::Component(cur_component_begin, component_len);
+      components[cur_component] = Component(cur_component_begin, component_len);
 
       // The next component starts after the dot.
       cur_component_begin = i + 1;
@@ -101,7 +75,7 @@
 
   // Fill in any unused components.
   while (cur_component < 4)
-    components[cur_component++] = url_parse::Component();
+    components[cur_component++] = Component();
   return true;
 }
 
@@ -116,10 +90,9 @@
 // out any input that is greater than 7 bits. The components are assumed
 // to be non-empty.
 template<typename CHAR>
-CanonHostInfo::Family IPv4ComponentToNumber(
-    const CHAR* spec,
-    const url_parse::Component& component,
-    uint32* number) {
+CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,
+                                            const Component& component,
+                                            uint32* number) {
   // Figure out the base
   SharedCharTypes base;
   int base_prefix_len = 0;  // Size of the prefix for this base.
@@ -181,34 +154,14 @@
   return CanonHostInfo::IPV4;
 }
 
-// Writes the given address (with each character representing one dotted
-// part of an IPv4 address) to the output, and updating |*out_host| to
-// identify the added portion.
-void AppendIPv4Address(const unsigned char address[4],
-                       CanonOutput* output,
-                       url_parse::Component* out_host) {
-  out_host->begin = output->length();
-  for (int i = 0; i < 4; i++) {
-    char str[16];
-    _itoa_s(address[i], str, 10);
-
-    for (int ch = 0; str[ch] != 0; ch++)
-      output->push_back(str[ch]);
-
-    if (i != 3)
-      output->push_back('.');
-  }
-  out_host->len = output->length() - out_host->begin;
-}
-
 // See declaration of IPv4AddressToNumber for documentation.
 template<typename CHAR>
 CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec,
-                                            const url_parse::Component& host,
+                                            const Component& host,
                                             unsigned char address[4],
                                             int* num_ipv4_components) {
   // The identified components. Not all may exist.
-  url_parse::Component components[4];
+  Component components[4];
   if (!FindIPv4Components(spec, host, components))
     return CanonHostInfo::NEUTRAL;
 
@@ -216,19 +169,30 @@
   // |existing_components| will be valid.
   uint32 component_values[4];
   int existing_components = 0;
+
+  // Set to true if one or more components are BROKEN.  BROKEN is only
+  // returned if all components are IPV4 or BROKEN, so, for example,
+  // 12345678912345.de returns NEUTRAL rather than broken.
+  bool broken = false;
   for (int i = 0; i < 4; i++) {
     if (components[i].len <= 0)
       continue;
     CanonHostInfo::Family family = IPv4ComponentToNumber(
         spec, components[i], &component_values[existing_components]);
 
-    // Stop if we hit an invalid non-empty component.
-    if (family != CanonHostInfo::IPV4)
+    if (family == CanonHostInfo::BROKEN) {
+      broken = true;
+    } else if (family != CanonHostInfo::IPV4) {
+      // Stop if we hit a non-BROKEN invalid non-empty component.
       return family;
+    }
 
     existing_components++;
   }
 
+  if (broken)
+    return CanonHostInfo::BROKEN;
+
   // Use that sequence of numbers to fill out the 4-component IP address.
 
   // First, process all components but the last, while making sure each fits
@@ -240,7 +204,15 @@
   }
 
   // Next, consume the last component to fill in the remaining bytes.
+  // Work around a gcc 4.9 bug. crbug.com/392872
+#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
   uint32 last_value = component_values[existing_components - 1];
+#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)
+#pragma GCC diagnostic pop
+#endif
   for (int i = 3; i >= existing_components - 1; i--) {
     address[i] = static_cast<unsigned char>(last_value);
     last_value >>= 8;
@@ -261,17 +233,18 @@
 // is NEUTRAL, and we could use a second opinion.
 template<typename CHAR, typename UCHAR>
 bool DoCanonicalizeIPv4Address(const CHAR* spec,
-                               const url_parse::Component& host,
+                               const Component& host,
                                CanonOutput* output,
                                CanonHostInfo* host_info) {
-  unsigned char address[4];
   host_info->family = IPv4AddressToNumber(
-      spec, host, address, &host_info->num_ipv4_components);
+      spec, host, host_info->address, &host_info->num_ipv4_components);
 
   switch (host_info->family) {
     case CanonHostInfo::IPV4:
       // Definitely an IPv4 address.
-      AppendIPv4Address(address, output, &host_info->out_host);
+      host_info->out_host.begin = output->length();
+      AppendIPv4Address(host_info->address, output);
+      host_info->out_host.len = output->length() - host_info->out_host.begin;
       return true;
     case CanonHostInfo::BROKEN:
       // Definitely broken.
@@ -328,7 +301,7 @@
   }
 
   // There can be up to 8 hex components (colon separated) in the literal.
-  url_parse::Component hex_components[8];
+  Component hex_components[8];
 
   // The count of hex components present. Ranges from [0,8].
   int num_hex_components;
@@ -338,16 +311,14 @@
   int index_of_contraction;
 
   // The range of characters which are an IPv4 literal.
-  url_parse::Component ipv4_component;
+  Component ipv4_component;
 };
 
 // Parse the IPv6 input string. If parsing succeeded returns true and fills
 // |parsed| with the information. If parsing failed (because the input is
 // invalid) returns false.
 template<typename CHAR, typename UCHAR>
-bool DoParseIPv6(const CHAR* spec,
-                 const url_parse::Component& host,
-                 IPv6Parsed* parsed) {
+bool DoParseIPv6(const CHAR* spec, const Component& host, IPv6Parsed* parsed) {
   // Zero-out the info.
   parsed->reset();
 
@@ -392,7 +363,7 @@
           return false;
 
         parsed->hex_components[parsed->num_hex_components++] =
-            url_parse::Component(cur_component_begin, component_len);
+            Component(cur_component_begin, component_len);
       }
     }
 
@@ -423,8 +394,8 @@
           // Since IPv4 address can only appear at the end, assume the rest
           // of the string is an IPv4 address. (We will parse this separately
           // later).
-          parsed->ipv4_component = url_parse::Component(
-              cur_component_begin, end - cur_component_begin);
+          parsed->ipv4_component =
+              Component(cur_component_begin, end - cur_component_begin);
           break;
         } else {
           // The character was neither a hex digit, nor an IPv4 character.
@@ -473,8 +444,7 @@
 // already verified that each character in the string was a hex digit, and
 // that there were no more than 4 characters.
 template<typename CHAR>
-uint16 IPv6HexComponentToNumber(const CHAR* spec,
-                                const url_parse::Component& component) {
+uint16 IPv6HexComponentToNumber(const CHAR* spec, const Component& component) {
   DCHECK(component.len <= 4);
 
   // Copy the hex string into a C-string.
@@ -492,7 +462,7 @@
 // true on success. False means that the input was not a valid IPv6 address.
 template<typename CHAR, typename UCHAR>
 bool DoIPv6AddressToNumber(const CHAR* spec,
-                           const url_parse::Component& host,
+                           const Component& host,
                            unsigned char address[16]) {
   // Make sure the component is bounded by '[' and ']'.
   int end = host.end();
@@ -500,7 +470,7 @@
     return false;
 
   // Exclude the square brackets.
-  url_parse::Component ipv6_comp(host.begin + 1, host.len - 2);
+  Component ipv6_comp(host.begin + 1, host.len - 2);
 
   // Parse the IPv6 address -- identify where all the colon separated hex
   // components are, the "::" contraction, and the embedded IPv4 address.
@@ -538,18 +508,6 @@
   // If there was an IPv4 section, convert it into a 32-bit number and append
   // it to |address|.
   if (ipv6_parsed.ipv4_component.is_valid()) {
-    // We only allow the embedded IPv4 syntax to be used for "compat" and
-    // "mapped" formats:
-    //     "mapped" ==>  0:0:0:0:0:ffff:<IPv4-literal>
-    //     "compat" ==>  0:0:0:0:0:0000:<IPv4-literal>
-    for (int j = 0; j < 10; ++j) {
-      if (address[j] != 0)
-        return false;
-    }
-    if (!((address[10] == 0 && address[11] == 0) ||
-          (address[10] == 0xFF && address[11] == 0xFF)))
-      return false;
-
     // Append the 32-bit number to |address|.
     int ignored_num_ipv4_components;
     if (CanonHostInfo::IPV4 !=
@@ -567,12 +525,12 @@
 // range into |contraction_range|. The run of zeros must be at least 16 bits,
 // and if there is a tie the first is chosen.
 void ChooseIPv6ContractionRange(const unsigned char address[16],
-                                url_parse::Component* contraction_range) {
+                                Component* contraction_range) {
   // The longest run of zeros in |address| seen so far.
-  url_parse::Component max_range;
+  Component max_range;
 
   // The current run of zeros in |address| being iterated over.
-  url_parse::Component cur_range;
+  Component cur_range;
 
   for (int i = 0; i < 16; i += 2) {
     // Test for 16 bits worth of zero.
@@ -581,7 +539,7 @@
     if (is_zero) {
       // Add the zero to the current range (or start a new one).
       if (!cur_range.is_valid())
-        cur_range = url_parse::Component(i, 0);
+        cur_range = Component(i, 0);
       cur_range.len += 2;
     }
 
@@ -601,12 +559,11 @@
 // is NEUTRAL, and we could use a second opinion.
 template<typename CHAR, typename UCHAR>
 bool DoCanonicalizeIPv6Address(const CHAR* spec,
-                               const url_parse::Component& host,
+                               const Component& host,
                                CanonOutput* output,
                                CanonHostInfo* host_info) {
   // Turn the IP address into a 128 bit number.
-  unsigned char address[16];
-  if (!IPv6AddressToNumber(spec, host, address)) {
+  if (!IPv6AddressToNumber(spec, host, host_info->address)) {
     // If it's not an IPv6 address, scan for characters that should *only*
     // exist in an IPv6 address.
     for (int i = host.begin; i < host.end(); i++) {
@@ -626,12 +583,35 @@
 
   host_info->out_host.begin = output->length();
   output->push_back('[');
+  AppendIPv6Address(host_info->address, output);
+  output->push_back(']');
+  host_info->out_host.len = output->length() - host_info->out_host.begin;
 
-  // We will now output the address according to the rules in:
+  host_info->family = CanonHostInfo::IPV6;
+  return true;
+}
+
+}  // namespace
+
+void AppendIPv4Address(const unsigned char address[4], CanonOutput* output) {
+  for (int i = 0; i < 4; i++) {
+    char str[16];
+    _itoa_s(address[i], str, 10);
+
+    for (int ch = 0; str[ch] != 0; ch++)
+      output->push_back(str[ch]);
+
+    if (i != 3)
+      output->push_back('.');
+  }
+}
+
+void AppendIPv6Address(const unsigned char address[16], CanonOutput* output) {
+  // We will output the address according to the rules in:
   // http://tools.ietf.org/html/draft-kawamura-ipv6-text-representation-01#section-4
 
   // Start by finding where to place the "::" contraction (if any).
-  url_parse::Component contraction_range;
+  Component contraction_range;
   ChooseIPv6ContractionRange(address, &contraction_range);
 
   for (int i = 0; i <= 14;) {
@@ -660,30 +640,23 @@
         output->push_back(':');
     }
   }
-
-  output->push_back(']');
-  host_info->out_host.len = output->length() - host_info->out_host.begin;
-
-  host_info->family = CanonHostInfo::IPV6;
-  return true;
 }
 
-}  // namespace
-
 bool FindIPv4Components(const char* spec,
-                        const url_parse::Component& host,
-                        url_parse::Component components[4]) {
+                        const Component& host,
+                        Component components[4]) {
   return DoFindIPv4Components<char, unsigned char>(spec, host, components);
 }
 
-bool FindIPv4Components(const char16* spec,
-                        const url_parse::Component& host,
-                        url_parse::Component components[4]) {
-  return DoFindIPv4Components<char16, char16>(spec, host, components);
+bool FindIPv4Components(const base::char16* spec,
+                        const Component& host,
+                        Component components[4]) {
+  return DoFindIPv4Components<base::char16, base::char16>(
+      spec, host, components);
 }
 
 void CanonicalizeIPAddress(const char* spec,
-                           const url_parse::Component& host,
+                           const Component& host,
                            CanonOutput* output,
                            CanonHostInfo* host_info) {
   if (DoCanonicalizeIPv4Address<char, unsigned char>(
@@ -694,44 +667,43 @@
     return;
 }
 
-void CanonicalizeIPAddress(const char16* spec,
-                           const url_parse::Component& host,
+void CanonicalizeIPAddress(const base::char16* spec,
+                           const Component& host,
                            CanonOutput* output,
                            CanonHostInfo* host_info) {
-  if (DoCanonicalizeIPv4Address<char16, char16>(
+  if (DoCanonicalizeIPv4Address<base::char16, base::char16>(
           spec, host, output, host_info))
     return;
-  if (DoCanonicalizeIPv6Address<char16, char16>(
+  if (DoCanonicalizeIPv6Address<base::char16, base::char16>(
           spec, host, output, host_info))
     return;
 }
 
 CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
-                                          const url_parse::Component& host,
+                                          const Component& host,
                                           unsigned char address[4],
                                           int* num_ipv4_components) {
   return DoIPv4AddressToNumber<char>(spec, host, address, num_ipv4_components);
 }
 
-CanonHostInfo::Family IPv4AddressToNumber(const char16* spec,
-                                          const url_parse::Component& host,
+CanonHostInfo::Family IPv4AddressToNumber(const base::char16* spec,
+                                          const Component& host,
                                           unsigned char address[4],
                                           int* num_ipv4_components) {
-  return DoIPv4AddressToNumber<char16>(
+  return DoIPv4AddressToNumber<base::char16>(
       spec, host, address, num_ipv4_components);
 }
 
 bool IPv6AddressToNumber(const char* spec,
-                         const url_parse::Component& host,
+                         const Component& host,
                          unsigned char address[16]) {
   return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address);
 }
 
-bool IPv6AddressToNumber(const char16* spec,
-                         const url_parse::Component& host,
+bool IPv6AddressToNumber(const base::char16* spec,
+                         const Component& host,
                          unsigned char address[16]) {
-  return DoIPv6AddressToNumber<char16, char16>(spec, host, address);
+  return DoIPv6AddressToNumber<base::char16, base::char16>(spec, host, address);
 }
 
-
-}  // namespace url_canon
+}  // namespace url

diff --git a/src/url/url_canon_ip.h b/src/url/url_canon_ip.h
new file mode 100644
index 0000000..19ecfdb
--- /dev/null
+++ b/src/url/url_canon_ip.h

@@ -0,0 +1,82 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_IP_H_
+#define URL_URL_CANON_IP_H_
+
+#include "base/strings/string16.h"
+#include "url/url_canon.h"
+#include "url/url_export.h"
+#include "url/url_parse.h"
+
+namespace url {
+
+// Writes the given IPv4 address to |output|.
+URL_EXPORT void AppendIPv4Address(const unsigned char address[4],
+                                  CanonOutput* output);
+
+// Writes the given IPv6 address to |output|.
+URL_EXPORT void AppendIPv6Address(const unsigned char address[16],
+                                  CanonOutput* output);
+
+// Searches the host name for the portions of the IPv4 address. On success,
+// each component will be placed into |components| and it will return true.
+// It will return false if the host can not be separated as an IPv4 address
+// or if there are any non-7-bit characters or other characters that can not
+// be in an IP address. (This is important so we fail as early as possible for
+// common non-IP hostnames.)
+//
+// Not all components may exist. If there are only 3 components, for example,
+// the last one will have a length of -1 or 0 to indicate it does not exist.
+//
+// Note that many platforms' inet_addr will ignore everything after a space
+// in certain circumstances if the stuff before the space looks like an IP
+// address. IE6 is included in this. We do NOT handle this case. In many cases,
+// the browser's canonicalization will get run before this which converts
+// spaces to %20 (in the case of IE7) or rejects them (in the case of
+// Mozilla), so this code path never gets hit. Our host canonicalization will
+// notice these spaces and escape them, which will make IP address finding
+// fail. This seems like better behavior than stripping after a space.
+URL_EXPORT bool FindIPv4Components(const char* spec,
+                                   const Component& host,
+                                   Component components[4]);
+URL_EXPORT bool FindIPv4Components(const base::char16* spec,
+                                   const Component& host,
+                                   Component components[4]);
+
+// Converts an IPv4 address to a 32-bit number (network byte order).
+//
+// Possible return values:
+//   IPV4    - IPv4 address was successfully parsed.
+//   BROKEN  - Input was formatted like an IPv4 address, but overflow occurred
+//             during parsing.
+//   NEUTRAL - Input couldn't possibly be interpreted as an IPv4 address.
+//             It might be an IPv6 address, or a hostname.
+//
+// On success, |num_ipv4_components| will be populated with the number of
+// components in the IPv4 address.
+URL_EXPORT CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
+                                                     const Component& host,
+                                                     unsigned char address[4],
+                                                     int* num_ipv4_components);
+URL_EXPORT CanonHostInfo::Family IPv4AddressToNumber(const base::char16* spec,
+                                                     const Component& host,
+                                                     unsigned char address[4],
+                                                     int* num_ipv4_components);
+
+// Converts an IPv6 address to a 128-bit number (network byte order), returning
+// true on success. False means that the input was not a valid IPv6 address.
+//
+// NOTE that |host| is expected to be surrounded by square brackets.
+// i.e. "[::1]" rather than "::1".
+URL_EXPORT bool IPv6AddressToNumber(const char* spec,
+                                    const Component& host,
+                                    unsigned char address[16]);
+URL_EXPORT bool IPv6AddressToNumber(const base::char16* spec,
+                                    const Component& host,
+                                    unsigned char address[16]);
+
+}  // namespace url
+
+#endif  // URL_URL_CANON_IP_H_

diff --git a/src/url/url_canon_mailtourl.cc b/src/url/url_canon_mailtourl.cc
new file mode 100644
index 0000000..7c48b95
--- /dev/null
+++ b/src/url/url_canon_mailtourl.cc

@@ -0,0 +1,110 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Functions for canonicalizing "mailto:" URLs.
+
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
+
+namespace url {
+
+namespace {
+
+template <typename CHAR, typename UCHAR>
+bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
+                             const Parsed& parsed,
+                             CanonOutput* output,
+                             Parsed* new_parsed) {
+  // mailto: only uses {scheme, path, query} -- clear the rest.
+  new_parsed->username = Component();
+  new_parsed->password = Component();
+  new_parsed->host = Component();
+  new_parsed->port = Component();
+  new_parsed->ref = Component();
+
+  // Scheme (known, so we don't bother running it through the more
+  // complicated scheme canonicalizer).
+  new_parsed->scheme.begin = output->length();
+  output->Append("mailto:", 7);
+  new_parsed->scheme.len = 6;
+
+  bool success = true;
+
+  // Path
+  if (parsed.path.is_valid()) {
+    new_parsed->path.begin = output->length();
+
+    // Copy the path using path URL's more lax escaping rules.
+    // We convert to UTF-8 and escape non-ASCII, but leave all
+    // ASCII characters alone.
+    int end = parsed.path.end();
+    for (int i = parsed.path.begin; i < end; ++i) {
+      UCHAR uch = static_cast<UCHAR>(source.path[i]);
+      if (uch < 0x20 || uch >= 0x80)
+        success &= AppendUTF8EscapedChar(source.path, &i, end, output);
+      else
+        output->push_back(static_cast<char>(uch));
+    }
+
+    new_parsed->path.len = output->length() - new_parsed->path.begin;
+  } else {
+    // No path at all
+    new_parsed->path.reset();
+  }
+
+  // Query -- always use the default utf8 charset converter.
+  CanonicalizeQuery(source.query, parsed.query, NULL,
+                    output, &new_parsed->query);
+
+  return success;
+}
+
+} // namespace
+
+bool CanonicalizeMailtoURL(const char* spec,
+                           int spec_len,
+                           const Parsed& parsed,
+                           CanonOutput* output,
+                           Parsed* new_parsed) {
+  return DoCanonicalizeMailtoURL<char, unsigned char>(
+      URLComponentSource<char>(spec), parsed, output, new_parsed);
+}
+
+bool CanonicalizeMailtoURL(const base::char16* spec,
+                           int spec_len,
+                           const Parsed& parsed,
+                           CanonOutput* output,
+                           Parsed* new_parsed) {
+  return DoCanonicalizeMailtoURL<base::char16, base::char16>(
+      URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
+}
+
+bool ReplaceMailtoURL(const char* base,
+                      const Parsed& base_parsed,
+                      const Replacements<char>& replacements,
+                      CanonOutput* output,
+                      Parsed* new_parsed) {
+  URLComponentSource<char> source(base);
+  Parsed parsed(base_parsed);
+  SetupOverrideComponents(base, replacements, &source, &parsed);
+  return DoCanonicalizeMailtoURL<char, unsigned char>(
+      source, parsed, output, new_parsed);
+}
+
+bool ReplaceMailtoURL(const char* base,
+                      const Parsed& base_parsed,
+                      const Replacements<base::char16>& replacements,
+                      CanonOutput* output,
+                      Parsed* new_parsed) {
+  RawCanonOutput<1024> utf8;
+  URLComponentSource<char> source(base);
+  Parsed parsed(base_parsed);
+  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
+  return DoCanonicalizeMailtoURL<char, unsigned char>(
+      source, parsed, output, new_parsed);
+}
+
+}  // namespace url

diff --git a/googleurl/src/url_canon_path.cc b/src/url/url_canon_path.cc
similarity index 83%
rename from googleurl/src/url_canon_path.cc
rename to src/url/url_canon_path.cc
index df97aad..ceff689 100644
--- a/googleurl/src/url_canon_path.cc
+++ b/src/url/url_canon_path.cc

@@ -1,39 +1,13 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// Canonicalization functions for the paths of URLs.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
 #include "base/logging.h"
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_internal.h"
-#include "googleurl/src/url_parse_internal.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_parse_internal.h"
 
-namespace url_canon {
+namespace url {
 
 namespace {
 
@@ -131,7 +105,7 @@
     *consumed_len = 0;
     return DIRECTORY_CUR;
   }
-  if (url_parse::IsURLSlash(spec[after_dot])) {
+  if (IsURLSlash(spec[after_dot])) {
     // Single dot followed by a slash.
     *consumed_len = 1;  // Consume the slash
     return DIRECTORY_CUR;
@@ -145,7 +119,7 @@
       *consumed_len = second_dot_len;
       return DIRECTORY_UP;
     }
-    if (url_parse::IsURLSlash(spec[after_second_dot])) {
+    if (IsURLSlash(spec[after_second_dot])) {
       // Double dot followed by a slash.
       *consumed_len = second_dot_len + 1;
       return DIRECTORY_UP;
@@ -203,7 +177,7 @@
 // it would be correct for most systems.
 template<typename CHAR, typename UCHAR>
 bool DoPartialPath(const CHAR* spec,
-                   const url_parse::Component& path,
+                   const Component& path,
                    int path_begin_in_output,
                    CanonOutput* output) {
   int end = path.end();
@@ -321,60 +295,59 @@
 
 template<typename CHAR, typename UCHAR>
 bool DoPath(const CHAR* spec,
-            const url_parse::Component& path,
+            const Component& path,
             CanonOutput* output,
-            url_parse::Component* out_path) {
+            Component* out_path) {
   bool success = true;
+  out_path->begin = output->length();
   if (path.len > 0) {
-    out_path->begin = output->length();
-
     // Write out an initial slash if the input has none. If we just parse a URL
     // and then canonicalize it, it will of course have a slash already. This
     // check is for the replacement and relative URL resolving cases of file
     // URLs.
-    if (!url_parse::IsURLSlash(spec[path.begin]))
+    if (!IsURLSlash(spec[path.begin]))
       output->push_back('/');
 
     success = DoPartialPath<CHAR, UCHAR>(spec, path, out_path->begin, output);
-    out_path->len = output->length() - out_path->begin;
   } else {
     // No input, canonical path is a slash.
     output->push_back('/');
-    *out_path = url_parse::Component();
   }
+  out_path->len = output->length() - out_path->begin;
   return success;
 }
 
 }  // namespace
 
 bool CanonicalizePath(const char* spec,
-                      const url_parse::Component& path,
+                      const Component& path,
                       CanonOutput* output,
-                      url_parse::Component* out_path) {
+                      Component* out_path) {
   return DoPath<char, unsigned char>(spec, path, output, out_path);
 }
 
-bool CanonicalizePath(const char16* spec,
-                      const url_parse::Component& path,
+bool CanonicalizePath(const base::char16* spec,
+                      const Component& path,
                       CanonOutput* output,
-                      url_parse::Component* out_path) {
-  return DoPath<char16, char16>(spec, path, output, out_path);
+                      Component* out_path) {
+  return DoPath<base::char16, base::char16>(spec, path, output, out_path);
 }
 
 bool CanonicalizePartialPath(const char* spec,
-                             const url_parse::Component& path,
+                             const Component& path,
                              int path_begin_in_output,
                              CanonOutput* output) {
   return DoPartialPath<char, unsigned char>(spec, path, path_begin_in_output,
                                             output);
 }
 
-bool CanonicalizePartialPath(const char16* spec,
-                             const url_parse::Component& path,
+bool CanonicalizePartialPath(const base::char16* spec,
+                             const Component& path,
                              int path_begin_in_output,
                              CanonOutput* output) {
-  return DoPartialPath<char16, char16>(spec, path, path_begin_in_output,
-                                       output);
+  return DoPartialPath<base::char16, base::char16>(spec, path,
+                                                   path_begin_in_output,
+                                                   output);
 }
 
-}  // namespace url_canon
+}  // namespace url

diff --git a/src/url/url_canon_pathurl.cc b/src/url/url_canon_pathurl.cc
new file mode 100644
index 0000000..0d23ccb
--- /dev/null
+++ b/src/url/url_canon_pathurl.cc

@@ -0,0 +1,121 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Functions for canonicalizing "path" URLs. Not to be confused with the path
+// of a URL, these are URLs that have no authority section, only a path. For
+// example, "javascript:" and "data:".
+
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+
+namespace url {
+
+namespace {
+
+// Canonicalize the given |component| from |source| into |output| and
+// |new_component|. If |separator| is non-zero, it is pre-pended to |output|
+// prior to the canonicalized component; i.e. for the '?' or '#' characters.
+template<typename CHAR, typename UCHAR>
+bool DoCanonicalizePathComponent(const CHAR* source,
+                                 const Component& component,
+                                 char separator,
+                                 CanonOutput* output,
+                                 Component* new_component) {
+  bool success = true;
+  if (component.is_valid()) {
+    if (separator)
+      output->push_back(separator);
+    // Copy the path using path URL's more lax escaping rules (think for
+    // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all
+    // ASCII characters alone. This helps readability of JavaScript.
+    new_component->begin = output->length();
+    int end = component.end();
+    for (int i = component.begin; i < end; i++) {
+      UCHAR uch = static_cast<UCHAR>(source[i]);
+      if (uch < 0x20 || uch >= 0x80)
+        success &= AppendUTF8EscapedChar(source, &i, end, output);
+      else
+        output->push_back(static_cast<char>(uch));
+    }
+    new_component->len = output->length() - new_component->begin;
+  } else {
+    // Empty part.
+    new_component->reset();
+  }
+  return success;
+}
+
+template <typename CHAR, typename UCHAR>
+bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
+                           const Parsed& parsed,
+                           CanonOutput* output,
+                           Parsed* new_parsed) {
+  // Scheme: this will append the colon.
+  bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
+                                    output, &new_parsed->scheme);
+
+  // We assume there's no authority for path URLs. Note that hosts should never
+  // have -1 length.
+  new_parsed->username.reset();
+  new_parsed->password.reset();
+  new_parsed->host.reset();
+  new_parsed->port.reset();
+  // We allow path URLs to have the path, query and fragment components, but we
+  // will canonicalize each of them via the weaker path URL rules.
+  success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
+      source.path, parsed.path, '\0', output, &new_parsed->path);
+  success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
+      source.query, parsed.query, '?', output, &new_parsed->query);
+  success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
+      source.ref, parsed.ref, '#', output, &new_parsed->ref);
+
+  return success;
+}
+
+}  // namespace
+
+bool CanonicalizePathURL(const char* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         CanonOutput* output,
+                         Parsed* new_parsed) {
+  return DoCanonicalizePathURL<char, unsigned char>(
+      URLComponentSource<char>(spec), parsed, output, new_parsed);
+}
+
+bool CanonicalizePathURL(const base::char16* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         CanonOutput* output,
+                         Parsed* new_parsed) {
+  return DoCanonicalizePathURL<base::char16, base::char16>(
+      URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
+}
+
+bool ReplacePathURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char>& replacements,
+                    CanonOutput* output,
+                    Parsed* new_parsed) {
+  URLComponentSource<char> source(base);
+  Parsed parsed(base_parsed);
+  SetupOverrideComponents(base, replacements, &source, &parsed);
+  return DoCanonicalizePathURL<char, unsigned char>(
+      source, parsed, output, new_parsed);
+}
+
+bool ReplacePathURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<base::char16>& replacements,
+                    CanonOutput* output,
+                    Parsed* new_parsed) {
+  RawCanonOutput<1024> utf8;
+  URLComponentSource<char> source(base);
+  Parsed parsed(base_parsed);
+  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
+  return DoCanonicalizePathURL<char, unsigned char>(
+      source, parsed, output, new_parsed);
+}
+
+}  // namespace url

diff --git a/googleurl/src/url_canon_query.cc b/src/url/url_canon_query.cc
similarity index 66%
rename from googleurl/src/url_canon_query.cc
rename to src/url/url_canon_query.cc
index cee8774..5494ddf 100644
--- a/googleurl/src/url_canon_query.cc
+++ b/src/url/url_canon_query.cc

@@ -1,34 +1,9 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_internal.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
 
 // Query canonicalization in IE
 // ----------------------------
@@ -60,14 +35,14 @@
 // replace all invalid sequences (including invalid UTF-16 sequences, which IE
 // doesn't) with the "invalid character," and we will escape it.
 
-namespace url_canon {
+namespace url {
 
 namespace {
 
 // Returns true if the characters starting at |begin| and going until |end|
 // (non-inclusive) are all representable in 7-bits.
 template<typename CHAR, typename UCHAR>
-bool IsAllASCII(const CHAR* spec, const url_parse::Component& query) {
+bool IsAllASCII(const CHAR* spec, const Component& query) {
   int end = query.end();
   for (int i = query.begin; i < end; i++) {
     if (static_cast<UCHAR>(spec[i]) >= 0x80)
@@ -94,7 +69,7 @@
 // Runs the converter on the given UTF-8 input. Since the converter expects
 // UTF-16, we have to convert first. The converter must be non-NULL.
 void RunConverter(const char* spec,
-                  const url_parse::Component& query,
+                  const Component& query,
                   CharsetConverter* converter,
                   CanonOutput* output) {
   // This function will replace any misencoded values with the invalid
@@ -107,8 +82,8 @@
 // Runs the converter with the given UTF-16 input. We don't have to do
 // anything, but this overriddden function allows us to use the same code
 // for both UTF-8 and UTF-16 input.
-void RunConverter(const char16* spec,
-                  const url_parse::Component& query,
+void RunConverter(const base::char16* spec,
+                  const Component& query,
                   CharsetConverter* converter,
                   CanonOutput* output) {
   converter->ConvertFromUTF16(&spec[query.begin], query.len, output);
@@ -116,7 +91,7 @@
 
 template<typename CHAR, typename UCHAR>
 void DoConvertToQueryEncoding(const CHAR* spec,
-                              const url_parse::Component& query,
+                              const Component& query,
                               CharsetConverter* converter,
                               CanonOutput* output) {
   if (IsAllASCII<CHAR, UCHAR>(spec, query)) {
@@ -141,12 +116,12 @@
 
 template<typename CHAR, typename UCHAR>
 void DoCanonicalizeQuery(const CHAR* spec,
-                         const url_parse::Component& query,
+                         const Component& query,
                          CharsetConverter* converter,
                          CanonOutput* output,
-                         url_parse::Component* out_query) {
+                         Component* out_query) {
   if (query.len < 0) {
-    *out_query = url_parse::Component();
+    *out_query = Component();
     return;
   }
 
@@ -161,29 +136,29 @@
 }  // namespace
 
 void CanonicalizeQuery(const char* spec,
-                       const url_parse::Component& query,
+                       const Component& query,
                        CharsetConverter* converter,
                        CanonOutput* output,
-                       url_parse::Component* out_query) {
+                       Component* out_query) {
   DoCanonicalizeQuery<char, unsigned char>(spec, query, converter,
                                            output, out_query);
 }
 
-void CanonicalizeQuery(const char16* spec,
-                       const url_parse::Component& query,
+void CanonicalizeQuery(const base::char16* spec,
+                       const Component& query,
                        CharsetConverter* converter,
                        CanonOutput* output,
-                       url_parse::Component* out_query) {
-  DoCanonicalizeQuery<char16, char16>(spec, query, converter,
-                                      output, out_query);
+                       Component* out_query) {
+  DoCanonicalizeQuery<base::char16, base::char16>(spec, query, converter,
+                                                  output, out_query);
 }
 
-void ConvertUTF16ToQueryEncoding(const char16* input,
-                                 const url_parse::Component& query,
+void ConvertUTF16ToQueryEncoding(const base::char16* input,
+                                 const Component& query,
                                  CharsetConverter* converter,
                                  CanonOutput* output) {
-  DoConvertToQueryEncoding<char16, char16>(input, query,
-                                           converter, output);
+  DoConvertToQueryEncoding<base::char16, base::char16>(input, query,
+                                                       converter, output);
 }
 
-}  // namespace url_canon
+}  // namespace url

diff --git a/googleurl/src/url_canon_relative.cc b/src/url/url_canon_relative.cc
similarity index 73%
rename from googleurl/src/url_canon_relative.cc
rename to src/url/url_canon_relative.cc
index 6bcc72f..9436245 100644
--- a/googleurl/src/url_canon_relative.cc
+++ b/src/url/url_canon_relative.cc

@@ -1,41 +1,18 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
 // Canonicalizer functions for working with and resolving relative URLs.
 
 #include "base/logging.h"
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_internal.h"
-#include "googleurl/src/url_file.h"
-#include "googleurl/src/url_parse_internal.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_constants.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
+#include "url/url_util_internal.h"
 
-namespace url_canon {
+namespace url {
 
 namespace {
 
@@ -50,9 +27,9 @@
 // The base URL should always be canonical, therefore is ASCII.
 template<typename CHAR>
 bool AreSchemesEqual(const char* base,
-                     const url_parse::Component& base_scheme,
+                     const Component& base_scheme,
                      const CHAR* cmp,
-                     const url_parse::Component& cmp_scheme) {
+                     const Component& cmp_scheme) {
   if (base_scheme.len != cmp_scheme.len)
     return false;
   for (int i = 0; i < base_scheme.len; i++) {
@@ -76,8 +53,8 @@
                                     int spec_len) {
   if (start_offset >= spec_len)
     return false;
-  return url_parse::IsURLSlash(spec[start_offset]) &&
-      url_parse::DoesBeginWindowsDriveSpec(spec, start_offset + 1, spec_len);
+  return IsURLSlash(spec[start_offset]) &&
+         DoesBeginWindowsDriveSpec(spec, start_offset + 1, spec_len);
 }
 
 #endif  // WIN32
@@ -85,20 +62,20 @@
 // See IsRelativeURL in the header file for usage.
 template<typename CHAR>
 bool DoIsRelativeURL(const char* base,
-                     const url_parse::Parsed& base_parsed,
+                     const Parsed& base_parsed,
                      const CHAR* url,
                      int url_len,
                      bool is_base_hierarchical,
                      bool* is_relative,
-                     url_parse::Component* relative_component) {
+                     Component* relative_component) {
   *is_relative = false;  // So we can default later to not relative.
 
   // Trim whitespace and construct a new range for the substring.
   int begin = 0;
-  url_parse::TrimURL(url, &begin, &url_len);
+  TrimURL(url, &begin, &url_len);
   if (begin >= url_len) {
     // Empty URLs are relative, but do nothing.
-    *relative_component = url_parse::Component(begin, 0);
+    *relative_component = Component(begin, 0);
     *is_relative = true;
     return true;
   }
@@ -115,8 +92,8 @@
   //
   // We require strict backslashes when detecting UNC since two forward
   // shashes should be treated a a relative URL with a hostname.
-  if (url_parse::DoesBeginWindowsDriveSpec(url, begin, url_len) ||
-      url_parse::DoesBeginUNCPath(url, begin, url_len, true))
+  if (DoesBeginWindowsDriveSpec(url, begin, url_len) ||
+      DoesBeginUNCPath(url, begin, url_len, true))
     return true;
 #endif  // WIN32
 
@@ -124,13 +101,19 @@
   // BUT: Just because we have a scheme, doesn't make it absolute.
   // "http:foo.html" is a relative URL with path "foo.html". If the scheme is
   // empty, we treat it as relative (":foo") like IE does.
-  url_parse::Component scheme;
-  if (!url_parse::ExtractScheme(url, url_len, &scheme) || scheme.len == 0) {
-    // Don't allow relative URLs if the base scheme doesn't support it.
-    if (!is_base_hierarchical)
+  Component scheme;
+  const bool scheme_is_empty =
+      !ExtractScheme(url, url_len, &scheme) || scheme.len == 0;
+  if (scheme_is_empty) {
+    if (url[begin] == '#') {
+      // |url| is a bare fragment (e.g. "#foo"). This can be resolved against
+      // any base. Fall-through.
+    } else if (!is_base_hierarchical) {
+      // Don't allow relative URLs if the base scheme doesn't support it.
       return false;
+    }
 
-    *relative_component = url_parse::MakeRange(begin, url_len);
+    *relative_component = MakeRange(begin, url_len);
     *is_relative = true;
     return true;
   }
@@ -139,7 +122,11 @@
   int scheme_end = scheme.end();
   for (int i = scheme.begin; i < scheme_end; i++) {
     if (!CanonicalSchemeChar(url[i])) {
-      *relative_component = url_parse::MakeRange(begin, url_len);
+      if (!is_base_hierarchical) {
+        // Don't allow relative URLs if the base scheme doesn't support it.
+        return false;
+      }
+      *relative_component = MakeRange(begin, url_len);
       *is_relative = true;
       return true;
     }
@@ -155,18 +142,23 @@
   if (!is_base_hierarchical)
     return true;
 
+  int colon_offset = scheme.end();
+
+  // If it's a filesystem URL, the only valid way to make it relative is not to
+  // supply a scheme.  There's no equivalent to e.g. http:index.html.
+  if (CompareSchemeComponent(url, scheme, kFileSystemScheme))
+    return true;
+
   // ExtractScheme guarantees that the colon immediately follows what it
   // considers to be the scheme. CountConsecutiveSlashes will handle the
   // case where the begin offset is the end of the input.
-  int colon_offset = scheme.end();
-  int num_slashes = url_parse::CountConsecutiveSlashes(url, colon_offset + 1,
-                                                       url_len);
+  int num_slashes = CountConsecutiveSlashes(url, colon_offset + 1, url_len);
 
   if (num_slashes == 0 || num_slashes == 1) {
     // No slashes means it's a relative path like "http:foo.html". One slash
     // is an absolute path. "http:/home/foo.html"
     *is_relative = true;
-    *relative_component = url_parse::MakeRange(colon_offset + 1, url_len);
+    *relative_component = MakeRange(colon_offset + 1, url_len);
     return true;
   }
 
@@ -205,12 +197,12 @@
 // source should already be canonical, we don't have to do anything special,
 // and the input is ASCII.
 void CopyOneComponent(const char* source,
-                      const url_parse::Component& source_component,
+                      const Component& source_component,
                       CanonOutput* output,
-                      url_parse::Component* output_component) {
+                      Component* output_component) {
   if (source_component.len < 0) {
     // This component is not present.
-    *output_component = url_parse::Component();
+    *output_component = Component();
     return;
   }
 
@@ -245,8 +237,7 @@
 
   // If the relative begins with a drive spec, don't do anything. The existing
   // drive spec in the base will be replaced.
-  if (url_parse::DoesBeginWindowsDriveSpec(relative_url,
-                                           path_start, relative_url_len)) {
+  if (DoesBeginWindowsDriveSpec(relative_url, path_start, relative_url_len)) {
     return base_path_begin;  // Relative URL path is "C:/foo"
   }
 
@@ -272,23 +263,19 @@
 // the input is a relative path or less (qyuery or ref).
 template<typename CHAR>
 bool DoResolveRelativePath(const char* base_url,
-                           const url_parse::Parsed& base_parsed,
+                           const Parsed& base_parsed,
                            bool base_is_file,
                            const CHAR* relative_url,
-                           const url_parse::Component& relative_component,
+                           const Component& relative_component,
                            CharsetConverter* query_converter,
                            CanonOutput* output,
-                           url_parse::Parsed* out_parsed) {
+                           Parsed* out_parsed) {
   bool success = true;
 
   // We know the authority section didn't change, copy it to the output. We
   // also know we have a path so can copy up to there.
-  url_parse::Component path, query, ref;
-  url_parse::ParsePathInternal(relative_url,
-                               relative_component,
-                               &path,
-                               &query,
-                               &ref);
+  Component path, query, ref;
+  ParsePathInternal(relative_url, relative_component, &path, &query, &ref);
   // Canonical URLs always have a path, so we can use that offset.
   output->Append(base_url, base_parsed.path.begin);
 
@@ -313,7 +300,7 @@
     }
 #endif  // WIN32
 
-    if (url_parse::IsURLSlash(relative_url[path.begin])) {
+    if (IsURLSlash(relative_url[path.begin])) {
       // Easy case: the path is an absolute path on the server, so we can
       // just replace everything from the path on with the new versions.
       // Since the input should be canonical hierarchical URL, we should
@@ -329,7 +316,7 @@
                       output);
       success &= CanonicalizePartialPath(relative_url, path, path_begin,
                                          output);
-      out_parsed->path = url_parse::MakeRange(path_begin, output->length());
+      out_parsed->path = MakeRange(path_begin, output->length());
 
       // Copy the rest of the stuff after the path from the relative path.
     }
@@ -340,8 +327,7 @@
     CanonicalizeRef(relative_url, ref, output, &out_parsed->ref);
 
     // Fix the path beginning to add back the "C:" we may have written above.
-    out_parsed->path = url_parse::MakeRange(true_path_begin,
-                                            out_parsed->path.end());
+    out_parsed->path = MakeRange(true_path_begin, out_parsed->path.end());
     return success;
   }
 
@@ -381,18 +367,17 @@
 // should be kept from the original URL is the scheme.
 template<typename CHAR>
 bool DoResolveRelativeHost(const char* base_url,
-                           const url_parse::Parsed& base_parsed,
+                           const Parsed& base_parsed,
                            const CHAR* relative_url,
-                           const url_parse::Component& relative_component,
+                           const Component& relative_component,
                            CharsetConverter* query_converter,
                            CanonOutput* output,
-                           url_parse::Parsed* out_parsed) {
+                           Parsed* out_parsed) {
   // Parse the relative URL, just like we would for anything following a
   // scheme.
-  url_parse::Parsed relative_parsed;  // Everything but the scheme is valid.
-  url_parse::ParseAfterScheme(&relative_url[relative_component.begin],
-                              relative_component.len, relative_component.begin,
-                              &relative_parsed);
+  Parsed relative_parsed;  // Everything but the scheme is valid.
+  ParseAfterScheme(relative_url, relative_component.end(),
+                   relative_component.begin, &relative_parsed);
 
   // Now we can just use the replacement function to replace all the necessary
   // parts of the old URL with the new one.
@@ -413,16 +398,16 @@
 // include: "//hostname/path", "/c:/foo", and "//hostname/c:/foo".
 template<typename CHAR>
 bool DoResolveAbsoluteFile(const CHAR* relative_url,
-                           const url_parse::Component& relative_component,
+                           const Component& relative_component,
                            CharsetConverter* query_converter,
                            CanonOutput* output,
-                           url_parse::Parsed* out_parsed) {
+                           Parsed* out_parsed) {
   // Parse the file URL. The file URl parsing function uses the same logic
   // as we do for determining if the file is absolute, in which case it will
   // not bother to look for a scheme.
-  url_parse::Parsed relative_parsed;
-  url_parse::ParseFileURL(&relative_url[relative_component.begin],
-                          relative_component.len, &relative_parsed);
+  Parsed relative_parsed;
+  ParseFileURL(&relative_url[relative_component.begin], relative_component.len,
+               &relative_parsed);
 
   return CanonicalizeFileURL(&relative_url[relative_component.begin],
                              relative_component.len, relative_parsed,
@@ -432,13 +417,13 @@
 // TODO(brettw) treat two slashes as root like Mozilla for FTP?
 template<typename CHAR>
 bool DoResolveRelativeURL(const char* base_url,
-                          const url_parse::Parsed& base_parsed,
+                          const Parsed& base_parsed,
                           bool base_is_file,
                           const CHAR* relative_url,
-                          const url_parse::Component& relative_component,
+                          const Component& relative_component,
                           CharsetConverter* query_converter,
                           CanonOutput* output,
-                          url_parse::Parsed* out_parsed) {
+                          Parsed* out_parsed) {
   // Starting point for our output parsed. We'll fix what we change.
   *out_parsed = base_parsed;
 
@@ -465,7 +450,7 @@
     return true;
   }
 
-  int num_slashes = url_parse::CountConsecutiveSlashes(
+  int num_slashes = CountConsecutiveSlashes(
       relative_url, relative_component.begin, relative_component.end());
 
 #ifdef WIN32
@@ -481,24 +466,27 @@
   // be setting the path.
   //
   // This assumes the absolute path resolver handles absolute URLs like this
-  // properly. url_util::DoCanonicalize does this.
+  // properly. DoCanonicalize does this.
   int after_slashes = relative_component.begin + num_slashes;
-  if (url_parse::DoesBeginUNCPath(relative_url, relative_component.begin,
-                                  relative_component.end(), !base_is_file) ||
+  if (DoesBeginUNCPath(relative_url, relative_component.begin,
+                       relative_component.end(), !base_is_file) ||
       ((num_slashes == 0 || base_is_file) &&
-       url_parse::DoesBeginWindowsDriveSpec(relative_url, after_slashes,
-                                            relative_component.end()))) {
+       DoesBeginWindowsDriveSpec(
+           relative_url, after_slashes, relative_component.end()))) {
     return DoResolveAbsoluteFile(relative_url, relative_component,
                                  query_converter, output, out_parsed);
   }
 #else
   // Other platforms need explicit handling for file: URLs with multiple
   // slashes because the generic scheme parsing always extracts a host, but a
-  // file: URL only has a host if it has exactly 2 slashes. This also
-  // handles the special case where the URL is only slashes, since that
-  // doesn't have a host part either.
+  // file: URL only has a host if it has exactly 2 slashes. Even if it does
+  // have a host, we want to use the special host detection logic for file
+  // URLs provided by DoResolveAbsoluteFile(), as opposed to the generic host
+  // detection logic, for consistency with parsing file URLs from scratch.
+  // This also handles the special case where the URL is only slashes,
+  // since that doesn't have a host part either.
   if (base_is_file &&
-      (num_slashes > 2 || num_slashes == relative_component.len)) {
+      (num_slashes >= 2 || num_slashes == relative_component.len)) {
     return DoResolveAbsoluteFile(relative_url, relative_component,
                                  query_converter, output, out_parsed);
   }
@@ -520,53 +508,53 @@
 }  // namespace
 
 bool IsRelativeURL(const char* base,
-                   const url_parse::Parsed& base_parsed,
+                   const Parsed& base_parsed,
                    const char* fragment,
                    int fragment_len,
                    bool is_base_hierarchical,
                    bool* is_relative,
-                   url_parse::Component* relative_component) {
+                   Component* relative_component) {
   return DoIsRelativeURL<char>(
       base, base_parsed, fragment, fragment_len, is_base_hierarchical,
       is_relative, relative_component);
 }
 
 bool IsRelativeURL(const char* base,
-                   const url_parse::Parsed& base_parsed,
-                   const char16* fragment,
+                   const Parsed& base_parsed,
+                   const base::char16* fragment,
                    int fragment_len,
                    bool is_base_hierarchical,
                    bool* is_relative,
-                   url_parse::Component* relative_component) {
-  return DoIsRelativeURL<char16>(
+                   Component* relative_component) {
+  return DoIsRelativeURL<base::char16>(
       base, base_parsed, fragment, fragment_len, is_base_hierarchical,
       is_relative, relative_component);
 }
 
 bool ResolveRelativeURL(const char* base_url,
-                        const url_parse::Parsed& base_parsed,
+                        const Parsed& base_parsed,
                         bool base_is_file,
                         const char* relative_url,
-                        const url_parse::Component& relative_component,
+                        const Component& relative_component,
                         CharsetConverter* query_converter,
                         CanonOutput* output,
-                        url_parse::Parsed* out_parsed) {
+                        Parsed* out_parsed) {
   return DoResolveRelativeURL<char>(
       base_url, base_parsed, base_is_file, relative_url,
       relative_component, query_converter, output, out_parsed);
 }
 
 bool ResolveRelativeURL(const char* base_url,
-                        const url_parse::Parsed& base_parsed,
+                        const Parsed& base_parsed,
                         bool base_is_file,
-                        const char16* relative_url,
-                        const url_parse::Component& relative_component,
+                        const base::char16* relative_url,
+                        const Component& relative_component,
                         CharsetConverter* query_converter,
                         CanonOutput* output,
-                        url_parse::Parsed* out_parsed) {
-  return DoResolveRelativeURL<char16>(
+                        Parsed* out_parsed) {
+  return DoResolveRelativeURL<base::char16>(
       base_url, base_parsed, base_is_file, relative_url,
       relative_component, query_converter, output, out_parsed);
 }
 
-}  // namespace url_canon
+}  // namespace url

diff --git a/src/url/url_canon_stdstring.cc b/src/url/url_canon_stdstring.cc
new file mode 100644
index 0000000..366a2e0
--- /dev/null
+++ b/src/url/url_canon_stdstring.cc

@@ -0,0 +1,32 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_canon_stdstring.h"
+
+namespace url {
+
+StdStringCanonOutput::StdStringCanonOutput(std::string* str)
+    : CanonOutput(), str_(str) {
+  cur_len_ = static_cast<int>(str_->size());  // Append to existing data.
+  str_->resize(str_->capacity());  // Expose all reserved space for writing.
+  buffer_ = str_->empty() ? nullptr : &(*str_)[0];  // No storage if empty.
+  buffer_len_ = static_cast<int>(str_->size());
+}
+
+// Defaulted: there is nothing to release because the string is owned by the
+// caller, not by this object.
+StdStringCanonOutput::~StdStringCanonOutput() = default;
+
+void StdStringCanonOutput::Complete() {
+  buffer_len_ = cur_len_;  // Usable buffer now ends where the output ends.
+  str_->resize(cur_len_);  // Resize the caller's string to the written length.
+}
+
+void StdStringCanonOutput::Resize(int sz) {
+  str_->resize(sz);  // May reallocate, so the cached pointer is refreshed.
+  buffer_ = str_->empty() ? nullptr : &(*str_)[0];
+  buffer_len_ = sz;
+}
+
+}  // namespace url

diff --git a/src/url/url_canon_stdstring.h b/src/url/url_canon_stdstring.h
new file mode 100644
index 0000000..f8a847d
--- /dev/null
+++ b/src/url/url_canon_stdstring.h

@@ -0,0 +1,85 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_STDSTRING_H_
+#define URL_URL_CANON_STDSTRING_H_
+
+// This header file defines a canonicalizer output method class for STL
+// strings. Because the canonicalizer tries not to be dependent on the STL,
+// we have segregated it here.
+
+#include <string>
+
+#include "url/url_canon.h"
+#include "url/url_export.h"
+
+namespace url {
+
+// Write into a std::string given in the constructor. This object does not own
+// the string itself, and the user must ensure that the string stays alive
+// throughout the lifetime of this object.
+//
+// The given string will be appended to; any existing data in the string will
+// be preserved. The caller should reserve() the amount of data in the string
+// they expect to be written. We will resize if necessary, but that's slow.
+//
+// Note that when canonicalization is complete, the string will likely have
+// unused space at the end because the constructor grows the string to its
+// full capacity() up front. This ends up being important because resize
+// operations are slow, and because the base class needs to write directly
+// into the buffer.
+//
+// Therefore, the user should call Complete() before using the string that
+// this class wrote into.
+class URL_EXPORT StdStringCanonOutput : public CanonOutput {
+ public:
+  // Appends to |str|, which is not owned and must outlive this object.
+  explicit StdStringCanonOutput(std::string* str);
+  ~StdStringCanonOutput() override;
+  // Must be called after writing has completed but before the string is used.
+  void Complete();
+  // Resizes the underlying string so that it holds |sz| characters.
+  void Resize(int sz) override;
+
+ protected:
+  std::string* str_;  // Not owned.
+};
+
+// An extension of the Replacements class whose setters accept standard
+// strings (any STR exposing value_type, data() and length()). Each setter
+// forwards s.data() with a Component spanning the whole string to the
+// matching base-class setter. The strings passed as arguments are not copied
+// here and must remain valid until this class goes out of scope.
+template<typename STR>
+class StdStringReplacements : public Replacements<typename STR::value_type> {
+ public:
+  void SetSchemeStr(const STR& s) {
+    this->SetScheme(s.data(), Component(0, static_cast<int>(s.length())));
+  }
+  void SetUsernameStr(const STR& s) {
+    this->SetUsername(s.data(), Component(0, static_cast<int>(s.length())));
+  }
+  void SetPasswordStr(const STR& s) {
+    this->SetPassword(s.data(), Component(0, static_cast<int>(s.length())));
+  }
+  void SetHostStr(const STR& s) {
+    this->SetHost(s.data(), Component(0, static_cast<int>(s.length())));
+  }
+  void SetPortStr(const STR& s) {
+    this->SetPort(s.data(), Component(0, static_cast<int>(s.length())));
+  }
+  void SetPathStr(const STR& s) {
+    this->SetPath(s.data(), Component(0, static_cast<int>(s.length())));
+  }
+  void SetQueryStr(const STR& s) {
+    this->SetQuery(s.data(), Component(0, static_cast<int>(s.length())));
+  }
+  void SetRefStr(const STR& s) {
+    this->SetRef(s.data(), Component(0, static_cast<int>(s.length())));
+  }
+};
+
+}  // namespace url
+
+#endif  // URL_URL_CANON_STDSTRING_H_

diff --git a/googleurl/src/url_canon_stdurl.cc b/src/url/url_canon_stdurl.cc
similarity index 64%
rename from googleurl/src/url_canon_stdurl.cc
rename to src/url/url_canon_stdurl.cc
index 1e21a14..7a61de8 100644
--- a/googleurl/src/url_canon_stdurl.cc
+++ b/src/url/url_canon_stdurl.cc

@@ -1,48 +1,24 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
 // Functions to canonicalize "standard" URLs, which are ones that have an
 // authority section including a host name.
 
-#include "googleurl/src/url_canon.h"
-#include "googleurl/src/url_canon_internal.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_constants.h"
 
-namespace url_canon {
+namespace url {
 
 namespace {
 
 template<typename CHAR, typename UCHAR>
 bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
-                               const url_parse::Parsed& parsed,
+                               const Parsed& parsed,
                                CharsetConverter* query_converter,
                                CanonOutput* output,
-                               url_parse::Parsed* new_parsed) {
+                               Parsed* new_parsed) {
   // Scheme: this will append the colon.
   bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
                                     output, &new_parsed->scheme);
@@ -97,7 +73,7 @@
     // When we have an empty path, make up a path when we have an authority
     // or something following the path. The only time we allow an empty
     // output path is when there is nothing else.
-    new_parsed->path = url_parse::Component(output->length(), 1);
+    new_parsed->path = Component(output->length(), 1);
     output->push_back('/');
   } else {
     // No path at all
@@ -120,28 +96,28 @@
 // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
 // if the scheme is unknown.
 int DefaultPortForScheme(const char* scheme, int scheme_len) {
-  int default_port = url_parse::PORT_UNSPECIFIED;
+  int default_port = PORT_UNSPECIFIED;
   switch (scheme_len) {
     case 4:
-      if (!strncmp(scheme, "http", scheme_len))
+      if (!strncmp(scheme, kHttpScheme, scheme_len))
         default_port = 80;
       break;
     case 5:
-      if (!strncmp(scheme, "https", scheme_len))
+      if (!strncmp(scheme, kHttpsScheme, scheme_len))
         default_port = 443;
       break;
     case 3:
-      if (!strncmp(scheme, "ftp", scheme_len))
+      if (!strncmp(scheme, kFtpScheme, scheme_len))
         default_port = 21;
-      else if (!strncmp(scheme, "wss", scheme_len))
+      else if (!strncmp(scheme, kWssScheme, scheme_len))
         default_port = 443;
       break;
     case 6:
-      if (!strncmp(scheme, "gopher", scheme_len))
+      if (!strncmp(scheme, kGopherScheme, scheme_len))
         default_port = 70;
       break;
     case 2:
-      if (!strncmp(scheme, "ws", scheme_len))
+      if (!strncmp(scheme, kWsScheme, scheme_len))
         default_port = 80;
       break;
   }
@@ -150,23 +126,23 @@
 
 bool CanonicalizeStandardURL(const char* spec,
                              int spec_len,
-                             const url_parse::Parsed& parsed,
+                             const Parsed& parsed,
                              CharsetConverter* query_converter,
                              CanonOutput* output,
-                             url_parse::Parsed* new_parsed) {
+                             Parsed* new_parsed) {
   return DoCanonicalizeStandardURL<char, unsigned char>(
       URLComponentSource<char>(spec), parsed, query_converter,
       output, new_parsed);
 }
 
-bool CanonicalizeStandardURL(const char16* spec,
+bool CanonicalizeStandardURL(const base::char16* spec,
                              int spec_len,
-                             const url_parse::Parsed& parsed,
+                             const Parsed& parsed,
                              CharsetConverter* query_converter,
                              CanonOutput* output,
-                             url_parse::Parsed* new_parsed) {
-  return DoCanonicalizeStandardURL<char16, char16>(
-      URLComponentSource<char16>(spec), parsed, query_converter,
+                             Parsed* new_parsed) {
+  return DoCanonicalizeStandardURL<base::char16, base::char16>(
+      URLComponentSource<base::char16>(spec), parsed, query_converter,
       output, new_parsed);
 }
 
@@ -180,13 +156,13 @@
 // You would also need to update DoReplaceComponents in url_util.cc which
 // relies on this re-checking everything (see the comment there for why).
 bool ReplaceStandardURL(const char* base,
-                        const url_parse::Parsed& base_parsed,
+                        const Parsed& base_parsed,
                         const Replacements<char>& replacements,
                         CharsetConverter* query_converter,
                         CanonOutput* output,
-                        url_parse::Parsed* new_parsed) {
+                        Parsed* new_parsed) {
   URLComponentSource<char> source(base);
-  url_parse::Parsed parsed(base_parsed);
+  Parsed parsed(base_parsed);
   SetupOverrideComponents(base, replacements, &source, &parsed);
   return DoCanonicalizeStandardURL<char, unsigned char>(
       source, parsed, query_converter, output, new_parsed);
@@ -195,17 +171,17 @@
 // For 16-bit replacements, we turn all the replacements into UTF-8 so the
 // regular codepath can be used.
 bool ReplaceStandardURL(const char* base,
-                        const url_parse::Parsed& base_parsed,
-                        const Replacements<char16>& replacements,
+                        const Parsed& base_parsed,
+                        const Replacements<base::char16>& replacements,
                         CharsetConverter* query_converter,
                         CanonOutput* output,
-                        url_parse::Parsed* new_parsed) {
+                        Parsed* new_parsed) {
   RawCanonOutput<1024> utf8;
   URLComponentSource<char> source(base);
-  url_parse::Parsed parsed(base_parsed);
+  Parsed parsed(base_parsed);
   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
   return DoCanonicalizeStandardURL<char, unsigned char>(
       source, parsed, query_converter, output, new_parsed);
 }
 
-}  // namespace url_canon
+}  // namespace url

diff --git a/src/url/url_canon_unittest.cc b/src/url/url_canon_unittest.cc
new file mode 100644
index 0000000..1917cc9
--- /dev/null
+++ b/src/url/url_canon_unittest.cc

@@ -0,0 +1,2131 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <errno.h>
+
+#include "base/macros.h"
+#include "testing/base/public/gunit.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_parse.h"
+#include "url/url_test_utils.h"
+
+namespace url {
+
+using test_utils::WStringToUTF16;
+using test_utils::ConvertUTF8ToUTF16;
+using test_utils::ConvertUTF16ToUTF8;
+
+namespace {
+
+struct ComponentCase {
+  const char* input;
+  const char* expected;
+  Component expected_component;
+  bool expected_success;
+};
+
+// ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests
+// treat each input as optional, and will only try processing if non-NULL.
+// The output is always 8-bit.
+struct DualComponentCase {
+  const char* input8;
+  const wchar_t* input16;
+  const char* expected;
+  Component expected_component;
+  bool expected_success;
+};
+
+// Test cases for CanonicalizeIPAddress().  The inputs are identical to
+// DualComponentCase, but the output has extra CanonHostInfo fields.
+struct IPAddressCase {
+  const char* input8;
+  const wchar_t* input16;
+  const char* expected;
+  Component expected_component;
+
+  // CanonHostInfo fields, for verbose output.
+  CanonHostInfo::Family expected_family;
+  int expected_num_ipv4_components;
+  const char* expected_address_hex;  // Two hex chars per IP address byte.
+};
+
+std::string BytesToHexString(unsigned char bytes[16], int length) {
+  EXPECT_TRUE(length == 0 || length == 4 || length == 16)
+      << "Bad IP address length: " << length;
+  std::string result;
+  for (int i = 0; i < length; ++i) {
+    result.push_back(kHexCharLookup[(bytes[i] >> 4) & 0xf]);
+    result.push_back(kHexCharLookup[bytes[i] & 0xf]);
+  }
+  return result;
+}
+
+struct ReplaceCase {
+  const char* base;
+  const char* scheme;
+  const char* username;
+  const char* password;
+  const char* host;
+  const char* port;
+  const char* path;
+  const char* query;
+  const char* ref;
+  const char* expected;
+};
+
+// Magic string used in the replacements code that tells SetupReplComp to
+// call the clear function.
+const char kDeleteComp[] = "|";
+
+// Sets up a replacement for a single component. This is given pointers to
+// the set and clear function for the component being replaced, and will
+// either set the component (if it exists) or clear it (if the replacement
+// string matches kDeleteComp).
+//
+// This template is currently used only for the 8-bit case, and the strlen
+// causes it to fail in other cases. It is left a template in case we have
+// tests for wide replacements.
+template<typename CHAR>
+void SetupReplComp(
+    void (Replacements<CHAR>::*set)(const CHAR*, const Component&),
+    void (Replacements<CHAR>::*clear)(),
+    Replacements<CHAR>* rep,
+    const CHAR* str) {
+  if (str && str[0] == kDeleteComp[0]) {
+    (rep->*clear)();
+  } else if (str) {
+    (rep->*set)(str, Component(0, static_cast<int>(strlen(str))));
+  }
+}
+
+}  // namespace
+
+TEST(URLCanonTest, DoAppendUTF8) {
+  struct UTF8Case {
+    unsigned input;
+    const char* output;
+  } utf_cases[] = {
+    // Valid code points.
+    {0x24, "\x24"},
+    {0xA2, "\xC2\xA2"},
+    {0x20AC, "\xE2\x82\xAC"},
+    {0x24B62, "\xF0\xA4\xAD\xA2"},
+    {0x10FFFF, "\xF4\x8F\xBF\xBF"},
+  };
+  std::string out_str;
+  for (size_t i = 0; i < arraysize(utf_cases); i++) {
+    out_str.clear();
+    StdStringCanonOutput output(&out_str);
+    AppendUTF8Value(utf_cases[i].input, &output);
+    output.Complete();
+    EXPECT_EQ(utf_cases[i].output, out_str);
+  }
+}
+
+#if defined(GTEST_HAS_DEATH_TEST)
+// TODO(mattm): Can't run this in debug mode for now, since the DCHECK will
+// cause the Chromium stacktrace dialog to appear and hang the test.
+// See http://crbug.com/49580.
+#if defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON)
+#define MAYBE_DoAppendUTF8Invalid DoAppendUTF8Invalid
+#else
+#define MAYBE_DoAppendUTF8Invalid DISABLED_DoAppendUTF8Invalid
+#endif
+TEST(URLCanonTest, MAYBE_DoAppendUTF8Invalid) {
+  std::string out_str;
+  StdStringCanonOutput output(&out_str);
+  // Invalid code point (too large).
+  ASSERT_DEBUG_DEATH({
+    AppendUTF8Value(0x110000, &output);
+    output.Complete();
+    EXPECT_EQ("", out_str);
+  }, "");
+}
+#endif  // defined(GTEST_HAS_DEATH_TEST)
+
+TEST(URLCanonTest, UTF) {
+  // Low-level test that we handle reading, canonicalization, and writing
+  // UTF-8/UTF-16 strings properly.
+  struct UTFCase {
+    const char* input8;
+    const wchar_t* input16;
+    bool expected_success;
+    const char* output;
+  } utf_cases[] = {
+      // Valid canonical input should get passed through & escaped.
+    {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"},
+      // Test a character that takes > 16 bits (U+10300 = old italic letter A)
+    {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"},
+      // Non-shortest-form UTF-8 is invalid. The bad char should be replaced
+      // with the replacement character U+FFFD (EF BF BD in UTF-8).
+    {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", NULL, false, "%EF%BF%BD%E5%A5%BD"},
+      // Invalid UTF-8 sequences should be marked as invalid (the first
+      // sequence is truncated).
+    {"\xe4\xa0\xe5\xa5\xbd", L"\xd800\x597d", false, "%EF%BF%BD%E5%A5%BD"},
+      // Character going off the end.
+    {"\xe4\xbd\xa0\xe5\xa5", L"\x4f60\xd800", false, "%E4%BD%A0%EF%BF%BD"},
+      // ...same with low surrogates with no high surrogate.
+    {"\xed\xb0\x80", L"\xdc00", false, "%EF%BF%BD"},
+      // Test a UTF-8 encoded surrogate value is marked as invalid.
+      // ED A0 80 = U+D800
+    {"\xed\xa0\x80", NULL, false, "%EF%BF%BD"},
+  };
+
+  std::string out_str;
+  for (size_t i = 0; i < arraysize(utf_cases); i++) {
+    if (utf_cases[i].input8) {
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+
+      int input_len = static_cast<int>(strlen(utf_cases[i].input8));
+      bool success = true;
+      for (int ch = 0; ch < input_len; ch++) {
+        success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len,
+                                         &output);
+      }
+      output.Complete();
+      EXPECT_EQ(utf_cases[i].expected_success, success);
+      EXPECT_EQ(std::string(utf_cases[i].output), out_str);
+    }
+    if (utf_cases[i].input16) {
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+
+      base::string16 input_str(WStringToUTF16(utf_cases[i].input16));
+      int input_len = static_cast<int>(input_str.length());
+      bool success = true;
+      for (int ch = 0; ch < input_len; ch++) {
+        success &= AppendUTF8EscapedChar(input_str.c_str(), &ch, input_len,
+                                         &output);
+      }
+      output.Complete();
+      EXPECT_EQ(utf_cases[i].expected_success, success);
+      EXPECT_EQ(std::string(utf_cases[i].output), out_str);
+    }
+
+    if (utf_cases[i].input8 && utf_cases[i].input16 &&
+        utf_cases[i].expected_success) {
+      // Check that the UTF-8 and UTF-16 inputs are equivalent.
+
+      // UTF-16 -> UTF-8
+      std::string input8_str(utf_cases[i].input8);
+      base::string16 input16_str(WStringToUTF16(utf_cases[i].input16));
+      EXPECT_EQ(input8_str, ConvertUTF16ToUTF8(input16_str));
+
+      // UTF-8 -> UTF-16
+      EXPECT_EQ(input16_str, ConvertUTF8ToUTF16(input8_str));
+    }
+  }
+}
+
+TEST(URLCanonTest, Scheme) {
+  // Here, we're mostly testing that unusual characters are handled properly.
+  // The canonicalizer doesn't do any parsing or whitespace detection. It will
+  // also do its best on error, and will escape funny sequences (these won't be
+  // valid schemes and it will return error).
+  //
+  // Note that the canonicalizer will append a colon to the output to separate
+  // out the rest of the URL, which is not present in the input. We check,
+  // however, that the output range includes everything but the colon.
+  ComponentCase scheme_cases[] = {
+    {"http", "http:", Component(0, 4), true},
+    {"HTTP", "http:", Component(0, 4), true},
+    {" HTTP ", "%20http%20:", Component(0, 10), false},
+    {"htt: ", "htt%3A%20:", Component(0, 9), false},
+    {"\xe4\xbd\xa0\xe5\xa5\xbdhttp", "%E4%BD%A0%E5%A5%BDhttp:", Component(0, 22), false},
+      // Don't re-escape something already escaped. Note that it will
+      // "canonicalize" the 'A' to 'a', but that's OK.
+    {"ht%3Atp", "ht%3atp:", Component(0, 7), false},
+  };
+
+  std::string out_str;
+
+  for (size_t i = 0; i < arraysize(scheme_cases); i++) {
+    int url_len = static_cast<int>(strlen(scheme_cases[i].input));
+    Component in_comp(0, url_len);
+    Component out_comp;
+
+    out_str.clear();
+    StdStringCanonOutput output1(&out_str);
+    bool success = CanonicalizeScheme(scheme_cases[i].input, in_comp, &output1,
+                                      &out_comp);
+    output1.Complete();
+
+    EXPECT_EQ(scheme_cases[i].expected_success, success);
+    EXPECT_EQ(std::string(scheme_cases[i].expected), out_str);
+    EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
+    EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
+
+    // Now try the wide version
+    out_str.clear();
+    StdStringCanonOutput output2(&out_str);
+
+    base::string16 wide_input(ConvertUTF8ToUTF16(scheme_cases[i].input));
+    in_comp.len = static_cast<int>(wide_input.length());
+    success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2,
+                                 &out_comp);
+    output2.Complete();
+
+    EXPECT_EQ(scheme_cases[i].expected_success, success);
+    EXPECT_EQ(std::string(scheme_cases[i].expected), out_str);
+    EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
+    EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
+  }
+
+  // Test the case where the scheme is declared nonexistent, it should be
+  // converted into an empty scheme.
+  Component out_comp;
+  out_str.clear();
+  StdStringCanonOutput output(&out_str);
+
+  EXPECT_TRUE(CanonicalizeScheme("", Component(0, -1), &output, &out_comp));
+  output.Complete();
+
+  EXPECT_EQ(std::string(":"), out_str);
+  EXPECT_EQ(0, out_comp.begin);
+  EXPECT_EQ(0, out_comp.len);
+}
+
+TEST(URLCanonTest, Host) {
+  IPAddressCase host_cases[] = {
+       // Basic canonicalization, uppercase should be converted to lowercase.
+    {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
+      // Spaces and some other characters should be escaped.
+    {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""},
+      // Exciting different types of spaces!
+    {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16), CanonHostInfo::NEUTRAL, -1, ""},
+      // Other types of space (no-break, zero-width, zero-width-no-break) are
+      // name-prepped away to nothing.
+    {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
+      // Ideographic full stop (full-width period for Chinese, etc.) should be
+      // treated as a dot.
+    {NULL, L"www.foo\x3002" L"bar.com", "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
+      // Invalid unicode characters should fail...
+      // ...In wide input, ICU will barf and we'll end up with the input as
+      //    escaped UTF-8 (the invalid character should be replaced with the
+      //    replacement character).
+    {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+      // ...This is the same as the previous case but with the input escaped.
+    {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+      // Test name prepping, fullwidth input should be converted to ASCII and NOT
+      // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.
+    {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""},
+      // Test that fullwidth escaped values are properly name-prepped,
+      // then converted or rejected.
+      // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
+    {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+    {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+      // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
+    {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+    {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+      // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
+    {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x597d", "xn--6qqa088eba", Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+      // See http://unicode.org/cldr/utility/idna.jsp for other
+      // examples/experiments and http://goo.gl/7yG11o
+      // for the full list of characters handled differently by
+      // IDNA 2003, UTS 46 (http://unicode.org/reports/tr46/ ) and IDNA 2008.
+
+      // 4 Deviation characters are mapped/ignored in UTS 46 transitional
+      // mechanism. UTS 46, table 4 row (g).
+      // Sharp-s is mapped to 'ss' in UTS 46 and IDNA 2003.
+      // Otherwise, it'd be "xn--fuball-cta.de".
+    {"fu\xc3\x9f" "ball.de", L"fu\x00df" L"ball.de", "fussball.de",
+      Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
+      // Final-sigma (U+03C2) is mapped to regular sigma (U+03C3).
+      // Otherwise, it'd be "xn--wxaijb9b".
+    {"\xcf\x83\xcf\x8c\xce\xbb\xce\xbf\xcf\x82", L"\x3c3\x3cc\x3bb\x3bf\x3c2",
+      "xn--wxaikc6b", Component(0, 12),
+      CanonHostInfo::NEUTRAL, -1, ""},
+      // ZWNJ (U+200C) and ZWJ (U+200D) are mapped away in UTS 46 transitional
+      // handling as well as in IDNA 2003.
+    {"a\xe2\x80\x8c" "b\xe2\x80\x8d" "c", L"a\x200c" L"b\x200d" L"c", "abc",
+      Component(0, 3), CanonHostInfo::NEUTRAL, -1, ""},
+      // ZWJ between Devanagari characters is still mapped away in UTS 46
+      // transitional handling. IDNA 2008 would give xn--11bo0mv54g.
+    {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c",
+     L"\x915\x94d\x200d\x91c", "xn--11bo0m",
+     Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
+      // Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b)
+      // However, we do allow this at the moment because we don't use
+      // STD3 rules and canonicalize full-width ASCII to ASCII.
+    {"wow\xef\xbc\x81", L"wow\xff01", "wow%21",
+      Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""},
+      // U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c)
+      // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
+    {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo",
+      Component(0, 11), CanonHostInfo::BROKEN, -1, ""},
+      // U+2F868 (CJK Comp) is disallowed. UTS 46, table 4, row (d)
+      // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
+    {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn",
+      "%F0%AF%A1%A8%E5%A7%BB.cn",
+      Component(0, 24), CanonHostInfo::BROKEN, -1, ""},
+      // Maps uppercase letters to lower case letters. UTS 46 table 4 row (e)
+    {"M\xc3\x9cNCHEN", L"M\xdcNCHEN", "xn--mnchen-3ya",
+      Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+      // Symbol/punctuations are allowed in IDNA 2003/UTS46.
+      // Not allowed in IDNA 2008. UTS 46 table 4 row (f).
+    {"\xe2\x99\xa5ny.us", L"\x2665ny.us", "xn--ny-s0x.us",
+      Component(0, 13), CanonHostInfo::NEUTRAL, -1, ""},
+      // U+11013 is new in Unicode 6.0 and is allowed. UTS 46 table 4, row (h)
+      // We used to allow it because we passed through unassigned code points.
+    {"\xf0\x91\x80\x93.com", L"\xd804\xdc13.com", "xn--n00d.com",
+      Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""},
+      // U+0602 is disallowed in UTS46/IDNA 2008. UTS 46 table 4, row(i)
+      // Used to be allowed in IDNA 2003.
+    {"\xd8\x82.eg", L"\x602.eg", "%D8%82.eg",
+      Component(0, 9), CanonHostInfo::BROKEN, -1, ""},
+      // U+20B7 is new in Unicode 5.2 (not a part of IDNA 2003 based
+      // on Unicode 3.2). We did allow it in the past because we let unassigned
+      // code point pass. We continue to allow it even though it's a
+      // "punctuation and symbol" blocked in IDNA 2008.
+      // UTS 46 table 4, row (j)
+    {"\xe2\x82\xb7.com", L"\x20b7.com", "xn--wzg.com",
+      Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
+      // Maps uppercase letters to lower case letters.
+      // In IDNA 2003, it's allowed without case-folding
+      // ( xn--bc-7cb.com ) because it's not defined in Unicode 3.2
+      // (added in Unicode 4.1). UTS 46 table 4 row (k)
+    {"bc\xc8\xba.com", L"bc\x23a.com", "xn--bc-is1a.com",
+      Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
+      // BiDi check test
+      // "Divehi" in Divehi (Thaana script) ends with BidiClass=NSM.
+      // Disallowed in IDNA 2003 but now allowed in UTS 46/IDNA 2008.
+    {"\xde\x8b\xde\xa8\xde\x88\xde\xac\xde\x80\xde\xa8",
+     L"\x78b\x7a8\x788\x7ac\x780\x7a8", "xn--hqbpi0jcw",
+     Component(0, 13), CanonHostInfo::NEUTRAL, -1, ""},
+      // Disallowed in both IDNA 2003 and 2008 with BiDi check.
+      // Labels starting with a RTL character cannot end with a LTR character.
+    {"\xd8\xac\xd8\xa7\xd8\xb1xyz", L"\x62c\x627\x631xyz",
+     "%D8%AC%D8%A7%D8%B1xyz", Component(0, 21),
+     CanonHostInfo::BROKEN, -1, ""},
+      // Labels starting with a RTL character can end with BC=EN (European
+      // number). Disallowed in IDNA 2003 but now allowed.
+    {"\xd8\xac\xd8\xa7\xd8\xb1" "2", L"\x62c\x627\x631" L"2",
+     "xn--2-ymcov", Component(0, 11),
+     CanonHostInfo::NEUTRAL, -1, ""},
+      // Labels starting with a RTL character cannot have "L" characters
+      // even if it ends with an BC=EN. Disallowed in both IDNA 2003/2008.
+    {"\xd8\xac\xd8\xa7\xd8\xb1xy2", L"\x62c\x627\x631xy2",
+     "%D8%AC%D8%A7%D8%B1xy2", Component(0, 21),
+     CanonHostInfo::BROKEN, -1, ""},
+      // Labels starting with a RTL character can end with BC=AN (Arabic number)
+      // Disallowed in IDNA 2003, but now allowed.
+    {"\xd8\xac\xd8\xa7\xd8\xb1\xd9\xa2", L"\x62c\x627\x631\x662",
+     "xn--mgbjq0r", Component(0, 11),
+     CanonHostInfo::NEUTRAL, -1, ""},
+      // Labels starting with a RTL character cannot have "L" characters
+      // even if it ends with an BC=AN (Arabic number).
+      // Disallowed in both IDNA 2003/2008.
+    {"\xd8\xac\xd8\xa7\xd8\xb1xy\xd9\xa2", L"\x62c\x627\x631xy\x662",
+     "%D8%AC%D8%A7%D8%B1xy%D9%A2", Component(0, 26),
+     CanonHostInfo::BROKEN, -1, ""},
+      // Labels starting with a RTL character cannot mix BC=EN and BC=AN
+    {"\xd8\xac\xd8\xa7\xd8\xb1xy2\xd9\xa2", L"\x62c\x627\x631xy2\x662",
+     "%D8%AC%D8%A7%D8%B1xy2%D9%A2", Component(0, 27),
+     CanonHostInfo::BROKEN, -1, ""},
+      // As of Unicode 6.2, U+20CF is not assigned. We do not allow it.
+    {"\xe2\x83\x8f.com", L"\x20cf.com", "%E2%83%8F.com",
+      Component(0, 13), CanonHostInfo::BROKEN, -1, ""},
+      // U+0080 is not allowed.
+    {"\xc2\x80.com", L"\x80.com", "%C2%80.com",
+      Component(0, 10), CanonHostInfo::BROKEN, -1, ""},
+      // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
+      // UTF-8 (wide case). The output should be equivalent to that of the
+      // true wide character input in the "Basic IDN support" case above
+      // (xn--6qqa088eba).
+    {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd",
+      L"%E4%BD%A0%E5%A5%BD\x4f60\x597d", "xn--6qqa088eba",
+      Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+      // Invalid escaped characters should fail and the percents should be
+      // escaped.
+    {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", Component(0, 10),
+      CanonHostInfo::BROKEN, -1, ""},
+      // If we get an invalid character that has been escaped.
+    {"%25", L"%25", "%25", Component(0, 3),
+      CanonHostInfo::BROKEN, -1, ""},
+    {"hello%00", L"hello%00", "hello%00", Component(0, 8),
+      CanonHostInfo::BROKEN, -1, ""},
+      // Escaped numbers should be treated like IP addresses if they are.
+    {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01",
+      "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3,
+      "C0A80001"},
+    {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e",
+      "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3,
+      "C0A80001"},
+      // Invalid escaping should trigger the regular host error handling.
+    {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", Component(0, 17), CanonHostInfo::BROKEN, -1, ""},
+      // Something that isn't exactly an IP should get treated as a host and
+      // spaces escaped.
+    {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
+      // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
+      // These are "0Xc0.0250.01" in fullwidth.
+    {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
+      // Broken IP addresses get marked as such.
+    {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13), CanonHostInfo::BROKEN, -1, ""},
+    {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12), CanonHostInfo::BROKEN, -1, ""},
+      // Cyrillic letter followed by '(' should return punycode for '(' escaped
+      // before punycode string was created. I.e.
+      // if '(' is escaped after punycode is created we would get xn--%28-8tb
+      // (incorrect).
+    {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11),
+      CanonHostInfo::NEUTRAL, -1, ""},
+      // Addresses made up entirely of hexadecimal characters, with a leading
+      // number of 1<<32 or greater, should return NEUTRAL rather than BROKEN
+      // if not all components are numbers.
+    {"12345678912345.de", L"12345678912345.de", "12345678912345.de", Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""},
+    {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
+    {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "12345678912345.12345678912345.de", Component(0, 32), CanonHostInfo::NEUTRAL, -1, ""},
+    {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""},
+    {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", Component(0, 19), CanonHostInfo::BROKEN, -1, ""},
+  };
+
+  // CanonicalizeHost() non-verbose.
+  std::string out_str;
+  for (size_t i = 0; i < arraysize(host_cases); i++) {
+    // Narrow version.
+    if (host_cases[i].input8) {
+      int host_len = static_cast<int>(strlen(host_cases[i].input8));
+      Component in_comp(0, host_len);
+      Component out_comp;
+
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+
+      bool success = CanonicalizeHost(host_cases[i].input8, in_comp, &output,
+                                      &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
+                success) << "for input: " << host_cases[i].input8;
+      EXPECT_EQ(std::string(host_cases[i].expected), out_str) <<
+                "for input: " << host_cases[i].input8;
+      EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin) <<
+                "for input: " << host_cases[i].input8;
+      EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len) <<
+                "for input: " << host_cases[i].input8;
+    }
+
+    // Wide version.
+    if (host_cases[i].input16) {
+      base::string16 input16(WStringToUTF16(host_cases[i].input16));
+      int host_len = static_cast<int>(input16.length());
+      Component in_comp(0, host_len);
+      Component out_comp;
+
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+
+      bool success = CanonicalizeHost(input16.c_str(), in_comp, &output,
+                                      &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
+                success);
+      EXPECT_EQ(std::string(host_cases[i].expected), out_str);
+      EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);
+      EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);
+    }
+  }
+
+  // CanonicalizeHostVerbose()
+  for (size_t i = 0; i < arraysize(host_cases); i++) {
+    // Narrow version.
+    if (host_cases[i].input8) {
+      int host_len = static_cast<int>(strlen(host_cases[i].input8));
+      Component in_comp(0, host_len);
+
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+      CanonHostInfo host_info;
+
+      CanonicalizeHostVerbose(host_cases[i].input8, in_comp, &output,
+                              &host_info);
+      output.Complete();
+
+      EXPECT_EQ(host_cases[i].expected_family, host_info.family);
+      EXPECT_EQ(std::string(host_cases[i].expected), out_str);
+      EXPECT_EQ(host_cases[i].expected_component.begin,
+                host_info.out_host.begin);
+      EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
+      EXPECT_EQ(std::string(host_cases[i].expected_address_hex),
+                BytesToHexString(host_info.address, host_info.AddressLength()));
+      if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
+        EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
+                  host_info.num_ipv4_components);
+      }
+    }
+
+    // Wide version.
+    if (host_cases[i].input16) {
+      base::string16 input16(WStringToUTF16(host_cases[i].input16));
+      int host_len = static_cast<int>(input16.length());
+      Component in_comp(0, host_len);
+
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+      CanonHostInfo host_info;
+
+      CanonicalizeHostVerbose(input16.c_str(), in_comp, &output, &host_info);
+      output.Complete();
+
+      EXPECT_EQ(host_cases[i].expected_family, host_info.family);
+      EXPECT_EQ(std::string(host_cases[i].expected), out_str);
+      EXPECT_EQ(host_cases[i].expected_component.begin,
+                host_info.out_host.begin);
+      EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
+      EXPECT_EQ(std::string(host_cases[i].expected_address_hex),
+                BytesToHexString(host_info.address, host_info.AddressLength()));
+      if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
+        EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
+                  host_info.num_ipv4_components);
+      }
+    }
+  }
+}
+
+TEST(URLCanonTest, IPv4) {  // Table-driven checks of CanonicalizeIPAddress() on IPv4-style input.
+  IPAddressCase cases[] = {
+      // Empty is not an IP address.
+    {"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {".", L".", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+      // Regular IP addresses in different bases.
+    {"192.168.0.1", L"192.168.0.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
+    {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
+    {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
+      // Non-IP addresses due to invalid characters.
+    {"192.168.9.com", L"192.168.9.com", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+      // Invalid characters for the base should be rejected.
+    {"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+      // If there are not enough components, the last one should fill them out.
+    {"192", L"192", "0.0.0.192", Component(0, 9), CanonHostInfo::IPV4, 1, "000000C0"},
+    {"0xC0a80001", L"0xC0a80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
+    {"030052000001", L"030052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
+    {"000030052000001", L"000030052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
+    {"192.168", L"192.168", "192.0.0.168", Component(0, 11), CanonHostInfo::IPV4, 2, "C00000A8"},
+    {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"},  // NOTE(review): the wide input has one more leading zero than the narrow one.
+    {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"},
+    {"192.168.1", L"192.168.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
+      // Too many components means not an IP address.
+    {"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+      // We allow a single trailing dot.
+    {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
+    {"192.168.0.1. hello", L"192.168.0.1. hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"192.168.0.1..", L"192.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+      // Two dots in a row means not an IP address.
+    {"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+      // Any numerical overflow should be marked as BROKEN.
+    {"0x100.0", L"0x100.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0x100.0.0", L"0x100.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0x100.0.0.0", L"0x100.0.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0.0x100.0.0", L"0.0x100.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0.0.0x100.0", L"0.0.0x100.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0.0.0.0x100", L"0.0.0.0x100", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0.0.0x10000", L"0.0.0x10000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0.0x1000000", L"0.0x1000000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0x100000000", L"0x100000000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // Repeat the previous tests, minus 1, to verify boundaries.
+    {"0xFF.0", L"0xFF.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 2, "FF000000"},
+    {"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 3, "FF000000"},
+    {"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 4, "FF000000"},
+    {"0.0xFF.0.0", L"0.0xFF.0.0", "0.255.0.0", Component(0, 9), CanonHostInfo::IPV4, 4, "00FF0000"},
+    {"0.0.0xFF.0", L"0.0.0xFF.0", "0.0.255.0", Component(0, 9), CanonHostInfo::IPV4, 4, "0000FF00"},
+    {"0.0.0.0xFF", L"0.0.0.0xFF", "0.0.0.255", Component(0, 9), CanonHostInfo::IPV4, 4, "000000FF"},
+    {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"},
+    {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"},
+    {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"},
+      // Old truncation tests.  They're all "BROKEN" now.
+    {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"192.168.0.257", L"192.168.0.257", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"192.168.0xa20001", L"192.168.0xa20001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"192.015052000001", L"192.015052000001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0X12C0a80001", L"0X12C0a80001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"276.1.2", L"276.1.2", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // Spaces should be rejected.
+    {"192.168.0.1 hello", L"192.168.0.1 hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+      // Very large numbers.
+    {"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300.0x00000000000000fF.00000000000000001", "192.255.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0FF0001"},
+    {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", "", Component(0, 11), CanonHostInfo::BROKEN, -1, ""},  // The component here is never compared: the loop below only checks it for IPV4 results.
+      // A number has no length limit, but long numbers can still overflow.
+    {"00000000000000000001", L"00000000000000000001", "0.0.0.1", Component(0, 7), CanonHostInfo::IPV4, 1, "00000001"},
+    {"0000000000000000100000000000000001", L"0000000000000000100000000000000001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // If a long component is non-numeric, it's a hostname, *not* a broken IP.
+    {"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+      // Truncation of all zeros should still result in 0.
+    {"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", Component(0, 7), CanonHostInfo::IPV4, 4, "00000000"},
+  };
+
+  for (size_t i = 0; i < arraysize(cases); i++) {
+    // 8-bit version; the input component spans the whole string.
+    Component component(0, static_cast<int>(strlen(cases[i].input8)));
+
+    std::string out_str1;
+    StdStringCanonOutput output1(&out_str1);
+    CanonHostInfo host_info;
+    CanonicalizeIPAddress(cases[i].input8, component, &output1, &host_info);
+    output1.Complete();
+
+    EXPECT_EQ(cases[i].expected_family, host_info.family);
+    EXPECT_EQ(std::string(cases[i].expected_address_hex),
+              BytesToHexString(host_info.address, host_info.AddressLength()));
+    if (host_info.family == CanonHostInfo::IPV4) {  // Text, component and count are only checked for IPV4 results.
+      EXPECT_STREQ(cases[i].expected, out_str1.c_str());
+      EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
+      EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
+      EXPECT_EQ(cases[i].expected_num_ipv4_components,
+                host_info.num_ipv4_components);
+    }
+
+    // 16-bit version; the same host_info object is passed in again.
+    base::string16 input16(WStringToUTF16(cases[i].input16));
+    component = Component(0, static_cast<int>(input16.length()));
+
+    std::string out_str2;
+    StdStringCanonOutput output2(&out_str2);
+    CanonicalizeIPAddress(input16.c_str(), component, &output2, &host_info);
+    output2.Complete();
+
+    EXPECT_EQ(cases[i].expected_family, host_info.family);
+    EXPECT_EQ(std::string(cases[i].expected_address_hex),
+              BytesToHexString(host_info.address, host_info.AddressLength()));
+    if (host_info.family == CanonHostInfo::IPV4) {
+      EXPECT_STREQ(cases[i].expected, out_str2.c_str());
+      EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
+      EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
+      EXPECT_EQ(cases[i].expected_num_ipv4_components,
+                host_info.num_ipv4_components);
+    }
+  }
+}
+
+TEST(URLCanonTest, IPv6) {  // Table-driven checks of CanonicalizeIPAddress() on bracketed IPv6-style input.
+  IPAddressCase cases[] = {
+      // Empty is not an IP address.
+    {"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+      // Non-IPs with [:] characters are marked BROKEN.
+    {":", L":", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[", L"[", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[:", L"[:", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"]", L"]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {":]", L":]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[]", L"[]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[:]", L"[:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // Regular IP address is invalid without bounding '[' and ']'.
+    {"2001:db8::1", L"2001:db8::1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[2001:db8::1", L"[2001:db8::1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"2001:db8::1]", L"2001:db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // Regular IP addresses.
+    {"[::]", L"[::]", "[::]", Component(0,4), CanonHostInfo::IPV6, -1, "00000000000000000000000000000000"},
+    {"[::1]", L"[::1]", "[::1]", Component(0,5), CanonHostInfo::IPV6, -1, "00000000000000000000000000000001"},
+    {"[1::]", L"[1::]", "[1::]", Component(0,5), CanonHostInfo::IPV6, -1, "00010000000000000000000000000000"},
+
+    // Leading zeros should be stripped.
+    {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4:5:6:7]", Component(0,17), CanonHostInfo::IPV6, -1, "00000001000200030004000500060007"},
+
+    // Upper case letters should be lowercased.
+    {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", Component(0,20), CanonHostInfo::IPV6, -1, "000A000B000C00DE00FF0000000100AC"},
+
+    // The same address can be written with different contractions, but should
+    // get canonicalized to the same thing.
+    {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", Component(0,14), CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"},
+    {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", Component(0,14), CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"},
+
+    // Addresses with embedded IPv4.
+    {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", Component(0,10), CanonHostInfo::IPV6, -1, "000000000000000000000000C0A80001"},
+    {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0A80001"},
+    {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "[::eeee:c0a8:1]", Component(0, 15), CanonHostInfo::IPV6, -1, "00000000000000000000EEEEC0A80001"},
+    {"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "[2001::c0a8:1]", Component(0, 14), CanonHostInfo::IPV6, -1, "200100000000000000000000C0A80001"},
+    {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // IPv4 with last component missing.
+    {"[::ffff:192.1.2]", L"[::ffff:192.1.2]", "[::ffff:c001:2]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0010002"},
+
+    // IPv4 using hex.
+    // TODO(eroman): Should this format be disallowed?
+    {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8:1]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0A80001"},
+
+    // There may be zeros surrounding the "::" contraction.
+    {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", Component(0,5), CanonHostInfo::IPV6, -1, "00000000000000000000000000000008"},
+
+    {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", Component(0,13), CanonHostInfo::IPV6, -1, "20010DB8000000000000000000000001"},
+
+      // Can only have one "::" contraction in an IPv6 string literal.
+    {"[2001::db8::1]", L"[2001::db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // No more than 2 consecutive ':'s.
+    {"[2001:db8:::1]", L"[2001:db8:::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[:::]", L"[:::]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // Non-IP addresses due to invalid characters.
+    {"[2001::.com]", L"[2001::.com]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // If there are not enough components, the last one should fill them out.
+    // ... omitted at this time ...
+      // Too many components means not an IP address.  Similarly with too few if using IPv4 compat or mapped addresses.
+    {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // Too many bits (even though 8 components, the last one holds 32 bits).
+    {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // Too many bits specified -- the contraction would have to be zero-length
+    // to not exceed 128 bits.
+    {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // The contraction is for 16 bits of zero.
+    {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", Component(0,17), CanonHostInfo::IPV6, -1, "00010002000300040005000600000008"},
+
+    // Cannot have a trailing colon.
+    {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // Cannot have negative numbers.
+    {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // Scope ID -- inputs carrying a ["%" <scope_id>] section inside the
+    // brackets. Every such case below is expected to be BROKEN, whether the
+    // id is numeric, a name, or empty.
+
+    // That includes the case where no ID is given after the percent.
+
+    // Don't allow scope-id
+    {"[1::%1]", L"[1::%1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[1::%eth0]", L"[1::%eth0]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[1::%]", L"[1::%]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[%]", L"[%]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[::%:]", L"[::%:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // Don't allow leading or trailing colons.
+    {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+      // We allow a single trailing dot.
+    // ... omitted at this time ...
+      // Two dots in a row means not an IP address.
+    {"[::192.168..1]", L"[::192.168..1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // Any non-first components get truncated to one byte.
+    // ... omitted at this time ...
+      // Spaces should be rejected.
+    {"[::1 hello]", L"[::1 hello]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+  };
+
+  for (size_t i = 0; i < arraysize(cases); i++) {
+    // 8-bit version; the input component spans the whole string.
+    Component component(0, static_cast<int>(strlen(cases[i].input8)));
+
+    std::string out_str1;
+    StdStringCanonOutput output1(&out_str1);
+    CanonHostInfo host_info;
+    CanonicalizeIPAddress(cases[i].input8, component, &output1, &host_info);
+    output1.Complete();
+
+    EXPECT_EQ(cases[i].expected_family, host_info.family);
+    EXPECT_EQ(std::string(cases[i].expected_address_hex),
+              BytesToHexString(host_info.address, host_info.AddressLength())) << "iter " << i << " host " << cases[i].input8;
+    if (host_info.family == CanonHostInfo::IPV6) {  // Text and component are only checked for IPV6 results.
+      EXPECT_STREQ(cases[i].expected, out_str1.c_str());
+      EXPECT_EQ(cases[i].expected_component.begin,
+                host_info.out_host.begin);
+      EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
+    }
+
+    // 16-bit version; the same host_info object is passed in again.
+    base::string16 input16(WStringToUTF16(cases[i].input16));
+    component = Component(0, static_cast<int>(input16.length()));
+
+    std::string out_str2;
+    StdStringCanonOutput output2(&out_str2);
+    CanonicalizeIPAddress(input16.c_str(), component, &output2, &host_info);
+    output2.Complete();
+
+    EXPECT_EQ(cases[i].expected_family, host_info.family);
+    EXPECT_EQ(std::string(cases[i].expected_address_hex),
+              BytesToHexString(host_info.address, host_info.AddressLength()));
+    if (host_info.family == CanonHostInfo::IPV6) {
+      EXPECT_STREQ(cases[i].expected, out_str2.c_str());
+      EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
+      EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
+    }
+  }
+}
+
+TEST(URLCanonTest, IPEmpty) {  // Empty input components must not be classified as IP addresses.
+  std::string out_str1;
+  StdStringCanonOutput output1(&out_str1);
+  CanonHostInfo host_info;
+
+  // Neither a default-constructed nor a zero-length component is an IP.
+  const char spec[] = "192.168.0.1";
+  CanonicalizeIPAddress(spec, Component(), &output1, &host_info);
+  EXPECT_FALSE(host_info.IsIPAddress());
+
+  CanonicalizeIPAddress(spec, Component(0, 0), &output1, &host_info);
+  EXPECT_FALSE(host_info.IsIPAddress());
+}
+
+TEST(URLCanonTest, UserInfo) {  // Checks CanonicalizeUserInfo() output text and username/password components.
+  // Note that the canonicalizer should escape and treat empty components as
+  // not being there.
+
+  // We actually parse a full input URL so we can get the initial components.
+  struct UserComponentCase {
+    const char* input;
+    const char* expected;
+    Component expected_username;
+    Component expected_password;
+    bool expected_success;
+  } user_info_cases[] = {
+    {"http://user:pass@host.com/", "user:pass@", Component(0, 4), Component(5, 4), true},
+    {"http://@host.com/", "", Component(0, -1), Component(0, -1), true},
+    {"http://:@host.com/", "", Component(0, -1), Component(0, -1), true},
+    {"http://foo:@host.com/", "foo@", Component(0, 3), Component(0, -1), true},
+    {"http://:foo@host.com/", ":foo@", Component(0, 0), Component(1, 3), true},
+    {"http://^ :$\t@host.com/", "%5E%20:$%09@", Component(0, 6), Component(7, 4), true},
+    {"http://user:pass@/", "user:pass@", Component(0, 4), Component(5, 4), true},
+    {"http://%2540:bar@domain.com/", "%2540:bar@", Component(0, 5), Component(6, 3), true },
+
+      // IE7 compatibility: old versions allowed backslashes in usernames, but
+      // IE7 does not. We disallow it as well.
+    {"ftp://me\\mydomain:pass@foo.com/", "", Component(0, -1), Component(0, -1), true},
+  };
+
+  for (size_t i = 0; i < arraysize(user_info_cases); i++) {
+    int url_len = static_cast<int>(strlen(user_info_cases[i].input));
+    Parsed parsed;
+    ParseStandardURL(user_info_cases[i].input, url_len, &parsed);
+    Component out_user, out_pass;
+    std::string out_str;
+    StdStringCanonOutput output1(&out_str);
+
+    bool success = CanonicalizeUserInfo(user_info_cases[i].input,
+                                        parsed.username,
+                                        user_info_cases[i].input,
+                                        parsed.password,
+                                        &output1,
+                                        &out_user,
+                                        &out_pass);
+    output1.Complete();
+
+    EXPECT_EQ(user_info_cases[i].expected_success, success);
+    EXPECT_EQ(std::string(user_info_cases[i].expected), out_str);
+    EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin);
+    EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len);
+    EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin);
+    EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len);
+
+    // Now try the wide version, reusing the components parsed from the 8-bit input.
+    out_str.clear();
+    StdStringCanonOutput output2(&out_str);
+    base::string16 wide_input(ConvertUTF8ToUTF16(user_info_cases[i].input));
+    success = CanonicalizeUserInfo(wide_input.c_str(),
+                                   parsed.username,
+                                   wide_input.c_str(),
+                                   parsed.password,
+                                   &output2,
+                                   &out_user,
+                                   &out_pass);
+    output2.Complete();
+
+    EXPECT_EQ(user_info_cases[i].expected_success, success);
+    EXPECT_EQ(std::string(user_info_cases[i].expected), out_str);
+    EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin);
+    EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len);
+    EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin);
+    EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len);
+  }
+}
+
+TEST(URLCanonTest, Port) {  // Checks CanonicalizePort() output text, component and success flag.
+  // We only need to test that the number gets properly put into the output
+  // buffer. The parser unit tests will test scanning the number correctly.
+  //
+  // Note that CanonicalizePort prepends a colon to any port text it emits,
+  // separating it from whatever precedes it (presumably the host -- confirm).
+  struct PortCase {
+    const char* input;
+    int default_port;
+    const char* expected;
+    Component expected_component;
+    bool expected_success;
+  } port_cases[] = {
+      // Invalid input should be copied w/ failure.
+    {"as df", 80, ":as%20df", Component(1, 7), false},
+    {"-2", 80, ":-2", Component(1, 2), false},
+      // Default port should be omitted.
+    {"80", 80, "", Component(0, -1), true},
+    {"8080", 80, ":8080", Component(1, 4), true},
+      // PORT_UNSPECIFIED should mean always keep the port.
+    {"80", PORT_UNSPECIFIED, ":80", Component(1, 2), true},
+  };
+
+  for (size_t i = 0; i < arraysize(port_cases); i++) {
+    int url_len = static_cast<int>(strlen(port_cases[i].input));
+    Component in_comp(0, url_len);
+    Component out_comp;
+    std::string out_str;
+    StdStringCanonOutput output1(&out_str);
+    bool success = CanonicalizePort(port_cases[i].input,
+                                    in_comp,
+                                    port_cases[i].default_port,
+                                    &output1,
+                                    &out_comp);
+    output1.Complete();
+
+    EXPECT_EQ(port_cases[i].expected_success, success);
+    EXPECT_EQ(std::string(port_cases[i].expected), out_str);
+    EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin);
+    EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len);
+
+    // Now try the wide version; in_comp from the 8-bit input is reused.
+    out_str.clear();
+    StdStringCanonOutput output2(&out_str);
+    base::string16 wide_input(ConvertUTF8ToUTF16(port_cases[i].input));
+    success = CanonicalizePort(wide_input.c_str(),
+                               in_comp,
+                               port_cases[i].default_port,
+                               &output2,
+                               &out_comp);
+    output2.Complete();
+
+    EXPECT_EQ(port_cases[i].expected_success, success);
+    EXPECT_EQ(std::string(port_cases[i].expected), out_str);
+    EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin);
+    EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len);
+  }
+}
+
+TEST(URLCanonTest, Path) {  // Checks CanonicalizePath() on narrow and wide input; a NULL input skips that width.
+  DualComponentCase path_cases[] = {
+    // ----- path collapsing tests -----
+    {"/././foo", L"/././foo", "/foo", Component(0, 4), true},
+    {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true},
+    {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true},
+    {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true},
+      // double dots followed by a slash or the end of the string count
+    {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true},
+    {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true},
+      // don't count double dots when they aren't followed by a slash
+    {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true},
+      // some in the middle
+    {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), true},
+    {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", Component(0, 2), true},
+      // we should not be able to go above the root
+    {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true},
+    {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), true},
+      // escaped dots should be unescaped and treated the same as dots
+    {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true},
+    {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true},
+    {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", "/..bar", Component(0, 6), true},
+      // Multiple slashes in a row should be preserved and treated like empty
+      // directory names.
+    {"////../..", L"////../..", "//", Component(0, 2), true},
+
+    // ----- escaping tests -----
+    {"/foo", L"/foo", "/foo", Component(0, 4), true},
+      // Valid escape sequence
+    {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true},
+      // Invalid escape sequence we should pass through unchanged.
+    {"/foo%", L"/foo%", "/foo%", Component(0, 5), true},
+    {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true},
+      // Invalid escape sequence: bad characters should be treated the same as
+      // the surrounding text, not as escaped (in this case, UTF-8).
+    {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true},
+    {"/foo%2\xc2\xa9zbar", NULL, "/foo%2%C2%A9zbar", Component(0, 16), true},
+    {NULL, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22), true},
+      // Regular characters that are escaped should be unescaped
+    {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true},
+      // Funny characters that are unescaped should be escaped
+    {"/foo\x09\x91%91", NULL, "/foo%09%91%91", Component(0, 13), true},
+    {NULL, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
+      // Invalid characters that are escaped should cause a failure.
+    {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false},
+      // Some characters should be passed through unchanged regardless of esc.
+    {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13), true},
+      // Characters that are properly escaped should not have the case changed
+      // of hex letters.
+    {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13), true},
+      // Funny characters that are unescaped should be escaped
+    {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true},
+      // Backslashes should get converted to forward slashes
+    {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true},
+      // Hashes found in paths (possibly only when the caller explicitly sets
+      // the path on an already-parsed URL) should be escaped.
+    {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true},
+      // %7f should be allowed and %3D should not be unescaped (these were wrong
+      // in a previous version).
+    {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true},
+      // @ should be passed through unchanged (escaped or unescaped).
+    {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true},
+
+    // ----- encoding tests -----
+      // Basic conversions
+    {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", Component(0, 37), true},
+      // Invalid unicode characters should fail. We only do validation on
+      // UTF-16 input, so this doesn't happen on 8-bit.
+    {"/\xef\xb7\x90zyx", NULL, "/%EF%B7%90zyx", Component(0, 13), true},
+    {NULL, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
+  };
+
+  for (size_t i = 0; i < arraysize(path_cases); i++) {
+    if (path_cases[i].input8) {  // Narrow version, skipped when the case has no 8-bit input.
+      int len = static_cast<int>(strlen(path_cases[i].input8));
+      Component in_comp(0, len);
+      Component out_comp;
+      std::string out_str;
+      StdStringCanonOutput output(&out_str);
+      bool success =
+          CanonicalizePath(path_cases[i].input8, in_comp, &output, &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(path_cases[i].expected_success, success);
+      EXPECT_EQ(path_cases[i].expected_component.begin, out_comp.begin);
+      EXPECT_EQ(path_cases[i].expected_component.len, out_comp.len);
+      EXPECT_EQ(path_cases[i].expected, out_str);
+    }
+
+    if (path_cases[i].input16) {  // Wide version, skipped when the case has no wide input.
+      base::string16 input16(WStringToUTF16(path_cases[i].input16));
+      int len = static_cast<int>(input16.length());
+      Component in_comp(0, len);
+      Component out_comp;
+      std::string out_str;
+      StdStringCanonOutput output(&out_str);
+
+      bool success =
+          CanonicalizePath(input16.c_str(), in_comp, &output, &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(path_cases[i].expected_success, success);
+      EXPECT_EQ(path_cases[i].expected_component.begin, out_comp.begin);
+      EXPECT_EQ(path_cases[i].expected_component.len, out_comp.len);
+      EXPECT_EQ(path_cases[i].expected, out_str);
+    }
+  }
+
+  // Manual test: embedded NULLs should be escaped and the URL should be marked
+  // as invalid.
+  const char path_with_null[] = "/ab\0c";
+  Component in_comp(0, 5);  // Explicit length 5 covers the embedded NUL and the 'c' after it.
+  Component out_comp;
+
+  std::string out_str;
+  StdStringCanonOutput output(&out_str);
+  bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp);
+  output.Complete();
+  EXPECT_FALSE(success);
+  EXPECT_EQ("/ab%00c", out_str);
+}
+
+TEST(URLCanonTest, Query) {  // Checks the text CanonicalizeQuery() emits for narrow and wide input.
+  struct QueryCase {
+    const char* input8;
+    const wchar_t* input16;
+    const char* expected;
+  } query_cases[] = {
+      // Regular ASCII case.
+    {"foo=bar", L"foo=bar", "?foo=bar"},
+      // Allow question marks in the query without escaping
+    {"as?df", L"as?df", "?as?df"},
+      // Always escape '#' since it would mark the ref.
+    {"as#df", L"as#df", "?as%23df"},
+      // Escape some questionable 8-bit characters, but never unescape.
+    {"\x02hello\x7f bye", L"\x02hello\x7f bye", "?%02hello%7F%20bye"},
+    {"%40%41123", L"%40%41123", "?%40%41123"},
+      // Chinese input/output
+    {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "?q=%E4%BD%A0%E5%A5%BD"},
+      // Invalid UTF-8/16 input should be replaced with invalid characters.
+    {"q=\xed\xed", L"q=\xd800\xd800", "?q=%EF%BF%BD%EF%BF%BD"},
+      // Don't allow < or > because sometimes they are used for XSS if the
+      // URL is echoed in content. Firefox does this, IE doesn't.
+    {"q=<asdf>", L"q=<asdf>", "?q=%3Casdf%3E"},
+      // Escape double quotemarks in the query.
+    {"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"},
+  };
+
+  for (size_t i = 0; i < arraysize(query_cases); i++) {
+    Component out_comp;  // Written by the calls below but never inspected in this test.
+
+    if (query_cases[i].input8) {
+      int len = static_cast<int>(strlen(query_cases[i].input8));
+      Component in_comp(0, len);
+      std::string out_str;
+
+      StdStringCanonOutput output(&out_str);
+      CanonicalizeQuery(query_cases[i].input8, in_comp, NULL, &output,
+                        &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(query_cases[i].expected, out_str);
+    }
+
+    if (query_cases[i].input16) {
+      base::string16 input16(WStringToUTF16(query_cases[i].input16));
+      int len = static_cast<int>(input16.length());
+      Component in_comp(0, len);
+      std::string out_str;
+
+      StdStringCanonOutput output(&out_str);
+      CanonicalizeQuery(input16.c_str(), in_comp, NULL, &output, &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(query_cases[i].expected, out_str);
+    }
+  }
+
+  // Extra test for input with an embedded NUL; the explicit length of 5 covers it.
+  std::string out_str;
+  StdStringCanonOutput output(&out_str);
+  Component out_comp;
+  CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp);
+  output.Complete();
+  EXPECT_EQ("?a%20%00z%01", out_str);
+}
+
+// Feeds each table entry to CanonicalizeRef() in 8-bit and 16-bit form and
+// checks both the output text and the reported output component.
+TEST(URLCanonTest, Ref) {
+  // Refs are trivial, it just checks the encoding.
+  DualComponentCase ref_cases[] = {
+      // Regular one, we shouldn't escape spaces, et al.
+    {"hello, world", L"hello, world", "#hello, world", Component(1, 12), true},
+      // UTF-8/wide input should be preserved
+    {"\xc2\xa9", L"\xa9", "#\xc2\xa9", Component(1, 2), true},
+      // Test a character that takes > 16 bits (U+10300 = old italic letter A)
+    {"\xF0\x90\x8C\x80ss", L"\xd800\xdf00ss", "#\xF0\x90\x8C\x80ss", Component(1, 6), true},
+      // Escaping should be preserved unchanged, even invalid ones
+    {"%41%a", L"%41%a", "#%41%a", Component(1, 5), true},
+      // Invalid UTF-8/16 input should be flagged and the input made valid
+    {"\xc2", NULL, "#\xef\xbf\xbd", Component(1, 3), true},
+    {NULL, L"\xd800\x597d", "#\xef\xbf\xbd\xe5\xa5\xbd", Component(1, 6), true},
+      // Test a Unicode invalid character.
+    {"a\xef\xb7\x90", L"a\xfdd0", "#a\xef\xbf\xbd", Component(1, 4), true},
+      // Refs can have # signs and we should preserve them.
+    {"asdf#qwer", L"asdf#qwer", "#asdf#qwer", Component(1, 9), true},
+    {"#asdf", L"#asdf", "##asdf", Component(1, 5), true},
+  };
+
+  for (size_t idx = 0; idx < arraysize(ref_cases); ++idx) {
+    const DualComponentCase& cur = ref_cases[idx];
+
+    // 8-bit input
+    if (cur.input8) {
+      std::string canon8;
+      StdStringCanonOutput sink8(&canon8);
+      Component whole8(0, static_cast<int>(strlen(cur.input8)));
+      Component result8;
+      CanonicalizeRef(cur.input8, whole8, &sink8, &result8);
+      sink8.Complete();
+
+      EXPECT_EQ(cur.expected_component.begin, result8.begin);
+      EXPECT_EQ(cur.expected_component.len, result8.len);
+      EXPECT_EQ(cur.expected, canon8);
+    }
+
+    // 16-bit input
+    if (cur.input16) {
+      base::string16 wide(WStringToUTF16(cur.input16));
+      std::string canon16;
+      StdStringCanonOutput sink16(&canon16);
+      Component whole16(0, static_cast<int>(wide.length()));
+      Component result16;
+      CanonicalizeRef(wide.c_str(), whole16, &sink16, &result16);
+      sink16.Complete();
+
+      EXPECT_EQ(cur.expected_component.begin, result16.begin);
+      EXPECT_EQ(cur.expected_component.len, result16.len);
+      EXPECT_EQ(cur.expected, canon16);
+    }
+  }
+
+  // Try one with an embedded NULL. It should be stripped. The component
+  // length (4) is spelled out so the NULL is part of the input.
+  const char null_input[5] = "ab\x00z";
+  Component null_input_component(0, 4);
+  std::string null_canon;
+  StdStringCanonOutput null_sink(&null_canon);
+  Component null_result;
+  CanonicalizeRef(null_input, null_input_component, &null_sink, &null_result);
+  null_sink.Complete();
+
+  EXPECT_EQ(1, null_result.begin);
+  EXPECT_EQ(3, null_result.len);
+  EXPECT_EQ("#abz", null_canon);
+}
+
+// Parses each input with ParseStandardURL(), canonicalizes it with
+// CanonicalizeStandardURL(), and checks the success flag and output text.
+TEST(URLCanonTest, CanonicalizeStandardURL) {
+  // The individual component canonicalize tests should have caught the cases
+  // for each of those components. Here, we just need to test that the various
+  // parts are included or excluded properly, and have the correct separators.
+  struct URLCase {
+    const char* input;
+    const char* expected;
+    bool expected_success;
+  } cases[] = {
+    {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#", true},
+    {"http://[www.google.com]/", "http://[www.google.com]/", false},
+    {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", false},
+    {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", true},
+    {"www.google.com", ":www.google.com/", true},
+    {"http://192.0x00A80001", "http://192.168.0.1/", true},
+    {"http://www/foo%2Ehtml", "http://www/foo.html", true},
+    {"http://user:pass@/", "http://user:pass@/", false},
+    {"http://%25DOMAIN:foobar@foodomain.com/", "http://%25DOMAIN:foobar@foodomain.com/", true},
+
+      // Backslashes should get converted to forward slashes.
+    {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true},
+
+      // Busted refs shouldn't make the whole thing fail.
+    {"http://www.google.com/asdf#\xc2", "http://www.google.com/asdf#\xef\xbf\xbd", true},
+
+      // Basic port tests.
+    {"http://foo:80/", "http://foo/", true},
+    {"http://foo:81/", "http://foo:81/", true},
+    {"httpa://foo:80/", "httpa://foo:80/", true},
+    {"http://foo:-80/", "http://foo:-80/", false},
+
+    {"https://foo:443/", "https://foo/", true},
+    {"https://foo:80/", "https://foo:80/", true},
+    {"ftp://foo:21/", "ftp://foo/", true},
+    {"ftp://foo:80/", "ftp://foo:80/", true},
+    {"gopher://foo:70/", "gopher://foo/", true},
+    {"gopher://foo:443/", "gopher://foo:443/", true},
+    {"ws://foo:80/", "ws://foo/", true},
+    {"ws://foo:81/", "ws://foo:81/", true},
+    {"ws://foo:443/", "ws://foo:443/", true},
+    {"ws://foo:815/", "ws://foo:815/", true},
+    {"wss://foo:80/", "wss://foo:80/", true},
+    {"wss://foo:81/", "wss://foo:81/", true},
+    {"wss://foo:443/", "wss://foo/", true},
+    {"wss://foo:815/", "wss://foo:815/", true},
+  };
+
+  for (size_t idx = 0; idx < arraysize(cases); ++idx) {
+    const URLCase& cur = cases[idx];
+    const int input_len = static_cast<int>(strlen(cur.input));
+
+    Parsed in_parsed;
+    ParseStandardURL(cur.input, input_len, &in_parsed);
+
+    std::string canon;
+    StdStringCanonOutput sink(&canon);
+    Parsed canon_parsed;
+    const bool ok = CanonicalizeStandardURL(cur.input, input_len, in_parsed,
+                                            NULL, &sink, &canon_parsed);
+    sink.Complete();
+
+    EXPECT_EQ(cur.expected_success, ok);
+    EXPECT_EQ(cur.expected, canon);
+  }
+}
+
+// Replacement shares its code path with regular canonicalization, so this
+// test only verifies that each component is (or is not) replaced as requested.
+TEST(URLCanonTest, ReplaceStandardURL) {
+  ReplaceCase replace_cases[] = {
+      // Common case of truncating the path.
+    {"http://www.google.com/foo?bar=baz#ref", NULL, NULL, NULL, NULL, NULL, "/", kDeleteComp, kDeleteComp, "http://www.google.com/"},
+      // Replace everything
+    {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw", "host.com", "99", "/path", "query", "ref", "https://me:pw@host.com:99/path?query#ref"},
+      // Replace nothing
+    {"http://a:b@google.com:22/foo?baz@cat", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "http://a:b@google.com:22/foo?baz@cat"},
+      // Replace scheme with filesystem.  The result is garbage, but you asked
+      // for it.
+    {"http://a:b@google.com:22/foo?baz@cat", "filesystem", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem://a:b@google.com:22/foo?baz@cat"},
+  };
+
+  for (size_t idx = 0; idx < arraysize(replace_cases); ++idx) {
+    const ReplaceCase& cur = replace_cases[idx];
+
+    Parsed base_parsed;
+    ParseStandardURL(cur.base, static_cast<int>(strlen(cur.base)),
+                     &base_parsed);
+
+    typedef Replacements<char> R;  // Clean up syntax.
+    R repl;
+
+    // Note that for the scheme we pass in a different clear function since
+    // there is no function to clear the scheme.
+    SetupReplComp(&R::SetScheme, &R::ClearRef, &repl, cur.scheme);
+    SetupReplComp(&R::SetUsername, &R::ClearUsername, &repl, cur.username);
+    SetupReplComp(&R::SetPassword, &R::ClearPassword, &repl, cur.password);
+    SetupReplComp(&R::SetHost, &R::ClearHost, &repl, cur.host);
+    SetupReplComp(&R::SetPort, &R::ClearPort, &repl, cur.port);
+    SetupReplComp(&R::SetPath, &R::ClearPath, &repl, cur.path);
+    SetupReplComp(&R::SetQuery, &R::ClearQuery, &repl, cur.query);
+    SetupReplComp(&R::SetRef, &R::ClearRef, &repl, cur.ref);
+
+    std::string replaced;
+    StdStringCanonOutput sink(&replaced);
+    Parsed replaced_parsed;
+    ReplaceStandardURL(cur.base, base_parsed, repl, NULL, &sink,
+                       &replaced_parsed);
+    sink.Complete();
+
+    EXPECT_EQ(cur.expected, replaced);
+  }
+
+  // The path pointer should be ignored if the address is invalid.
+  {
+    const char src[] = "http://www.google.com/here_is_the_path";
+    int src_len = static_cast<int>(strlen(src));
+
+    Parsed parsed;
+    ParseStandardURL(src, src_len, &parsed);
+
+    // By using 1 as the string address, the test should get an access
+    // violation if the replacement code tries to dereference it.
+    char* const bogus_path = reinterpret_cast<char*>(0x00000001);
+
+    // Replace the path with a 0 length string.
+    Replacements<char> r;
+    r.SetPath(bogus_path, Component(0, 0));
+    std::string empty_path_result;
+    StdStringCanonOutput empty_path_sink(&empty_path_result);
+    Parsed new_parsed;
+    ReplaceStandardURL(src, parsed, r, NULL, &empty_path_sink, &new_parsed);
+    empty_path_sink.Complete();
+    EXPECT_STREQ("http://www.google.com/", empty_path_result.c_str());
+
+    // Same with an "invalid" path.
+    r.SetPath(bogus_path, Component());
+    std::string invalid_path_result;
+    StdStringCanonOutput invalid_path_sink(&invalid_path_result);
+    ReplaceStandardURL(src, parsed, r, NULL, &invalid_path_sink, &new_parsed);
+    invalid_path_sink.Complete();
+    EXPECT_STREQ("http://www.google.com/", invalid_path_result.c_str());
+  }
+}
+
+// Applies each table entry's replacements to a file: URL via ReplaceFileURL()
+// and compares the resulting text with the expected string.
+TEST(URLCanonTest, ReplaceFileURL) {
+  ReplaceCase replace_cases[] = {
+      // Replace everything
+    {"file:///C:/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"},
+      // Replace nothing
+    {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"},
+      // Clear non-path components (common)
+    {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///C:/gaba"},
+      // Replace path with something that doesn't begin with a slash and make
+      // sure it gets added properly.
+    {"file:///C:/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"},
+    {"file:///home/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"},
+    {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///home/gaba?query#ref"},
+    {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///home/gaba"},
+    {"file:///home/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"},
+      // Replace scheme -- shouldn't do anything.
+    {"file:///C:/gaba?query#ref", "http", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"},
+  };
+
+  for (size_t idx = 0; idx < arraysize(replace_cases); ++idx) {
+    const ReplaceCase& cur = replace_cases[idx];
+
+    Parsed base_parsed;
+    ParseFileURL(cur.base, static_cast<int>(strlen(cur.base)), &base_parsed);
+
+    typedef Replacements<char> R;  // Clean up syntax.
+    R repl;
+    // ClearRef stands in as the scheme's "clear" callback here.
+    SetupReplComp(&R::SetScheme, &R::ClearRef, &repl, cur.scheme);
+    SetupReplComp(&R::SetUsername, &R::ClearUsername, &repl, cur.username);
+    SetupReplComp(&R::SetPassword, &R::ClearPassword, &repl, cur.password);
+    SetupReplComp(&R::SetHost, &R::ClearHost, &repl, cur.host);
+    SetupReplComp(&R::SetPort, &R::ClearPort, &repl, cur.port);
+    SetupReplComp(&R::SetPath, &R::ClearPath, &repl, cur.path);
+    SetupReplComp(&R::SetQuery, &R::ClearQuery, &repl, cur.query);
+    SetupReplComp(&R::SetRef, &R::ClearRef, &repl, cur.ref);
+
+    std::string replaced;
+    StdStringCanonOutput sink(&replaced);
+    Parsed replaced_parsed;
+    ReplaceFileURL(cur.base, base_parsed, repl, NULL, &sink, &replaced_parsed);
+    sink.Complete();
+
+    EXPECT_EQ(cur.expected, replaced);
+  }
+}
+
+// Applies each table entry's replacements to a filesystem: URL via
+// ReplaceFileSystemURL() and compares the resulting text with the expected
+// string.
+TEST(URLCanonTest, ReplaceFileSystemURL) {
+  ReplaceCase replace_cases[] = {
+      // Replace everything in the outer URL.
+    {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"},
+      // Replace nothing
+    {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"},
+      // Clear non-path components (common)
+    {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "filesystem:file:///temporary/gaba"},
+      // Replace path with something that doesn't begin with a slash and make
+      // sure it gets added properly.
+    {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "filesystem:file:///temporary/interesting/?query#ref"},
+      // Replace scheme -- shouldn't do anything.
+    {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"},
+      // Replace username -- shouldn't do anything.
+    {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, "u2", NULL, NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"},
+      // Replace password -- shouldn't do anything.
+    {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, "pw2", NULL, NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"},
+      // Replace host -- shouldn't do anything.
+    {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, NULL, "foo.com", NULL, NULL, NULL, NULL, "filesystem:http://u:p@bar.com/t/gaba?query#ref"},
+      // Replace port -- shouldn't do anything.
+    {"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", NULL, NULL, NULL, NULL, "41", NULL, NULL, NULL, "filesystem:http://u:p@bar.com:40/t/gaba?query#ref"},
+  };
+
+  for (size_t idx = 0; idx < arraysize(replace_cases); ++idx) {
+    const ReplaceCase& cur = replace_cases[idx];
+
+    Parsed base_parsed;
+    ParseFileSystemURL(cur.base, static_cast<int>(strlen(cur.base)),
+                       &base_parsed);
+
+    typedef Replacements<char> R;  // Clean up syntax.
+    R repl;
+    // ClearRef stands in as the scheme's "clear" callback here.
+    SetupReplComp(&R::SetScheme, &R::ClearRef, &repl, cur.scheme);
+    SetupReplComp(&R::SetUsername, &R::ClearUsername, &repl, cur.username);
+    SetupReplComp(&R::SetPassword, &R::ClearPassword, &repl, cur.password);
+    SetupReplComp(&R::SetHost, &R::ClearHost, &repl, cur.host);
+    SetupReplComp(&R::SetPort, &R::ClearPort, &repl, cur.port);
+    SetupReplComp(&R::SetPath, &R::ClearPath, &repl, cur.path);
+    SetupReplComp(&R::SetQuery, &R::ClearQuery, &repl, cur.query);
+    SetupReplComp(&R::SetRef, &R::ClearRef, &repl, cur.ref);
+
+    std::string replaced;
+    StdStringCanonOutput sink(&replaced);
+    Parsed replaced_parsed;
+    ReplaceFileSystemURL(cur.base, base_parsed, repl, NULL, &sink,
+                         &replaced_parsed);
+    sink.Complete();
+
+    EXPECT_EQ(cur.expected, replaced);
+  }
+}
+
+// Applies each table entry's replacements to a path URL via ReplacePathURL()
+// and compares the resulting text with the expected string.
+TEST(URLCanonTest, ReplacePathURL) {
+  ReplaceCase replace_cases[] = {
+      // Replace everything
+    {"data:foo", "javascript", NULL, NULL, NULL, NULL, "alert('foo?');", NULL, NULL, "javascript:alert('foo?');"},
+      // Replace nothing
+    {"data:foo", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "data:foo"},
+      // Replace one or the other
+    {"data:foo", "javascript", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "javascript:foo"},
+    {"data:foo", NULL, NULL, NULL, NULL, NULL, "bar", NULL, NULL, "data:bar"},
+    {"data:foo", NULL, NULL, NULL, NULL, NULL, kDeleteComp, NULL, NULL, "data:"},
+  };
+
+  for (size_t idx = 0; idx < arraysize(replace_cases); ++idx) {
+    const ReplaceCase& cur = replace_cases[idx];
+
+    Parsed base_parsed;
+    ParsePathURL(cur.base, static_cast<int>(strlen(cur.base)), false,
+                 &base_parsed);
+
+    typedef Replacements<char> R;  // Clean up syntax.
+    R repl;
+    // ClearRef stands in as the scheme's "clear" callback here.
+    SetupReplComp(&R::SetScheme, &R::ClearRef, &repl, cur.scheme);
+    SetupReplComp(&R::SetUsername, &R::ClearUsername, &repl, cur.username);
+    SetupReplComp(&R::SetPassword, &R::ClearPassword, &repl, cur.password);
+    SetupReplComp(&R::SetHost, &R::ClearHost, &repl, cur.host);
+    SetupReplComp(&R::SetPort, &R::ClearPort, &repl, cur.port);
+    SetupReplComp(&R::SetPath, &R::ClearPath, &repl, cur.path);
+    SetupReplComp(&R::SetQuery, &R::ClearQuery, &repl, cur.query);
+    SetupReplComp(&R::SetRef, &R::ClearRef, &repl, cur.ref);
+
+    std::string replaced;
+    StdStringCanonOutput sink(&replaced);
+    Parsed replaced_parsed;
+    ReplacePathURL(cur.base, base_parsed, repl, &sink, &replaced_parsed);
+    sink.Complete();
+
+    EXPECT_EQ(cur.expected, replaced);
+  }
+}
+
+// Applies each table entry's replacements to a mailto: URL via
+// ReplaceMailtoURL() and compares the resulting text with the expected string.
+TEST(URLCanonTest, ReplaceMailtoURL) {
+  // NOTE(review): the "|" literals below look like the same delete marker the
+  // sibling tests spell as kDeleteComp -- confirm against its definition.
+  ReplaceCase replace_cases[] = {
+      // Replace everything
+    {"mailto:jon@foo.com?body=sup", "mailto", NULL, NULL, NULL, NULL, "addr1", "to=tony", NULL, "mailto:addr1?to=tony"},
+      // Replace nothing
+    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mailto:jon@foo.com?body=sup"},
+      // Replace the path
+    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", NULL, NULL, "mailto:jason?body=sup"},
+      // Replace the query
+    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "custom=1", NULL, "mailto:jon@foo.com?custom=1"},
+      // Replace the path and query
+    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", "custom=1", NULL, "mailto:jason?custom=1"},
+      // Set the query to empty (should leave trailing question mark)
+    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "", NULL, "mailto:jon@foo.com?"},
+      // Clear the query
+    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "|", NULL, "mailto:jon@foo.com"},
+      // Clear the path
+    {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "|", NULL, NULL, "mailto:?body=sup"},
+      // Clear the path + query
+    {"mailto:", NULL, NULL, NULL, NULL, NULL, "|", "|", NULL, "mailto:"},
+      // Setting the ref should have no effect
+    {"mailto:addr1", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "BLAH", "mailto:addr1"},
+  };
+
+  for (size_t idx = 0; idx < arraysize(replace_cases); ++idx) {
+    const ReplaceCase& cur = replace_cases[idx];
+
+    Parsed base_parsed;
+    ParseMailtoURL(cur.base, static_cast<int>(strlen(cur.base)), &base_parsed);
+
+    typedef Replacements<char> R;
+    R repl;
+    // ClearRef stands in as the scheme's "clear" callback here.
+    SetupReplComp(&R::SetScheme, &R::ClearRef, &repl, cur.scheme);
+    SetupReplComp(&R::SetUsername, &R::ClearUsername, &repl, cur.username);
+    SetupReplComp(&R::SetPassword, &R::ClearPassword, &repl, cur.password);
+    SetupReplComp(&R::SetHost, &R::ClearHost, &repl, cur.host);
+    SetupReplComp(&R::SetPort, &R::ClearPort, &repl, cur.port);
+    SetupReplComp(&R::SetPath, &R::ClearPath, &repl, cur.path);
+    SetupReplComp(&R::SetQuery, &R::ClearQuery, &repl, cur.query);
+    SetupReplComp(&R::SetRef, &R::ClearRef, &repl, cur.ref);
+
+    std::string replaced;
+    StdStringCanonOutput sink(&replaced);
+    Parsed replaced_parsed;
+    ReplaceMailtoURL(cur.base, base_parsed, repl, &sink, &replaced_parsed);
+    sink.Complete();
+
+    EXPECT_EQ(cur.expected, replaced);
+  }
+}
+
+// Parses each input with ParseFileURL(), canonicalizes it with
+// CanonicalizeFileURL(), and checks the success flag, the output text, and
+// the scheme/host/path components reported for the output. The table differs
+// between Windows (_WIN32) and other platforms.
+TEST(URLCanonTest, CanonicalizeFileURL) {
+  struct URLCase {
+    const char* input;
+    const char* expected;
+    bool expected_success;
+    Component expected_host;
+    Component expected_path;
+  } cases[] = {
+#ifdef _WIN32
+      // Windows-style paths
+    {"file:c:\\foo\\bar.html", "file:///C:/foo/bar.html", true, Component(), Component(7, 16)},
+    {"  File:c|////foo\\bar.html", "file:///C:////foo/bar.html", true, Component(), Component(7, 19)},
+    {"file:", "file:///", true, Component(), Component(7, 1)},
+    {"file:UNChost/path", "file://unchost/path", true, Component(7, 7), Component(14, 5)},
+      // CanonicalizeFileURL supports absolute Windows style paths for IE
+      // compatibility. Note that the caller must decide that this is a file
+      // URL itself so it can call the file canonicalizer. This is usually
+      // done automatically as part of relative URL resolving.
+    {"c:\\foo\\bar", "file:///C:/foo/bar", true, Component(), Component(7, 11)},
+    {"C|/foo/bar", "file:///C:/foo/bar", true, Component(), Component(7, 11)},
+    {"/C|\\foo\\bar", "file:///C:/foo/bar", true, Component(), Component(7, 11)},
+    {"//C|/foo/bar", "file:///C:/foo/bar", true, Component(), Component(7, 11)},
+    {"//server/file", "file://server/file", true, Component(7, 6), Component(13, 5)},
+    {"\\\\server\\file", "file://server/file", true, Component(7, 6), Component(13, 5)},
+    {"/\\server/file", "file://server/file", true, Component(7, 6), Component(13, 5)},
+      // We should preserve the number of slashes after the colon for IE
+      // compatibility, except when there is none, in which case we should
+      // add one.
+    {"file:c:foo/bar.html", "file:///C:/foo/bar.html", true, Component(), Component(7, 16)},
+    {"file:/\\/\\C:\\\\//foo\\bar.html", "file:///C:////foo/bar.html", true, Component(), Component(7, 19)},
+      // Three slashes should be non-UNC, even if there is no drive spec (IE
+      // does this, which makes the resulting request invalid).
+    {"file:///foo/bar.txt", "file:///foo/bar.txt", true, Component(), Component(7, 12)},
+      // TODO(brettw) we should probably fail for invalid host names, which
+      // would change the expected result on this test. We also currently allow
+      // colon even though it's probably invalid, because it's currently the
+      // "natural" result of the way the canonicalizer is written. There doesn't
+      // seem to be a strong argument for why allowing it here would be bad, so
+      // we just tolerate it and the load will fail later.
+    {"FILE:/\\/\\7:\\\\//foo\\bar.html", "file://7:////foo/bar.html", false, Component(7, 2), Component(9, 16)},
+    {"file:filer/home\\me", "file://filer/home/me", true, Component(7, 5), Component(12, 8)},
+      // Make sure relative paths can't go above the "C:"
+    {"file:///C:/foo/../../../bar.html", "file:///C:/bar.html", true, Component(), Component(7, 12)},
+      // Busted refs shouldn't make the whole thing fail.
+    {"file:///C:/asdf#\xc2", "file:///C:/asdf#\xef\xbf\xbd", true, Component(), Component(7, 8)},
+#else
+      // Unix-style paths
+    {"file:///home/me", "file:///home/me", true, Component(), Component(7, 8)},
+      // Windowsy ones should get still treated as Unix-style.
+    {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, Component(), Component(7, 16)},
+    {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true, Component(), Component(7, 19)},
+      // file: tests from WebKit (LayoutTests/fast/loader/url-parse-1.html)
+    {"//", "file:///", true, Component(), Component(7, 1)},
+    {"///", "file:///", true, Component(), Component(7, 1)},
+    {"///test", "file:///test", true, Component(), Component(7, 5)},
+    {"file://test", "file://test/", true, Component(7, 4), Component(11, 1)},
+    {"file://localhost",  "file://localhost/", true, Component(7, 9), Component(16, 1)},
+    {"file://localhost/", "file://localhost/", true, Component(7, 9), Component(16, 1)},
+    {"file://localhost/test", "file://localhost/test", true, Component(7, 9), Component(16, 5)},
+#endif  // _WIN32
+  };
+
+  for (size_t idx = 0; idx < arraysize(cases); ++idx) {
+    const URLCase& cur = cases[idx];
+    const int input_len = static_cast<int>(strlen(cur.input));
+
+    Parsed in_parsed;
+    ParseFileURL(cur.input, input_len, &in_parsed);
+
+    std::string canon;
+    StdStringCanonOutput sink(&canon);
+    Parsed canon_parsed;
+    const bool ok = CanonicalizeFileURL(cur.input, input_len, in_parsed, NULL,
+                                        &sink, &canon_parsed);
+    sink.Complete();
+
+    EXPECT_EQ(cur.expected_success, ok);
+    EXPECT_EQ(cur.expected, canon);
+
+    // Make sure the spec was properly identified, the file canonicalizer has
+    // different code for writing the spec. "file" is 4 characters at offset 0.
+    EXPECT_EQ(0, canon_parsed.scheme.begin);
+    EXPECT_EQ(4, canon_parsed.scheme.len);
+
+    EXPECT_EQ(cur.expected_host.begin, canon_parsed.host.begin);
+    EXPECT_EQ(cur.expected_host.len, canon_parsed.host.len);
+
+    EXPECT_EQ(cur.expected_path.begin, canon_parsed.path.begin);
+    EXPECT_EQ(cur.expected_path.len, canon_parsed.path.len);
+  }
+}
+
+// Parses each input with ParseFileSystemURL(), canonicalizes it with
+// CanonicalizeFileSystemURL(), and checks the success flag, the output text,
+// and the outer scheme component.
+TEST(URLCanonTest, CanonicalizeFileSystemURL) {
+  struct URLCase {
+    const char* input;
+    const char* expected;
+    bool expected_success;
+  } cases[] = {
+    {"Filesystem:htTp://www.Foo.com:80/tempoRary", "filesystem:http://www.foo.com/tempoRary/", true},
+    {"filesystem:httpS://www.foo.com/temporary/", "filesystem:https://www.foo.com/temporary/", true},
+    {"filesystem:http://www.foo.com//", "filesystem:http://www.foo.com//", false},
+    {"filesystem:http://www.foo.com/persistent/bob?query#ref", "filesystem:http://www.foo.com/persistent/bob?query#ref", true},
+    {"filesystem:fIle://\\temporary/", "filesystem:file:///temporary/", true},
+    {"filesystem:fiLe:///temporary", "filesystem:file:///temporary/", true},
+    {"filesystem:File:///temporary/Bob?qUery#reF", "filesystem:file:///temporary/Bob?qUery#reF", true},
+  };
+
+  for (size_t idx = 0; idx < arraysize(cases); ++idx) {
+    const URLCase& cur = cases[idx];
+    const int input_len = static_cast<int>(strlen(cur.input));
+
+    Parsed in_parsed;
+    ParseFileSystemURL(cur.input, input_len, &in_parsed);
+
+    std::string canon;
+    StdStringCanonOutput sink(&canon);
+    Parsed canon_parsed;
+    const bool ok = CanonicalizeFileSystemURL(cur.input, input_len, in_parsed,
+                                              NULL, &sink, &canon_parsed);
+    sink.Complete();
+
+    EXPECT_EQ(cur.expected_success, ok);
+    EXPECT_EQ(cur.expected, canon);
+
+    // Make sure the spec was properly identified, the filesystem canonicalizer
+    // has different code for writing the spec. "filesystem" is 10 characters.
+    EXPECT_EQ(0, canon_parsed.scheme.begin);
+    EXPECT_EQ(10, canon_parsed.scheme.len);
+
+    // The path length is only checked for inputs that canonicalized cleanly.
+    if (ok) {
+      EXPECT_GT(canon_parsed.path.len, 0);
+    }
+  }
+}
+
+// Parses each input with ParsePathURL(), canonicalizes it with
+// CanonicalizePathURL(), and checks the output text plus the host and content
+// components reported for the output.
+TEST(URLCanonTest, CanonicalizePathURL) {
+  // Path URLs should get canonicalized schemes but nothing else.
+  struct PathCase {
+    const char* input;
+    const char* expected;
+  } path_cases[] = {
+    {"javascript:", "javascript:"},
+    {"JavaScript:Foo", "javascript:Foo"},
+    {":\":This /is interesting;?#", ":\":This /is interesting;?#"},
+  };
+
+  for (size_t idx = 0; idx < arraysize(path_cases); ++idx) {
+    const PathCase& cur = path_cases[idx];
+    const int input_len = static_cast<int>(strlen(cur.input));
+
+    Parsed in_parsed;
+    ParsePathURL(cur.input, input_len, true, &in_parsed);
+
+    std::string canon;
+    StdStringCanonOutput sink(&canon);
+    Parsed canon_parsed;
+    const bool ok = CanonicalizePathURL(cur.input, input_len, in_parsed, &sink,
+                                        &canon_parsed);
+    sink.Complete();
+
+    EXPECT_TRUE(ok);
+    EXPECT_EQ(cur.expected, canon);
+
+    // The host component is expected to stay at (0, -1) for these URLs.
+    EXPECT_EQ(0, canon_parsed.host.begin);
+    EXPECT_EQ(-1, canon_parsed.host.len);
+
+    // When we end with a colon at the end, there should be no path.
+    const bool ends_with_colon = cur.input[input_len - 1] == ':';
+    if (ends_with_colon) {
+      EXPECT_EQ(0, canon_parsed.GetContent().begin);
+      EXPECT_EQ(-1, canon_parsed.GetContent().len);
+    }
+  }
+}
+
+// Parses each input with ParseMailtoURL(), canonicalizes it with
+// CanonicalizeMailtoURL(), and checks the success flag, the output text, and
+// the scheme/path/query components reported for the output.
+TEST(URLCanonTest, CanonicalizeMailtoURL) {
+  struct URLCase {
+    const char* input;
+    const char* expected;
+    bool expected_success;
+    Component expected_path;
+    Component expected_query;
+  } cases[] = {
+    {"mailto:addr1", "mailto:addr1", true, Component(7, 5), Component()},
+    {"mailto:addr1@foo.com", "mailto:addr1@foo.com", true, Component(7, 13), Component()},
+    // Trailing whitespace is stripped.
+    {"MaIlTo:addr1 \t ", "mailto:addr1", true, Component(7, 5), Component()},
+    {"MaIlTo:addr1?to=jon", "mailto:addr1?to=jon", true, Component(7, 5), Component(13,6)},
+    {"mailto:addr1,addr2", "mailto:addr1,addr2", true, Component(7, 11), Component()},
+    {"mailto:addr1, addr2", "mailto:addr1, addr2", true, Component(7, 12), Component()},
+    {"mailto:addr1%2caddr2", "mailto:addr1%2caddr2", true, Component(7, 13), Component()},
+    {"mailto:\xF0\x90\x8C\x80", "mailto:%F0%90%8C%80", true, Component(7, 12), Component()},
+    // Null character should be escaped to %00
+    {"mailto:addr1\0addr2?foo", "mailto:addr1%00addr2?foo", true, Component(7, 13), Component(21, 3)},
+    // Invalid -- UTF-8 encoded surrogate value.
+    {"mailto:\xed\xa0\x80", "mailto:%EF%BF%BD", false, Component(7, 9), Component()},
+    {"mailto:addr1?", "mailto:addr1?", true, Component(7, 5), Component(13, 0)},
+  };
+
+  // The 9th test case (index 8) purposely has a '\0' in it, so strlen() would
+  // treat it as the string terminator. Its real length is supplied by hand.
+  // Keep the index in sync with the table above.
+  const size_t kEmbeddedNullCase = 8;
+  const int kEmbeddedNullLength = 22;
+
+  // Define outside of loop to catch bugs where components aren't reset
+  Parsed parsed;
+  Parsed out_parsed;
+
+  for (size_t idx = 0; idx < arraysize(cases); ++idx) {
+    const URLCase& cur = cases[idx];
+    const int input_len = (idx == kEmbeddedNullCase)
+                              ? kEmbeddedNullLength
+                              : static_cast<int>(strlen(cur.input));
+    ParseMailtoURL(cur.input, input_len, &parsed);
+
+    std::string canon;
+    StdStringCanonOutput sink(&canon);
+    const bool ok = CanonicalizeMailtoURL(cur.input, input_len, parsed, &sink,
+                                          &out_parsed);
+    sink.Complete();
+
+    EXPECT_EQ(cur.expected_success, ok);
+    EXPECT_EQ(cur.expected, canon);
+
+    // Make sure the spec was properly identified ("mailto" is 6 characters).
+    EXPECT_EQ(0, out_parsed.scheme.begin);
+    EXPECT_EQ(6, out_parsed.scheme.len);
+
+    EXPECT_EQ(cur.expected_path.begin, out_parsed.path.begin);
+    EXPECT_EQ(cur.expected_path.len, out_parsed.path.len);
+
+    EXPECT_EQ(cur.expected_query.begin, out_parsed.query.begin);
+    EXPECT_EQ(cur.expected_query.len, out_parsed.query.len);
+  }
+}
+
+#ifndef WIN32
+
+// Checks _itoa_s(): return codes (0 on success, EINVAL when the value does not
+// fit), null termination of the output, and that bytes beyond the size passed
+// in are left untouched.
+TEST(URLCanonTest, _itoa_s) {
+  // We fill the buffer with 0xff to ensure that it's getting properly
+  // null-terminated.  We also allocate one byte more than what we tell
+  // _itoa_s about, and ensure that the extra byte is untouched.
+  char buf[6];
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(0, _itoa_s(12, buf, sizeof(buf) - 1, 10));
+  EXPECT_STREQ("12", buf);
+  // "12" plus its terminator occupies buf[0..2]; buf[3] must keep the fill.
+  EXPECT_EQ('\xFF', buf[3]);
+
+  // Test the edge cases - exactly the buffer size and one over
+  // "1234" plus its terminator needs exactly the 5 bytes that were offered.
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(0, _itoa_s(1234, buf, sizeof(buf) - 1, 10));
+  EXPECT_STREQ("1234", buf);
+  EXPECT_EQ('\xFF', buf[5]);
+
+  // "12345" plus its terminator needs 6 bytes, one more than offered.
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(EINVAL, _itoa_s(12345, buf, sizeof(buf) - 1, 10));
+  EXPECT_EQ('\xFF', buf[5]);  // should never write to this location
+
+  // Test the template overload (note that this will see the full buffer)
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(0, _itoa_s(12, buf, 10));
+  EXPECT_STREQ("12", buf);
+  EXPECT_EQ('\xFF', buf[3]);
+
+  // With all 6 bytes visible, "12345" fits...
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(0, _itoa_s(12345, buf, 10));
+  EXPECT_STREQ("12345", buf);
+
+  // ...but "123456" does not.
+  EXPECT_EQ(EINVAL, _itoa_s(123456, buf, 10));
+
+  // Test that radix 16 is supported.
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(0, _itoa_s(1234, buf, sizeof(buf) - 1, 16));
+  EXPECT_STREQ("4d2", buf);
+  EXPECT_EQ('\xFF', buf[5]);
+}
+
+// Checks _itow_s(), the 16-bit counterpart of _itoa_s(): return codes (0 on
+// success, EINVAL when the value does not fit), null termination of the
+// output, and that elements beyond the size passed in are left untouched.
+TEST(URLCanonTest, _itow_s) {
+  // We fill the buffer with 0xffff to ensure that it's getting properly
+  // null-terminated.  We also allocate one element more than what we tell
+  // _itow_s about, and ensure that the extra element is untouched.
+  base::char16 buf[6];
+  // memset() takes an int and stores it as an unsigned char, so hold the fill
+  // byte in an int rather than squeezing 0xff into a possibly signed char.
+  const int fill_mem = 0xff;
+  const base::char16 fill_char = 0xffff;
+  memset(buf, fill_mem, sizeof(buf));
+  EXPECT_EQ(0, _itow_s(12, buf, sizeof(buf) / 2 - 1, 10));
+  EXPECT_EQ(WStringToUTF16(L"12"), base::string16(buf));
+  EXPECT_EQ(fill_char, buf[3]);
+
+  // Test the edge cases - exactly the buffer size and one over
+  // Refill first so the checks below do not depend on what the previous call
+  // left in the buffer (this mirrors the _itoa_s test).
+  memset(buf, fill_mem, sizeof(buf));
+  EXPECT_EQ(0, _itow_s(1234, buf, sizeof(buf) / 2 - 1, 10));
+  EXPECT_EQ(WStringToUTF16(L"1234"), base::string16(buf));
+  EXPECT_EQ(fill_char, buf[5]);
+
+  memset(buf, fill_mem, sizeof(buf));
+  EXPECT_EQ(EINVAL, _itow_s(12345, buf, sizeof(buf) / 2 - 1, 10));
+  EXPECT_EQ(fill_char, buf[5]);  // should never write to this location
+
+  // Test the template overload (note that this will see the full buffer)
+  memset(buf, fill_mem, sizeof(buf));
+  EXPECT_EQ(0, _itow_s(12, buf, 10));
+  EXPECT_EQ(WStringToUTF16(L"12"), base::string16(buf));
+  EXPECT_EQ(fill_char, buf[3]);
+
+  // With all 6 elements visible, "12345" fits...
+  memset(buf, fill_mem, sizeof(buf));
+  EXPECT_EQ(0, _itow_s(12345, buf, 10));
+  EXPECT_EQ(WStringToUTF16(L"12345"), base::string16(buf));
+
+  // ...but "123456" does not.
+  EXPECT_EQ(EINVAL, _itow_s(123456, buf, 10));
+}
+
+#endif  // !WIN32
+
+// Reports whether every component of |a| matches the same component of |b|.
+static bool ParsedIsEqual(const Parsed& a, const Parsed& b) {
+  // Two components match when both their offset and their length agree.
+  const auto same = [](const Component& lhs, const Component& rhs) {
+    return lhs.begin == rhs.begin && lhs.len == rhs.len;
+  };
+  return same(a.scheme, b.scheme) && same(a.username, b.username) &&
+         same(a.password, b.password) && same(a.host, b.host) &&
+         same(a.port, b.port) && same(a.path, b.path) &&
+         same(a.query, b.query) && same(a.ref, b.ref);
+}
+
+TEST(URLCanonTest, ResolveRelativeURL) {
+  struct RelativeCase {
+    const char* base;      // Input base URL: MUST BE CANONICAL
+    bool is_base_hier;     // Is the base URL hierarchical
+    bool is_base_file;     // Tells us if the base is a file URL.
+    const char* test;      // Input URL to test against.
+    bool succeed_relative; // Whether we expect IsRelativeURL to succeed
+    bool is_rel;           // Whether we expect |test| to be relative or not.
+    bool succeed_resolve;  // Whether we expect ResolveRelativeURL to succeed.
+    const char* resolved;  // What we expect in the result when resolving.
+  } rel_cases[] = {
+      // Basic absolute input.
+    {"http://host/a", true, false, "http://another/", true, false, false, NULL},
+    {"http://host/a", true, false, "http:////another/", true, false, false, NULL},
+      // Empty relative URLs should only remove the ref part of the URL,
+      // leaving the rest unchanged.
+    {"http://foo/bar", true, false, "", true, true, true, "http://foo/bar"},
+    {"http://foo/bar#ref", true, false, "", true, true, true, "http://foo/bar"},
+    {"http://foo/bar#", true, false, "", true, true, true, "http://foo/bar"},
+      // Spaces at the ends of the relative path should be ignored.
+    {"http://foo/bar", true, false, "  another  ", true, true, true, "http://foo/another"},
+    {"http://foo/bar", true, false, "  .  ", true, true, true, "http://foo/"},
+    {"http://foo/bar", true, false, " \t ", true, true, true, "http://foo/bar"},
+      // Matching schemes without two slashes are treated as relative.
+    {"http://host/a", true, false, "http:path", true, true, true, "http://host/path"},
+    {"http://host/a/", true, false, "http:path", true, true, true, "http://host/a/path"},
+    {"http://host/a", true, false, "http:/path", true, true, true, "http://host/path"},
+    {"http://host/a", true, false, "HTTP:/path", true, true, true, "http://host/path"},
+      // Nonmatching schemes are absolute.
+    {"http://host/a", true, false, "https:host2", true, false, false, NULL},
+    {"http://host/a", true, false, "htto:/host2", true, false, false, NULL},
+      // Absolute path input
+    {"http://host/a", true, false, "/b/c/d", true, true, true, "http://host/b/c/d"},
+    {"http://host/a", true, false, "\\b\\c\\d", true, true, true, "http://host/b/c/d"},
+    {"http://host/a", true, false, "/b/../c", true, true, true, "http://host/c"},
+    {"http://host/a?b#c", true, false, "/b/../c", true, true, true, "http://host/c"},
+    {"http://host/a", true, false, "\\b/../c?x#y", true, true, true, "http://host/c?x#y"},
+    {"http://host/a?b#c", true, false, "/b/../c?x#y", true, true, true, "http://host/c?x#y"},
+      // Relative path input
+    {"http://host/a", true, false, "b", true, true, true, "http://host/b"},
+    {"http://host/a", true, false, "bc/de", true, true, true, "http://host/bc/de"},
+    {"http://host/a/", true, false, "bc/de?query#ref", true, true, true, "http://host/a/bc/de?query#ref"},
+    {"http://host/a/", true, false, ".", true, true, true, "http://host/a/"},
+    {"http://host/a/", true, false, "..", true, true, true, "http://host/"},
+    {"http://host/a/", true, false, "./..", true, true, true, "http://host/"},
+    {"http://host/a/", true, false, "../.", true, true, true, "http://host/"},
+    {"http://host/a/", true, false, "././.", true, true, true, "http://host/a/"},
+    {"http://host/a?query#ref", true, false, "../../../foo", true, true, true, "http://host/foo"},
+      // Query input
+    {"http://host/a", true, false, "?foo=bar", true, true, true, "http://host/a?foo=bar"},
+    {"http://host/a?x=y#z", true, false, "?", true, true, true, "http://host/a?"},
+    {"http://host/a?x=y#z", true, false, "?foo=bar#com", true, true, true, "http://host/a?foo=bar#com"},
+      // Ref input
+    {"http://host/a", true, false, "#ref", true, true, true, "http://host/a#ref"},
+    {"http://host/a#b", true, false, "#", true, true, true, "http://host/a#"},
+    {"http://host/a?foo=bar#hello", true, false, "#bye", true, true, true, "http://host/a?foo=bar#bye"},
+      // Non-hierarchical base: no relative handling. Relative input should
+      // error, and if a scheme is present, it should be treated as absolute.
+    {"data:foobar", false, false, "baz.html", false, false, false, NULL},
+    {"data:foobar", false, false, "data:baz", true, false, false, NULL},
+    {"data:foobar", false, false, "data:/base", true, false, false, NULL},
+      // Non-hierarchical base: absolute input should succeed.
+    {"data:foobar", false, false, "http://host/", true, false, false, NULL},
+    {"data:foobar", false, false, "http:host", true, false, false, NULL},
+      // Invalid schemes should be treated as relative.
+    {"http://foo/bar", true, false, "./asd:fgh", true, true, true, "http://foo/asd:fgh"},
+    {"http://foo/bar", true, false, ":foo", true, true, true, "http://foo/:foo"},
+    {"http://foo/bar", true, false, " hello world", true, true, true, "http://foo/hello%20world"},
+    {"data:asdf", false, false, ":foo", false, false, false, NULL},
+    {"data:asdf", false, false, "bad(':foo')", false, false, false, NULL},
+      // We should treat semicolons like any other character in URL resolving
+    {"http://host/a", true, false, ";foo", true, true, true, "http://host/;foo"},
+    {"http://host/a;", true, false, ";foo", true, true, true, "http://host/;foo"},
+    {"http://host/a", true, false, ";/../bar", true, true, true, "http://host/bar"},
+      // Relative URLs can also be written as "//foo/bar" which is relative to
+      // the scheme. In this case, it would take the old scheme, so for http
+      // the example would resolve to "http://foo/bar".
+    {"http://host/a", true, false, "//another", true, true, true, "http://another/"},
+    {"http://host/a", true, false, "//another/path?query#ref", true, true, true, "http://another/path?query#ref"},
+    {"http://host/a", true, false, "///another/path", true, true, true, "http://another/path"},
+    {"http://host/a", true, false, "//Another\\path", true, true, true, "http://another/path"},
+    {"http://host/a", true, false, "//", true, true, false, "http:"},
+      // IE will also allow one or the other to be a backslash to get the same
+      // behavior.
+    {"http://host/a", true, false, "\\/another/path", true, true, true, "http://another/path"},
+    {"http://host/a", true, false, "/\\Another\\path", true, true, true, "http://another/path"},
+#ifdef WIN32
+      // Resolving against Windows file base URLs.
+    {"file:///C:/foo", true, true, "http://host/", true, false, false, NULL},
+    {"file:///C:/foo", true, true, "bar", true, true, true, "file:///C:/bar"},
+    {"file:///C:/foo", true, true, "../../../bar.html", true, true, true, "file:///C:/bar.html"},
+    {"file:///C:/foo", true, true, "/../bar.html", true, true, true, "file:///C:/bar.html"},
+      // But two backslashes on Windows should be UNC so should be treated
+      // as absolute.
+    {"http://host/a", true, false, "\\\\another\\path", true, false, false, NULL},
+      // IE doesn't support drive specs starting with two slashes. It fails
+      // immediately and doesn't even try to load. We fix it up to either
+      // an absolute path or UNC depending on what it looks like.
+    {"file:///C:/something", true, true, "//c:/foo", true, true, true, "file:///C:/foo"},
+    {"file:///C:/something", true, true, "//localhost/c:/foo", true, true, true, "file:///C:/foo"},
+      // Windows drive specs should be allowed and treated as absolute.
+    {"file:///C:/foo", true, true, "c:", true, false, false, NULL},
+    {"file:///C:/foo", true, true, "c:/foo", true, false, false, NULL},
+    {"http://host/a", true, false, "c:\\foo", true, false, false, NULL},
+      // Relative paths with drive letters should be allowed when the base is
+      // also a file.
+    {"file:///C:/foo", true, true, "/z:/bar", true, true, true, "file:///Z:/bar"},
+      // Treat absolute paths as being off of the drive.
+    {"file:///C:/foo", true, true, "/bar", true, true, true, "file:///C:/bar"},
+    {"file://localhost/C:/foo", true, true, "/bar", true, true, true, "file://localhost/C:/bar"},
+    {"file:///C:/foo/com/", true, true, "/bar", true, true, true, "file:///C:/bar"},
+      // On Windows, two slashes without a drive letter when the base is a file
+      // means that the path is UNC.
+    {"file:///C:/something", true, true, "//somehost/path", true, true, true, "file://somehost/path"},
+    {"file:///C:/something", true, true, "/\\//somehost/path", true, true, true, "file://somehost/path"},
+#else
+      // On Unix we fall back to relative behavior since there's nothing else
+      // reasonable to do.
+    {"http://host/a", true, false, "\\\\Another\\path", true, true, true, "http://another/path"},
+#endif
+      // Even on Windows, we don't allow relative drive specs when the base
+      // is not file.
+    {"http://host/a", true, false, "/c:\\foo", true, true, true, "http://host/c:/foo"},
+    {"http://host/a", true, false, "//c:\\foo", true, true, true, "http://c/foo"},
+      // Ensure that ports aren't allowed for hosts relative to a file url.
+      // Although the result string shows a host:port portion, the call to
+      // resolve the relative URL returns false, indicating parse failure,
+      // which is what is required.
+    {"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false, "file://host:80/bar.txt"},
+      // Filesystem URL tests; filesystem URLs are only valid and relative if
+      // they have no scheme, e.g. "./index.html".  There's no valid equivalent
+      // to http:index.html.
+    {"filesystem:http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
+    {"filesystem:http://host/t/path", true, false, "filesystem:https://host/t/path2", true, false, false, NULL},
+    {"filesystem:http://host/t/path", true, false, "http://host/t/path2", true, false, false, NULL},
+    {"http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
+    {"filesystem:http://host/t/path", true, false, "./path2", true, true, true, "filesystem:http://host/t/path2"},
+    {"filesystem:http://host/t/path/", true, false, "path2", true, true, true, "filesystem:http://host/t/path/path2"},
+    {"filesystem:http://host/t/path", true, false, "filesystem:http:path2", true, false, false, NULL},
+      // Absolute URLs are still not relative to a non-standard base URL.
+    {"about:blank", false, false, "http://X/A", true, false, true, ""},
+    {"about:blank", false, false, "content://content.Provider/", true, false, true, ""},
+  };
+
+  for (size_t i = 0; i < arraysize(rel_cases); i++) {
+    const RelativeCase& cur_case = rel_cases[i];
+
+    Parsed parsed;
+    int base_len = static_cast<int>(strlen(cur_case.base));
+    if (cur_case.is_base_file)
+      ParseFileURL(cur_case.base, base_len, &parsed);
+    else if (cur_case.is_base_hier)
+      ParseStandardURL(cur_case.base, base_len, &parsed);
+    else
+      ParsePathURL(cur_case.base, base_len, false, &parsed);
+
+    // First see if it is relative.
+    int test_len = static_cast<int>(strlen(cur_case.test));
+    bool is_relative;
+    Component relative_component;
+    bool succeed_is_rel = IsRelativeURL(
+        cur_case.base, parsed, cur_case.test, test_len, cur_case.is_base_hier,
+        &is_relative, &relative_component);
+
+    EXPECT_EQ(cur_case.succeed_relative, succeed_is_rel) <<
+        "succeed is rel failure on " << cur_case.test;
+    EXPECT_EQ(cur_case.is_rel, is_relative) <<
+        "is rel failure on " << cur_case.test;
+    // Now resolve it, but only if it was both detected and expected as relative.
+    if (succeed_is_rel && is_relative && cur_case.is_rel) {
+      std::string resolved;
+      StdStringCanonOutput output(&resolved);
+      Parsed resolved_parsed;
+
+      bool succeed_resolve = ResolveRelativeURL(
+          cur_case.base, parsed, cur_case.is_base_file, cur_case.test,
+          relative_component, NULL, &output, &resolved_parsed);
+      output.Complete();
+
+      EXPECT_EQ(cur_case.succeed_resolve, succeed_resolve);
+      EXPECT_EQ(cur_case.resolved, resolved) << " on " << cur_case.test;
+
+      // Verify that the output parsed structure is the same as the one we
+      // get from freshly parsing the resolved URL with the matching parser.
+      Parsed ref_parsed;
+      int resolved_len = static_cast<int>(resolved.size());
+      if (cur_case.is_base_file) {
+        ParseFileURL(resolved.c_str(), resolved_len, &ref_parsed);
+      } else if (cur_case.is_base_hier) {
+        ParseStandardURL(resolved.c_str(), resolved_len, &ref_parsed);
+      } else {
+        ParsePathURL(resolved.c_str(), resolved_len, false, &ref_parsed);
+      }
+      EXPECT_TRUE(ParsedIsEqual(ref_parsed, resolved_parsed));
+    }
+  }
+}
+
+// It used to be that when we did a replacement with a long buffer of UTF-16
+// characters, we would get invalid data in the URL. This is because the buffer
+// it used to hold the UTF-8 data was resized, while some pointers were still
+// kept to the old buffer that was removed.
+TEST(URLCanonTest, ReplacementOverflow) {
+  const char src[] = "file:///C:/foo/bar";
+  int src_len = static_cast<int>(strlen(src));
+  Parsed parsed;
+  ParseFileURL(src, src_len, &parsed);
+
+  // Override two components, the path with something short, and the query with
+  // something long enough to trigger the bug.
+  Replacements<base::char16> repl;
+  base::string16 new_query;
+  for (int i = 0; i < 4800; i++)
+    new_query.push_back('a');
+
+  base::string16 new_path(WStringToUTF16(L"/foo"));
+  repl.SetPath(new_path.c_str(), Component(0, 4));
+  repl.SetQuery(new_query.c_str(),
+                Component(0, static_cast<int>(new_query.length())));
+
+  // Call ReplaceComponents on the string. It doesn't matter if we call it for
+  // standard URLs, file URLs, etc, since they will go to the same replacement
+  // function that was buggy.
+  Parsed repl_parsed;
+  std::string repl_str;
+  StdStringCanonOutput repl_output(&repl_str);
+  ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed);
+  repl_output.Complete();
+
+  // Generate the expected string and check.
+  std::string expected("file:///foo?");
+  for (size_t i = 0; i < new_query.length(); i++)
+    expected.push_back('a');
+  EXPECT_TRUE(expected == repl_str);
+}
+
+}  // namespace url

diff --git a/src/url/url_constants.cc b/src/url/url_constants.cc
new file mode 100644
index 0000000..2dc1478
--- /dev/null
+++ b/src/url/url_constants.cc

@@ -0,0 +1,28 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_constants.h"
+
+namespace url {
+
+const char kAboutBlankURL[] = "about:blank";  // A full URL, not a bare scheme.
+
+const char kAboutScheme[] = "about";
+const char kBlobScheme[] = "blob";
+const char kContentScheme[] = "content";
+const char kDataScheme[] = "data";
+const char kFileScheme[] = "file";
+const char kFileSystemScheme[] = "filesystem";
+const char kFtpScheme[] = "ftp";
+const char kGopherScheme[] = "gopher";
+const char kHttpScheme[] = "http";
+const char kHttpsScheme[] = "https";
+const char kJavaScriptScheme[] = "javascript";
+const char kMailToScheme[] = "mailto";
+const char kWsScheme[] = "ws";
+const char kWssScheme[] = "wss";
+
+const char kStandardSchemeSeparator[] = "://";  // Follows a standard scheme.
+
+}  // namespace url

diff --git a/src/url/url_constants.h b/src/url/url_constants.h
new file mode 100644
index 0000000..c48dafc
--- /dev/null
+++ b/src/url/url_constants.h

@@ -0,0 +1,35 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CONSTANTS_H_
+#define URL_URL_CONSTANTS_H_
+
+#include "url/url_export.h"
+
+namespace url {
+
+URL_EXPORT extern const char kAboutBlankURL[];  // "about:blank"
+
+URL_EXPORT extern const char kAboutScheme[];
+URL_EXPORT extern const char kBlobScheme[];
+// The content scheme is Android-specific; it identifies a stored file.
+URL_EXPORT extern const char kContentScheme[];
+URL_EXPORT extern const char kDataScheme[];
+URL_EXPORT extern const char kFileScheme[];
+URL_EXPORT extern const char kFileSystemScheme[];
+URL_EXPORT extern const char kFtpScheme[];
+URL_EXPORT extern const char kGopherScheme[];
+URL_EXPORT extern const char kHttpScheme[];
+URL_EXPORT extern const char kHttpsScheme[];
+URL_EXPORT extern const char kJavaScriptScheme[];
+URL_EXPORT extern const char kMailToScheme[];
+URL_EXPORT extern const char kWsScheme[];
+URL_EXPORT extern const char kWssScheme[];
+
+// Separates a standard scheme from the hostname: "://".
+URL_EXPORT extern const char kStandardSchemeSeparator[];
+
+}  // namespace url
+
+#endif  // URL_URL_CONSTANTS_H_

diff --git a/src/url/url_export.h b/src/url/url_export.h
new file mode 100644
index 0000000..15ef19e
--- /dev/null
+++ b/src/url/url_export.h

@@ -0,0 +1,33 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_EXPORT_H_
+#define URL_URL_EXPORT_H_
+
+#if defined(COMPONENT_BUILD)
+#if defined(WIN32)
+
+#if defined(URL_IMPLEMENTATION)
+#define URL_EXPORT __declspec(dllexport)
+#else
+#define URL_EXPORT __declspec(dllimport)
+#endif  // defined(URL_IMPLEMENTATION)
+
+#else  // !defined(WIN32)
+
+#if defined(URL_IMPLEMENTATION)
+#define URL_EXPORT __attribute__((visibility("default")))
+#else
+#define URL_EXPORT
+#endif  // defined(URL_IMPLEMENTATION)
+
+#endif  // defined(WIN32)
+
+#else  // !defined(COMPONENT_BUILD)
+
+#define URL_EXPORT
+
+#endif  // defined(COMPONENT_BUILD)
+
+#endif  // URL_URL_EXPORT_H_

diff --git a/src/url/url_file.h b/src/url/url_file.h
new file mode 100644
index 0000000..540cb25
--- /dev/null
+++ b/src/url/url_file.h

@@ -0,0 +1,83 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_FILE_H_
+#define URL_URL_FILE_H_
+
+// Provides shared functions used by the internals of the parser and
+// canonicalizer for file URLs. Do not use outside of these modules.
+
+#include "url/url_parse_internal.h"
+
+namespace url {
+
+#ifdef WIN32
+
+// Drive identifiers may end in either ':' or '|', so "c:" and "c|" both work.
+inline bool IsWindowsDriveSeparator(base::char16 ch) {
+  return ch == '|' || ch == ':';
+}
+inline bool IsWindowsDriveLetter(base::char16 ch) {
+  return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');  // ASCII only.
+}
+
+#endif  // WIN32
+
+// Returns the index of the first slash at or after |begin_index|, or the
+// position where the scan stopped (spec_len) when no slash is found.
+template<typename CHAR>
+inline int FindNextSlash(const CHAR* spec, int begin_index, int spec_len) {
+  int pos = begin_index;
+  for (; pos < spec_len && !IsURLSlash(spec[pos]); ++pos) {
+  }
+  return pos;
+}
+
+#ifdef WIN32
+
+// Reports whether the text at |start_offset| in |spec| looks like the start of
+// a drive spec such as "c:". A |start_offset| at or beyond |spec_len| is
+// handled here (the answer is false) so callers need not check for it.
+//
+// A true result guarantees that a drive letter followed by a drive separator
+// is present starting at |start_offset|.
+template<typename CHAR>
+inline bool DoesBeginWindowsDriveSpec(const CHAR* spec, int start_offset,
+                                      int spec_len) {
+  // Both a drive letter and its separator must fit in what is left.
+  const bool has_room = spec_len - start_offset >= 2;
+  if (!has_room)
+    return false;
+
+  // Letter first, separator second, evaluated in that order.
+  return IsWindowsDriveLetter(spec[start_offset]) &&
+         IsWindowsDriveSeparator(spec[start_offset + 1]);
+}
+
+// Reports whether the text at |start_offset| looks like the start of a UNC
+// path, for example "\\". A |start_offset| at or beyond |len| is handled here
+// (the answer is false) so callers need not check for it.
+//
+// With |strict_slashes| set, only backslashes are accepted, as is standard
+// for Windows. Otherwise forward slashes are accepted as well, which much of
+// our URL handling relies on.
+template<typename CHAR>
+inline bool DoesBeginUNCPath(const CHAR* text,
+                             int start_offset,
+                             int len,
+                             bool strict_slashes) {
+  if (len - start_offset < 2)
+    return false;  // No room for two slashes.
+  const CHAR first = text[start_offset];
+  const CHAR second = text[start_offset + 1];
+  if (!strict_slashes)
+    return IsURLSlash(first) && IsURLSlash(second);
+  return first == '\\' && second == '\\';
+}
+
+#endif  // WIN32
+
+}  // namespace url
+
+#endif  // URL_URL_FILE_H_

diff --git a/src/url/url_parse.h b/src/url/url_parse.h
new file mode 100644
index 0000000..3b9c546
--- /dev/null
+++ b/src/url/url_parse.h

@@ -0,0 +1,11 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_PARSE_H_
+#define URL_URL_PARSE_H_
+
+// TODO(tfarina): Remove this file when the callers are updated.
+#include "url/third_party/mozilla/url_parse.h"
+
+#endif  // URL_URL_PARSE_H_

diff --git a/googleurl/src/url_parse_file.cc b/src/url/url_parse_file.cc
similarity index 78%
rename from googleurl/src/url_parse_file.cc
rename to src/url/url_parse_file.cc
index 2e8429f..c08ddc6 100644
--- a/googleurl/src/url_parse_file.cc
+++ b/src/url/url_parse_file.cc

@@ -1,36 +1,11 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
 #include "base/logging.h"
-#include "googleurl/src/url_file.h"
-#include "googleurl/src/url_parse.h"
-#include "googleurl/src/url_parse_internal.h"
+#include "url/url_file.h"
+#include "url/url_parse.h"
+#include "url/url_parse_internal.h"
 
 // Interesting IE file:isms...
 //
@@ -63,7 +38,7 @@
 //      it looks like an absolute drive path. Also, slashes and backslashes are
 //      equally valid here.
 
-namespace url_parse {
+namespace url {
 
 namespace {
 
@@ -135,8 +110,9 @@
 }
 
 // Backend for the external functions that operates on either char type.
-// We are handed the character after the "file:" at the beginning of the spec.
-// Usually this is a slash, but needn't be; we allow paths like "file:c:\foo".
+// Handles cases where there is a scheme, but also when handed the first
+// character following the "file:" at the beginning of the spec. If so,
+// this is usually a slash, but needn't be; we allow paths like "file:c:\foo".
 template<typename CHAR>
 void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
   DCHECK(spec_len >= 0);
@@ -155,8 +131,8 @@
   int begin = 0;
   TrimURL(spec, &begin, &spec_len);
 
-  // Find the scheme.
-  int num_slashes;
+  // Find the scheme, if any.
+  int num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
   int after_scheme;
   int after_slashes;
 #ifdef WIN32
@@ -165,7 +141,6 @@
   // links like "c:/foo/bar" or "//foo/bar". This is also called by the
   // relative URL resolver when it determines there is an absolute URL, which
   // may give us input like "/c:/foo".
-  num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
   after_slashes = begin + num_slashes;
   if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) {
     // Windows path, don't try to extract the scheme (for example, "c:\foo").
@@ -178,7 +153,12 @@
   } else
 #endif
   {
-    if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
+    // ExtractScheme doesn't understand the possibility of filenames with
+    // colons in them, in which case it returns the entire spec up to the
+    // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as
+    // the foo.c: scheme.
+    if (!num_slashes &&
+        ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
       // Offset the results since we gave ExtractScheme a substring.
       parsed->scheme.begin += begin;
       after_scheme = parsed->scheme.end() + 1;
@@ -198,7 +178,6 @@
   }
 
   num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
-
   after_slashes = after_scheme + num_slashes;
 #ifdef WIN32
   // Check whether the input is a drive again. We checked above for windows
@@ -236,8 +215,8 @@
   DoParseFileURL(url, url_len, parsed);
 }
 
-void ParseFileURL(const char16* url, int url_len, Parsed* parsed) {
+void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed) {
   DoParseFileURL(url, url_len, parsed);
 }
 
-}  // namespace url_parse
+}  // namespace url

diff --git a/src/url/url_parse_internal.h b/src/url/url_parse_internal.h
new file mode 100644
index 0000000..4070b7e
--- /dev/null
+++ b/src/url/url_parse_internal.h

@@ -0,0 +1,91 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_PARSE_INTERNAL_H_
+#define URL_URL_PARSE_INTERNAL_H_
+
+// Contains common inline helper functions used by the URL parsing routines.
+
+#include "url/url_parse.h"
+
+namespace url {
+
+// Backslashes count as slashes too, for IE compatibility.
+inline bool IsURLSlash(base::char16 ch) {
+  return ch == '\\' || ch == '/';
+}
+
+// Spaces and control characters, i.e. everything up to and including ' ',
+// are trimmed from the ends of a URL; this reports whether |ch| is one.
+inline bool ShouldTrimFromURL(base::char16 ch) {
+  return !(ch > ' ');
+}
+
+// Narrows an already-initialized [*begin, *len) range so that it no longer
+// starts (or, when |trim_path_end| is set, ends) with characters that should
+// be trimmed. Despite its name, |*len| is not a count of characters from
+// |*begin|: it is the end position within |spec|, so the trimmed text runs
+// from index |*begin| up to, but not including, index |*len|.
+template<typename CHAR>
+inline void TrimURL(const CHAR* spec, int* begin, int* len,
+                    bool trim_path_end = true) {
+  int first = *begin;
+  int last = *len;
+  // Skip over leading whitespace and control characters.
+  while (first < last && ShouldTrimFromURL(spec[first]))
+    ++first;
+  // When asked to, back up over trailing ones too. Stopping at |first| keeps
+  // an all-blank input from moving the end in front of the start.
+  while (trim_path_end && last > first && ShouldTrimFromURL(spec[last - 1]))
+    --last;
+  *begin = first;
+  *len = last;
+}
+
+// Returns how many slashes appear back to back in |str|, starting at
+// |begin_offset| and never reading at or past |str_len|.
+template<typename CHAR>
+inline int CountConsecutiveSlashes(const CHAR *str,
+                                   int begin_offset, int str_len) {
+  // Walk forward while slashes continue; the distance walked is the count.
+  int pos = begin_offset;
+  while (pos < str_len && IsURLSlash(str[pos]))
+    ++pos;
+  return pos - begin_offset;
+}
+
+// Internal functions in url_parse.cc that parse the path, that is, everything
+// following the authority section. The input is the range of everything
+// following the authority section, and the output is the identified ranges.
+//
+// This is designed for the file URL parser or other consumers who may do
+// special stuff at the beginning, but want regular path parsing, it just
+// maps to the internal parsing function for paths.
+void ParsePathInternal(const char* spec,
+                       const Component& path,
+                       Component* filepath,
+                       Component* query,
+                       Component* ref);
+void ParsePathInternal(const base::char16* spec,
+                       const Component& path,
+                       Component* filepath,
+                       Component* query,
+                       Component* ref);
+
+
+// Given a spec and the index of the character after the colon following the
+// scheme, this parses it and fills in the structure. Every item in the parsed
+// structure is filled EXCEPT for the scheme, which is untouched.
+void ParseAfterScheme(const char* spec,
+                      int spec_len,
+                      int after_scheme,
+                      Parsed* parsed);
+void ParseAfterScheme(const base::char16* spec,
+                      int spec_len,
+                      int after_scheme,
+                      Parsed* parsed);
+
+}  // namespace url
+
+#endif  // URL_URL_PARSE_INTERNAL_H_

diff --git a/googleurl/src/url_parse_unittest.cc b/src/url/url_parse_unittest.cc
similarity index 71%
rename from googleurl/src/url_parse_unittest.cc
rename to src/url/url_parse_unittest.cc
index 299488b..dedd663 100644
--- a/googleurl/src/url_parse_unittest.cc
+++ b/src/url/url_parse_unittest.cc

@@ -1,42 +1,12 @@
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
-#include "base/basictypes.h"
-#include "googleurl/src/url_parse.h"
+#include "url/url_parse.h"
+
+#include "base/macros.h"
 #include "testing/base/public/gunit.h"
-
-// Some implementations of base/basictypes.h may define ARRAYSIZE.
-// If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro
-// which is in our version of basictypes.h.
-#ifndef ARRAYSIZE
-#define ARRAYSIZE ARRAYSIZE_UNSAFE
-#endif
+#include "url/url_parse.h"
 
 // Interesting IE file:isms...
 //
@@ -68,6 +38,7 @@
 //      it looks like an absolute drive path. Also, slashes and backslashes are
 //      equally valid here.
 
+namespace url {
 namespace {
 
 // Used for regular URL parse cases.
@@ -101,10 +72,24 @@
   const char* query;
 };
 
+// More complicated version of URLParseCase for testing filesystem URLs.
+struct FileSystemURLParseCase {
+  const char* input;  // URL text handed to ParseFileSystemURL.
+
+  const char* inner_scheme;    // Expected inner_parsed() scheme.
+  const char* inner_username;  // Expected inner_parsed() username.
+  const char* inner_password;  // Expected inner_parsed() password.
+  const char* inner_host;      // Expected inner_parsed() host.
+  int inner_port;              // Expected ParsePort() of the inner port.
+  const char* inner_path;      // NOTE(review): not read by the visible test.
+  const char* path;            // Expected outer path.
+  const char* query;           // Expected outer query.
+  const char* ref;             // Expected outer ref.
+};
 
 bool ComponentMatches(const char* input,
                       const char* reference,
-                      const url_parse::Component& component) {
+                      const Component& component) {
   // If the component is nonexistant (length == -1), it should begin at 0.
   EXPECT_TRUE(component.len >= 0 || component.len == -1);
 
@@ -124,13 +109,11 @@
   return strncmp(reference, &input[component.begin], component.len) == 0;
 }
 
-void ExpectInvalidComponent(const url_parse::Component& component) {
+void ExpectInvalidComponent(const Component& component) {
   EXPECT_EQ(0, component.begin);
   EXPECT_EQ(-1, component.len);
 }
 
-}  // namespace
-
 // Parsed ----------------------------------------------------------------------
 
 TEST(URLParser, Length) {
@@ -155,24 +138,23 @@
   for (size_t i = 0; i < arraysize(length_cases); i++) {
     int true_length = static_cast<int>(strlen(length_cases[i]));
 
-    url_parse::Parsed parsed;
-    url_parse::ParseStandardURL(length_cases[i], true_length, &parsed);
+    Parsed parsed;
+    ParseStandardURL(length_cases[i], true_length, &parsed);
 
     EXPECT_EQ(true_length, parsed.Length());
   }
 }
 
 TEST(URLParser, CountCharactersBefore) {
-  using namespace url_parse;
   struct CountCase {
     const char* url;
     Parsed::ComponentType component;
     bool include_delimiter;
     int expected_count;
   } count_cases[] = {
-      // Test each possibility in the case where all components are present.
-//    0         1         2
-//    0123456789012345678901
+  // Test each possibility in the case where all components are present.
+  //    0         1         2
+  //    0123456789012345678901
     {"http://u:p@h:8/p?q#r", Parsed::SCHEME, true, 0},
     {"http://u:p@h:8/p?q#r", Parsed::SCHEME, false, 0},
     {"http://u:p@h:8/p?q#r", Parsed::USERNAME, true, 7},
@@ -212,15 +194,15 @@
     {"file:///c:/foo", Parsed::HOST, true, 7},
     {"file:///c:/foo", Parsed::PATH, true, 7},
   };
-  for (size_t i = 0; i < ARRAYSIZE(count_cases); i++) {
+  for (size_t i = 0; i < arraysize(count_cases); i++) {
     int length = static_cast<int>(strlen(count_cases[i].url));
 
     // Simple test to distinguish file and standard URLs.
-    url_parse::Parsed parsed;
+    Parsed parsed;
     if (length > 0 && count_cases[i].url[0] == 'f')
-      url_parse::ParseFileURL(count_cases[i].url, length, &parsed);
+      ParseFileURL(count_cases[i].url, length, &parsed);
     else
-      url_parse::ParseStandardURL(count_cases[i].url, length, &parsed);
+      ParseStandardURL(count_cases[i].url, length, &parsed);
 
     int chars_before = parsed.CountCharactersBefore(
         count_cases[i].component, count_cases[i].include_delimiter);
@@ -328,12 +310,12 @@
 
 TEST(URLParser, Standard) {
   // Declared outside for loop to try to catch cases in init() where we forget
-  // to reset something that is reset by the construtor.
-  url_parse::Parsed parsed;
+  // to reset something that is reset by the constructor.
+  Parsed parsed;
   for (size_t i = 0; i < arraysize(cases); i++) {
     const char* url = cases[i].input;
-    url_parse::ParseStandardURL(url, static_cast<int>(strlen(url)), &parsed);
-    int port = url_parse::ParsePort(url, parsed.port);
+    ParseStandardURL(url, static_cast<int>(strlen(url)), &parsed);
+    int port = ParsePort(url, parsed.port);
 
     EXPECT_TRUE(ComponentMatches(url, cases[i].scheme, parsed.scheme));
     EXPECT_TRUE(ComponentMatches(url, cases[i].username, parsed.username));
@@ -354,40 +336,37 @@
 {":",                                       "",            NULL},
 {":/",                                      "",            "/"},
 {"/",                                       NULL,          "/"},
-{" This is \\interesting// \t",             NULL,          "This is \\interesting//"},
+{" This is \\interesting// \t",             NULL,          "This is \\interesting// \t"},
 {"about:",                                  "about",       NULL},
 {"about:blank",                             "about",       "blank"},
-{"  about: blank ",                         "about",       " blank"},
-{"javascript :alert(\"He:/l\\l#o?foo\"); ", "javascript ", "alert(\"He:/l\\l#o?foo\");"},
+{"  about: blank ",                         "about",       " blank "},
+{"javascript :alert(\"He:/l\\l#o?foo\"); ", "javascript ", "alert(\"He:/l\\l#o?foo\"); "},
 };
 
 TEST(URLParser, PathURL) {
   // Declared outside for loop to try to catch cases in init() where we forget
   // to reset something that is reset by the construtor.
-  url_parse::Parsed parsed;
+  Parsed parsed;
   for (size_t i = 0; i < arraysize(path_cases); i++) {
     const char* url = path_cases[i].input;
-    url_parse::ParsePathURL(url, static_cast<int>(strlen(url)), &parsed);
+    ParsePathURL(url, static_cast<int>(strlen(url)), false, &parsed);
 
-    EXPECT_TRUE(ComponentMatches(url, path_cases[i].scheme, parsed.scheme));
-    EXPECT_TRUE(ComponentMatches(url, path_cases[i].path, parsed.path));
+    EXPECT_TRUE(ComponentMatches(url, path_cases[i].scheme, parsed.scheme))
+        << i;
+    EXPECT_TRUE(ComponentMatches(url, path_cases[i].path, parsed.GetContent()))
+        << i;
 
     // The remaining components are never used for path urls.
     ExpectInvalidComponent(parsed.username);
     ExpectInvalidComponent(parsed.password);
     ExpectInvalidComponent(parsed.host);
     ExpectInvalidComponent(parsed.port);
-    ExpectInvalidComponent(parsed.query);
-    ExpectInvalidComponent(parsed.ref);
   }
 }
 
-#ifdef WIN32
-
-// WindowsFile ----------------------------------------------------------------
-
-// Various incarnations of file URLs. These are for Windows only.
+// Various incarnations of file URLs.
 static URLParseCase file_cases[] = {
+#ifdef WIN32
 {"file:server",              "file", NULL, NULL, "server", -1, NULL,          NULL, NULL},
 {"  file: server  \t",       "file", NULL, NULL, " server",-1, NULL,          NULL, NULL},
 {"FiLe:c|",                  "FiLe", NULL, NULL, NULL,     -1, "c|",          NULL, NULL},
@@ -415,29 +394,96 @@
   // Queries and refs are valid for file URLs as well.
 {"file:///C:/foo.html?#",   "file", NULL, NULL,  NULL,     -1, "/C:/foo.html",  "",   ""},
 {"file:///C:/foo.html?query=yes#ref", "file", NULL, NULL, NULL, -1, "/C:/foo.html", "query=yes", "ref"},
+#else  // WIN32
+  // No slashes.
+  {"file:",                    "file", NULL, NULL, NULL,      -1, NULL,             NULL, NULL},
+  {"file:path",                "file", NULL, NULL, NULL,      -1, "path",           NULL, NULL},
+  {"file:path/",               "file", NULL, NULL, NULL,      -1, "path/",          NULL, NULL},
+  {"file:path/f.txt",          "file", NULL, NULL, NULL,      -1, "path/f.txt",     NULL, NULL},
+  // One slash.
+  {"file:/",                   "file", NULL, NULL, NULL,      -1, "/",              NULL, NULL},
+  {"file:/path",               "file", NULL, NULL, NULL,      -1, "/path",          NULL, NULL},
+  {"file:/path/",              "file", NULL, NULL, NULL,      -1, "/path/",         NULL, NULL},
+  {"file:/path/f.txt",         "file", NULL, NULL, NULL,      -1, "/path/f.txt",    NULL, NULL},
+  // Two slashes.
+  {"file://",                  "file", NULL, NULL, NULL,      -1, NULL,             NULL, NULL},
+  {"file://server",            "file", NULL, NULL, "server",  -1, NULL,             NULL, NULL},
+  {"file://server/",           "file", NULL, NULL, "server",  -1, "/",              NULL, NULL},
+  {"file://server/f.txt",      "file", NULL, NULL, "server",  -1, "/f.txt",         NULL, NULL},
+  // Three slashes.
+  {"file:///",                 "file", NULL, NULL, NULL,      -1, "/",              NULL, NULL},
+  {"file:///path",             "file", NULL, NULL, NULL,      -1, "/path",          NULL, NULL},
+  {"file:///path/",            "file", NULL, NULL, NULL,      -1, "/path/",         NULL, NULL},
+  {"file:///path/f.txt",       "file", NULL, NULL, NULL,      -1, "/path/f.txt",    NULL, NULL},
+  // More than three slashes.
+  {"file:////",                "file", NULL, NULL, NULL,      -1, "/",              NULL, NULL},
+  {"file:////path",            "file", NULL, NULL, NULL,      -1, "/path",          NULL, NULL},
+  {"file:////path/",           "file", NULL, NULL, NULL,      -1, "/path/",         NULL, NULL},
+  {"file:////path/f.txt",      "file", NULL, NULL, NULL,      -1, "/path/f.txt",    NULL, NULL},
+  // Schemeless URLs
+  {"path/f.txt",               NULL,   NULL, NULL, NULL,       -1, "path/f.txt",    NULL, NULL},
+  {"path:80/f.txt",            "path", NULL, NULL, NULL,       -1, "80/f.txt",      NULL, NULL},
+  {"path/f.txt:80",            "path/f.txt",NULL, NULL, NULL,  -1, "80",            NULL, NULL}, // Wrong.
+  {"/path/f.txt",              NULL,   NULL, NULL, NULL,       -1, "/path/f.txt",   NULL, NULL},
+  {"/path:80/f.txt",           NULL,   NULL, NULL, NULL,       -1, "/path:80/f.txt",NULL, NULL},
+  {"/path/f.txt:80",           NULL,   NULL, NULL, NULL,       -1, "/path/f.txt:80",NULL, NULL},
+  {"//server/f.txt",           NULL,   NULL, NULL, "server",   -1, "/f.txt",        NULL, NULL},
+  {"//server:80/f.txt",        NULL,   NULL, NULL, "server:80",-1, "/f.txt",        NULL, NULL},
+  {"//server/f.txt:80",        NULL,   NULL, NULL, "server",   -1, "/f.txt:80",     NULL, NULL},
+  {"///path/f.txt",            NULL,   NULL, NULL, NULL,       -1, "/path/f.txt",   NULL, NULL},
+  {"///path:80/f.txt",         NULL,   NULL, NULL, NULL,       -1, "/path:80/f.txt",NULL, NULL},
+  {"///path/f.txt:80",         NULL,   NULL, NULL, NULL,       -1, "/path/f.txt:80",NULL, NULL},
+  {"////path/f.txt",           NULL,   NULL, NULL, NULL,       -1, "/path/f.txt",   NULL, NULL},
+  {"////path:80/f.txt",        NULL,   NULL, NULL, NULL,       -1, "/path:80/f.txt",NULL, NULL},
+  {"////path/f.txt:80",        NULL,   NULL, NULL, NULL,       -1, "/path/f.txt:80",NULL, NULL},
+  // Queries and refs are valid for file URLs as well.
+  {"file:///foo.html?#",       "file", NULL, NULL, NULL,       -1, "/foo.html",     "",   ""},
+  {"file:///foo.html?q=y#ref", "file", NULL, NULL, NULL,       -1, "/foo.html",    "q=y", "ref"},
+#endif  // WIN32
 };
 
-TEST(URLParser, WindowsFile) {
+TEST(URLParser, ParseFileURL) {
   // Declared outside for loop to try to catch cases in init() where we forget
   // to reset something that is reset by the construtor.
-  url_parse::Parsed parsed;
-  for (int i = 0; i < arraysize(file_cases); i++) {
+  Parsed parsed;
+  for (size_t i = 0; i < arraysize(file_cases); i++) {
     const char* url = file_cases[i].input;
-    url_parse::ParseFileURL(url, static_cast<int>(strlen(url)), &parsed);
-    int port = url_parse::ParsePort(url, parsed.port);
+    ParseFileURL(url, static_cast<int>(strlen(url)), &parsed);
+    int port = ParsePort(url, parsed.port);
 
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].scheme, parsed.scheme));
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].username, parsed.username));
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].password, parsed.password));
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].host, parsed.host));
-    EXPECT_EQ(file_cases[i].port, port);
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].path, parsed.path));
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].query, parsed.query));
-    EXPECT_TRUE(ComponentMatches(url, file_cases[i].ref, parsed.ref));
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].scheme, parsed.scheme))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.scheme.begin << ", " << parsed.scheme.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].username, parsed.username))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.username.begin << ", " << parsed.username.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].password, parsed.password))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.password.begin << ", " << parsed.password.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].host, parsed.host))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.host.begin << ", " << parsed.host.len;
+
+    EXPECT_EQ(file_cases[i].port, port)
+        << " for case #" << i << " [ " << url << "] " << port;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].path, parsed.path))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.path.begin << ", " << parsed.path.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].query, parsed.query))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.query.begin << ", " << parsed.query.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].ref, parsed.ref))
+        << " for case #" << i << " [ " << url << "] "
+        << parsed.ref.begin << ", " << parsed.ref.len;
   }
 }
 
-#endif  // WIN32
 
 TEST(URLParser, ExtractFileName) {
   struct FileCase {
@@ -452,19 +498,24 @@
     {"http://www.google.com/foo/bar.html#ref", "bar.html"},
     {"http://www.google.com/search/;param", ""},
     {"http://www.google.com/foo/bar.html;param#ref", "bar.html"},
-    {"http://www.google.com/foo/bar.html;foo;param#ref", "bar.html;foo"},
+    {"http://www.google.com/foo/bar.html;foo;param#ref", "bar.html"},
     {"http://www.google.com/foo/bar.html?query#ref", "bar.html"},
+    {"http://www.google.com/foo;/bar.html", "bar.html"},
+    {"http://www.google.com/foo;/", ""},
+    {"http://www.google.com/foo;", "foo"},
+    {"http://www.google.com/;", ""},
+    {"http://www.google.com/foo;bar;html", "foo"},
   };
 
-  for (size_t i = 0; i < ARRAYSIZE(file_cases); i++) {
+  for (size_t i = 0; i < arraysize(file_cases); i++) {
     const char* url = file_cases[i].input;
     int len = static_cast<int>(strlen(url));
 
-    url_parse::Parsed parsed;
-    url_parse::ParseStandardURL(url, len, &parsed);
+    Parsed parsed;
+    ParseStandardURL(url, len, &parsed);
 
-    url_parse::Component file_name;
-    url_parse::ExtractFileName(url, parsed.path, &file_name);
+    Component file_name;
+    ExtractFileName(url, parsed.path, &file_name);
 
     EXPECT_TRUE(ComponentMatches(url, file_cases[i].expected, file_name));
   }
@@ -477,14 +528,14 @@
                            int parameter,
                            const char* expected_key,
                            const char* expected_value) {
-  url_parse::Parsed parsed;
-  url_parse::ParseStandardURL(url, static_cast<int>(strlen(url)), &parsed);
+  Parsed parsed;
+  ParseStandardURL(url, static_cast<int>(strlen(url)), &parsed);
 
-  url_parse::Component query = parsed.query;
+  Component query = parsed.query;
 
   for (int i = 1; i <= parameter; i++) {
-    url_parse::Component key, value;
-    if (!url_parse::ExtractQueryKeyValue(url, &query, &key, &value)) {
+    Component key, value;
+    if (!ExtractQueryKeyValue(url, &query, &key, &value)) {
       if (parameter >= i && !expected_key)
         return true;  // Expected nonexistant key, got one.
       return false;  // Not enough keys.
@@ -563,16 +614,16 @@
 TEST(URLParser, MailtoUrl) {
   // Declared outside for loop to try to catch cases in init() where we forget
   // to reset something that is reset by the construtor.
-  url_parse::Parsed parsed;
+  Parsed parsed;
   for (size_t i = 0; i < arraysize(mailto_cases); ++i) {
     const char* url = mailto_cases[i].input;
-    url_parse::ParseMailtoURL(url, static_cast<int>(strlen(url)), &parsed);
-    int port = url_parse::ParsePort(url, parsed.port);
+    ParseMailtoURL(url, static_cast<int>(strlen(url)), &parsed);
+    int port = ParsePort(url, parsed.port);
 
     EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].scheme, parsed.scheme));
     EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].path, parsed.path));
     EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].query, parsed.query));
-    EXPECT_EQ(url_parse::PORT_UNSPECIFIED, port);
+    EXPECT_EQ(PORT_UNSPECIFIED, port);
 
     // The remaining components are never used for mailto urls.
     ExpectInvalidComponent(parsed.username);
@@ -581,3 +632,57 @@
     ExpectInvalidComponent(parsed.ref);
   }
 }
+
+// Filesystem URL cases; columns follow the FileSystemURLParseCase fields.
+static FileSystemURLParseCase filesystem_cases[] = {
+  // A URL with all the parts, followed by variants with fewer components.
+{"filesystem:http://user:pass@foo:21/temporary/bar;par?b#c", "http",  "user", "pass", "foo", 21, "/temporary",  "/bar;par",  "b",  "c"},
+{"filesystem:https://foo/persistent/bar;par/",               "https", NULL,   NULL,   "foo", -1, "/persistent", "/bar;par/", NULL, NULL},
+{"filesystem:file:///persistent/bar;par/",                   "file", NULL,    NULL,   NULL,  -1, "/persistent", "/bar;par/", NULL, NULL},
+{"filesystem:file:///persistent/bar;par/?query#ref",                   "file", NULL,    NULL,   NULL,  -1, "/persistent", "/bar;par/", "query", "ref"},
+{"filesystem:file:///persistent",                            "file", NULL,    NULL,   NULL,  -1, "/persistent", "",        NULL, NULL},
+};
+
+TEST(URLParser, FileSystemURL) {
+  // Declared outside for loop to try to catch cases in init() where we forget
+  // to reset something that is reset by the constructor.
+  Parsed parsed;
+  for (size_t i = 0; i < arraysize(filesystem_cases); i++) {
+    const FileSystemURLParseCase* parsecase = &filesystem_cases[i];
+    const char* url = parsecase->input;
+    ParseFileSystemURL(url, static_cast<int>(strlen(url)), &parsed);
+
+    EXPECT_TRUE(ComponentMatches(url, "filesystem", parsed.scheme));
+    EXPECT_EQ(!parsecase->inner_scheme, !parsed.inner_parsed());
+    // Only check inner_parsed if present. NOTE(review): inner_path is untested.
+    if (parsed.inner_parsed()) {
+      EXPECT_TRUE(ComponentMatches(url, parsecase->inner_scheme,
+          parsed.inner_parsed()->scheme));
+      EXPECT_TRUE(ComponentMatches(url, parsecase->inner_username,
+          parsed.inner_parsed()->username));
+      EXPECT_TRUE(ComponentMatches(url, parsecase->inner_password,
+          parsed.inner_parsed()->password));
+      EXPECT_TRUE(ComponentMatches(url, parsecase->inner_host,
+          parsed.inner_parsed()->host));
+      int port = ParsePort(url, parsed.inner_parsed()->port);
+      EXPECT_EQ(parsecase->inner_port, port);
+
+      // The inner URL's query and ref are expected to stay invalid.
+      ExpectInvalidComponent(parsed.inner_parsed()->query);
+      ExpectInvalidComponent(parsed.inner_parsed()->ref);
+    }
+
+    EXPECT_TRUE(ComponentMatches(url, parsecase->path, parsed.path));
+    EXPECT_TRUE(ComponentMatches(url, parsecase->query, parsed.query));
+    EXPECT_TRUE(ComponentMatches(url, parsecase->ref, parsed.ref));
+
+    // The remaining components are never used for filesystem urls.
+    ExpectInvalidComponent(parsed.username);
+    ExpectInvalidComponent(parsed.password);
+    ExpectInvalidComponent(parsed.host);
+    ExpectInvalidComponent(parsed.port);
+  }
+}
+
+}  // namespace
+}  // namespace url

diff --git a/src/url/url_test_utils.h b/src/url/url_test_utils.h
new file mode 100644
index 0000000..6e66e85
--- /dev/null
+++ b/src/url/url_test_utils.h

@@ -0,0 +1,56 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_TEST_UTILS_H_
+#define URL_URL_TEST_UTILS_H_
+
+// Convenience functions for string conversions.
+// These are mostly intended for use in unit tests.
+
+#include <string>
+
+#include "base/strings/string16.h"
+#include "testing/base/public/gunit.h"
+#include "url/url_canon_internal.h"
+
+namespace url {
+
+namespace test_utils {
+
+// Converts a wide string from native wchar_t format to char16 by casting
+// each element to base::char16, which drops any bits that do not fit (the
+// upper half of a 32-bit wchar_t, presumably -- confirm char16's width).
+// This is not meant to handle true UTF-32 encoded strings.
+inline base::string16 WStringToUTF16(const wchar_t* src) {
+  base::string16 result;
+  for (const wchar_t* cur = src; *cur; ++cur) {
+    result.push_back(static_cast<base::char16>(*cur));
+  }
+  return result;
+}
+
+// Converts a string from UTF-8 to UTF-16. Expects |src| under 1024 bytes.
+inline base::string16 ConvertUTF8ToUTF16(const std::string& src) {
+  int length = static_cast<int>(src.length());
+  EXPECT_LT(length, 1024);  // Same 1024 as the output buffer's argument.
+  RawCanonOutputW<1024> output;
+  EXPECT_TRUE(ConvertUTF8ToUTF16(src.data(), length, &output));
+  return base::string16(output.data(), output.length());
+}
+
+// Converts a string from UTF-16 to UTF-8; a failed conversion fails the test.
+inline std::string ConvertUTF16ToUTF8(const base::string16& src) {
+  std::string str;
+  StdStringCanonOutput output(&str);  // Writes into |str|.
+  EXPECT_TRUE(ConvertUTF16ToUTF8(src.data(), static_cast<int>(src.length()),
+                                 &output));
+  output.Complete();
+  return str;
+}
+
+}  // namespace test_utils
+
+}  // namespace url
+
+#endif  // URL_URL_TEST_UTILS_H_

diff --git a/src/url/url_util.cc b/src/url/url_util.cc
new file mode 100644
index 0000000..f4246e9
--- /dev/null
+++ b/src/url/url_util.cc

@@ -0,0 +1,590 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_util.h"
+
+#include <string.h>
+#include <vector>
+
+#ifdef GOOGLEURL_IN_GOOGLE3
+#include "base/heap-checker.h"
+#endif
+#include "base/logging.h"
+#include "url/url_canon_internal.h"
+#include "url/url_file.h"
+#include "url/url_util_internal.h"
+
+namespace url {
+
+namespace {
+
+// Lower-cases 'A'..'Z' only and returns every other value unchanged. The
+// standard library's tolower is locale sensitive, so it is avoided here.
+template<class Char>
+inline Char ToLowerASCII(Char c) {
+  return (c < 'A' || c > 'Z') ? c : (c + ('a' - 'A'));
+}
+
+// Backend for LowerCaseEqualsASCII: true iff lower-cased [a_begin, a_end) == b.
+template<typename Iter>
+inline bool DoLowerCaseEqualsASCII(Iter a_begin, Iter a_end, const char* b) {
+  for (; a_begin != a_end; ++a_begin, ++b) {
+    if (*b == 0 || ToLowerASCII(*a_begin) != *b)
+      return false;
+  }
+  return !*b;
+}
+
+const int kNumStandardURLSchemes = 8;  // Must match the entry count below.
+const char* const kStandardURLSchemes[kNumStandardURLSchemes] = {
+  kHttpScheme,
+  kHttpsScheme,
+  kFileScheme,  // Yes, file urls can have a hostname!
+  kFtpScheme,
+  kGopherScheme,
+  kWsScheme,    // WebSocket.
+  kWssScheme,   // WebSocket secure.
+  kFileSystemScheme,
+};
+
+// List of the currently installed standard schemes. This list starts out NULL,
+// is lazily created by InitStandardSchemes, and is leaked on shutdown so that
+// no destructors run that could slow us down or cause problems.
+std::vector<const char*>* standard_schemes = NULL;
+
+// See the LockStandardSchemes declaration in the header.
+bool standard_schemes_locked = false;
+
+// Lazily creates the standard_schemes list and seeds it with every entry of
+// kStandardURLSchemes. Once the list exists, further calls return without
+// touching it.
+void InitStandardSchemes() {
+  if (standard_schemes != NULL)
+    return;  // Already populated.
+  standard_schemes = new std::vector<const char*>(
+      kStandardURLSchemes, kStandardURLSchemes + kNumStandardURLSchemes);
+}
+
+// Compares the |component| range of |spec| against the lower-case
+// |compare_to| buffer using LowerCaseEqualsASCII.
+template<typename CHAR>
+inline bool DoCompareSchemeComponent(const CHAR* spec,
+                                     const Component& component,
+                                     const char* compare_to) {
+  if (component.is_nonempty()) {
+    return LowerCaseEqualsASCII(&spec[component.begin],
+                                &spec[component.end()], compare_to);
+  }
+  return compare_to[0] == 0;  // Empty component matches only an empty scheme.
+}
+
+// Returns true if the scheme located by |scheme| within |spec| is one of the
+// registered "standard" schemes.
+template<typename CHAR>
+bool DoIsStandard(const CHAR* spec, const Component& scheme) {
+  if (!scheme.is_nonempty())
+    return false;  // Empty or invalid schemes are non-standard.
+  InitStandardSchemes();
+  const CHAR* const first = &spec[scheme.begin];
+  const CHAR* const last = &spec[scheme.end()];
+  for (size_t i = 0; i < standard_schemes->size(); ++i) {
+    if (LowerCaseEqualsASCII(first, last, (*standard_schemes)[i]))
+      return true;
+  }
+  return false;
+}
+
+// Extracts the scheme from |str| and compares it to |compare|. The located
+// scheme (or a default Component if none) is stored in |found_scheme| if set.
+template<typename CHAR>
+bool DoFindAndCompareScheme(const CHAR* str,
+                            int str_len,
+                            const char* compare,
+                            Component* found_scheme) {
+  // Strip whitespace before extracting the scheme so that this agrees with
+  // the canonicalization performed by DoCanonicalize.
+  RawCanonOutputT<CHAR> whitespace_buffer;
+  int spec_len;
+  const CHAR* spec = RemoveURLWhitespace(str, str_len,
+                                         &whitespace_buffer, &spec_len);
+
+  Component our_scheme;
+  const bool has_scheme = ExtractScheme(spec, spec_len, &our_scheme);
+  // |found_scheme| is optional; a missing scheme is reported as Component().
+  if (found_scheme)
+    *found_scheme = has_scheme ? our_scheme : Component();
+  if (!has_scheme)
+    return false;
+  return DoCompareSchemeComponent(spec, our_scheme, compare);
+}
+
+// Canonicalizes |in_spec| into |output|/|output_parsed| based on its scheme.
+template<typename CHAR>
+bool DoCanonicalize(const CHAR* in_spec,
+                    int in_spec_len,
+                    bool trim_path_end,
+                    CharsetConverter* charset_converter,
+                    CanonOutput* output,
+                    Parsed* output_parsed) {
+  // Remove any whitespace from the middle of the input URL, possibly
+  // copying to the new buffer.
+  RawCanonOutputT<CHAR> whitespace_buffer;
+  int spec_len;
+  const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,
+                                         &whitespace_buffer, &spec_len);
+
+  Parsed parsed_input;
+#ifdef WIN32
+  // For Windows, we allow things that look like absolute Windows paths to be
+  // fixed up magically to file URLs, for IE compatibility. For example, this
+  // changes "c:/foo" into a file URL rather than treating it as a URL with
+  // the protocol "c". It also works for UNC ("\\foo\bar.txt"). There is
+  // similar logic in url_canon_relative.cc.
+  //
+  // Mac & Unix don't do this (the equivalent would be "/foo/bar", which has no
+  // meaning as an absolute path name) because browsers there generally don't,
+  // so there is no compatibility reason for doing so.
+  if (DoesBeginUNCPath(spec, 0, spec_len, false) ||
+      DoesBeginWindowsDriveSpec(spec, 0, spec_len)) {
+    ParseFileURL(spec, spec_len, &parsed_input);
+    return CanonicalizeFileURL(spec, spec_len, parsed_input, charset_converter,
+                               output, output_parsed);
+  }
+#endif
+
+  Component scheme;
+  if (!ExtractScheme(spec, spec_len, &scheme))
+    return false;
+
+  // This is the parsed version of the input URL; we have to canonicalize it
+  // before storing it in our object.
+  bool success;
+  if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) {
+    // File URLs are special.
+    ParseFileURL(spec, spec_len, &parsed_input);
+    success = CanonicalizeFileURL(spec, spec_len, parsed_input,
+                                  charset_converter, output, output_parsed);
+  } else if (DoCompareSchemeComponent(spec, scheme, url::kFileSystemScheme)) {
+    // Filesystem URLs are special.
+    ParseFileSystemURL(spec, spec_len, &parsed_input);
+    success = CanonicalizeFileSystemURL(spec, spec_len, parsed_input,
+                                        charset_converter, output,
+                                        output_parsed);
+
+  } else if (DoIsStandard(spec, scheme)) {
+    // All "normal" URLs.
+    ParseStandardURL(spec, spec_len, &parsed_input);
+    success = CanonicalizeStandardURL(spec, spec_len, parsed_input,
+                                      charset_converter, output, output_parsed);
+
+  } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) {
+    // Mailto URLs are treated like standard URLs with only scheme, path, query.
+    ParseMailtoURL(spec, spec_len, &parsed_input);
+    success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output,
+                                    output_parsed);
+
+  } else {
+    // "Weird" URLs like data: and javascript:
+    ParsePathURL(spec, spec_len, trim_path_end, &parsed_input);
+    success = CanonicalizePathURL(spec, spec_len, parsed_input, output,
+                                  output_parsed);
+  }
+  return success;
+}
+
+template<typename CHAR>
+bool DoResolveRelative(const char* base_spec,
+                       int base_spec_len,
+                       const Parsed& base_parsed,
+                       const CHAR* in_relative,
+                       int in_relative_length,
+                       CharsetConverter* charset_converter,
+                       CanonOutput* output,
+                       Parsed* output_parsed) {
+  // Remove any whitespace from the middle of the relative URL, possibly
+  // copying to the new buffer.
+  RawCanonOutputT<CHAR> whitespace_buffer;
+  int relative_length;
+  const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,
+                                             &whitespace_buffer,
+                                             &relative_length);
+  bool base_is_authority_based = false;
+  bool base_is_hierarchical = false;
+  if (base_spec &&
+      base_parsed.scheme.is_nonempty()) {
+    int after_scheme = base_parsed.scheme.end() + 1;  // Skip past the colon.
+    int num_slashes = CountConsecutiveSlashes(base_spec, after_scheme,
+                                              base_spec_len);
+    base_is_authority_based = num_slashes > 1;
+    base_is_hierarchical = num_slashes > 0;
+  }
+
+  bool standard_base_scheme =
+      base_parsed.scheme.is_nonempty() &&
+      DoIsStandard(base_spec, base_parsed.scheme);
+
+  bool is_relative;
+  Component relative_component;
+  if (!IsRelativeURL(base_spec, base_parsed, relative, relative_length,
+                     (base_is_hierarchical || standard_base_scheme),
+                     &is_relative, &relative_component)) {
+    // Error resolving.
+    return false;
+  }
+
+  // Pretend for a moment that |base_spec| is a standard URL. Normally
+  // non-standard URLs are treated as PathURLs, but if the base has an
+  // authority we would like to preserve it.
+  if (is_relative && base_is_authority_based && !standard_base_scheme) {
+    Parsed base_parsed_authority;
+    ParseStandardURL(base_spec, base_spec_len, &base_parsed_authority);
+    if (base_parsed_authority.host.is_nonempty()) {
+      RawCanonOutputT<char> temporary_output;
+      bool did_resolve_succeed =
+          ResolveRelativeURL(base_spec, base_parsed_authority, false, relative,
+                             relative_component, charset_converter,
+                             &temporary_output, output_parsed);
+      // The output_parsed is incorrect at this point (because it was built
+      // based on base_parsed_authority instead of base_parsed) and needs to be
+      // re-created.
+      DoCanonicalize(temporary_output.data(), temporary_output.length(), true,
+                     charset_converter, output, output_parsed);
+      return did_resolve_succeed;
+    }
+  } else if (is_relative) {
+    // Relative, resolve and canonicalize.
+    bool file_base_scheme = base_parsed.scheme.is_nonempty() &&
+        DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);
+    return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative,
+                              relative_component, charset_converter, output,
+                              output_parsed);
+  }
+
+  // Not relative, canonicalize the input.
+  return DoCanonicalize(relative, relative_length, true, charset_converter,
+                        output, output_parsed);
+}
+
+template<typename CHAR>
+bool DoReplaceComponents(const char* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         const Replacements<CHAR>& replacements,
+                         CharsetConverter* charset_converter,
+                         CanonOutput* output,
+                         Parsed* out_parsed) {
+  // If the scheme is overridden, just do a simple string substitution and
+  // reparse the whole thing. There are lots of edge cases that we really don't
+  // want to deal with. Like what happens if I replace "http://e:8080/foo"
+  // with a file. Does it become "file:///E:/8080/foo" where the port number
+  // becomes part of the path? Parsing that string as a file URL says "yes"
+  // but almost no sane rule for dealing with the components individually would
+  // come up with that.
+  //
+  // Why allow these crazy cases at all? Programmatically, there is almost no
+  // case for replacing the scheme. The most common case for hitting this is
+  // in JS when building up a URL using the location object. In this case, the
+  // JS code expects the string substitution behavior:
+  //   http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3
+  if (replacements.IsSchemeOverridden()) {
+    // Canonicalize the new scheme so it is 8-bit and can be concatenated with
+    // the existing spec.
+    RawCanonOutput<128> scheme_replaced;
+    Component scheme_replaced_parsed;
+    CanonicalizeScheme(replacements.sources().scheme,
+                       replacements.components().scheme,
+                       &scheme_replaced, &scheme_replaced_parsed);
+
+    // We can assume that the input is canonicalized, which means it always has
+    // a colon after the scheme (or where the scheme would be).
+    int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1
+                                                    : 1;
+    if (spec_len - spec_after_colon > 0) {
+      scheme_replaced.Append(&spec[spec_after_colon],
+                             spec_len - spec_after_colon);
+    }
+
+    // We now need to completely re-parse the resulting string since its meaning
+    // may have changed with the different scheme.
+    RawCanonOutput<128> recanonicalized;
+    Parsed recanonicalized_parsed;
+    DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,
+                   charset_converter,
+                   &recanonicalized, &recanonicalized_parsed);
+
+    // Recurse using the version with the scheme already replaced. This will now
+    // use the replacement rules for the new scheme.
+    //
+    // Warning: this code assumes that ReplaceComponents will re-check all
+    // components for validity. This is because we can't fail if DoCanonicalize
+    // failed above since theoretically the thing making it fail could be
+    // getting replaced here. If ReplaceComponents didn't re-check everything,
+    // we wouldn't know if something *not* getting replaced is a problem.
+    // If the scheme-specific replacers are made more intelligent so they don't
+    // re-check everything, we should instead recanonicalize the whole thing
+    // after this call to check validity (this assumes replacing the scheme is
+    // much much less common than other types of replacements, like clearing the
+    // ref).
+    Replacements<CHAR> replacements_no_scheme = replacements;
+    replacements_no_scheme.SetScheme(NULL, Component());
+    return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),
+                               recanonicalized_parsed, replacements_no_scheme,
+                               charset_converter, output, out_parsed);
+  }
+
+  // If we get here, then we know the scheme doesn't need to be replaced, so can
+  // just key off the scheme in the spec to know how to do the replacements.
+  if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) {
+    return ReplaceFileURL(spec, parsed, replacements, charset_converter, output,
+                          out_parsed);
+  }
+  if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileSystemScheme)) {
+    return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter,
+                                output, out_parsed);
+  }
+  if (DoIsStandard(spec, parsed.scheme)) {
+    return ReplaceStandardURL(spec, parsed, replacements, charset_converter,
+                              output, out_parsed);
+  }
+  if (DoCompareSchemeComponent(spec, parsed.scheme, url::kMailToScheme)) {
+    return ReplaceMailtoURL(spec, parsed, replacements, output, out_parsed);
+  }
+
+  // Default is a path URL.
+  return ReplacePathURL(spec, parsed, replacements, output, out_parsed);
+}
+
+}  // namespace
+
+void Initialize() {
+  InitStandardSchemes();
+}
+
+void Shutdown() {
+  if (standard_schemes) {
+    delete standard_schemes;
+    standard_schemes = NULL;
+  }
+}
+
+void AddStandardScheme(const char* new_scheme) {
+  // If this assert triggers, it means you've called AddStandardScheme after
+  // LockStandardSchemes has been called (see the header file for
+  // LockStandardSchemes for more).
+  //
+  // This normally means you're trying to set up a new standard scheme too late
+  // in your application's init process. Locate where your app does this
+  // initialization and calls LockStandardSchemes, and add your new standard
+  // scheme there.
+  DCHECK(!standard_schemes_locked) <<
+      "Trying to add a standard scheme after the list has been locked.";
+
+  size_t scheme_len = strlen(new_scheme);
+  if (scheme_len == 0)
+    return;
+
+  // Duplicate the scheme into a new buffer and add it to the list of standard
+  // schemes. This pointer will be leaked on shutdown.
+  char* dup_scheme = new char[scheme_len + 1];
+#ifdef GOOGLEURL_IN_GOOGLE3
+  HeapLeakChecker::IgnoreObject(dup_scheme);
+#endif
+  memcpy(dup_scheme, new_scheme, scheme_len + 1);
+
+  InitStandardSchemes();
+  standard_schemes->push_back(dup_scheme);
+}
+
+void LockStandardSchemes() {
+  standard_schemes_locked = true;
+}
+
+bool IsStandard(const char* spec, const Component& scheme) {
+  return DoIsStandard(spec, scheme);
+}
+
+bool IsStandard(const base::char16* spec, const Component& scheme) {
+  return DoIsStandard(spec, scheme);
+}
+
+bool FindAndCompareScheme(const char* str,
+                          int str_len,
+                          const char* compare,
+                          Component* found_scheme) {
+  return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
+}
+
+bool FindAndCompareScheme(const base::char16* str,
+                          int str_len,
+                          const char* compare,
+                          Component* found_scheme) {
+  return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
+}
+
+bool Canonicalize(const char* spec,
+                  int spec_len,
+                  bool trim_path_end,
+                  CharsetConverter* charset_converter,
+                  CanonOutput* output,
+                  Parsed* output_parsed) {
+  return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,
+                        output, output_parsed);
+}
+
+bool Canonicalize(const base::char16* spec,
+                  int spec_len,
+                  bool trim_path_end,
+                  CharsetConverter* charset_converter,
+                  CanonOutput* output,
+                  Parsed* output_parsed) {
+  return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,
+                        output, output_parsed);
+}
+
+bool ResolveRelative(const char* base_spec,
+                     int base_spec_len,
+                     const Parsed& base_parsed,
+                     const char* relative,
+                     int relative_length,
+                     CharsetConverter* charset_converter,
+                     CanonOutput* output,
+                     Parsed* output_parsed) {
+  return DoResolveRelative(base_spec, base_spec_len, base_parsed,
+                           relative, relative_length,
+                           charset_converter, output, output_parsed);
+}
+
+bool ResolveRelative(const char* base_spec,
+                     int base_spec_len,
+                     const Parsed& base_parsed,
+                     const base::char16* relative,
+                     int relative_length,
+                     CharsetConverter* charset_converter,
+                     CanonOutput* output,
+                     Parsed* output_parsed) {
+  return DoResolveRelative(base_spec, base_spec_len, base_parsed,
+                           relative, relative_length,
+                           charset_converter, output, output_parsed);
+}
+
+bool ReplaceComponents(const char* spec,
+                       int spec_len,
+                       const Parsed& parsed,
+                       const Replacements<char>& replacements,
+                       CharsetConverter* charset_converter,
+                       CanonOutput* output,
+                       Parsed* out_parsed) {
+  return DoReplaceComponents(spec, spec_len, parsed, replacements,
+                             charset_converter, output, out_parsed);
+}
+
+bool ReplaceComponents(const char* spec,
+                       int spec_len,
+                       const Parsed& parsed,
+                       const Replacements<base::char16>& replacements,
+                       CharsetConverter* charset_converter,
+                       CanonOutput* output,
+                       Parsed* out_parsed) {
+  return DoReplaceComponents(spec, spec_len, parsed, replacements,
+                             charset_converter, output, out_parsed);
+}
+
+// Front-ends for LowerCaseEqualsASCII.
+bool LowerCaseEqualsASCII(const char* a_begin,
+                          const char* a_end,
+                          const char* b) {
+  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
+}
+
+bool LowerCaseEqualsASCII(const char* a_begin,
+                          const char* a_end,
+                          const char* b_begin,
+                          const char* b_end) {
+  while (a_begin != a_end && b_begin != b_end &&
+         ToLowerASCII(*a_begin) == *b_begin) {
+    a_begin++;
+    b_begin++;
+  }
+  return a_begin == a_end && b_begin == b_end;
+}
+
+bool LowerCaseEqualsASCII(const base::char16* a_begin,
+                          const base::char16* a_end,
+                          const char* b) {
+  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
+}
+
+void DecodeURLEscapeSequences(const char* input,
+                              int length,
+                              CanonOutputW* output) {
+  RawCanonOutputT<char> unescaped_chars;
+  for (int i = 0; i < length; i++) {
+    if (input[i] == '%') {
+      unsigned char ch;
+      if (DecodeEscaped(input, &i, length, &ch)) {
+        unescaped_chars.push_back(ch);
+      } else {
+        // Invalid escape sequence, copy the percent literal.
+        unescaped_chars.push_back('%');
+      }
+    } else {
+      // Regular non-escaped 8-bit character.
+      unescaped_chars.push_back(input[i]);
+    }
+  }
+
+  // Convert the 8-bit data to UTF-16. It's not clear IE does this at all to
+  // JavaScript URLs, but Firefox and Safari do.
+  for (int i = 0; i < unescaped_chars.length(); i++) {
+    unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));
+    if (uch < 0x80) {
+      // ASCII (below 0x80), just append directly.
+      output->push_back(uch);
+    } else {
+      // next_character will point to the last character of the decoded
+      // character.
+      int next_character = i;
+      unsigned code_point;
+      if (ReadUTFChar(unescaped_chars.data(), &next_character,
+                      unescaped_chars.length(), &code_point)) {
+        // Valid UTF-8 character, convert to UTF-16.
+        AppendUTF16Value(code_point, output);
+        i = next_character;
+      } else {
+        // If there are any sequences that are not valid UTF-8, we keep
+        // invalid code points and promote to UTF-16. We copy all characters
+        // from the current position to the end of the identified sequence.
+        while (i < next_character) {
+          output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));
+          i++;
+        }
+        output->push_back(static_cast<unsigned char>(unescaped_chars.at(i)));
+      }
+    }
+  }
+}
+
+void EncodeURIComponent(const char* input, int length, CanonOutput* output) {
+  for (int i = 0; i < length; ++i) {
+    unsigned char c = static_cast<unsigned char>(input[i]);
+    if (IsComponentChar(c))
+      output->push_back(c);
+    else
+      AppendEscapedChar(c, output);
+  }
+}
+
+bool CompareSchemeComponent(const char* spec,
+                            const Component& component,
+                            const char* compare_to) {
+  return DoCompareSchemeComponent(spec, component, compare_to);
+}
+
+bool CompareSchemeComponent(const base::char16* spec,
+                            const Component& component,
+                            const char* compare_to) {
+  return DoCompareSchemeComponent(spec, component, compare_to);
+}
+
+}  // namespace url

diff --git a/src/url/url_util.h b/src/url/url_util.h
new file mode 100644
index 0000000..458d1e8
--- /dev/null
+++ b/src/url/url_util.h

@@ -0,0 +1,205 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_UTIL_H_
+#define URL_URL_UTIL_H_
+
+#include <string>
+
+#include "base/strings/string16.h"
+#include "url/url_canon.h"
+#include "url/url_constants.h"
+#include "url/url_export.h"
+#include "url/url_parse.h"
+
+namespace url {
+
+// Init ------------------------------------------------------------------------
+
+// Initialization is NOT required, it will be implicitly initialized when first
+// used. However, this implicit initialization is NOT threadsafe. If you are
+// using this library in a threaded environment and don't have a consistent
+// "first call" (an example might be calling "AddStandardScheme" with your
+// special application-specific schemes) then you will want to call initialize
+// before spawning any threads.
+//
+// It is OK to call this function more than once, subsequent calls will simply
+// "noop", unless Shutdown() was called in the mean time. This will also be a
+// "noop" if other calls to the library have forced an initialization
+// beforehand.
+URL_EXPORT void Initialize();
+
+// Cleanup is not required, except some strings may leak. For most user
+// applications, this is fine. If you're using it in a library that may get
+// loaded and unloaded, you'll want to call this before unloading to properly
+// clean up your library.
+URL_EXPORT void Shutdown();
+
+// Schemes --------------------------------------------------------------------
+
+// Adds an application-defined scheme to the internal list of "standard" URL
+// schemes. This function is not threadsafe and can not be called concurrently
+// with any other url_util function. It will assert if the list of standard
+// schemes has been locked (see LockStandardSchemes).
+URL_EXPORT void AddStandardScheme(const char* new_scheme);
+
+// Sets a flag to prevent future calls to AddStandardScheme from succeeding.
+//
+// This is designed to help prevent errors for multithreaded applications.
+// Normal usage would be to call AddStandardScheme for your custom schemes at
+// the beginning of program initialization, and then LockStandardSchemes. This
+// prevents future callers from mistakenly calling AddStandardScheme when the
+// program is running with multiple threads, where such usage would be
+// dangerous.
+//
+// We could have had AddStandardScheme use a lock instead, but that would add
+// some platform-specific dependencies we don't otherwise have now, and is
+// overkill considering the normal usage is so simple.
+URL_EXPORT void LockStandardSchemes();
+
+// Locates the scheme in the given string and places it into |found_scheme|,
+// which may be NULL to indicate the caller does not care about the range.
+//
+// Returns whether the given |compare| scheme matches the scheme found in the
+// input (if any). The |compare| scheme must be a valid canonical scheme or
+// the result of the comparison is undefined.
+URL_EXPORT bool FindAndCompareScheme(const char* str,
+                                     int str_len,
+                                     const char* compare,
+                                     Component* found_scheme);
+URL_EXPORT bool FindAndCompareScheme(const base::char16* str,
+                                     int str_len,
+                                     const char* compare,
+                                     Component* found_scheme);
+inline bool FindAndCompareScheme(const std::string& str,
+                                 const char* compare,
+                                 Component* found_scheme) {
+  return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
+                              compare, found_scheme);
+}
+inline bool FindAndCompareScheme(const base::string16& str,
+                                 const char* compare,
+                                 Component* found_scheme) {
+  return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
+                              compare, found_scheme);
+}
+
+// Returns true if the given string represents a standard URL. This means that
+// the scheme is in the list of known standard schemes.
+URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);
+URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);
+
+// TODO(brettw) remove this. This is a temporary compatibility hack to avoid
+// breaking the WebKit build when this version is synced via Chrome.
+inline bool IsStandard(const char* spec,
+                       int spec_len,
+                       const Component& scheme) {
+  return IsStandard(spec, scheme);
+}
+
+// URL library wrappers -------------------------------------------------------
+
+// Parses the given spec according to the extracted scheme type. Normal users
+// should use the URL object, although this may be useful if performance is
+// critical and you don't want to do the heap allocation for the std::string.
+//
+// As with the Canonicalize* functions, the charset converter can
+// be NULL to use UTF-8 (it will be faster in this case).
+//
+// Returns true if a valid URL was produced, false if not. On failure, the
+// output and parsed structures will still be filled and will be consistent,
+// but they will not represent a loadable URL.
+URL_EXPORT bool Canonicalize(const char* spec,
+                             int spec_len,
+                             bool trim_path_end,
+                             CharsetConverter* charset_converter,
+                             CanonOutput* output,
+                             Parsed* output_parsed);
+URL_EXPORT bool Canonicalize(const base::char16* spec,
+                             int spec_len,
+                             bool trim_path_end,
+                             CharsetConverter* charset_converter,
+                             CanonOutput* output,
+                             Parsed* output_parsed);
+
+// Resolves a potentially relative URL relative to the given parsed base URL.
+// The base MUST be valid. The resulting canonical URL and parsed information
+// will be placed into the given out variables.
+//
+// The relative need not be relative. If we discover that it's absolute, this
+// will produce a canonical version of that URL. See Canonicalize() for more
+// about the charset_converter.
+//
+// Returns true if the output is valid, false if the input could not produce
+// a valid URL.
+URL_EXPORT bool ResolveRelative(const char* base_spec,
+                                int base_spec_len,
+                                const Parsed& base_parsed,
+                                const char* relative,
+                                int relative_length,
+                                CharsetConverter* charset_converter,
+                                CanonOutput* output,
+                                Parsed* output_parsed);
+URL_EXPORT bool ResolveRelative(const char* base_spec,
+                                int base_spec_len,
+                                const Parsed& base_parsed,
+                                const base::char16* relative,
+                                int relative_length,
+                                CharsetConverter* charset_converter,
+                                CanonOutput* output,
+                                Parsed* output_parsed);
+
+// Replaces components in the given VALID input url. The new canonical URL info
+// is written to output and out_parsed.
+//
+// Returns true if the resulting URL is valid.
+URL_EXPORT bool ReplaceComponents(const char* spec,
+                                  int spec_len,
+                                  const Parsed& parsed,
+                                  const Replacements<char>& replacements,
+                                  CharsetConverter* charset_converter,
+                                  CanonOutput* output,
+                                  Parsed* out_parsed);
+URL_EXPORT bool ReplaceComponents(
+    const char* spec,
+    int spec_len,
+    const Parsed& parsed,
+    const Replacements<base::char16>& replacements,
+    CharsetConverter* charset_converter,
+    CanonOutput* output,
+    Parsed* out_parsed);
+
+// String helper functions ----------------------------------------------------
+
+// Compare the lower-case form of the given string against the given ASCII
+// string.  This is useful for checking whether an input string matches some
+// token, and it is optimized to avoid intermediate string copies.
+//
+// The versions of this function that don't take a b_end assume that the b
+// string is NULL terminated.
+URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
+                                     const char* a_end,
+                                     const char* b);
+URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
+                                     const char* a_end,
+                                     const char* b_begin,
+                                     const char* b_end);
+URL_EXPORT bool LowerCaseEqualsASCII(const base::char16* a_begin,
+                                     const base::char16* a_end,
+                                     const char* b);
+
+// Unescapes the given string using URL escaping rules.
+URL_EXPORT void DecodeURLEscapeSequences(const char* input,
+                                         int length,
+                                         CanonOutputW* output);
+
+// Escapes the given string as defined by the JS method encodeURIComponent.  See
+// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
+URL_EXPORT void EncodeURIComponent(const char* input,
+                                   int length,
+                                   CanonOutput* output);
+
+}  // namespace url
+
+#endif  // URL_URL_UTIL_H_

diff --git a/src/url/url_util_internal.h b/src/url/url_util_internal.h
new file mode 100644
index 0000000..c72598f
--- /dev/null
+++ b/src/url/url_util_internal.h

@@ -0,0 +1,26 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_UTIL_INTERNAL_H_
+#define URL_URL_UTIL_INTERNAL_H_
+
+#include <string>
+
+#include "base/strings/string16.h"
+#include "url/url_parse.h"
+
+namespace url {
+
+// Given a string and a range inside the string, compares it to the given
+// lower-case |compare_to| buffer.
+bool CompareSchemeComponent(const char* spec,
+                            const Component& component,
+                            const char* compare_to);
+bool CompareSchemeComponent(const base::char16* spec,
+                            const Component& component,
+                            const char* compare_to);
+
+}  // namespace url
+
+#endif  // URL_URL_UTIL_INTERNAL_H_

diff --git a/src/url/url_util_unittest.cc b/src/url/url_util_unittest.cc
new file mode 100644
index 0000000..2216252
--- /dev/null
+++ b/src/url/url_util_unittest.cc

@@ -0,0 +1,319 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/macros.h"
+#include "testing/base/public/gunit.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_parse.h"
+#include "url/url_test_utils.h"
+#include "url/url_util.h"
+
+namespace url {
+
+// Covers FindAndCompareScheme() for specs with a matching scheme, a different
+// scheme, no scheme, an empty scheme, and schemes containing whitespace or
+// control characters.
+TEST(URLUtilTest, FindAndCompareScheme) {
+  Component scheme_pos;
+
+  // The scheme exists and is the requested one. Passing NULL for the output
+  // component is accepted.
+  const char kHttpSpec[] = "http://www.com/";
+  const int http_len = static_cast<int>(strlen(kHttpSpec));
+  EXPECT_TRUE(FindAndCompareScheme(kHttpSpec, http_len, "http", NULL));
+  EXPECT_TRUE(FindAndCompareScheme(kHttpSpec, http_len, "http", &scheme_pos));
+  EXPECT_TRUE(scheme_pos == Component(0, 4));
+
+  // The scheme exists but differs from the requested one; its position is
+  // still reported.
+  EXPECT_FALSE(FindAndCompareScheme(kHttpSpec, http_len, "https", &scheme_pos));
+  EXPECT_TRUE(scheme_pos == Component(0, 4));
+
+  // There is no scheme at all.
+  const char kNoSchemeSpec[] = "httpfoobar";
+  const int no_scheme_len = static_cast<int>(strlen(kNoSchemeSpec));
+  EXPECT_FALSE(FindAndCompareScheme(kNoSchemeSpec, no_scheme_len, "http",
+                                    &scheme_pos));
+  EXPECT_TRUE(scheme_pos == Component());
+
+  // An empty scheme in the spec matches the empty scheme...
+  const char kEmptySchemeSpec[] = ":foo.com/";
+  const int empty_scheme_len = static_cast<int>(strlen(kEmptySchemeSpec));
+  EXPECT_TRUE(FindAndCompareScheme(kEmptySchemeSpec, empty_scheme_len, "",
+                                   &scheme_pos));
+  EXPECT_TRUE(scheme_pos == Component(0, 0));
+
+  // ...whereas a spec that has no scheme does not.
+  EXPECT_FALSE(FindAndCompareScheme("", 0, "", &scheme_pos));
+  EXPECT_TRUE(scheme_pos == Component());
+
+  // Whitespace inside the scheme: the URL is canonicalized before the
+  // comparison is made.
+  const char kWhitespaceSpec[] = " \r\n\tjav\ra\nscri\tpt:alert(1)";
+  const int whitespace_len = static_cast<int>(strlen(kWhitespaceSpec));
+  EXPECT_TRUE(FindAndCompareScheme(kWhitespaceSpec, whitespace_len,
+                                   "javascript", &scheme_pos));
+  EXPECT_TRUE(scheme_pos == Component(1, 10));
+
+  // Control characters are stripped from the ends of the spec but kept in the
+  // middle, so this scheme is not "javascript".
+  const char kControlSpec[] = "\02jav\02scr\03ipt:alert(1)";
+  const int control_len = static_cast<int>(strlen(kControlSpec));
+  EXPECT_FALSE(FindAndCompareScheme(kControlSpec, control_len, "javascript",
+                                    &scheme_pos));
+  EXPECT_TRUE(scheme_pos == Component(1, 11));
+}
+
+// Smoke test: ReplaceComponents() is called with a NULL spec and with an empty
+// spec, while a ref replacement, a host replacement, or no replacement is set.
+TEST(URLUtilTest, ReplaceComponents) {
+  // Each spec is passed together with a length of zero.
+  const char* const kEmptySpecs[] = {NULL, ""};
+
+  Parsed parsed;
+  RawCanonOutputT<char> output;
+  Parsed new_parsed;
+  Replacements<char> replacements;
+
+  // None of the calls below may crash; their results are not inspected.
+
+  // Only the ref is overridden.
+  replacements.SetRef("test", Component(0, 4));
+  for (size_t i = 0; i < arraysize(kEmptySpecs); ++i) {
+    ReplaceComponents(kEmptySpecs[i], 0, parsed, replacements, NULL, &output,
+                      &new_parsed);
+  }
+
+  // Only the host is overridden.
+  replacements.ClearRef();
+  replacements.SetHost("test", Component(0, 4));
+  for (size_t i = 0; i < arraysize(kEmptySpecs); ++i) {
+    ReplaceComponents(kEmptySpecs[i], 0, parsed, replacements, NULL, &output,
+                      &new_parsed);
+  }
+
+  // Nothing is overridden. Both specs are run through twice.
+  replacements.ClearHost();
+  for (int pass = 0; pass < 2; ++pass) {
+    for (size_t i = 0; i < arraysize(kEmptySpecs); ++i) {
+      ReplaceComponents(kEmptySpecs[i], 0, parsed, replacements, NULL, &output,
+                        &new_parsed);
+    }
+  }
+}
+
+// Test helper: canonicalizes |base_url|, replaces its scheme with |scheme|
+// through ReplaceComponents(), and returns the resulting spec.
+//
+// Both arguments must be null-terminated; their lengths are taken with
+// strlen().
+static std::string CheckReplaceScheme(const char* base_url,
+                                      const char* scheme) {
+  // Make sure the input is canonicalized.
+  RawCanonOutput<32> original;
+  Parsed original_parsed;
+  // The url API takes lengths as int, so narrow the size_t from strlen()
+  // explicitly, as the FindAndCompareScheme test in this file does.
+  Canonicalize(base_url, static_cast<int>(strlen(base_url)), true, NULL,
+               &original, &original_parsed);
+
+  // The replacement scheme spans the whole |scheme| string.
+  Replacements<char> replacements;
+  replacements.SetScheme(scheme,
+                         Component(0, static_cast<int>(strlen(scheme))));
+
+  std::string output_string;
+  StdStringCanonOutput output(&output_string);
+  Parsed output_parsed;
+  ReplaceComponents(original.data(), original.length(), original_parsed,
+                    replacements, NULL, &output, &output_parsed);
+
+  // Finish the output object before handing back the string it wrote into.
+  output.Complete();
+  return output_string;
+}
+
+// Checks the spec produced when only the scheme of a canonicalized URL is
+// replaced. Each case is CheckReplaceScheme(original URL, new scheme).
+TEST(URLUtilTest, ReplaceScheme) {
+  EXPECT_EQ("https://google.com/",
+            CheckReplaceScheme("http://google.com/", "https"));
+  EXPECT_EQ("file://google.com/",
+            CheckReplaceScheme("http://google.com/", "file"));
+  EXPECT_EQ("http://home/Build",
+            CheckReplaceScheme("file:///Home/Build", "http"));
+  EXPECT_EQ("javascript:foo",
+            CheckReplaceScheme("about:foo", "javascript"));
+  // An empty replacement scheme leaves the ":" separator in place.
+  EXPECT_EQ("://google.com/",
+            CheckReplaceScheme("http://google.com/", ""));
+  EXPECT_EQ("http://google.com/",
+            CheckReplaceScheme("about:google.com", "http"));
+  // An empty original URL yields just the new scheme and its separator.
+  EXPECT_EQ("http:", CheckReplaceScheme("", "http"));
+
+#ifdef WIN32
+  // Magic Windows drive letter behavior when converting to a file URL.
+  EXPECT_EQ("file:///E:/foo/",
+            CheckReplaceScheme("http://localhost/e:foo/", "file"));
+#endif
+
+  // This will probably change to "about://google.com/" when we fix
+  // http://crbug.com/160 which should also be an acceptable result.
+  // NOTE(review): the expectation below already equals the value the comment
+  // above says it will change to, so that comment looks stale -- confirm and
+  // update or remove it.
+  EXPECT_EQ("about://google.com/",
+            CheckReplaceScheme("http://google.com/", "about"));
+
+  // In the expected output the spaces in the path are escaped as %20, the
+  // space after '#' is kept as-is, and the trailing space is gone.
+  EXPECT_EQ("http://example.com/%20hello%20# world",
+            CheckReplaceScheme("myscheme:example.com/ hello # world ", "http"));
+}
+
+// Verifies DecodeURLEscapeSequences() for every escaped byte from %01 to %7f,
+// for a valid escaped UTF-8 sequence, for %00, and for invalid UTF-8.
+TEST(URLUtilTest, DecodeURLEscapeSequences) {
+  // |input| is the escaped text; |output| is the expected result, spelled as
+  // UTF-8 so it can be compared after converting the UTF-16 output back.
+  struct DecodeCase {
+    const char* input;
+    const char* output;
+  } decode_cases[] = {
+    {"hello, world", "hello, world"},
+    {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/",
+     "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"},
+    {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/",
+     "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"},
+    {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/",
+     " !\"#$%&'()*+,-.//"},
+    {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/",
+     "0123456789:;<=>?/"},
+    {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/",
+     "@ABCDEFGHIJKLMNO/"},
+    {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/",
+     "PQRSTUVWXYZ[\\]^_/"},
+    {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/",
+     "`abcdefghijklmno/"},
+    {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/",
+     "pqrstuvwxyz{|}~\x7f/"},
+    // Test un-UTF-8-ization.
+    {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"},
+  };
+
+  for (size_t i = 0; i < arraysize(decode_cases); i++) {
+    const char* input = decode_cases[i].input;
+    RawCanonOutputT<base::char16> output;
+    // The API takes the length as int; narrow the size_t from strlen()
+    // explicitly.
+    DecodeURLEscapeSequences(input, static_cast<int>(strlen(input)), &output);
+    // Stream the case index so a failure identifies the offending row.
+    EXPECT_EQ(decode_cases[i].output,
+              test_utils::ConvertUTF16ToUTF8(base::string16(output.data(),
+                                                            output.length())))
+        << i;
+  }
+
+  // Our decode should decode %00
+  const char zero_input[] = "%00";
+  RawCanonOutputT<base::char16> zero_output;
+  DecodeURLEscapeSequences(zero_input, static_cast<int>(strlen(zero_input)),
+                           &zero_output);
+  // Only checks that the escape did not survive verbatim.
+  EXPECT_NE("%00", test_utils::ConvertUTF16ToUTF8(
+      base::string16(zero_output.data(), zero_output.length())));
+
+  // Test the error behavior for invalid UTF-8. The lead byte 0xe4 is followed
+  // by only one continuation byte, so 0xe4 and 0xa0 are expected as separate
+  // code units; the remaining bytes e5 a5 bd form a valid sequence for U+597D.
+  const char invalid_input[] = "%e4%a0%e5%a5%bd";
+  const base::char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0};
+  RawCanonOutputT<base::char16> invalid_output;
+  DecodeURLEscapeSequences(invalid_input,
+                           static_cast<int>(strlen(invalid_input)),
+                           &invalid_output);
+  EXPECT_EQ(base::string16(invalid_expected),
+            base::string16(invalid_output.data(), invalid_output.length()));
+}
+
+// Verifies EncodeURIComponent() for every byte from 0x01 to 0x7f: which
+// characters pass through unchanged and which become upper-case "%XX"
+// escapes.
+TEST(URLUtilTest, TestEncodeURIComponent) {
+  // |input| is the raw text and |output| the expected escaped form.
+  struct EncodeCase {
+    const char* input;
+    const char* output;
+  } encode_cases[] = {
+    {"hello, world", "hello%2C%20world"},
+    {"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
+     "%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F"},
+    {"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
+     "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F"},
+    {" !\"#$%&'()*+,-./",
+     "%20!%22%23%24%25%26%27()*%2B%2C-.%2F"},
+    {"0123456789:;<=>?",
+     "0123456789%3A%3B%3C%3D%3E%3F"},
+    {"@ABCDEFGHIJKLMNO",
+     "%40ABCDEFGHIJKLMNO"},
+    {"PQRSTUVWXYZ[\\]^_",
+     "PQRSTUVWXYZ%5B%5C%5D%5E_"},
+    {"`abcdefghijklmno",
+     "%60abcdefghijklmno"},
+    {"pqrstuvwxyz{|}~\x7f",
+     "pqrstuvwxyz%7B%7C%7D~%7F"},
+  };
+
+  for (size_t i = 0; i < arraysize(encode_cases); i++) {
+    const char* input = encode_cases[i].input;
+    RawCanonOutputT<char> buffer;
+    // The API takes the length as int; narrow the size_t from strlen()
+    // explicitly.
+    EncodeURIComponent(input, static_cast<int>(strlen(input)), &buffer);
+    std::string output(buffer.data(), buffer.length());
+    // Stream the case index so a failure identifies the offending row.
+    EXPECT_EQ(encode_cases[i].output, output) << i;
+  }
+}
+
+// Table-driven check of ResolveRelative() when the base URL was parsed with
+// ParsePathURL(). Each row gives the base, the relative reference, whether
+// resolution is expected to succeed, and the expected result on success.
+TEST(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
+  // This tests non-standard (in the sense that GIsStandard() == false)
+  // hierarchical schemes.
+  struct ResolveRelativeCase {
+    const char* base;
+    const char* rel;
+    bool is_valid;
+    const char* out;
+  } resolve_non_standard_cases[] = {
+      // Resolving a relative path against a non-hierarchical URL should fail.
+    {"scheme:opaque_data", "/path", false, ""},
+      // Resolving a relative path against a non-standard authority-based base
+      // URL doesn't alter the authority section.
+    {"scheme://Authority/", "../path", true, "scheme://Authority/path"},
+      // A non-standard hierarchical base is resolved with path URL
+      // canonicalization rules.
+    {"data:/Blah:Blah/", "file.html", true, "data:/Blah:Blah/file.html"},
+    {"data:/Path/../part/part2", "file.html", true,
+      "data:/Path/../part/file.html"},
+      // Path URL canonicalization rules also apply to non-standard authority-
+      // based URLs.
+    {"custom://Authority/", "file.html", true,
+      "custom://Authority/file.html"},
+    {"custom://Authority/", "other://Auth/", true, "other://Auth/"},
+    {"custom://Authority/", "../../file.html", true,
+      "custom://Authority/file.html"},
+    {"custom://Authority/path/", "file.html", true,
+      "custom://Authority/path/file.html"},
+    {"custom://Authority:NoCanon/path/", "file.html", true,
+      "custom://Authority:NoCanon/path/file.html"},
+      // It's still possible to get an invalid path URL.
+    {"custom://Invalid:!#Auth/", "file.html", false, ""},
+      // A path with an authority section gets canonicalized under standard URL
+      // rules, even though the base was non-standard.
+    {"content://content.Provider/", "//other.Provider", true,
+      "content://other.provider/"},
+      // Resolving an absolute URL doesn't cause canonicalization of the
+      // result.
+    {"about:blank", "custom://Authority", true, "custom://Authority"},
+      // Fragment URLs can be resolved against a non-standard base.
+    {"scheme://Authority/path", "#fragment", true,
+      "scheme://Authority/path#fragment"},
+    {"scheme://Authority/", "#fragment", true, "scheme://Authority/#fragment"},
+      // Resolving should fail if the base URL is authority-based but is
+      // missing a path component (the '/' at the end).
+    {"scheme://Authority", "path", false, ""},
+      // Test resolving a fragment (only) against any kind of base-URL.
+    {"about:blank", "#id42", true, "about:blank#id42" },
+    {"about:blank", " #id42", true, "about:blank#id42" },
+    {"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag" },
+      // A surprising side effect of allowing fragments to resolve against
+      // any URL scheme is we might break javascript: URLs by doing so...
+    {"javascript:alert('foo#bar')", "#badfrag", true,
+      "javascript:alert('foo#badfrag" },
+  };
+
+  for (size_t i = 0; i < arraysize(resolve_non_standard_cases); i++) {
+    const ResolveRelativeCase& test_data = resolve_non_standard_cases[i];
+    // The url API takes lengths as int; narrow the size_t from strlen()
+    // explicitly, and only once per string.
+    const int base_len = static_cast<int>(strlen(test_data.base));
+    const int rel_len = static_cast<int>(strlen(test_data.rel));
+
+    Parsed base_parsed;
+    ParsePathURL(test_data.base, base_len, false, &base_parsed);
+
+    std::string resolved;
+    StdStringCanonOutput output(&resolved);
+    Parsed resolved_parsed;
+    bool valid = ResolveRelative(test_data.base, base_len, base_parsed,
+                                 test_data.rel, rel_len, NULL, &output,
+                                 &resolved_parsed);
+    output.Complete();
+
+    EXPECT_EQ(test_data.is_valid, valid) << i;
+    // The expected output is only meaningful for rows that should resolve.
+    if (test_data.is_valid && valid)
+      EXPECT_EQ(test_data.out, resolved) << i;
+  }
+}
+
+// Resolves a relative reference containing '#' against a mailto: base that
+// was parsed with ParsePathURL(), and checks that the resolved URL has no ref
+// component.
+TEST(URLUtilTest, TestNoRefComponent) {
+  // The hash-mark must be ignored when mailto: scheme is
+  // parsed, even if the url has a base and relative part.
+  const char* base = "mailto://to/";
+  const char* rel = "any#body";
+  // The url API takes lengths as int; narrow the size_t from strlen()
+  // explicitly, and only once per string.
+  const int base_len = static_cast<int>(strlen(base));
+  const int rel_len = static_cast<int>(strlen(rel));
+
+  Parsed base_parsed;
+  ParsePathURL(base, base_len, false, &base_parsed);
+
+  std::string resolved;
+  StdStringCanonOutput output(&resolved);
+  Parsed resolved_parsed;
+
+  bool valid = ResolveRelative(base, base_len, base_parsed, rel, rel_len, NULL,
+                               &output, &resolved_parsed);
+  EXPECT_TRUE(valid);
+  EXPECT_FALSE(resolved_parsed.ref.is_valid());
+}
+
+}  // namespace url
commit	8013a334f29538d9383baf4b5eb252feaabfe9d4	[log] [tgz]
author	Devany Sandoval <sandovad@google.com>	Wed Jun 24 11:07:13 2015 -0700
committer	sandovad <sandovad@google.com>	Tue Sep 03 12:52:03 2019 -0700
tree	a6727d72f2ad4b873a719a86936fda4263739079
parent	c53c811b6f3515682094bc554115b764da7fccca [diff]