convert_UTF: rewrite in C++

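Rename convert_UTF.c to convert_UTF.cc and build it as C++. This allows us
to namespace the symbols properly: the public types and conversion functions
now live in the google_breakpad namespace, file-local constants and helpers
move from `static` into anonymous namespaces, and the extern "C" wrapper in
convert_UTF.h is removed. The build files touched here are updated to
reference the new file name.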

Bug: google-breakpad:725
Change-Id: Iea8052547eef6c0acb299c1995781735c6d8994f
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/1769236
Reviewed-by: Mark Mentovai <mark@chromium.org>
diff --git a/Makefile.am b/Makefile.am
index c8a5740..5978876 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -170,7 +170,7 @@
 	src/client/minidump_file_writer-inl.h \
 	src/client/minidump_file_writer.cc \
 	src/client/minidump_file_writer.h \
-	src/common/convert_UTF.c \
+	src/common/convert_UTF.cc \
 	src/common/convert_UTF.h \
 	src/common/md5.cc \
 	src/common/md5.h \
@@ -658,7 +658,7 @@
 
 src_common_dumper_unittest_SOURCES = \
 	src/common/byte_cursor_unittest.cc \
-	src/common/convert_UTF.c \
+	src/common/convert_UTF.cc \
 	src/common/dwarf_cfi_to_module.cc \
 	src/common/dwarf_cfi_to_module_unittest.cc \
 	src/common/dwarf_cu_to_module.cc \
diff --git a/Makefile.in b/Makefile.in
index 77b4956..2e3793a 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -307,7 +307,7 @@
 	src/client/linux/minidump_writer/minidump_writer.cc \
 	src/client/minidump_file_writer-inl.h \
 	src/client/minidump_file_writer.cc \
-	src/client/minidump_file_writer.h src/common/convert_UTF.c \
+	src/client/minidump_file_writer.h src/common/convert_UTF.cc \
 	src/common/convert_UTF.h src/common/md5.cc src/common/md5.h \
 	src/common/string_conversion.cc src/common/string_conversion.h \
 	src/common/linux/elf_core_dump.cc src/common/linux/elfutils.cc \
@@ -689,7 +689,7 @@
 	$(src_client_linux_linux_dumper_unittest_helper_LDFLAGS) \
 	$(LDFLAGS) -o $@
 am__src_common_dumper_unittest_SOURCES_DIST =  \
-	src/common/byte_cursor_unittest.cc src/common/convert_UTF.c \
+	src/common/byte_cursor_unittest.cc src/common/convert_UTF.cc \
 	src/common/dwarf_cfi_to_module.cc \
 	src/common/dwarf_cfi_to_module_unittest.cc \
 	src/common/dwarf_cu_to_module.cc \
@@ -2157,7 +2157,7 @@
 @LINUX_HOST_TRUE@	src/client/minidump_file_writer-inl.h \
 @LINUX_HOST_TRUE@	src/client/minidump_file_writer.cc \
 @LINUX_HOST_TRUE@	src/client/minidump_file_writer.h \
-@LINUX_HOST_TRUE@	src/common/convert_UTF.c \
+@LINUX_HOST_TRUE@	src/common/convert_UTF.cc \
 @LINUX_HOST_TRUE@	src/common/convert_UTF.h src/common/md5.cc \
 @LINUX_HOST_TRUE@	src/common/md5.h \
 @LINUX_HOST_TRUE@	src/common/string_conversion.cc \
@@ -2522,7 +2522,7 @@
 
 @DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@src_common_dumper_unittest_SOURCES = \
 @DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@	src/common/byte_cursor_unittest.cc \
-@DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@	src/common/convert_UTF.c \
+@DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@	src/common/convert_UTF.cc \
 @DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@	src/common/dwarf_cfi_to_module.cc \
 @DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@	src/common/dwarf_cfi_to_module_unittest.cc \
 @DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@	src/common/dwarf_cu_to_module.cc \
@@ -5170,20 +5170,6 @@
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
-src/common/src_common_dumper_unittest-convert_UTF.o: src/common/convert_UTF.c
-@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT src/common/src_common_dumper_unittest-convert_UTF.o -MD -MP -MF src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo -c -o src/common/src_common_dumper_unittest-convert_UTF.o `test -f 'src/common/convert_UTF.c' || echo '$(srcdir)/'`src/common/convert_UTF.c
-@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='src/common/convert_UTF.c' object='src/common/src_common_dumper_unittest-convert_UTF.o' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o src/common/src_common_dumper_unittest-convert_UTF.o `test -f 'src/common/convert_UTF.c' || echo '$(srcdir)/'`src/common/convert_UTF.c
-
-src/common/src_common_dumper_unittest-convert_UTF.obj: src/common/convert_UTF.c
-@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT src/common/src_common_dumper_unittest-convert_UTF.obj -MD -MP -MF src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo -c -o src/common/src_common_dumper_unittest-convert_UTF.obj `if test -f 'src/common/convert_UTF.c'; then $(CYGPATH_W) 'src/common/convert_UTF.c'; else $(CYGPATH_W) '$(srcdir)/src/common/convert_UTF.c'; fi`
-@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='src/common/convert_UTF.c' object='src/common/src_common_dumper_unittest-convert_UTF.obj' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o src/common/src_common_dumper_unittest-convert_UTF.obj `if test -f 'src/common/convert_UTF.c'; then $(CYGPATH_W) 'src/common/convert_UTF.c'; else $(CYGPATH_W) '$(srcdir)/src/common/convert_UTF.c'; fi`
-
 .cc.o:
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
 @am__fastdepCXX_TRUE@	$(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
@@ -5662,6 +5648,20 @@
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o src/common/src_common_dumper_unittest-byte_cursor_unittest.obj `if test -f 'src/common/byte_cursor_unittest.cc'; then $(CYGPATH_W) 'src/common/byte_cursor_unittest.cc'; else $(CYGPATH_W) '$(srcdir)/src/common/byte_cursor_unittest.cc'; fi`
 
+src/common/src_common_dumper_unittest-convert_UTF.o: src/common/convert_UTF.cc
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT src/common/src_common_dumper_unittest-convert_UTF.o -MD -MP -MF src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo -c -o src/common/src_common_dumper_unittest-convert_UTF.o `test -f 'src/common/convert_UTF.cc' || echo '$(srcdir)/'`src/common/convert_UTF.cc
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='src/common/convert_UTF.cc' object='src/common/src_common_dumper_unittest-convert_UTF.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o src/common/src_common_dumper_unittest-convert_UTF.o `test -f 'src/common/convert_UTF.cc' || echo '$(srcdir)/'`src/common/convert_UTF.cc
+
+src/common/src_common_dumper_unittest-convert_UTF.obj: src/common/convert_UTF.cc
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT src/common/src_common_dumper_unittest-convert_UTF.obj -MD -MP -MF src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo -c -o src/common/src_common_dumper_unittest-convert_UTF.obj `if test -f 'src/common/convert_UTF.cc'; then $(CYGPATH_W) 'src/common/convert_UTF.cc'; else $(CYGPATH_W) '$(srcdir)/src/common/convert_UTF.cc'; fi`
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='src/common/convert_UTF.cc' object='src/common/src_common_dumper_unittest-convert_UTF.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o src/common/src_common_dumper_unittest-convert_UTF.obj `if test -f 'src/common/convert_UTF.cc'; then $(CYGPATH_W) 'src/common/convert_UTF.cc'; else $(CYGPATH_W) '$(srcdir)/src/common/convert_UTF.cc'; fi`
+
 src/common/src_common_dumper_unittest-dwarf_cfi_to_module.o: src/common/dwarf_cfi_to_module.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT src/common/src_common_dumper_unittest-dwarf_cfi_to_module.o -MD -MP -MF src/common/$(DEPDIR)/src_common_dumper_unittest-dwarf_cfi_to_module.Tpo -c -o src/common/src_common_dumper_unittest-dwarf_cfi_to_module.o `test -f 'src/common/dwarf_cfi_to_module.cc' || echo '$(srcdir)/'`src/common/dwarf_cfi_to_module.cc
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) src/common/$(DEPDIR)/src_common_dumper_unittest-dwarf_cfi_to_module.Tpo src/common/$(DEPDIR)/src_common_dumper_unittest-dwarf_cfi_to_module.Po
diff --git a/android/google_breakpad/Android.mk b/android/google_breakpad/Android.mk
index 74625eb..20a3f4f 100644
--- a/android/google_breakpad/Android.mk
+++ b/android/google_breakpad/Android.mk
@@ -82,7 +82,7 @@
     src/client/linux/minidump_writer/minidump_writer.cc \
     src/client/minidump_file_writer.cc \
     src/common/android/breakpad_getcontext.S \
-    src/common/convert_UTF.c \
+    src/common/convert_UTF.cc \
     src/common/md5.cc \
     src/common/string_conversion.cc \
     src/common/linux/elfutils.cc \
@@ -100,4 +100,4 @@
 
 include $(BUILD_STATIC_LIBRARY)
 
-# Done.
\ No newline at end of file
+# Done.
diff --git a/src/client/minidump_file_writer_unittest.cc b/src/client/minidump_file_writer_unittest.cc
index 60c364e..256e337 100644
--- a/src/client/minidump_file_writer_unittest.cc
+++ b/src/client/minidump_file_writer_unittest.cc
@@ -30,7 +30,7 @@
 // Author: waylonis@google.com (Dan Waylonis)
 
 /*
- g++ -I../ ../common/convert_UTF.c \
+ g++ -I../ ../common/convert_UTF.cc \
  ../common/string_conversion.cc \
  minidump_file_writer.cc \
  minidump_file_writer_unittest.cc \
diff --git a/src/client/solaris/handler/Makefile b/src/client/solaris/handler/Makefile
index beeb944..6da9464 100644
--- a/src/client/solaris/handler/Makefile
+++ b/src/client/solaris/handler/Makefile
@@ -40,13 +40,13 @@
 
 THREAD_SRC=solaris_lwp.cc
 SHARE_SRC=../../minidump_file_writer.cc\
+	  ../../../common/convert_UTF.cc\
 	  ../../../common/md5.cc\
 	  ../../../common/string_conversion.cc\
 	  ../../../common/solaris/file_id.cc\
 	  minidump_generator.cc
 HANDLER_SRC=exception_handler.cc\
 	  ../../../common/solaris/guid_creator.cc
-SHARE_C_SRC=../../../common/convert_UTF.c
 
 MINIDUMP_TEST_SRC=minidump_test.cc
 EXCEPTION_TEST_SRC=exception_handler_test.cc
@@ -54,11 +54,10 @@
 THREAD_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(THREAD_SRC))
 SHARE_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(SHARE_SRC))
 HANDLER_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(HANDLER_SRC))
-SHARE_C_OBJ=$(patsubst %.c,$(OBJ_DIR)/%.o,$(SHARE_C_SRC))
 MINIDUMP_TEST_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o, $(MINIDUMP_TEST_SRC))\
-		  $(THREAD_OBJ) $(SHARE_OBJ) $(SHARE_C_OBJ) $(HANDLER_OBJ)
+		  $(THREAD_OBJ) $(SHARE_OBJ) $(HANDLER_OBJ)
 EXCEPTION_TEST_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o, $(EXCEPTION_TEST_SRC))\
-          $(THREAD_OBJ) $(SHARE_OBJ) $(SHARE_C_OBJ) $(HANDLER_OBJ)
+          $(THREAD_OBJ) $(SHARE_OBJ) $(HANDLER_OBJ)
 
 BIN=$(BIN_DIR)/minidump_test\
     $(BIN_DIR)/exception_handler_test
diff --git a/src/common/common.gyp b/src/common/common.gyp
index fe646b4..7d5e5c7 100644
--- a/src/common/common.gyp
+++ b/src/common/common.gyp
@@ -61,7 +61,7 @@
         'android/ucontext_constants.h',
         'basictypes.h',
         'byte_cursor.h',
-        'convert_UTF.c',
+        'convert_UTF.cc',
         'convert_UTF.h',
         'dwarf/bytereader-inl.h',
         'dwarf/bytereader.cc',
diff --git a/src/common/convert_UTF.c b/src/common/convert_UTF.cc
similarity index 97%
rename from src/common/convert_UTF.c
rename to src/common/convert_UTF.cc
index 12a3c89..fed04e7 100644
--- a/src/common/convert_UTF.c
+++ b/src/common/convert_UTF.cc
@@ -60,10 +60,16 @@
 #include <stdio.h>
 #endif
 
-static const int halfShift  = 10; /* used for shifting by 10 bits */
+namespace google_breakpad {
 
-static const UTF32 halfBase = 0x0010000UL;
-static const UTF32 halfMask = 0x3FFUL;
+namespace {
+
+const int halfShift  = 10; /* used for shifting by 10 bits */
+
+const UTF32 halfBase = 0x0010000UL;
+const UTF32 halfMask = 0x3FFUL;
+
+}  // namespace
 
 #define UNI_SUR_HIGH_START  (UTF32)0xD800
 #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
@@ -183,6 +189,8 @@
 
 /* --------------------------------------------------------------------- */
 
+namespace {
+
 /*
  * Index into the table below with the first byte of a UTF-8 sequence to
  * get the number of trailing bytes that are supposed to follow it.
@@ -190,7 +198,7 @@
  * left as-is for anyone who may want to do such conversion, which was
  * allowed in earlier algorithms.
  */
-static const char trailingBytesForUTF8[256] = {
+const char trailingBytesForUTF8[256] = {
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -206,7 +214,7 @@
  * This table contains as many values as there might be trailing bytes
  * in a UTF-8 sequence.
  */
-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
   0x03C82080UL, 0xFA082080UL, 0x82082080UL };
 
 /*
@@ -216,7 +224,7 @@
  * (I.e., one byte sequence, two byte... etc.). Remember that sequencs
  * for *legal* UTF-8 will be 4 or fewer bytes total.
  */
-static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 
 /* --------------------------------------------------------------------- */
 
@@ -228,6 +236,8 @@
 * into an inline function.
 */
 
+}  // namespace
+
 /* --------------------------------------------------------------------- */
 
 ConversionResult ConvertUTF16toUTF8 (const UTF16** sourceStart, const UTF16* sourceEnd,
@@ -299,6 +309,8 @@
 
 /* --------------------------------------------------------------------- */
 
+namespace {
+
 /*
  * Utility routine to tell whether a sequence of bytes is legal UTF-8.
  * This must be called with the length pre-determined by the first byte.
@@ -309,8 +321,7 @@
  * If presented with a length > 4, this returns false.  The Unicode
  * definition of UTF-8 goes up to 4-byte sequences.
  */
-
-static Boolean isLegalUTF8(const UTF8 *source, int length) {
+Boolean isLegalUTF8(const UTF8 *source, int length) {
   UTF8 a;
   const UTF8 *srcptr = source+length;
   switch (length) {
@@ -335,6 +346,8 @@
   return true;
 }
 
+}  // namespace
+
 /* --------------------------------------------------------------------- */
 
 /*
@@ -552,3 +565,5 @@
 similarly unrolled loops.
 
 --------------------------------------------------------------------- */
+
+}  // namespace google_breakpad
diff --git a/src/common/convert_UTF.h b/src/common/convert_UTF.h
index 644d099..2f69495 100644
--- a/src/common/convert_UTF.h
+++ b/src/common/convert_UTF.h
@@ -106,6 +106,8 @@
 bit mask & shift operations.
 ------------------------------------------------------------------------ */
 
+namespace google_breakpad {
+
 typedef unsigned long	UTF32;	/* at least 32 bits */
 typedef unsigned short	UTF16;	/* at least 16 bits */
 typedef unsigned char	UTF8;	/* typically 8 bits */
@@ -130,11 +132,6 @@
 	lenientConversion
 } ConversionFlags;
 
-/* This is for C++ and does no harm in C */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 ConversionResult ConvertUTF8toUTF16 (const UTF8** sourceStart, const UTF8* sourceEnd,
                                      UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
 
@@ -155,9 +152,7 @@
 
 Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
 
-#ifdef __cplusplus
-}
-#endif
+}  // namespace google_breakpad
 
 /* --------------------------------------------------------------------- */