/* mingw/gettext/gettext-tools/src/xg-mixed-string.c - kiwivm - Git at Google */

 /* Handling strings that are given partially in the source encoding and
    partially in Unicode.
    Copyright (C) 2001-2018 Free Software Foundation, Inc.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

 #ifdef HAVE_CONFIG_H
 # include <config.h>
 #endif

 /* Specification.  */
 #include "xg-mixed-string.h"

 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>

 #include "error.h"
 #include "error-progname.h"
 #include "flexmember.h"
 #include "msgl-ascii.h"
 #include "po-charset.h"
 #include "unistr.h"
 #include "xalloc.h"

 #include "xg-pos.h"

 #include "gettext.h"
 #define _(str) gettext (str)


 /* Creates a freshly allocated segment of the given TYPE whose contents are
    a copy of the LENGTH bytes starting at STRING.  Exactly LENGTH bytes are
    copied; the byte count is kept in the 'length' field.  */
 static inline struct mixed_string_segment *
 segment_alloc (enum segment_type type, const char *string, size_t length)
 {
   size_t nbytes = FLEXSIZEOF (struct mixed_string_segment, contents, length);
   struct mixed_string_segment *seg = xmalloc (nbytes);

   memcpy (seg->contents, string, length);
   seg->length = length;
   seg->type = type;
   return seg;
 }

 /* Returns a newly allocated duplicate of SEGMENT: same type, same bytes,
    same length.  */
 static inline struct mixed_string_segment *
 segment_clone (const struct mixed_string_segment *segment)
 {
   const char *bytes = segment->contents;
   size_t nbytes = segment->length;

   return segment_alloc (segment->type, bytes, nbytes);
 }

 /* Builds a mixed string from the NUL-terminated STRING and records
    LCONTEXT, LOGICAL_FILE_NAME and LINE_NUMBER in it.  An empty STRING
    yields a mixed string without segments; any other STRING yields exactly
    one segment.  */
 mixed_string_ty *
 mixed_string_alloc_simple (const char *string,
                            lexical_context_ty lcontext,
                            const char *logical_file_name,
                            int line_number)
 {
   struct mixed_string *result = XMALLOC (struct mixed_string);

   result->lcontext = lcontext;
   result->logical_file_name = logical_file_name;
   result->line_number = line_number;
   result->segments = NULL;
   result->nsegments = 0;

   if (*string != '\0')
     {
       /* An optimization: a pure ASCII string in an ASCII or UTF-8 encoded
          source can be tagged as UTF-8 right away.  In the general case the
          segment stays tagged as source encoded.  */
       int tag_as_utf8 =
         ((xgettext_current_source_encoding == po_charset_ascii
           || xgettext_current_source_encoding == po_charset_utf8)
          && is_ascii_string (string));
       size_t nbytes = strlen (string);

       result->segments = XNMALLOC (1, struct mixed_string_segment *);
       result->segments[0] =
         segment_alloc (tag_as_utf8 ? utf8_encoded : source_encoded,
                        string, nbytes);
       result->nsegments = 1;
     }

   return result;
 }

 /* Builds a mixed string from the NUL-terminated STRING, tagging its bytes
    as UTF-8 encoded, and records LCONTEXT, LOGICAL_FILE_NAME and LINE_NUMBER
    in it.  An empty STRING yields a mixed string without segments.  */
 mixed_string_ty *
 mixed_string_alloc_utf8 (const char *string,
                          lexical_context_ty lcontext,
                          const char *logical_file_name,
                          int line_number)
 {
   struct mixed_string *result = XMALLOC (struct mixed_string);

   result->lcontext = lcontext;
   result->logical_file_name = logical_file_name;
   result->line_number = line_number;
   result->segments = NULL;
   result->nsegments = 0;

   if (*string != '\0')
     {
       size_t nbytes = strlen (string);

       result->segments = XNMALLOC (1, struct mixed_string_segment *);
       result->segments[0] = segment_alloc (utf8_encoded, string, nbytes);
       result->nsegments = 1;
     }

   return result;
 }

 /* Returns a newly allocated deep copy of MS1: every segment is duplicated,
    and the lexical context, file name pointer and line number are copied
    over.  */
 mixed_string_ty *
 mixed_string_clone (const mixed_string_ty *ms1)
 {
   struct mixed_string *copy = XMALLOC (struct mixed_string);
   size_t count = ms1->nsegments;

   copy->lcontext = ms1->lcontext;
   copy->logical_file_name = ms1->logical_file_name;
   copy->line_number = ms1->line_number;

   /* A string without segments carries a NULL segment array.  */
   copy->segments = NULL;
   if (count > 0)
     {
       size_t k;

       copy->segments = XNMALLOC (count, struct mixed_string_segment *);
       for (k = 0; k < count; k++)
         copy->segments[k] = segment_clone (ms1->segments[k]);
     }
   copy->nsegments = count;

   return copy;
 }

 char *
 mixed_string_contents (const mixed_string_ty *ms)
 {
   size_t nsegments = ms->nsegments;
   /* Trivial cases.  */
   if (nsegments == 0)
     return xstrdup ("");
   if (nsegments == 1 && ms->segments[0]->type == utf8_encoded)
     {
       /* Return the segment, with a NUL at the end.  */
       size_t len = ms->segments[0]->length;
       char *string = XNMALLOC (len + 1, char);
       memcpy (string, ms->segments[0]->contents, len);
       string[len] = '\0';
       return string;
     }
   /* General case.  */
   {
     size_t i;

     for (i = 0; i < nsegments - 1; i++)
       if (memchr (ms->segments[i]->contents, '\0', ms->segments[i]->length)
           != NULL)
         {
           /* Segment i contains a NUL character.  Ignore the remaining
              segments.  */
           nsegments = i + 1;
           break;
         }
   }
   {
     char **converted_segments = XNMALLOC (nsegments, char *);
     size_t length;

     length = 0;
     {
       size_t i;

       for (i = 0; i < nsegments; i++)
         if (ms->segments[i]->type == source_encoded)
           {
             char *source_encoded_string;
             char *utf8_encoded_string;

             /* Copy the segment's contents, with a NUL at the end.  */
             {
               size_t len = ms->segments[i]->length;
               source_encoded_string = XNMALLOC (len + 1, char);
               memcpy (source_encoded_string, ms->segments[i]->contents, len);
               source_encoded_string[len] = '\0';
             }
             /* Convert it to UTF-8 encoding.  */
             utf8_encoded_string =
               from_current_source_encoding (source_encoded_string,
                                             ms->lcontext,
                                             ms->logical_file_name,
                                             ms->line_number);
             if (utf8_encoded_string != source_encoded_string)
               free (source_encoded_string);
             converted_segments[i] = utf8_encoded_string;
             length += strlen (utf8_encoded_string);
           }
         else
           length += ms->segments[i]->length;
     }

     {
       char *string = XNMALLOC (length + 1, char);
       {
         char *p;
         size_t i;

         p = string;
         for (i = 0; i < nsegments; i++)
           if (ms->segments[i]->type == source_encoded)
             {
               p = stpcpy (p, converted_segments[i]);
               free (converted_segments[i]);
             }
           else
             {
               memcpy (p, ms->segments[i]->contents, ms->segments[i]->length);
               p += ms->segments[i]->length;
             }
         assert (p == string + length);
         *p = '\0';
       }

       free (converted_segments);
       return string;
     }
   }
 }

 /* Releases MS: every segment, the segment array, and MS itself.  */
 void
 mixed_string_free (mixed_string_ty *ms)
 {
   struct mixed_string_segment **segs = ms->segments;
   size_t count = ms->nsegments;
   size_t k;

   /* With zero segments the loop body never runs, so SEGS is not
      dereferenced.  */
   for (k = 0; k < count; k++)
     free (segs[k]);

   free (segs);
   free (ms);
 }

 /* Returns the contents of MS as a freshly allocated string (see
    mixed_string_contents) and frees MS.  */
 char *
 mixed_string_contents_free1 (mixed_string_ty *ms)
 {
   char *result;

   result = mixed_string_contents (ms);
   mixed_string_free (ms);

   return result;
 }

 mixed_string_ty *
 mixed_string_concat (const mixed_string_ty *ms1,
                      const mixed_string_ty *ms2)
 {
   /* Trivial cases.  */
   if (ms2->nsegments == 0)
     return mixed_string_clone (ms1);
   if (ms1->nsegments == 0)
     return mixed_string_clone (ms2);
   /* General case.  */
   {
     struct mixed_string *ms = XMALLOC (struct mixed_string);
     size_t nsegments = ms1->nsegments + ms2->nsegments;
     size_t j;
     if (ms1->segments[ms1->nsegments-1]->type == ms2->segments[0]->type)
       {
         /* Combine the last segment of ms1 with the first segment of ms2.  */
         size_t i;

         nsegments -= 1;
         ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *);
         j = 0;
         for (i = 0; i < ms1->nsegments - 1; i++)
           ms->segments[j++] = segment_clone (ms1->segments[i]);
         {
           size_t len1 = ms1->segments[i]->length;
           size_t len2 = ms2->segments[0]->length;
           struct mixed_string_segment *newseg =
             (struct mixed_string_segment *)
             xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents,
                                  len1 + len2));
           newseg->type = ms2->segments[0]->type;
           newseg->length = len1 + len2;
           memcpy (newseg->contents, ms1->segments[i]->contents, len1);
           memcpy (newseg->contents + len1, ms2->segments[0]->contents, len2);
           ms->segments[j++] = newseg;
         }
         for (i = 1; i < ms2->nsegments; i++)
           ms->segments[j++] = segment_clone (ms2->segments[i]);
       }
     else
       {
         size_t i;

         ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *);
         j = 0;
         for (i = 0; i < ms1->nsegments; i++)
           ms->segments[j++] = segment_clone (ms1->segments[i]);
         for (i = 0; i < ms2->nsegments; i++)
           ms->segments[j++] = segment_clone (ms2->segments[i]);
       }
     assert (j == nsegments);
     ms->nsegments = nsegments;
     ms->lcontext = ms1->lcontext;
     ms->logical_file_name = ms1->logical_file_name;
     ms->line_number = ms1->line_number;

     return ms;
   }
 }

 /* Returns a mixed string holding the contents of MS1 followed by the
    contents of MS2, consuming MS1.  MS2 is not modified.  When MS2 has no
    segments, MS1 itself is returned; otherwise MS1 is freed (its segments
    are either moved into the result or freed) and must not be used
    afterwards.  */
 mixed_string_ty *
 mixed_string_concat_free1 (mixed_string_ty *ms1, const mixed_string_ty *ms2)
 {
   size_t n1 = ms1->nsegments;
   size_t n2 = ms2->nsegments;

   /* Trivial cases: one side has no segments.  */
   if (n2 == 0)
     return ms1;
   if (n1 == 0)
     {
       mixed_string_free (ms1);
       return mixed_string_clone (ms2);
     }

   /* General case.  */
   {
     struct mixed_string_segment *tail1 = ms1->segments[n1 - 1];
     const struct mixed_string_segment *head2 = ms2->segments[0];
     /* When the last segment of ms1 and the first segment of ms2 have the
        same type, they are combined into a single segment.  */
     int mergeable = (tail1->type == head2->type);
     size_t kept1 = (mergeable ? n1 - 1 : n1);
     size_t total = kept1 + n2;
     struct mixed_string *result = XMALLOC (struct mixed_string);
     size_t out = 0;
     size_t k;

     result->segments = XNMALLOC (total, struct mixed_string_segment *);

     /* Segments of ms1 are moved, not cloned.  */
     for (k = 0; k < kept1; k++)
       result->segments[out++] = ms1->segments[k];

     if (mergeable)
       {
         size_t len1 = tail1->length;
         size_t len2 = head2->length;
         struct mixed_string_segment *joined =
           xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents,
                                len1 + len2));

         joined->type = head2->type;
         joined->length = len1 + len2;
         memcpy (joined->contents, tail1->contents, len1);
         memcpy (joined->contents + len1, head2->contents, len2);
         result->segments[out++] = joined;

         /* The old last segment of ms1 has been copied into JOINED.  */
         free (tail1);
       }

     /* The first segment of ms2 was consumed by the merge, if any.  */
     for (k = (mergeable ? 1 : 0); k < n2; k++)
       result->segments[out++] = segment_clone (ms2->segments[k]);

     assert (out == total);
     free (ms1->segments);
     result->nsegments = total;
     result->lcontext = ms1->lcontext;
     result->logical_file_name = ms1->logical_file_name;
     result->line_number = ms1->line_number;
     free (ms1);

     return result;
   }
 }


 /* Puts BP into its initial state: no finished segments, no current buffer,
    no pending UTF-16 surrogate, and a current segment type of -1.  LCONTEXT,
    LOGICAL_FILE_NAME and LINE_NUMBER are stored in BP.  */
 void
 mixed_string_buffer_init (struct mixed_string_buffer *bp,
                           lexical_context_ty lcontext,
                           const char *logical_file_name,
                           int line_number)
 {
   /* Position and context information supplied by the caller.  */
   bp->lcontext = lcontext;
   bp->logical_file_name = logical_file_name;
   bp->line_number = line_number;

   /* The list of finished segments.  */
   bp->segments = NULL;
   bp->nsegments = 0;
   bp->nsegments_allocated = 0;

   /* The segment being accumulated.  */
   bp->curr_type = -1;
   bp->curr_buffer = NULL;
   bp->curr_buflen = 0;
   bp->curr_allocated = 0;

   /* The pending first half of a UTF-16 surrogate pair.  */
   bp->utf16_surr = 0;
 }

 /* Returns true if BP has no finished segments and no bytes in its current
    buffer.  */
 bool
 mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp)
 {
   if (bp->nsegments != 0)
     return false;

   return bp->curr_buflen == 0;
 }

 /* Auxiliary function: Make sure that bp->curr_buffer has room for COUNT
    bytes beyond the bp->curr_buflen bytes already in use.  When it has to
    grow, the new size is 2 * old size + 10, or the required size if that is
    larger.  */
 static inline void
 mixed_string_buffer_grow_curr_buffer (struct mixed_string_buffer *bp,
                                       size_t count)
 {
   size_t doubled;

   if (bp->curr_buflen + count <= bp->curr_allocated)
     return;

   doubled = 2 * bp->curr_allocated + 10;
   bp->curr_allocated =
     (doubled < bp->curr_buflen + count ? bp->curr_buflen + count : doubled);
   bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
 }

 /* Auxiliary function: Store the byte C at the end of bp->curr_buffer,
    enlarging the buffer to 2 * old size + 10 bytes first when it is full.  */
 static inline void
 mixed_string_buffer_append_to_curr_buffer (struct mixed_string_buffer *bp,
                                            unsigned char c)
 {
   if (bp->curr_allocated == bp->curr_buflen)
     {
       bp->curr_allocated = bp->curr_allocated * 2 + 10;
       bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
     }

   bp->curr_buffer[bp->curr_buflen] = c;
   bp->curr_buflen++;
 }

 /* Auxiliary function: Assuming bp->curr_type == utf8_encoded, encode the
    Unicode character UC as UTF-8 and append the resulting bytes to
    bp->curr_buffer.  UC must be < 0x110000; the function aborts when
    u8_uctomb reports a failure.  */
 static inline void
 mixed_string_buffer_append_to_utf8_buffer (struct mixed_string_buffer *bp,
                                            ucs4_t uc)
 {
   unsigned char encoded[6];
   int nbytes;

   nbytes = u8_uctomb (encoded, uc, 6);
   if (nbytes >= 0)
     {
       mixed_string_buffer_grow_curr_buffer (bp, nbytes);
       memcpy (bp->curr_buffer + bp->curr_buflen, encoded, nbytes);
       bp->curr_buflen += nbytes;
     }
   else
     /* The caller should have ensured that uc is not out-of-range.  */
     abort ();
 }

 /* Auxiliary function: Assuming bp->curr_type == utf8_encoded, handle the
    attempt to append a lone surrogate to bp->curr_buffer.
    BP is the buffer being filled; UC is the unpaired surrogate code point.
    Emits a warning and appends U+FFFD instead of UC.  */
 static void
 mixed_string_buffer_append_lone_surrogate (struct mixed_string_buffer *bp,
                                            ucs4_t uc)
 {
   /* A half surrogate is invalid, therefore use U+FFFD instead.
      It may be valid in a particular programming language.
      But a half surrogate is invalid in UTF-8:
        - RFC 3629 says
            "The definition of UTF-8 prohibits encoding character
             numbers between U+D800 and U+DFFF".
        - Unicode 4.0 chapter 3
          <http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf>
          section 3.9, p.77, says
            "Because surrogate code points are not Unicode scalar
             values, any UTF-8 byte sequence that would otherwise
             map to code points D800..DFFF is ill-formed."
          and in table 3-6, p. 78, does not mention D800..DFFF.
        - The unicode.org FAQ question "How do I convert an unpaired
          UTF-16 surrogate to UTF-8?" has the answer
            "By representing such an unpaired surrogate on its own
             as a 3-byte sequence, the resulting UTF-8 data stream
             would become ill-formed."
      So use U+FFFD instead.  */

   /* NOTE(review): logical_file_name and line_number below are not the
      fields of BP; presumably they are the current-position variables
      declared in "xg-pos.h" -- confirm this is the intended location.  */

   /* Suppress the program name prefix for this one message only.  */
   error_with_progname = false;
   /* %X requires an 'unsigned int' argument; ucs4_t is not guaranteed to be
      that exact type on every platform, hence the cast.  */
   error (0, 0, _("%s:%d: warning: lone surrogate U+%04X"),
          logical_file_name, line_number, (unsigned int) uc);
   error_with_progname = true;
   mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd);
 }

 /* Auxiliary function: Assuming bp->curr_type == utf8_encoded, dispose of a
    pending bp->utf16_surr, if there is one, by handling it as a lone
    surrogate, and clear it.  */
 static inline void
 mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
 {
   /* Nothing pending: nothing to do.  */
   if (bp->utf16_surr == 0)
     return;

   mixed_string_buffer_append_lone_surrogate (bp, bp->utf16_surr);
   bp->utf16_surr = 0;
 }

 /* Auxiliary function: Store NEWSEG at the end of bp->segments, enlarging
    the array to 2 * old capacity + 1 entries first when it is full.  */
 static inline void
 mixed_string_buffer_add_segment (struct mixed_string_buffer *bp,
                                  struct mixed_string_segment *newseg)
 {
   if (bp->nsegments_allocated == bp->nsegments)
     {
       bp->nsegments_allocated = 2 * bp->nsegments_allocated + 1;
       bp->segments =
         xrealloc (bp->segments,
                   bp->nsegments_allocated * sizeof *bp->segments);
     }

   bp->segments[bp->nsegments] = newseg;
   bp->nsegments++;
 }

 /* Auxiliary function: Turn the bytes accumulated in bp->curr_buffer
    (including a pending bp->utf16_surr, when in UTF-8 mode) into a new
    entry of bp->segments, and empty the current buffer.  */
 static void
 mixed_string_buffer_flush_curr (struct mixed_string_buffer *bp)
 {
   /* A pending surrogate must land in the buffer before it is cut off.  */
   if (bp->curr_type == utf8_encoded)
     mixed_string_buffer_flush_utf16_surr (bp);

   /* A current type of -1 means there is nothing to flush.  */
   if (bp->curr_type == -1)
     return;

   /* Empty buffers do not produce a segment.  */
   if (bp->curr_buflen > 0)
     mixed_string_buffer_add_segment (bp,
                                      segment_alloc (bp->curr_type,
                                                     bp->curr_buffer,
                                                     bp->curr_buflen));
   bp->curr_buflen = 0;
 }

 /* Appends the byte C, given in the source encoding, to BP.  Only the low
    'unsigned char' part of C is stored.  */
 void
 mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c)
 {
   unsigned char byte = (unsigned char) c;
   int mode_switch = (bp->curr_type != source_encoded);

   /* Switch to multibyte character mode, finishing whatever was being
      accumulated in another mode.  */
   if (mode_switch)
     {
       mixed_string_buffer_flush_curr (bp);
       bp->curr_type = source_encoded;
     }

   mixed_string_buffer_append_to_curr_buffer (bp, byte);
 }

 /* Appends the Unicode character or UTF-16 code unit C to BP.  A code unit
    in 0xD800..0xDBFF is held back in bp->utf16_surr; if the next call brings
    a code unit in 0xDC00..0xDFFF, the two are combined into one character.
    Surrogates that do not end up in such a pair are handled as lone
    surrogates.  */
 void
 mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, int c)
 {
   int is_high_surrogate = (c >= 0xd800 && c < 0xdc00);
   int is_low_surrogate = (c >= 0xdc00 && c < 0xe000);

   /* Switch to Unicode character mode.  */
   if (bp->curr_type != utf8_encoded)
     {
       mixed_string_buffer_flush_curr (bp);
       bp->curr_type = utf8_encoded;
       assert (bp->utf16_surr == 0);
     }

   /* Does this character and the previous one form a Unicode surrogate
      character pair?  */
   if (is_low_surrogate && bp->utf16_surr != 0)
     {
       unsigned short pair[2];
       ucs4_t combined;

       pair[0] = bp->utf16_surr;
       pair[1] = c;
       if (u16_mbtouc (&combined, pair, 2) != 2)
         abort ();

       mixed_string_buffer_append_to_utf8_buffer (bp, combined);
       bp->utf16_surr = 0;
       return;
     }

   /* No pair: a held-back first half, if any, is a lone surrogate.  */
   mixed_string_buffer_flush_utf16_surr (bp);

   if (is_high_surrogate)
     /* Hold it back until the next code unit is known.  */
     bp->utf16_surr = c;
   else if (is_low_surrogate)
     mixed_string_buffer_append_lone_surrogate (bp, c);
   else
     mixed_string_buffer_append_to_utf8_buffer (bp, c);
 }

 /* Frees the memory held by BP: its finished segments, the segment array,
    and the current buffer.  BP itself is not freed.  */
 void
 mixed_string_buffer_destroy (struct mixed_string_buffer *bp)
 {
   struct mixed_string_segment **segs = bp->segments;
   size_t count = bp->nsegments;
   size_t k;

   /* With zero segments the loop body never runs, so SEGS is not
      dereferenced.  */
   for (k = 0; k < count; k++)
     free (segs[k]);

   free (segs);
   free (bp->curr_buffer);
 }

 /* Finishes BP and returns the accumulated contents as a newly allocated
    mixed string.  The segment array of BP is handed over to the result
    (through xrealloc) and the current buffer of BP is freed; the fields of
    BP are not reset afterwards.  */
 mixed_string_ty *
 mixed_string_buffer_result (struct mixed_string_buffer *bp)
 {
   struct mixed_string *result;
   size_t count;

   /* Turn whatever is still being accumulated into a segment.  */
   mixed_string_buffer_flush_curr (bp);

   result = XMALLOC (struct mixed_string);
   count = bp->nsegments;

   if (count == 0)
     {
       assert (bp->segments == NULL);
       result->segments = NULL;
     }
   else
     /* Resize the array to exactly COUNT entries.  */
     result->segments =
       xrealloc (bp->segments, count * sizeof (struct mixed_string_segment *));

   result->nsegments = count;
   result->lcontext = bp->lcontext;
   result->logical_file_name = bp->logical_file_name;
   result->line_number = bp->line_number;

   free (bp->curr_buffer);

   return result;
 }
	/* Handling strings that are given partially in the source encoding and
	partially in Unicode.
	Copyright (C) 2001-2018 Free Software Foundation, Inc.

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <https://www.gnu.org/licenses/>. */

	#ifdef HAVE_CONFIG_H
	# include <config.h>
	#endif

	/* Specification. */
	#include "xg-mixed-string.h"

	#include <assert.h>
	#include <stdlib.h>
	#include <string.h>

	#include "error.h"
	#include "error-progname.h"
	#include "flexmember.h"
	#include "msgl-ascii.h"
	#include "po-charset.h"
	#include "unistr.h"
	#include "xalloc.h"

	#include "xg-pos.h"

	#include "gettext.h"
	#define _(str) gettext (str)


	/* Creates a freshly allocated segment of the given TYPE whose contents are
	   a copy of the LENGTH bytes starting at STRING.  Exactly LENGTH bytes are
	   copied; the byte count is kept in the 'length' field.  */
	static inline struct mixed_string_segment *
	segment_alloc (enum segment_type type, const char *string, size_t length)
	{
	  size_t nbytes = FLEXSIZEOF (struct mixed_string_segment, contents, length);
	  struct mixed_string_segment *seg = xmalloc (nbytes);

	  memcpy (seg->contents, string, length);
	  seg->length = length;
	  seg->type = type;
	  return seg;
	}

	/* Returns a newly allocated duplicate of SEGMENT: same type, same bytes,
	   same length.  */
	static inline struct mixed_string_segment *
	segment_clone (const struct mixed_string_segment *segment)
	{
	  const char *bytes = segment->contents;
	  size_t nbytes = segment->length;

	  return segment_alloc (segment->type, bytes, nbytes);
	}

	/* Builds a mixed string from the NUL-terminated STRING and records
	   LCONTEXT, LOGICAL_FILE_NAME and LINE_NUMBER in it.  An empty STRING
	   yields a mixed string without segments; any other STRING yields exactly
	   one segment.  */
	mixed_string_ty *
	mixed_string_alloc_simple (const char *string,
	                           lexical_context_ty lcontext,
	                           const char *logical_file_name,
	                           int line_number)
	{
	  struct mixed_string *ms = XMALLOC (struct mixed_string);

	  if (*string == '\0')
	    {
	      /* An empty string.  */
	      ms->segments = NULL;
	      ms->nsegments = 0;
	    }
	  else
	    {
	      ms->segments = XNMALLOC (1, struct mixed_string_segment *);
	      /* The two encoding tests are alternatives, joined by a logical
	         OR.  */
	      if ((xgettext_current_source_encoding == po_charset_ascii
	           || xgettext_current_source_encoding == po_charset_utf8)
	          && is_ascii_string (string))
	        /* An optimization: a pure ASCII string in an ASCII or UTF-8
	           encoded source can be tagged as UTF-8 right away.  */
	        ms->segments[0] =
	          segment_alloc (utf8_encoded, string, strlen (string));
	      else
	        /* The general case.  */
	        ms->segments[0] =
	          segment_alloc (source_encoded, string, strlen (string));
	      ms->nsegments = 1;
	    }
	  ms->lcontext = lcontext;
	  ms->logical_file_name = logical_file_name;
	  ms->line_number = line_number;

	  return ms;
	}

	/* Builds a mixed string from the NUL-terminated STRING, tagging its bytes
	   as UTF-8 encoded, and records LCONTEXT, LOGICAL_FILE_NAME and LINE_NUMBER
	   in it.  An empty STRING yields a mixed string without segments.  */
	mixed_string_ty *
	mixed_string_alloc_utf8 (const char *string,
	                         lexical_context_ty lcontext,
	                         const char *logical_file_name,
	                         int line_number)
	{
	  struct mixed_string *result = XMALLOC (struct mixed_string);

	  result->lcontext = lcontext;
	  result->logical_file_name = logical_file_name;
	  result->line_number = line_number;
	  result->segments = NULL;
	  result->nsegments = 0;

	  if (*string != '\0')
	    {
	      size_t nbytes = strlen (string);

	      result->segments = XNMALLOC (1, struct mixed_string_segment *);
	      result->segments[0] = segment_alloc (utf8_encoded, string, nbytes);
	      result->nsegments = 1;
	    }

	  return result;
	}

	/* Returns a newly allocated deep copy of MS1: every segment is duplicated,
	   and the lexical context, file name pointer and line number are copied
	   over.  */
	mixed_string_ty *
	mixed_string_clone (const mixed_string_ty *ms1)
	{
	  struct mixed_string *copy = XMALLOC (struct mixed_string);
	  size_t count = ms1->nsegments;

	  copy->lcontext = ms1->lcontext;
	  copy->logical_file_name = ms1->logical_file_name;
	  copy->line_number = ms1->line_number;

	  /* A string without segments carries a NULL segment array.  */
	  copy->segments = NULL;
	  if (count > 0)
	    {
	      size_t k;

	      copy->segments = XNMALLOC (count, struct mixed_string_segment *);
	      for (k = 0; k < count; k++)
	        copy->segments[k] = segment_clone (ms1->segments[k]);
	    }
	  copy->nsegments = count;

	  return copy;
	}

	char *
	mixed_string_contents (const mixed_string_ty *ms)
	{
	size_t nsegments = ms->nsegments;
	/* Trivial cases. */
	if (nsegments == 0)
	return xstrdup ("");
	if (nsegments == 1 && ms->segments[0]->type == utf8_encoded)
	{
	/* Return the segment, with a NUL at the end. */
	size_t len = ms->segments[0]->length;
	char *string = XNMALLOC (len + 1, char);
	memcpy (string, ms->segments[0]->contents, len);
	string[len] = '\0';
	return string;
	}
	/* General case. */
	{
	size_t i;

	for (i = 0; i < nsegments - 1; i++)
	if (memchr (ms->segments[i]->contents, '\0', ms->segments[i]->length)
	!= NULL)
	{
	/* Segment i contains a NUL character. Ignore the remaining
	segments. */
	nsegments = i + 1;
	break;
	}
	}
	{
	char *converted_segments = XNMALLOC (nsegments, char );
	size_t length;

	length = 0;
	{
	size_t i;

	for (i = 0; i < nsegments; i++)
	if (ms->segments[i]->type == source_encoded)
	{
	char *source_encoded_string;
	char *utf8_encoded_string;

	/* Copy the segment's contents, with a NUL at the end. */
	{
	size_t len = ms->segments[i]->length;
	source_encoded_string = XNMALLOC (len + 1, char);
	memcpy (source_encoded_string, ms->segments[i]->contents, len);
	source_encoded_string[len] = '\0';
	}
	/* Convert it to UTF-8 encoding. */
	utf8_encoded_string =
	from_current_source_encoding (source_encoded_string,
	ms->lcontext,
	ms->logical_file_name,
	ms->line_number);
	if (utf8_encoded_string != source_encoded_string)
	free (source_encoded_string);
	converted_segments[i] = utf8_encoded_string;
	length += strlen (utf8_encoded_string);
	}
	else
	length += ms->segments[i]->length;
	}

	{
	char *string = XNMALLOC (length + 1, char);
	{
	char *p;
	size_t i;

	p = string;
	for (i = 0; i < nsegments; i++)
	if (ms->segments[i]->type == source_encoded)
	{
	p = stpcpy (p, converted_segments[i]);
	free (converted_segments[i]);
	}
	else
	{
	memcpy (p, ms->segments[i]->contents, ms->segments[i]->length);
	p += ms->segments[i]->length;
	}
	assert (p == string + length);
	*p = '\0';
	}

	free (converted_segments);
	return string;
	}
	}
	}

	/* Releases MS: every segment, the segment array, and MS itself.  */
	void
	mixed_string_free (mixed_string_ty *ms)
	{
	  struct mixed_string_segment **segs = ms->segments;
	  size_t count = ms->nsegments;
	  size_t k;

	  /* With zero segments the loop body never runs, so SEGS is not
	     dereferenced.  */
	  for (k = 0; k < count; k++)
	    free (segs[k]);

	  free (segs);
	  free (ms);
	}

	/* Returns the contents of MS as a freshly allocated string (see
	   mixed_string_contents) and frees MS.  */
	char *
	mixed_string_contents_free1 (mixed_string_ty *ms)
	{
	  char *result;

	  result = mixed_string_contents (ms);
	  mixed_string_free (ms);

	  return result;
	}

	mixed_string_ty *
	mixed_string_concat (const mixed_string_ty *ms1,
	const mixed_string_ty *ms2)
	{
	/* Trivial cases. */
	if (ms2->nsegments == 0)
	return mixed_string_clone (ms1);
	if (ms1->nsegments == 0)
	return mixed_string_clone (ms2);
	/* General case. */
	{
	struct mixed_string *ms = XMALLOC (struct mixed_string);
	size_t nsegments = ms1->nsegments + ms2->nsegments;
	size_t j;
	if (ms1->segments[ms1->nsegments-1]->type == ms2->segments[0]->type)
	{
	/* Combine the last segment of ms1 with the first segment of ms2. */
	size_t i;

	nsegments -= 1;
	ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *);
	j = 0;
	for (i = 0; i < ms1->nsegments - 1; i++)
	ms->segments[j++] = segment_clone (ms1->segments[i]);
	{
	size_t len1 = ms1->segments[i]->length;
	size_t len2 = ms2->segments[0]->length;
	struct mixed_string_segment *newseg =
	(struct mixed_string_segment *)
	xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents,
	len1 + len2));
	newseg->type = ms2->segments[0]->type;
	newseg->length = len1 + len2;
	memcpy (newseg->contents, ms1->segments[i]->contents, len1);
	memcpy (newseg->contents + len1, ms2->segments[0]->contents, len2);
	ms->segments[j++] = newseg;
	}
	for (i = 1; i < ms2->nsegments; i++)
	ms->segments[j++] = segment_clone (ms2->segments[i]);
	}
	else
	{
	size_t i;

	ms->segments = XNMALLOC (nsegments, struct mixed_string_segment *);
	j = 0;
	for (i = 0; i < ms1->nsegments; i++)
	ms->segments[j++] = segment_clone (ms1->segments[i]);
	for (i = 0; i < ms2->nsegments; i++)
	ms->segments[j++] = segment_clone (ms2->segments[i]);
	}
	assert (j == nsegments);
	ms->nsegments = nsegments;
	ms->lcontext = ms1->lcontext;
	ms->logical_file_name = ms1->logical_file_name;
	ms->line_number = ms1->line_number;

	return ms;
	}
	}

	/* Returns a mixed string holding the contents of MS1 followed by the
	   contents of MS2, consuming MS1.  MS2 is not modified.  When MS2 has no
	   segments, MS1 itself is returned; otherwise MS1 is freed (its segments
	   are either moved into the result or freed) and must not be used
	   afterwards.  Both arguments are pointers, as the '->' accesses and the
	   free (ms1) call below require.  */
	mixed_string_ty *
	mixed_string_concat_free1 (mixed_string_ty *ms1, const mixed_string_ty *ms2)
	{
	  size_t n1 = ms1->nsegments;
	  size_t n2 = ms2->nsegments;

	  /* Trivial cases: one side has no segments.  */
	  if (n2 == 0)
	    return ms1;
	  if (n1 == 0)
	    {
	      mixed_string_free (ms1);
	      return mixed_string_clone (ms2);
	    }

	  /* General case.  */
	  {
	    struct mixed_string_segment *tail1 = ms1->segments[n1 - 1];
	    const struct mixed_string_segment *head2 = ms2->segments[0];
	    /* When the last segment of ms1 and the first segment of ms2 have the
	       same type, they are combined into a single segment.  */
	    int mergeable = (tail1->type == head2->type);
	    size_t kept1 = (mergeable ? n1 - 1 : n1);
	    size_t total = kept1 + n2;
	    struct mixed_string *result = XMALLOC (struct mixed_string);
	    size_t out = 0;
	    size_t k;

	    result->segments = XNMALLOC (total, struct mixed_string_segment *);

	    /* Segments of ms1 are moved, not cloned.  */
	    for (k = 0; k < kept1; k++)
	      result->segments[out++] = ms1->segments[k];

	    if (mergeable)
	      {
	        size_t len1 = tail1->length;
	        size_t len2 = head2->length;
	        struct mixed_string_segment *joined =
	          xmalloc (FLEXSIZEOF (struct mixed_string_segment, contents,
	                               len1 + len2));

	        joined->type = head2->type;
	        joined->length = len1 + len2;
	        memcpy (joined->contents, tail1->contents, len1);
	        memcpy (joined->contents + len1, head2->contents, len2);
	        result->segments[out++] = joined;

	        /* The old last segment of ms1 has been copied into JOINED.  */
	        free (tail1);
	      }

	    /* The first segment of ms2 was consumed by the merge, if any.  */
	    for (k = (mergeable ? 1 : 0); k < n2; k++)
	      result->segments[out++] = segment_clone (ms2->segments[k]);

	    assert (out == total);
	    free (ms1->segments);
	    result->nsegments = total;
	    result->lcontext = ms1->lcontext;
	    result->logical_file_name = ms1->logical_file_name;
	    result->line_number = ms1->line_number;
	    free (ms1);

	    return result;
	  }
	}


	/* Puts BP into its initial state: no finished segments, no current buffer,
	   no pending UTF-16 surrogate, and a current segment type of -1.  LCONTEXT,
	   LOGICAL_FILE_NAME and LINE_NUMBER are stored in BP.  */
	void
	mixed_string_buffer_init (struct mixed_string_buffer *bp,
	                          lexical_context_ty lcontext,
	                          const char *logical_file_name,
	                          int line_number)
	{
	  /* Position and context information supplied by the caller.  */
	  bp->lcontext = lcontext;
	  bp->logical_file_name = logical_file_name;
	  bp->line_number = line_number;

	  /* The list of finished segments.  */
	  bp->segments = NULL;
	  bp->nsegments = 0;
	  bp->nsegments_allocated = 0;

	  /* The segment being accumulated.  */
	  bp->curr_type = -1;
	  bp->curr_buffer = NULL;
	  bp->curr_buflen = 0;
	  bp->curr_allocated = 0;

	  /* The pending first half of a UTF-16 surrogate pair.  */
	  bp->utf16_surr = 0;
	}

	/* Returns true if BP has no finished segments and no bytes in its current
	   buffer.  */
	bool
	mixed_string_buffer_is_empty (const struct mixed_string_buffer *bp)
	{
	  if (bp->nsegments != 0)
	    return false;

	  return bp->curr_buflen == 0;
	}

	/* Auxiliary function: Make sure that bp->curr_buffer has room for COUNT
	   bytes beyond the bp->curr_buflen bytes already in use.  When it has to
	   grow, the new size is 2 * old size + 10, or the required size if that is
	   larger.  */
	static inline void
	mixed_string_buffer_grow_curr_buffer (struct mixed_string_buffer *bp,
	                                      size_t count)
	{
	  size_t doubled;

	  if (bp->curr_buflen + count <= bp->curr_allocated)
	    return;

	  doubled = 2 * bp->curr_allocated + 10;
	  bp->curr_allocated =
	    (doubled < bp->curr_buflen + count ? bp->curr_buflen + count : doubled);
	  bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
	}

	/* Auxiliary function: Store the byte C at the end of bp->curr_buffer,
	   enlarging the buffer to 2 * old size + 10 bytes first when it is full.  */
	static inline void
	mixed_string_buffer_append_to_curr_buffer (struct mixed_string_buffer *bp,
	                                           unsigned char c)
	{
	  if (bp->curr_allocated == bp->curr_buflen)
	    {
	      bp->curr_allocated = bp->curr_allocated * 2 + 10;
	      bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
	    }

	  bp->curr_buffer[bp->curr_buflen] = c;
	  bp->curr_buflen++;
	}

	/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, encode the
	   Unicode character UC as UTF-8 and append the resulting bytes to
	   bp->curr_buffer.  UC must be < 0x110000; the function aborts when
	   u8_uctomb reports a failure.  */
	static inline void
	mixed_string_buffer_append_to_utf8_buffer (struct mixed_string_buffer *bp,
	                                           ucs4_t uc)
	{
	  unsigned char encoded[6];
	  int nbytes;

	  nbytes = u8_uctomb (encoded, uc, 6);
	  if (nbytes >= 0)
	    {
	      mixed_string_buffer_grow_curr_buffer (bp, nbytes);
	      memcpy (bp->curr_buffer + bp->curr_buflen, encoded, nbytes);
	      bp->curr_buflen += nbytes;
	    }
	  else
	    /* The caller should have ensured that uc is not out-of-range.  */
	    abort ();
	}

	/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, handle the
	   attempt to append a lone surrogate to bp->curr_buffer.
	   BP is the buffer being filled; UC is the unpaired surrogate code point.
	   Emits a warning and appends U+FFFD instead of UC.  */
	static void
	mixed_string_buffer_append_lone_surrogate (struct mixed_string_buffer *bp,
	                                           ucs4_t uc)
	{
	  /* A half surrogate is invalid, therefore use U+FFFD instead.
	     It may be valid in a particular programming language.
	     But a half surrogate is invalid in UTF-8:
	       - RFC 3629 says
	           "The definition of UTF-8 prohibits encoding character
	            numbers between U+D800 and U+DFFF".
	       - Unicode 4.0 chapter 3
	         <http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf>
	         section 3.9, p.77, says
	           "Because surrogate code points are not Unicode scalar
	            values, any UTF-8 byte sequence that would otherwise
	            map to code points D800..DFFF is ill-formed."
	         and in table 3-6, p. 78, does not mention D800..DFFF.
	       - The unicode.org FAQ question "How do I convert an unpaired
	         UTF-16 surrogate to UTF-8?" has the answer
	           "By representing such an unpaired surrogate on its own
	            as a 3-byte sequence, the resulting UTF-8 data stream
	            would become ill-formed."
	     So use U+FFFD instead.  */

	  /* NOTE(review): logical_file_name and line_number below are not the
	     fields of BP; presumably they are the current-position variables
	     declared in "xg-pos.h" -- confirm this is the intended location.  */

	  /* Suppress the program name prefix for this one message only.  */
	  error_with_progname = false;
	  /* %X requires an 'unsigned int' argument; ucs4_t is not guaranteed to be
	     that exact type on every platform, hence the cast.  */
	  error (0, 0, _("%s:%d: warning: lone surrogate U+%04X"),
	         logical_file_name, line_number, (unsigned int) uc);
	  error_with_progname = true;
	  mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd);
	}

	/* Auxiliary function: Assuming bp->curr_type == utf8_encoded, get rid of a
	   pending value in bp->utf16_surr, if there is one: it is handed to the
	   lone-surrogate handler and the field is reset to 0.  */
	static inline void
	mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
	{
	  /* Nothing pending.  */
	  if (bp->utf16_surr == 0)
	    return;

	  mixed_string_buffer_append_lone_surrogate (bp, bp->utf16_surr);
	  bp->utf16_surr = 0;
	}

	/* Auxiliary function: Store NEWSEG as the last element of bp->segments.
	   When the array is full, its capacity is first raised to
	   2 * old + 1 elements.  */
	static inline void
	mixed_string_buffer_add_segment (struct mixed_string_buffer *bp,
	                                 struct mixed_string_segment *newseg)
	{
	  if (bp->nsegments == bp->nsegments_allocated)
	    {
	      size_t capacity = 2 * bp->nsegments_allocated + 1;

	      bp->nsegments_allocated = capacity;
	      bp->segments =
	        (struct mixed_string_segment **)
	        xrealloc (bp->segments,
	                  capacity * sizeof (struct mixed_string_segment *));
	    }

	  bp->segments[bp->nsegments] = newseg;
	  bp->nsegments++;
	}

	/* Auxiliary function: Close the segment under construction.  In
	   utf8_encoded mode a pending bp->utf16_surr is flushed first.  Then,
	   unless bp->curr_type is -1, the bytes collected in bp->curr_buffer (if
	   any) are copied into a new segment appended to bp->segments, and
	   bp->curr_buflen is reset to 0.  bp->curr_buffer itself stays
	   allocated.  */
	static void
	mixed_string_buffer_flush_curr (struct mixed_string_buffer *bp)
	{
	  if (bp->curr_type == utf8_encoded)
	    mixed_string_buffer_flush_utf16_surr (bp);

	  /* No segment is being constructed.  */
	  if (bp->curr_type == -1)
	    return;

	  /* An empty buffer does not produce a segment.  */
	  if (bp->curr_buflen > 0)
	    mixed_string_buffer_add_segment (bp,
	                                     segment_alloc (bp->curr_type,
	                                                    bp->curr_buffer,
	                                                    bp->curr_buflen));
	  bp->curr_buflen = 0;
	}

	/* Appends C, truncated to unsigned char, as one byte in the source
	   encoding.  If the buffer is not currently collecting source_encoded
	   bytes, the segment under construction is flushed first.  */
	void
	mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c)
	{
	  unsigned char byte = (unsigned char) c;

	  /* Switch to multibyte character mode.  */
	  if (bp->curr_type != source_encoded)
	    {
	      mixed_string_buffer_flush_curr (bp);
	      bp->curr_type = source_encoded;
	    }

	  mixed_string_buffer_append_to_curr_buffer (bp, byte);
	}

	/* Appends the Unicode character or UTF-16 code unit C.  If the buffer is
	   not currently collecting utf8_encoded bytes, the segment under
	   construction is flushed first.
	     - A value in 0xD800..0xDBFF is kept in bp->utf16_surr until the next
	       call.
	     - A value in 0xDC00..0xDFFF following such a pending value is
	       combined with it into one character, which is appended as UTF-8.
	     - A value in 0xDC00..0xDFFF without a pending partner goes to the
	       lone-surrogate handler.
	     - Any other value is appended as UTF-8 directly.
	   In the last three cases except the combining one, a pending value that
	   found no partner is first flushed through the lone-surrogate
	   handler.  */
	void
	mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, int c)
	{
	  bool is_high_surrogate = (c >= 0xd800 && c < 0xdc00);
	  bool is_low_surrogate = (c >= 0xdc00 && c < 0xe000);

	  /* Switch to Unicode character mode.  */
	  if (bp->curr_type != utf8_encoded)
	    {
	      mixed_string_buffer_flush_curr (bp);
	      bp->curr_type = utf8_encoded;
	      assert (bp->utf16_surr == 0);
	    }

	  /* This character and the previous one form a Unicode surrogate
	     character pair.  */
	  if (is_low_surrogate && bp->utf16_surr != 0)
	    {
	      unsigned short pair[2];
	      ucs4_t combined;

	      pair[0] = bp->utf16_surr;
	      pair[1] = c;
	      if (u16_mbtouc (&combined, pair, 2) != 2)
	        abort ();

	      mixed_string_buffer_append_to_utf8_buffer (bp, combined);
	      bp->utf16_surr = 0;
	      return;
	    }

	  /* No pair: whatever was pending is a lone surrogate.  */
	  mixed_string_buffer_flush_utf16_surr (bp);

	  if (is_high_surrogate)
	    bp->utf16_surr = c;
	  else if (is_low_surrogate)
	    mixed_string_buffer_append_lone_surrogate (bp, c);
	  else
	    mixed_string_buffer_append_to_utf8_buffer (bp, c);
	}

	/* Frees everything BP owns: each of the bp->nsegments segments, the
	   bp->segments array, and bp->curr_buffer.  BP itself is not freed and
	   its fields are not reset.  */
	void
	mixed_string_buffer_destroy (struct mixed_string_buffer *bp)
	{
	  size_t count = bp->nsegments;
	  size_t k;

	  /* With count == 0 the loop body never runs, so bp->segments may be
	     NULL here.  */
	  for (k = 0; k < count; k++)
	    free (bp->segments[k]);

	  free (bp->segments);
	  free (bp->curr_buffer);
	}

	/* Finishes BP and returns its contents as a newly allocated mixed string.
	   The segment under construction is flushed, the bp->segments array is
	   resized to exactly bp->nsegments elements and handed over to the
	   result, and lcontext, logical_file_name and line_number are copied
	   from BP.  bp->curr_buffer is freed; the pointer fields of BP are not
	   cleared here.  */
	mixed_string_ty *
	mixed_string_buffer_result (struct mixed_string_buffer *bp)
	{
	  struct mixed_string *ms;
	  size_t count;

	  mixed_string_buffer_flush_curr (bp);

	  ms = XMALLOC (struct mixed_string);
	  count = bp->nsegments;

	  if (count == 0)
	    {
	      /* Without any segment, no array has been allocated.  */
	      assert (bp->segments == NULL);
	      ms->segments = NULL;
	    }
	  else
	    ms->segments =
	      (struct mixed_string_segment **)
	      xrealloc (bp->segments,
	                count * sizeof (struct mixed_string_segment *));

	  ms->nsegments = count;
	  ms->lcontext = bp->lcontext;
	  ms->logical_file_name = bp->logical_file_name;
	  ms->line_number = bp->line_number;

	  free (bp->curr_buffer);

	  return ms;
	}