mingw/gettext/gettext-tools/src/x-ycp.c - kiwivm - Git at Google

 /* xgettext YCP backend.
    Copyright (C) 2001-2003, 2005-2009, 2011, 2018-2020 Free Software Foundation, Inc.

    This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

 #ifdef HAVE_CONFIG_H
 # include "config.h"
 #endif

 /* Specification.  */
 #include "x-ycp.h"

 #include <errno.h>
 #include <limits.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>

 #include "attribute.h"
 #include "message.h"
 #include "rc-str-list.h"
 #include "xgettext.h"
 #include "xg-pos.h"
 #include "xg-arglist-context.h"
 #include "xg-message.h"
 #include "error.h"
 #include "xalloc.h"
 #include "gettext.h"

 #define _(s) gettext(s)

 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))


 /* The YCP syntax is defined in libycp/doc/syntax.html.
    See also libycp/src/scanner.ll.
    Both are part of the yast2-core package in SuSE Linux distributions.  */


 void
 init_flag_table_ycp ()
 {
   xgettext_record_flag ("sformat:1:ycp-format");
   xgettext_record_flag ("y2debug:1:ycp-format");
   xgettext_record_flag ("y2milestone:1:ycp-format");
   xgettext_record_flag ("y2warning:1:ycp-format");
   xgettext_record_flag ("y2error:1:ycp-format");
   xgettext_record_flag ("y2security:1:ycp-format");
   xgettext_record_flag ("y2internal:1:ycp-format");
 }


 /* ======================== Reading of characters.  ======================== */

 /* Position in the current line.  */
 static int char_in_line;

 /* The input file stream.  */
 static FILE *fp;

 /* These are for tracking whether comments count as immediately before
    keyword.  */
 static int last_comment_line;
 static int last_non_comment_line;


 /* 1. line_number handling.  */

 static int
 phase1_getc ()
 {
   int c = getc (fp);

   if (c == EOF)
     {
       if (ferror (fp))
         error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
                real_file_name);
       return EOF;
     }

   if (c == '\n')
     {
       line_number++;
       char_in_line = 0;
     }
   else
     char_in_line++;

   return c;
 }

 /* Supports only one pushback character.  */
 static void
 phase1_ungetc (int c)
 {
   if (c != EOF)
     {
       if (c == '\n')
         {
           --line_number;
           char_in_line = INT_MAX;
         }
       else
         --char_in_line;

       ungetc (c, fp);
     }
 }


 /* 2. Replace each comment that is not inside a character constant or
    string literal with a space character.  We need to remember the
    comment for later, because it may be attached to a keyword string.
    YCP comments can be in C comment syntax, C++ comment syntax or sh
    comment syntax.  */

 static unsigned char phase2_pushback[1];
 static int phase2_pushback_length;

 static int
 phase2_getc ()
 {
   static char *buffer;
   static size_t bufmax;
   size_t buflen;
   int lineno;
   int c;
   bool last_was_star;

   if (phase2_pushback_length)
     return phase2_pushback[--phase2_pushback_length];

   if (char_in_line == 0)
     {
       /* Eat whitespace, to recognize ^[\t ]*# pattern.  */
       do
         c = phase1_getc ();
       while (c == '\t' || c == ' ');

       if (c == '#')
         {
           /* sh comment.  */
           buflen = 0;
           lineno = line_number;
           for (;;)
             {
               c = phase1_getc ();
               if (c == '\n' || c == EOF)
                 break;
               /* We skip all leading white space, but not EOLs.  */
               if (!(buflen == 0 && (c == ' ' || c == '\t')))
                 {
                   if (buflen >= bufmax)
                     {
                       bufmax = 2 * bufmax + 10;
                       buffer = xrealloc (buffer, bufmax);
                     }
                   buffer[buflen++] = c;
                 }
             }
           if (buflen >= bufmax)
             {
               bufmax = 2 * bufmax + 10;
               buffer = xrealloc (buffer, bufmax);
             }
           buffer[buflen] = '\0';
           savable_comment_add (buffer);
           last_comment_line = lineno;
           return '\n';
         }
     }
   else
     c = phase1_getc ();

   if (c == '/')
     {
       c = phase1_getc ();

       switch (c)
         {
         default:
           phase1_ungetc (c);
           return '/';

         case '*':
           /* C comment.  */
           buflen = 0;
           lineno = line_number;
           last_was_star = false;
           for (;;)
             {
               c = phase1_getc ();
               if (c == EOF)
                 break;
               /* We skip all leading white space, but not EOLs.  */
               if (buflen == 0 && (c == ' ' || c == '\t'))
                 continue;
               if (buflen >= bufmax)
                 {
                   bufmax = 2 * bufmax + 10;
                   buffer = xrealloc (buffer, bufmax);
                 }
               buffer[buflen++] = c;
               switch (c)
                 {
                 case '\n':
                   --buflen;
                   while (buflen >= 1
                          && (buffer[buflen - 1] == ' '
                              || buffer[buflen - 1] == '\t'))
                     --buflen;
                   buffer[buflen] = '\0';
                   savable_comment_add (buffer);
                   buflen = 0;
                   lineno = line_number;
                   last_was_star = false;
                   continue;

                 case '*':
                   last_was_star = true;
                   continue;

                 case '/':
                   if (last_was_star)
                     {
                       buflen -= 2;
                       while (buflen >= 1
                              && (buffer[buflen - 1] == ' '
                                  || buffer[buflen - 1] == '\t'))
                         --buflen;
                       buffer[buflen] = '\0';
                       savable_comment_add (buffer);
                       break;
                     }
                   FALLTHROUGH;

                 default:
                   last_was_star = false;
                   continue;
                 }
               break;
             }
           last_comment_line = lineno;
           return ' ';

         case '/':
           /* C++ comment.  */
           buflen = 0;
           lineno = line_number;
           for (;;)
             {
               c = phase1_getc ();
               if (c == '\n' || c == EOF)
                 break;
               /* We skip all leading white space, but not EOLs.  */
               if (!(buflen == 0 && (c == ' ' || c == '\t')))
                 {
                   if (buflen >= bufmax)
                     {
                       bufmax = 2 * bufmax + 10;
                       buffer = xrealloc (buffer, bufmax);
                     }
                   buffer[buflen++] = c;
                 }
             }
           if (buflen >= bufmax)
             {
               bufmax = 2 * bufmax + 10;
               buffer = xrealloc (buffer, bufmax);
             }
           buffer[buflen] = '\0';
           savable_comment_add (buffer);
           last_comment_line = lineno;
           return '\n';
         }
     }
   else
     return c;
 }

 /* Supports only one pushback character.  */
 static void
 phase2_ungetc (int c)
 {
   if (c != EOF)
     {
       if (phase2_pushback_length == SIZEOF (phase2_pushback))
         abort ();
       phase2_pushback[phase2_pushback_length++] = c;
     }
 }


 /* ========================== Reading of tokens.  ========================== */


 enum token_type_ty
 {
   token_type_eof,
   token_type_lparen,            /* ( */
   token_type_rparen,            /* ) */
   token_type_comma,             /* , */
   token_type_i18n,              /* _( */
   token_type_string_literal,    /* "abc" */
   token_type_symbol,            /* symbol, number */
   token_type_other              /* misc. operator */
 };
 typedef enum token_type_ty token_type_ty;

 typedef struct token_ty token_ty;
 struct token_ty
 {
   token_type_ty type;
   char *string;         /* for token_type_string_literal, token_type_symbol */
   refcounted_string_list_ty *comment;   /* for token_type_string_literal */
   int line_number;
 };


 /* 7. Replace escape sequences within character strings with their
    single character equivalents.  */

 #define P7_QUOTES (1000 + '"')

 static int
 phase7_getc ()
 {
   int c;

   for (;;)
     {
       /* Use phase 1, because phase 2 elides comments.  */
       c = phase1_getc ();

       if (c == '"')
         return P7_QUOTES;
       if (c != '\\')
         return c;
       c = phase1_getc ();
       if (c != '\n')
         switch (c)
           {
           case 'b':
             return '\b';
           case 'f':
             return '\f';
           case 'n':
             return '\n';
           case 'r':
             return '\r';
           case 't':
             return '\t';

           /* FIXME: What is the octal escape syntax?
              syntax.html says: [0] [0-7]+
              scanner.ll says:  [0-7] [0-7] [0-7]
            */
 #if 0
           case '0': case '1': case '2': case '3':
           case '4': case '5': case '6': case '7':
             {
               int n, j;

               n = 0;
               for (j = 0; j < 3; ++j)
                 {
                   n = n * 8 + c - '0';
                   c = phase1_getc ();
                   switch (c)
                     {
                     default:
                       break;

                     case '0': case '1': case '2': case '3':
                     case '4': case '5': case '6': case '7':
                       continue;
                     }
                   break;
                 }
               phase1_ungetc (c);
               return n;
             }
 #endif

           default:
             return c;
           }
     }
 }


 /* Free the memory pointed to by a 'struct token_ty'.  */
 static inline void
 free_token (token_ty *tp)
 {
   if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
     free (tp->string);
   if (tp->type == token_type_string_literal)
     drop_reference (tp->comment);
 }


 /* Combine characters into tokens.  Discard whitespace.  */

 static token_ty phase5_pushback[1];
 static int phase5_pushback_length;

 static void
 phase5_get (token_ty *tp)
 {
   static char *buffer;
   static int bufmax;
   int bufpos;
   int c;

   if (phase5_pushback_length)
     {
       *tp = phase5_pushback[--phase5_pushback_length];
       return;
     }
   for (;;)
     {
       tp->line_number = line_number;
       c = phase2_getc ();

       switch (c)
         {
         case EOF:
           tp->type = token_type_eof;
           return;

         case '\n':
           if (last_non_comment_line > last_comment_line)
             savable_comment_reset ();
           FALLTHROUGH;
         case '\r':
         case '\t':
         case ' ':
           /* Ignore whitespace and comments.  */
           continue;
         }

       last_non_comment_line = tp->line_number;

       switch (c)
         {
         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
         case 'Y': case 'Z':
         case '_':
         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
         case 'y': case 'z':
         case '0': case '1': case '2': case '3': case '4':
         case '5': case '6': case '7': case '8': case '9':
           /* Symbol, or part of a number.  */
           bufpos = 0;
           for (;;)
             {
               if (bufpos >= bufmax)
                 {
                   bufmax = 2 * bufmax + 10;
                   buffer = xrealloc (buffer, bufmax);
                 }
               buffer[bufpos++] = c;
               c = phase2_getc ();
               switch (c)
                 {
                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
                 case 'Y': case 'Z':
                 case '_':
                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
                 case 'y': case 'z':
                 case '0': case '1': case '2': case '3': case '4':
                 case '5': case '6': case '7': case '8': case '9':
                   continue;
                 default:
                   if (bufpos == 1 && buffer[0] == '_' && c == '(')
                     {
                       tp->type = token_type_i18n;
                       return;
                     }
                   phase2_ungetc (c);
                   break;
                 }
               break;
             }
           if (bufpos >= bufmax)
             {
               bufmax = 2 * bufmax + 10;
               buffer = xrealloc (buffer, bufmax);
             }
           buffer[bufpos] = '\0';
           tp->string = xstrdup (buffer);
           tp->type = token_type_symbol;
           return;

         case '"':
           bufpos = 0;
           for (;;)
             {
               c = phase7_getc ();
               if (c == EOF || c == P7_QUOTES)
                 break;
               if (bufpos >= bufmax)
                 {
                   bufmax = 2 * bufmax + 10;
                   buffer = xrealloc (buffer, bufmax);
                 }
               buffer[bufpos++] = c;
             }
           if (bufpos >= bufmax)
             {
               bufmax = 2 * bufmax + 10;
               buffer = xrealloc (buffer, bufmax);
             }
           buffer[bufpos] = '\0';
           tp->string = xstrdup (buffer);
           tp->type = token_type_string_literal;
           tp->comment = add_reference (savable_comment);
           return;

         case '(':
           tp->type = token_type_lparen;
           return;

         case ')':
           tp->type = token_type_rparen;
           return;

         case ',':
           tp->type = token_type_comma;
           return;

         default:
           /* We could carefully recognize each of the 2 and 3 character
              operators, but it is not necessary, as we only need to recognize
              gettext invocations.  Don't bother.  */
           tp->type = token_type_other;
           return;
         }
     }
 }

 /* Supports only one pushback token.  */
 static void
 phase5_unget (token_ty *tp)
 {
   if (tp->type != token_type_eof)
     {
       if (phase5_pushback_length == SIZEOF (phase5_pushback))
         abort ();
       phase5_pushback[phase5_pushback_length++] = *tp;
     }
 }


 /* Concatenate adjacent string literals to form single string literals.
    (See libycp/src/parser.yy, rule 'string' vs. terminal 'STRING'.)  */

 static token_ty phase8_pushback[1];
 static int phase8_pushback_length;

 static void
 phase8_get (token_ty *tp)
 {
   if (phase8_pushback_length)
     {
       *tp = phase8_pushback[--phase8_pushback_length];
       return;
     }
   phase5_get (tp);
   if (tp->type != token_type_string_literal)
     return;
   for (;;)
     {
       token_ty tmp;
       size_t len;

       phase5_get (&tmp);
       if (tmp.type != token_type_string_literal)
         {
           phase5_unget (&tmp);
           return;
         }
       len = strlen (tp->string);
       tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1);
       strcpy (tp->string + len, tmp.string);
       free_token (&tmp);
     }
 }

 /* Supports only one pushback token.  */
 static void
 phase8_unget (token_ty *tp)
 {
   if (tp->type != token_type_eof)
     {
       if (phase8_pushback_length == SIZEOF (phase8_pushback))
         abort ();
       phase8_pushback[phase8_pushback_length++] = *tp;
     }
 }


 /* ========================= Extracting strings.  ========================== */


 /* Context lookup table.  */
 static flag_context_list_table_ty *flag_context_list_table;


 /* The file is broken into tokens.

      Normal handling: Look for
        [A] _( [B] msgid ... )
      Plural handling: Look for
        [A] _( [B] msgid [C] , [D] msgid_plural ... )
      At point [A]: state == 0.
      At point [B]: state == 1, plural_mp == NULL.
      At point [C]: state == 2, plural_mp != NULL.
      At point [D]: state == 1, plural_mp != NULL.

    We use recursion because we have to set the context according to the given
    flags.  */


 /* Extract messages until the next balanced closing parenthesis.
    Extracted messages are added to MLP.
    Return true upon eof, false upon closing parenthesis.  */
 static bool
 extract_parenthesized (message_list_ty *mlp,
                        flag_context_ty outer_context,
                        flag_context_list_iterator_ty context_iter,
                        bool in_i18n)
 {
   int state; /* 1 or 2 inside _( ... ), otherwise 0 */
   int plural_state = 0; /* defined only when in states 1 and 2 */
   message_ty *plural_mp = NULL; /* defined only when in states 1 and 2 */
   /* Context iterator that will be used if the next token is a '('.  */
   flag_context_list_iterator_ty next_context_iter =
     passthrough_context_list_iterator;
   /* Current context.  */
   flag_context_ty inner_context =
     inherited_context (outer_context,
                        flag_context_list_iterator_advance (&context_iter));

   /* Start state is 0 or 1.  */
   state = (in_i18n ? 1 : 0);

   for (;;)
     {
       token_ty token;

       if (in_i18n)
         phase8_get (&token);
       else
         phase5_get (&token);

       switch (token.type)
         {
         case token_type_i18n:
           if (extract_parenthesized (mlp, inner_context, next_context_iter,
                                      true))
             return true;
           next_context_iter = null_context_list_iterator;
           state = 0;
           continue;

         case token_type_string_literal:
           if (state == 1)
             {
               lex_pos_ty pos;
               pos.file_name = logical_file_name;
               pos.line_number = token.line_number;

               if (plural_state == 0)
                 {
                   /* Seen an msgid.  */
                   token_ty token2;

                   if (in_i18n)
                     phase8_get (&token2);
                   else
                     phase5_get (&token2);

                   plural_mp =
                     remember_a_message (mlp, NULL, token.string, false,
                                         token2.type == token_type_comma,
                                         inner_context, &pos,
                                         NULL, token.comment, false);

                   if (in_i18n)
                     phase8_unget (&token2);
                   else
                     phase5_unget (&token2);

                   plural_state = 1;
                   state = 2;
                 }
               else
                 {
                   /* Seen an msgid_plural.  */
                   if (plural_mp != NULL)
                     remember_a_message_plural (plural_mp, token.string, false,
                                                inner_context, &pos,
                                                token.comment, false);
                   state = 0;
                 }
               drop_reference (token.comment);
             }
           else
             {
               free_token (&token);
               state = 0;
             }
           next_context_iter = null_context_list_iterator;
           continue;

         case token_type_symbol:
           next_context_iter =
             flag_context_list_iterator (
               flag_context_list_table_lookup (
                 flag_context_list_table,
                 token.string, strlen (token.string)));
           free_token (&token);
           state = 0;
           continue;

         case token_type_lparen:
           if (extract_parenthesized (mlp, inner_context, next_context_iter,
                                      false))
             return true;
           next_context_iter = null_context_list_iterator;
           state = 0;
           continue;

         case token_type_rparen:
           return false;

         case token_type_comma:
           if (state == 2)
             state = 1;
           else
             state = 0;
           inner_context =
             inherited_context (outer_context,
                                flag_context_list_iterator_advance (
                                  &context_iter));
           next_context_iter = passthrough_context_list_iterator;
           continue;

         case token_type_other:
           next_context_iter = null_context_list_iterator;
           state = 0;
           continue;

         case token_type_eof:
           return true;

         default:
           abort ();
         }
     }
 }


 void
 extract_ycp (FILE *f,
              const char *real_filename, const char *logical_filename,
              flag_context_list_table_ty *flag_table,
              msgdomain_list_ty *mdlp)
 {
   message_list_ty *mlp = mdlp->item[0]->messages;

   fp = f;
   real_file_name = real_filename;
   logical_file_name = xstrdup (logical_filename);
   line_number = 1;
   char_in_line = 0;

   last_comment_line = -1;
   last_non_comment_line = -1;

   phase2_pushback_length = 0;
   phase5_pushback_length = 0;
   phase8_pushback_length = 0;

   flag_context_list_table = flag_table;

   /* Eat tokens until eof is seen.  When extract_parenthesized returns
      due to an unbalanced closing parenthesis, just restart it.  */
   while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
                                  false))
     ;

   fp = NULL;
   real_file_name = NULL;
   logical_file_name = NULL;
   line_number = 0;
   char_in_line = 0;
 }
	/* xgettext YCP backend.
	Copyright (C) 2001-2003, 2005-2009, 2011, 2018-2020 Free Software Foundation, Inc.

	This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <https://www.gnu.org/licenses/>. */

	#ifdef HAVE_CONFIG_H
	# include "config.h"
	#endif

	/* Specification. */
	#include "x-ycp.h"

	#include <errno.h>
	#include <limits.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	#include "attribute.h"
	#include "message.h"
	#include "rc-str-list.h"
	#include "xgettext.h"
	#include "xg-pos.h"
	#include "xg-arglist-context.h"
	#include "xg-message.h"
	#include "error.h"
	#include "xalloc.h"
	#include "gettext.h"

	#define _(s) gettext(s)

	#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))


	/* The YCP syntax is defined in libycp/doc/syntax.html.
	See also libycp/src/scanner.ll.
	Both are part of the yast2-core package in SuSE Linux distributions. */


	void
	init_flag_table_ycp ()
	{
	xgettext_record_flag ("sformat:1:ycp-format");
	xgettext_record_flag ("y2debug:1:ycp-format");
	xgettext_record_flag ("y2milestone:1:ycp-format");
	xgettext_record_flag ("y2warning:1:ycp-format");
	xgettext_record_flag ("y2error:1:ycp-format");
	xgettext_record_flag ("y2security:1:ycp-format");
	xgettext_record_flag ("y2internal:1:ycp-format");
	}


	/* ======================== Reading of characters. ======================== */

	/* Position in the current line. */
	static int char_in_line;

	/* The input file stream. */
	static FILE *fp;

	/* These are for tracking whether comments count as immediately before
	keyword. */
	static int last_comment_line;
	static int last_non_comment_line;


	/* 1. line_number handling. */

	static int
	phase1_getc ()
	{
	int c = getc (fp);

	if (c == EOF)
	{
	if (ferror (fp))
	error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
	real_file_name);
	return EOF;
	}

	if (c == '\n')
	{
	line_number++;
	char_in_line = 0;
	}
	else
	char_in_line++;

	return c;
	}

	/* Supports only one pushback character. */
	static void
	phase1_ungetc (int c)
	{
	if (c != EOF)
	{
	if (c == '\n')
	{
	--line_number;
	char_in_line = INT_MAX;
	}
	else
	--char_in_line;

	ungetc (c, fp);
	}
	}


	/* 2. Replace each comment that is not inside a character constant or
	string literal with a space character. We need to remember the
	comment for later, because it may be attached to a keyword string.
	YCP comments can be in C comment syntax, C++ comment syntax or sh
	comment syntax. */

	static unsigned char phase2_pushback[1];
	static int phase2_pushback_length;

	static int
	phase2_getc ()
	{
	static char *buffer;
	static size_t bufmax;
	size_t buflen;
	int lineno;
	int c;
	bool last_was_star;

	if (phase2_pushback_length)
	return phase2_pushback[--phase2_pushback_length];

	if (char_in_line == 0)
	{
	/* Eat whitespace, to recognize ^[\t ]# pattern. /
	do
	c = phase1_getc ();
	while (c == '\t' \|\| c == ' ');

	if (c == '#')
	{
	/* sh comment. */
	buflen = 0;
	lineno = line_number;
	for (;;)
	{
	c = phase1_getc ();
	if (c == '\n' \|\| c == EOF)
	break;
	/* We skip all leading white space, but not EOLs. */
	if (!(buflen == 0 && (c == ' ' \|\| c == '\t')))
	{
	if (buflen >= bufmax)
	{
	bufmax = 2 * bufmax + 10;
	buffer = xrealloc (buffer, bufmax);
	}
	buffer[buflen++] = c;
	}
	}
	if (buflen >= bufmax)
	{
	bufmax = 2 * bufmax + 10;
	buffer = xrealloc (buffer, bufmax);
	}
	buffer[buflen] = '\0';
	savable_comment_add (buffer);
	last_comment_line = lineno;
	return '\n';
	}
	}
	else
	c = phase1_getc ();

	if (c == '/')
	{
	c = phase1_getc ();

	switch (c)
	{
	default:
	phase1_ungetc (c);
	return '/';

	case '*':
	/* C comment. */
	buflen = 0;
	lineno = line_number;
	last_was_star = false;
	for (;;)
	{
	c = phase1_getc ();
	if (c == EOF)
	break;
	/* We skip all leading white space, but not EOLs. */
	if (buflen == 0 && (c == ' ' \|\| c == '\t'))
	continue;
	if (buflen >= bufmax)
	{
	bufmax = 2 * bufmax + 10;
	buffer = xrealloc (buffer, bufmax);
	}
	buffer[buflen++] = c;
	switch (c)
	{
	case '\n':
	--buflen;
	while (buflen >= 1
	&& (buffer[buflen - 1] == ' '
	\|\| buffer[buflen - 1] == '\t'))
	--buflen;
	buffer[buflen] = '\0';
	savable_comment_add (buffer);
	buflen = 0;
	lineno = line_number;
	last_was_star = false;
	continue;

	case '*':
	last_was_star = true;
	continue;

	case '/':
	if (last_was_star)
	{
	buflen -= 2;
	while (buflen >= 1
	&& (buffer[buflen - 1] == ' '
	\|\| buffer[buflen - 1] == '\t'))
	--buflen;
	buffer[buflen] = '\0';
	savable_comment_add (buffer);
	break;
	}
	FALLTHROUGH;

	default:
	last_was_star = false;
	continue;
	}
	break;
	}
	last_comment_line = lineno;
	return ' ';

	case '/':
	/* C++ comment. */
	buflen = 0;
	lineno = line_number;
	for (;;)
	{
	c = phase1_getc ();
	if (c == '\n' \|\| c == EOF)
	break;
	/* We skip all leading white space, but not EOLs. */
	if (!(buflen == 0 && (c == ' ' \|\| c == '\t')))
	{
	if (buflen >= bufmax)
	{
	bufmax = 2 * bufmax + 10;
	buffer = xrealloc (buffer, bufmax);
	}
	buffer[buflen++] = c;
	}
	}
	if (buflen >= bufmax)
	{
	bufmax = 2 * bufmax + 10;
	buffer = xrealloc (buffer, bufmax);
	}
	buffer[buflen] = '\0';
	savable_comment_add (buffer);
	last_comment_line = lineno;
	return '\n';
	}
	}
	else
	return c;
	}

	/* Supports only one pushback character. */
	static void
	phase2_ungetc (int c)
	{
	if (c != EOF)
	{
	if (phase2_pushback_length == SIZEOF (phase2_pushback))
	abort ();
	phase2_pushback[phase2_pushback_length++] = c;
	}
	}


	/* ========================== Reading of tokens. ========================== */


	enum token_type_ty
	{
	token_type_eof,
	token_type_lparen, /* ( */
	token_type_rparen, /* ) */
	token_type_comma, /* , */
	token_type_i18n, /* _( */
	token_type_string_literal, /* "abc" */
	token_type_symbol, /* symbol, number */
	token_type_other /* misc. operator */
	};
	typedef enum token_type_ty token_type_ty;

	typedef struct token_ty token_ty;
	struct token_ty
	{
	token_type_ty type;
	char string; / for token_type_string_literal, token_type_symbol */
	refcounted_string_list_ty comment; / for token_type_string_literal */
	int line_number;
	};


	/* 7. Replace escape sequences within character strings with their
	single character equivalents. */

	#define P7_QUOTES (1000 + '"')

	static int
	phase7_getc ()
	{
	int c;

	for (;;)
	{
	/* Use phase 1, because phase 2 elides comments. */
	c = phase1_getc ();

	if (c == '"')
	return P7_QUOTES;
	if (c != '\\')
	return c;
	c = phase1_getc ();
	if (c != '\n')
	switch (c)
	{
	case 'b':
	return '\b';
	case 'f':
	return '\f';
	case 'n':
	return '\n';
	case 'r':
	return '\r';
	case 't':
	return '\t';

	/* FIXME: What is the octal escape syntax?
	syntax.html says: [0] [0-7]+
	scanner.ll says: [0-7] [0-7] [0-7]
	*/
	#if 0
	case '0': case '1': case '2': case '3':
	case '4': case '5': case '6': case '7':
	{
	int n, j;

	n = 0;
	for (j = 0; j < 3; ++j)
	{
	n = n * 8 + c - '0';
	c = phase1_getc ();
	switch (c)
	{
	default:
	break;

	case '0': case '1': case '2': case '3':
	case '4': case '5': case '6': case '7':
	continue;
	}
	break;
	}
	phase1_ungetc (c);
	return n;
	}
	#endif

	default:
	return c;
	}
	}
	}


	/* Free the memory pointed to by a 'struct token_ty'. */
	static inline void
	free_token (token_ty *tp)
	{
	if (tp->type == token_type_string_literal \|\| tp->type == token_type_symbol)
	free (tp->string);
	if (tp->type == token_type_string_literal)
	drop_reference (tp->comment);
	}


	/* Combine characters into tokens. Discard whitespace. */

	static token_ty phase5_pushback[1];
	static int phase5_pushback_length;

	static void
	phase5_get (token_ty *tp)
	{
	static char *buffer;
	static int bufmax;
	int bufpos;
	int c;

	if (phase5_pushback_length)
	{
	*tp = phase5_pushback[--phase5_pushback_length];
	return;
	}
	for (;;)
	{
	tp->line_number = line_number;
	c = phase2_getc ();

	switch (c)
	{
	case EOF:
	tp->type = token_type_eof;
	return;

	case '\n':
	if (last_non_comment_line > last_comment_line)
	savable_comment_reset ();
	FALLTHROUGH;
	case '\r':
	case '\t':
	case ' ':
	/* Ignore whitespace and comments. */
	continue;
	}

	last_non_comment_line = tp->line_number;

	switch (c)
	{
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
	case 'Y': case 'Z':
	case '_':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
	case 'y': case 'z':
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	/* Symbol, or part of a number. */
	bufpos = 0;
	for (;;)
	{
	if (bufpos >= bufmax)
	{
	bufmax = 2 * bufmax + 10;
	buffer = xrealloc (buffer, bufmax);
	}
	buffer[bufpos++] = c;
	c = phase2_getc ();
	switch (c)
	{
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
	case 'Y': case 'Z':
	case '_':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
	case 'y': case 'z':
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	continue;
	default:
	if (bufpos == 1 && buffer[0] == '_' && c == '(')
	{
	tp->type = token_type_i18n;
	return;
	}
	phase2_ungetc (c);
	break;
	}
	break;
	}
	if (bufpos >= bufmax)
	{
	bufmax = 2 * bufmax + 10;
	buffer = xrealloc (buffer, bufmax);
	}
	buffer[bufpos] = '\0';
	tp->string = xstrdup (buffer);
	tp->type = token_type_symbol;
	return;

	case '"':
	bufpos = 0;
	for (;;)
	{
	c = phase7_getc ();
	if (c == EOF \|\| c == P7_QUOTES)
	break;
	if (bufpos >= bufmax)
	{
	bufmax = 2 * bufmax + 10;
	buffer = xrealloc (buffer, bufmax);
	}
	buffer[bufpos++] = c;
	}
	if (bufpos >= bufmax)
	{
	bufmax = 2 * bufmax + 10;
	buffer = xrealloc (buffer, bufmax);
	}
	buffer[bufpos] = '\0';
	tp->string = xstrdup (buffer);
	tp->type = token_type_string_literal;
	tp->comment = add_reference (savable_comment);
	return;

	case '(':
	tp->type = token_type_lparen;
	return;

	case ')':
	tp->type = token_type_rparen;
	return;

	case ',':
	tp->type = token_type_comma;
	return;

	default:
	/* We could carefully recognize each of the 2 and 3 character
	operators, but it is not necessary, as we only need to recognize
	gettext invocations. Don't bother. */
	tp->type = token_type_other;
	return;
	}
	}
	}

	/* Supports only one pushback token. */
	static void
	phase5_unget (token_ty *tp)
	{
	if (tp->type != token_type_eof)
	{
	if (phase5_pushback_length == SIZEOF (phase5_pushback))
	abort ();
	phase5_pushback[phase5_pushback_length++] = *tp;
	}
	}


	/* Concatenate adjacent string literals to form single string literals.
	(See libycp/src/parser.yy, rule 'string' vs. terminal 'STRING'.) */

	static token_ty phase8_pushback[1];
	static int phase8_pushback_length;

	static void
	phase8_get (token_ty *tp)
	{
	if (phase8_pushback_length)
	{
	*tp = phase8_pushback[--phase8_pushback_length];
	return;
	}
	phase5_get (tp);
	if (tp->type != token_type_string_literal)
	return;
	for (;;)
	{
	token_ty tmp;
	size_t len;

	phase5_get (&tmp);
	if (tmp.type != token_type_string_literal)
	{
	phase5_unget (&tmp);
	return;
	}
	len = strlen (tp->string);
	tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1);
	strcpy (tp->string + len, tmp.string);
	free_token (&tmp);
	}
	}

	/* Supports only one pushback token. */
	static void
	phase8_unget (token_ty *tp)
	{
	if (tp->type != token_type_eof)
	{
	if (phase8_pushback_length == SIZEOF (phase8_pushback))
	abort ();
	phase8_pushback[phase8_pushback_length++] = *tp;
	}
	}


	/* ========================= Extracting strings. ========================== */


	/* Context lookup table. */
	static flag_context_list_table_ty *flag_context_list_table;


	/* The file is broken into tokens.

	Normal handling: Look for
	[A] _( [B] msgid ... )
	Plural handling: Look for
	[A] _( [B] msgid [C] , [D] msgid_plural ... )
	At point [A]: state == 0.
	At point [B]: state == 1, plural_mp == NULL.
	At point [C]: state == 2, plural_mp != NULL.
	At point [D]: state == 1, plural_mp != NULL.

	We use recursion because we have to set the context according to the given
	flags. */


	/* Extract messages until the next balanced closing parenthesis.
	Extracted messages are added to MLP.
	Return true upon eof, false upon closing parenthesis. */
	static bool
	extract_parenthesized (message_list_ty *mlp,
	flag_context_ty outer_context,
	flag_context_list_iterator_ty context_iter,
	bool in_i18n)
	{
	int state; /* 1 or 2 inside _( ... ), otherwise 0 */
	int plural_state = 0; /* defined only when in states 1 and 2 */
	message_ty plural_mp = NULL; / defined only when in states 1 and 2 */
	/* Context iterator that will be used if the next token is a '('. */
	flag_context_list_iterator_ty next_context_iter =
	passthrough_context_list_iterator;
	/* Current context. */
	flag_context_ty inner_context =
	inherited_context (outer_context,
	flag_context_list_iterator_advance (&context_iter));

	/* Start state is 0 or 1. */
	state = (in_i18n ? 1 : 0);

	for (;;)
	{
	token_ty token;

	if (in_i18n)
	phase8_get (&token);
	else
	phase5_get (&token);

	switch (token.type)
	{
	case token_type_i18n:
	if (extract_parenthesized (mlp, inner_context, next_context_iter,
	true))
	return true;
	next_context_iter = null_context_list_iterator;
	state = 0;
	continue;

	case token_type_string_literal:
	if (state == 1)
	{
	lex_pos_ty pos;
	pos.file_name = logical_file_name;
	pos.line_number = token.line_number;

	if (plural_state == 0)
	{
	/* Seen an msgid. */
	token_ty token2;

	if (in_i18n)
	phase8_get (&token2);
	else
	phase5_get (&token2);

	plural_mp =
	remember_a_message (mlp, NULL, token.string, false,
	token2.type == token_type_comma,
	inner_context, &pos,
	NULL, token.comment, false);

	if (in_i18n)
	phase8_unget (&token2);
	else
	phase5_unget (&token2);

	plural_state = 1;
	state = 2;
	}
	else
	{
	/* Seen an msgid_plural. */
	if (plural_mp != NULL)
	remember_a_message_plural (plural_mp, token.string, false,
	inner_context, &pos,
	token.comment, false);
	state = 0;
	}
	drop_reference (token.comment);
	}
	else
	{
	free_token (&token);
	state = 0;
	}
	next_context_iter = null_context_list_iterator;
	continue;

	case token_type_symbol:
	next_context_iter =
	flag_context_list_iterator (
	flag_context_list_table_lookup (
	flag_context_list_table,
	token.string, strlen (token.string)));
	free_token (&token);
	state = 0;
	continue;

	case token_type_lparen:
	if (extract_parenthesized (mlp, inner_context, next_context_iter,
	false))
	return true;
	next_context_iter = null_context_list_iterator;
	state = 0;
	continue;

	case token_type_rparen:
	return false;

	case token_type_comma:
	if (state == 2)
	state = 1;
	else
	state = 0;
	inner_context =
	inherited_context (outer_context,
	flag_context_list_iterator_advance (
	&context_iter));
	next_context_iter = passthrough_context_list_iterator;
	continue;

	case token_type_other:
	next_context_iter = null_context_list_iterator;
	state = 0;
	continue;

	case token_type_eof:
	return true;

	default:
	abort ();
	}
	}
	}


	void
	extract_ycp (FILE *f,
	const char real_filename, const char logical_filename,
	flag_context_list_table_ty *flag_table,
	msgdomain_list_ty *mdlp)
	{
	message_list_ty *mlp = mdlp->item[0]->messages;

	fp = f;
	real_file_name = real_filename;
	logical_file_name = xstrdup (logical_filename);
	line_number = 1;
	char_in_line = 0;

	last_comment_line = -1;
	last_non_comment_line = -1;

	phase2_pushback_length = 0;
	phase5_pushback_length = 0;
	phase8_pushback_length = 0;

	flag_context_list_table = flag_table;

	/* Eat tokens until eof is seen. When extract_parenthesized returns
	due to an unbalanced closing parenthesis, just restart it. */
	while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
	false))
	;

	fp = NULL;
	real_file_name = NULL;
	logical_file_name = NULL;
	line_number = 0;
	char_in_line = 0;
	}