mingw/gettext/gettext-tools/src/x-lua.c - kiwivm - Git at Google

 /* xgettext Lua backend.
    Copyright (C) 2012-2013, 2016, 2018-2020 Free Software Foundation, Inc.

    This file was written by Ľubomír Remák <lubomirr@lubomirr.eu>, 2012.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif

 /* Specification.  */
 #include "x-lua.h"

 #include <errno.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>

 #include "message.h"
 #include "rc-str-list.h"
 #include "xgettext.h"
 #include "xg-pos.h"
 #include "xg-mixed-string.h"
 #include "xg-arglist-context.h"
 #include "xg-arglist-callshape.h"
 #include "xg-arglist-parser.h"
 #include "xg-message.h"
 #include "error.h"
 #include "xalloc.h"
 #include "gettext.h"
 #include "po-charset.h"

 #define _(s) gettext(s)

 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))

 /* The Lua syntax is defined in the Lua manual sections 3.1 and 9,
    which can be found at
    https://www.lua.org/manual/5.2/manual.html#3.1
    https://www.lua.org/manual/5.2/manual.html#9  */

 /* If true, extract all strings, whether or not they are arguments of a
    keyword.  Set by x_lua_extract_all ().  */
 static bool extract_all = false;

 /* A hash table for keywords.  It maps keyword names to their call shapes
    and is created lazily by x_lua_keyword ().  */
 static hash_table keywords;
 /* True as long as the built-in keywords still have to be installed by
    init_keywords ().  Cleared by x_lua_keyword (NULL) and by init_keywords ()
    itself.  */
 static bool default_keywords = true;

 /* Set the extract_all flag, so that xgettext extracts every string it
    encounters instead of only the arguments of keywords.  */
 void
 x_lua_extract_all ()
 {
   extract_all = true;
 }

 /* Register NAME as a keyword specification.
    A NULL NAME registers nothing; it only switches off the built-in default
    keywords.  */
 void
 x_lua_keyword (const char *name)
 {
   const char *spec_end;
   struct callshape shape;
   const char *first_colon;

   if (name == NULL)
     {
       default_keywords = false;
       return;
     }

   /* Create the table on first use.  */
   if (keywords.table == NULL)
     hash_init (&keywords, 100);

   split_keywordspec (name, &spec_end, &shape);

   /* The characters between NAME and SPEC_END should form a valid identifier.
      A colon inside that range means that split_keywordspec () could not
      parse the specification; such an entry is dropped.  */
   first_colon = strchr (name, ':');
   if (first_colon != NULL && first_colon < spec_end)
     return;

   insert_keyword_callshape (&keywords, name, spec_end - name, &shape);
 }

 /* Complete the keywords hash table by installing the built-in keywords,
    unless they were disabled or have already been installed.
    Called after argument processing, before each file is processed.  */
 static void
 init_keywords ()
 {
   /* When adding new keywords here, also update the documentation in
      xgettext.texi!  */
   static const char *const builtin_specs[] =
     {
       "_",
       "gettext.gettext",
       "gettext.dgettext:2",
       "gettext.dcgettext:2",
       "gettext.ngettext:1,2",
       "gettext.dngettext:2,3",
       "gettext.dcngettext:2,3"
     };
   size_t i;

   if (!default_keywords)
     return;

   for (i = 0; i < sizeof builtin_specs / sizeof builtin_specs[0]; i++)
     x_lua_keyword (builtin_specs[i]);

   /* Install them only once.  */
   default_keywords = false;
 }

 /* Record the format string flags of the built-in gettext functions and of
    string.format.  */
 void
 init_flag_table_lua ()
 {
   static const char *const flag_specs[] =
     {
       "_:1:pass-lua-format",
       "gettext.gettext:1:pass-lua-format",
       "gettext.dgettext:2:pass-lua-format",
       "gettext.dcgettext:2:pass-lua-format",
       "gettext.ngettext:1:pass-lua-format",
       "gettext.ngettext:2:pass-lua-format",
       "gettext.dngettext:2:pass-lua-format",
       "gettext.dngettext:3:pass-lua-format",
       "gettext.dcngettext:2:pass-lua-format",
       "gettext.dcngettext:3:pass-lua-format",
       "string.format:1:lua-format"
     };
   size_t i;

   for (i = 0; i < sizeof flag_specs / sizeof flag_specs[0]; i++)
     xgettext_record_flag (flag_specs[i]);
 }


 /* ======================== Reading of characters.  ======================== */

 /* The input file stream.  */
 static FILE *fp;


 /* 1. line_number handling.  */

 /* Stack of characters given back through phase1_ungetc ().  The character
    pushed last is the one phase1_getc () returns first.  */
 static unsigned char phase1_pushback[2];
 static int phase1_pushback_length;

 /* While true, phase1_getc () treats the next character it reads from fp as
    the first one of the file and skips a line that starts with '#'.  */
 static bool first_character;

 /* Return the next character of the input, or EOF.  Pushed back characters
    are returned first.  line_number is incremented for every newline that is
    returned.  A read error is fatal.  */
 static int
 phase1_getc ()
 {
   int ch;

   if (phase1_pushback_length != 0)
     {
       phase1_pushback_length--;
       ch = phase1_pushback[phase1_pushback_length];
     }
   else
     {
       ch = getc (fp);

       if (first_character)
         {
           first_character = false;

           /* Ignore shebang line.  No pushback required in this case.  */
           if (ch == '#')
             {
               /* Skip everything up to the end of the line.  */
               do
                 ch = getc (fp);
               while (ch != '\n' && ch != EOF);

               if (ch == '\n')
                 {
                   /* Count the skipped line and go on with the character
                      that follows it.  */
                   line_number++;
                   ch = getc (fp);
                 }
             }
         }

       if (ch == EOF)
         {
           if (ferror (fp))
             error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
                    real_file_name);
           return EOF;
         }
     }

   if (ch == '\n')
     line_number++;

   return ch;
 }

 /* Give the character C back, so that the next phase1_getc () returns it.
    EOF is silently ignored.  Supports 2 characters of pushback; a third
    pending character aborts the program.  */

 static void
 phase1_ungetc (int c)
 {
   if (c == EOF)
     return;

   /* phase1_getc () counts this newline again when it hands it out.  */
   if (c == '\n')
     line_number--;

   if (phase1_pushback_length == SIZEOF (phase1_pushback))
     abort ();
   phase1_pushback[phase1_pushback_length] = c;
   phase1_pushback_length++;
 }


 /* These are for tracking whether comments count as immediately before
    keyword.  last_comment_line is updated by phase2_getc () whenever it has
    consumed a comment.  */
 static int last_comment_line;
 static int last_non_comment_line;

 /* Accumulating comments.  */

 /* Text of the comment line that is currently being collected.  It is only
    NUL-terminated by comment_line_end ().  */
 static char *buffer;
 /* Number of bytes allocated for buffer.  */
 static size_t bufmax;
 /* Number of bytes of buffer that are in use.  */
 static size_t buflen;

 /* Begin collecting a new comment line: forget the current contents of the
    comment buffer.  The allocation is kept for reuse.  */
 static inline void
 comment_start ()
 {
   buflen = 0;
 }

 /* Append the character C to the comment buffer, enlarging the buffer when
    it is full.  */
 static inline void
 comment_add (int c)
 {
   size_t pos = buflen;

   if (pos >= bufmax)
     {
       size_t grown = 2 * bufmax + 10;

       buffer = xrealloc (buffer, grown);
       bufmax = grown;
     }
   buffer[pos] = c;
   buflen = pos + 1;
 }

 /* Finish the comment line collected so far: drop its last CHARS_TO_REMOVE
    characters (the callers pass the length of the newline or closing bracket
    they have already added), strip trailing spaces and tabs, NUL-terminate
    the buffer and hand it to savable_comment_add ().
    The buffer must contain at least CHARS_TO_REMOVE characters.  */
 static inline void
 comment_line_end (size_t chars_to_remove)
 {
   buflen -= chars_to_remove;
   while (buflen >= 1
          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
     --buflen;
   /* When no character was dropped up front, the buffer may be completely
      full (or not allocated at all yet), so make room for the NUL.  */
   if (chars_to_remove == 0 && buflen >= bufmax)
     {
       bufmax = 2 * bufmax + 10;
       buffer = xrealloc (buffer, bufmax);
     }
   buffer[buflen] = '\0';
   savable_comment_add (buffer);
 }

 /* Consume the rest of the current line, including the newline, and append
    it to the comment buffer, without leading spaces and tabs.  Then finish
    the comment line.  */
 static void
 eat_comment_line ()
 {
   int ch;

   while ((ch = phase1_getc ()) != '\n' && ch != EOF)
     {
       /* Blanks are dropped only while the buffer is still empty.  */
       if (buflen != 0 || (ch != ' ' && ch != '\t'))
         comment_add (ch);
     }

   comment_line_end (0);
 }

 /* 2. Comment handling.
    Return the next character of the input with comments replaced: a long
    comment '--[==[ ... ]==]' yields a single space, a short comment '-- ...'
    yields a newline (the newline that ends it is consumed).  The text of the
    comment is passed line by line to savable_comment_add (), and
    last_comment_line is set to the line number of the last comment line.
    Any other character, including EOF, is returned unchanged.  */
 static int
 phase2_getc ()
 {
   int c;
   int lineno;

   c = phase1_getc ();

   if (c == '-')
     {
       c = phase1_getc ();

       if (c == '-')
         {
           /* It starts with '--', so it must be either a short or a long
              comment.  */
           c = phase1_getc ();

           if (c == '[')
             {
               c = phase1_getc ();

               /* Level of the (potential) opening long bracket.  */
               int esigns = 0;
               while (c == '=')
                 {
                   esigns++;
                   c = phase1_getc ();
                 }

               if (c == '[')
                 {
                   /* Long comment.  */
                   /* True while the characters since the last ']' can still
                      turn out to be the closing long bracket.  */
                   bool right_bracket = false;
                   bool end = false;
                   /* Number of '=' seen since that ']'.  */
                   int esigns2 = 0;

                   lineno = line_number;
                   comment_start ();
                   while (!end)
                     {
                       c = phase1_getc ();

                       if (c == EOF)
                         break;

                       /* Ignore leading spaces and tabs.  */
                       if (!(buflen == 0 && (c == ' ' || c == '\t')))
                         {
                           comment_add (c);

                           switch (c)
                             {
                             case ']':
                               if (!right_bracket)
                                 {
                                   right_bracket = true;
                                   esigns2 = 0;
                                 }
                               else
                                 {
                                   if (esigns2 == esigns)
                                     {
                                       /* Drop the closing long bracket from
                                          the comment text.  */
                                       comment_line_end (2 + esigns);
                                       end = true;
                                     }
                                   else
                                     /* Wrong level.  This ']' may itself be
                                        the start of the real closing
                                        bracket, as in ']=]]' or ']==]=]',
                                        so start counting afresh.  */
                                     esigns2 = 0;
                                 }
                               break;

                             case '=':
                               if (right_bracket)
                                 esigns2++;
                               break;

                             case '\n':
                               comment_line_end (1);
                               comment_start ();
                               lineno = line_number;
                               /* Intentionally not breaking.  */

                             default:
                               right_bracket = false;
                             }
                         }
                     }
                   last_comment_line = lineno;
                   return ' ';
                 }
               else
                 {
                   /* One line (short) comment, starting with '--[=...='.
                      The characters consumed while looking for a long
                      bracket belong to the comment text.  */
                   lineno = line_number;
                   comment_start ();
                   comment_add ('[');
                   while (esigns--)
                     comment_add ('=');
                   phase1_ungetc (c);
                   eat_comment_line ();
                   last_comment_line = lineno;
                   return '\n';
                 }
             }
           else
             {
               /* One line (short) comment.  */
               lineno = line_number;
               comment_start ();
               phase1_ungetc (c);
               eat_comment_line ();
               last_comment_line = lineno;
               return '\n';
             }
         }
       else
         {
           /* Minus sign.  */
           phase1_ungetc (c);
           return '-';
         }
     }
   else
     return c;
 }


 /* ========================== Reading of tokens.  ========================== */

 /* The kinds of tokens produced by phase3_get ().  */
 enum token_type_ty
 {
   token_type_eof,
   token_type_lparen,            /* ( */
   token_type_rparen,            /* ) */
   token_type_lbracket,          /* [ */
   token_type_rbracket,          /* ] */
   token_type_comma,             /* , */
   token_type_dot,               /* . */
   token_type_doubledot,         /* .. */
   token_type_operator1,         /* + - * / % ^ # not */
   token_type_operator2,         /* < > = <= >= ~= == and or */
   token_type_string,            /* quoted string or long bracket string */
   token_type_number,
   token_type_symbol,            /* identifier other than not, and, or */
   token_type_other              /* e.g. ; and ... */
 };

 typedef enum token_type_ty token_type_ty;

 /* A token, as passed between the lexer phases and the extractor.  */
 typedef struct token_ty token_ty;
 struct token_ty
 {
   token_type_ty type;
   char *string; /* for token_type_string, token_type_symbol; malloc()ed */
   refcounted_string_list_ty *comment;  /* for token_type_string */
   int line_number;
 };

 /* Release what a 'struct token_ty' owns: the string of a string or symbol
    token, and the comment reference of a string token.  The token structure
    itself is not freed.  */
 static inline void
 free_token (token_ty *tp)
 {
   switch (tp->type)
     {
     case token_type_string:
       free (tp->string);
       drop_reference (tp->comment);
       break;

     case token_type_symbol:
       free (tp->string);
       break;

     default:
       /* The other token types own nothing.  */
       break;
     }
 }

 /* Our current string.  string_buf holds string_buf_length bytes out of
    string_buf_alloc allocated ones; it is NUL-terminated by string_end ().  */
 static int string_buf_length;
 static int string_buf_alloc;
 static char *string_buf;

 /* Begin a new string: forget the current contents of string_buf.  The
    allocation is kept for reuse.  */
 static void
 string_start ()
 {
   string_buf_length = 0;
 }

 /* Append the character C to string_buf, enlarging the buffer when it is
    full.  */
 static void
 string_add (int c)
 {
   int pos = string_buf_length;

   if (pos >= string_buf_alloc)
     {
       int grown = 2 * string_buf_alloc + 10;

       string_buf = xrealloc (string_buf, grown);
       string_buf_alloc = grown;
     }

   string_buf[pos] = c;
   string_buf_length = pos + 1;
 }

 /* NUL-terminate string_buf, growing it by one byte if it is completely
    full.  string_buf_length is left unchanged.  */
 static void
 string_end ()
 {
   if (string_buf_length >= string_buf_alloc)
     {
       int grown = string_buf_alloc + 1;

       string_buf = xrealloc (string_buf, grown);
       string_buf_alloc = grown;
     }

   string_buf[string_buf_length] = '\0';
 }


 /* We need 3 pushback tokens for string optimization: while looking for
    'string .. string' sequences, phase4_get () may have to give back up to
    three tokens.  The token pushed last is returned first.  */
 static int phase3_pushback_length;
 static token_ty phase3_pushback[3];


 /* Give the token *TP back, so that the next phase3_get () returns it.
    An EOF token is silently ignored.  A fourth pending token aborts the
    program.  */
 static void
 phase3_unget (token_ty *tp)
 {
   if (tp->type == token_type_eof)
     return;

   if (phase3_pushback_length == SIZEOF (phase3_pushback))
     abort ();
   phase3_pushback[phase3_pushback_length] = *tp;
   phase3_pushback_length++;
 }

 /* 3. Tokenization.
    Store the next token in *TP.  Tokens given back with phase3_unget () are
    returned first.
    For token_type_string and token_type_symbol tokens, TP->string is a
    freshly allocated string.  For token_type_string tokens, TP->comment
    additionally holds a reference to savable_comment.  Both are released by
    free_token ().
    Malformed input is not diagnosed; it is skipped or tokenized on a
    best-effort basis.  */
 static void
 phase3_get (token_ty *tp)
 {
   int c;
   int c2;
   int c_start;

   if (phase3_pushback_length)
     {
       *tp = phase3_pushback[--phase3_pushback_length];
       return;
     }

   tp->string = NULL;

   for (;;)
     {
       tp->line_number = line_number;
       c = phase2_getc ();

       switch (c)
         {
         case EOF:
           tp->type = token_type_eof;
           return;

         case '\n':
           if (last_non_comment_line > last_comment_line)
             savable_comment_reset ();
           /* Intentionally not breaking.  */
         case ' ':
         case '\t':
         case '\f':
           continue;

         case '+':
         case '-':
         case '*':
         case '/':
         case '^':
         case '%':
         case '#':
           tp->type = token_type_operator1;
           return;
         case '<':
         case '>':
         case '=':
           /* Also covers '<=', '>=' and '=='.  */
           c2 = phase1_getc ();
           if (c2 != '=')
             phase1_ungetc (c2);
           tp->type = token_type_operator2;
           return;
         case '~':
           c2 = phase1_getc ();
           if (c2 == '=')
             {
               tp->type = token_type_operator2;
               return;
             }
           else
             phase1_ungetc (c2);
           /* A '~' that is not part of '~=' is skipped.  */
           continue;
         case '(':
           tp->type = token_type_lparen;
           return;
         case ')':
           tp->type = token_type_rparen;
           return;
         case ',':
           tp->type = token_type_comma;
           return;

         case ';':
           tp->type = token_type_other;
           return;

           /* There are three operators beginning with a dot.  '.',
              '..' and '...'.  The most useful for us is the string
              concatenation operator ('..').  */
         case '.':
           c = phase1_getc ();
           if (c == '.')
             {
               c = phase1_getc ();
               if (c == '.')
                 {
                   tp->type = token_type_other;
                   return;
                 }
               else
                 {
                   phase1_ungetc (c);
                   tp->type = token_type_doubledot;
                   return;
                 }
             }
           else if (c >= '0' && c <= '9')
             {
               /* It's a number.  We aren't interested in the actual
                  numeric value, so ignore the dot and let next
                  iteration eat the number.  */
               phase1_ungetc (c);
               continue;
             }
           else
             {
               phase1_ungetc (c);
               tp->type = token_type_dot;
               return;
             }

         case '"':
         case '\'':
           c_start = c;
           string_start ();

           for (;;)
             {
               /* We need unprocessed characters from phase 1.  */
               c = phase1_getc ();

               if (c == EOF || c == c_start || c == '\n')
                 {
                   /* End of string.  An unterminated string ends at the
                      end of the line or of the file.  */
                   string_end ();
                   tp->string = xstrdup (string_buf);
                   tp->comment = add_reference (savable_comment);
                   tp->type = token_type_string;
                   return;
                 }

               /* We got '\', this is probably an escape sequence.  */
               if (c == '\\')
                 {
                   c = phase1_getc ();
                   switch (c)
                     {
                     case 'a':
                       string_add ('\a');
                       break;
                     case 'b':
                       string_add ('\b');
                       break;
                     case 'f':
                       string_add ('\f');
                       break;
                     case 'n':
                       string_add ('\n');
                       break;
                     case 'r':
                       string_add ('\r');
                       break;
                     case 't':
                       string_add ('\t');
                       break;
                     case 'v':
                       string_add ('\v');
                       break;
                     case 'x':
                       {
                         /* '\xXX' with exactly two hexadecimal digits.  */
                         int num = 0;
                         int i = 0;

                         for (i = 0; i < 2; i++)
                           {
                             c = phase1_getc ();
                             if (c >= '0' && c <= '9')
                               num += c - '0';
                             else if (c >= 'a' && c <= 'f')
                               num += c - 'a' + 10;
                             else if (c >= 'A' && c <= 'F')
                               num += c - 'A' + 10;
                             else
                               {
                                 phase1_ungetc (c);
                                 break;
                               }

                             /* The first digit is the high nibble.  */
                             if (i == 0)
                               num *= 16;
                           }

                         /* An incomplete sequence is dropped.  */
                         if (i == 2)
                           string_add (num);
                       }

                       break;
                     case 'z':
                       /* Ignore the following whitespace.  */
                       do
                         {
                           c = phase1_getc ();
                         }
                       while (c == ' ' || c == '\n' || c == '\t' || c == '\r'
                              || c == '\f' || c == '\v');

                       phase1_ungetc (c);

                       break;
                     default:
                       /* Check if it's a '\ddd' sequence.  */
                       if (c >= '0' && c <= '9')
                         {
                           int num = 0;
                           int i = 0;

                           while (c >= '0' && c <= '9' && i < 3)
                             {
                               num *= 10;
                               num += (c - '0');
                               c = phase1_getc ();
                               i++;
                             }

                           /* The last read character is either a
                              non-number or another number after our
                              '\ddd' sequence.  We need to ungetc it.  */
                           phase1_ungetc (c);

                           /* The sequence number is too big, this
                              causes a lexical error.  Ignore it.  */
                           if (num < 256)
                             string_add (num);
                         }
                       else if (c != EOF)
                         /* Any other escaped character, including a
                            newline, stands for itself.  */
                         string_add (c);
                       /* A backslash right before the end of the file adds
                          nothing; the next iteration ends the string.  */
                     }
                 }
               else
                 string_add (c);
             }
           break;

         case '[':
           c = phase1_getc ();

           /* Count the number of equal signs.  */
           int esigns = 0;
           while (c == '=')
             {
               esigns++;
               c = phase1_getc ();
             }

           if (c != '[')
             {
               /* We did not find what we were looking for, ungetc it.  */
               phase1_ungetc (c);
               if (esigns == 0)
                 {
                   /* Our current character isn't '[' and we got 0 equal
                      signs, so the first '[' must have been a left
                      bracket.  */
                   tp->type = token_type_lbracket;
                   return;
                 }
               else
                 /* Lexical error, ignore it.  */
                 continue;
             }

           /* Found an opening long bracket.  */
           string_start ();

           /* See if it is immediately followed by a newline.  Such a
              newline is not part of the string.  */
           c = phase1_getc ();
           if (c != '\n')
             phase1_ungetc (c);

           for (;;)
             {
               c = phase1_getc ();

               if (c == EOF)
                 {
                   /* Unterminated long string.  */
                   string_end ();
                   tp->string = xstrdup (string_buf);
                   tp->comment = add_reference (savable_comment);
                   tp->type = token_type_string;
                   return;
                 }
               if (c == ']')
                 {
                   c = phase1_getc ();

                   /* Count the number of equal signs.  */
                   int esigns2 = 0;
                   while (c == '=')
                     {
                       esigns2++;
                       c = phase1_getc ();
                     }

                   if (c == ']' && esigns == esigns2)
                     {
                       /* We got ']==...==]', where the number of equal
                          signs matches the number of equal signs in
                          the opening bracket.  */
                       string_end ();
                       tp->string = xstrdup (string_buf);
                       tp->comment = add_reference (savable_comment);
                       tp->type = token_type_string;
                       return;
                     }
                   else
                     {
                       /* Otherwise we got either ']==' garbage or
                          ']==...==]' with a different number of equal
                          signs.

                          Add ']' and equal signs to the string, and
                          ungetc the current character, because the
                          second ']' might be a part of another closing
                          long bracket, e.g. '==]===]'.  */
                       phase1_ungetc (c);

                       string_add (']');
                       while (esigns2--)
                         string_add ('=');
                     }
                 }
               else
                 string_add (c);
             }
           break;

         case ']':
           tp->type = token_type_rbracket;
           return;

         default:
           if (c >= '0' && c <= '9')
             {
               /* A number.  Only its extent matters, not its value.  */
               while (c >= '0' && c <= '9')
                 c = phase1_getc ();

               if (c == '.')
                 {
                   c = phase1_getc ();
                   while (c >= '0' && c <= '9')
                     c = phase1_getc ();
                 }

               if (c == 'e' || c == 'E')
                 {
                   /* Consume the exponent marker itself, then an optional
                      sign, then the exponent digits.  */
                   c = phase1_getc ();
                   if (c == '+' || c == '-')
                     c = phase1_getc ();
                   while (c >= '0' && c <= '9')
                     c = phase1_getc ();
                 }

               phase1_ungetc (c);

               tp->type = token_type_number;
               return;
             }
           else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                    || c == '_')
             {
               string_start ();
               while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                      || c == '_' || (c >= '0' && c <= '9'))
                 {
                   string_add (c);
                   c = phase1_getc ();
                 }
               string_end ();
               phase1_ungetc (c);

               /* The word operators are not symbols.  */
               if (strcmp (string_buf, "not") == 0)
                 tp->type = token_type_operator1;
               else if (strcmp (string_buf, "and") == 0)
                 tp->type = token_type_operator2;
               else if (strcmp (string_buf, "or") == 0)
                 tp->type = token_type_operator2;
               else
                 {
                   tp->string = xstrdup (string_buf);
                   tp->type = token_type_symbol;
                 }
               return;
             }
           else
             /* NOTE(review): there is no 'return' here, so the loop goes on
                and the type stored here is overwritten by the next token.
                In effect every other character ('{', '}', ':', '\r', ...)
                is skipped like whitespace.  Confirm whether this is
                intended before changing it.  */
             tp->type = token_type_other;
         }
     }
 }

 /* String and symbol concatenation.  */

 /* Type of the token that phase4_get () returned most recently.  It decides
    whether a following string may be merged with further strings.  */
 static token_type_ty phase4_last;

 /* We need 3 pushback tokens for string and symbol concatenation.
    The token pushed last is returned first.  */
 static int phase4_pushback_length;
 static token_ty phase4_pushback[3];

 /* Give the token *TP back, so that the next phase4_get () returns it.
    An EOF token is silently ignored.  A fourth pending token aborts the
    program.  */
 static void
 phase4_unget (token_ty *tp)
 {
   if (tp->type == token_type_eof)
     return;

   if (phase4_pushback_length == SIZEOF (phase4_pushback))
     abort ();
   phase4_pushback[phase4_pushback_length] = *tp;
   phase4_pushback_length++;
 }

 /* 4. String concatenation.
    Store the next token in *TP.  A sequence 'string .. string .. ...' is
    merged into a single string token, provided that
      - the token before the first string is not an operator1, a dot, a
        symbol, a '..' or a closing parenthesis, and
      - each string that gets appended is not followed by an operator1.
    Tokens given back with phase4_unget () are returned as they are.  */
 static void
 phase4_get (token_ty *tp)
 {
   bool mergeable;

   if (phase4_pushback_length)
     {
       phase4_pushback_length--;
       *tp = phase4_pushback[phase4_pushback_length];
       phase4_last = tp->type;
       return;
     }

   phase3_get (tp);

   mergeable = (tp->type == token_type_string
                && phase4_last != token_type_operator1
                && phase4_last != token_type_dot
                && phase4_last != token_type_symbol
                && phase4_last != token_type_doubledot
                && phase4_last != token_type_rparen);
   if (mergeable)
     {
       char *merged = tp->string;
       size_t merged_len = strlen (merged);

       for (;;)
         {
           token_ty op;
           token_ty rhs;
           token_ty lookahead;
           size_t rhs_len;

           phase3_get (&op);
           if (op.type != token_type_doubledot)
             {
               phase3_unget (&op);
               break;
             }

           phase3_get (&rhs);
           if (rhs.type != token_type_string)
             {
               phase3_unget (&rhs);
               phase3_unget (&op);
               break;
             }

           /* Peek at the token after the second string.  It is given back
              in either case.  */
           phase3_get (&lookahead);
           phase3_unget (&lookahead);
           if (lookahead.type == token_type_operator1)
             {
               /* The second string is an operand of that operator; leave
                  everything as it is.  */
               phase3_unget (&rhs);
               phase3_unget (&op);
               break;
             }

           rhs_len = strlen (rhs.string);
           merged = (char *) xrealloc (merged, merged_len + rhs_len + 1);
           memcpy (merged + merged_len, rhs.string, rhs_len + 1);
           merged_len += rhs_len;

           free_token (&rhs);
           free_token (&op);
         }

       tp->string = merged;
     }

   phase4_last = tp->type;
 }

 /* 5. Symbol concatenation.
    Store the next token in *TP.  A sequence symbol1 . ... . symbolN is
    combined into a single symbol token "symbol1.....symbolN", so that
    function calls like gettext.gettext can be recognized.  The information
    present for symbolI.....symbolN has precedence over the information for
    symbolJ.....symbolN with J > I.  */
 static void
 phase5_get (token_ty *tp)
 {
   char *joined;
   size_t joined_len;

   phase4_get (tp);
   if (tp->type != token_type_symbol)
     return;

   joined = tp->string;
   joined_len = strlen (joined);

   for (;;)
     {
       token_ty dot;
       token_ty member;
       size_t member_len;

       phase4_get (&dot);
       if (dot.type != token_type_dot)
         {
           phase4_unget (&dot);
           break;
         }

       phase4_get (&member);
       if (member.type != token_type_symbol)
         {
           /* Give both tokens back; the dot must come out first again.  */
           phase4_unget (&member);
           phase4_unget (&dot);
           break;
         }

       /* Append '.' and the member name, including its NUL.  */
       member_len = strlen (member.string);
       joined = (char *) xrealloc (joined, joined_len + 1 + member_len + 1);
       joined[joined_len] = '.';
       memcpy (joined + joined_len + 1, member.string, member_len + 1);
       joined_len += 1 + member_len;

       free_token (&dot);
       free_token (&member);
     }

   tp->string = joined;
 }

 /* Store the next token of the input in *TOK.  This is the entry point of
    the lexer; it returns the tokens of the last phase, phase5_get ().  */
 static void
 x_lua_lex (token_ty *tok)
 {
   phase5_get (tok);
 }


 /* ========================= Extracting strings.  ========================== */


 /* Context lookup table.  extract_balanced () looks up every symbol in it
    to obtain the flag contexts for the arguments that follow.  */
 static flag_context_list_table_ty *flag_context_list_table;


 /* The file is broken into tokens.  Scan the token stream, looking for
    a keyword, followed by a left paren, followed by a string.  When we
    see this sequence, we have something to remember.  We assume we are
    looking at a valid Lua program, and leave the complaints about the
    grammar to the compiler.

      Normal handling: Look for
        keyword ( ... msgid ... )
        keyword msgid
      Plural handling: Look for
        keyword ( ... msgid ... msgid_plural ... )

    We use recursion because the arguments before msgid or between msgid
    and msgid_plural can contain subexpressions of the same form.  */

 /* Extract messages until the next balanced closing parenthesis or bracket.
    Extracted messages are added to MLP.
    DELIM can be either token_type_rparen or token_type_rbracket, or
    token_type_eof to accept both.
    Return true upon eof, false upon closing parenthesis or bracket.  */
 static bool
 extract_balanced (message_list_ty *mlp, token_type_ty delim,
                   flag_context_ty outer_context,
                   flag_context_list_iterator_ty context_iter,
                   struct arglist_parser *argparser)
 {
   /* Current argument number.  */
   int arg = 1;
   /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
   int state;
   /* Parameters of the keyword just seen.  Defined only in state 1.  */
   const struct callshapes *next_shapes = NULL;
   /* Context iterator that will be used if the next token is a '('.  */
   flag_context_list_iterator_ty next_context_iter =
     passthrough_context_list_iterator;
   /* Current context.  */
   flag_context_ty inner_context =
     inherited_context (outer_context,
                        flag_context_list_iterator_advance (&context_iter));

   /* Start state is 0.  */
   state = 0;

   for (;;)
     {
       token_ty token;

       x_lua_lex (&token);

       switch (token.type)
         {
         case token_type_symbol:
           {
             void *keyword_value;

             if (hash_find_entry (&keywords, token.string, strlen (token.string),
                                  &keyword_value)
                 == 0)
               {
                 next_shapes = (const struct callshapes *) keyword_value;
                 state = 1;
               }
             else
               state = 0;
           }
           next_context_iter =
             flag_context_list_iterator (
               flag_context_list_table_lookup (
                 flag_context_list_table,
                 token.string, strlen (token.string)));
           free (token.string);
           continue;

         case token_type_lparen:
           if (extract_balanced (mlp, token_type_rparen,
                                 inner_context, next_context_iter,
                                 arglist_parser_alloc (mlp,
                                                       state ? next_shapes : NULL)))
             {
               arglist_parser_done (argparser, arg);
               return true;
             }
           next_context_iter = null_context_list_iterator;
           state = 0;
           break;

         case token_type_rparen:
           if (delim == token_type_rparen || delim == token_type_eof)
             {
               arglist_parser_done (argparser, arg);
               return false;
             }

           next_context_iter = null_context_list_iterator;
           state = 0;
           continue;

         case token_type_lbracket:
           if (extract_balanced (mlp, token_type_rbracket,
                                 null_context, null_context_list_iterator,
                                 arglist_parser_alloc (mlp, NULL)))
             {
               arglist_parser_done (argparser, arg);
               return true;
             }
           next_context_iter = null_context_list_iterator;
           state = 0;
           break;

         case token_type_rbracket:
           if (delim == token_type_rbracket || delim == token_type_eof)
             {
               arglist_parser_done (argparser, arg);
               return false;
             }

           next_context_iter = null_context_list_iterator;
           state = 0;
           continue;

         case token_type_comma:
           arg++;
           inner_context =
             inherited_context (outer_context,
                                flag_context_list_iterator_advance (
                                  &context_iter));
           next_context_iter = passthrough_context_list_iterator;
           state = 0;
           continue;

         case token_type_eof:
           arglist_parser_done (argparser, arg);
           return true;

         case token_type_string:
           {
             lex_pos_ty pos;
             pos.file_name = logical_file_name;
             pos.line_number = token.line_number;

             if (extract_all)
               remember_a_message (mlp, NULL, token.string, false, false,
                                   inner_context, &pos,
                                   NULL, token.comment, false);
             else
               {
                 mixed_string_ty *ms =
                   mixed_string_alloc_simple (token.string, lc_string,
                                              pos.file_name, pos.line_number);
                 free (token.string);
                 /* A string immediately after a symbol means a function call.  */
                 if (state)
                   {
                     struct arglist_parser *tmp_argparser;
                     tmp_argparser = arglist_parser_alloc (mlp, next_shapes);

                     arglist_parser_remember (tmp_argparser, 1, ms,
                                              inner_context,
                                              pos.file_name, pos.line_number,
                                              token.comment, false);
                     arglist_parser_done (tmp_argparser, 1);
                   }
                 else
                   arglist_parser_remember (argparser, arg, ms,
                                            inner_context,
                                            pos.file_name, pos.line_number,
                                            token.comment, false);
               }
           }
           drop_reference (token.comment);
           next_context_iter = null_context_list_iterator;
           state = 0;
           continue;

         case token_type_dot:
         case token_type_doubledot:
         case token_type_operator1:
         case token_type_operator2:
         case token_type_number:
         case token_type_other:
           next_context_iter = null_context_list_iterator;
           state = 0;
           continue;

         default:
           abort ();
         }
     }
 }

 /* Entry point of the Lua backend: scan the stream F and hand every
    token to extract_balanced until the end of the input is reached.

    F                 the stream the lexer reads from.
    REAL_FILENAME     stored in real_file_name for the duration of the scan.
    LOGICAL_FILENAME  duplicated with xstrdup into logical_file_name, which
                      extract_balanced copies into the positions it records.
    FLAG_TABLE        installed as flag_context_list_table, the table that
                      extract_balanced consults for each symbol.
    MDLP              messages are collected into MDLP->item[0]->messages.

    All file-scope scanner state is reset before the scan and the file
    related variables are cleared again afterwards.  */
 void
 extract_lua (FILE *f,
              const char *real_filename, const char *logical_filename,
              flag_context_list_table_ty *flag_table,
              msgdomain_list_ty *mdlp)
 {
   /* Only the first message domain receives messages.  */
   message_list_ty *target_mlp = mdlp->item[0]->messages;
   bool reached_eof;

   /* Context lookup table and keyword table used while scanning.  */
   flag_context_list_table = flag_table;

   /* Input source and position tracking.  */
   fp = f;
   line_number = 1;
   real_file_name = real_filename;
   logical_file_name = xstrdup (logical_filename);

   /* No line has been seen yet, neither with nor without a comment.  */
   last_non_comment_line = -1;
   last_comment_line = -1;

   /* Start every lexer phase from a clean state with empty pushback.  */
   first_character = true;
   phase1_pushback_length = 0;
   phase3_pushback_length = 0;
   phase4_pushback_length = 0;
   phase4_last = token_type_eof;

   init_keywords ();

   /* Consume tokens until eof is seen.  extract_balanced returns false
      when it stops at an unbalanced closing delimiter; in that case simply
      call it again, each time with a freshly allocated argument parser.  */
   do
     reached_eof = extract_balanced (target_mlp, token_type_eof,
                                     null_context, null_context_list_iterator,
                                     arglist_parser_alloc (target_mlp, NULL));
   while (!reached_eof);

   /* Detach from the input.  The duplicated logical file name is not freed
      here; presumably the recorded message positions still refer to it.  */
   line_number = 0;
   logical_file_name = NULL;
   real_file_name = NULL;
   fp = NULL;
 }