 /* Source: mingw/gettext/gettext-tools/src/x-sh.c - kiwivm - Git at Google  */

 /* xgettext sh backend.
    Copyright (C) 2003, 2005-2009, 2014, 2018-2020 Free Software Foundation, Inc.
    Written by Bruno Haible <bruno@clisp.org>, 2003.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

 #ifdef HAVE_CONFIG_H
 # include "config.h"
 #endif

 /* Specification.  */
 #include "x-sh.h"

 #include <errno.h>
 #include <limits.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 #include "attribute.h"
 #include "message.h"
 #include "xgettext.h"
 #include "xg-pos.h"
 #include "xg-mixed-string.h"
 #include "xg-arglist-context.h"
 #include "xg-arglist-callshape.h"
 #include "xg-arglist-parser.h"
 #include "xg-message.h"
 #include "error.h"
 #include "error-progname.h"
 #include "xalloc.h"
 #include "mem-hash-map.h"
 #include "../../gettext-runtime/src/escapes.h"
 #include "gettext.h"

 #define _(s) gettext(s)

 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))


 /* The sh syntax is defined in POSIX:2001, see
      http://www.opengroup.org/onlinepubs/007904975/utilities/xcu_chap02.html
    Summary of sh syntax:
    - Input is broken into words, which are then subject to
      - tilde expansion ~...
      - command substitution `...`
      - variable substitution $var
      - arithmetic substitution $((...))
      - field splitting at whitespace (IFS)
      - wildcard pattern expansion *?
      - quote removal
    - Strings are enclosed in "..."; command substitution, variable
      substitution and arithmetic substitution are performed here as well.
    - '...' is a string without substitutions.
    - The list of resulting words is split into commands by semicolon and
      newline.
    - '#' at the beginning of a word introduces a comment until end of line.
    The parser is implemented in bash-2.05b/parse.y.  */


 /* ====================== Keyword set customization.  ====================== */

 /* If true extract all strings.  Set by x_sh_extract_all ().  */
 static bool extract_all = false;

 /* Keyword table; x_sh_keyword () inserts each keyword name together with
    its call shape.  Initialized lazily on the first insertion.  */
 static hash_table keywords;
 /* True until x_sh_keyword (NULL) has been called or init_keywords () has
    registered the built-in keywords.  */
 static bool default_keywords = true;


 /* Turn on extraction of all strings by setting the extract_all flag.  */
 void
 x_sh_extract_all ()
 {
   extract_all = true;
 }


 /* Register the keyword specification NAME (for example "gettext" or
    "ngettext:1,2") in the keywords table.
    A NULL NAME does not register anything; it only switches off the
    built-in default keywords.  */
 void
 x_sh_keyword (const char *name)
 {
   const char *name_end;
   struct callshape shape;
   const char *colon;

   if (name == NULL)
     {
       default_keywords = false;
       return;
     }

   /* The table is created on first use.  */
   if (keywords.table == NULL)
     hash_init (&keywords, 100);

   split_keywordspec (name, &name_end, &shape);

   /* The characters between name and name_end should form a valid C
      identifier.  A colon inside that range means an invalid parse in
      split_keywordspec(); such a specification is silently dropped.  */
   colon = strchr (name, ':');
   if (colon != NULL && colon < name_end)
     return;

   insert_keyword_callshape (&keywords, name, name_end - name, &shape);
 }

 /* Finish initializing the keywords hash table: register the built-in
    keywords, unless they were disabled or have been registered already.
    Called after argument processing, before each file is processed.  */
 static void
 init_keywords ()
 {
   /* When adding new keywords here, also update the documentation in
      xgettext.texi!
      Note: There is also special handling for 'gettext' and 'ngettext'
      in read_command, below.  */
   static const char *const builtin_keywords[] =
     {
       "gettext",
       "ngettext:1,2",
       "eval_gettext",
       "eval_ngettext:1,2",
       "eval_pgettext:1c,2",
       "eval_npgettext:1c,2,3"
     };
   size_t i;

   if (!default_keywords)
     return;

   for (i = 0; i < sizeof builtin_keywords / sizeof builtin_keywords[0]; i++)
     x_sh_keyword (builtin_keywords[i]);

   /* Make later calls no-ops.  */
   default_keywords = false;
 }

 /* Record the format-string flags of the built-in keywords' arguments by
    passing each "function:argnum:flag" specification to
    xgettext_record_flag ().  */
 void
 init_flag_table_sh ()
 {
   static const char *const flag_specs[] =
     {
       "gettext:1:pass-sh-format",
       "ngettext:1:pass-sh-format",
       "ngettext:2:pass-sh-format",
       "eval_gettext:1:sh-format",
       "eval_ngettext:1:sh-format",
       "eval_ngettext:2:sh-format",
       "eval_pgettext:2:sh-format",
       "eval_npgettext:2:sh-format",
       "eval_npgettext:3:sh-format"
     };
   size_t i;

   for (i = 0; i < sizeof flag_specs / sizeof flag_specs[0]; i++)
     xgettext_record_flag (flag_specs[i]);
 }


 /* ======================== Reading of characters.  ======================== */

 /* The input file stream.  Read by do_getc () and pushed back to by
    do_ungetc ().  */
 static FILE *fp;


 /* Fetch the next character from the input file.
    Returns EOF at end of input.  A read error on the stream is reported
    through error () with status EXIT_FAILURE.  line_number is incremented
    for every newline that is read.  */
 static int
 do_getc ()
 {
   int ch = getc (fp);

   switch (ch)
     {
     case EOF:
       if (ferror (fp))
         error (EXIT_FAILURE, errno,
                _("error while reading \"%s\""), real_file_name);
       break;

     case '\n':
       line_number++;
       break;

     default:
       break;
     }

   return ch;
 }

 /* Put back the last fetched character, not EOF.
    Undoes the line_number increment done by do_getc () for a newline.  */
 static void
 do_ungetc (int c)
 {
   ungetc (c, fp);
   if (c == '\n')
     line_number--;
 }


 /* Remove backslash followed by newline from the input stream.  */

 /* Characters pushed back by phase1_ungetc (), returned again by
    phase1_getc () before any new input is read.  */
 static int phase1_pushback[1];
 /* Number of valid entries in phase1_pushback.  */
 static int phase1_pushback_length;

 /* Return the next input character, with every backslash-newline pair
    removed.  A pushed-back character, if any, is returned first.
    A backslash that is not followed by a newline is returned as is.  */
 static int
 phase1_getc ()
 {
   int c;

   if (phase1_pushback_length > 0)
     {
       phase1_pushback_length--;
       c = phase1_pushback[phase1_pushback_length];
       /* phase1_ungetc () decremented line_number for a newline.  */
       if (c == '\n')
         line_number++;
       return c;
     }

   c = do_getc ();
   while (c == '\\')
     {
       int next = do_getc ();

       if (next != '\n')
         {
           /* A plain backslash: keep it and put back what follows.  */
           if (next != EOF)
             do_ungetc (next);
           return '\\';
         }

       /* Backslash-newline: discard both characters and read on.  */
       c = do_getc ();
     }

   return c;
 }

 /* Push back C so that the next phase1_getc () returns it again.
    Supports only one pushback character; EOF is ignored.  */
 static void
 phase1_ungetc (int c)
 {
   if (c == EOF)
     return;

   /* phase1_getc () will count the newline again when it is re-read.  */
   if (c == '\n')
     line_number--;

   if (phase1_pushback_length == SIZEOF (phase1_pushback))
     abort ();
   phase1_pushback[phase1_pushback_length] = c;
   phase1_pushback_length++;
 }


 /* ========================== Reading of tokens.  ========================== */


 /* A token consists of a sequence of characters.
    The characters live in a growable array that carries no terminating NUL;
    charcount tells how many entries are valid.  */
 struct token
 {
   int allocated;                /* number of allocated chars */
   int charcount;                /* number of used chars */
   char *chars;                  /* the token's constituents */
 };

 /* Initialize a 'struct token': empty, with room for 10 characters.  */
 static inline void
 init_token (struct token *tp)
 {
   const int initial_capacity = 10;

   tp->charcount = 0;
   tp->allocated = initial_capacity;
   tp->chars = XNMALLOC (initial_capacity, char);
 }

 /* Free the memory pointed to by a 'struct token'.
    Only the character array is released, not the struct itself.  */
 static inline void
 free_token (struct token *tp)
 {
   free (tp->chars);
 }

 /* Ensure there is enough room in the token for one more character.
    When the array is full, its capacity is doubled.  */
 static inline void
 grow_token (struct token *tp)
 {
   if (tp->charcount != tp->allocated)
     return;

   tp->allocated = 2 * tp->allocated;
   tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
 }

 /* Convert a struct token * to a char*.
    Returns a freshly allocated, NUL-terminated copy of the token's
    characters.  */
 static char *
 string_of_token (const struct token *tp)
 {
   int len = tp->charcount;
   char *copy = XNMALLOC (len + 1, char);

   memcpy (copy, tp->chars, len);
   copy[len] = '\0';
   return copy;
 }


 /* ========================= Accumulating messages ========================= */


 /* Message list of the current extraction.
    NOTE(review): not referenced in this part of the file; the description is
    inferred from the section heading -- confirm against its users.  */
 static message_list_ty *mlp;


 /* ========================= Accumulating comments ========================= */


 /* Text of the comment line currently being accumulated.  Grown with
    xrealloc () by comment_add () and comment_line_end ().  */
 static char *buffer;
 /* Number of bytes allocated for buffer.  */
 static size_t bufmax;
 /* Number of bytes of buffer in use.  */
 static size_t buflen;

 /* Begin accumulating a new comment line: mark the buffer as empty.
    The allocation is kept for reuse.  */
 static inline void
 comment_start ()
 {
   buflen = 0;
 }

 /* Append the character C to the comment buffer, enlarging the buffer
    first when it is full.  */
 static inline void
 comment_add (int c)
 {
   if (bufmax <= buflen)
     {
       bufmax = 2 * bufmax + 10;
       buffer = xrealloc (buffer, bufmax);
     }
   buffer[buflen] = c;
   buflen++;
 }

 /* Finish the current comment line: drop trailing spaces and tabs,
    NUL-terminate the buffer and hand it to savable_comment_add ().  */
 static inline void
 comment_line_end ()
 {
   for (; buflen > 0; buflen--)
     {
       char last = buffer[buflen - 1];

       if (!(last == ' ' || last == '\t'))
         break;
     }

   /* Make room for the terminating NUL.  */
   if (bufmax <= buflen)
     {
       bufmax = 2 * bufmax + 10;
       buffer = xrealloc (buffer, bufmax);
     }
   buffer[buflen] = '\0';
   savable_comment_add (buffer);
 }


 /* These are for tracking whether comments count as immediately before
    keyword.  */
 /* Line on which the most recent '#' comment started.  */
 static int last_comment_line;
 /* Line of the most recent non-comment word; when it is greater than
    last_comment_line at a newline, the saved comments are discarded.  */
 static int last_non_comment_line;


 /* ========================= Debackslashification ========================== */

 /* This state tracks the effect of backquotes, double-quotes and single-quotes
    on the parsing of backslashes.  We make a single pass through the input
    file, keeping the state up to date.  This is much faster than accumulating
    strings and processing them with explicit debackslashification, like the
    shell does it.  */

 /* The number of nested `...` or "`...`" constructs.  Assumed to be <= 32.
    Incremented by saw_opening_backquote () and decremented by
    saw_closing_backquote ().  */
 static unsigned int nested_backquotes;

 /* A bit mask indicating which of the currently open `...` or "`...`"
    constructs is with double-quotes: "`...`".
    A bit value of 1 stands for "`...`", a bit value of 0 stands for `...`.
    Bit position 0 designates the outermost backquotes nesting,
    bit position 1 the second-outermost backquotes nesting,
    ...
    bit position (nested_backquotes-1) the innermost backquotes nesting.  */
 static unsigned int open_doublequotes_mask;

 /* A bit indicating whether a double-quote is currently open inside the
    innermost backquotes nesting.  */
 static bool open_doublequote;

 /* A bit indicating whether a single-quote is currently open inside the
    innermost backquotes nesting.  */
 static bool open_singlequote;

 /* The expected terminator of the currently open single-quote.
    Usually '\'', but can be '"' for i18n-quotes.
    Only meaningful while open_singlequote is true.  */
 static char open_singlequote_terminator;


 /* Functions to update the state.  */

 /* Update the state for an opening backquote: remember in
    open_doublequotes_mask whether it occurred inside double-quotes, enter
    one more nesting level, and start that level with no double-quote open.
    Aborts if a single-quote is open.  */
 static inline void
 saw_opening_backquote ()
 {
   if (open_singlequote)
     abort ();
   /* The mask holds one bit per nesting level.  Shifting by the width of
      the type or more is undefined behavior, so a level that has no bit in
      the mask is not recorded.  */
   if (open_doublequote
       && nested_backquotes < sizeof (open_doublequotes_mask) * CHAR_BIT)
     open_doublequotes_mask |= (unsigned int) 1 << nested_backquotes;
   nested_backquotes++;
   open_doublequote = false;
 }

 /* Update the state for a closing backquote: leave the innermost nesting
    level, restore the double-quote state that was in effect when that level
    was opened, and clear its bit (and all higher bits) in
    open_doublequotes_mask.  */
 static inline void
 saw_closing_backquote ()
 {
   nested_backquotes--;
   if (nested_backquotes < sizeof (open_doublequotes_mask) * CHAR_BIT)
     {
       open_doublequote = (open_doublequotes_mask >> nested_backquotes) & 1;
       open_doublequotes_mask &= ((unsigned int) 1 << nested_backquotes) - 1;
     }
   else
     /* This level has no bit in the mask (nesting too deep, or the counter
        wrapped around).  Shifting by that amount would be undefined
        behavior, so treat the level as not double-quoted.  */
     open_doublequote = false;
   open_singlequote = false; /* just for safety */
 }

 /* Update the state for an opening double-quote.
    Aborts if a quote of either kind is already open.  */
 static inline void
 saw_opening_doublequote ()
 {
   if (open_singlequote)
     abort ();
   if (open_doublequote)
     abort ();
   open_doublequote = true;
 }

 /* Update the state for a closing double-quote.
    Aborts unless a double-quote, and no single-quote, is open.  */
 static inline void
 saw_closing_doublequote ()
 {
   if (open_singlequote)
     abort ();
   if (!open_doublequote)
     abort ();
   open_doublequote = false;
 }

 /* Update the state for an opening single-quote and expect '\'' as its
    terminator.  Aborts if a quote of either kind is already open.  */
 static inline void
 saw_opening_singlequote ()
 {
   if (open_doublequote)
     abort ();
   if (open_singlequote)
     abort ();
   open_singlequote_terminator = '\'';
   open_singlequote = true;
 }

 /* Update the state for a closing single-quote.
    Aborts unless a single-quote, and no double-quote, is open.  */
 static inline void
 saw_closing_singlequote ()
 {
   if (open_doublequote)
     abort ();
   if (!open_singlequote)
     abort ();
   open_singlequote = false;
 }


 /* ========================== Reading of commands ========================== */

 /* We are only interested in constant strings.  Other words need not be
    represented precisely.  */
 enum word_type
 {
   t_string,     /* constant string */
   t_assignment, /* variable assignment */
   t_other,      /* other string */
   t_separator,  /* command separator: semicolon or newline */
   t_redirect,   /* redirection: one of < > >| << <<- >> <> <& >& */
   t_backquote,  /* closing '`' pseudo word */
   t_paren,      /* closing ')' pseudo word */
   t_eof         /* EOF marker */
 };

 /* A word as produced by read_word ().  The token and line_number_at_start
    fields are meaningful only when type is t_string; free_word () releases
    the token in that case only.  */
 struct word
 {
   enum word_type type;
   struct token *token;          /* for t_string */
   int line_number_at_start;     /* for t_string */
 };

 /* Free the memory pointed to by a 'struct word'.
    Only t_string words own a token; all other word types own nothing.  */
 static inline void
 free_word (struct word *wp)
 {
   struct token *tok;

   if (wp->type != t_string)
     return;

   tok = wp->token;
   free_token (tok);
   free (tok);
 }

 /* Convert a t_string token to a char*.  */
 static char *
 string_of_word (const struct word *wp)
 {
   char *str;
   int n;

   if (!(wp->type == t_string))
     abort ();
   n = wp->token->charcount;
   str = XNMALLOC (n + 1, char);
   memcpy (str, wp->token->chars, n);
   str[n] = '\0';
   return str;
 }

 /* Convert a t_string token to a char*, ignoring the first OFFSET bytes.
    Returns a freshly allocated, NUL-terminated copy.  Aborts if the word is
    not of type t_string or if OFFSET exceeds the word's length.  */
 static char *
 substring_of_word (const struct word *wp, size_t offset)
 {
   size_t total;
   size_t rest;
   char *copy;

   if (wp->type != t_string)
     abort ();

   total = wp->token->charcount;
   if (offset > total)
     abort ();

   rest = total - offset;
   copy = XNMALLOC (rest + 1, char);
   memcpy (copy, wp->token->chars + offset, rest);
   copy[rest] = '\0';
   return copy;
 }


 /* Whitespace recognition.
    True for an unquoted space, tab or newline; false for every other value,
    including EOF and QUOTED characters.  */

 static inline bool
 is_whitespace (int c)
 {
   switch (c)
     {
     case ' ':
     case '\t':
     case '\n':
       return true;

     default:
       return false;
     }
 }

 /* Operator character recognition.
    True for the unquoted characters that can begin a shell operator:
    | & ; < > ( )  */

 static inline bool
 is_operator_start (int c)
 {
   switch (c)
     {
     case '|':
     case '&':
     case ';':
     case '<':
     case '>':
     case '(':
     case ')':
       return true;

     default:
       return false;
     }
 }


 /* Denotation of a quoted character.
    The distinction between quoted and unquoted character is important only for
    the special, whitespace and operator characters; it is irrelevant for
    alphanumeric characters, '\\' and many others.
    QUOTED (c) lies just above the 'unsigned char' range, so it can never
    compare equal to a plain character constant.  */
 #define QUOTED(c) (UCHAR_MAX + 1 + (c))
 /* Values in the 'unsigned char' range are implicitly unquoted.  Among these,
    the following are important:
      '"'         opening or closing double quote
      '\''        opening or closing single quote
      '$'         the unknown result of a dollar expansion
      '`'         does not occur - replaced with OPENING_BACKQUOTE or
                  CLOSING_BACKQUOTE
  */
 /* The two backquote markers use the next two multiples of UCHAR_MAX + 1 as
    base, so they are distinct from each other and from QUOTED ('`').  */
 #define OPENING_BACKQUOTE (2 * (UCHAR_MAX + 1) + '`')
 #define CLOSING_BACKQUOTE (3 * (UCHAR_MAX + 1) + '`')

 /* 2 characters of pushback are supported.
    2 characters of pushback occur only when the first is an 'x'; in all
    other cases only one character of pushback is needed.  */
 static int phase2_pushback[2];
 /* Number of valid entries in phase2_pushback.  */
 static int phase2_pushback_length;

 /* Return the next character, with backslashes removed.
    The result is QUOTED(c) for some unsigned char c, if the next character
    is escaped sufficiently often to make it a regular constituent character,
    or simply an 'unsigned char' if it has its special meaning (of special,
    whitespace or operator character), or OPENING_BACKQUOTE, CLOSING_BACKQUOTE,
    EOF.
    A character pushed back with phase2_ungetc () is returned first, without
    being interpreted again.
    It's the caller's responsibility to update the state.  */
 static int
 phase2_getc ()
 {
   int c;

   if (phase2_pushback_length)
     {
       c = phase2_pushback[--phase2_pushback_length];
       /* phase2_ungetc () decremented line_number for a newline.  */
       if (c == '\n')
         ++line_number;
       return c;
     }

   c = phase1_getc ();
   if (c == EOF)
     return c;
   /* A single-quote is an ordinary character inside double-quotes, and
      inside a single-quote construct whose terminator is something else.  */
   if (c == '\'')
     return ((open_doublequote
              || (open_singlequote && open_singlequote_terminator != c))
             ? QUOTED (c)
             : c);
   if (open_singlequote)
     {
       if (c == open_singlequote_terminator)
         return c;
     }
   else
     {
       if (c == '"' || c == '$')
         return c;
       if (c == '`')
         return (nested_backquotes > 0 ? CLOSING_BACKQUOTE : OPENING_BACKQUOTE);
     }
   if (c == '\\')
     {
       /* Number of debackslashification passes that are active at the
          current point.  */
       unsigned int debackslashify =
         nested_backquotes + (open_singlequote ? 0 : 1);
       /* Normal number of backslashes that yield a single backslash in the
          final output.
          NOTE(review): the shift is well-defined only while debackslashify
          is smaller than the width of unsigned int -- confirm that the
          nesting depth is bounded accordingly.  */
       unsigned int expected_count =
         (unsigned int) 1 << debackslashify;
       /* Number of backslashes found.  */
       unsigned int count;

       /* Consume further backslashes, up to expected_count in total.  When
          the loop stops early, c holds the first non-backslash character.  */
       for (count = 1; count < expected_count; count++)
         {
           c = phase1_getc ();
           if (c != '\\')
             break;
         }
       if (count == expected_count)
         return '\\';

       /* The count of backslashes is > 0 and < expected_count, therefore the
          result depends on c, the first character after the backslashes.
          Note: The formulas below don't necessarily have a logic; they were
          empirically determined such that 1. the xgettext-sh-1 test succeeds,
          2. the behaviour for count == 0 would correspond to the one without
          any backslash.  */
       if (c == '\'')
         {
           if (!open_singlequote && count > (expected_count >> 1))
             {
               phase1_ungetc (c);
               return '\\';
             }
           else
             return ((open_doublequote
                      || (open_singlequote
                          ? open_singlequote_terminator != c
                          : count == (expected_count >> 1)))
                     ? QUOTED (c)
                     : c);
         }
       else if (c == '"')
         {
           /* Each debackslashification pass converts \\ to \ and \" to ";
              passes corresponding to `...` drop a lone " whereas passes
              corresponding to "`...`" leave it alone.  Therefore, the
              minimum number of backslashes needed to get one double-quote
              in the end is  open_doublequotes_mask + 1.  */
           if (open_singlequote)
             {
               if (count > open_doublequotes_mask)
                 {
                   phase1_ungetc (c);
                   return '\\';
                 }
               else
                 return (open_singlequote_terminator != c ? QUOTED (c) : c);
             }
           else
             {
               if (count > open_doublequotes_mask)
                 return QUOTED (c);
               else
                 /* Some of the count values <= open_doublequotes_mask are
                    actually invalid here, but we assume a syntactically
                    correct input file anyway.  */
                 return c;
             }
         }
       else if (c == '`')
         {
           /* FIXME: This code looks fishy.  */
           if (count == expected_count - 1)
             return c;
           else
             /* Some of the count values < expected_count - 1 are
                actually invalid here, but we assume a syntactically
                correct input file anyway.  */
             if (nested_backquotes > 0 && !open_singlequote
                 && count >= (expected_count >> 2))
               return OPENING_BACKQUOTE;
             else
               return CLOSING_BACKQUOTE;
         }
       else if (c == '$')
         {
           if (open_singlequote)
             return QUOTED (c);
           if (count >= (expected_count >> 1))
             return QUOTED (c);
           else
             return c;
         }
       else
         {
           /* When not followed by a quoting character or backslash or dollar,
              a backslash survives a debackslashification pass unmodified.
              Therefore each debackslashification pass performs a
                count := (count + 1) >> 1
              operation.  Therefore the minimum number of backslashes needed
              to get one backslash in the end is  (expected_count >> 1) + 1.  */
           if (open_doublequote || open_singlequote)
             {
               if (count > 0)
                 {
                   phase1_ungetc (c);
                   return '\\';
                 }
               else
                 return QUOTED (c);
             }
           else
             {
               if (count > (expected_count >> 1))
                 {
                   phase1_ungetc (c);
                   return '\\';
                 }
               else if (count > 0)
                 return QUOTED (c);
               else
                 return c;
             }
         }
     }

   /* Any other character is a regular constituent inside quotes and keeps
      its own meaning outside of them.  */
   return (open_singlequote || open_doublequote ? QUOTED (c) : c);
 }

 /* Push back C so that the next phase2_getc () returns it again.
    Supports 2 characters of pushback; EOF is ignored.  */
 static void
 phase2_ungetc (int c)
 {
   if (c == EOF)
     return;

   /* phase2_getc () will count the newline again when it is re-read.  */
   if (c == '\n')
     line_number--;

   if (phase2_pushback_length == SIZEOF (phase2_pushback))
     abort ();
   phase2_pushback[phase2_pushback_length] = c;
   phase2_pushback_length++;
 }


 /* Context lookup table.  */
 static flag_context_list_table_ty *flag_context_list_table;


 /* Forward declaration of local functions.  */
 /* read_command_list is declared ahead of its definition because read_word
    calls it when it encounters a $(...) command substitution.  */
 static enum word_type read_command_list (int looking_for,
                                          flag_context_ty outer_context);


 /* Read the next word.
    'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
    or '\0'.  */
 static void
 read_word (struct word *wp, int looking_for, flag_context_ty context)
 {
   int c;
   bool all_unquoted_digits;
   bool all_unquoted_name_characters;

   do
     {
       c = phase2_getc ();
       if (c == '#')
         {
           /* Skip a comment up to end of line.  */
           last_comment_line = line_number;
           comment_start ();
           for (;;)
             {
               c = phase1_getc ();
               if (c == EOF || c == '\n')
                 break;
               /* We skip all leading white space, but not EOLs.  */
               if (!(buflen == 0 && (c == ' ' || c == '\t')))
                 comment_add (c);
             }
           comment_line_end ();
         }
       if (c == '\n')
         {
           /* Comments assumed to be grouped with a message must immediately
              precede it, with no non-whitespace token on a line between
              both.  */
           if (last_non_comment_line > last_comment_line)
             savable_comment_reset ();
           wp->type = t_separator;
           return;
         }
     }
   while (is_whitespace (c));

   if (c == EOF)
     {
       wp->type = t_eof;
       return;
     }

   if (c == '<' || c == '>')
     {
       /* Recognize the redirection operators < > >| << <<- >> <> <& >&
          But <( and >) are handled below, not here.  */
       int c2 = phase2_getc ();
       if (c2 != '(')
         {
           if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
             {
               if (c == '<' && c2 == '<')
                 {
                   int c3 = phase2_getc ();
                   if (c3 != '-')
                     phase2_ungetc (c3);
                 }
             }
           else
             phase2_ungetc (c2);
           wp->type = t_redirect;
           return;
         }
       else
         phase2_ungetc (c2);
     }

   if (c == CLOSING_BACKQUOTE)
     {
       if (looking_for == CLOSING_BACKQUOTE)
         {
           saw_closing_backquote ();
           wp->type = t_backquote;
           last_non_comment_line = line_number;
           return;
         }
       else if (looking_for == ')')
         {
           /* The input is invalid syntax, such as `a<(`
              Push back the closing backquote and pretend that we have seen a
              closing parenthesis.  */
           phase2_ungetc (c);
           wp->type = t_paren;
           last_non_comment_line = line_number;
           return;
         }
       else
         /* We shouldn't be reading a CLOSING_BACKQUOTE when
            looking_for == '\0'.  */
         abort ();
     }

   if (looking_for == ')' && c == ')')
     {
       wp->type = t_paren;
       last_non_comment_line = line_number;
       return;
     }

   if (is_operator_start (c))
     {
       wp->type = (c == ';' ? t_separator : t_other);
       return;
     }

   wp->type = t_string;
   wp->token = XMALLOC (struct token);
   init_token (wp->token);
   wp->line_number_at_start = line_number;
   /* True while all characters in the token seen so far are digits.  */
   all_unquoted_digits = true;
   /* True while all characters in the token seen so far form a "name":
      all characters are unquoted underscores, digits, or alphabetics from the
      portable character set, and the first character is not a digit.  Cf.
      <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_235>
    */
   all_unquoted_name_characters = true;

   for (;; c = phase2_getc ())
     {
       if (c == EOF)
         break;

       if (all_unquoted_digits && (c == '<' || c == '>'))
         {
           /* Recognize the redirection operators < > >| << <<- >> <> <& >&
              prefixed with a nonempty sequence of unquoted digits.  */
           int c2 = phase2_getc ();
           if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
             {
               if (c == '<' && c2 == '<')
                 {
                   int c3 = phase2_getc ();
                   if (c3 != '-')
                     phase2_ungetc (c3);
                 }
             }
           else
             phase2_ungetc (c2);

           wp->type = t_redirect;
           free_token (wp->token);
           free (wp->token);

           last_non_comment_line = line_number;

           return;
         }

       all_unquoted_digits = all_unquoted_digits && (c >= '0' && c <= '9');

       if (all_unquoted_name_characters && wp->token->charcount > 0 && c == '=')
         {
           wp->type = t_assignment;
           continue;
         }

       all_unquoted_name_characters =
          all_unquoted_name_characters
          && ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'
              || (wp->token->charcount > 0 && c >= '0' && c <= '9'));

       if (c == '$')
         {
           int c2;

           /* An unquoted dollar indicates we are not inside '...'.  */
           if (open_singlequote)
             abort ();
           /* After reading a dollar, we know that there is no pushed back
              character from an earlier lookahead.  */
           if (phase2_pushback_length > 0)
             abort ();
           /* Therefore we can use phase1 without interfering with phase2.
              We need to recognize $( outside and inside double-quotes.
              It would be incorrect to do
                 c2 = phase2_getc ();
                 if (c2 == '(' || c2 == QUOTED ('('))
              because that would also trigger for $\(.  */
           c2 = phase1_getc ();
           if (c2 == '(')
             {
               bool saved_open_doublequote;
               int c3;

               phase1_ungetc (c2);

               /* The entire inner command or arithmetic expression is read
                  ignoring possible surrounding double-quotes.  */
               saved_open_doublequote = open_doublequote;
               open_doublequote = false;

               c2 = phase2_getc ();
               if (c2 != '(')
                 abort ();

               c3 = phase2_getc ();
               if (c3 == '(')
                 {
                   /* Arithmetic expression (Bash syntax).  Skip until the
                      matching closing parenthesis.  */
                   unsigned int depth = 2;

                   do
                     {
                       c = phase2_getc ();
                       if (c == '(')
                         depth++;
                       else if (c == ')')
                         if (--depth == 0)
                           break;
                     }
                   while (c != EOF);
                 }
               else
                 {
                   /* Command substitution (Bash syntax).  */
                   phase2_ungetc (c3);
                   read_command_list (')', context);
                 }

               open_doublequote = saved_open_doublequote;
             }
           else
             {
               phase1_ungetc (c2);
               c2 = phase2_getc ();

               if (c2 == '\'' && !open_singlequote)
                 {
                   /* Bash builtin for string with ANSI-C escape sequences.  */
                   for (;;)
                     {
                       /* We have to use phase1 throughout this loop,
                          because phase2 does debackslashification,
                          which is undesirable when parsing ANSI-C
                          escape sequences.  */
                       c = phase1_getc ();
                       if (c == EOF)
                         break;
                       if (c == '\'')
                         break;
                       if (c == '\\')
                         {
                           c = phase1_getc ();
                           switch (c)
                             {
                             default:
                               phase1_ungetc (c);
                               c = '\\';
                               break;

                             case '\\':
                               break;
                             case '\'':
                               break;
                             case '"':
                               break;

                             case 'a':
                               c = '\a';
                               break;
                             case 'b':
                               c = '\b';
                               break;
                             case 'e':
                             case 'E':
                               c = 0x1b; /* ESC */
                               break;
                             case 'f':
                               c = '\f';
                               break;
                             case 'n':
                               c = '\n';
                               break;
                             case 'r':
                               c = '\r';
                               break;
                             case 't':
                               c = '\t';
                               break;
                             case 'v':
                               c = '\v';
                               break;

                             case 'x':
                               c = phase1_getc ();
                               if ((c >= '0' && c <= '9')
                                   || (c >= 'A' && c <= 'F')
                                   || (c >= 'a' && c <= 'f'))
                                 {
                                   int n;

                                   if (c >= '0' && c <= '9')
                                     n = c - '0';
                                   else if (c >= 'A' && c <= 'F')
                                     n = 10 + c - 'A';
                                   else if (c >= 'a' && c <= 'f')
                                     n = 10 + c - 'a';
                                   else
                                     abort ();

                                   c = phase1_getc ();
                                   if ((c >= '0' && c <= '9')
                                       || (c >= 'A' && c <= 'F')
                                       || (c >= 'a' && c <= 'f'))
                                     {
                                       if (c >= '0' && c <= '9')
                                         n = n * 16 + c - '0';
                                       else if (c >= 'A' && c <= 'F')
                                         n = n * 16 + 10 + c - 'A';
                                       else if (c >= 'a' && c <= 'f')
                                         n = n * 16 + 10 + c - 'a';
                                       else
                                         abort ();
                                     }
                                   else
                                     phase1_ungetc (c);

                                   c = n;
                                 }
                               else
                                 {
                                   phase1_ungetc (c);
                                   phase1_ungetc ('x');
                                   c = '\\';
                                 }
                               break;

                             case '0': case '1': case '2': case '3':
                             case '4': case '5': case '6': case '7':
                               {
                                 int n = c - '0';

                                 c = phase1_getc ();
                                 if (c >= '0' && c <= '7')
                                   {
                                     n = n * 8 + c - '0';

                                     c = phase1_getc ();
                                     if (c >= '0' && c <= '7')
                                       n = n * 8 + c - '0';
                                     else
                                       phase1_ungetc (c);
                                   }
                                 else
                                   phase1_ungetc (c);

                                 c = n;
                               }
                               break;
                             }
                         }
                       if (wp->type == t_string)
                         {
                           grow_token (wp->token);
                           wp->token->chars[wp->token->charcount++] =
                             (unsigned char) c;
                         }
                     }
                   /* The result is a literal string.  Don't change wp->type.  */
                   continue;
                 }
               else if (c2 == '"' && !open_doublequote)
                 {
                   /* Bash builtin for internationalized string.  */
                   lex_pos_ty pos;
                   struct token string;

                   saw_opening_singlequote ();
                   open_singlequote_terminator = '"';
                   pos.file_name = logical_file_name;
                   pos.line_number = line_number;
                   init_token (&string);
                   for (;;)
                     {
                       c = phase2_getc ();
                       if (c == EOF)
                         break;
                       if (c == '"')
                         {
                           saw_closing_singlequote ();
                           break;
                         }
                       grow_token (&string);
                       string.chars[string.charcount++] = (unsigned char) c;
                     }
                   remember_a_message (mlp, NULL, string_of_token (&string),
                                       false, false, context, &pos,
                                       NULL, savable_comment, false);
                   free_token (&string);

                   error_with_progname = false;
                   error (0, 0, _("%s:%lu: warning: the syntax $\"...\" is deprecated due to security reasons; use eval_gettext instead"),
                          pos.file_name, (unsigned long) pos.line_number);
                   error_with_progname = true;

                   /* The result at runtime is not constant. Therefore we
                      change wp->type.  */
                 }
               else
                 phase2_ungetc (c2);
             }
           wp->type = t_other;
           continue;
         }

       if (c == '\'')
         {
           if (!open_singlequote)
             {
               /* Handle an opening single quote.  */
               saw_opening_singlequote ();
             }
           else
             {
               /* Handle a closing single quote.  */
               saw_closing_singlequote ();
             }
           continue;
         }

       if (c == '"')
         {
           if (open_singlequote && open_singlequote_terminator == '"')
             {
               /* Handle a closing i18n quote.  */
               saw_closing_singlequote ();
             }
           else if (!open_doublequote)
             {
               /* Handle an opening double quote.  */
               saw_opening_doublequote ();
             }
           else
             {
               /* Handle a closing double quote.  */
               saw_closing_doublequote ();
             }
           continue;
         }

       if (c == OPENING_BACKQUOTE)
         {
           /* Handle an opening backquote.  */
           saw_opening_backquote ();

           read_command_list (CLOSING_BACKQUOTE, context);

           wp->type = t_other;
           continue;
         }
       if (c == CLOSING_BACKQUOTE)
         break;

       if (c == '<' || c == '>')
         {
           int c2;

           /* An unquoted c indicates we are not inside '...' nor "...".  */
           if (open_singlequote || open_doublequote)
             abort ();

           c2 = phase2_getc ();
           if (c2 == '(')
             {
               /* Process substitution (Bash syntax).  */
               read_command_list (')', context);

               wp->type = t_other;
               continue;
             }
           else
             phase2_ungetc (c2);
         }

       if (!open_singlequote && !open_doublequote
           && (is_whitespace (c) || is_operator_start (c)))
         break;

       if (wp->type == t_string)
         {
           grow_token (wp->token);
           wp->token->chars[wp->token->charcount++] = (unsigned char) c;
         }
     }

   phase2_ungetc (c);

   if (wp->type != t_string)
     {
       free_token (wp->token);
       free (wp->token);
     }
   last_non_comment_line = line_number;
 }


 /* Read the next command.
    'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
    or '\0'.  It is passed through unchanged to read_word.
    'outer_context' is the flag context of the enclosing construct; the
    context of every word after the first is derived from it through
    inherited_context.
    Strings found in the command are handed to remember_a_message (when
    extract_all is set) and to the argument list parser allocated for the
    command name.
    Returns the type of the word that terminated the command, i.e. one of
    t_separator, t_backquote, t_paren or t_eof.  */
 static enum word_type
 read_command (int looking_for, flag_context_ty outer_context)
 {
   /* Read the words that make up the command.
      Here we completely ignore field splitting at whitespace and wildcard
      expansions; i.e. we assume that the source is written in such a way that
      every word in the program determines exactly one word in the resulting
      command.
      But we do not require that the 'gettext'/'ngettext' command is the
      first in the command; this is because 1. we want to allow for prefixes
      like "$verbose" that may expand to nothing, and 2. it's a big effort
      to know where a command starts in a $(for ...) or $(case ...) compound
      command.  */
   int arg = 0;                  /* Current argument number.  */
   bool arg_of_redirect = false; /* True right after a redirection operator.  */
   bool must_expand_arg_strings = false; /* True if need to expand escape
                                            sequences in arguments.  */
   /* context_iter is only read when arg != 0.  Every path below that lets
      arg become nonzero assigns context_iter first.  */
   flag_context_list_iterator_ty context_iter;
   /* Call shapes registered for the current function name, or NULL if the
      name is not a keyword.  */
   const struct callshapes *shapes = NULL;
   /* Non-NULL while the arguments of a function name are being collected.  */
   struct arglist_parser *argparser = NULL;

   for (;;)
     {
       struct word inner;
       flag_context_ty inner_context;

       /* A word in function position gets the null context; any other word
          gets the next context from the iterator, combined with the outer
          context.  */
       if (arg == 0)
         inner_context = null_context;
       else
         inner_context =
           inherited_context (outer_context,
                              flag_context_list_iterator_advance (
                                &context_iter));

       read_word (&inner, looking_for, inner_context);

       /* Recognize end of command.  */
       if (inner.type == t_separator
           || inner.type == t_backquote || inner.type == t_paren
           || inner.type == t_eof)
         {
           /* Finish the pending argument list parse before returning.  */
           if (argparser != NULL)
             arglist_parser_done (argparser, arg);
           return inner.type;
         }

       /* With extract_all, every constant string word is recorded as a
          message, regardless of its position in the command.  */
       if (extract_all)
         {
           if (inner.type == t_string)
             {
               lex_pos_ty pos;

               pos.file_name = logical_file_name;
               pos.line_number = inner.line_number_at_start;
               remember_a_message (mlp, NULL, string_of_word (&inner), false,
                                   false, inner_context, &pos,
                                   NULL, savable_comment, false);
             }
         }

       if (arg_of_redirect)
         {
           /* Ignore arguments of redirection operators.  */
           arg_of_redirect = false;
         }
       else if (inner.type == t_redirect)
         {
           /* Ignore this word and the following one.  */
           arg_of_redirect = true;
         }
       else
         {
           /* Cleared for words that must not be counted as an argument:
              assignments, 'env', and the option words handled below.  */
           bool matters_for_argparser = true;

           if (argparser == NULL)
             {
               /* This is the function position.  */
               arg = 0;
               if (inner.type == t_assignment)
                 {
                   /* An assignment just sets an environment variable.
                      Ignore it.  */
                   /* Don't increment arg in this round.  */
                   matters_for_argparser = false;
                 }
               else if (inner.type == t_string)
                 {
                   char *function_name = string_of_word (&inner);

                   if (strcmp (function_name, "env") == 0)
                     {
                       /* The 'env' command just introduces more assignments.
                          Ignore it.  */
                       /* Don't increment arg in this round.  */
                       matters_for_argparser = false;
                     }
                   else
                     {
                       void *keyword_value;

                       /* Look the name up in the keyword table.  When it is
                          not found, shapes keeps its previous value, which
                          is NULL whenever argparser is NULL.  */
                       if (hash_find_entry (&keywords,
                                            function_name,
                                            strlen (function_name),
                                            &keyword_value)
                           == 0)
                         shapes = (const struct callshapes *) keyword_value;

                       argparser = arglist_parser_alloc (mlp, shapes);

                       /* Contexts for the arguments that follow come from
                          the flag table entry of this function name.  */
                       context_iter =
                         flag_context_list_iterator (
                           flag_context_list_table_lookup (
                             flag_context_list_table,
                             function_name, strlen (function_name)));
                     }

                   free (function_name);
                 }
               else
                 /* Not a constant word: no argument parser is started, and
                    the following word gets contexts from the null
                    iterator.  */
                 context_iter = null_context_list_iterator;
             }
           else
             {
               /* These are the argument positions.  */
               if (inner.type == t_string)
                 {
                   /* Both flags are currently computed by the same test:
                      the keyword is exactly "gettext" or "ngettext".  */
                   bool accepts_context =
                     ((argparser->keyword_len == 7
                       && memcmp (argparser->keyword, "gettext", 7) == 0)
                      || (argparser->keyword_len == 8
                          && memcmp (argparser->keyword, "ngettext", 8) == 0));
                   bool accepts_expand =
                     ((argparser->keyword_len == 7
                       && memcmp (argparser->keyword, "gettext", 7) == 0)
                      || (argparser->keyword_len == 8
                          && memcmp (argparser->keyword, "ngettext", 8) == 0));
                   if (accepts_context && argparser->next_is_msgctxt)
                     {
                       /* The previous word was '-c' or '--context'; this
                          word is the message context, not an argument.  */
                       char *s = string_of_word (&inner);
                       mixed_string_ty *ms =
                         mixed_string_alloc_simple (s, lc_string,
                                                    logical_file_name,
                                                    inner.line_number_at_start);
                       free (s);
                       argparser->next_is_msgctxt = false;
                       arglist_parser_remember_msgctxt (argparser, ms,
                                                        inner_context,
                                                        logical_file_name,
                                                        inner.line_number_at_start);
                       matters_for_argparser = false;
                     }
                   else if (accepts_context
                            && ((inner.token->charcount == 2
                                 && memcmp (inner.token->chars, "-c", 2) == 0)
                                || (inner.token->charcount == 9
                                    && memcmp (inner.token->chars, "--context", 9) == 0)))
                     {
                       /* Option announcing that the next word is the
                          message context.  */
                       argparser->next_is_msgctxt = true;
                       matters_for_argparser = false;
                     }
                   else if (accepts_context
                            && (inner.token->charcount >= 10
                                && memcmp (inner.token->chars, "--context=", 10) == 0))
                     {
                       /* Option carrying the message context in the same
                          word, after the 10 characters of "--context=".  */
                       char *s = substring_of_word (&inner, 10);
                       mixed_string_ty *ms =
                         mixed_string_alloc_simple (s, lc_string,
                                                    logical_file_name,
                                                    inner.line_number_at_start);
                       free (s);
                       argparser->next_is_msgctxt = false;
                       arglist_parser_remember_msgctxt (argparser, ms,
                                                        inner_context,
                                                        logical_file_name,
                                                        inner.line_number_at_start);
                       matters_for_argparser = false;
                     }
                   else if (accepts_expand
                            && inner.token->charcount == 2
                            && memcmp (inner.token->chars, "-e", 2) == 0)
                     {
                       /* From now on, the string arguments of this command
                          get their escape sequences expanded.  */
                       must_expand_arg_strings = true;
                       matters_for_argparser = false;
                     }
                   else
                     {
                       /* An ordinary string argument.  */
                       char *s = string_of_word (&inner);
                       mixed_string_ty *ms;

                       /* When '-e' was specified, expand escape sequences in s.  */
                       if (accepts_expand && must_expand_arg_strings)
                         {
                           /* Only for the keyword "gettext" is a flag
                              pointer passed to expand_escapes; otherwise
                              NULL is passed.  */
                           bool expands_backslash_c =
                             (argparser->keyword_len == 7
                              && memcmp (argparser->keyword, "gettext", 7) == 0);
                           bool backslash_c = false;
                           char *expanded =
                             (char *)
                             expand_escapes (s, expands_backslash_c ? &backslash_c : NULL);
                           /* We can ignore the value of backslash_c, because
                              here we don't support the gettext '-s' option.  */
                           /* Free s only when expand_escapes returned a
                              different pointer than the one it was given.  */
                           if (expanded != s)
                             free (s);
                           s = expanded;
                         }

                       ms = mixed_string_alloc_simple (s, lc_string,
                                                       logical_file_name,
                                                       inner.line_number_at_start);
                       free (s);
                       arglist_parser_remember (argparser, arg, ms,
                                                inner_context,
                                                logical_file_name,
                                                inner.line_number_at_start,
                                                savable_comment, false);
                     }
                 }

               if (matters_for_argparser)
                 if (arglist_parser_decidedp (argparser, arg))
                   {
                     /* Stop looking for arguments of the last function_name.  */
                     /* FIXME: What about context_iter?  */
                     arglist_parser_done (argparser, arg);
                     shapes = NULL;
                     argparser = NULL;
                   }
             }

           /* Words that were skipped above do not advance the argument
              number.  */
           if (matters_for_argparser)
             arg++;
         }

       free_word (&inner);
     }
 }


 /* Read a list of commands, one read_command call at a time.
    'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
    or '\0'; it is forwarded to read_command together with 'outer_context'.
    Reading goes on as long as each command ends with a separator.
    Returns the type of the word that terminated the command list, i.e. the
    first terminator reported by read_command that is not t_separator.  */
 static enum word_type
 read_command_list (int looking_for, flag_context_ty outer_context)
 {
   enum word_type last;

   /* A separator only ends one command; anything else ends the list.  */
   do
     last = read_command (looking_for, outer_context);
   while (last == t_separator);

   return last;
 }


 /* Scan the shell script on stream F and store the messages found in the
    message list of the first domain of MDLP.
    REAL_FILENAME and LOGICAL_FILENAME are recorded in the file-level
    variables of the same purpose (the logical name as a private copy), and
    FLAG_TABLE becomes the flag context table used while reading commands.
    All lexer state is reset before reading; the input-related variables are
    cleared again afterwards.  */
 void
 extract_sh (FILE *f,
             const char *real_filename, const char *logical_filename,
             flag_context_list_table_ty *flag_table,
             msgdomain_list_ty *mdlp)
 {
   /* Where the extracted messages go.  */
   mlp = mdlp->item[0]->messages;

   /* Input stream and position.  */
   fp = f;
   real_file_name = real_filename;
   logical_file_name = xstrdup (logical_filename);
   line_number = 1;

   /* No characters pushed back in either phase.  */
   phase1_pushback_length = phase2_pushback_length = 0;

   /* No comment and no code line seen so far.  */
   last_non_comment_line = -1;
   last_comment_line = -1;

   /* Not inside any backquote or quote.  */
   nested_backquotes = open_doublequotes_mask = 0;
   open_doublequote = open_singlequote = false;

   flag_context_list_table = flag_table;

   init_keywords ();

   /* Eat tokens until eof is seen.  */
   read_command_list ('\0', null_context);

   /* Drop the references to the input.  */
   line_number = 0;
   logical_file_name = NULL;
   real_file_name = NULL;
   fp = NULL;
 }