mingw/gettext/gettext-tools/src/x-scheme.c - kiwivm - Git at Google

 /* xgettext Scheme backend.
    Copyright (C) 2004-2009, 2011, 2014, 2018-2020 Free Software Foundation, Inc.

    This file was written by Bruno Haible <bruno@clisp.org>, 2004-2005.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

 #ifdef HAVE_CONFIG_H
 # include "config.h"
 #endif

 /* Specification.  */
 #include "x-scheme.h"

 #include <errno.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 #include "attribute.h"
 #include "message.h"
 #include "xgettext.h"
 #include "xg-pos.h"
 #include "xg-mixed-string.h"
 #include "xg-arglist-context.h"
 #include "xg-arglist-callshape.h"
 #include "xg-arglist-parser.h"
 #include "xg-message.h"
 #include "error.h"
 #include "xalloc.h"
 #include "mem-hash-map.h"
 #include "gettext.h"

 #define _(s) gettext(s)


 /* The Scheme syntax is described in R5RS.  It is implemented in
    guile-2.0.0/libguile/read.c.
    Since we are interested only in strings and in forms similar to
         (gettext msgid ...)
    or   (ngettext msgid msgid_plural ...)
    we make the following simplifications:

    - Assume the keywords and strings are in an ASCII compatible encoding.
      This means we can read the input file one byte at a time, instead of
      one character at a time.  No need to worry about multibyte characters:
      If they occur as part of identifiers, they most probably act as
      constituent characters, and the byte based approach will do the same.

    - Assume the read-hash-procedures is in the default state.
      Non-standard reader extensions are mostly used to read data, not programs.

    The remaining syntax rules are:

    - The syntax code assigned to each character, and how tokens are built
      up from characters (single escape, multiple escape etc.).

    - Comment syntax: ';' and '#! ... !#' and '#| ... |#' (may be nested).

    - String syntax: "..." with single escapes.

    - Read macros and dispatch macro character '#'.  Needed to be able to
      tell which is the n-th argument of a function call.

  */


 /* ====================== Keyword set customization.  ====================== */

 /* If true extract all strings.  */
 static bool extract_all = false;

 static hash_table keywords;
 static bool default_keywords = true;


 void
 x_scheme_extract_all ()
 {
   extract_all = true;
 }


 void
 x_scheme_keyword (const char *name)
 {
   if (name == NULL)
     default_keywords = false;
   else
     {
       const char *end;
       struct callshape shape;
       const char *colon;

       if (keywords.table == NULL)
         hash_init (&keywords, 100);

       split_keywordspec (name, &end, &shape);

       /* The characters between name and end should form a valid Lisp symbol.
          Extract the symbol name part.  */
       colon = strchr (name, ':');
       if (colon != NULL && colon < end)
         {
           name = colon + 1;
           if (name < end && *name == ':')
             name++;
           colon = strchr (name, ':');
           if (colon != NULL && colon < end)
             return;
         }

       insert_keyword_callshape (&keywords, name, end - name, &shape);
     }
 }

 /* Finish initializing the keywords hash table.
    Called after argument processing, before each file is processed.  */
 static void
 init_keywords ()
 {
   if (default_keywords)
     {
       /* When adding new keywords here, also update the documentation in
          xgettext.texi!  */
       x_scheme_keyword ("gettext");             /* libguile/i18n.c */
       x_scheme_keyword ("ngettext:1,2");        /* libguile/i18n.c */
       x_scheme_keyword ("gettext-noop");
       default_keywords = false;
     }
 }

 void
 init_flag_table_scheme ()
 {
   xgettext_record_flag ("gettext:1:pass-scheme-format");
   xgettext_record_flag ("ngettext:1:pass-scheme-format");
   xgettext_record_flag ("ngettext:2:pass-scheme-format");
   xgettext_record_flag ("gettext-noop:1:pass-scheme-format");
   xgettext_record_flag ("format:2:scheme-format");
 }


 /* ======================== Reading of characters.  ======================== */

 /* The input file stream.  */
 static FILE *fp;


 /* Fetch the next character from the input file.  */
 static int
 do_getc ()
 {
   int c = getc (fp);

   if (c == EOF)
     {
       if (ferror (fp))
         error (EXIT_FAILURE, errno,
                _("error while reading \"%s\""), real_file_name);
     }
   else if (c == '\n')
    line_number++;

   return c;
 }

 /* Put back the last fetched character, not EOF.  */
 static void
 do_ungetc (int c)
 {
   if (c == '\n')
     line_number--;
   ungetc (c, fp);
 }


 /* ========================== Reading of tokens.  ========================== */


 /* A token consists of a sequence of characters.  */
 struct token
 {
   int allocated;                /* number of allocated 'token_char's */
   int charcount;                /* number of used 'token_char's */
   char *chars;                  /* the token's constituents */
 };

 /* Initialize a 'struct token'.  */
 static inline void
 init_token (struct token *tp)
 {
   tp->allocated = 10;
   tp->chars = XNMALLOC (tp->allocated, char);
   tp->charcount = 0;
 }

 /* Free the memory pointed to by a 'struct token'.  */
 static inline void
 free_token (struct token *tp)
 {
   free (tp->chars);
 }

 /* Ensure there is enough room in the token for one more character.  */
 static inline void
 grow_token (struct token *tp)
 {
   if (tp->charcount == tp->allocated)
     {
       tp->allocated *= 2;
       tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
     }
 }

 /* Read the next token.  'first' is the first character, which has already
    been read.  */
 static void
 read_token (struct token *tp, int first)
 {
   init_token (tp);

   grow_token (tp);
   tp->chars[tp->charcount++] = first;

   for (;;)
     {
       int c = do_getc ();

       if (c == EOF)
         break;
       if (c == ' ' || c == '\r' || c == '\f' || c == '\t' || c == '\n'
           || c == '"' || c == '(' || c == ')' || c == ';')
         {
           do_ungetc (c);
           break;
         }
       grow_token (tp);
       tp->chars[tp->charcount++] = c;
     }
 }

 /* Tests if a token represents an integer.
    Taken from guile-1.6.4/libguile/numbers.c:scm_istr2int().  */
 static inline bool
 is_integer_syntax (const char *str, int len, int radix)
 {
   const char *p = str;
   const char *p_end = str + len;

   /* The accepted syntax is
        ['+'|'-'] DIGIT+
      where DIGIT is a hexadecimal digit whose value is below radix.  */

   if (p == p_end)
     return false;
   if (*p == '+' || *p == '-')
     {
       p++;
       if (p == p_end)
         return false;
     }
   do
     {
       int c = *p++;

       if (c >= '0' && c <= '9')
         c = c - '0';
       else if (c >= 'A' && c <= 'F')
         c = c - 'A' + 10;
       else if (c >= 'a' && c <= 'f')
         c = c - 'a' + 10;
       else
         return false;
       if (c >= radix)
         return false;
     }
   while (p < p_end);
   return true;
 }

 /* Tests if a token represents a rational, floating-point or complex number.
    If unconstrained is false, only real numbers are accepted; otherwise,
    complex numbers are accepted as well.
    Taken from guile-1.6.4/libguile/numbers.c:scm_istr2flo().  */
 static inline bool
 is_other_number_syntax (const char *str, int len, int radix, bool unconstrained)
 {
   const char *p = str;
   const char *p_end = str + len;
   bool seen_sign;
   bool seen_digits;

   /* The accepted syntaxes are:
      for a floating-point number:
        ['+'|'-'] DIGIT+ [EXPONENT]
        ['+'|'-'] DIGIT* '.' DIGIT+ [EXPONENT]
        where EXPONENT ::= ['d'|'e'|'f'|'l'|'s'] DIGIT+
        (Dot and exponent are allowed only if radix is 10.)
      for a rational number:
        ['+'|'-'] DIGIT+ '/' DIGIT+
      for a complex number:
        REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
        REAL-NUMBER {'+'|'-'} 'i'
        {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
        {'+'|'-'} 'i'
        REAL-NUMBER '@' REAL-NUMBER
    */
   if (p == p_end)
     return false;
   /* Parse leading sign.  */
   seen_sign = false;
   if (*p == '+' || *p == '-')
     {
       p++;
       if (p == p_end)
         return false;
       seen_sign = true;
       /* Recognize complex number syntax: {'+'|'-'} 'i'  */
       if (unconstrained && (*p == 'I' || *p == 'i') && p + 1 == p_end)
         return true;
     }
   /* Parse digits before dot or exponent or slash.  */
   seen_digits = false;
   do
     {
       int c = *p;

       if (c >= '0' && c <= '9')
         c = c - '0';
       else if (c >= 'A' && c <= 'F')
         {
           if (c >= 'D' && radix == 10) /* exponent? */
             break;
           c = c - 'A' + 10;
         }
       else if (c >= 'a' && c <= 'f')
         {
           if (c >= 'd' && radix == 10) /* exponent? */
             break;
           c = c - 'a' + 10;
         }
       else
         break;
       if (c >= radix)
         return false;
       seen_digits = true;
       p++;
     }
   while (p < p_end);
   /* If p == p_end, we know that seen_digits = true, and the number is an
      integer without exponent.  */
   if (p < p_end)
     {
       /* If we have no digits so far, we need a decimal point later.  */
       if (!seen_digits && !(*p == '.' && radix == 10))
         return false;
       /* Trailing '#' signs are equivalent to zeroes.  */
       while (p < p_end && *p == '#')
         p++;
       if (p < p_end)
         {
           if (*p == '/')
             {
               /* Parse digits after the slash.  */
               bool all_zeroes = true;
               p++;
               for (; p < p_end; p++)
                 {
                   int c = *p;

                   if (c >= '0' && c <= '9')
                     c = c - '0';
                   else if (c >= 'A' && c <= 'F')
                     c = c - 'A' + 10;
                   else if (c >= 'a' && c <= 'f')
                     c = c - 'a' + 10;
                   else
                     break;
                   if (c >= radix)
                     return false;
                   if (c != 0)
                     all_zeroes = false;
                 }
               /* A zero denominator is not allowed.  */
               if (all_zeroes)
                 return false;
               /* Trailing '#' signs are equivalent to zeroes.  */
               while (p < p_end && *p == '#')
                 p++;
             }
           else
             {
               if (*p == '.')
                 {
                   /* Decimal point notation.  */
                   if (radix != 10)
                     return false;
                   /* Parse digits after the decimal point.  */
                   p++;
                   for (; p < p_end; p++)
                     {
                       int c = *p;

                       if (c >= '0' && c <= '9')
                         seen_digits = true;
                       else
                         break;
                     }
                   /* Digits are required before or after the decimal point.  */
                   if (!seen_digits)
                     return false;
                   /* Trailing '#' signs are equivalent to zeroes.  */
                   while (p < p_end && *p == '#')
                     p++;
                 }
               if (p < p_end)
                 {
                   /* Parse exponent.  */
                   switch (*p)
                     {
                     case 'D': case 'd':
                     case 'E': case 'e':
                     case 'F': case 'f':
                     case 'L': case 'l':
                     case 'S': case 's':
                       if (radix != 10)
                         return false;
                       p++;
                       if (p == p_end)
                         return false;
                       if (*p == '+' || *p == '-')
                         {
                           p++;
                           if (p == p_end)
                             return false;
                         }
                       if (!(*p >= '0' && *p <= '9'))
                         return false;
                       for (;;)
                         {
                           p++;
                           if (p == p_end)
                             break;
                           if (!(*p >= '0' && *p <= '9'))
                             break;
                         }
                       break;
                     default:
                       break;
                     }
                 }
             }
         }
     }
   if (p == p_end)
     return true;
   /* Recognize complex number syntax.  */
   if (unconstrained)
     {
       /* Recognize the syntax  {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'  */
       if (seen_sign && (*p == 'I' || *p == 'i') && p + 1 == p_end)
         return true;
       /* Recognize the syntaxes
            REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
            REAL-NUMBER {'+'|'-'} 'i'
        */
       if (*p == '+' || *p == '-')
         return (p_end[-1] == 'I' || p_end[-1] == 'i')
                 && (p + 1 == p_end - 1
                     || is_other_number_syntax (p, p_end - 1 - p, radix, false));
       /* Recognize the syntax  REAL-NUMBER '@' REAL-NUMBER  */
       if (*p == '@')
         {
           p++;
           return is_other_number_syntax (p, p_end - p, radix, false);
         }
     }
   return false;
 }

 /* Tests if a token represents a number.
    Taken from guile-1.6.4/libguile/numbers.c:scm_istring2number().  */
 static bool
 is_number (const struct token *tp)
 {
   const char *str = tp->chars;
   int len = tp->charcount;
   enum { unknown, exact, inexact } exactness = unknown;
   bool seen_radix_prefix = false;
   bool seen_exactness_prefix = false;

   if (len == 1)
     if (*str == '+' || *str == '-')
       return false;
   while (len >= 2 && *str == '#')
     {
       switch (str[1])
         {
         case 'B': case 'b':
           if (seen_radix_prefix)
             return false;
           seen_radix_prefix = true;
           break;
         case 'O': case 'o':
           if (seen_radix_prefix)
             return false;
           seen_radix_prefix = true;
           break;
         case 'D': case 'd':
           if (seen_radix_prefix)
             return false;
           seen_radix_prefix = true;
           break;
         case 'X': case 'x':
           if (seen_radix_prefix)
             return false;
           seen_radix_prefix = true;
           break;
         case 'E': case 'e':
           if (seen_exactness_prefix)
             return false;
           exactness = exact;
           seen_exactness_prefix = true;
           break;
         case 'I': case 'i':
           if (seen_exactness_prefix)
             return false;
           exactness = inexact;
           seen_exactness_prefix = true;
           break;
         default:
           return false;
         }
       str += 2;
       len -= 2;
     }
   if (exactness != inexact)
     {
       /* Try to parse an integer.  */
       if (is_integer_syntax (str, len, 10))
         return true;
       /* FIXME: Other Scheme implementations support exact rational numbers
          or exact complex numbers.  */
     }
   if (exactness != exact)
     {
       /* Try to parse a rational, floating-point or complex number.  */
       if (is_other_number_syntax (str, len, 10, true))
         return true;
     }
   return false;
 }


 /* ========================= Accumulating comments ========================= */


 static char *buffer;
 static size_t bufmax;
 static size_t buflen;

 static inline void
 comment_start ()
 {
   buflen = 0;
 }

 static inline void
 comment_add (int c)
 {
   if (buflen >= bufmax)
     {
       bufmax = 2 * bufmax + 10;
       buffer = xrealloc (buffer, bufmax);
     }
   buffer[buflen++] = c;
 }

 static inline void
 comment_line_end (size_t chars_to_remove)
 {
   buflen -= chars_to_remove;
   while (buflen >= 1
          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
     --buflen;
   if (chars_to_remove == 0 && buflen >= bufmax)
     {
       bufmax = 2 * bufmax + 10;
       buffer = xrealloc (buffer, bufmax);
     }
   buffer[buflen] = '\0';
   savable_comment_add (buffer);
 }


 /* These are for tracking whether comments count as immediately before
    keyword.  */
 static int last_comment_line;
 static int last_non_comment_line;


 /* ========================= Accumulating messages ========================= */


 static message_list_ty *mlp;


 /* ========================== Reading of objects.  ========================= */


 /* We are only interested in symbols (e.g. gettext or ngettext) and strings.
    Other objects need not to be represented precisely.  */
 enum object_type
 {
   t_symbol,     /* symbol */
   t_string,     /* string */
   t_other,      /* other kind of real object */
   t_dot,        /* '.' pseudo object */
   t_close,      /* ')' pseudo object */
   t_eof         /* EOF marker */
 };

 struct object
 {
   enum object_type type;
   struct token *token;          /* for t_symbol and t_string */
   int line_number_at_start;     /* for t_string */
 };

 /* Free the memory pointed to by a 'struct object'.  */
 static inline void
 free_object (struct object *op)
 {
   if (op->type == t_symbol || op->type == t_string)
     {
       free_token (op->token);
       free (op->token);
     }
 }

 /* Convert a t_symbol/t_string token to a char*.  */
 static char *
 string_of_object (const struct object *op)
 {
   char *str;
   int n;

   if (!(op->type == t_symbol || op->type == t_string))
     abort ();
   n = op->token->charcount;
   str = XNMALLOC (n + 1, char);
   memcpy (str, op->token->chars, n);
   str[n] = '\0';
   return str;
 }

 /* Context lookup table.  */
 static flag_context_list_table_ty *flag_context_list_table;

 /* Read the next object.  */
 static void
 read_object (struct object *op, flag_context_ty outer_context)
 {
   for (;;)
     {
       int c = do_getc ();
       bool seen_underscore_prefix = false;

       switch (c)
         {
         case EOF:
           op->type = t_eof;
           return;

         case ' ': case '\r': case '\f': case '\t':
           continue;

         case '\n':
           /* Comments assumed to be grouped with a message must immediately
              precede it, with no non-whitespace token on a line between
              both.  */
           if (last_non_comment_line > last_comment_line)
             savable_comment_reset ();
           continue;

         case ';':
           {
             bool all_semicolons = true;

             last_comment_line = line_number;
             comment_start ();
             for (;;)
               {
                 c = do_getc ();
                 if (c == EOF || c == '\n')
                   break;
                 if (c != ';')
                   all_semicolons = false;
                 if (!all_semicolons)
                   {
                     /* We skip all leading white space, but not EOLs.  */
                     if (!(buflen == 0 && (c == ' ' || c == '\t')))
                       comment_add (c);
                   }
               }
             comment_line_end (0);
             continue;
           }

         case '(':
           {
             int arg = 0;                /* Current argument number.  */
             flag_context_list_iterator_ty context_iter;
             const struct callshapes *shapes = NULL;
             struct arglist_parser *argparser = NULL;

              for (;; arg++)
                {
                 struct object inner;
                 flag_context_ty inner_context;

                 if (arg == 0)
                   inner_context = null_context;
                 else
                   inner_context =
                     inherited_context (outer_context,
                                        flag_context_list_iterator_advance (
                                          &context_iter));

                 read_object (&inner, inner_context);

                 /* Recognize end of list.  */
                 if (inner.type == t_close)
                   {
                     op->type = t_other;
                     last_non_comment_line = line_number;
                     if (argparser != NULL)
                       arglist_parser_done (argparser, arg);
                     return;
                   }

                 /* Dots are not allowed in every position.
                    But be tolerant.  */

                 /* EOF inside list is illegal.
                    But be tolerant.  */
                 if (inner.type == t_eof)
                   break;

                 if (arg == 0)
                   {
                     /* This is the function position.  */
                     if (inner.type == t_symbol)
                       {
                         char *symbol_name = string_of_object (&inner);
                         void *keyword_value;

                         if (hash_find_entry (&keywords,
                                              symbol_name, strlen (symbol_name),
                                              &keyword_value)
                             == 0)
                           shapes = (const struct callshapes *) keyword_value;

                         argparser = arglist_parser_alloc (mlp, shapes);

                         context_iter =
                           flag_context_list_iterator (
                             flag_context_list_table_lookup (
                               flag_context_list_table,
                               symbol_name, strlen (symbol_name)));

                         free (symbol_name);
                       }
                     else
                       context_iter = null_context_list_iterator;
                   }
                 else
                   {
                     /* These are the argument positions.  */
                     if (argparser != NULL && inner.type == t_string)
                       {
                         char *s = string_of_object (&inner);
                         mixed_string_ty *ms =
                           mixed_string_alloc_simple (s, lc_string,
                                                      logical_file_name,
                                                      inner.line_number_at_start);
                         free (s);
                         arglist_parser_remember (argparser, arg, ms,
                                                  inner_context,
                                                  logical_file_name,
                                                  inner.line_number_at_start,
                                                  savable_comment, false);
                       }
                   }

                 free_object (&inner);
               }
             if (argparser != NULL)
               arglist_parser_done (argparser, arg);
           }
           op->type = t_other;
           last_non_comment_line = line_number;
           return;

         case ')':
           /* Tell the caller about the end of list.
              Unmatched closing parenthesis is illegal.
              But be tolerant.  */
           op->type = t_close;
           last_non_comment_line = line_number;
           return;

         case ',':
           {
             int c = do_getc ();
             /* The ,@ handling inside lists is wrong anyway, because
                ,@form expands to an unknown number of elements.  */
             if (c != EOF && c != '@')
               do_ungetc (c);
           }
           FALLTHROUGH;
         case '\'':
         case '`':
           {
             struct object inner;

             read_object (&inner, null_context);

             /* Dots and EOF are not allowed here.  But be tolerant.  */

             free_object (&inner);

             op->type = t_other;
             last_non_comment_line = line_number;
             return;
           }

         case '#':
           /* Dispatch macro handling.  */
           {
             c = do_getc ();
             if (c == EOF)
               /* Invalid input.  Be tolerant, no error message.  */
               {
                 op->type = t_other;
                 return;
               }

             switch (c)
               {
               case '(': /* Vector */
                 do_ungetc (c);
                 {
                   struct object inner;
                   read_object (&inner, null_context);
                   /* Dots and EOF are not allowed here.
                      But be tolerant.  */
                   free_object (&inner);
                   op->type = t_other;
                   last_non_comment_line = line_number;
                   return;
                 }

               case 'T': case 't': /* Boolean true */
               case 'F': /* Boolean false */
                 op->type = t_other;
                 last_non_comment_line = line_number;
                 return;

               case 'a':
               case 'c':
               case 'f':
               case 'h':
               case 'l':
               case 's':
               case 'u':
               case 'v':
               case 'y':
                 {
                   struct token token;
                   do_ungetc (c);
                   read_token (&token, '#');
                   if ((token.charcount == 2
                        && (token.chars[1] == 'a' || token.chars[1] == 'c'
                            || token.chars[1] == 'h' || token.chars[1] == 'l'
                            || token.chars[1] == 's' || token.chars[1] == 'u'
                            || token.chars[1] == 'y'))
                       || (token.charcount == 3
                           && (token.chars[1] == 's' || token.chars[1] == 'u')
                           && token.chars[2] == '8')
                       || (token.charcount == 4
                           && (((token.chars[1] == 's' || token.chars[1] == 'u')
                                && token.chars[2] == '1'
                                && token.chars[3] == '6')
                               || ((token.chars[1] == 'c'
                                    || token.chars[1] == 'f'
                                    || token.chars[1] == 's'
                                    || token.chars[1] == 'u')
                                   && ((token.chars[2] == '3'
                                        && token.chars[3] == '2')
                                       || (token.chars[2] == '6'
                                           && token.chars[3] == '4')))
                               || (token.chars[1] == 'v'
                                   && token.chars[2] == 'u'
                                   && token.chars[3] == '8'))))
                     {
                       c = do_getc ();
                       if (c != EOF)
                         do_ungetc (c);
                       if (c == '(')
                         {
                           /* Homogenous vector syntax:
                                #a(...) - vector of char
                                #c(...) - vector of complex (old)
                                #c32(...) - vector of complex of single-float
                                #c64(...) - vector of complex of double-float
                                #f32(...) - vector of single-float
                                #f64(...) - vector of double-float
                                #h(...) - vector of short (old)
                                #l(...) - vector of long long (old)
                                #s(...) - vector of single-float (old)
                                #s8(...) - vector of signed 8-bit integers
                                #s16(...) - vector of signed 16-bit integers
                                #s32(...) - vector of signed 32-bit integers
                                #s64(...) - vector of signed 64-bit integers
                                #u(...) - vector of unsigned long (old)
                                #u8(...) - vector of unsigned 8-bit integers
                                #u16(...) - vector of unsigned 16-bit integers
                                #u32(...) - vector of unsigned 32-bit integers
                                #u64(...) - vector of unsigned 64-bit integers
                                #vu8(...) - vector of byte
                                #y(...) - vector of byte (old)
                            */
                           struct object inner;
                           read_object (&inner, null_context);
                           /* Dots and EOF are not allowed here.
                              But be tolerant.  */
                           free_token (&token);
                           free_object (&inner);
                           op->type = t_other;
                           last_non_comment_line = line_number;
                           return;
                         }
                     }
                   /* Boolean false, or unknown # object.  But be tolerant.  */
                   free_token (&token);
                   op->type = t_other;
                   last_non_comment_line = line_number;
                   return;
                 }

               case 'B': case 'b':
               case 'O': case 'o':
               case 'D': case 'd':
               case 'X': case 'x':
               case 'E': case 'e':
               case 'I': case 'i':
                 {
                   struct token token;
                   do_ungetc (c);
                   read_token (&token, '#');
                   if (is_number (&token))
                     {
                       /* A number.  */
                       free_token (&token);
                       op->type = t_other;
                       last_non_comment_line = line_number;
                       return;
                     }
                   else
                     {
                       if (token.charcount == 2
                           && (token.chars[1] == 'e' || token.chars[1] == 'i'))
                         {
                           c = do_getc ();
                           if (c != EOF)
                             do_ungetc (c);
                           if (c == '(')
                             {
                               /* Homogenous vector syntax:
                                    #e(...) - vector of long (old)
                                    #i(...) - vector of double-float (old)
                                */
                               struct object inner;
                               read_object (&inner, null_context);
                               /* Dots and EOF are not allowed here.
                                  But be tolerant.  */
                               free_token (&token);
                               free_object (&inner);
                               op->type = t_other;
                               last_non_comment_line = line_number;
                               return;
                             }
                         }
                       /* Unknown # object.  But be tolerant.  */
                       free_token (&token);
                       op->type = t_other;
                       last_non_comment_line = line_number;
                       return;
                     }
                 }

               case '!':
                 /* Block comment '#! ... !#'.  See
                    <https://www.gnu.org/software/guile/manual/html_node/Block-Comments.html>.  */
                 {
                   int c;

                   comment_start ();
                   c = do_getc ();
                   for (;;)
                     {
                       if (c == EOF)
                         break;
                       if (c == '!')
                         {
                           c = do_getc ();
                           if (c == EOF)
                             break;
                           if (c == '#')
                             {
                               comment_line_end (0);
                               break;
                             }
                           else
                             comment_add ('!');
                         }
                       else
                         {
                           /* We skip all leading white space.  */
                           if (!(buflen == 0 && (c == ' ' || c == '\t')))
                             comment_add (c);
                           if (c == '\n')
                             {
                               comment_line_end (1);
                               comment_start ();
                             }
                           c = do_getc ();
                         }
                     }
                   if (c == EOF)
                     {
                       /* EOF not allowed here.  But be tolerant.  */
                       op->type = t_eof;
                       return;
                     }
                   last_comment_line = line_number;
                   continue;
                 }

               case '|':
                 /* Block comment '#| ... |#'.  See
                    <https://www.gnu.org/software/guile/manual/html_node/Block-Comments.html>
                    and <https://srfi.schemers.org/srfi-30/srfi-30.html>.  */
                 {
                   int depth = 0;
                   int c;

                   comment_start ();
                   c = do_getc ();
                   for (;;)
                     {
                       if (c == EOF)
                         break;
                       if (c == '|')
                         {
                           c = do_getc ();
                           if (c == EOF)
                             break;
                           if (c == '#')
                             {
                               if (depth == 0)
                                 {
                                   comment_line_end (0);
                                   break;
                                 }
                               depth--;
                               comment_add ('|');
                               comment_add ('#');
                               c = do_getc ();
                             }
                           else
                             comment_add ('|');
                         }
                       else if (c == '#')
                         {
                           c = do_getc ();
                           if (c == EOF)
                             break;
                           comment_add ('#');
                           if (c == '|')
                             {
                               depth++;
                               comment_add ('|');
                               c = do_getc ();
                             }
                         }
                       else
                         {
                           /* We skip all leading white space.  */
                           if (!(buflen == 0 && (c == ' ' || c == '\t')))
                             comment_add (c);
                           if (c == '\n')
                             {
                               comment_line_end (1);
                               comment_start ();
                             }
                           c = do_getc ();
                         }
                     }
                   if (c == EOF)
                     {
                       /* EOF not allowed here.  But be tolerant.  */
                       op->type = t_eof;
                       return;
                     }
                   last_comment_line = line_number;
                   continue;
                 }

               case '*':
                 /* Bit vector.  */
                 {
                   struct token token;
                   read_token (&token, c);
                   /* The token should consists only of '0' and '1', except
                      for the initial '*'.  But be tolerant.  */
                   free_token (&token);
                   op->type = t_other;
                   last_non_comment_line = line_number;
                   return;
                 }

               case '{':
                 /* Symbol with multiple escapes: #{...}#  */
                 {
                   op->token = XMALLOC (struct token);

                   init_token (op->token);

                   for (;;)
                     {
                       c = do_getc ();

                       if (c == EOF)
                         break;
                       if (c == '\\')
                         {
                           c = do_getc ();
                           if (c == EOF)
                             break;
                         }
                       else if (c == '}')
                         {
                           c = do_getc ();
                           if (c == '#')
                             break;
                           if (c != EOF)
                             do_ungetc (c);
                           c = '}';
                         }
                       grow_token (op->token);
                       op->token->chars[op->token->charcount++] = c;
                     }

                   op->type = t_symbol;
                   last_non_comment_line = line_number;
                   return;
                 }

               case '\\':
                 /* Character.  */
                 {
                   struct token token;
                   c = do_getc ();
                   if (c != EOF)
                     {
                       read_token (&token, c);
                       free_token (&token);
                     }
                   op->type = t_other;
                   last_non_comment_line = line_number;
                   return;
                 }

               case ':': /* Keyword.  */
               case '&': /* Deprecated keyword, installed in optargs.scm.  */
                 {
                   struct token token;
                   read_token (&token, '-');
                   free_token (&token);
                   op->type = t_other;
                   last_non_comment_line = line_number;
                   return;
                 }

               /* The following are installed through read-hash-extend.  */

               /* arrays.scm */
               case '0': case '1': case '2': case '3': case '4':
               case '5': case '6': case '7': case '8': case '9':
                 /* Multidimensional array syntax: #nx(...) where
                      n ::= DIGIT+
                      x ::= {'a'|'b'|'c'|'e'|'i'|'s'|'u'}
                  */
                 do
                   c = do_getc ();
                 while (c >= '0' && c <= '9');
                 /* c should be one of {'a'|'b'|'c'|'e'|'i'|'s'|'u'}.
                    But be tolerant.  */
                 FALLTHROUGH;
               case '\'': /* boot-9.scm */
               case '.': /* boot-9.scm */
               case ',': /* srfi-10.scm */
                 {
                   struct object inner;
                   read_object (&inner, null_context);
                   /* Dots and EOF are not allowed here.
                      But be tolerant.  */
                   free_object (&inner);
                   op->type = t_other;
                   last_non_comment_line = line_number;
                   return;
                 }

               default:
                 /* Unknown.  */
                 op->type = t_other;
                 last_non_comment_line = line_number;
                 return;
               }
             /*NOTREACHED*/
             abort ();
           }

         case '_':
           /* GIMP script-fu extension: '_' before a string literal is
              considered a gettext call on the string.  */
           {
             int c = do_getc ();
             if (c == EOF)
               /* Invalid input.  Be tolerant, no error message.  */
               {
                 op->type = t_other;
                 return;
               }
             if (c != '"')
               {
                 do_ungetc (c);

                 /* If '_' is not followed by a string literal,
                    consider it a part of symbol.  */
                 op->token = XMALLOC (struct token);
                 read_token (op->token, '_');
                 op->type = t_symbol;
                 last_non_comment_line = line_number;
                 return;
               }
             seen_underscore_prefix = true;
           }
           FALLTHROUGH;

         case '"':
           {
             op->token = XMALLOC (struct token);
             init_token (op->token);
             op->line_number_at_start = line_number;
             for (;;)
               {
                 int c = do_getc ();
                 if (c == EOF)
                   /* Invalid input.  Be tolerant, no error message.  */
                   break;
                 if (c == '"')
                   break;
                 if (c == '\\')
                   {
                     c = do_getc ();
                     if (c == EOF)
                       /* Invalid input.  Be tolerant, no error message.  */
                       break;
                     switch (c)
                       {
                       case '\n':
                         continue;
                       case '0':
                         c = '\0';
                         break;
                       case 'a':
                         c = '\a';
                         break;
                       case 'f':
                         c = '\f';
                         break;
                       case 'n':
                         c = '\n';
                         break;
                       case 'r':
                         c = '\r';
                         break;
                       case 't':
                         c = '\t';
                         break;
                       case 'v':
                         c = '\v';
                         break;
                       default:
                         break;
                       }
                   }
                 grow_token (op->token);
                 op->token->chars[op->token->charcount++] = c;
               }
             op->type = t_string;

             if (seen_underscore_prefix || extract_all)
               {
                 lex_pos_ty pos;

                 pos.file_name = logical_file_name;
                 pos.line_number = op->line_number_at_start;
                 remember_a_message (mlp, NULL, string_of_object (op), false,
                                     false, null_context, &pos,
                                     NULL, savable_comment, false);
               }
             last_non_comment_line = line_number;
             return;
           }

         case '0': case '1': case '2': case '3': case '4':
         case '5': case '6': case '7': case '8': case '9':
         case '+': case '-': case '.':
           /* Read a number or symbol token.  */
           op->token = XMALLOC (struct token);
           read_token (op->token, c);
           if (op->token->charcount == 1 && op->token->chars[0] == '.')
             {
               free_token (op->token);
               free (op->token);
               op->type = t_dot;
             }
           else if (is_number (op->token))
             {
               /* A number.  */
               free_token (op->token);
               free (op->token);
               op->type = t_other;
             }
           else
             {
               /* A symbol.  */
               op->type = t_symbol;
             }
           last_non_comment_line = line_number;
           return;

         case ':':
         default:
           /* Read a symbol token.  */
           op->token = XMALLOC (struct token);
           read_token (op->token, c);
           op->type = t_symbol;
           last_non_comment_line = line_number;
           return;
         }
     }
 }


 void
 extract_scheme (FILE *f,
                 const char *real_filename, const char *logical_filename,
                 flag_context_list_table_ty *flag_table,
                 msgdomain_list_ty *mdlp)
 {
   mlp = mdlp->item[0]->messages;

   fp = f;
   real_file_name = real_filename;
   logical_file_name = xstrdup (logical_filename);
   line_number = 1;

   last_comment_line = -1;
   last_non_comment_line = -1;

   flag_context_list_table = flag_table;

   init_keywords ();

   /* Eat tokens until eof is seen.  When read_object returns
      due to an unbalanced closing parenthesis, just restart it.  */
   do
     {
       struct object toplevel_object;

       read_object (&toplevel_object, null_context);

       if (toplevel_object.type == t_eof)
         break;

       free_object (&toplevel_object);
     }
   while (!feof (fp));

   /* Close scanner.  */
   fp = NULL;
   real_file_name = NULL;
   logical_file_name = NULL;
   line_number = 0;
 }