blob: f879ab02614d4a7a56566be1f85f6f4919079039 [file] [log] [blame]
/* xgettext Scheme backend.
Copyright (C) 2004-2009, 2011, 2014, 2018-2020 Free Software Foundation, Inc.
This file was written by Bruno Haible <bruno@clisp.org>, 2004-2005.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
/* Specification. */
#include "x-scheme.h"
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "attribute.h"
#include "message.h"
#include "xgettext.h"
#include "xg-pos.h"
#include "xg-mixed-string.h"
#include "xg-arglist-context.h"
#include "xg-arglist-callshape.h"
#include "xg-arglist-parser.h"
#include "xg-message.h"
#include "error.h"
#include "xalloc.h"
#include "mem-hash-map.h"
#include "gettext.h"
#define _(s) gettext(s)
/* The Scheme syntax is described in R5RS. It is implemented in
guile-2.0.0/libguile/read.c.
Since we are interested only in strings and in forms similar to
(gettext msgid ...)
or (ngettext msgid msgid_plural ...)
we make the following simplifications:
- Assume the keywords and strings are in an ASCII compatible encoding.
This means we can read the input file one byte at a time, instead of
one character at a time. No need to worry about multibyte characters:
If they occur as part of identifiers, they most probably act as
constituent characters, and the byte based approach will do the same.
- Assume the read-hash-procedures is in the default state.
Non-standard reader extensions are mostly used to read data, not programs.
The remaining syntax rules are:
- The syntax code assigned to each character, and how tokens are built
up from characters (single escape, multiple escape etc.).
- Comment syntax: ';' and '#! ... !#' and '#| ... |#' (may be nested).
- String syntax: "..." with single escapes.
- Read macros and dispatch macro character '#'. Needed to be able to
tell which is the n-th argument of a function call.
*/
/* ====================== Keyword set customization. ====================== */
/* If true extract all strings. */
static bool extract_all = false;
static hash_table keywords;
static bool default_keywords = true;
void
x_scheme_extract_all ()
{
extract_all = true;
}
void
x_scheme_keyword (const char *name)
{
if (name == NULL)
default_keywords = false;
else
{
const char *end;
struct callshape shape;
const char *colon;
if (keywords.table == NULL)
hash_init (&keywords, 100);
split_keywordspec (name, &end, &shape);
/* The characters between name and end should form a valid Lisp symbol.
Extract the symbol name part. */
colon = strchr (name, ':');
if (colon != NULL && colon < end)
{
name = colon + 1;
if (name < end && *name == ':')
name++;
colon = strchr (name, ':');
if (colon != NULL && colon < end)
return;
}
insert_keyword_callshape (&keywords, name, end - name, &shape);
}
}
/* Finish initializing the keywords hash table.
Called after argument processing, before each file is processed. */
static void
init_keywords ()
{
if (default_keywords)
{
/* When adding new keywords here, also update the documentation in
xgettext.texi! */
x_scheme_keyword ("gettext"); /* libguile/i18n.c */
x_scheme_keyword ("ngettext:1,2"); /* libguile/i18n.c */
x_scheme_keyword ("gettext-noop");
default_keywords = false;
}
}
void
init_flag_table_scheme ()
{
xgettext_record_flag ("gettext:1:pass-scheme-format");
xgettext_record_flag ("ngettext:1:pass-scheme-format");
xgettext_record_flag ("ngettext:2:pass-scheme-format");
xgettext_record_flag ("gettext-noop:1:pass-scheme-format");
xgettext_record_flag ("format:2:scheme-format");
}
/* ======================== Reading of characters. ======================== */
/* The input file stream. */
static FILE *fp;
/* Fetch the next character from the input file. */
static int
do_getc ()
{
int c = getc (fp);
if (c == EOF)
{
if (ferror (fp))
error (EXIT_FAILURE, errno,
_("error while reading \"%s\""), real_file_name);
}
else if (c == '\n')
line_number++;
return c;
}
/* Put back the last fetched character, not EOF. */
static void
do_ungetc (int c)
{
if (c == '\n')
line_number--;
ungetc (c, fp);
}
/* ========================== Reading of tokens. ========================== */
/* A token consists of a sequence of characters. */
struct token
{
int allocated; /* number of allocated 'token_char's */
int charcount; /* number of used 'token_char's */
char *chars; /* the token's constituents */
};
/* Initialize a 'struct token'. */
static inline void
init_token (struct token *tp)
{
tp->allocated = 10;
tp->chars = XNMALLOC (tp->allocated, char);
tp->charcount = 0;
}
/* Free the memory pointed to by a 'struct token'. */
static inline void
free_token (struct token *tp)
{
free (tp->chars);
}
/* Ensure there is enough room in the token for one more character. */
static inline void
grow_token (struct token *tp)
{
if (tp->charcount == tp->allocated)
{
tp->allocated *= 2;
tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
}
}
/* Read the next token. 'first' is the first character, which has already
been read. */
static void
read_token (struct token *tp, int first)
{
init_token (tp);
grow_token (tp);
tp->chars[tp->charcount++] = first;
for (;;)
{
int c = do_getc ();
if (c == EOF)
break;
if (c == ' ' || c == '\r' || c == '\f' || c == '\t' || c == '\n'
|| c == '"' || c == '(' || c == ')' || c == ';')
{
do_ungetc (c);
break;
}
grow_token (tp);
tp->chars[tp->charcount++] = c;
}
}
/* Tests if a token represents an integer.
Taken from guile-1.6.4/libguile/numbers.c:scm_istr2int(). */
static inline bool
is_integer_syntax (const char *str, int len, int radix)
{
const char *p = str;
const char *p_end = str + len;
/* The accepted syntax is
['+'|'-'] DIGIT+
where DIGIT is a hexadecimal digit whose value is below radix. */
if (p == p_end)
return false;
if (*p == '+' || *p == '-')
{
p++;
if (p == p_end)
return false;
}
do
{
int c = *p++;
if (c >= '0' && c <= '9')
c = c - '0';
else if (c >= 'A' && c <= 'F')
c = c - 'A' + 10;
else if (c >= 'a' && c <= 'f')
c = c - 'a' + 10;
else
return false;
if (c >= radix)
return false;
}
while (p < p_end);
return true;
}
/* Tests if a token represents a rational, floating-point or complex number.
If unconstrained is false, only real numbers are accepted; otherwise,
complex numbers are accepted as well.
Taken from guile-1.6.4/libguile/numbers.c:scm_istr2flo(). */
static inline bool
is_other_number_syntax (const char *str, int len, int radix, bool unconstrained)
{
const char *p = str;
const char *p_end = str + len;
bool seen_sign;
bool seen_digits;
/* The accepted syntaxes are:
for a floating-point number:
['+'|'-'] DIGIT+ [EXPONENT]
['+'|'-'] DIGIT* '.' DIGIT+ [EXPONENT]
where EXPONENT ::= ['d'|'e'|'f'|'l'|'s'] DIGIT+
(Dot and exponent are allowed only if radix is 10.)
for a rational number:
['+'|'-'] DIGIT+ '/' DIGIT+
for a complex number:
REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
REAL-NUMBER {'+'|'-'} 'i'
{'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
{'+'|'-'} 'i'
REAL-NUMBER '@' REAL-NUMBER
*/
if (p == p_end)
return false;
/* Parse leading sign. */
seen_sign = false;
if (*p == '+' || *p == '-')
{
p++;
if (p == p_end)
return false;
seen_sign = true;
/* Recognize complex number syntax: {'+'|'-'} 'i' */
if (unconstrained && (*p == 'I' || *p == 'i') && p + 1 == p_end)
return true;
}
/* Parse digits before dot or exponent or slash. */
seen_digits = false;
do
{
int c = *p;
if (c >= '0' && c <= '9')
c = c - '0';
else if (c >= 'A' && c <= 'F')
{
if (c >= 'D' && radix == 10) /* exponent? */
break;
c = c - 'A' + 10;
}
else if (c >= 'a' && c <= 'f')
{
if (c >= 'd' && radix == 10) /* exponent? */
break;
c = c - 'a' + 10;
}
else
break;
if (c >= radix)
return false;
seen_digits = true;
p++;
}
while (p < p_end);
/* If p == p_end, we know that seen_digits = true, and the number is an
integer without exponent. */
if (p < p_end)
{
/* If we have no digits so far, we need a decimal point later. */
if (!seen_digits && !(*p == '.' && radix == 10))
return false;
/* Trailing '#' signs are equivalent to zeroes. */
while (p < p_end && *p == '#')
p++;
if (p < p_end)
{
if (*p == '/')
{
/* Parse digits after the slash. */
bool all_zeroes = true;
p++;
for (; p < p_end; p++)
{
int c = *p;
if (c >= '0' && c <= '9')
c = c - '0';
else if (c >= 'A' && c <= 'F')
c = c - 'A' + 10;
else if (c >= 'a' && c <= 'f')
c = c - 'a' + 10;
else
break;
if (c >= radix)
return false;
if (c != 0)
all_zeroes = false;
}
/* A zero denominator is not allowed. */
if (all_zeroes)
return false;
/* Trailing '#' signs are equivalent to zeroes. */
while (p < p_end && *p == '#')
p++;
}
else
{
if (*p == '.')
{
/* Decimal point notation. */
if (radix != 10)
return false;
/* Parse digits after the decimal point. */
p++;
for (; p < p_end; p++)
{
int c = *p;
if (c >= '0' && c <= '9')
seen_digits = true;
else
break;
}
/* Digits are required before or after the decimal point. */
if (!seen_digits)
return false;
/* Trailing '#' signs are equivalent to zeroes. */
while (p < p_end && *p == '#')
p++;
}
if (p < p_end)
{
/* Parse exponent. */
switch (*p)
{
case 'D': case 'd':
case 'E': case 'e':
case 'F': case 'f':
case 'L': case 'l':
case 'S': case 's':
if (radix != 10)
return false;
p++;
if (p == p_end)
return false;
if (*p == '+' || *p == '-')
{
p++;
if (p == p_end)
return false;
}
if (!(*p >= '0' && *p <= '9'))
return false;
for (;;)
{
p++;
if (p == p_end)
break;
if (!(*p >= '0' && *p <= '9'))
break;
}
break;
default:
break;
}
}
}
}
}
if (p == p_end)
return true;
/* Recognize complex number syntax. */
if (unconstrained)
{
/* Recognize the syntax {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' */
if (seen_sign && (*p == 'I' || *p == 'i') && p + 1 == p_end)
return true;
/* Recognize the syntaxes
REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
REAL-NUMBER {'+'|'-'} 'i'
*/
if (*p == '+' || *p == '-')
return (p_end[-1] == 'I' || p_end[-1] == 'i')
&& (p + 1 == p_end - 1
|| is_other_number_syntax (p, p_end - 1 - p, radix, false));
/* Recognize the syntax REAL-NUMBER '@' REAL-NUMBER */
if (*p == '@')
{
p++;
return is_other_number_syntax (p, p_end - p, radix, false);
}
}
return false;
}
/* Tests if a token represents a number.
Taken from guile-1.6.4/libguile/numbers.c:scm_istring2number(). */
static bool
is_number (const struct token *tp)
{
const char *str = tp->chars;
int len = tp->charcount;
enum { unknown, exact, inexact } exactness = unknown;
bool seen_radix_prefix = false;
bool seen_exactness_prefix = false;
if (len == 1)
if (*str == '+' || *str == '-')
return false;
while (len >= 2 && *str == '#')
{
switch (str[1])
{
case 'B': case 'b':
if (seen_radix_prefix)
return false;
seen_radix_prefix = true;
break;
case 'O': case 'o':
if (seen_radix_prefix)
return false;
seen_radix_prefix = true;
break;
case 'D': case 'd':
if (seen_radix_prefix)
return false;
seen_radix_prefix = true;
break;
case 'X': case 'x':
if (seen_radix_prefix)
return false;
seen_radix_prefix = true;
break;
case 'E': case 'e':
if (seen_exactness_prefix)
return false;
exactness = exact;
seen_exactness_prefix = true;
break;
case 'I': case 'i':
if (seen_exactness_prefix)
return false;
exactness = inexact;
seen_exactness_prefix = true;
break;
default:
return false;
}
str += 2;
len -= 2;
}
if (exactness != inexact)
{
/* Try to parse an integer. */
if (is_integer_syntax (str, len, 10))
return true;
/* FIXME: Other Scheme implementations support exact rational numbers
or exact complex numbers. */
}
if (exactness != exact)
{
/* Try to parse a rational, floating-point or complex number. */
if (is_other_number_syntax (str, len, 10, true))
return true;
}
return false;
}
/* ========================= Accumulating comments ========================= */
static char *buffer;
static size_t bufmax;
static size_t buflen;
static inline void
comment_start ()
{
buflen = 0;
}
static inline void
comment_add (int c)
{
if (buflen >= bufmax)
{
bufmax = 2 * bufmax + 10;
buffer = xrealloc (buffer, bufmax);
}
buffer[buflen++] = c;
}
static inline void
comment_line_end (size_t chars_to_remove)
{
buflen -= chars_to_remove;
while (buflen >= 1
&& (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
--buflen;
if (chars_to_remove == 0 && buflen >= bufmax)
{
bufmax = 2 * bufmax + 10;
buffer = xrealloc (buffer, bufmax);
}
buffer[buflen] = '\0';
savable_comment_add (buffer);
}
/* These are for tracking whether comments count as immediately before
keyword. */
static int last_comment_line;
static int last_non_comment_line;
/* ========================= Accumulating messages ========================= */
static message_list_ty *mlp;
/* ========================== Reading of objects. ========================= */
/* We are only interested in symbols (e.g. gettext or ngettext) and strings.
Other objects need not to be represented precisely. */
enum object_type
{
t_symbol, /* symbol */
t_string, /* string */
t_other, /* other kind of real object */
t_dot, /* '.' pseudo object */
t_close, /* ')' pseudo object */
t_eof /* EOF marker */
};
struct object
{
enum object_type type;
struct token *token; /* for t_symbol and t_string */
int line_number_at_start; /* for t_string */
};
/* Free the memory pointed to by a 'struct object'. */
static inline void
free_object (struct object *op)
{
if (op->type == t_symbol || op->type == t_string)
{
free_token (op->token);
free (op->token);
}
}
/* Convert a t_symbol/t_string token to a char*. */
static char *
string_of_object (const struct object *op)
{
char *str;
int n;
if (!(op->type == t_symbol || op->type == t_string))
abort ();
n = op->token->charcount;
str = XNMALLOC (n + 1, char);
memcpy (str, op->token->chars, n);
str[n] = '\0';
return str;
}
/* Context lookup table. */
static flag_context_list_table_ty *flag_context_list_table;
/* Read the next object. */
static void
read_object (struct object *op, flag_context_ty outer_context)
{
for (;;)
{
int c = do_getc ();
bool seen_underscore_prefix = false;
switch (c)
{
case EOF:
op->type = t_eof;
return;
case ' ': case '\r': case '\f': case '\t':
continue;
case '\n':
/* Comments assumed to be grouped with a message must immediately
precede it, with no non-whitespace token on a line between
both. */
if (last_non_comment_line > last_comment_line)
savable_comment_reset ();
continue;
case ';':
{
bool all_semicolons = true;
last_comment_line = line_number;
comment_start ();
for (;;)
{
c = do_getc ();
if (c == EOF || c == '\n')
break;
if (c != ';')
all_semicolons = false;
if (!all_semicolons)
{
/* We skip all leading white space, but not EOLs. */
if (!(buflen == 0 && (c == ' ' || c == '\t')))
comment_add (c);
}
}
comment_line_end (0);
continue;
}
case '(':
{
int arg = 0; /* Current argument number. */
flag_context_list_iterator_ty context_iter;
const struct callshapes *shapes = NULL;
struct arglist_parser *argparser = NULL;
for (;; arg++)
{
struct object inner;
flag_context_ty inner_context;
if (arg == 0)
inner_context = null_context;
else
inner_context =
inherited_context (outer_context,
flag_context_list_iterator_advance (
&context_iter));
read_object (&inner, inner_context);
/* Recognize end of list. */
if (inner.type == t_close)
{
op->type = t_other;
last_non_comment_line = line_number;
if (argparser != NULL)
arglist_parser_done (argparser, arg);
return;
}
/* Dots are not allowed in every position.
But be tolerant. */
/* EOF inside list is illegal.
But be tolerant. */
if (inner.type == t_eof)
break;
if (arg == 0)
{
/* This is the function position. */
if (inner.type == t_symbol)
{
char *symbol_name = string_of_object (&inner);
void *keyword_value;
if (hash_find_entry (&keywords,
symbol_name, strlen (symbol_name),
&keyword_value)
== 0)
shapes = (const struct callshapes *) keyword_value;
argparser = arglist_parser_alloc (mlp, shapes);
context_iter =
flag_context_list_iterator (
flag_context_list_table_lookup (
flag_context_list_table,
symbol_name, strlen (symbol_name)));
free (symbol_name);
}
else
context_iter = null_context_list_iterator;
}
else
{
/* These are the argument positions. */
if (argparser != NULL && inner.type == t_string)
{
char *s = string_of_object (&inner);
mixed_string_ty *ms =
mixed_string_alloc_simple (s, lc_string,
logical_file_name,
inner.line_number_at_start);
free (s);
arglist_parser_remember (argparser, arg, ms,
inner_context,
logical_file_name,
inner.line_number_at_start,
savable_comment, false);
}
}
free_object (&inner);
}
if (argparser != NULL)
arglist_parser_done (argparser, arg);
}
op->type = t_other;
last_non_comment_line = line_number;
return;
case ')':
/* Tell the caller about the end of list.
Unmatched closing parenthesis is illegal.
But be tolerant. */
op->type = t_close;
last_non_comment_line = line_number;
return;
case ',':
{
int c = do_getc ();
/* The ,@ handling inside lists is wrong anyway, because
,@form expands to an unknown number of elements. */
if (c != EOF && c != '@')
do_ungetc (c);
}
FALLTHROUGH;
case '\'':
case '`':
{
struct object inner;
read_object (&inner, null_context);
/* Dots and EOF are not allowed here. But be tolerant. */
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case '#':
/* Dispatch macro handling. */
{
c = do_getc ();
if (c == EOF)
/* Invalid input. Be tolerant, no error message. */
{
op->type = t_other;
return;
}
switch (c)
{
case '(': /* Vector */
do_ungetc (c);
{
struct object inner;
read_object (&inner, null_context);
/* Dots and EOF are not allowed here.
But be tolerant. */
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case 'T': case 't': /* Boolean true */
case 'F': /* Boolean false */
op->type = t_other;
last_non_comment_line = line_number;
return;
case 'a':
case 'c':
case 'f':
case 'h':
case 'l':
case 's':
case 'u':
case 'v':
case 'y':
{
struct token token;
do_ungetc (c);
read_token (&token, '#');
if ((token.charcount == 2
&& (token.chars[1] == 'a' || token.chars[1] == 'c'
|| token.chars[1] == 'h' || token.chars[1] == 'l'
|| token.chars[1] == 's' || token.chars[1] == 'u'
|| token.chars[1] == 'y'))
|| (token.charcount == 3
&& (token.chars[1] == 's' || token.chars[1] == 'u')
&& token.chars[2] == '8')
|| (token.charcount == 4
&& (((token.chars[1] == 's' || token.chars[1] == 'u')
&& token.chars[2] == '1'
&& token.chars[3] == '6')
|| ((token.chars[1] == 'c'
|| token.chars[1] == 'f'
|| token.chars[1] == 's'
|| token.chars[1] == 'u')
&& ((token.chars[2] == '3'
&& token.chars[3] == '2')
|| (token.chars[2] == '6'
&& token.chars[3] == '4')))
|| (token.chars[1] == 'v'
&& token.chars[2] == 'u'
&& token.chars[3] == '8'))))
{
c = do_getc ();
if (c != EOF)
do_ungetc (c);
if (c == '(')
{
/* Homogenous vector syntax:
#a(...) - vector of char
#c(...) - vector of complex (old)
#c32(...) - vector of complex of single-float
#c64(...) - vector of complex of double-float
#f32(...) - vector of single-float
#f64(...) - vector of double-float
#h(...) - vector of short (old)
#l(...) - vector of long long (old)
#s(...) - vector of single-float (old)
#s8(...) - vector of signed 8-bit integers
#s16(...) - vector of signed 16-bit integers
#s32(...) - vector of signed 32-bit integers
#s64(...) - vector of signed 64-bit integers
#u(...) - vector of unsigned long (old)
#u8(...) - vector of unsigned 8-bit integers
#u16(...) - vector of unsigned 16-bit integers
#u32(...) - vector of unsigned 32-bit integers
#u64(...) - vector of unsigned 64-bit integers
#vu8(...) - vector of byte
#y(...) - vector of byte (old)
*/
struct object inner;
read_object (&inner, null_context);
/* Dots and EOF are not allowed here.
But be tolerant. */
free_token (&token);
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
}
/* Boolean false, or unknown # object. But be tolerant. */
free_token (&token);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case 'B': case 'b':
case 'O': case 'o':
case 'D': case 'd':
case 'X': case 'x':
case 'E': case 'e':
case 'I': case 'i':
{
struct token token;
do_ungetc (c);
read_token (&token, '#');
if (is_number (&token))
{
/* A number. */
free_token (&token);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
else
{
if (token.charcount == 2
&& (token.chars[1] == 'e' || token.chars[1] == 'i'))
{
c = do_getc ();
if (c != EOF)
do_ungetc (c);
if (c == '(')
{
/* Homogenous vector syntax:
#e(...) - vector of long (old)
#i(...) - vector of double-float (old)
*/
struct object inner;
read_object (&inner, null_context);
/* Dots and EOF are not allowed here.
But be tolerant. */
free_token (&token);
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
}
/* Unknown # object. But be tolerant. */
free_token (&token);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
}
case '!':
/* Block comment '#! ... !#'. See
<https://www.gnu.org/software/guile/manual/html_node/Block-Comments.html>. */
{
int c;
comment_start ();
c = do_getc ();
for (;;)
{
if (c == EOF)
break;
if (c == '!')
{
c = do_getc ();
if (c == EOF)
break;
if (c == '#')
{
comment_line_end (0);
break;
}
else
comment_add ('!');
}
else
{
/* We skip all leading white space. */
if (!(buflen == 0 && (c == ' ' || c == '\t')))
comment_add (c);
if (c == '\n')
{
comment_line_end (1);
comment_start ();
}
c = do_getc ();
}
}
if (c == EOF)
{
/* EOF not allowed here. But be tolerant. */
op->type = t_eof;
return;
}
last_comment_line = line_number;
continue;
}
case '|':
/* Block comment '#| ... |#'. See
<https://www.gnu.org/software/guile/manual/html_node/Block-Comments.html>
and <https://srfi.schemers.org/srfi-30/srfi-30.html>. */
{
int depth = 0;
int c;
comment_start ();
c = do_getc ();
for (;;)
{
if (c == EOF)
break;
if (c == '|')
{
c = do_getc ();
if (c == EOF)
break;
if (c == '#')
{
if (depth == 0)
{
comment_line_end (0);
break;
}
depth--;
comment_add ('|');
comment_add ('#');
c = do_getc ();
}
else
comment_add ('|');
}
else if (c == '#')
{
c = do_getc ();
if (c == EOF)
break;
comment_add ('#');
if (c == '|')
{
depth++;
comment_add ('|');
c = do_getc ();
}
}
else
{
/* We skip all leading white space. */
if (!(buflen == 0 && (c == ' ' || c == '\t')))
comment_add (c);
if (c == '\n')
{
comment_line_end (1);
comment_start ();
}
c = do_getc ();
}
}
if (c == EOF)
{
/* EOF not allowed here. But be tolerant. */
op->type = t_eof;
return;
}
last_comment_line = line_number;
continue;
}
case '*':
/* Bit vector. */
{
struct token token;
read_token (&token, c);
/* The token should consists only of '0' and '1', except
for the initial '*'. But be tolerant. */
free_token (&token);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case '{':
/* Symbol with multiple escapes: #{...}# */
{
op->token = XMALLOC (struct token);
init_token (op->token);
for (;;)
{
c = do_getc ();
if (c == EOF)
break;
if (c == '\\')
{
c = do_getc ();
if (c == EOF)
break;
}
else if (c == '}')
{
c = do_getc ();
if (c == '#')
break;
if (c != EOF)
do_ungetc (c);
c = '}';
}
grow_token (op->token);
op->token->chars[op->token->charcount++] = c;
}
op->type = t_symbol;
last_non_comment_line = line_number;
return;
}
case '\\':
/* Character. */
{
struct token token;
c = do_getc ();
if (c != EOF)
{
read_token (&token, c);
free_token (&token);
}
op->type = t_other;
last_non_comment_line = line_number;
return;
}
case ':': /* Keyword. */
case '&': /* Deprecated keyword, installed in optargs.scm. */
{
struct token token;
read_token (&token, '-');
free_token (&token);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
/* The following are installed through read-hash-extend. */
/* arrays.scm */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
/* Multidimensional array syntax: #nx(...) where
n ::= DIGIT+
x ::= {'a'|'b'|'c'|'e'|'i'|'s'|'u'}
*/
do
c = do_getc ();
while (c >= '0' && c <= '9');
/* c should be one of {'a'|'b'|'c'|'e'|'i'|'s'|'u'}.
But be tolerant. */
FALLTHROUGH;
case '\'': /* boot-9.scm */
case '.': /* boot-9.scm */
case ',': /* srfi-10.scm */
{
struct object inner;
read_object (&inner, null_context);
/* Dots and EOF are not allowed here.
But be tolerant. */
free_object (&inner);
op->type = t_other;
last_non_comment_line = line_number;
return;
}
default:
/* Unknown. */
op->type = t_other;
last_non_comment_line = line_number;
return;
}
/*NOTREACHED*/
abort ();
}
case '_':
/* GIMP script-fu extension: '_' before a string literal is
considered a gettext call on the string. */
{
int c = do_getc ();
if (c == EOF)
/* Invalid input. Be tolerant, no error message. */
{
op->type = t_other;
return;
}
if (c != '"')
{
do_ungetc (c);
/* If '_' is not followed by a string literal,
consider it a part of symbol. */
op->token = XMALLOC (struct token);
read_token (op->token, '_');
op->type = t_symbol;
last_non_comment_line = line_number;
return;
}
seen_underscore_prefix = true;
}
FALLTHROUGH;
case '"':
{
op->token = XMALLOC (struct token);
init_token (op->token);
op->line_number_at_start = line_number;
for (;;)
{
int c = do_getc ();
if (c == EOF)
/* Invalid input. Be tolerant, no error message. */
break;
if (c == '"')
break;
if (c == '\\')
{
c = do_getc ();
if (c == EOF)
/* Invalid input. Be tolerant, no error message. */
break;
switch (c)
{
case '\n':
continue;
case '0':
c = '\0';
break;
case 'a':
c = '\a';
break;
case 'f':
c = '\f';
break;
case 'n':
c = '\n';
break;
case 'r':
c = '\r';
break;
case 't':
c = '\t';
break;
case 'v':
c = '\v';
break;
default:
break;
}
}
grow_token (op->token);
op->token->chars[op->token->charcount++] = c;
}
op->type = t_string;
if (seen_underscore_prefix || extract_all)
{
lex_pos_ty pos;
pos.file_name = logical_file_name;
pos.line_number = op->line_number_at_start;
remember_a_message (mlp, NULL, string_of_object (op), false,
false, null_context, &pos,
NULL, savable_comment, false);
}
last_non_comment_line = line_number;
return;
}
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '+': case '-': case '.':
/* Read a number or symbol token. */
op->token = XMALLOC (struct token);
read_token (op->token, c);
if (op->token->charcount == 1 && op->token->chars[0] == '.')
{
free_token (op->token);
free (op->token);
op->type = t_dot;
}
else if (is_number (op->token))
{
/* A number. */
free_token (op->token);
free (op->token);
op->type = t_other;
}
else
{
/* A symbol. */
op->type = t_symbol;
}
last_non_comment_line = line_number;
return;
case ':':
default:
/* Read a symbol token. */
op->token = XMALLOC (struct token);
read_token (op->token, c);
op->type = t_symbol;
last_non_comment_line = line_number;
return;
}
}
}
void
extract_scheme (FILE *f,
const char *real_filename, const char *logical_filename,
flag_context_list_table_ty *flag_table,
msgdomain_list_ty *mdlp)
{
mlp = mdlp->item[0]->messages;
fp = f;
real_file_name = real_filename;
logical_file_name = xstrdup (logical_filename);
line_number = 1;
last_comment_line = -1;
last_non_comment_line = -1;
flag_context_list_table = flag_table;
init_keywords ();
/* Eat tokens until eof is seen. When read_object returns
due to an unbalanced closing parenthesis, just restart it. */
do
{
struct object toplevel_object;
read_object (&toplevel_object, null_context);
if (toplevel_object.type == t_eof)
break;
free_object (&toplevel_object);
}
while (!feof (fp));
/* Close scanner. */
fp = NULL;
real_file_name = NULL;
logical_file_name = NULL;
line_number = 0;
}