blob: e2754f73d2fc06919a696c18386a5c2f7fbd30fe [file] [log] [blame]
/* xgettext Vala backend.
Copyright (C) 2013-2014, 2018-2020 Free Software Foundation, Inc.
This file was written by Daiki Ueno <ueno@gnu.org>, 2013.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
/* Specification. */
#include "x-vala.h"
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "attribute.h"
#include "message.h"
#include "rc-str-list.h"
#include "xgettext.h"
#include "xg-pos.h"
#include "xg-encoding.h"
#include "xg-mixed-string.h"
#include "xg-arglist-context.h"
#include "xg-arglist-callshape.h"
#include "xg-arglist-parser.h"
#include "xg-message.h"
#include "error.h"
#include "error-progname.h"
#include "xalloc.h"
#include "xvasprintf.h"
#include "mem-hash-map.h"
#include "po-charset.h"
#include "gettext.h"
#define _(s) gettext(s)
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
/* The Vala syntax is defined in the Vala Reference Manual
https://www.vala-project.org/doc/vala/.
See also vala/valascanner.vala. */
/* ====================== Keyword set customization. ====================== */
/* If true extract all strings. */
static bool extract_all = false;
/* Table of the recognized keywords.  Each value stored in it is the set of
call shapes of one keyword.  Created on first use by add_keyword. */
static hash_table keywords;
/* True as long as init_keywords still has to install the built-in keywords.
Cleared when a NULL keyword is registered and once the built-in set has
been installed. */
static bool default_keywords = true;
/* Request that every string literal be extracted, whether or not it is an
argument of a keyword. */
void
x_vala_extract_all ()
{
extract_all = true;
}
/* Register the keyword specification NAME in the table KEYWORDS.
   A NULL NAME registers nothing; it only switches off the built-in
   default keywords.  */
static void
add_keyword (const char *name, hash_table *keywords)
{
  const char *name_end;
  struct callshape shape;
  const char *first_colon;

  if (name == NULL)
    {
      default_keywords = false;
      return;
    }

  /* Create the table the first time a keyword is added.  */
  if (keywords->table == NULL)
    hash_init (keywords, 100);

  split_keywordspec (name, &name_end, &shape);

  /* The characters between NAME and NAME_END should form a valid C
     identifier.  A colon inside that range means that split_keywordspec()
     could not parse the specification; such a specification is dropped.  */
  first_colon = strchr (name, ':');
  if (first_colon != NULL && first_colon < name_end)
    return;

  insert_keyword_callshape (keywords, name, name_end - name, &shape);
}
/* Register the keyword specification NAME in the file-level keyword table.
A NULL NAME disables the built-in default keywords instead (see
add_keyword). */
void
x_vala_keyword (const char *name)
{
add_keyword (name, &keywords);
}
/* Install the built-in keywords, unless that has already been done or the
   default keywords have been disabled.  */
static void
init_keywords ()
{
  /* When adding new keywords here, also update the documentation in
     xgettext.texi!  */
  static const char *const builtin_specs[] =
    {
      "dgettext:2",
      "dcgettext:2",
      "ngettext:1,2",
      "dngettext:2,3",
      "dpgettext:2g",
      "dpgettext2:2c,3",
      "_",
      "Q_",
      "N_",
      "NC_:1c,2"
    };
  size_t i;

  if (!default_keywords)
    return;

  for (i = 0; i < sizeof builtin_specs / sizeof builtin_specs[0]; i++)
    x_vala_keyword (builtin_specs[i]);

  default_keywords = false;
}
/* Record the format-string flags of the functions known to this backend.  */
void
init_flag_table_vala ()
{
  static const char *const flag_specs[] =
    {
      "dgettext:2:pass-c-format",
      "dcgettext:2:pass-c-format",
      "ngettext:1:pass-c-format",
      "ngettext:2:pass-c-format",
      "dngettext:2:pass-c-format",
      "dngettext:3:pass-c-format",
      "dpgettext:2:pass-c-format",
      "dpgettext2:3:pass-c-format",
      "_:1:pass-c-format",
      "Q_:1:pass-c-format",
      "N_:1:pass-c-format",
      "NC_:2:pass-c-format",
      /* Vala leaves string formatting to Glib functions and thus the
         format string is exactly same as C.  See also
         vapi/glib-2.0.vapi.  */
      "printf:1:c-format",
      "vprintf:1:c-format"
    };
  size_t i;

  for (i = 0; i < sizeof flag_specs / sizeof flag_specs[0]; i++)
    xgettext_record_flag (flag_specs[i]);
}
/* ======================== Reading of characters. ======================== */
/* The input file stream. */
static FILE *fp;
/* 1. line_number handling. */
/* Capacity of the phase 1 pushback stack, in characters. */
#define MAX_PHASE1_PUSHBACK 16
/* Characters pushed back by phase1_ungetc.  phase1_getc returns the most
recently pushed character first. */
static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK];
/* Number of characters currently stored in phase1_pushback. */
static int phase1_pushback_length;
/* Read the next character, taking pushed-back characters before reading
   from the input stream.  Every newline returned increments line_number.
   Returns EOF at end of input; a read error terminates the program.  */
static int
phase1_getc ()
{
  int ch;

  if (phase1_pushback_length != 0)
    {
      phase1_pushback_length--;
      ch = phase1_pushback[phase1_pushback_length];
    }
  else
    {
      ch = getc (fp);
      if (ch == EOF)
        {
          if (ferror (fp))
            error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
                   real_file_name);
          return EOF;
        }
    }

  if (ch == '\n')
    line_number++;

  return ch;
}
/* Push C back so that the next phase1_getc returns it.
   Supports MAX_PHASE1_PUSHBACK characters of pushback; exceeding that
   aborts.  Pushing back EOF does nothing.  */
static void
phase1_ungetc (int c)
{
  if (c == EOF)
    return;

  /* Undo the line counting that phase1_getc did for this character.  */
  if (c == '\n')
    line_number--;

  if (phase1_pushback_length == SIZEOF (phase1_pushback))
    abort ();

  phase1_pushback[phase1_pushback_length] = c;
  phase1_pushback_length++;
}
/* These are for tracking whether comments count as immediately before
keyword. */
/* Value of line_number when the most recent comment ended. */
static int last_comment_line;
/* Line number at which the most recent token (anything but white space or
a comment) started. */
static int last_non_comment_line;
/* Accumulating comments. */
/* Text of the comment line being accumulated.  It is NUL-terminated only
by comment_line_end. */
static char *buffer;
/* Allocated size of buffer, in bytes. */
static size_t bufmax;
/* Number of bytes of buffer currently in use. */
static size_t buflen;
/* Begin accumulating a new comment line: forget the text buffered so far.
The allocation is kept for reuse. */
static inline void
comment_start ()
{
buflen = 0;
}
/* Append the character C to the comment line being accumulated, enlarging
   the buffer when it is full.  */
static inline void
comment_add (int c)
{
  if (bufmax <= buflen)
    {
      size_t grown = 2 * bufmax + 10;

      buffer = xrealloc (buffer, grown);
      bufmax = grown;
    }

  buffer[buflen] = c;
  buflen++;
}
/* Finish the comment line being accumulated and hand it over to
   savable_comment_add.  CHARS_TO_REMOVE is the number of characters at the
   end of the buffer that are not part of the comment text (the newline, or
   the closing star and slash); trailing blanks before them are dropped
   as well.  */
static inline void
comment_line_end (size_t chars_to_remove)
{
  buflen -= chars_to_remove;

  /* Strip trailing spaces and tabs.  */
  while (buflen > 0)
    {
      char last = buffer[buflen - 1];

      if (last != ' ' && last != '\t')
        break;
      buflen--;
    }

  /* When nothing was removed, the buffer may be completely full; make room
     for the terminating NUL.  */
  if (chars_to_remove == 0 && buflen >= bufmax)
    {
      size_t grown = 2 * bufmax + 10;

      buffer = xrealloc (buffer, grown);
      bufmax = grown;
    }

  buffer[buflen] = '\0';
  savable_comment_add (buffer);
}
/* 2. Replace each comment that is not inside a character constant or
   string literal with a space character (or, for a comment that runs to
   the end of the line, with a newline).  The text of the comment is passed
   line by line to savable_comment_add.  */
static int
phase2_getc ()
{
  int c;

  c = phase1_getc ();
  if (c != '/')
    return c;

  c = phase1_getc ();

  if (c == '*')
    {
      /* C comment.  */
      bool star_seen = false;

      comment_start ();
      for (;;)
        {
          c = phase1_getc ();
          if (c == EOF)
            break;

          /* We skip all leading white space, but not EOLs.  */
          if (buflen > 0 || (c != ' ' && c != '\t'))
            comment_add (c);

          if (c == '\n')
            {
              /* One line of the comment is complete.  */
              comment_line_end (1);
              comment_start ();
              star_seen = false;
            }
          else if (c == '*')
            star_seen = true;
          else if (c == '/' && star_seen)
            {
              /* End of the comment; drop the closing star and slash.  */
              comment_line_end (2);
              break;
            }
          else
            star_seen = false;
        }
      last_comment_line = line_number;
      return ' ';
    }

  if (c == '/')
    {
      /* C++ or ISO C 99 comment.  */
      comment_start ();
      for (;;)
        {
          c = phase1_getc ();
          if (c == '\n' || c == EOF)
            break;

          /* We skip all leading white space, but not EOLs.  */
          if (buflen > 0 || (c != ' ' && c != '\t'))
            comment_add (c);
        }
      comment_line_end (0);
      last_comment_line = line_number;
      return '\n';
    }

  /* A slash that does not start a comment.  */
  phase1_ungetc (c);
  return '/';
}
/* Push back a character obtained from phase2_getc.  This forwards to
phase1_ungetc, so both phases share one pushback stack. */
static void
phase2_ungetc (int c)
{
phase1_ungetc (c);
}
/* ========================== Reading of tokens. ========================== */
/* The kinds of tokens distinguished by the lexer.  */
typedef enum token_type_ty
{
  token_type_character_constant,        /* 'x' */
  token_type_eof,
  token_type_lparen,                    /* ( */
  token_type_rparen,                    /* ) */
  token_type_lbrace,                    /* { */
  token_type_rbrace,                    /* } */
  token_type_assign,                    /* = += -= *= /= %= <<= >>= &= |= ^= */
  token_type_return,                    /* return */
  token_type_plus,                      /* + */
  token_type_arithmetic_operator,       /* - * / % << >> & | ^ */
  token_type_equality_test_operator,    /* == < > >= <= != */
  token_type_logic_operator,            /* ! && || */
  token_type_comma,                     /* , */
  token_type_question,                  /* ? */
  token_type_colon,                     /* : */
  token_type_number,                    /* 2.7 */
  token_type_string_literal,            /* "abc" */
  token_type_string_template,           /* @"abc" */
  token_type_regex_literal,             /* /.../ */
  token_type_symbol,                    /* if else etc. */
  token_type_other
} token_type_ty;
/* A token returned by the lexer.  Which of the optional members are valid
   depends on TYPE.  */
typedef struct token_ty
{
  token_type_ty type;
  char *string;                         /* for token_type_symbol */
  mixed_string_ty *mixed_string;        /* for token_type_string_literal */
  refcounted_string_list_ty *comment;   /* for token_type_string_literal */
  int line_number;
} token_ty;
/* Release the memory owned by the token *TP.  Only symbol tokens (their
   name) and string literal tokens (their string and their comment
   reference) own anything; *TP itself is not freed.  */
static inline void
free_token (token_ty *tp)
{
  switch (tp->type)
    {
    case token_type_symbol:
      free (tp->string);
      break;

    case token_type_string_literal:
      mixed_string_free (tp->mixed_string);
      drop_reference (tp->comment);
      break;

    default:
      /* Nothing to release.  */
      break;
    }
}
/* Special return values of phase7_getc.  They are all negative and can
   therefore not be confused with a character or a Unicode code point.  */
/* End of input.  */
#define P7_EOF (-1)
#define P7_STRING_END (-2)
/* An unescaped double quote.  */
#define P7_QUOTES (-3)
/* An unescaped single quote.  */
#define P7_QUOTE (-4)
/* An unescaped newline, that is, an unterminated literal.  */
#define P7_NEWLINE (-5)

/* Convert an UTF-16 or UTF-32 code point to a return value that can be
   distinguished from a single-byte return value.  */
#define UNICODE(code) ((code) + 0x100)

/* Test a return value of phase7_getc whether it designates an UTF-16 or
   UTF-32 code point.  */
#define IS_UNICODE(p7_result) (0x100 <= (p7_result))

/* Extract the UTF-16 or UTF-32 code of a return value that satisfies
   IS_UNICODE.  */
#define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
/* 7. Read the next element of a character constant or of a non-verbatim
   string literal, replacing an escape sequence with the character it
   denotes.
   Return value:
     - P7_NEWLINE, P7_QUOTES or P7_QUOTE for an unescaped newline, double
       quote or single quote,
     - EOF at the end of the input,
     - UNICODE (code) for a \uXXXX escape sequence,
     - otherwise a byte value in the range 0..255.  An unknown or malformed
       escape sequence yields the backslash itself; the characters after
       it are pushed back and will be read as ordinary characters.  */
static int
phase7_getc ()
{
  int c, n, j;

  /* Use phase 1, because phase 2 elides comments.  */
  c = phase1_getc ();

  /* Return a magic newline indicator, so that the caller can distinguish
     a newline that the user requested in the string (e.g. using "\n" or
     "\012") from a raw newline, which means that the user failed to
     terminate the string or character constant.  */
  if (c == '\n')
    return P7_NEWLINE;

  if (c == '"')
    return P7_QUOTES;
  if (c == '\'')
    return P7_QUOTE;
  if (c != '\\')
    return c;

  c = phase1_getc ();
  switch (c)
    {
    default:
      /* Unknown escape sequences really should be an error, but just
         ignore them, and let the real compiler complain.  */
      phase1_ungetc (c);
      return '\\';

    case '"':
    case '\'':
    case '\\':
    case '$':
      return c;

    case 'b':
      return '\b';

    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case 'v':
      return '\v';

    case 'x':
      c = phase1_getc ();
      switch (c)
        {
        default:
          /* No hexadecimal digit follows: not an escape sequence.  */
          phase1_ungetc (c);
          phase1_ungetc ('x');
          return '\\';

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
          break;
        }

      /* Consume all hexadecimal digits, but keep only the low 8 bits of
         the value.  Without the mask, a long digit sequence would overflow
         the 'int' accumulator, and a result outside 0..255 would be
         mistaken by the caller for a UNICODE () value or for one of the
         negative P7_* indicators.  */
      n = 0;
      for (;;)
        {
          switch (c)
            {
            default:
              phase1_ungetc (c);
              return n;

            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
              n = (n * 16 + c - '0') & 0xFF;
              break;

            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
              n = (n * 16 + 10 + c - 'A') & 0xFF;
              break;

            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
              n = (n * 16 + 10 + c - 'a') & 0xFF;
              break;
            }
          c = phase1_getc ();
        }
      return n;

    case '0':
      /* Octal escape sequence that starts with 0: at most three digits.  */
      n = 0;
      for (j = 0; j < 3; ++j)
        {
          n = n * 8 + c - '0';
          c = phase1_getc ();
          switch (c)
            {
            default:
              break;

            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
              continue;
            }
          break;
        }

      phase1_ungetc (c);
      return n;

    case 'u':
      {
        /* The hexadecimal digits read so far, kept so that they can be
           pushed back if the escape sequence turns out to be invalid.  */
        unsigned char buf[8];

        n = 0;
        for (j = 0; j < 4; j++)
          {
            int c1 = phase1_getc ();

            if (c1 >= '0' && c1 <= '9')
              n = (n << 4) + (c1 - '0');
            else if (c1 >= 'A' && c1 <= 'F')
              n = (n << 4) + (c1 - 'A' + 10);
            else if (c1 >= 'a' && c1 <= 'f')
              n = (n << 4) + (c1 - 'a' + 10);
            else
              {
                /* Fewer than four digits: push everything back, in
                   reverse order, and return the bare backslash.  */
                phase1_ungetc (c1);
                while (--j >= 0)
                  phase1_ungetc (buf[j]);
                phase1_ungetc (c);
                return '\\';
              }

            buf[j] = c1;
          }

        if (n < 0x110000)
          return UNICODE (n);

        error_with_progname = false;
        error (0, 0, _("%s:%d: warning: invalid Unicode character"),
               logical_file_name, line_number);
        error_with_progname = true;

        while (--j >= 0)
          phase1_ungetc (buf[j]);
        phase1_ungetc (c);
        return '\\';
      }
    }
}
/* Push back the character C for phase7_getc.  This forwards to
phase1_ungetc, so C must be a plain character, not one of the P7_*
indicators. */
static void
phase7_ungetc (int c)
{
phase1_ungetc (c);
}
/* 3. Parse each resulting logical line as preprocessing tokens and
white space. Preprocessing tokens and Vala tokens don't always
match. */
/* Tokens pushed back by phase3_unget.  phase3_get returns the most recently
pushed token first. */
static token_ty phase3_pushback[2];
/* Number of tokens currently stored in phase3_pushback. */
static int phase3_pushback_length;
/* Type of the token most recently returned by phase3_get.  phase3_get uses
it to decide whether a '/' starts a regular expression literal. */
static token_type_ty last_token_type;
/* Skip the rest of a regular expression literal whose opening slash has
   already been read: everything up to the closing unescaped slash, plus at
   most one following flag character (i, s, m or x).  Warns and returns if
   the input ends before the closing slash.  */
static void
phase3_scan_regex ()
{
  int flag;

  for (;;)
    {
      int ch = phase1_getc ();

      if (ch == '/')
        break;

      /* A backslash escapes the next character, even a slash.  */
      if (ch == '\\')
        ch = phase1_getc ();

      if (ch == EOF)
        {
          error_with_progname = false;
          error (0, 0,
                 _("%s:%d: warning: regular expression literal terminated too early"),
                 logical_file_name, line_number);
          error_with_progname = true;
          return;
        }
    }

  flag = phase2_getc ();
  switch (flag)
    {
    case 'i':
    case 's':
    case 'm':
    case 'x':
      /* A flag; it belongs to the literal.  */
      break;

    default:
      phase2_ungetc (flag);
      break;
    }
}
/* Read the next token and store it in *TP.
   Tokens pushed back with phase3_unget are returned first.  Otherwise
   white space and comments are skipped and one token is scanned from the
   input.  TP->line_number is the line on which the token starts.
   For a token_type_symbol token, TP->string is a freshly allocated copy of
   the name.  For a token_type_string_literal token, TP->mixed_string holds
   the contents and TP->comment a reference to the comments seen before
   it; free_token releases these.
   last_token_type is updated for every token returned.  */
static void
phase3_get (token_ty *tp)
{
  /* Scratch buffer for the spelling of symbols and numbers.  These locals
     hide the file-level comment buffer of the same name.  */
  static char *buffer;
  static int bufmax;
  int bufpos;

#undef APPEND
#define APPEND(c)                               \
  do                                            \
    {                                           \
      if (bufpos >= bufmax)                     \
        {                                       \
          bufmax = 2 * bufmax + 10;             \
          buffer = xrealloc (buffer, bufmax);   \
        }                                       \
      buffer[bufpos++] = c;                     \
    }                                           \
  while (0)

  if (phase3_pushback_length)
    {
      *tp = phase3_pushback[--phase3_pushback_length];
      last_token_type = tp->type;
      return;
    }

  for (;;)
    {
      bool template;
      bool verbatim;
      int c;

      tp->line_number = line_number;
      c = phase2_getc ();

      switch (c)
        {
        case EOF:
          tp->type = last_token_type = token_type_eof;
          return;

        case '\n':
          /* A comment stays attached only to a token that follows it
             directly; once a line with a token has ended, forget it.  */
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          FALLTHROUGH;
        case ' ':
        case '\f':
        case '\t':
          /* Ignore whitespace and comments.  */
          continue;
        default:
          break;
        }

      last_non_comment_line = tp->line_number;
      template = false;
      verbatim = false;

      switch (c)
        {
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
        case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
        case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
        case 'V': case 'W': case 'X': case 'Y': case 'Z':
        case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
        case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
        case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
        case 'v': case 'w': case 'x': case 'y': case 'z':
          /* Identifier or keyword.  */
          bufpos = 0;
          for (;;)
            {
              APPEND (c);
              c = phase2_getc ();
              switch (c)
                {
                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
                case 'Y': case 'Z':
                case '_':
                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
                case 'y': case 'z':
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                  continue;

                default:
                  phase2_ungetc (c);
                  break;
                }
              break;
            }
          APPEND (0);
          if (strcmp (buffer, "return") == 0)
            tp->type = last_token_type = token_type_return;
          else
            {
              tp->string = xstrdup (buffer);
              tp->type = last_token_type = token_type_symbol;
            }
          return;

        case '.':
          /* Peek at the next character: a digit means that this dot
             starts a number.  */
          c = phase2_getc ();
          phase2_ungetc (c);
          switch (c)
            {
            default:
              tp->string = xstrdup (".");
              tp->type = last_token_type = token_type_symbol;
              return;

            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
              c = '.';
              break;
            }
          FALLTHROUGH;

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          /* The preprocessing number token is more "generous" than the C
             number tokens.  This is mostly due to token pasting (another
             thing we can ignore here).  */
          bufpos = 0;
          for (;;)
            {
              APPEND (c);
              c = phase2_getc ();
              switch (c)
                {
                case 'e':
                case 'E':
                  APPEND (c);
                  c = phase2_getc ();
                  if (c != '+' && c != '-')
                    {
                      phase2_ungetc (c);
                      break;
                    }
                  continue;

                case 'A': case 'B': case 'C': case 'D':           case 'F':
                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
                case 'Y': case 'Z':
                case 'a': case 'b': case 'c': case 'd':           case 'f':
                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
                case 'y': case 'z':
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                case '.':
                  continue;

                default:
                  phase2_ungetc (c);
                  break;
                }
              break;
            }
          APPEND (0);
          tp->type = last_token_type = token_type_number;
          return;

        case '\'':
          /* Character constant: skip up to the closing quote.  */
          for (;;)
            {
              c = phase7_getc ();
              if (c == P7_NEWLINE)
                {
                  error_with_progname = false;
                  error (0, 0, _("%s:%d: warning: unterminated character constant"),
                         logical_file_name, line_number - 1);
                  error_with_progname = true;
                  phase7_ungetc ('\n');
                  break;
                }
              if (c == EOF || c == P7_QUOTE)
                break;
            }
          tp->type = last_token_type = token_type_character_constant;
          return;

          /* Vala provides strings in three different formats.

             Usual string literals:
               "..."
             Verbatim string literals:
               """...""" (where ... can include newlines and double quotes)
             String templates.
               @"...", @"""..."""

             Note that, with the current implementation string
             templates are not subject to translation, because they are
             inspected at compile time.  For example, the following code

               string bar = "bar";
               string foo = _(@"foo $bar");

             will be translated into the C code, like:

               _(g_strconcat ("foo ", "bar", NULL));  */
        case '@':
          c = phase2_getc ();
          if (c != '"')
            {
              phase2_ungetc (c);
              tp->type = last_token_type = token_type_other;
              return;
            }
          template = true;
          FALLTHROUGH;
        case '"':
          {
            struct mixed_string_buffer msb;

            /* Three double quotes in a row open a verbatim string.  */
            {
              int c2 = phase1_getc ();

              if (c2 == '"')
                {
                  int c3 = phase1_getc ();

                  if (c3 == '"')
                    verbatim = true;
                  else
                    {
                      phase1_ungetc (c3);
                      phase1_ungetc (c2);
                    }
                }
              else
                phase2_ungetc (c2);
            }

            /* Start accumulating the string.  */
            mixed_string_buffer_init (&msb, lc_string,
                                      logical_file_name, line_number);
            if (verbatim)
              for (;;)
                {
                  c = phase1_getc ();

                  /* Keep line_number in sync.  */
                  msb.line_number = line_number;

                  if (c == '"')
                    {
                      /* Only three double quotes in a row end the
                         string.  */
                      int c2 = phase1_getc ();

                      if (c2 == '"')
                        {
                          int c3 = phase1_getc ();

                          if (c3 == '"')
                            break;
                          phase1_ungetc (c3);
                        }
                      phase1_ungetc (c2);
                    }
                  if (c == EOF)
                    break;
                  mixed_string_buffer_append_char (&msb, c);
                }
            else
              for (;;)
                {
                  c = phase7_getc ();

                  /* Keep line_number in sync.  */
                  msb.line_number = line_number;

                  if (c == P7_NEWLINE)
                    {
                      error_with_progname = false;
                      error (0, 0,
                             _("%s:%d: warning: unterminated string literal"),
                             logical_file_name, line_number - 1);
                      error_with_progname = true;
                      phase7_ungetc ('\n');
                      break;
                    }
                  if (c == P7_QUOTES)
                    break;
                  if (c == EOF)
                    break;
                  if (c == P7_QUOTE)
                    c = '\'';
                  if (IS_UNICODE (c))
                    {
                      assert (UNICODE_VALUE (c) >= 0
                              && UNICODE_VALUE (c) < 0x110000);
                      mixed_string_buffer_append_unicode (&msb,
                                                          UNICODE_VALUE (c));
                    }
                  else
                    mixed_string_buffer_append_char (&msb, c);
                }
            /* Done accumulating the string.  */

            if (template)
              {
                /* The contents of a string template are not kept.  */
                tp->type = token_type_string_template;
                mixed_string_buffer_destroy (&msb);
              }
            else
              {
                tp->type = token_type_string_literal;
                tp->mixed_string = mixed_string_buffer_result (&msb);
                tp->comment = add_reference (savable_comment);
              }
            last_token_type = tp->type;
            return;
          }

        case '/':
          /* A slash starts a regular expression literal when the previous
             token cannot end an operand; otherwise it is a division.  */
          switch (last_token_type)
            {
            case token_type_lparen:
            case token_type_lbrace:
            case token_type_assign:
            case token_type_return:
            case token_type_plus:
            case token_type_arithmetic_operator:
            case token_type_equality_test_operator:
            case token_type_logic_operator:
            case token_type_comma:
            case token_type_question:
            case token_type_colon:
              phase3_scan_regex ();
              tp->type = last_token_type = token_type_regex_literal;
              break;
            default:
              {
                int c2 = phase2_getc ();
                if (c2 == '=')
                  tp->type = last_token_type = token_type_assign;
                else
                  {
                    phase2_ungetc (c2);
                    tp->type = last_token_type = token_type_arithmetic_operator;
                  }
                break;
              }
            }
          return;

        case '(':
          tp->type = last_token_type = token_type_lparen;
          return;

        case ')':
          tp->type = last_token_type = token_type_rparen;
          return;

        case '{':
          tp->type = last_token_type = token_type_lbrace;
          return;

        case '}':
          tp->type = last_token_type = token_type_rbrace;
          return;

        case '+':
          {
            int c2 = phase2_getc ();
            switch (c2)
              {
              case '+':
                tp->type = last_token_type = token_type_other;
                break;
              case '=':
                tp->type = last_token_type = token_type_assign;
                break;
              default:
                phase2_ungetc (c2);
                tp->type = last_token_type = token_type_plus;
                break;
              }
            return;
          }

        case '-':
          {
            int c2 = phase2_getc ();
            switch (c2)
              {
              case '-':
                tp->type = last_token_type = token_type_other;
                break;
              case '=':
                tp->type = last_token_type = token_type_assign;
                break;
              default:
                phase2_ungetc (c2);
                tp->type = last_token_type = token_type_arithmetic_operator;
                break;
              }
            return;
          }

        case '%':
        case '^':
          {
            int c2 = phase2_getc ();
            if (c2 == '=')
              tp->type = last_token_type = token_type_assign;
            else
              {
                phase2_ungetc (c2);
                /* '%' and '^' are arithmetic operators, as documented at
                   the declaration of the token types.  */
                tp->type = last_token_type = token_type_arithmetic_operator;
              }
            return;
          }

        case '=':
          {
            int c2 = phase2_getc ();
            switch (c2)
              {
              case '=':
                tp->type = last_token_type = token_type_equality_test_operator;
                break;
              case '>':
                tp->type = last_token_type = token_type_other;
                break;
              default:
                phase2_ungetc (c2);
                tp->type = last_token_type = token_type_assign;
                break;
              }
            return;
          }

        case '!':
          {
            int c2 = phase2_getc ();
            if (c2 == '=')
              tp->type = last_token_type = token_type_equality_test_operator;
            else
              {
                phase2_ungetc (c2);
                tp->type = last_token_type = token_type_logic_operator;
              }
            return;
          }

        case '>':
        case '<':
          {
            int c2 = phase2_getc ();
            if (c2 == '=')
              tp->type = last_token_type = token_type_equality_test_operator;
            else if (c2 == c)
              {
                int c3 = phase2_getc ();
                if (c3 == '=')
                  tp->type = last_token_type = token_type_assign;
                else
                  {
                    /* Not a shift assignment: return only the first
                       character as a token and put the other two back.
                       The pushback is a stack, so C3 has to be pushed
                       first in order that C2 is read again before it.  */
                    phase2_ungetc (c3);
                    phase2_ungetc (c2);
                    tp->type = last_token_type = token_type_other;
                  }
              }
            else
              {
                phase2_ungetc (c2);
                tp->type = last_token_type = token_type_equality_test_operator;
              }
            return;
          }

        case ',':
          tp->type = last_token_type = token_type_comma;
          return;

        case ':':
          tp->type = last_token_type = token_type_colon;
          return;

        case '&':
        case '|':
          {
            int c2 = phase2_getc ();
            if (c2 == c)
              tp->type = last_token_type = token_type_logic_operator;
            else if (c2 == '=')
              tp->type = last_token_type = token_type_assign;
            else
              {
                phase2_ungetc (c2);
                tp->type = last_token_type = token_type_arithmetic_operator;
              }
            return;
          }

        case '?':
          {
            int c2 = phase2_getc ();
            if (c2 == '?')
              tp->type = last_token_type = token_type_logic_operator;
            else
              {
                phase2_ungetc (c2);
                tp->type = last_token_type = token_type_question;
              }
            return;
          }

        default:
          tp->type = last_token_type = token_type_other;
          return;
        }
    }
#undef APPEND
}
/* Push the token *TP back so that the next phase3_get returns it.
   At most SIZEOF (phase3_pushback) tokens can be pending; exceeding that
   aborts.  An end-of-file token is not stored.  */
static void
phase3_unget (token_ty *tp)
{
  if (tp->type == token_type_eof)
    return;

  if (phase3_pushback_length == SIZEOF (phase3_pushback))
    abort ();

  phase3_pushback[phase3_pushback_length] = *tp;
  phase3_pushback_length++;
}
/* Read the next token into *TP, performing string concatenation with '+':
   a string literal followed by any number of "+ string literal" pairs is
   returned as a single string literal token.  */
static void
x_vala_lex (token_ty *tp)
{
  mixed_string_ty *sum;

  phase3_get (tp);
  if (tp->type != token_type_string_literal)
    return;

  sum = tp->mixed_string;
  for (;;)
    {
      token_ty plus_token;
      token_ty rhs_token;

      phase3_get (&plus_token);
      if (plus_token.type != token_type_plus)
        {
          phase3_unget (&plus_token);
          break;
        }

      phase3_get (&rhs_token);
      if (rhs_token.type != token_type_string_literal)
        {
          /* Push back in reverse order, so that the '+' is read first.  */
          phase3_unget (&rhs_token);
          phase3_unget (&plus_token);
          break;
        }

      sum = mixed_string_concat_free1 (sum, rhs_token.mixed_string);
      free_token (&rhs_token);
      free_token (&plus_token);
    }
  tp->mixed_string = sum;
}
/* ========================= Extracting strings. ========================== */
/* Context lookup table.  Set by extract_vala from its flag_table argument
and looked up by extract_balanced for every symbol token. */
static flag_context_list_table_ty *flag_context_list_table;
/* The file is broken into tokens. Scan the token stream, looking for
a keyword, followed by a left paren, followed by a string. When we
see this sequence, we have something to remember. We assume we are
looking at a valid Vala program, and leave the complaints about the
grammar to the compiler.
Normal handling: Look for
keyword ( ... msgid ... )
keyword msgid
Plural handling: Look for
keyword ( ... msgid ... msgid_plural ... )
We use recursion because the arguments before msgid or between msgid
and msgid_plural can contain subexpressions of the same form. */
/* Extract messages until the next balanced closing parenthesis.
   Extracted messages are added to MLP.
   DELIM is token_type_rparen inside a parenthesized group and
   token_type_eof at the top level; a closing parenthesis ends the scan in
   both cases.
   OUTER_CONTEXT and CONTEXT_ITER determine the flag context of each
   argument.
   ARGPARSER collects the arguments of the enclosing call; it is finished
   with arglist_parser_done before this function returns.
   Return true upon eof, false upon closing parenthesis.  */
static bool
extract_balanced (message_list_ty *mlp, token_type_ty delim,
                  flag_context_ty outer_context,
                  flag_context_list_iterator_ty context_iter,
                  struct arglist_parser *argparser)
{
  /* Current argument number.  */
  int arg = 1;
  /* True right after a keyword has been seen, false otherwise.  */
  bool after_keyword = false;
  /* Parameters of the keyword just seen.  Defined only when AFTER_KEYWORD
     is true.  */
  const struct callshapes *next_shapes = NULL;
  /* Context iterator that will be used if the next token is a '('.  */
  flag_context_list_iterator_ty next_context_iter =
    passthrough_context_list_iterator;
  /* Current context.  */
  flag_context_ty inner_context =
    inherited_context (outer_context,
                       flag_context_list_iterator_advance (&context_iter));

  for (;;)
    {
      token_ty token;

      x_vala_lex (&token);

      /* Cases that leave the switch with 'break' share the reset at the
         bottom of the loop; cases that set up state for the next token
         use 'continue' to skip it.  */
      switch (token.type)
        {
        case token_type_symbol:
          {
            size_t name_len = strlen (token.string);
            void *keyword_value;

            after_keyword =
              (hash_find_entry (&keywords, token.string, name_len,
                                &keyword_value)
               == 0);
            if (after_keyword)
              next_shapes = (const struct callshapes *) keyword_value;

            next_context_iter =
              flag_context_list_iterator (
                flag_context_list_table_lookup (
                  flag_context_list_table,
                  token.string, name_len));
            free (token.string);
          }
          continue;

        case token_type_lparen:
          {
            struct arglist_parser *inner_argparser =
              arglist_parser_alloc (mlp, after_keyword ? next_shapes : NULL);

            if (extract_balanced (mlp, token_type_rparen,
                                  inner_context, next_context_iter,
                                  inner_argparser))
              {
                /* The nested scan hit the end of the input.  */
                arglist_parser_done (argparser, arg);
                return true;
              }
          }
          break;

        case token_type_rparen:
          if (delim == token_type_rparen || delim == token_type_eof)
            {
              arglist_parser_done (argparser, arg);
              return false;
            }
          break;

        case token_type_comma:
          arg++;
          inner_context =
            inherited_context (outer_context,
                               flag_context_list_iterator_advance (
                                 &context_iter));
          next_context_iter = passthrough_context_list_iterator;
          after_keyword = false;
          continue;

        case token_type_eof:
          arglist_parser_done (argparser, arg);
          return true;

        case token_type_string_literal:
          {
            lex_pos_ty pos;

            pos.file_name = logical_file_name;
            pos.line_number = token.line_number;

            if (extract_all)
              {
                char *string = mixed_string_contents (token.mixed_string);

                mixed_string_free (token.mixed_string);
                remember_a_message (mlp, NULL, string, true, false,
                                    inner_context, &pos,
                                    NULL, token.comment, false);
              }
            else if (after_keyword)
              {
                /* A string immediately after a symbol means a function
                   call with this string as its only argument.  */
                struct arglist_parser *call_argparser =
                  arglist_parser_alloc (mlp, next_shapes);

                arglist_parser_remember (call_argparser, 1,
                                         token.mixed_string, inner_context,
                                         pos.file_name, pos.line_number,
                                         token.comment, false);
                arglist_parser_done (call_argparser, 1);
              }
            else
              arglist_parser_remember (argparser, arg,
                                       token.mixed_string, inner_context,
                                       pos.file_name, pos.line_number,
                                       token.comment, false);
          }
          drop_reference (token.comment);
          break;

        case token_type_character_constant:
        case token_type_lbrace:
        case token_type_rbrace:
        case token_type_assign:
        case token_type_return:
        case token_type_plus:
        case token_type_arithmetic_operator:
        case token_type_equality_test_operator:
        case token_type_logic_operator:
        case token_type_question:
        case token_type_colon:
        case token_type_number:
        case token_type_string_template:
        case token_type_regex_literal:
        case token_type_other:
          break;

        default:
          abort ();
        }

      /* This token can be followed neither by the argument list nor by
         the string argument of a keyword.  */
      next_context_iter = null_context_list_iterator;
      after_keyword = false;
    }
}
/* Scan the Vala source read from F and add the messages found in it to the
   first message list of MDLP.  REAL_FILENAME is used in I/O error
   messages, LOGICAL_FILENAME in source positions and warnings.
   FLAG_TABLE provides the flag contexts of function arguments.  */
void
extract_vala (FILE *f,
              const char *real_filename, const char *logical_filename,
              flag_context_list_table_ty *flag_table,
              msgdomain_list_ty *mdlp)
{
  message_list_ty *mlp = mdlp->item[0]->messages;
  bool at_eof;

  /* Set up the character reader.  */
  fp = f;
  real_file_name = real_filename;
  logical_file_name = xstrdup (logical_filename);
  line_number = 1;
  phase1_pushback_length = 0;

  /* Set up comment tracking and the tokenizer.  */
  last_comment_line = -1;
  last_non_comment_line = -1;
  phase3_pushback_length = 0;
  last_token_type = token_type_other;

  flag_context_list_table = flag_table;
  init_keywords ();

  /* Eat tokens until eof is seen.  When extract_balanced returns
     due to an unbalanced closing parenthesis, just restart it.  */
  do
    at_eof = extract_balanced (mlp, token_type_eof,
                               null_context, null_context_list_iterator,
                               arglist_parser_alloc (mlp, NULL));
  while (!at_eof);

  /* Detach from the input.  */
  fp = NULL;
  real_file_name = NULL;
  logical_file_name = NULL;
  line_number = 0;
}