| /* xgettext JavaScript backend. |
| Copyright (C) 2002-2003, 2005-2009, 2013-2014, 2018-2020 Free Software Foundation, Inc. |
| |
| This file was written by Andreas Stricker <andy@knitter.ch>, 2010 |
| It's based on x-python from Bruno Haible. |
| |
| This program is free software: you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
| |
| #ifdef HAVE_CONFIG_H |
| # include "config.h" |
| #endif |
| |
| /* Specification. */ |
| #include "x-javascript.h" |
| |
| #include <assert.h> |
| #include <errno.h> |
| #include <stdbool.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include "attribute.h" |
| #include "message.h" |
| #include "rc-str-list.h" |
| #include "xgettext.h" |
| #include "xg-pos.h" |
| #include "xg-encoding.h" |
| #include "xg-mixed-string.h" |
| #include "xg-arglist-context.h" |
| #include "xg-arglist-callshape.h" |
| #include "xg-arglist-parser.h" |
| #include "xg-message.h" |
| #include "error.h" |
| #include "error-progname.h" |
| #include "progname.h" |
| #include "xerror.h" |
| #include "xvasprintf.h" |
| #include "xalloc.h" |
| #include "c-strstr.h" |
| #include "c-ctype.h" |
| #include "po-charset.h" |
| #include "unistr.h" |
| #include "gettext.h" |
| |
/* Shorthand for translating a user-visible message through gettext.  */
#define _(s) gettext(s)

#undef max /* clean up after MSVC's <stdlib.h> */
/* Larger of A and B.  Both arguments are evaluated twice, so they must not
   have side effects.  */
#define max(a,b) ((a) > (b) ? (a) : (b))

/* Number of elements of the array A.  The argument is parenthesized so that
   any array-valued expression can be passed; it must be a true array, not a
   pointer.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
| |
| /* The JavaScript aka ECMA-Script syntax is defined in ECMA-262 |
| specification: |
| <https://www.ecma-international.org/publications/standards/Ecma-262.htm> |
| |
| Regarding the XML element support: |
| The earlier standard E4X |
| <https://en.wikipedia.org/wiki/ECMAScript_for_XML> |
| <https://web.archive.org/web/20131104082608/http://www.ecma-international.org/publications/standards/Ecma-357.htm> |
| is no longer widely supported. |
| Instead, nowadays, JSX is widely used. |
| <https://facebook.github.io/jsx/> |
| */ |
| |
| /* ====================== Keyword set customization. ====================== */ |
| |
| /* If true extract all strings. */ |
| static bool extract_all = false; |
| |
| static hash_table keywords; |
| static bool default_keywords = true; |
| |
| |
| void |
| x_javascript_extract_all () |
| { |
| extract_all = true; |
| } |
| |
| |
| void |
| x_javascript_keyword (const char *name) |
| { |
| if (name == NULL) |
| default_keywords = false; |
| else |
| { |
| const char *end; |
| struct callshape shape; |
| const char *colon; |
| |
| if (keywords.table == NULL) |
| hash_init (&keywords, 100); |
| |
| split_keywordspec (name, &end, &shape); |
| |
| /* The characters between name and end should form a valid C identifier. |
| A colon means an invalid parse in split_keywordspec(). */ |
| colon = strchr (name, ':'); |
| if (colon == NULL || colon >= end) |
| insert_keyword_callshape (&keywords, name, end - name, &shape); |
| } |
| } |
| |
| /* Finish initializing the keywords hash table. |
| Called after argument processing, before each file is processed. */ |
| static void |
| init_keywords () |
| { |
| if (default_keywords) |
| { |
| /* When adding new keywords here, also update the documentation in |
| xgettext.texi! */ |
| x_javascript_keyword ("gettext"); |
| x_javascript_keyword ("dgettext:2"); |
| x_javascript_keyword ("dcgettext:2"); |
| x_javascript_keyword ("ngettext:1,2"); |
| x_javascript_keyword ("dngettext:2,3"); |
| x_javascript_keyword ("pgettext:1c,2"); |
| x_javascript_keyword ("dpgettext:2c,3"); |
| x_javascript_keyword ("_"); |
| default_keywords = false; |
| } |
| } |
| |
/* Record the pass-javascript-format flag for the message arguments of the
   built-in keywords.  */
void
init_flag_table_javascript ()
{
  static const char *const flag_specs[] =
    {
      "gettext:1:pass-javascript-format",
      "dgettext:2:pass-javascript-format",
      "dcgettext:2:pass-javascript-format",
      "ngettext:1:pass-javascript-format",
      "ngettext:2:pass-javascript-format",
      "dngettext:2:pass-javascript-format",
      "dngettext:3:pass-javascript-format",
      "pgettext:2:pass-javascript-format",
      "dpgettext:3:pass-javascript-format",
      "_:1:pass-javascript-format"
    };
  size_t i;

  for (i = 0; i < SIZEOF (flag_specs); i++)
    xgettext_record_flag (flag_specs[i]);
}
| |
| |
| /* ======================== Reading of characters. ======================== */ |
| |
| /* The input file stream. */ |
| static FILE *fp; |
| |
| |
| /* 1. line_number handling. */ |
| |
| /* Maximum used, roughly a safer MB_LEN_MAX. */ |
| #define MAX_PHASE1_PUSHBACK 16 |
| static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK]; |
| static int phase1_pushback_length; |
| |
| /* Read the next single byte from the input file. */ |
| static int |
| phase1_getc () |
| { |
| int c; |
| |
| if (phase1_pushback_length) |
| c = phase1_pushback[--phase1_pushback_length]; |
| else |
| { |
| c = getc (fp); |
| |
| if (c == EOF) |
| { |
| if (ferror (fp)) |
| error (EXIT_FAILURE, errno, _("error while reading \"%s\""), |
| real_file_name); |
| return EOF; |
| } |
| } |
| |
| if (c == '\n') |
| ++line_number; |
| |
| return c; |
| } |
| |
| /* Supports MAX_PHASE1_PUSHBACK characters of pushback. */ |
| static void |
| phase1_ungetc (int c) |
| { |
| if (c != EOF) |
| { |
| if (c == '\n') |
| --line_number; |
| |
| if (phase1_pushback_length == SIZEOF (phase1_pushback)) |
| abort (); |
| phase1_pushback[phase1_pushback_length++] = c; |
| } |
| } |
| |
| |
| /* Phase 2: Conversion to Unicode. |
| For now, we expect JavaScript files to be encoded as UTF-8. */ |
| |
| /* End-of-file indicator for functions returning an UCS-4 character. */ |
| #define UEOF -1 |
| |
| static lexical_context_ty lexical_context; |
| |
| /* Maximum used, length of "<![CDATA[" tag minus one. */ |
| static int phase2_pushback[8]; |
| static int phase2_pushback_length; |
| |
| /* Read the next Unicode UCS-4 character from the input file. */ |
| static int |
| phase2_getc () |
| { |
| if (phase2_pushback_length) |
| return phase2_pushback[--phase2_pushback_length]; |
| |
| if (xgettext_current_source_encoding == po_charset_ascii) |
| { |
| int c = phase1_getc (); |
| if (c == EOF) |
| return UEOF; |
| if (!c_isascii (c)) |
| { |
| multiline_error (xstrdup (""), |
| xasprintf ("%s\n%s\n", |
| non_ascii_error_message (lexical_context, |
| real_file_name, |
| line_number), |
| _("Please specify the source encoding through --from-code\n"))); |
| exit (EXIT_FAILURE); |
| } |
| return c; |
| } |
| else if (xgettext_current_source_encoding != po_charset_utf8) |
| { |
| #if HAVE_ICONV |
| /* Use iconv on an increasing number of bytes. Read only as many bytes |
| through phase1_getc as needed. This is needed to give reasonable |
| interactive behaviour when fp is connected to an interactive tty. */ |
| unsigned char buf[MAX_PHASE1_PUSHBACK]; |
| size_t bufcount; |
| int c = phase1_getc (); |
| if (c == EOF) |
| return UEOF; |
| buf[0] = (unsigned char) c; |
| bufcount = 1; |
| |
| for (;;) |
| { |
| unsigned char scratchbuf[6]; |
| const char *inptr = (const char *) &buf[0]; |
| size_t insize = bufcount; |
| char *outptr = (char *) &scratchbuf[0]; |
| size_t outsize = sizeof (scratchbuf); |
| |
| size_t res = iconv (xgettext_current_source_iconv, |
| (ICONV_CONST char **) &inptr, &insize, |
| &outptr, &outsize); |
| /* We expect that a character has been produced if and only if |
| some input bytes have been consumed. */ |
| if ((insize < bufcount) != (outsize < sizeof (scratchbuf))) |
| abort (); |
| if (outsize == sizeof (scratchbuf)) |
| { |
| /* No character has been produced. Must be an error. */ |
| if (res != (size_t)(-1)) |
| abort (); |
| |
| if (errno == EILSEQ) |
| { |
| /* An invalid multibyte sequence was encountered. */ |
| multiline_error (xstrdup (""), |
| xasprintf (_("\ |
| %s:%d: Invalid multibyte sequence.\n\ |
| Please specify the correct source encoding through --from-code\n"), |
| real_file_name, line_number)); |
| exit (EXIT_FAILURE); |
| } |
| else if (errno == EINVAL) |
| { |
| /* An incomplete multibyte character. */ |
| int c; |
| |
| if (bufcount == MAX_PHASE1_PUSHBACK) |
| { |
| /* An overlong incomplete multibyte sequence was |
| encountered. */ |
| multiline_error (xstrdup (""), |
| xasprintf (_("\ |
| %s:%d: Long incomplete multibyte sequence.\n\ |
| Please specify the correct source encoding through --from-code\n"), |
| real_file_name, line_number)); |
| exit (EXIT_FAILURE); |
| } |
| |
| /* Read one more byte and retry iconv. */ |
| c = phase1_getc (); |
| if (c == EOF) |
| { |
| multiline_error (xstrdup (""), |
| xasprintf (_("\ |
| %s:%d: Incomplete multibyte sequence at end of file.\n\ |
| Please specify the correct source encoding through --from-code\n"), |
| real_file_name, line_number)); |
| exit (EXIT_FAILURE); |
| } |
| if (c == '\n') |
| { |
| multiline_error (xstrdup (""), |
| xasprintf (_("\ |
| %s:%d: Incomplete multibyte sequence at end of line.\n\ |
| Please specify the correct source encoding through --from-code\n"), |
| real_file_name, line_number - 1)); |
| exit (EXIT_FAILURE); |
| } |
| buf[bufcount++] = (unsigned char) c; |
| } |
| else |
| error (EXIT_FAILURE, errno, _("%s:%d: iconv failure"), |
| real_file_name, line_number); |
| } |
| else |
| { |
| size_t outbytes = sizeof (scratchbuf) - outsize; |
| size_t bytes = bufcount - insize; |
| ucs4_t uc; |
| |
| /* We expect that one character has been produced. */ |
| if (bytes == 0) |
| abort (); |
| if (outbytes == 0) |
| abort (); |
| /* Push back the unused bytes. */ |
| while (insize > 0) |
| phase1_ungetc (buf[--insize]); |
| /* Convert the character from UTF-8 to UCS-4. */ |
| if (u8_mbtoucr (&uc, scratchbuf, outbytes) < (int) outbytes) |
| { |
| /* scratchbuf contains an out-of-range Unicode character |
| (> 0x10ffff). */ |
| multiline_error (xstrdup (""), |
| xasprintf (_("\ |
| %s:%d: Invalid multibyte sequence.\n\ |
| Please specify the source encoding through --from-code\n"), |
| real_file_name, line_number)); |
| exit (EXIT_FAILURE); |
| } |
| return uc; |
| } |
| } |
| #else |
| /* If we don't have iconv(), the only supported values for |
| xgettext_global_source_encoding and thus also for |
| xgettext_current_source_encoding are ASCII and UTF-8. */ |
| abort (); |
| #endif |
| } |
| else |
| { |
| /* Read an UTF-8 encoded character. */ |
| unsigned char buf[6]; |
| unsigned int count; |
| int c; |
| ucs4_t uc; |
| |
| c = phase1_getc (); |
| if (c == EOF) |
| return UEOF; |
| buf[0] = c; |
| count = 1; |
| |
| if (buf[0] >= 0xc0) |
| { |
| c = phase1_getc (); |
| if (c == EOF) |
| return UEOF; |
| buf[1] = c; |
| count = 2; |
| } |
| |
| if (buf[0] >= 0xe0 |
| && ((buf[1] ^ 0x80) < 0x40)) |
| { |
| c = phase1_getc (); |
| if (c == EOF) |
| return UEOF; |
| buf[2] = c; |
| count = 3; |
| } |
| |
| if (buf[0] >= 0xf0 |
| && ((buf[1] ^ 0x80) < 0x40) |
| && ((buf[2] ^ 0x80) < 0x40)) |
| { |
| c = phase1_getc (); |
| if (c == EOF) |
| return UEOF; |
| buf[3] = c; |
| count = 4; |
| } |
| |
| if (buf[0] >= 0xf8 |
| && ((buf[1] ^ 0x80) < 0x40) |
| && ((buf[2] ^ 0x80) < 0x40) |
| && ((buf[3] ^ 0x80) < 0x40)) |
| { |
| c = phase1_getc (); |
| if (c == EOF) |
| return UEOF; |
| buf[4] = c; |
| count = 5; |
| } |
| |
| if (buf[0] >= 0xfc |
| && ((buf[1] ^ 0x80) < 0x40) |
| && ((buf[2] ^ 0x80) < 0x40) |
| && ((buf[3] ^ 0x80) < 0x40) |
| && ((buf[4] ^ 0x80) < 0x40)) |
| { |
| c = phase1_getc (); |
| if (c == EOF) |
| return UEOF; |
| buf[5] = c; |
| count = 6; |
| } |
| |
| u8_mbtouc (&uc, buf, count); |
| return uc; |
| } |
| } |
| |
| /* Supports max (9, UNINAME_MAX + 3) pushback characters. */ |
| static void |
| phase2_ungetc (int c) |
| { |
| if (c != UEOF) |
| { |
| if (phase2_pushback_length == SIZEOF (phase2_pushback)) |
| abort (); |
| phase2_pushback[phase2_pushback_length++] = c; |
| } |
| } |
| |
| |
| /* ========================= Accumulating strings. ======================== */ |
| |
| /* See xg-mixed-string.h for the API. */ |
| |
| |
| /* ======================== Accumulating comments. ======================== */ |
| |
| |
| /* Accumulating a single comment line. */ |
| |
| static struct mixed_string_buffer comment_buffer; |
| |
| static inline void |
| comment_start () |
| { |
| mixed_string_buffer_init (&comment_buffer, lc_comment, |
| logical_file_name, line_number); |
| } |
| |
| static inline bool |
| comment_at_start () |
| { |
| return mixed_string_buffer_is_empty (&comment_buffer); |
| } |
| |
| static inline void |
| comment_add (int c) |
| { |
| mixed_string_buffer_append_unicode (&comment_buffer, c); |
| } |
| |
| static inline const char * |
| comment_line_end (size_t chars_to_remove) |
| { |
| char *buffer = |
| mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer)); |
| size_t buflen = strlen (buffer) - chars_to_remove; |
| |
| while (buflen >= 1 |
| && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) |
| --buflen; |
| buffer[buflen] = '\0'; |
| savable_comment_add (buffer); |
| lexical_context = lc_outside; |
| return buffer; |
| } |
| |
| |
/* These are for tracking whether comments count as immediately before
   keyword.
   last_comment_line is the line on which the most recent comment started,
   last_non_comment_line the line of the most recent token that is neither
   white space nor a comment.  */
static int last_comment_line;
static int last_non_comment_line;


/* ======================== Recognizing comments.  ======================== */


/* Canonicalized encoding name for the current input file.  */
static const char *xgettext_current_file_source_encoding;

#if HAVE_ICONV
/* Converter from xgettext_current_file_source_encoding to UTF-8 (except from
   ASCII or UTF-8, when this conversion is a no-op).  */
static iconv_t xgettext_current_file_source_iconv;
#endif

/* Tracking whether the current line is a continuation line or contains a
   non-blank character.  Maintained by phase3_getc.  */
static bool continuation_or_nonblank_line;
| |
| |
| /* Phase 3: Outside strings, replace backslash-newline with nothing and a |
| comment with nothing. */ |
| |
| static int |
| phase3_getc () |
| { |
| int c; |
| |
| for (;;) |
| { |
| c = phase2_getc (); |
| if (c == '\\') |
| { |
| c = phase2_getc (); |
| if (c != '\n') |
| { |
| phase2_ungetc (c); |
| /* This shouldn't happen usually, because "A backslash is |
| illegal elsewhere on a line outside a string literal." */ |
| return '\\'; |
| } |
| /* Eat backslash-newline. */ |
| continuation_or_nonblank_line = true; |
| } |
| else if (c == '/') |
| { |
| c = phase2_getc (); |
| if (c == '/') |
| { |
| /* C++ style comment. */ |
| last_comment_line = line_number; |
| comment_start (); |
| for (;;) |
| { |
| c = phase2_getc (); |
| if (c == UEOF || c == '\n') |
| { |
| comment_line_end (0); |
| break; |
| } |
| /* We skip all leading white space, but not EOLs. */ |
| if (!(comment_at_start () && (c == ' ' || c == '\t'))) |
| comment_add (c); |
| } |
| continuation_or_nonblank_line = false; |
| return c; |
| } |
| else if (c == '*') |
| { |
| /* C style comment. */ |
| bool last_was_star = false; |
| last_comment_line = line_number; |
| comment_start (); |
| for (;;) |
| { |
| c = phase2_getc (); |
| if (c == UEOF) |
| break; |
| /* We skip all leading white space, but not EOLs. */ |
| if (!(comment_at_start () && (c == ' ' || c == '\t'))) |
| comment_add (c); |
| switch (c) |
| { |
| case '\n': |
| comment_line_end (1); |
| comment_start (); |
| last_was_star = false; |
| continue; |
| |
| case '*': |
| last_was_star = true; |
| continue; |
| case '/': |
| if (last_was_star) |
| { |
| comment_line_end (2); |
| break; |
| } |
| FALLTHROUGH; |
| |
| default: |
| last_was_star = false; |
| continue; |
| } |
| break; |
| } |
| continuation_or_nonblank_line = false; |
| } |
| else |
| { |
| phase2_ungetc (c); |
| return '/'; |
| } |
| } |
| else |
| { |
| if (c == '\n') |
| continuation_or_nonblank_line = false; |
| else if (!(c == ' ' || c == '\t' || c == '\f')) |
| continuation_or_nonblank_line = true; |
| return c; |
| } |
| } |
| } |
| |
/* Hand the character C back to phase 3.  Supports only one pushback
   character.  The character is stored through phase2_ungetc.  */
static void
phase3_ungetc (int c)
{
  phase2_ungetc (c);
}
| |
| |
| /* ========================= Accumulating strings. ======================== */ |
| |
/* Special return values of phase7_getuc.  They are negative, so that they
   can be distinguished from every character value.  */
enum
{
  P7_EOF = -1,                          /* end of file reached */
  P7_STRING_END = -2,                   /* end of the string reached */
  P7_TEMPLATE_START_OF_EXPRESSION = -3  /* ${ */
};

/* Convert an UTF-16 or UTF-32 code point to a return value that can be
   distinguished from a single-byte return value.  */
#define UNICODE(code) (0x100 + (code))

/* Test a return value of phase7_getuc whether it designates an UTF-16 or
   UTF-32 code point.  */
#define IS_UNICODE(p7_result) ((p7_result) >= 0x100)

/* Extract the UTF-16 or UTF-32 code of a return value that satisfies
   IS_UNICODE.  */
#define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
| |
| |
| /* ========================== Reading of tokens. ========================== */ |
| |
| |
| enum token_type_ty |
| { |
| token_type_eof, |
| token_type_start, |
| token_type_lparen, /* ( */ |
| token_type_rparen, /* ) */ |
| token_type_lbrace, /* { */ |
| token_type_rbrace, /* } */ |
| token_type_comma, /* , */ |
| token_type_dot, /* . */ |
| token_type_lbracket, /* [ */ |
| token_type_rbracket, /* ] */ |
| token_type_plus, /* + */ |
| token_type_regexp, /* /.../ */ |
| token_type_operator, /* - * / % . < > = ~ ! | & ? : ^ */ |
| token_type_equal, /* = */ |
| token_type_string, /* "abc", 'abc' */ |
| token_type_template, /* `abc` */ |
| token_type_ltemplate, /* left part of template: `abc${ */ |
| token_type_mtemplate, /* middle part of template: }abc${ */ |
| token_type_rtemplate, /* right part of template: }abc` */ |
| token_type_xml_tag, /* < or </ */ |
| token_type_xml_element_start, /* last token of < ... > */ |
| token_type_xml_element_end, /* last token of </ ... > */ |
| token_type_xml_empty_element, /* last token of < ... /> */ |
| token_type_keyword, /* return, else */ |
| token_type_symbol, /* symbol, number */ |
| token_type_other /* misc. operator */ |
| }; |
| typedef enum token_type_ty token_type_ty; |
| |
| typedef struct token_ty token_ty; |
| struct token_ty |
| { |
| token_type_ty type; |
| char *string; /* for token_type_symbol, token_type_keyword */ |
| mixed_string_ty *mixed_string; /* for token_type_string, token_type_template */ |
| refcounted_string_list_ty *comment; /* for token_type_string, token_type_template */ |
| int line_number; |
| }; |
| |
| |
| /* Free the memory pointed to by a 'struct token_ty'. */ |
| static inline void |
| free_token (token_ty *tp) |
| { |
| if (tp->type == token_type_symbol || tp->type == token_type_keyword) |
| free (tp->string); |
| if (tp->type == token_type_string || tp->type == token_type_template) |
| { |
| mixed_string_free (tp->mixed_string); |
| drop_reference (tp->comment); |
| } |
| } |
| |
| |
| /* JavaScript provides strings with either double or single quotes: |
| "abc" or 'abc' or `abc` |
| Both may contain special sequences after a backslash: |
| \', \", \\, \b, \f, \n, \r, \t, \v |
| Special characters can be entered using hexadecimal escape |
| sequences or deprecated octal escape sequences: |
| \xXX, \OOO |
| Any unicode point can be entered using Unicode escape sequences: |
| \uNNNN |
| If a sequence after a backslash is not a legitimate character |
| escape sequence, the character value is the sequence itself without |
| a backslash. For example, \xxx is treated as xxx. */ |
| |
| static int |
| phase7_getuc (int quote_char) |
| { |
| int c; |
| |
| for (;;) |
| { |
| /* Use phase 2, because phase 3 elides comments. */ |
| c = phase2_getc (); |
| |
| if (c == UEOF) |
| return P7_EOF; |
| |
| if (c == quote_char) |
| return P7_STRING_END; |
| |
| if (c == '$' && quote_char == '`') |
| { |
| int c1 = phase2_getc (); |
| |
| if (c1 == '{') |
| return P7_TEMPLATE_START_OF_EXPRESSION; |
| phase2_ungetc (c1); |
| } |
| |
| if (c == '\n') |
| { |
| if (quote_char == '`') |
| return UNICODE ('\n'); |
| else |
| { |
| phase2_ungetc (c); |
| error_with_progname = false; |
| error (0, 0, _("%s:%d: warning: unterminated string"), |
| logical_file_name, line_number); |
| error_with_progname = true; |
| return P7_STRING_END; |
| } |
| } |
| |
| if (c == '\r' && quote_char == '`') |
| { |
| /* Line terminators inside template literals are normalized to \n, |
| says <http://exploringjs.com/es6/ch_template-literals.html>. */ |
| int c1 = phase2_getc (); |
| |
| if (c1 == '\n') |
| return UNICODE ('\n'); |
| phase2_ungetc (c1); |
| } |
| |
| if (c != '\\') |
| return UNICODE (c); |
| |
| /* Dispatch according to the character following the backslash. */ |
| c = phase2_getc (); |
| if (c == UEOF) |
| return P7_EOF; |
| |
| switch (c) |
| { |
| case '\n': |
| continue; |
| case 'b': |
| return UNICODE ('\b'); |
| case 'f': |
| return UNICODE ('\f'); |
| case 'n': |
| return UNICODE ('\n'); |
| case 'r': |
| return UNICODE ('\r'); |
| case 't': |
| return UNICODE ('\t'); |
| case 'v': |
| return UNICODE ('\v'); |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': |
| { |
| int n = c - '0'; |
| |
| c = phase2_getc (); |
| if (c != UEOF) |
| { |
| if (c >= '0' && c <= '7') |
| { |
| n = (n << 3) + (c - '0'); |
| c = phase2_getc (); |
| if (c != UEOF) |
| { |
| if (c >= '0' && c <= '7') |
| n = (n << 3) + (c - '0'); |
| else |
| phase2_ungetc (c); |
| } |
| } |
| else |
| phase2_ungetc (c); |
| } |
| return UNICODE (n); |
| } |
| case 'x': |
| { |
| int c1 = phase2_getc (); |
| int n1; |
| |
| if (c1 >= '0' && c1 <= '9') |
| n1 = c1 - '0'; |
| else if (c1 >= 'A' && c1 <= 'F') |
| n1 = c1 - 'A' + 10; |
| else if (c1 >= 'a' && c1 <= 'f') |
| n1 = c1 - 'a' + 10; |
| else |
| n1 = -1; |
| |
| if (n1 >= 0) |
| { |
| int c2 = phase2_getc (); |
| int n2; |
| |
| if (c2 >= '0' && c2 <= '9') |
| n2 = c2 - '0'; |
| else if (c2 >= 'A' && c2 <= 'F') |
| n2 = c2 - 'A' + 10; |
| else if (c2 >= 'a' && c2 <= 'f') |
| n2 = c2 - 'a' + 10; |
| else |
| n2 = -1; |
| |
| if (n2 >= 0) |
| { |
| int n = (n1 << 4) + n2; |
| return UNICODE (n); |
| } |
| |
| phase2_ungetc (c2); |
| } |
| phase2_ungetc (c1); |
| return UNICODE (c); |
| } |
| case 'u': |
| { |
| unsigned char buf[4]; |
| unsigned int n = 0; |
| int i; |
| |
| for (i = 0; i < 4; i++) |
| { |
| int c1 = phase2_getc (); |
| |
| if (c1 >= '0' && c1 <= '9') |
| n = (n << 4) + (c1 - '0'); |
| else if (c1 >= 'A' && c1 <= 'F') |
| n = (n << 4) + (c1 - 'A' + 10); |
| else if (c1 >= 'a' && c1 <= 'f') |
| n = (n << 4) + (c1 - 'a' + 10); |
| else |
| { |
| phase2_ungetc (c1); |
| while (--i >= 0) |
| phase2_ungetc (buf[i]); |
| return UNICODE (c); |
| } |
| |
| buf[i] = c1; |
| } |
| return UNICODE (n); |
| } |
| default: |
| return UNICODE (c); |
| } |
| } |
| } |
| |
| |
| /* Combine characters into tokens. Discard whitespace except newlines at |
| the end of logical lines. */ |
| |
| static token_ty phase5_pushback[2]; |
| static int phase5_pushback_length; |
| |
| static token_type_ty last_token_type; |
| |
| /* Returns true if last_token_type indicates that we have just seen the |
| possibly last token of an expression. In this case, '<', '>', and '/' |
| need to be interpreted as operators, rather than as XML markup or start |
| of a regular expression. */ |
| static bool |
| is_after_expression (void) |
| { |
| switch (last_token_type) |
| { |
| case token_type_rparen: |
| case token_type_rbrace: |
| case token_type_rbracket: |
| case token_type_regexp: |
| case token_type_string: |
| case token_type_template: |
| case token_type_rtemplate: |
| case token_type_xml_element_end: |
| case token_type_xml_empty_element: |
| case token_type_symbol: |
| return true; |
| |
| case token_type_eof: |
| case token_type_start: |
| case token_type_lparen: |
| case token_type_lbrace: |
| case token_type_comma: |
| case token_type_dot: |
| case token_type_lbracket: |
| case token_type_plus: |
| case token_type_operator: |
| case token_type_equal: |
| case token_type_ltemplate: |
| case token_type_mtemplate: |
| case token_type_xml_tag: |
| case token_type_xml_element_start: |
| case token_type_keyword: |
| case token_type_other: |
| return false; |
| |
| default: |
| abort (); |
| } |
| } |
| |
| static void |
| phase5_scan_regexp (void) |
| { |
| int c; |
| |
| /* Scan for end of RegExp literal ('/'). */ |
| for (;;) |
| { |
| /* Must use phase2 as there can't be comments. */ |
| c = phase2_getc (); |
| if (c == '/') |
| break; |
| if (c == '\\') |
| { |
| c = phase2_getc (); |
| if (c != UEOF) |
| continue; |
| } |
| if (c == UEOF) |
| { |
| error_with_progname = false; |
| error (0, 0, |
| _("%s:%d: warning: RegExp literal terminated too early"), |
| logical_file_name, line_number); |
| error_with_progname = true; |
| return; |
| } |
| } |
| |
| /* Scan for modifier flags (ECMA-262 5th section 15.10.4.1). */ |
| c = phase2_getc (); |
| if (!(c == 'g' || c == 'i' || c == 'm')) |
| phase2_ungetc (c); |
| } |
| |
/* Number of open template literals `...${ */
static int template_literal_depth;

/* Number of open '{' tokens, at each template literal level.
   The "current" element is brace_depths[template_literal_depth].  */
static int *brace_depths;
/* Number of allocated elements in brace_depths.  */
static size_t brace_depths_alloc;

/* Adds a new brace_depths level after template_literal_depth was
   incremented.  The array is enlarged when needed, and the count of open
   braces at the new level starts at 0.  */
static void
new_brace_depth_level (void)
{
  if (template_literal_depth == brace_depths_alloc)
    {
      size_t grown = 2 * brace_depths_alloc + 1;

      /* Afterwards template_literal_depth < brace_depths_alloc.  */
      brace_depths = xrealloc (brace_depths, grown * sizeof (int));
      brace_depths_alloc = grown;
    }
  brace_depths[template_literal_depth] = 0;
}
| |
| /* Number of open XML elements. */ |
| static int xml_element_depth; |
| static bool inside_embedded_js_in_xml; |
| |
| static bool |
| phase5_scan_xml_markup (token_ty *tp) |
| { |
| struct |
| { |
| const char *start; |
| const char *end; |
| } markers[] = |
| { |
| { "!--", "--" }, |
| { "![CDATA[", "]]" }, |
| { "?", "?" } |
| }; |
| int i; |
| |
| for (i = 0; i < SIZEOF (markers); i++) |
| { |
| const char *start = markers[i].start; |
| const char *end = markers[i].end; |
| int j; |
| |
| /* Look for a start marker. */ |
| for (j = 0; start[j] != '\0'; j++) |
| { |
| int c; |
| |
| assert (phase2_pushback_length + j < SIZEOF (phase2_pushback)); |
| c = phase2_getc (); |
| if (c == UEOF) |
| goto eof; |
| if (c != start[j]) |
| { |
| int k = j; |
| |
| phase2_ungetc (c); |
| k--; |
| |
| for (; k >= 0; k--) |
| phase2_ungetc (start[k]); |
| break; |
| } |
| } |
| |
| if (start[j] == '\0') |
| /* Skip until the end marker. */ |
| for (;;) |
| { |
| int c; |
| |
| for (j = 0; end[j] != '\0'; j++) |
| { |
| assert (phase2_pushback_length + 1 < SIZEOF (phase2_pushback)); |
| c = phase2_getc (); |
| if (c == UEOF) |
| goto eof; |
| if (c != end[j]) |
| { |
| /* Don't push the first character back so the next |
| iteration start from the second character. */ |
| if (j > 0) |
| { |
| int k = j; |
| |
| phase2_ungetc (c); |
| k--; |
| |
| for (; k > 0; k--) |
| phase2_ungetc (end[k]); |
| } |
| break; |
| } |
| } |
| |
| if (end[j] == '\0') |
| { |
| c = phase2_getc (); |
| if (c == UEOF) |
| goto eof; |
| if (c != '>') |
| { |
| error_with_progname = false; |
| error (0, 0, |
| _("%s:%d: warning: %s is not allowed"), |
| logical_file_name, line_number, |
| end); |
| error_with_progname = true; |
| return false; |
| } |
| return true; |
| } |
| } |
| } |
| return false; |
| |
| eof: |
| error_with_progname = false; |
| error (0, 0, |
| _("%s:%d: warning: unterminated XML markup"), |
| logical_file_name, line_number); |
| error_with_progname = true; |
| return false; |
| } |
| |
| static void |
| phase5_get (token_ty *tp) |
| { |
| int c; |
| |
| if (phase5_pushback_length) |
| { |
| *tp = phase5_pushback[--phase5_pushback_length]; |
| last_token_type = tp->type; |
| return; |
| } |
| |
| for (;;) |
| { |
| tp->line_number = line_number; |
| c = phase3_getc (); |
| |
| switch (c) |
| { |
| case UEOF: |
| tp->type = last_token_type = token_type_eof; |
| return; |
| |
| case '\n': |
| if (last_non_comment_line > last_comment_line) |
| savable_comment_reset (); |
| FALLTHROUGH; |
| case ' ': |
| case '\t': |
| case '\f': |
| /* Ignore whitespace and comments. */ |
| continue; |
| } |
| |
| last_non_comment_line = tp->line_number; |
| |
| switch (c) |
| { |
| case '.': |
| { |
| int c1 = phase3_getc (); |
| phase3_ungetc (c1); |
| if (!(c1 >= '0' && c1 <= '9')) |
| { |
| |
| tp->type = last_token_type = token_type_dot; |
| return; |
| } |
| } |
| FALLTHROUGH; |
| case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': |
| case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': |
| case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': |
| case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': |
| case 'Y': case 'Z': |
| case '_': |
| case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': |
| case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': |
| case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': |
| case 's': case 't': case 'u': case 'v': case 'w': case 'x': |
| case 'y': case 'z': |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| /* Symbol, or part of a number. */ |
| { |
| static char *buffer; |
| static int bufmax; |
| int bufpos; |
| |
| bufpos = 0; |
| for (;;) |
| { |
| if (bufpos >= bufmax) |
| { |
| bufmax = 2 * bufmax + 10; |
| buffer = xrealloc (buffer, bufmax); |
| } |
| buffer[bufpos++] = c; |
| c = phase3_getc (); |
| switch (c) |
| { |
| case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': |
| case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': |
| case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': |
| case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': |
| case 'Y': case 'Z': |
| case '_': |
| case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': |
| case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': |
| case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': |
| case 's': case 't': case 'u': case 'v': case 'w': case 'x': |
| case 'y': case 'z': |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| continue; |
| default: |
| phase3_ungetc (c); |
| break; |
| } |
| break; |
| } |
| if (bufpos >= bufmax) |
| { |
| bufmax = 2 * bufmax + 10; |
| buffer = xrealloc (buffer, bufmax); |
| } |
| buffer[bufpos] = '\0'; |
| tp->string = xstrdup (buffer); |
| if (strcmp (buffer, "return") == 0 |
| || strcmp (buffer, "else") == 0) |
| tp->type = last_token_type = token_type_keyword; |
| else |
| tp->type = last_token_type = token_type_symbol; |
| return; |
| } |
| |
| case '"': case '\'': |
| /* Strings. */ |
| { |
| int quote_char = c; |
| lexical_context_ty saved_lexical_context = lexical_context; |
| struct mixed_string_buffer msb; |
| |
| lexical_context = lc_string; |
| /* Start accumulating the string. */ |
| mixed_string_buffer_init (&msb, lexical_context, |
| logical_file_name, line_number); |
| for (;;) |
| { |
| int uc = phase7_getuc (quote_char); |
| |
| /* Keep line_number in sync. */ |
| msb.line_number = line_number; |
| |
| if (uc == P7_EOF || uc == P7_STRING_END) |
| break; |
| |
| if (IS_UNICODE (uc)) |
| { |
| assert (UNICODE_VALUE (uc) >= 0 |
| && UNICODE_VALUE (uc) < 0x110000); |
| mixed_string_buffer_append_unicode (&msb, |
| UNICODE_VALUE (uc)); |
| } |
| else |
| mixed_string_buffer_append_char (&msb, uc); |
| } |
| tp->mixed_string = mixed_string_buffer_result (&msb); |
| tp->comment = add_reference (savable_comment); |
| lexical_context = saved_lexical_context; |
| tp->type = last_token_type = token_type_string; |
| return; |
| } |
| |
| case '`': |
| /* Template literals. */ |
| { |
| struct mixed_string_buffer msb; |
| |
| lexical_context = lc_string; |
| /* Start accumulating the string. */ |
| mixed_string_buffer_init (&msb, lexical_context, |
| logical_file_name, line_number); |
| for (;;) |
| { |
| int uc = phase7_getuc ('`'); |
| |
| /* Keep line_number in sync. */ |
| msb.line_number = line_number; |
| |
| if (uc == P7_EOF || uc == P7_STRING_END) |
| { |
| tp->mixed_string = mixed_string_buffer_result (&msb); |
| tp->comment = add_reference (savable_comment); |
| tp->type = last_token_type = token_type_template; |
| break; |
| } |
| |
| if (uc == P7_TEMPLATE_START_OF_EXPRESSION) |
| { |
| mixed_string_buffer_destroy (&msb); |
| tp->type = last_token_type = token_type_ltemplate; |
| template_literal_depth++; |
| new_brace_depth_level (); |
| break; |
| } |
| |
| if (IS_UNICODE (uc)) |
| { |
| assert (UNICODE_VALUE (uc) >= 0 |
| && UNICODE_VALUE (uc) < 0x110000); |
| mixed_string_buffer_append_unicode (&msb, |
| UNICODE_VALUE (uc)); |
| } |
| else |
| mixed_string_buffer_append_char (&msb, uc); |
| } |
| lexical_context = lc_outside; |
| return; |
| } |
| |
| case '+': |
| tp->type = last_token_type = token_type_plus; |
| return; |
| |
| /* Identify operators. The multiple character ones are simply ignored |
| * as they are recognized here and are otherwise not relevant. */ |
| case '-': case '*': /* '+' and '/' are not listed here! */ |
| case '%': |
| case '~': case '!': case '|': case '&': case '^': |
| case '?': case ':': |
| tp->type = last_token_type = token_type_operator; |
| return; |
| |
| case '=': |
| tp->type = last_token_type = token_type_equal; |
| return; |
| |
| case '<': |
| { |
| /* We assume: |
| - XMLMarkup and XMLElement are not allowed after an expression, |
| - embedded JavaScript expressions in XML do not recurse. |
| */ |
| if (xml_element_depth > 0 |
| || (!inside_embedded_js_in_xml |
| && ! is_after_expression ())) |
| { |
| /* Comments, PI, or CDATA. */ |
| if (phase5_scan_xml_markup (tp)) |
| /* BUG: *tp is not filled in here! */ |
| return; |
| c = phase2_getc (); |
| |
| if (c == '/') |
| { |
| /* Closing tag. */ |
| lexical_context = lc_xml_close_tag; |
| } |
| else |
| { |
| /* Opening element. */ |
| phase2_ungetc (c); |
| lexical_context = lc_xml_open_tag; |
| xml_element_depth++; |
| } |
| tp->type = last_token_type = token_type_xml_tag; |
| } |
| else |
| tp->type = last_token_type = token_type_operator; |
| } |
| return; |
| |
| case '>': |
| if (xml_element_depth > 0 && !inside_embedded_js_in_xml) |
| { |
| switch (lexical_context) |
| { |
| case lc_xml_open_tag: |
| lexical_context = lc_xml_content; |
| tp->type = last_token_type = token_type_xml_element_start; |
| return; |
| |
| case lc_xml_close_tag: |
| if (--xml_element_depth > 0) |
| lexical_context = lc_xml_content; |
| else |
| lexical_context = lc_outside; |
| tp->type = last_token_type = token_type_xml_element_end; |
| return; |
| |
| default: |
| break; |
| } |
| } |
| tp->type = last_token_type = token_type_operator; |
| return; |
| |
| case '/': |
| if (xml_element_depth > 0 && !inside_embedded_js_in_xml) |
| { |
| /* If it appears in an opening tag of an XML element, it's |
| part of '/>'. */ |
| if (lexical_context == lc_xml_open_tag) |
| { |
| c = phase2_getc (); |
| if (c == '>') |
| { |
| if (--xml_element_depth > 0) |
| lexical_context = lc_xml_content; |
| else |
| lexical_context = lc_outside; |
| tp->type = last_token_type = token_type_xml_empty_element; |
| return; |
| } |
| else |
| phase2_ungetc (c); |
| } |
| } |
| |
| /* Either a division operator or the start of a regular expression |
| literal. If the '/' token is spotted after an expression, it's a |
| division; otherwise it's a regular expression. */ |
| if (is_after_expression ()) |
| tp->type = last_token_type = token_type_operator; |
| else |
| { |
| phase5_scan_regexp (); |
| tp->type = last_token_type = token_type_regexp; |
| } |
| return; |
| |
| case '{': |
| if (xml_element_depth > 0 && !inside_embedded_js_in_xml) |
| inside_embedded_js_in_xml = true; |
| else |
| brace_depths[template_literal_depth]++; |
| tp->type = last_token_type = token_type_lbrace; |
| return; |
| |
| case '}': |
| if (xml_element_depth > 0 && inside_embedded_js_in_xml) |
| inside_embedded_js_in_xml = false; |
| else if (brace_depths[template_literal_depth] > 0) |
| brace_depths[template_literal_depth]--; |
| else if (template_literal_depth > 0) |
| { |
| /* Middle or right part of template literal. */ |
| for (;;) |
| { |
| int uc = phase7_getuc ('`'); |
| |
| if (uc == P7_EOF || uc == P7_STRING_END) |
| { |
| tp->type = last_token_type = token_type_rtemplate; |
| template_literal_depth--; |
| break; |
| } |
| |
| if (uc == P7_TEMPLATE_START_OF_EXPRESSION) |
| { |
| tp->type = last_token_type = token_type_mtemplate; |
| break; |
| } |
| } |
| return; |
| } |
| tp->type = last_token_type = token_type_rbrace; |
| return; |
| |
| case '(': |
| tp->type = last_token_type = token_type_lparen; |
| return; |
| |
| case ')': |
| tp->type = last_token_type = token_type_rparen; |
| return; |
| |
| case ',': |
| tp->type = last_token_type = token_type_comma; |
| return; |
| |
| case '[': |
| tp->type = last_token_type = token_type_lbracket; |
| return; |
| |
| case ']': |
| tp->type = last_token_type = token_type_rbracket; |
| return; |
| |
| default: |
| /* We could carefully recognize each of the 2 and 3 character |
| operators, but it is not necessary, as we only need to recognize |
| gettext invocations. Don't bother. */ |
| tp->type = last_token_type = token_type_other; |
| return; |
| } |
| } |
| } |
| |
| /* Supports only one pushback token. */ |
| static void |
| phase5_unget (token_ty *tp) |
| { |
| if (tp->type != token_type_eof) |
| { |
| if (phase5_pushback_length == SIZEOF (phase5_pushback)) |
| abort (); |
| phase5_pushback[phase5_pushback_length++] = *tp; |
| } |
| } |
| |
| |
| /* String concatenation with '+'. |
| Handling of tagged template literals. */ |
| |
| static void |
| x_javascript_lex (token_ty *tp) |
| { |
| phase5_get (tp); |
| if (tp->type == token_type_string || tp->type == token_type_template) |
| { |
| mixed_string_ty *sum = tp->mixed_string; |
| |
| for (;;) |
| { |
| token_ty token2; |
| |
| phase5_get (&token2); |
| if (token2.type == token_type_plus) |
| { |
| token_ty token3; |
| |
| phase5_get (&token3); |
| if (token3.type == token_type_string |
| || token3.type == token_type_template) |
| { |
| sum = mixed_string_concat_free1 (sum, token3.mixed_string); |
| |
| free_token (&token3); |
| free_token (&token2); |
| continue; |
| } |
| phase5_unget (&token3); |
| } |
| phase5_unget (&token2); |
| break; |
| } |
| tp->mixed_string = sum; |
| } |
| else if (tp->type == token_type_symbol) |
| { |
| token_ty token2; |
| |
| phase5_get (&token2); |
| if (token2.type == token_type_template) |
| { |
| /* The value of |
| tag `abc` |
| is the value of the function call |
| tag (["abc"]) |
| We don't know anything about this value. Therefore, don't |
| let the extractor see this template literal. */ |
| free_token (&token2); |
| } |
| else |
| phase5_unget (&token2); |
| } |
| } |
| |
| |
| /* ========================= Extracting strings. ========================== */ |
| |
| |
| /* Context lookup table. */ |
| static flag_context_list_table_ty *flag_context_list_table; |
| |
| |
/* The file is broken into tokens.  Scan the token stream, looking for
   a keyword, followed by a left paren, followed by a string.  When we
   see this sequence, we have something to remember.  We assume we are
   looking at a valid JavaScript program, and leave the complaints about
   the grammar to the JavaScript engine.

     Normal handling: Look for
       keyword ( ... msgid ... )
     Plural handling: Look for
       keyword ( ... msgid ... msgid_plural ... )

   We use recursion because the arguments before msgid or between msgid
   and msgid_plural can contain subexpressions of the same form.  */
| |
| |
| /* Extract messages until the next balanced closing parenthesis or bracket. |
| Extracted messages are added to MLP. |
| DELIM can be either token_type_rparen or token_type_rbracket, or |
| token_type_eof to accept both. |
| Return true upon eof, false upon closing parenthesis or bracket. */ |
| static bool |
| extract_balanced (message_list_ty *mlp, |
| token_type_ty delim, |
| flag_context_ty outer_context, |
| flag_context_list_iterator_ty context_iter, |
| struct arglist_parser *argparser) |
| { |
| /* Current argument number. */ |
| int arg = 1; |
| /* 0 when no keyword has been seen. 1 right after a keyword is seen. */ |
| int state; |
| /* Parameters of the keyword just seen. Defined only in state 1. */ |
| const struct callshapes *next_shapes = NULL; |
| /* Context iterator that will be used if the next token is a '('. */ |
| flag_context_list_iterator_ty next_context_iter = |
| passthrough_context_list_iterator; |
| /* Current context. */ |
| flag_context_ty inner_context = |
| inherited_context (outer_context, |
| flag_context_list_iterator_advance (&context_iter)); |
| |
| /* Start state is 0. */ |
| state = 0; |
| |
| for (;;) |
| { |
| token_ty token; |
| |
| x_javascript_lex (&token); |
| switch (token.type) |
| { |
| case token_type_symbol: |
| { |
| void *keyword_value; |
| |
| if (hash_find_entry (&keywords, token.string, strlen (token.string), |
| &keyword_value) |
| == 0) |
| { |
| next_shapes = (const struct callshapes *) keyword_value; |
| state = 1; |
| } |
| else |
| state = 0; |
| } |
| next_context_iter = |
| flag_context_list_iterator ( |
| flag_context_list_table_lookup ( |
| flag_context_list_table, |
| token.string, strlen (token.string))); |
| free (token.string); |
| continue; |
| |
| case token_type_lparen: |
| if (extract_balanced (mlp, token_type_rparen, |
| inner_context, next_context_iter, |
| arglist_parser_alloc (mlp, |
| state ? next_shapes : NULL))) |
| { |
| arglist_parser_done (argparser, arg); |
| return true; |
| } |
| next_context_iter = null_context_list_iterator; |
| state = 0; |
| continue; |
| |
| case token_type_rparen: |
| if (delim == token_type_rparen || delim == token_type_eof) |
| { |
| arglist_parser_done (argparser, arg); |
| return false; |
| } |
| next_context_iter = null_context_list_iterator; |
| state = 0; |
| continue; |
| |
| case token_type_comma: |
| arg++; |
| inner_context = |
| inherited_context (outer_context, |
| flag_context_list_iterator_advance ( |
| &context_iter)); |
| next_context_iter = passthrough_context_list_iterator; |
| state = 0; |
| continue; |
| |
| case token_type_lbracket: |
| if (extract_balanced (mlp, token_type_rbracket, |
| null_context, null_context_list_iterator, |
| arglist_parser_alloc (mlp, NULL))) |
| { |
| arglist_parser_done (argparser, arg); |
| return true; |
| } |
| next_context_iter = null_context_list_iterator; |
| state = 0; |
| continue; |
| |
| case token_type_rbracket: |
| if (delim == token_type_rbracket || delim == token_type_eof) |
| { |
| arglist_parser_done (argparser, arg); |
| return false; |
| } |
| next_context_iter = null_context_list_iterator; |
| state = 0; |
| continue; |
| |
| case token_type_lbrace: |
| if (extract_balanced (mlp, token_type_rbrace, |
| null_context, null_context_list_iterator, |
| arglist_parser_alloc (mlp, NULL))) |
| { |
| arglist_parser_done (argparser, arg); |
| return true; |
| } |
| next_context_iter = null_context_list_iterator; |
| state = 0; |
| continue; |
| |
| case token_type_rbrace: |
| if (delim == token_type_rbrace || delim == token_type_eof) |
| { |
| arglist_parser_done (argparser, arg); |
| return false; |
| } |
| next_context_iter = null_context_list_iterator; |
| state = 0; |
| continue; |
| |
| case token_type_string: |
| case token_type_template: |
| { |
| lex_pos_ty pos; |
| |
| pos.file_name = logical_file_name; |
| pos.line_number = token.line_number; |
| |
| if (extract_all) |
| { |
| char *string = mixed_string_contents (token.mixed_string); |
| mixed_string_free (token.mixed_string); |
| remember_a_message (mlp, NULL, string, true, false, |
| inner_context, &pos, |
| NULL, token.comment, true); |
| } |
| else |
| arglist_parser_remember (argparser, arg, token.mixed_string, |
| inner_context, |
| pos.file_name, pos.line_number, |
| token.comment, true); |
| } |
| drop_reference (token.comment); |
| next_context_iter = null_context_list_iterator; |
| state = 0; |
| continue; |
| |
| case token_type_xml_element_start: |
| if (extract_balanced (mlp, token_type_xml_element_end, |
| null_context, null_context_list_iterator, |
| arglist_parser_alloc (mlp, NULL))) |
| { |
| arglist_parser_done (argparser, arg); |
| return true; |
| } |
| next_context_iter = null_context_list_iterator; |
| state = 0; |
| continue; |
| |
| case token_type_xml_element_end: |
| if (delim == token_type_xml_element_end || delim == token_type_eof) |
| { |
| arglist_parser_done (argparser, arg); |
| return false; |
| } |
| next_context_iter = null_context_list_iterator; |
| state = 0; |
| continue; |
| |
| case token_type_eof: |
| arglist_parser_done (argparser, arg); |
| return true; |
| |
| case token_type_ltemplate: |
| case token_type_mtemplate: |
| case token_type_rtemplate: |
| case token_type_keyword: |
| case token_type_start: |
| case token_type_dot: |
| case token_type_plus: |
| case token_type_regexp: |
| case token_type_operator: |
| case token_type_equal: |
| case token_type_xml_tag: |
| case token_type_xml_empty_element: |
| case token_type_other: |
| next_context_iter = null_context_list_iterator; |
| state = 0; |
| continue; |
| |
| default: |
| abort (); |
| } |
| } |
| } |
| |
| |
| void |
| extract_javascript (FILE *f, |
| const char *real_filename, const char *logical_filename, |
| flag_context_list_table_ty *flag_table, |
| msgdomain_list_ty *mdlp) |
| { |
| message_list_ty *mlp = mdlp->item[0]->messages; |
| |
| fp = f; |
| real_file_name = real_filename; |
| logical_file_name = xstrdup (logical_filename); |
| line_number = 1; |
| |
| phase1_pushback_length = 0; |
| |
| lexical_context = lc_outside; |
| |
| phase2_pushback_length = 0; |
| |
| last_comment_line = -1; |
| last_non_comment_line = -1; |
| |
| xgettext_current_file_source_encoding = |
| (xgettext_global_source_encoding != NULL ? xgettext_global_source_encoding : |
| po_charset_ascii); |
| #if HAVE_ICONV |
| xgettext_current_file_source_iconv = xgettext_global_source_iconv; |
| #endif |
| |
| xgettext_current_source_encoding = xgettext_current_file_source_encoding; |
| #if HAVE_ICONV |
| xgettext_current_source_iconv = xgettext_current_file_source_iconv; |
| #endif |
| |
| continuation_or_nonblank_line = false; |
| |
| phase5_pushback_length = 0; |
| last_token_type = token_type_start; |
| |
| template_literal_depth = 0; |
| new_brace_depth_level (); |
| xml_element_depth = 0; |
| inside_embedded_js_in_xml = false; |
| |
| flag_context_list_table = flag_table; |
| |
| init_keywords (); |
| |
| /* Eat tokens until eof is seen. When extract_balanced returns |
| due to an unbalanced closing parenthesis, just restart it. */ |
| while (!extract_balanced (mlp, token_type_eof, |
| null_context, null_context_list_iterator, |
| arglist_parser_alloc (mlp, NULL))) |
| ; |
| |
| fp = NULL; |
| real_file_name = NULL; |
| logical_file_name = NULL; |
| line_number = 0; |
| } |