blob: 13d4a531056b149bfb0e517d0623491fb0d7eae2 [file] [log] [blame]
/* xgettext C# backend.
Copyright (C) 2003-2009, 2011, 2014, 2018-2020 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2003.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
/* Specification. */
#include "x-csharp.h"
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "attribute.h"
#include "message.h"
#include "rc-str-list.h"
#include "xgettext.h"
#include "xg-pos.h"
#include "xg-encoding.h"
#include "xg-mixed-string.h"
#include "xg-arglist-context.h"
#include "xg-arglist-callshape.h"
#include "xg-arglist-parser.h"
#include "xg-message.h"
#include "c-ctype.h"
#include "error.h"
#include "error-progname.h"
#include "xalloc.h"
#include "xerror.h"
#include "xvasprintf.h"
#include "mem-hash-map.h"
#include "po-charset.h"
#include "unistr.h"
#include "gettext.h"
#define _(s) gettext(s)
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
/* The C# syntax is defined in ECMA-334, second edition. */
/* ====================== Keyword set customization. ====================== */
/* If true extract all strings. */
static bool extract_all = false;
/* Table of keywords and their call shapes.  It is created lazily by
   x_csharp_keyword () (hash_init) and filled there through
   insert_keyword_callshape ().  */
static hash_table keywords;
/* True as long as the built-in keywords still have to be registered.
   Cleared by x_csharp_keyword (NULL) and by init_keywords () once it has
   registered them.  */
static bool default_keywords = true;
/* Switch on the "extract all strings" mode by setting the file-level
   extract_all flag.  */
void
x_csharp_extract_all ()
{
  extract_all = true;
}
/* Processes a --keyword option.
   Non-ASCII function names can be used if given in UTF-8 encoding.
   A NULL name only disables the built-in default keywords.  Otherwise the
   keyword specification is split with split_keywordspec () and, if the
   identifier part is acceptable, recorded in the keywords table.  */
void
x_csharp_keyword (const char *name)
{
  const char *spec_end;
  struct callshape shape;
  const char *first_colon;

  if (name == NULL)
    {
      default_keywords = false;
      return;
    }

  /* Create the table on first use.  */
  if (keywords.table == NULL)
    hash_init (&keywords, 100);

  split_keywordspec (name, &spec_end, &shape);

  /* The characters between name and spec_end should form a valid C#
     identifier sequence with dots.
     A colon means an invalid parse in split_keywordspec().  */
  first_colon = strchr (name, ':');
  if (first_colon != NULL && first_colon < spec_end)
    return;

  insert_keyword_callshape (&keywords, name, spec_end - name, &shape);
}
/* Finish initializing the keywords hash table.
   Called after argument processing, before each file is processed.
   Registers the built-in keywords once, unless default_keywords has been
   cleared beforehand.  */
static void
init_keywords ()
{
  /* When adding new keywords here, also update the documentation in
     xgettext.texi!  */
  static const char *const builtin_keywords[] =
    {
      /* Resource{Manager,Set}.GetString */
      "GetString",
      /* GettextResource{Manager,Set}.GetPluralString */
      "GetPluralString:1,2",
      /* Resource{Manager,Set}.GetParticularString */
      "GetParticularString:1c,2",
      /* Resource{Manager,Set}.GetParticularPluralString */
      "GetParticularPluralString:1c,2,3"
    };
  size_t i;

  if (!default_keywords)
    return;

  for (i = 0; i < SIZEOF (builtin_keywords); i++)
    x_csharp_keyword (builtin_keywords[i]);
  default_keywords = false;
}
/* Record the built-in format-string flags of the C# backend by passing each
   specification to xgettext_record_flag (), in the order listed.  */
void
init_flag_table_csharp ()
{
  static const char *const flag_specs[] =
    {
      "GetString:1:pass-csharp-format",
      "GetPluralString:1:pass-csharp-format",
      "GetPluralString:2:pass-csharp-format",
      "GetParticularString:2:pass-csharp-format",
      "GetParticularPluralString:2:pass-csharp-format",
      "GetParticularPluralString:3:pass-csharp-format",
      "String.Format:1:csharp-format"
    };
  size_t i;

  for (i = 0; i < SIZEOF (flag_specs); i++)
    xgettext_record_flag (flag_specs[i]);
}
/* ======================== Reading of characters. ======================== */
/* The input file stream. */
static FILE *fp;
/* Phase 1: line_number handling. */
/* Maximum used, roughly a safer MB_LEN_MAX. */
#define MAX_PHASE1_PUSHBACK 16
/* Stack of pushed-back bytes.  phase1_ungetc () appends at the end and
   phase1_getc () pops from the end, so the byte pushed last is read first.  */
static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK];
/* Number of bytes currently stored in phase1_pushback.  */
static int phase1_pushback_length;
/* Read the next single byte from the input file.
   Bytes pushed back with phase1_ungetc () are delivered first.  Returns EOF
   at end of input; a read error on fp is reported through
   error (EXIT_FAILURE, ...).  line_number is incremented for every '\n'
   that is returned.  */
static int
phase1_getc ()
{
  int byte;

  if (phase1_pushback_length != 0)
    /* Pushed-back bytes are stored as unsigned char, hence never EOF.  */
    byte = phase1_pushback[--phase1_pushback_length];
  else
    {
      byte = getc (fp);
      if (byte == EOF)
        {
          if (ferror (fp))
            error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
                   real_file_name);
          return EOF;
        }
    }

  if (byte == '\n')
    ++line_number;
  return byte;
}
/* Push the byte C back so that the next phase1_getc () returns it.
   Supports MAX_PHASE1_PUSHBACK characters of pushback; exceeding that
   aborts.  EOF is ignored.  Pushing back a '\n' undoes the line_number
   increment made when it was read.  */
static void
phase1_ungetc (int c)
{
  if (c == EOF)
    return;

  if (c == '\n')
    --line_number;
  if (phase1_pushback_length == SIZEOF (phase1_pushback))
    abort ();
  phase1_pushback[phase1_pushback_length] = c;
  phase1_pushback_length++;
}
/* Phase 2: Conversion to Unicode.
This is done early because ECMA-334 section 9.1. says that the source is
"an ordered sequence of Unicode characters", and because the recognition
of the line terminators (ECMA-334 section 9.3.1) is hardly possible without
prior conversion to Unicode. */
/* End-of-file indicator for functions returning an UCS-4 character. */
#define UEOF -1
/* Newline Unicode character. */
#define UNL 0x000a
/* Lexical context that is passed to non_ascii_error_message () when a
   non-ASCII byte is found in ASCII mode.  comment_line_end () resets it to
   lc_outside.  */
static lexical_context_ty lexical_context;
/* One-element pushback buffer of phase 2, filled by phase2_ungetc () and
   drained by phase2_getc ().  */
static int phase2_pushback[1];
/* Number of characters currently stored in phase2_pushback (0 or 1).  */
static int phase2_pushback_length;
/* Read the next Unicode UCS-4 character from the input file.

   Returns the code point of the character, or UEOF at end of input.  A
   character pushed back with phase2_ungetc () is returned first.  Otherwise
   bytes are fetched through phase1_getc () and decoded according to
   xgettext_current_source_encoding:
     - po_charset_ascii: the byte is returned as is; a non-ASCII byte is
       reported with multiline_error () and the program exits.
     - po_charset_utf8: the bytes are decoded with u8_mbtouc ().  Lookahead
       bytes that the decoder did not accept as part of the character are
       pushed back, so that malformed input never swallows the bytes that
       follow it (for instance a newline).
     - any other encoding: the bytes are converted through iconv, one
       character at a time (only when HAVE_ICONV; otherwise abort ()).
       Invalid or incomplete sequences are reported and the program exits.  */
static int
phase2_getc ()
{
  if (phase2_pushback_length)
    return phase2_pushback[--phase2_pushback_length];

  if (xgettext_current_source_encoding == po_charset_ascii)
    {
      int c = phase1_getc ();
      if (c == EOF)
        return UEOF;
      if (!c_isascii (c))
        {
          multiline_error (xstrdup (""),
                           xasprintf ("%s\n%s\n",
                                      non_ascii_error_message (lexical_context,
                                                               real_file_name,
                                                               line_number),
                                      _("Please specify the source encoding through --from-code.")));
          exit (EXIT_FAILURE);
        }
      return c;
    }
  else if (xgettext_current_source_encoding != po_charset_utf8)
    {
#if HAVE_ICONV
      /* Use iconv on an increasing number of bytes.  Read only as many bytes
         through phase1_getc as needed.  This is needed to give reasonable
         interactive behaviour when fp is connected to an interactive tty.  */
      unsigned char buf[MAX_PHASE1_PUSHBACK];
      size_t bufcount;
      int c = phase1_getc ();
      if (c == EOF)
        return UEOF;
      buf[0] = (unsigned char) c;
      bufcount = 1;

      for (;;)
        {
          unsigned char scratchbuf[6];
          const char *inptr = (const char *) &buf[0];
          size_t insize = bufcount;
          char *outptr = (char *) &scratchbuf[0];
          size_t outsize = sizeof (scratchbuf);

          size_t res = iconv (xgettext_current_source_iconv,
                              (ICONV_CONST char **) &inptr, &insize,
                              &outptr, &outsize);
          /* We expect that a character has been produced if and only if
             some input bytes have been consumed.  */
          if ((insize < bufcount) != (outsize < sizeof (scratchbuf)))
            abort ();
          if (outsize == sizeof (scratchbuf))
            {
              /* No character has been produced.  Must be an error.  */
              if (res != (size_t)(-1))
                abort ();

              if (errno == EILSEQ)
                {
                  /* An invalid multibyte sequence was encountered.  */
                  multiline_error (xstrdup (""),
                                   xasprintf (_("\
%s:%d: Invalid multibyte sequence.\n\
Please specify the correct source encoding through --from-code.\n"),
                                   real_file_name, line_number));
                  exit (EXIT_FAILURE);
                }
              else if (errno == EINVAL)
                {
                  /* An incomplete multibyte character.  */
                  if (bufcount == MAX_PHASE1_PUSHBACK)
                    {
                      /* An overlong incomplete multibyte sequence was
                         encountered.  */
                      multiline_error (xstrdup (""),
                                       xasprintf (_("\
%s:%d: Long incomplete multibyte sequence.\n\
Please specify the correct source encoding through --from-code.\n"),
                                       real_file_name, line_number));
                      exit (EXIT_FAILURE);
                    }

                  /* Read one more byte and retry iconv.  */
                  c = phase1_getc ();
                  if (c == EOF)
                    {
                      multiline_error (xstrdup (""),
                                       xasprintf (_("\
%s:%d: Incomplete multibyte sequence at end of file.\n\
Please specify the correct source encoding through --from-code.\n"),
                                       real_file_name, line_number));
                      exit (EXIT_FAILURE);
                    }
                  if (c == '\n')
                    {
                      /* phase1_getc has already counted this newline, hence
                         the "- 1" in the reported line number.  */
                      multiline_error (xstrdup (""),
                                       xasprintf (_("\
%s:%d: Incomplete multibyte sequence at end of line.\n\
Please specify the correct source encoding through --from-code.\n"),
                                       real_file_name, line_number - 1));
                      exit (EXIT_FAILURE);
                    }
                  buf[bufcount++] = (unsigned char) c;
                }
              else
                error (EXIT_FAILURE, errno, _("%s:%d: iconv failure"),
                       real_file_name, line_number);
            }
          else
            {
              size_t outbytes = sizeof (scratchbuf) - outsize;
              size_t bytes = bufcount - insize;
              ucs4_t uc;

              /* We expect that one character has been produced.  */
              if (bytes == 0)
                abort ();
              if (outbytes == 0)
                abort ();
              /* Push back the unused bytes.  */
              while (insize > 0)
                phase1_ungetc (buf[--insize]);
              /* Convert the character from UTF-8 to UCS-4.  */
              if (u8_mbtoucr (&uc, scratchbuf, outbytes) < (int) outbytes)
                {
                  /* scratchbuf contains an out-of-range Unicode character
                     (> 0x10ffff).  */
                  multiline_error (xstrdup (""),
                                   xasprintf (_("\
%s:%d: Invalid multibyte sequence.\n\
Please specify the source encoding through --from-code.\n"),
                                   real_file_name, line_number));
                  exit (EXIT_FAILURE);
                }
              return uc;
            }
        }
#else
      /* If we don't have iconv(), the only supported values for
         xgettext_global_source_encoding and thus also for
         xgettext_current_source_encoding are ASCII and UTF-8.  */
      abort ();
#endif
    }
  else
    {
      /* Read an UTF-8 encoded character.  */
      unsigned char buf[6];
      unsigned int count;
      int consumed;
      int c;
      ucs4_t uc;

      c = phase1_getc ();
      if (c == EOF)
        return UEOF;
      buf[0] = c;
      count = 1;

      /* The lead byte announces how many bytes the character has.  Fetch
         them one by one, stopping early as soon as a byte is not a
         continuation byte (0x80..0xbf).  */
      if (buf[0] >= 0xc0)
        {
          c = phase1_getc ();
          if (c == EOF)
            return UEOF;
          buf[1] = c;
          count = 2;
        }

      if (buf[0] >= 0xe0
          && ((buf[1] ^ 0x80) < 0x40))
        {
          c = phase1_getc ();
          if (c == EOF)
            return UEOF;
          buf[2] = c;
          count = 3;
        }

      if (buf[0] >= 0xf0
          && ((buf[1] ^ 0x80) < 0x40)
          && ((buf[2] ^ 0x80) < 0x40))
        {
          c = phase1_getc ();
          if (c == EOF)
            return UEOF;
          buf[3] = c;
          count = 4;
        }

      if (buf[0] >= 0xf8
          && ((buf[1] ^ 0x80) < 0x40)
          && ((buf[2] ^ 0x80) < 0x40)
          && ((buf[3] ^ 0x80) < 0x40))
        {
          c = phase1_getc ();
          if (c == EOF)
            return UEOF;
          buf[4] = c;
          count = 5;
        }

      if (buf[0] >= 0xfc
          && ((buf[1] ^ 0x80) < 0x40)
          && ((buf[2] ^ 0x80) < 0x40)
          && ((buf[3] ^ 0x80) < 0x40)
          && ((buf[4] ^ 0x80) < 0x40))
        {
          c = phase1_getc ();
          if (c == EOF)
            return UEOF;
          buf[5] = c;
          count = 6;
        }

      consumed = u8_mbtouc (&uc, buf, count);
      /* On malformed input the decoder accepts fewer bytes than were
         fetched above.  Hand the remaining bytes back to phase 1 instead of
         dropping them; otherwise e.g. a newline that follows a stray lead
         byte would disappear.  For well-formed input consumed == count and
         nothing is pushed back.  */
      while (consumed >= 1 && count > (unsigned int) consumed)
        phase1_ungetc (buf[--count]);
      return uc;
    }
}
/* Push the character C back so that the next phase2_getc () returns it.
   Supports only one pushback character; a second one aborts.  UEOF is
   ignored.  */
static void
phase2_ungetc (int c)
{
  if (c == UEOF)
    return;

  if (phase2_pushback_length == SIZEOF (phase2_pushback))
    abort ();
  phase2_pushback[phase2_pushback_length] = c;
  phase2_pushback_length++;
}
/* Phase 3: Convert all line terminators to LF.
See ECMA-334 section 9.3.1. */
/* Line number defined in terms of phase3. */
static int logical_line_number;
/* Pushback stack of phase 3 and the number of characters stored in it.  */
static int phase3_pushback[9];
static int phase3_pushback_length;
/* Read the next Unicode UCS-4 character from the input file, mapping
   all line terminators to U+000A, and dropping U+001A at the end of file.
   logical_line_number is incremented for every UNL that is returned,
   including one that comes from the pushback stack.  */
static int
phase3_getc ()
{
  int c;

  if (phase3_pushback_length)
    {
      c = phase3_pushback[--phase3_pushback_length];
      if (c == UNL)
        ++logical_line_number;
      return c;
    }

  c = phase2_getc ();
  switch (c)
    {
    case 0x000d:
      {
        /* Swallow the LF of a CR/LF pair; keep anything else.  */
        int next = phase2_getc ();
        if (next != UEOF && next != 0x000a)
          phase2_ungetc (next);
      }
      /* Seen line terminator CR or CR/LF.  */
      ++logical_line_number;
      return UNL;

    case 0x0085:
    case 0x2028:
    case 0x2029:
      /* Seen Unicode word processor newline.  */
      ++logical_line_number;
      return UNL;

    case 0x001a:
      {
        int next = phase2_getc ();
        if (next == UEOF)
          /* Seen U+001A right before the end of file.  */
          return UEOF;
        phase2_ungetc (next);
      }
      return c;

    case UNL:
      ++logical_line_number;
      return c;

    default:
      return c;
    }
}
/* Push the character C back so that the next phase3_getc () returns it.
   Supports 9 characters of pushback; exceeding that aborts.  UEOF is
   ignored.  Pushing back UNL undoes the logical_line_number increment made
   when it was read.  */
static void
phase3_ungetc (int c)
{
  if (c == UEOF)
    return;

  if (c == UNL)
    --logical_line_number;
  if (phase3_pushback_length == SIZEOF (phase3_pushback))
    abort ();
  phase3_pushback[phase3_pushback_length] = c;
  phase3_pushback_length++;
}
/* ========================= Accumulating strings. ======================== */
/* See xg-mixed-string.h for the API.
In this extractor, we add only Unicode characters. */
/* ======================== Accumulating comments. ======================== */
/* Accumulating a single comment line. */
static struct mixed_string_buffer comment_buffer;

/* Begin a new comment line: (re)initialize comment_buffer in the lc_comment
   context, tagged with the current logical_file_name and line_number.  */
static inline void
comment_start ()
{
  mixed_string_buffer_init (&comment_buffer, lc_comment,
                            logical_file_name, line_number);
}
/* Return true if nothing has been added to comment_buffer since the last
   comment_start ().  */
static inline bool
comment_at_start ()
{
  bool nothing_added = mixed_string_buffer_is_empty (&comment_buffer);

  return nothing_added;
}
/* Append the Unicode character C to the comment line being accumulated in
   comment_buffer.  */
static inline void
comment_add (int c)
{
  mixed_string_buffer_append_unicode (&comment_buffer, c);
}
/* Finish the comment line accumulated in comment_buffer and pass it to
   savable_comment_add ().
   CHARS_TO_REMOVE is the number of bytes to drop from the end of the
   accumulated text first.  The callers in this file pass 1 for a trailing
   newline, 2 for the closing star-slash of a C style comment, and 0
   otherwise.  Spaces and tabs that then end the text are dropped as well.
   Finally lexical_context is reset to lc_outside.  */
static inline void
comment_line_end (size_t chars_to_remove)
{
  char *buffer =
    mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
  size_t buflen = strlen (buffer);

  /* buflen is unsigned, so a plain subtraction would wrap around when the
     string is shorter than CHARS_TO_REMOVE, and the accesses below would
     then run outside the buffer.  Clamp to the empty string instead.
     NOTE(review): presumably this can happen when the comment contains a
     NUL character, which makes strlen stop early; verify against
     mixed_string_buffer_append_unicode.  */
  buflen = (buflen > chars_to_remove ? buflen - chars_to_remove : 0);

  /* Strip trailing blanks.  */
  while (buflen >= 1
         && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
    --buflen;
  buffer[buflen] = '\0';

  /* NOTE(review): buffer is not freed here; confirm whether
     savable_comment_add takes ownership of it or copies it.  */
  savable_comment_add (buffer);
  lexical_context = lc_outside;
}
/* These are for tracking whether comments count as immediately before
   keyword. */
static int last_comment_line;
static int last_non_comment_line;
/* Phase 4: Replace each comment that is not inside a character constant or
   string literal with a space or newline character.
   See ECMA-334 section 9.3.2.
   A C style comment is returned as a single space, a C++ style comment as
   UNL.  The comment text is collected line by line through comment_start (),
   comment_add () and comment_line_end (); last_comment_line records the
   logical line of the comment.  */
static int
phase4_getc ()
{
  int first;
  int c;

  first = phase3_getc ();
  if (first != '/')
    return first;

  c = phase3_getc ();

  if (c == '*')
    {
      /* C style comment.  */
      bool prev_was_star = false;

      comment_start ();
      for (;;)
        {
          c = phase3_getc ();
          if (c == UEOF)
            break;
          /* We skip all leading white space, but not EOLs.  */
          if (!comment_at_start () || (c != ' ' && c != '\t'))
            comment_add (c);

          if (c == UNL)
            {
              /* End of one comment line: drop the newline just added and
                 start collecting the next line.  */
              comment_line_end (1);
              comment_start ();
              prev_was_star = false;
            }
          else if (c == '*')
            prev_was_star = true;
          else if (c == '/' && prev_was_star)
            {
              /* End of the comment: drop the two terminator characters.  */
              comment_line_end (2);
              break;
            }
          else
            prev_was_star = false;
        }
      last_comment_line = logical_line_number;
      return ' ';
    }

  if (c == '/')
    {
      /* C++ style comment.  */
      last_comment_line = logical_line_number;
      comment_start ();
      while ((c = phase3_getc ()) != UNL && c != UEOF)
        {
          /* We skip all leading white space, but not EOLs.  */
          if (!comment_at_start () || (c != ' ' && c != '\t'))
            comment_add (c);
        }
      phase3_ungetc (c); /* push back the newline, to decrement logical_line_number */
      comment_line_end (0);
      phase3_getc (); /* read the newline again */
      return UNL;
    }

  /* A lone slash: not a comment.  */
  phase3_ungetc (c);
  return first;
}
/* Push the character C back for phase4_getc ().  Phase 4 has no pushback
   buffer of its own: the character is handed to phase3_ungetc ().  */
static void
phase4_ungetc (int c)
{
  phase3_ungetc (c);
}
/* ======================= Character classification. ====================== */
/* Return true if a given character is white space.
   See ECMA-334 section 9.3.3.
   White space here means Unicode character class Zs, as of Unicode 4.0
   (grep '^[^;]*;[^;]*;Zs;' UnicodeData-4.0.0.txt).  */
static bool
is_whitespace (int c)
{
  /* U+0020 SPACE, U+00A0 NO-BREAK SPACE.  */
  if (c == 0x0020 || c == 0x00a0)
    return true;
  /* Isolated members of the class.  */
  if (c == 0x1680 || c == 0x180e)
    return true;
  /* The contiguous run U+2000..U+200B.  */
  if (c >= 0x2000 && c <= 0x200b)
    return true;
  return c == 0x202f || c == 0x205f || c == 0x3000;
}
/* C# allows identifiers containing many Unicode characters. We recognize
them; to use an identifier with Unicode characters in a --keyword option,
it must be specified in UTF-8. */
/* Test whether the character UC is a member of the set stored in TABLE.
   TABLE is read as an array of int words forming a three-level bitmap:
     - word 0 is the number of level-1 entries;
     - words 1.. are the level-1 entries, indexed by UC >> 16;
     - a level-1 entry is the word offset of a level-2 block, indexed by
       bits 9..15 of UC;
     - a level-2 entry is the word offset of a level-3 block, indexed by
       bits 5..8 of UC, whose words hold one bit per character.
   A negative entry at level 1 or 2 means that the whole range is absent.
   Returns 1 if UC is in the set, 0 otherwise.  */
static inline int
bitmap_lookup (const void *table, unsigned int uc)
{
  const int *words = (const int *) table;
  unsigned int plane = uc >> 16;
  int level2_base;
  int level3_base;
  unsigned int bits;

  if (plane >= words[0])
    return 0;

  level2_base = words[1 + plane];
  if (level2_base < 0)
    return 0;

  level3_base = words[level2_base + ((uc >> 9) & 0x7f)];
  if (level3_base < 0)
    return 0;

  bits = words[level3_base + ((uc >> 5) & 0xf)];
  return (bits >> (uc & 0x1f)) & 1;
}
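/* Unicode character classes Lu, Ll, Lt, Lm, Lo, Nl, as of Unicode 4.0,
plus the underscore.
Stored as a three-level bitmap in the layout read by bitmap_lookup ();
queried by is_identifier_start (). */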
static const
struct
{
int header[1];
int level1[3];
int level2[3 << 7];
/*unsigned*/ int level3[34 << 4];
}
table_identifier_start =
{
{ 3 },
{ 4, 132, 260 },
{
388, 404, 420, 436, 452, 468, 484, 500,
516, 532, 548, 564, 580, -1, 596, 612,
628, -1, -1, -1, -1, -1, -1, -1,
644, -1, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 676, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 692,
660, 660, 708, -1, -1, -1, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 724, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, 740, 756, 772, 788,
804, 820, 836, -1, 852, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 868, 884, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 660, 660, 660, 660, 660,
660, 660, 660, 900, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, 660, 916, -1, -1
},
{
0x00000000, 0x00000000, 0x87FFFFFE, 0x07FFFFFE,
0x00000000, 0x04200400, 0xFF7FFFFF, 0xFF7FFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x007FFFFF, 0xFFFF0000, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFC3, 0x0000401F,
0x00000000, 0x00000000, 0x00000000, 0x04000000,
0xFFFFD740, 0xFFFFFFFB, 0xFFFF7FFF, 0x0FBFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFC03, 0xFFFFFFFF, 0xFFFF7FFF, 0x033FFFFF,
0x0000FFFF, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
0x000000FF, 0x00000000, 0xFFFF0000, 0x000707FF,
0x00000000, 0x07FFFFFE, 0x000007FF, 0xFFFEC000,
0xFFFFFFFF, 0xFFFFFFFF, 0x002FFFFF, 0x9C00C060,
0xFFFD0000, 0x0000FFFF, 0x0000E000, 0x00000000,
0xFFFFFFFF, 0x0002003F, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFF0, 0x23FFFFFF, 0xFF010000, 0x00000003,
0xFFF99FE0, 0x23C5FDFF, 0xB0000000, 0x00030003,
0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000,
0xFFFBBFE0, 0x23EDFDFF, 0x00010000, 0x00000003,
0xFFF99FE0, 0x23EDFDFF, 0xB0000000, 0x00020003,
0xD63DC7E8, 0x03BFC718, 0x00000000, 0x00000000,
0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003,
0xFFFDDFE0, 0x23EFFDFF, 0x40000000, 0x00000003,
0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003,
0xFC7FFFE0, 0x2FFBFFFF, 0x0000007F, 0x00000000,
0xFFFFFFFE, 0x000DFFFF, 0x0000007F, 0x00000000,
0xFEF02596, 0x200DECAE, 0x3000005F, 0x00000000,
0x00000001, 0x00000000, 0xFFFFFEFF, 0x000007FF,
0x00000F00, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0x000006FB, 0x003F0000, 0x00000000,
0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x01FFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0x83FFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFF07, 0xFFFFFFFF, 0x03FFFFFF,
0xFFFFFF7F, 0xFFFFFFFF, 0x3D7F3D7F, 0xFFFFFFFF,
0xFFFF3D7F, 0x7F3D7FFF, 0xFF7F7F3D, 0xFFFF7FFF,
0x7F3D7FFF, 0xFFFFFFFF, 0x07FFFF7F, 0x00000000,
0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x001FFFFF,
0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x007F9FFF,
0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x0001C7FF,
0x0003DFFF, 0x0003FFFF, 0x0003FFFF, 0x0001DFFF,
0xFFFFFFFF, 0x000FFFFF, 0x10800000, 0x00000000,
0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00FFFFFF,
0xFFFFFFFF, 0x000001FF, 0x00000000, 0x00000000,
0x1FFFFFFF, 0x00000000, 0xFFFF0000, 0x001F3FFF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000FFF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
0x00000000, 0x00000000, 0x00000000, 0x80020000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x3E2FFC84, 0xE3FBBD50, 0x000003E0, 0xFFFFFFFF,
0x0000000F, 0x00000000, 0x00000000, 0x00000000,
0x000000E0, 0x1F3E03FE, 0xFFFFFFFE, 0xFFFFFFFF,
0xE07FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xF7FFFFFF,
0xFFFFFFE0, 0xFFFE1FFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x00007FFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x003FFFFF, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x00001FFF, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFF3FFF, 0xFFFFFFFF, 0x000007FF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xA0F8007F, 0x5F7FFDFF, 0xFFFFFFDB, 0xFFFFFFFF,
0xFFFFFFFF, 0x0003FFFF, 0xFFF80000, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x3FFFFFFF, 0xFFFF0000, 0xFFFFFFFF,
0xFFFCFFFF, 0xFFFFFFFF, 0x000000FF, 0x0FFF0000,
0x00000000, 0x00000000, 0x00000000, 0xFFDF0000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x1FFFFFFF,
0x00000000, 0x07FFFFFE, 0x07FFFFFE, 0xFFFFFFC0,
0xFFFFFFFF, 0x7FFFFFFF, 0x1CFCFCFC, 0x00000000,
0xFFFFEFFF, 0xB7FFFF7F, 0x3FFF3FFF, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x07FFFFFF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x7FFFFFFF, 0xFFFF0000, 0x000007FF, 0x00000000,
0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFD3F, 0x91BFFFFF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF,
0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF,
0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFF0F, 0xF7FFFFFD, 0xF7FFFFFF,
0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF,
0xFFFFFDFF, 0xFFFFFDFF, 0x000003F7, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0x007FFFFF, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000
}
};
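/* Unicode character classes Lu, Ll, Lt, Lm, Lo, Nl, Nd, Pc, Mn, Mc, Cf,
as of Unicode 4.0.
Stored as a three-level bitmap in the layout read by bitmap_lookup ();
queried by is_identifier_part (). */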
static const
struct
{
int header[1];
int level1[15];
int level2[4 << 7];
/*unsigned*/ int level3[36 << 4];
}
table_identifier_part =
{
{ 15 },
{
16, 144, 272, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 400
},
{
528, 544, 560, 576, 592, 608, 624, 640,
656, 672, 688, 704, 720, -1, 736, 752,
768, -1, -1, -1, -1, -1, -1, -1,
784, -1, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 816, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 832,
800, 800, 848, -1, -1, -1, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 864, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, 880, 896, 912, 928,
944, 960, 976, -1, 992, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
1008, -1, 1024, 1040, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 800, 800, 800, 800, 800,
800, 800, 800, 1056, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, 800, 1072, -1, -1,
1088, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1
},
{
0x00000000, 0x03FF0000, 0x87FFFFFE, 0x07FFFFFE,
0x00000000, 0x04202400, 0xFF7FFFFF, 0xFF7FFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x007FFFFF, 0xFFFF0000, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFC3, 0x0000401F,
0xFFFFFFFF, 0xFFFFFFFF, 0xE0FFFFFF, 0x0400FFFF,
0xFFFFD740, 0xFFFFFFFB, 0xFFFF7FFF, 0x0FBFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFC7B, 0xFFFFFFFF, 0xFFFF7FFF, 0x033FFFFF,
0x0000FFFF, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
0xFFFE00FF, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
0x003F000F, 0x07FFFFFE, 0x01FFFFFF, 0xFFFFC3FF,
0xFFFFFFFF, 0xFFFFFFFF, 0xBFEFFFFF, 0x9FFFFDFF,
0xFFFF8000, 0xFFFFFFFF, 0x0000E7FF, 0x00000000,
0xFFFFFFFF, 0x0003FFFF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFE, 0xF3FFFFFF, 0xFF1F3FFF, 0x0000FFCF,
0xFFF99FEE, 0xF3C5FDFF, 0xB080399F, 0x0003FFCF,
0xFFF987EE, 0xD36DFDFF, 0x5E003987, 0x001FFFC0,
0xFFFBBFEE, 0xF3EDFDFF, 0x00013BBF, 0x0000FFCF,
0xFFF99FEE, 0xF3EDFDFF, 0xB0C0398F, 0x0002FFC3,
0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80,
0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3,
0xFFFDDFEC, 0xF3EFFDFF, 0x40603DDF, 0x0000FFC3,
0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3,
0xFC7FFFEC, 0x2FFBFFFF, 0xFF5F847F, 0x000C0000,
0xFFFFFFFE, 0x07FFFFFF, 0x03FF7FFF, 0x00000000,
0xFEF02596, 0x3BFFECAE, 0x33FF3F5F, 0x00000000,
0x03000001, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE07FF,
0xFEFF0FDF, 0x1FFFFFFF, 0x00000040, 0x00000000,
0xFFFFFFFF, 0x03C7F6FB, 0x03FF03FF, 0x00000000,
0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x01FFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0x83FFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFF07, 0xFFFFFFFF, 0x03FFFFFF,
0xFFFFFF7F, 0xFFFFFFFF, 0x3D7F3D7F, 0xFFFFFFFF,
0xFFFF3D7F, 0x7F3D7FFF, 0xFF7F7F3D, 0xFFFF7FFF,
0x7F3D7FFF, 0xFFFFFFFF, 0x07FFFF7F, 0x0003FE00,
0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x001FFFFF,
0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x007F9FFF,
0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x0001C7FF,
0x001FDFFF, 0x001FFFFF, 0x000FFFFF, 0x000DDFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0x308FFFFF, 0x000003FF,
0x03FF3800, 0xFFFFFFFF, 0xFFFFFFFF, 0x00FFFFFF,
0xFFFFFFFF, 0x000003FF, 0x00000000, 0x00000000,
0x1FFFFFFF, 0x0FFF0FFF, 0xFFFFFFC0, 0x001F3FFF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000FFF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
0x0000F000, 0x80007C00, 0x00100001, 0x8002FC0F,
0x00000000, 0x00000000, 0x1FFF0000, 0x000007E2,
0x3E2FFC84, 0xE3FBBD50, 0x000003E0, 0xFFFFFFFF,
0x0000000F, 0x00000000, 0x00000000, 0x00000000,
0x000000E0, 0x1F3EFFFE, 0xFFFFFFFE, 0xFFFFFFFF,
0xE67FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFE0, 0xFFFE1FFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x00007FFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x003FFFFF, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x00001FFF, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFF3FFF, 0xFFFFFFFF, 0x000007FF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xE0F8007F, 0x5F7FFDFF, 0xFFFFFFDB, 0xFFFFFFFF,
0xFFFFFFFF, 0x0003FFFF, 0xFFF80000, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x3FFFFFFF, 0xFFFF0000, 0xFFFFFFFF,
0xFFFCFFFF, 0xFFFFFFFF, 0x000000FF, 0x0FFF0000,
0x0000FFFF, 0x0018000F, 0x0000E000, 0xFFDF0000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x9FFFFFFF,
0x03FF0000, 0x87FFFFFE, 0x07FFFFFE, 0xFFFFFFE0,
0xFFFFFFFF, 0x7FFFFFFF, 0x1CFCFCFC, 0x0E000000,
0xFFFFEFFF, 0xB7FFFF7F, 0x3FFF3FFF, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x07FFFFFF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x7FFFFFFF, 0xFFFF0000, 0x000007FF, 0x00000000,
0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x3FFFFFFF, 0x000003FF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFD3F, 0x91BFFFFF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0xFFFFE3E0,
0x00000FE7, 0x00003C00, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF,
0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF,
0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFF0F, 0xF7FFFFFD, 0xF7FFFFFF,
0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF,
0xFFFFFDFF, 0xFFFFFDFF, 0xFFFFC3F7, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0x007FFFFF, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000002, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF
}
};
/* Return true if a given character can occur as first character of an
   identifier.  See ECMA-334 section 9.4.2.
   The answer is looked up in table_identifier_start.  Restricted to ASCII,
   the test would be
     (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'  */
static bool
is_identifier_start (int c)
{
  return bitmap_lookup (&table_identifier_start, c) != 0;
}
/* Tell whether the character C may occur inside an identifier.
   See ECMA-334 section 9.4.2.
   Restricted to ASCII, the test would be
     (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
     || (c >= '0' && c <= '9') || c == '_'
   but the answer is looked up in the table_identifier_part bitmap.  */
static bool
is_identifier_part (int c)
{
  bool allowed = bitmap_lookup (&table_identifier_part, c);

  return allowed;
}
/* Predicate that accepts every character.  It is handed to
   do_getc_unicode_escaped when the escaped character is not subject to
   any restriction.  */
static bool
is_any_character (int c)
{
  /* The argument is deliberately ignored.  */
  (void) c;
  return true;
}
/* ======================= Preprocessor directives. ======================= */
/* Phase 5: Remove preprocessor lines.  See ECMA-334 section 9.5.
   As a side effect, this also removes initial whitespace on every line;
   this whitespace doesn't matter.  */

static int phase5_pushback[10];
static int phase5_pushback_length;

/* Return the next character of the input with preprocessor directive
   lines removed.
   Characters other than UNL are passed through from phase 4 unchanged.
   After a UNL, the leading whitespace of the following line is skipped
   and, if that line starts with '#', the whole line is discarded.  This is
   repeated as long as the next line is a directive as well, so that a run
   of consecutive directive lines collapses into a single UNL.
   Returns UEOF if the input ends inside or right after a directive line.
   Note that the directive test is only reached after a UNL, so a directive
   on the very first line of the input is not recognized here.  */
static int
phase5_getc ()
{
  int c;

  if (phase5_pushback_length)
    return phase5_pushback[--phase5_pushback_length];

  c = phase4_getc ();
  if (c != UNL)
    return c;

  /* We are at the beginning of a line.  */
  for (;;)
    {
      /* Use phase 3 here, so that a "//" inside a directive line is not
         interpreted as a comment by phase 4.  */
      do
        c = phase3_getc ();
      while (c != UEOF && is_whitespace (c));

      if (c != '#')
        {
          /* Ordinary line: hand its first character back and report the
             newline that preceded it.  */
          phase3_ungetc (c);
          return UNL;
        }

      /* Ignore the entire line containing the preprocessor directive
         (including the // comment if it contains one).  */
      do
        c = phase3_getc ();
      while (c != UEOF && c != UNL);

      if (c == UEOF)
        return c;

      /* The directive line ended with UNL.  The next line may be a
         directive too; returning UNL right away would let its '#' reach
         the tokenizer, because the following call would read it through
         phase4_getc without any check.  So examine the next line.  */
    }
}
#ifdef unused
/* Push the character C back so that the next phase5_getc call returns it.
   UEOF is never pushed back.  Aborts if the pushback buffer is full.  */
static void
phase5_ungetc (int c)
{
  if (c == UEOF)
    return;

  if (phase5_pushback_length == SIZEOF (phase5_pushback))
    abort ();

  phase5_pushback[phase5_pushback_length] = c;
  phase5_pushback_length++;
}
#endif
/* ========================== Reading of tokens. ========================== */
/* Kinds of token returned by phase6_get.  Everything that has no kind of
   its own is reported as token_type_other. */
enum token_type_ty
{
token_type_eof,
token_type_lparen, /* ( */
token_type_rparen, /* ) */
token_type_lbrace, /* { */
token_type_rbrace, /* } */
token_type_comma, /* , */
token_type_dot, /* . */
token_type_string_literal, /* "abc", @"abc" */
token_type_number, /* 1.23 */
token_type_symbol, /* identifier, keyword, null */
token_type_plus, /* + */
token_type_other /* character literal, misc. operator */
};
typedef enum token_type_ty token_type_ty;
typedef struct token_ty token_ty;
/* A single token.  Which payload fields are meaningful depends on 'type',
   as noted on each field.  The payload is released by free_token. */
struct token_ty
{
token_type_ty type;
char *string; /* for token_type_symbol */
mixed_string_ty *mixed_string; /* for token_type_string_literal */
refcounted_string_list_ty *comment; /* for token_type_string_literal */
/* Values of the global line_number and logical_line_number, recorded by
   phase6_get just before it reads the token's first character. */
int line_number;
int logical_line_number;
};
/* Release the payload owned by the token *TP: the name of a symbol token,
   or the string contents and the comment reference of a string literal
   token.  Tokens of any other type own nothing.  The token_ty object
   itself is not freed.  */
static inline void
free_token (token_ty *tp)
{
  switch (tp->type)
    {
    case token_type_symbol:
      free (tp->string);
      break;

    case token_type_string_literal:
      mixed_string_free (tp->mixed_string);
      drop_reference (tp->comment);
      break;

    default:
      /* Nothing to release.  */
      break;
    }
}
/* Read a Unicode escape sequence outside string/character literals.
   The introducing backslash has already been consumed by the caller.
   Reject Unicode escapes that don't fulfill the given PREDICATE.
   See ECMA-334 section 9.4.2.
   Return the escaped character if the input continues with 'u' and 4 hex
   digits or 'U' and 8 hex digits, the value is below 0x110000 and PREDICATE
   accepts it.  In all other cases everything that was read is pushed back
   in its original order and '\\' is returned.  */
static int
do_getc_unicode_escaped (bool (*predicate) (int))
{
  unsigned char digits[8];
  unsigned int code = 0;
  int wanted;
  int count;
  int marker;

  /* Use phase 3, because phase 4 elides comments.  */
  marker = phase3_getc ();
  if (marker == UEOF)
    return '\\';

  if (marker != 'u' && marker != 'U')
    {
      phase3_ungetc (marker);
      return '\\';
    }

  wanted = (marker == 'U' ? 8 : 4);
  for (count = 0; count < wanted; count++)
    {
      int d = phase3_getc ();
      int value;

      if (d >= '0' && d <= '9')
        value = d - '0';
      else if (d >= 'A' && d <= 'F')
        value = d - 'A' + 10;
      else if (d >= 'a' && d <= 'f')
        value = d - 'a' + 10;
      else
        {
          /* Not a hex digit: the escape sequence is incomplete.  */
          phase3_ungetc (d);
          break;
        }

      code = (code << 4) + value;
      digits[count] = d;
    }

  if (count == wanted)
    {
      if (code >= 0x110000)
        {
          error_with_progname = false;
          error (0, 0, _("%s:%d: warning: invalid Unicode character"),
                 logical_file_name, line_number);
          error_with_progname = true;
        }
      else if (predicate (code))
        return code;
    }

  /* Give back the digits, last one first, and then the 'u' or 'U'.  */
  while (--count >= 0)
    phase3_ungetc (digits[count]);
  phase3_ungetc (marker);
  return '\\';
}
/* Return the value of the hexadecimal digit C, or -1 if C is not a
   hexadecimal digit.  */
static int
escape_hex_digit_value (int c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  return -1;
}

/* Read an escape sequence inside a string literal or character literal.
   The introducing backslash has already been consumed by the caller.
   See ECMA-334 sections 9.4.4.4., 9.4.4.5.
   Return the character denoted by the escape sequence.  If the sequence is
   not recognized, push back what was read and return '\\'.  */
static int
do_getc_escaped ()
{
  int c;

  /* Use phase 3, because phase 4 elides comments.  */
  c = phase3_getc ();
  if (c == UEOF)
    return '\\';

  switch (c)
    {
    /* Simple escape sequences.  */
    case 'a':  return 0x0007;
    case 'b':  return 0x0008;
    case 't':  return 0x0009;
    case 'n':  return 0x000a;
    case 'v':  return 0x000b;
    case 'f':  return 0x000c;
    case 'r':  return 0x000d;
    case '"':  return '"';
    case '\'': return '\'';
    case '\\': return '\\';
    case '0':  return 0x0000;

    case 'x':
      {
        /* Hexadecimal escape sequence: between 1 and 4 hex digits.  */
        int value;
        int digit;
        int count;

        c = phase3_getc ();
        digit = escape_hex_digit_value (c);
        if (digit < 0)
          {
            /* No digit at all: not an escape sequence.  */
            phase3_ungetc (c);
            phase3_ungetc ('x');
            return '\\';
          }

        value = digit;
        for (count = 1; count < 4; count++)
          {
            c = phase3_getc ();
            digit = escape_hex_digit_value (c);
            if (digit < 0)
              {
                phase3_ungetc (c);
                break;
              }
            value = value * 16 + digit;
          }
        return value;
      }

    case 'u':
    case 'U':
      /* Unicode escape sequence: let do_getc_unicode_escaped read the
         'u' or 'U' again.  */
      phase3_ungetc (c);
      return do_getc_unicode_escaped (is_any_character);

    default:
      /* Invalid escape sequence.  */
      phase3_ungetc (c);
      return '\\';
    }
}
/* Read a regular string literal or character literal.
See ECMA-334 sections 9.4.4.4., 9.4.4.5. */
static void
accumulate_escaped (struct mixed_string_buffer *literal, int delimiter)
{
int c;
for (;;)
{
/* Use phase 3, because phase 4 elides comments. */
c = phase3_getc ();
if (c == UEOF || c == delimiter)
break;
if (c == UNL)
{
phase3_ungetc (c);
error_with_progname = false;
if (delimiter == '\'')
error (0, 0, _("%s:%d: warning: unterminated character constant"),
logical_file_name, line_number);
else
error (0, 0, _("%s:%d: warning: unterminated string constant"),
logical_file_name, line_number);
error_with_progname = true;
break;
}
if (c == '\\')
c = do_getc_escaped ();
if (literal)
mixed_string_buffer_append_unicode (literal, c);
}
}
/* Phase 6: Combine characters into tokens. Discard whitespace. */
/* Maximum used guaranteed to be < 4. */
static token_ty phase6_pushback[4];
static int phase6_pushback_length;
/* Store the next token in *TP.
   A token pushed back with phase6_unget is returned first.  Otherwise
   characters are read through phase5_getc (and, inside a token, through
   phase4_getc, phase3_getc or phase2_getc) until one token is complete.
   For token_type_symbol, tp->string is set to a freshly obtained string;
   for token_type_string_literal, tp->mixed_string and tp->comment are set.
   The caller releases these with free_token. */
static void
phase6_get (token_ty *tp)
{
int c;
if (phase6_pushback_length)
{
*tp = phase6_pushback[--phase6_pushback_length];
return;
}
/* Only the token_type_symbol branch below stores a real string here. */
tp->string = NULL;
for (;;)
{
/* Record the position before reading the first character of the token.
   This is redone after every skipped whitespace character. */
tp->line_number = line_number;
tp->logical_line_number = logical_line_number;
c = phase5_getc ();
if (c == UEOF)
{
tp->type = token_type_eof;
return;
}
switch (c)
{
case UNL:
/* At a newline, drop the saved comments if a non-comment line was seen
   after the last comment line. */
if (last_non_comment_line > last_comment_line)
savable_comment_reset ();
FALLTHROUGH;
case ' ':
case '\t':
case '\f':
/* Ignore whitespace and comments. */
continue;
}
/* From here on, C starts a real token. */
last_non_comment_line = tp->logical_line_number;
switch (c)
{
case '(':
tp->type = token_type_lparen;
return;
case ')':
tp->type = token_type_rparen;
return;
case '{':
tp->type = token_type_lbrace;
return;
case '}':
tp->type = token_type_rbrace;
return;
case ',':
tp->type = token_type_comma;
return;
case '.':
/* A '.' followed by a digit starts a number such as .5; any other '.'
   is the dot token. */
c = phase4_getc ();
if (!(c >= '0' && c <= '9'))
{
phase4_ungetc (c);
tp->type = token_type_dot;
return;
}
FALLTHROUGH;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
{
/* Don't need to verify the complicated syntax of integers and
floating-point numbers. We assume a valid C# input.
The simplified syntax that we recognize as number is: any
sequence of alphanumeric characters, additionally '+' and '-'
immediately after 'e' or 'E' except in hexadecimal numbers. */
bool hexadecimal = false;
for (;;)
{
c = phase4_getc ();
if (c >= '0' && c <= '9')
continue;
if ((c >= 'A' && c <= 'Z') || (c >= 'a' &&c <= 'z'))
{
if (c == 'X' || c == 'x')
hexadecimal = true;
if ((c == 'E' || c == 'e') && !hexadecimal)
{
/* Swallow the sign of an exponent, if there is one. */
c = phase4_getc ();
if (!(c == '+' || c == '-'))
phase4_ungetc (c);
}
continue;
}
if (c == '.')
continue;
break;
}
/* C is the first character after the number. */
phase4_ungetc (c);
tp->type = token_type_number;
return;
}
case '"':
/* Regular string literal. */
{
struct mixed_string_buffer literal;
lexical_context = lc_string;
mixed_string_buffer_init (&literal,
lexical_context,
logical_file_name,
logical_line_number);
accumulate_escaped (&literal, '"');
tp->mixed_string = mixed_string_buffer_result (&literal);
/* Attach the comments saved so far to the string. */
tp->comment = add_reference (savable_comment);
lexical_context = lc_outside;
tp->type = token_type_string_literal;
return;
}
case '\'':
/* Character literal. */
{
/* The contents are read and discarded. */
accumulate_escaped (NULL, '\'');
tp->type = token_type_other;
return;
}
case '+':
/* Only a lone '+' is reported as token_type_plus. */
c = phase4_getc ();
if (c == '+')
/* Operator ++ */
tp->type = token_type_other;
else if (c == '=')
/* Operator += */
tp->type = token_type_other;
else
{
/* Operator + */
phase4_ungetc (c);
tp->type = token_type_plus;
}
return;
case '@':
c = phase4_getc ();
if (c == '"')
{
/* Verbatim string literal. */
struct mixed_string_buffer literal;
lexical_context = lc_string;
mixed_string_buffer_init (&literal, lexical_context,
logical_file_name, logical_line_number);
for (;;)
{
/* Use phase 2, because phase 4 elides comments and phase 3
mixes up the newline characters. */
c = phase2_getc ();
if (c == UEOF)
break;
if (c == '"')
{
/* A doubled "" stands for one '"' inside the literal; a single '"'
   terminates the literal. */
c = phase2_getc ();
if (c != '"')
{
phase2_ungetc (c);
break;
}
}
/* No special treatment of newline and backslash here. */
mixed_string_buffer_append_unicode (&literal, c);
}
tp->mixed_string = mixed_string_buffer_result (&literal);
tp->comment = add_reference (savable_comment);
lexical_context = lc_outside;
tp->type = token_type_string_literal;
return;
}
/* Here C is the character that followed the '@'; the '@' itself is
   dropped. */
FALLTHROUGH; /* so that @identifier is recognized. */
default:
/* An identifier may start with a Unicode escape sequence. */
if (c == '\\')
c = do_getc_unicode_escaped (is_identifier_start);
if (is_identifier_start (c))
{
struct mixed_string_buffer buffer;
mixed_string_ty *mixed_string;
mixed_string_buffer_init (&buffer, lexical_context,
logical_file_name, logical_line_number);
for (;;)
{
mixed_string_buffer_append_unicode (&buffer, c);
c = phase4_getc ();
if (c == '\\')
c = do_getc_unicode_escaped (is_identifier_part);
if (!is_identifier_part (c))
break;
}
/* C is the first character after the identifier. */
phase4_ungetc (c);
mixed_string = mixed_string_buffer_result (&buffer);
tp->string = mixed_string_contents (mixed_string);
mixed_string_free (mixed_string);
tp->type = token_type_symbol;
return;
}
else
{
/* Misc. operator. */
tp->type = token_type_other;
return;
}
}
}
}
/* Push the token *TP back, so that the next phase6_get call returns it.
   An EOF token is not pushed back.
   Supports 3 tokens of pushback; aborts if the pushback array is full.  */
static void
phase6_unget (token_ty *tp)
{
  if (tp->type == token_type_eof)
    return;

  if (phase6_pushback_length == SIZEOF (phase6_pushback))
    abort ();

  phase6_pushback[phase6_pushback_length] = *tp;
  phase6_pushback_length++;
}
/* Phase 7: Compile-time optimization of string literal concatenation.
   Combine "string1" + ... + "stringN" to the concatenated string.  An
   addend is only merged if the token after it is not '.' (because then
   the last string could be part of a method call expression).  */

static token_ty phase7_pushback[2];
static int phase7_pushback_length;

/* Store the next token in *TP, with adjacent string literals that are
   joined by '+' merged into a single string literal token.  A token pushed
   back with phase7_unget is returned first, unchanged.  */
static void
phase7_get (token_ty *tp)
{
  mixed_string_ty *joined;

  if (phase7_pushback_length != 0)
    {
      phase7_pushback_length--;
      *tp = phase7_pushback[phase7_pushback_length];
      return;
    }

  phase6_get (tp);
  if (tp->type != token_type_string_literal)
    return;

  joined = tp->mixed_string;
  for (;;)
    {
      token_ty operator_token;
      token_ty addend;
      token_ty lookahead;

      phase6_get (&operator_token);
      if (operator_token.type != token_type_plus)
        {
          phase6_unget (&operator_token);
          break;
        }

      phase6_get (&addend);
      if (addend.type != token_type_string_literal)
        {
          phase6_unget (&addend);
          phase6_unget (&operator_token);
          break;
        }

      phase6_get (&lookahead);
      if (lookahead.type == token_type_dot)
        {
          /* The addend is the receiver of a member access: leave
             '+ "string" .' in the token stream untouched.  */
          phase6_unget (&lookahead);
          phase6_unget (&addend);
          phase6_unget (&operator_token);
          break;
        }

      /* Merge the addend and look for a further '+ "string"'.  */
      joined = mixed_string_concat_free1 (joined, addend.mixed_string);
      phase6_unget (&lookahead);
      free_token (&addend);
      free_token (&operator_token);
    }
  tp->mixed_string = joined;
}
/* Push the token *TP back, so that the next phase7_get call returns it.
   An EOF token is not pushed back.
   Supports 2 tokens of pushback; aborts if the pushback array is full.  */
static void
phase7_unget (token_ty *tp)
{
  if (tp->type == token_type_eof)
    return;

  if (phase7_pushback_length == SIZEOF (phase7_pushback))
    abort ();

  phase7_pushback[phase7_pushback_length] = *tp;
  phase7_pushback_length++;
}
/* Store the next token of the input in *TP.  This is the token source
   used by extract_parenthesized; it forwards to phase7_get. */
static void
x_csharp_lex (token_ty *tp)
{
phase7_get (tp);
}
/* Push the token *TP back, so that the next x_csharp_lex call returns it.
   Forwards to phase7_unget.
   Supports 2 tokens of pushback. */
static void
x_csharp_unlex (token_ty *tp)
{
phase7_unget (tp);
}
/* ========================= Extracting strings. ========================== */
/* Context lookup table. */
static flag_context_list_table_ty *flag_context_list_table;
/* The file is broken into tokens. Scan the token stream, looking for
a keyword, followed by a left paren, followed by a string. When we
see this sequence, we have something to remember. We assume we are
looking at a valid C# program, and leave the complaints about
the grammar to the compiler.
Normal handling: Look for
keyword ( ... msgid ... )
Plural handling: Look for
keyword ( ... msgid ... msgid_plural ... )
We use recursion because the arguments before msgid or between msgid
and msgid_plural can contain subexpressions of the same form. */
/* Extract messages until the next balanced closing parenthesis or brace,
depending on TERMINATOR.
Extracted messages are added to MLP.
OUTER_CONTEXT and CONTEXT_ITER determine the flag context of each
argument: CONTEXT_ITER is advanced once at the start and once per comma.
ARGPARSER collects the string arguments of the enclosing call;
arglist_parser_done is called on it on every return path.
If TERMINATOR is neither token_type_rparen nor token_type_rbrace (the
top-level call passes token_type_eof), closing parentheses and braces are
skipped silently and only the end of input ends the scan.
Return true upon eof, false upon closing parenthesis or brace. */
static bool
extract_parenthesized (message_list_ty *mlp, token_type_ty terminator,
flag_context_ty outer_context,
flag_context_list_iterator_ty context_iter,
struct arglist_parser *argparser)
{
/* Current argument number. */
int arg = 1;
/* 0 when no keyword has been seen. 1 right after a keyword is seen. */
int state;
/* Parameters of the keyword just seen. Defined only in state 1. */
const struct callshapes *next_shapes = NULL;
/* Context iterator that will be used if the next token is a '('. */
flag_context_list_iterator_ty next_context_iter =
passthrough_context_list_iterator;
/* Current context. */
flag_context_ty inner_context =
inherited_context (outer_context,
flag_context_list_iterator_advance (&context_iter));
/* Start state is 0. */
state = 0;
for (;;)
{
token_ty token;
x_csharp_lex (&token);
switch (token.type)
{
case token_type_symbol:
{
/* Combine symbol1 . ... . symbolN to a single string, so that
we can recognize static function calls like
GettextResource.gettext. The information present for
symbolI.....symbolN has precedence over the information for
symbolJ.....symbolN with J > I. */
char *sum = token.string;
size_t sum_len = strlen (sum);
const char *dottedname;
flag_context_list_ty *context_list;
for (;;)
{
token_ty token2;
x_csharp_lex (&token2);
if (token2.type == token_type_dot)
{
token_ty token3;
x_csharp_lex (&token3);
if (token3.type == token_type_symbol)
{
/* Append ".symbol" to SUM, including the terminating NUL. */
char *addend = token3.string;
size_t addend_len = strlen (addend);
sum =
(char *) xrealloc (sum, sum_len + 1 + addend_len + 1);
sum[sum_len] = '.';
memcpy (sum + sum_len + 1, addend, addend_len + 1);
sum_len += 1 + addend_len;
free_token (&token3);
free_token (&token2);
continue;
}
x_csharp_unlex (&token3);
}
x_csharp_unlex (&token2);
break;
}
/* Look up SUM among the keywords, then each of its suffixes that start
   after a '.', longest first. */
for (dottedname = sum;;)
{
void *keyword_value;
if (hash_find_entry (&keywords, dottedname, strlen (dottedname),
&keyword_value)
== 0)
{
next_shapes = (const struct callshapes *) keyword_value;
state = 1;
break;
}
dottedname = strchr (dottedname, '.');
if (dottedname == NULL)
{
state = 0;
break;
}
dottedname++;
}
/* Same search order for the flag context list.  CONTEXT_LIST stays NULL
   if no suffix is found. */
for (dottedname = sum;;)
{
context_list =
flag_context_list_table_lookup (
flag_context_list_table,
dottedname, strlen (dottedname));
if (context_list != NULL)
break;
dottedname = strchr (dottedname, '.');
if (dottedname == NULL)
break;
dottedname++;
}
next_context_iter = flag_context_list_iterator (context_list);
free (sum);
continue;
}
case token_type_lparen:
/* Parse the parenthesized argument list recursively.  The call shapes of
   the keyword are only passed on if the keyword came right before. */
if (extract_parenthesized (mlp, token_type_rparen,
inner_context, next_context_iter,
arglist_parser_alloc (mlp,
state ? next_shapes : NULL)))
{
arglist_parser_done (argparser, arg);
return true;
}
next_context_iter = null_context_list_iterator;
state = 0;
continue;
case token_type_rparen:
if (terminator == token_type_rparen)
{
arglist_parser_done (argparser, arg);
return false;
}
if (terminator == token_type_rbrace)
{
error_with_progname = false;
error (0, 0,
_("%s:%d: warning: ')' found where '}' was expected"),
logical_file_name, token.line_number);
error_with_progname = true;
}
next_context_iter = null_context_list_iterator;
state = 0;
continue;
case token_type_lbrace:
/* A brace block starts afresh: null context and no call shapes. */
if (extract_parenthesized (mlp, token_type_rbrace,
null_context, null_context_list_iterator,
arglist_parser_alloc (mlp, NULL)))
{
arglist_parser_done (argparser, arg);
return true;
}
next_context_iter = null_context_list_iterator;
state = 0;
continue;
case token_type_rbrace:
if (terminator == token_type_rbrace)
{
arglist_parser_done (argparser, arg);
return false;
}
if (terminator == token_type_rparen)
{
error_with_progname = false;
error (0, 0,
_("%s:%d: warning: '}' found where ')' was expected"),
logical_file_name, token.line_number);
error_with_progname = true;
}
next_context_iter = null_context_list_iterator;
state = 0;
continue;
case token_type_comma:
/* Next argument: advance the argument number and its context. */
arg++;
inner_context =
inherited_context (outer_context,
flag_context_list_iterator_advance (
&context_iter));
next_context_iter = passthrough_context_list_iterator;
state = 0;
continue;
case token_type_string_literal:
{
lex_pos_ty pos;
pos.file_name = logical_file_name;
pos.line_number = token.line_number;
if (extract_all)
{
/* Every string is remembered as a message right away. */
char *string = mixed_string_contents (token.mixed_string);
mixed_string_free (token.mixed_string);
remember_a_message (mlp, NULL, string, true, false,
inner_context, &pos,
NULL, token.comment, true);
}
else
/* Otherwise the string is recorded as argument number ARG of the
   enclosing call. */
arglist_parser_remember (argparser, arg, token.mixed_string,
inner_context,
pos.file_name, pos.line_number,
token.comment, true);
}
drop_reference (token.comment);
next_context_iter = null_context_list_iterator;
state = 0;
continue;
case token_type_eof:
arglist_parser_done (argparser, arg);
return true;
case token_type_dot:
case token_type_number:
case token_type_plus:
case token_type_other:
next_context_iter = null_context_list_iterator;
state = 0;
continue;
default:
abort ();
}
}
}
/* Scan the C# source read from F and add the extracted messages to the
   message list of the first domain in MDLP.
   REAL_FILENAME and LOGICAL_FILENAME are stored in the lexer's global
   state for the duration of the scan; FLAG_TABLE becomes the context
   lookup table used by extract_parenthesized.
   The copy of LOGICAL_FILENAME made here is not freed by this function.  */
void
extract_csharp (FILE *f,
                const char *real_filename, const char *logical_filename,
                flag_context_list_table_ty *flag_table,
                msgdomain_list_ty *mdlp)
{
  message_list_ty *messages = mdlp->item[0]->messages;
  bool at_eof;

  /* Input stream and position.  */
  fp = f;
  real_file_name = real_filename;
  logical_file_name = xstrdup (logical_filename);
  line_number = 1;
  logical_line_number = 1;
  lexical_context = lc_outside;

  /* No comment and no code line seen yet.  */
  last_comment_line = -1;
  last_non_comment_line = -1;

  /* Empty the pushback buffers of all phases.  */
  phase1_pushback_length = 0;
  phase2_pushback_length = 0;
  phase3_pushback_length = 0;
  phase5_pushback_length = 0;
  phase6_pushback_length = 0;
  phase7_pushback_length = 0;

  flag_context_list_table = flag_table;

  init_keywords ();

  /* Eat tokens until eof is seen.  When extract_parenthesized returns
     without having seen eof, just restart it.  */
  do
    at_eof = extract_parenthesized (messages, token_type_eof,
                                    null_context, null_context_list_iterator,
                                    arglist_parser_alloc (messages, NULL));
  while (!at_eof);

  /* Reset the global state.  */
  fp = NULL;
  real_file_name = NULL;
  logical_file_name = NULL;
  line_number = 0;
}