blob: 53c1107435ff8c52a0f67e78ac07cf1ecd614bf8 [file] [log] [blame]
/* -*- Mode: C; indent-tabs-mode:nil; c-basic-offset: 8-*- */
/* libcroco - Library for parsing and applying CSS
* Copyright (C) 2006-2019 Free Software Foundation, Inc.
*
* This file is not part of the GNU gettext program, but is used with
* GNU gettext.
*
* The original copyright notice is as follows:
*/
/*
* This file is part of The Croco Library
*
* Copyright (C) 2003-2004 Dodji Seketeli. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2.1 of the GNU Lesser General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
* Author: Dodji Seketeli
*/
/**
*@file
*The definition of the #CRTknzr (tokenizer)
*class.
*/
#include <config.h>
#include "string.h"
#include "cr-tknzr.h"
#include "cr-doc-handler.h"
/**
 *The private data of a #CRTknzr instance.
 *It is reached through the PRIVATE() macro.
 */
struct _CRTknzrPriv {
/**The input stream of bytes the tokenizer reads from.*/
CRInput *input;
/**
*A cache where tknzr_unget_token()
*puts back the token. tknzr_get_next_token()
*first looks in this cache, and if and
*only if it is empty, fetches the next token
*from the input stream.
*/
CRToken *token_cache;
/**
*The position of the end of the previous token
*or char fetched.
*/
CRInputPos prev_pos;
/**
*A document handler attached to this tokenizer.
*NOTE(review): it is not referenced in this part of the file;
*presumably it receives SAC callbacks - confirm against its users.
*/
CRDocHandler *sac_handler;
/**
*The reference count of the current instance
*of #CRTknzr. It is manipulated by cr_tknzr_ref()
*and cr_tknzr_unref().
*/
glong ref_count;
};
/**
 *Shorthand to reach the 'priv' member of an object.
 *@param obj a pointer to an object that has a 'priv' member.
 */
#define PRIVATE(obj) ((obj)->priv)
/**
 *Tells whether a character is a decimal digit.
 *@param a_char the character to test. It appears twice in the
 *expansion, so it must be free of side effects.
 *@return TRUE if a_char lies in ['0'-'9'], FALSE otherwise.
 */
#define IS_NUM(a_char) (((a_char) < '0' || (a_char) > '9') ? FALSE : TRUE)
/**
 *Checks if 'status' equals CR_OK. If not, jumps to the 'error' label,
 *which must exist in the function where this macro is expanded.
 *
 *@param status the status (of type enum CRStatus) to test. It must be
 *an lvalue because it may be overwritten with CR_PARSING_ERROR.
 *@param is_exception if set to FALSE, 'status' is overwritten with
 *CR_PARSING_ERROR before the jump. If set to TRUE, 'status' keeps
 *the value it had when the check failed.
 *
 *Note: the expansion is a bare 'if' statement followed by a block,
 *not a do { ... } while (0) construct, so it must not be followed
 *by an 'else' that belongs to an outer 'if'.
 */
#define CHECK_PARSING_STATUS(status, is_exception) \
if ((status) != CR_OK) \
{ \
if (is_exception == FALSE) \
{ \
status = CR_PARSING_ERROR ; \
} \
goto error ; \
}
/**
 *Peeks the next char from the input stream of the current tokenizer
 *by calling cr_tknzr_peek_char(). The result is stored in a variable
 *named 'status' that must exist in the calling function, and
 *CHECK_PARSING_STATUS (status, TRUE) is then invoked on it: any
 *status other than CR_OK jumps to the "error:" label of the
 *calling function, with 'status' left unchanged.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_to_char a pointer to the char where to store the
 *char peeked.
 */
#define PEEK_NEXT_CHAR(a_tknzr, a_to_char) \
{\
status = cr_tknzr_peek_char (a_tknzr, a_to_char) ; \
CHECK_PARSING_STATUS (status, TRUE) \
}
/**
 *Reads the next char from the input stream of the current tokenizer
 *by calling cr_tknzr_read_char(). The result is stored in a variable
 *named 'status' that must exist in the calling function.
 *In case of error, jumps to the "error:" label located in the
 *function where this macro is called.
 *
 *Note: the expansion is made of two statements that are not wrapped
 *in braces, so this macro must not be used as the unbraced body of
 *an 'if', 'else' or loop.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param to_char a pointer to the guint32 char where to store
 *the character read.
 */
#define READ_NEXT_CHAR(a_tknzr, to_char) \
status = cr_tknzr_read_char (a_tknzr, to_char) ;\
CHECK_PARSING_STATUS (status, TRUE)
/**
 *Gets information about the current position in
 *the input of the tokenizer, by calling cr_input_get_cur_pos().
 *The result is stored in a variable named 'status' that must exist
 *in the calling function.
 *In case of failure, this macro does not jump to the "error:" label:
 *through g_return_val_if_fail() it returns from the
 *calling function with the failing status code
 *(of type enum #CRStatus).
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_pos out parameter. A pointer to the structure that
 *receives the current position inside the tokenizer input.
 */
#define RECORD_INITIAL_POS(a_tknzr, a_pos) \
status = cr_input_get_cur_pos (PRIVATE \
(a_tknzr)->input, a_pos) ; \
g_return_val_if_fail (status == CR_OK, status)
/**
 *Gets the address of the current byte inside the
 *tokenizer input, by calling cr_input_get_cur_byte_addr().
 *The result is stored in a variable named 'status' that must exist
 *in the calling function. If it fails, jumps to the "error:" label
 *of the calling function.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_addr out parameter. A pointer (guchar **)
 *to where the address must be put.
 */
#define RECORD_CUR_BYTE_ADDR(a_tknzr, a_addr) \
status = cr_input_get_cur_byte_addr \
(PRIVATE (a_tknzr)->input, a_addr) ; \
CHECK_PARSING_STATUS (status, TRUE)
/**
 *Peeks a byte from the tokenizer input at
 *a given offset from the current position, by calling
 *cr_tknzr_peek_byte(). The result is stored in a variable named
 *'status' that must exist in the calling function.
 *If it fails, jumps to the "error:" label of the calling function.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_offset the offset of the byte to peek. The callers in
 *this file use 1 for the next byte to be read, 2 for the one
 *after it, and so on.
 *@param a_byte_ptr out parameter. A pointer (guchar *) to
 *where the peeked byte is to be stored.
 */
#define PEEK_BYTE(a_tknzr, a_offset, a_byte_ptr) \
status = cr_tknzr_peek_byte (a_tknzr, \
a_offset, \
a_byte_ptr) ; \
CHECK_PARSING_STATUS (status, TRUE) ;
/**
 *Shorthand that forwards its three arguments, unchanged, to
 *cr_input_peek_byte2().
 *NOTE(review): the meaning of the arguments is the one defined by
 *cr_input_peek_byte2(), which is not visible here - confirm there.
 */
#define BYTE(a_input, a_n, a_eof) \
cr_input_peek_byte2 (a_input, a_n, a_eof)
/**
 *Reads a byte from the tokenizer input
 *stream, by calling cr_input_read_byte(). The result is stored in a
 *variable named 'status' that must exist in the calling function.
 *If it fails, jumps to the "error:" label of the calling function.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_byte_ptr the guchar * where to put the byte read.
 */
#define READ_NEXT_BYTE(a_tknzr, a_byte_ptr) \
status = \
cr_input_read_byte (PRIVATE (a_tknzr)->input, a_byte_ptr) ;\
CHECK_PARSING_STATUS (status, TRUE) ;
/**
 *Skips a given number of bytes in the tokenizer input, by calling
 *cr_input_seek_index() relative to the current position
 *(CR_SEEK_CUR). The result is stored in a variable named 'status'
 *that must exist in the calling function.
 *In case of error, jumps to the "error:" label
 *of the surrounding function.
 *NOTE(review): the original note states that line and column
 *numbers are not updated by this macro - confirm against
 *cr_input_seek_index().
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_nb_bytes the number of bytes to skip.
 */
#define SKIP_BYTES(a_tknzr, a_nb_bytes) \
status = cr_input_seek_index (PRIVATE (a_tknzr)->input, \
CR_SEEK_CUR, a_nb_bytes) ; \
CHECK_PARSING_STATUS (status, TRUE) ;
/**
 *Skips utf8 encoded characters, by calling cr_input_consume_chars().
 *The result is stored in a variable named 'status' that must exist
 *in the calling function. In case of error, jumps to the "error:"
 *label of the surrounding function.
 *NOTE(review): the original note states that line and column
 *numbers are updated; the 0 passed as second argument presumably
 *means "any character" - confirm both against
 *cr_input_consume_chars().
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_nb_chars the number of chars to skip. It is copied into
 *a local variable of type gulong.
 */
#define SKIP_CHARS(a_tknzr, a_nb_chars) \
{ \
gulong nb_chars = a_nb_chars ; \
status = cr_input_consume_chars \
(PRIVATE (a_tknzr)->input,0, &nb_chars) ; \
CHECK_PARSING_STATUS (status, TRUE) ; \
}
/**
 *Tests the condition and if it is false, sets the variable named
 *'status' of the calling function to CR_PARSING_ERROR and jumps to
 *the "error:" label of the calling function.
 *
 *Note: the expansion is a bare 'if' statement, so it must not be
 *followed by an 'else' that belongs to an outer 'if'.
 *
 *@param condition the condition to test.
 */
#define ENSURE_PARSING_COND(condition) \
if (! (condition)) {status = CR_PARSING_ERROR; goto error ;}
/*
 *Forward declarations of the file-local parsing helpers.
 *Each of them returns an enum CRStatus; the ones that take a
 *CRParsingLocation pointer use it to report where parsing started.
 */
static enum CRStatus cr_tknzr_parse_nl (CRTknzr * a_this,
guchar ** a_start,
guchar ** a_end,
CRParsingLocation *a_location);
static enum CRStatus cr_tknzr_parse_w (CRTknzr * a_this,
guchar ** a_start,
guchar ** a_end,
CRParsingLocation *a_location) ;
static enum CRStatus cr_tknzr_parse_unicode_escape (CRTknzr * a_this,
guint32 * a_unicode,
CRParsingLocation *a_location) ;
static enum CRStatus cr_tknzr_parse_escape (CRTknzr * a_this,
guint32 * a_esc_code,
CRParsingLocation *a_location);
static enum CRStatus cr_tknzr_parse_string (CRTknzr * a_this,
CRString ** a_str);
static enum CRStatus cr_tknzr_parse_comment (CRTknzr * a_this,
CRString ** a_comment);
static enum CRStatus cr_tknzr_parse_nmstart (CRTknzr * a_this,
guint32 * a_char,
CRParsingLocation *a_location);
static enum CRStatus cr_tknzr_parse_num (CRTknzr * a_this,
CRNum ** a_num);
/**********************************
*PRIVATE methods
**********************************/
/**
 *Parses a run of white spaces. The css spec defines, at [4.1.1]:
 * w ::= [ \t\r\n\f]*
 *but this function requires at least one white space character:
 *when the next character is not a white space it fails with
 *CR_PARSING_ERROR.
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_start out param. Set to NULL first; once the first white
 *space has been consumed it receives the current byte address of
 *the input.
 *@param a_end out param. Set to NULL first; it then receives the
 *current byte address of the input after each white space consumed.
 *@param a_location out param, may be NULL. If not NULL, it is
 *filled with the parsing location taken right after the first
 *white space has been consumed.
 *@return CR_OK if at least one white space was consumed, an error
 *code otherwise. On error the input position is put back where it
 *was when the function was entered.
 */
static enum CRStatus
cr_tknzr_parse_w (CRTknzr * a_this,
                  guchar ** a_start,
                  guchar ** a_end,
                  CRParsingLocation *a_location)
{
        enum CRStatus status = CR_OK;
        CRInputPos start_pos;
        guint32 ch = 0;
        gboolean at_eof = FALSE;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_start && a_end,
                              CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &start_pos);

        *a_start = NULL;
        *a_end = NULL;

        /* The first character must be a white space. */
        READ_NEXT_CHAR (a_this, &ch);
        ENSURE_PARSING_COND (cr_utils_is_white_space (ch) != FALSE);

        if (a_location) {
                cr_tknzr_get_parsing_location (a_this, a_location) ;
        }
        RECORD_CUR_BYTE_ADDR (a_this, a_start);
        *a_end = *a_start;

        /* Then swallow every white space that follows. */
        while (TRUE) {
                at_eof = FALSE;
                cr_input_get_end_of_file (PRIVATE (a_this)->input,
                                          &at_eof);
                if (at_eof) {
                        break;
                }

                status = cr_tknzr_peek_char (a_this, &ch);
                if (status == CR_END_OF_INPUT_ERROR) {
                        break;
                }
                if (status != CR_OK) {
                        goto error;
                }

                if (cr_utils_is_white_space (ch) != TRUE) {
                        break;
                }

                READ_NEXT_CHAR (a_this, &ch);
                RECORD_CUR_BYTE_ADDR (a_this, a_end);
        }

        return CR_OK;

 error:
        cr_tknzr_set_cur_pos (a_this, &start_pos);

        return status;
}
/**
 *Parses a newline as defined in the css2 spec:
 * nl ::= \n|\r\n|\r|\f
 *
 *@param a_this the "this pointer" of the current instance of #CRTknzr.
 *@param a_start out param. Upon successful completion, receives the
 *current byte address of the input taken once the first byte of the
 *newline has been consumed. Left untouched on error.
 *@param a_end out param. Upon successful completion, receives the
 *current byte address of the input taken once the last byte of the
 *newline has been consumed. Left untouched on error.
 *@param a_location out param, may be NULL. If not NULL, it is filled
 *with the parsing location taken after the first byte of the newline
 *has been consumed.
 *@return CR_OK upon successful completion, an error code otherwise.
 *On error the input position is put back where it was when the
 *function was entered.
 *
 *NOTE(review): the second byte is always peeked, so a newline that is
 *the very last byte of the input is presumably reported as an error -
 *confirm against cr_tknzr_peek_byte().
 */
static enum CRStatus
cr_tknzr_parse_nl (CRTknzr * a_this,
                   guchar ** a_start,
                   guchar ** a_end,
                   CRParsingLocation *a_location)
{
        CRInputPos init_pos;
        guchar next_chars[2] = { 0 };
        guchar *start = NULL,
                *end = NULL;
        enum CRStatus status = CR_PARSING_ERROR;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && a_start && a_end, CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &init_pos);

        PEEK_BYTE (a_this, 1, &next_chars[0]);
        PEEK_BYTE (a_this, 2, &next_chars[1]);

        if ((next_chars[0] == '\r' && next_chars[1] == '\n')) {
                SKIP_BYTES (a_this, 1);
                /*
                 *Record the start here, like the single character
                 *branch does: this branch used to leave *a_start
                 *unset.
                 */
                RECORD_CUR_BYTE_ADDR (a_this, &start);
                if (a_location) {
                        cr_tknzr_get_parsing_location
                                (a_this, a_location) ;
                }
                SKIP_CHARS (a_this, 1);
                RECORD_CUR_BYTE_ADDR (a_this, &end);
        } else if (next_chars[0] == '\n'
                   || next_chars[0] == '\r' || next_chars[0] == '\f') {
                SKIP_CHARS (a_this, 1);
                if (a_location) {
                        cr_tknzr_get_parsing_location
                                (a_this, a_location) ;
                }
                RECORD_CUR_BYTE_ADDR (a_this, &start);
                end = start;
        } else {
                status = CR_PARSING_ERROR;
                goto error;
        }

        /* Only publish the results once everything has succeeded. */
        *a_start = start;
        *a_end = end;

        return CR_OK ;

 error:
        cr_tknzr_set_cur_pos (a_this, &init_pos) ;

        return status;
}
/**
 *Moves forward in the tokenizer input, skipping the white spaces
 *found at the current position.
 *If the next char is not a white space, or if the end of the input
 *has been reached, this function does nothing and reports success.
 *
 *@param a_this the current instance of #CRTknzr.
 *@return CR_OK upon successful completion, an error code otherwise.
 */
static enum CRStatus
cr_tknzr_try_to_skip_spaces (CRTknzr * a_this)
{
        enum CRStatus status = CR_ERROR;
        guint32 next = 0;
        gulong nb_spaces = -1; /*consume all spaces */

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);

        status = cr_input_peek_char (PRIVATE (a_this)->input, &next);

        /* Reaching the end of the input is not an error here. */
        if (status == CR_END_OF_INPUT_ERROR) {
                return CR_OK;
        }
        if (status != CR_OK) {
                return status;
        }

        /* Nothing to skip: status is CR_OK at this point. */
        if (cr_utils_is_white_space (next) != TRUE) {
                return status;
        }

        return cr_input_consume_white_spaces (PRIVATE (a_this)->input,
                                              &nb_spaces);
}
/**
 *Parses a "comment" as defined in the css spec at [4.1.1]:
 *COMMENT ::= \/\*[^*]*\*+([^/][^*]*\*+)*\/ .
 *This complex regexp is just to say that comments start
 *with the two chars '/''*' and ends with the two chars '*''/'.
 *It also means that comments cannot be nested.
 *The parsing below follows the regexp piece by piece.
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_comment out param, must not be NULL. Upon successful
 *completion, *a_comment is overwritten with a newly allocated
 *#CRString holding everything that follows the opening '/''*',
 *including the closing '*''/'. Its location is the parsing location
 *taken right after the opening '/' has been consumed.
 *@return CR_OK upon successful completion, an error code otherwise.
 *On error, nothing is stored in *a_comment and the input position
 *is put back where it was when the function was entered.
 */
static enum CRStatus
cr_tknzr_parse_comment (CRTknzr * a_this,
                        CRString ** a_comment)
{
        enum CRStatus status = CR_OK;
        CRInputPos init_pos;
        guint32 cur_char = 0, next_char= 0;
        CRString *comment = NULL;
        CRParsingLocation loc = {0} ;

        /* a_comment is written through on success, so it must be valid. */
        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_comment,
                              CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &init_pos);

        READ_NEXT_CHAR (a_this, &cur_char) ;
        ENSURE_PARSING_COND (cur_char == '/');
        cr_tknzr_get_parsing_location (a_this, &loc) ;

        READ_NEXT_CHAR (a_this, &cur_char);
        ENSURE_PARSING_COND (cur_char == '*');

        comment = cr_string_new ();
        if (comment == NULL) {
                status = CR_OUT_OF_MEMORY_ERROR;
                goto error;
        }

        for (;;) { /* [^*]* */
                PEEK_NEXT_CHAR (a_this, &next_char);
                if (next_char == '*')
                        break;
                READ_NEXT_CHAR (a_this, &cur_char);
                g_string_append_unichar (comment->stryng, cur_char);
        }
        /* Stop condition: next_char == '*' */
        for (;;) { /* \*+ */
                READ_NEXT_CHAR(a_this, &cur_char);
                ENSURE_PARSING_COND (cur_char == '*');
                g_string_append_unichar (comment->stryng, cur_char);
                PEEK_NEXT_CHAR (a_this, &next_char);
                if (next_char != '*')
                        break;
        }
        /* Stop condition: next_char != '*' */
        for (;;) { /* ([^/][^*]*\*+)* */
                if (next_char == '/')
                        break;
                READ_NEXT_CHAR(a_this, &cur_char);
                g_string_append_unichar (comment->stryng, cur_char);
                for (;;) { /* [^*]* */
                        PEEK_NEXT_CHAR (a_this, &next_char);
                        if (next_char == '*')
                                break;
                        READ_NEXT_CHAR (a_this, &cur_char);
                        g_string_append_unichar (comment->stryng, cur_char);
                }
                /*
                 *Stop condition: next_char == '*'. No need to verify
                 *it, because peek and read jump to error anyway.
                 */
                for (;;) { /* \*+ */
                        READ_NEXT_CHAR(a_this, &cur_char);
                        ENSURE_PARSING_COND (cur_char == '*');
                        g_string_append_unichar (comment->stryng, cur_char);
                        PEEK_NEXT_CHAR (a_this, &next_char);
                        if (next_char != '*')
                                break;
                }
                /* Continue condition: next_char != '*' */
        }
        /* Stop condition: next_char == '/' */
        READ_NEXT_CHAR(a_this, &cur_char);
        g_string_append_unichar (comment->stryng, cur_char);

        /*
         *Every failing step above jumps to the error label, so
         *reaching this point means the whole comment was parsed.
         */
        cr_parsing_location_copy (&comment->location, &loc) ;
        *a_comment = comment;
        return CR_OK;

 error:
        if (comment) {
                cr_string_destroy (comment);
                comment = NULL;
        }

        cr_tknzr_set_cur_pos (a_this, &init_pos);

        return status;
}
/**
 *Parses an 'unicode' escape sequence defined
 *in css spec at chap 4.1.1:
 *unicode ::= \\[0-9a-f]{1,6}[ \n\r\t\f]?
 *Upper case hexadecimal digits are accepted as well.
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_unicode out parameter. Upon successful completion,
 *receives the value of the hexadecimal number that follows the
 *backslash. Left untouched on error.
 *@param a_location out parameter, may be NULL. If not NULL, it is
 *filled with the parsing location taken right after the backslash
 *has been consumed.
 *@return CR_OK if parsing succeeded, an error code otherwise.
 *Error code can be either CR_PARSING_ERROR if the string
 *parsed just doesn't respect the production (no backslash, or no
 *hexadecimal digit after it) or another error if a
 *lower level error occurred. On error the input position is put
 *back where it was when the function was entered.
 */
static enum CRStatus
cr_tknzr_parse_unicode_escape (CRTknzr * a_this,
                               guint32 * a_unicode,
                               CRParsingLocation *a_location)
{
        guint32 cur_char = 0;
        CRInputPos init_pos;
        glong occur = 0;
        guint32 unicode = 0;
        guchar *tmp_char_ptr1 = NULL,
                *tmp_char_ptr2 = NULL;
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && a_unicode, CR_BAD_PARAM_ERROR);

        /*first, let's backup the current position pointer */
        RECORD_INITIAL_POS (a_this, &init_pos);

        READ_NEXT_CHAR (a_this, &cur_char);
        ENSURE_PARSING_COND (cur_char == '\\');

        if (a_location) {
                cr_tknzr_get_parsing_location
                        (a_this, a_location) ;
        }

        /* Accumulate at most 6 hexadecimal digits. */
        for (occur = 0; occur < 6; occur++) {
                gint cur_char_val = 0;

                status = cr_tknzr_peek_char (a_this, &cur_char);
                if (status == CR_END_OF_INPUT_ERROR && occur > 0) {
                        /*
                         *The input ends right after a valid digit:
                         *the escape sequence is complete.
                         */
                        status = CR_OK;
                        break;
                }
                CHECK_PARSING_STATUS (status, TRUE);

                if (cur_char >= '0' && cur_char <= '9') {
                        cur_char_val = (cur_char - '0');
                } else if (cur_char >= 'a' && cur_char <= 'f') {
                        cur_char_val = 10 + (cur_char - 'a');
                } else if (cur_char >= 'A' && cur_char <= 'F') {
                        cur_char_val = 10 + (cur_char - 'A');
                } else {
                        break;
                }

                /* Consume the digit that was just peeked. */
                READ_NEXT_CHAR (a_this, &cur_char);
                unicode = unicode * 16 + cur_char_val;
        }

        /* The production requires at least one hexadecimal digit. */
        ENSURE_PARSING_COND (occur > 0);

        /*
         *Eat the white space that may follow; failing to find one is
         *not an error, hence the ignored return value.
         *NOTE(review): cr_tknzr_parse_w() consumes the whole run of
         *white spaces, whereas the production allows a single one.
         */
        cr_tknzr_parse_w (a_this, &tmp_char_ptr1,
                          &tmp_char_ptr2, NULL);

        *a_unicode = unicode;
        return CR_OK;

 error:
        /*
         *restore the initial position pointer backuped at
         *the beginning of this function.
         */
        cr_tknzr_set_cur_pos (a_this, &init_pos);

        return status;
}
/**
 *Parses an escape sequence as defined by the css spec:
 *escape ::= {unicode}|\\[ -~\200-\4177777]
 *The bounds \200 and \4177777 of the production are octal numbers,
 *that is 0x80 and 0x10FFFF.
 *
 *@param a_this the current instance of #CRTknzr .
 *@param a_esc_code out parameter. Upon successful completion,
 *receives the character denoted by the escape sequence.
 *@param a_location out parameter, may be NULL. If not NULL, it is
 *filled with the parsing location taken right after the backslash
 *has been consumed.
 *@return CR_OK upon successful completion, an error code otherwise.
 *On error the input position is put back where it was when the
 *function was entered.
 */
static enum CRStatus
cr_tknzr_parse_escape (CRTknzr * a_this, guint32 * a_esc_code,
                       CRParsingLocation *a_location)
{
        enum CRStatus status = CR_OK;
        guint32 cur_char = 0;
        CRInputPos init_pos;
        guchar next_chars[2] = { 0 };

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && a_esc_code, CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &init_pos);

        PEEK_BYTE (a_this, 1, &next_chars[0]);
        PEEK_BYTE (a_this, 2, &next_chars[1]);

        if (next_chars[0] != '\\') {
                status = CR_PARSING_ERROR;
                goto error;
        }

        if ((next_chars[1] >= '0' && next_chars[1] <= '9')
            || (next_chars[1] >= 'a' && next_chars[1] <= 'f')
            || (next_chars[1] >= 'A' && next_chars[1] <= 'F')) {
                /* A backslash followed by a hexadecimal digit. */
                status = cr_tknzr_parse_unicode_escape (a_this, a_esc_code,
                                                        a_location);
        } else {
                /*consume the '\' char */
                READ_NEXT_CHAR (a_this, &cur_char);
                if (a_location) {
                        cr_tknzr_get_parsing_location (a_this,
                                                       a_location) ;
                }
                /*then read the char after the '\' */
                READ_NEXT_CHAR (a_this, &cur_char);

                /*
                 *Accept [ -~] and [\200-\4177777]. The previous test
                 *used the decimal values 200 and 4177777 and only
                 *accepted ' ' out of the first range.
                 */
                if (!((cur_char >= ' ' && cur_char <= '~')
                      || (cur_char >= 0x80 && cur_char <= 0x10FFFF))) {
                        status = CR_PARSING_ERROR;
                        goto error;
                }
                *a_esc_code = cur_char;
        }

        if (status == CR_OK) {
                return CR_OK;
        }

 error:
        cr_tknzr_set_cur_pos (a_this, &init_pos);

        return status;
}
/**
 *Parses a string type as defined in css spec [4.1.1]:
 *
 *string ::= {string1}|{string2}
 *string1 ::= \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\"
 *string2 ::= \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\'
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_str in/out parameter, must not be NULL. If *a_str is NULL,
 *it receives a newly allocated #CRString holding the content of the
 *string, without its delimiters. If not, the content of the string
 *is appended to *a_str. Left untouched on error.
 *@return CR_OK upon successful completion, an error code otherwise.
 *On error the input position is put back where it was when the
 *function was entered.
 */
static enum CRStatus
cr_tknzr_parse_string (CRTknzr * a_this, CRString ** a_str)
{
        guint32 cur_char = 0,
                delim = 0;
        CRInputPos init_pos;
        enum CRStatus status = CR_OK;
        CRString *str = NULL;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_str, CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &init_pos);
        READ_NEXT_CHAR (a_this, &cur_char);

        /* The opening quote decides which quote closes the string. */
        if (cur_char == '"' || cur_char == '\'') {
                delim = cur_char;
        } else {
                status = CR_PARSING_ERROR;
                goto error;
        }

        str = cr_string_new ();
        if (str == NULL) {
                status = CR_OUT_OF_MEMORY_ERROR;
                goto error;
        }
        cr_tknzr_get_parsing_location
                (a_this, &str->location) ;

        for (;;) {
                guchar next_chars[2] = { 0 };

                PEEK_BYTE (a_this, 1, &next_chars[0]);

                if (next_chars[0] == '\\') {
                        guchar *tmp_char_ptr1 = NULL,
                                *tmp_char_ptr2 = NULL;
                        guint32 esc_code = 0;

                        /*
                         *The byte after the backslash is only needed
                         *here. Peeking it unconditionally made a
                         *string whose closing quote is the last byte
                         *of the input fail.
                         */
                        PEEK_BYTE (a_this, 2, &next_chars[1]);

                        if (next_chars[1] == '\'' || next_chars[1] == '"') {
                                g_string_append_unichar (str->stryng,
                                                         next_chars[1]);
                                SKIP_BYTES (a_this, 2);
                                status = CR_OK;
                        } else {
                                status = cr_tknzr_parse_escape
                                        (a_this, &esc_code, NULL);
                                if (status == CR_OK) {
                                        g_string_append_unichar
                                                (str->stryng,
                                                 esc_code);
                                }
                        }

                        if (status != CR_OK) {
                                /*
                                 *consume the '\' char, and try to parse
                                 *a newline.
                                 */
                                READ_NEXT_CHAR (a_this, &cur_char);

                                status = cr_tknzr_parse_nl
                                        (a_this, &tmp_char_ptr1,
                                         &tmp_char_ptr2, NULL);
                        }

                        CHECK_PARSING_STATUS (status, FALSE);
                } else if (next_chars[0] == '\t'
                           || next_chars[0] == ' '
                           || next_chars[0] == '!'
                           || (next_chars[0] >= '#' && next_chars[0] <= '&')
                           || (next_chars[0] >= '(' && next_chars[0] <= '~')) {
                        /*
                         *[\t !#$%&(-~]. Explicit comparisons are used
                         *because strchr() also matches the terminating
                         *NUL of its first argument, which made a NUL
                         *byte pass for a valid string character.
                         */
                        READ_NEXT_CHAR (a_this, &cur_char);
                        g_string_append_unichar (str->stryng,
                                                 cur_char);
                        status = CR_OK;
                } else if (cr_utils_is_nonascii (next_chars[0])) {
                        READ_NEXT_CHAR (a_this, &cur_char);
                        g_string_append_unichar (str->stryng, cur_char);
                } else if (next_chars[0] == delim) {
                        /* Consume the closing quote and stop. */
                        READ_NEXT_CHAR (a_this, &cur_char);
                        break;
                } else {
                        status = CR_PARSING_ERROR;
                        goto error;
                }
        }

        /*
         *The loop is only left through the closing quote, every
         *failure jumps to the error label.
         */
        if (*a_str == NULL) {
                *a_str = str;
                str = NULL;
        } else {
                (*a_str)->stryng = g_string_append_len
                        ((*a_str)->stryng,
                         str->stryng->str,
                         str->stryng->len);
                cr_string_destroy (str);
                str = NULL;
        }
        return CR_OK;

 error:
        if (str) {
                cr_string_destroy (str) ;
                str = NULL;
        }
        cr_tknzr_set_cur_pos (a_this, &init_pos);
        return status;
}
/**
 *Parses an nmstart as defined by the css2 spec [4.1.1]:
 * nmstart [a-zA-Z]|{nonascii}|{escape}
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_char out param. The actual parsed nmstart.
 *@param a_location out param, may be NULL. If not NULL, it receives
 *the parsing location of the parsed character. For an escape
 *sequence it is filled by cr_tknzr_parse_escape().
 *@return CR_OK upon successful completion,
 *an error code otherwise. On error the input position is put back
 *where it was when the function was entered.
 */
static enum CRStatus
cr_tknzr_parse_nmstart (CRTknzr * a_this,
                        guint32 * a_char,
                        CRParsingLocation *a_location)
{
        CRInputPos start_pos;
        enum CRStatus status = CR_OK;
        guint32 peeked = 0,
                consumed = 0;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_char, CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &start_pos);

        PEEK_NEXT_CHAR (a_this, &peeked);

        /* An escape sequence is delegated as a whole. */
        if (peeked == '\\') {
                status = cr_tknzr_parse_escape (a_this, a_char,
                                                a_location);
                if (status != CR_OK) {
                        goto error;
                }
                return CR_OK;
        }

        /* Otherwise only a letter or a non ascii char is allowed. */
        ENSURE_PARSING_COND (cr_utils_is_nonascii (peeked) == TRUE
                             || (peeked >= 'a' && peeked <= 'z')
                             || (peeked >= 'A' && peeked <= 'Z'));

        READ_NEXT_CHAR (a_this, &consumed);
        if (a_location) {
                cr_tknzr_get_parsing_location (a_this, a_location) ;
        }
        *a_char = consumed;

        return CR_OK;

 error:
        cr_tknzr_set_cur_pos (a_this, &start_pos);

        return status;
}
/**
 *Parses an nmchar as described in the css spec at
 *chap 4.1.1:
 *nmchar ::= [a-z0-9-]|{nonascii}|{escape}
 *
 *On top of the production, upper case letters and the
 *underscore are accepted too.
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_char out param. The actual parsed nmchar.
 *@param a_location out param, may be NULL. If not NULL, it receives
 *the parsing location of the parsed character. For an escape
 *sequence it is filled by cr_tknzr_parse_escape().
 *@return CR_OK upon successful completion,
 *an error code otherwise. On error the input position is put back
 *where it was when the function was entered.
 */
static enum CRStatus
cr_tknzr_parse_nmchar (CRTknzr * a_this, guint32 * a_char,
                       CRParsingLocation *a_location)
{
        enum CRStatus status = CR_OK;
        CRInputPos start_pos;
        guint32 peeked = 0,
                consumed = 0;

        g_return_val_if_fail (a_this && PRIVATE (a_this) && a_char,
                              CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &start_pos);

        status = cr_input_peek_char (PRIVATE (a_this)->input,
                                     &peeked) ;
        if (status != CR_OK) {
                goto error;
        }

        /* An escape sequence is delegated as a whole. */
        if (peeked == '\\') {
                status = cr_tknzr_parse_escape (a_this, a_char,
                                                a_location);
                if (status != CR_OK) {
                        goto error;
                }
                return CR_OK;
        }

        ENSURE_PARSING_COND (cr_utils_is_nonascii (peeked) == TRUE
                             || (peeked >= 'a' && peeked <= 'z')
                             || (peeked >= 'A' && peeked <= 'Z')
                             || (peeked >= '0' && peeked <= '9')
                             || peeked == '-'
                             /*'_' not allowed by the spec. */
                             || peeked == '_');

        READ_NEXT_CHAR (a_this, &consumed);
        *a_char = consumed;
        if (a_location) {
                cr_tknzr_get_parsing_location
                        (a_this, a_location) ;
        }

        return CR_OK;

 error:
        cr_tknzr_set_cur_pos (a_this, &start_pos);

        return status;
}
/**
 *Parses an "ident". The css spec [4.1.1] defines it as:
 *ident ::= {nmstart}{nmchar}*
 *
 *What is parsed here is the css3 flavour of it:
 *ident ::= -?{nmstart}{nmchar}*
 *
 *@param a_this the current instance of #CRTknzr.
 *
 *@param a_str a pointer to the parsed ident. If *a_str is NULL,
 *it receives a newly allocated instance of CRString. If not,
 *the parsed ident is appended to the string passed.
 *In both cases it is up to the caller to free *a_str.
 *
 *@return CR_OK upon successful completion, an error code
 *otherwise. On error the input position is put back where it was
 *when the function was entered.
 */
static enum CRStatus
cr_tknzr_parse_ident (CRTknzr * a_this, CRString ** a_str)
{
        guint32 ch = 0;
        CRString *ident = NULL ;
        CRInputPos start_pos;
        enum CRStatus status = CR_OK;
        gboolean has_location = FALSE ;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_str, CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &start_pos);
        PEEK_NEXT_CHAR (a_this, &ch) ;

        ident = cr_string_new () ;
        g_return_val_if_fail (ident,
                              CR_OUT_OF_MEMORY_ERROR) ;

        /* The optional leading '-' of the css3 grammar. */
        if (ch == '-') {
                READ_NEXT_CHAR (a_this, &ch) ;
                cr_tknzr_get_parsing_location
                        (a_this, &ident->location) ;
                has_location = TRUE ;
                g_string_append_unichar (ident->stryng, ch) ;
        }

        /* The mandatory nmstart. */
        if (cr_tknzr_parse_nmstart (a_this, &ch, NULL) != CR_OK) {
                status = CR_PARSING_ERROR;
                goto error ;
        }
        if (!has_location) {
                cr_tknzr_get_parsing_location
                        (a_this, &ident->location) ;
        }
        g_string_append_unichar (ident->stryng, ch);

        /* Then as many nmchar as can be found. */
        while (cr_tknzr_parse_nmchar (a_this, &ch, NULL) == CR_OK) {
                g_string_append_unichar (ident->stryng, ch);
        }

        /* Hand the result over to the caller. */
        if (*a_str == NULL) {
                *a_str = ident ;
        } else {
                g_string_append_len ((*a_str)->stryng,
                                     ident->stryng->str,
                                     ident->stryng->len) ;
                cr_string_destroy (ident) ;
        }
        return CR_OK ;

 error:
        if (ident) {
                cr_string_destroy (ident) ;
                ident = NULL ;
        }
        cr_tknzr_set_cur_pos (a_this, &start_pos) ;

        return status ;
}
/**
 *Parses a "name" as defined by css spec [4.1.1]:
 *name ::= {nmchar}+
 *
 *@param a_this the current instance of #CRTknzr.
 *
 *@param a_str in/out parameter. A pointer to the successfully parsed
 *name. If *a_str is set to NULL, this function allocates a new instance
 *of CRString. If not, it just appends the parsed name to the passed *a_str.
 *In both cases, it is up to the caller to free *a_str. On success the
 *location of *a_str is set to the location of the first nmchar parsed.
 *
 *@return CR_OK upon successful completion, CR_OUT_OF_MEMORY_ERROR if
 *the string could not be allocated, CR_PARSING_ERROR if no nmchar
 *could be parsed. In the last case a string allocated by this function
 *is freed, *a_str is reset to NULL and the input position is put back
 *where it was when the function was entered.
 */
static enum CRStatus
cr_tknzr_parse_name (CRTknzr * a_this,
                     CRString ** a_str)
{
        guint32 tmp_char = 0;
        CRInputPos init_pos;
        enum CRStatus status = CR_OK;
        gboolean str_needs_free = FALSE;
        glong i = 0;
        CRParsingLocation loc = {0} ;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_str,
                              CR_BAD_PARAM_ERROR) ;

        RECORD_INITIAL_POS (a_this, &init_pos);

        if (*a_str == NULL) {
                *a_str = cr_string_new ();
                if (*a_str == NULL) {
                        /* Nothing has been consumed yet. */
                        return CR_OUT_OF_MEMORY_ERROR;
                }
                str_needs_free = TRUE;
        }

        for (i = 0;; i++) {
                /* Only the first nmchar gives the location of the name. */
                status = cr_tknzr_parse_nmchar (a_this, &tmp_char,
                                                (i == 0) ? &loc : NULL) ;
                if (status != CR_OK)
                        break;
                g_string_append_unichar ((*a_str)->stryng,
                                         tmp_char);
        }

        /* At least one nmchar is required by the production. */
        if (i > 0) {
                cr_parsing_location_copy
                        (&(*a_str)->location, &loc) ;
                return CR_OK;
        }

        if (str_needs_free == TRUE && *a_str) {
                cr_string_destroy (*a_str);
                *a_str = NULL;
        }

        cr_tknzr_set_cur_pos (a_this, &init_pos);

        return CR_PARSING_ERROR;
}
/**
 *Parses a "hash" as defined by the css spec in [4.1.1]:
 *HASH ::= #{name}
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_str in/out parameter, must not be NULL. If *a_str is NULL,
 *it receives a newly allocated #CRString holding the name that
 *follows the '#'. If not, the name is appended to *a_str. On
 *success the location of *a_str is the parsing location taken right
 *after the '#' has been consumed.
 *@return CR_OK upon successful completion, an error code otherwise.
 *On error a string allocated by this function is freed, *a_str is
 *reset to NULL in that case, and the input position is put back
 *where it was when the function was entered.
 */
static enum CRStatus
cr_tknzr_parse_hash (CRTknzr * a_this, CRString ** a_str)
{
        guint32 cur_char = 0;
        CRInputPos init_pos;
        enum CRStatus status = CR_OK;
        gboolean str_needs_free = FALSE;
        CRParsingLocation loc = {0} ;

        /* a_str is dereferenced below, so it must be valid. */
        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_str,
                              CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &init_pos);
        READ_NEXT_CHAR (a_this, &cur_char);
        if (cur_char != '#') {
                status = CR_PARSING_ERROR;
                goto error;
        }

        if (*a_str == NULL) {
                *a_str = cr_string_new ();
                if (*a_str == NULL) {
                        status = CR_OUT_OF_MEMORY_ERROR;
                        goto error;
                }
                str_needs_free = TRUE;
        }

        cr_tknzr_get_parsing_location (a_this,
                                       &loc) ;
        status = cr_tknzr_parse_name (a_this, a_str);
        if (status != CR_OK) {
                goto error;
        }

        /*
         *The location is only set once the name has been parsed:
         *the string is then known to be valid, and a string passed
         *by the caller is not altered by a failed attempt.
         */
        cr_parsing_location_copy (&(*a_str)->location, &loc) ;
        return CR_OK;

 error:
        if (str_needs_free == TRUE && *a_str) {
                cr_string_destroy (*a_str);
                *a_str = NULL;
        }

        cr_tknzr_set_cur_pos (a_this, &init_pos);
        return status;
}
/**
 *Parses an uri as defined by the css spec [4.1.1]:
 * URI ::= url\({w}{string}{w}\)
 *         |url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\)
 *
 *The quoted form is tried first. If no quoted string followed by a
 *')' is found, the unquoted form is parsed from wherever the first
 *attempt left the input.
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_str in/out parameter, must not be NULL. If *a_str is NULL,
 *it receives a newly allocated #CRString holding the url. If not,
 *the url is appended to *a_str. On success the location of *a_str
 *is the parsing location taken right after the 'u' of "url(" has
 *been consumed.
 *@return CR_OK upon successful completion, an error code otherwise.
 *On error the input position is put back where it was when the
 *function was entered.
 */
static enum CRStatus
cr_tknzr_parse_uri (CRTknzr * a_this,
                    CRString ** a_str)
{
        guint32 cur_char = 0;
        CRInputPos init_pos;
        enum CRStatus status = CR_PARSING_ERROR;
        guchar tab[4] = { 0 }, *tmp_ptr1 = NULL, *tmp_ptr2 = NULL;
        CRString *str = NULL;
        CRParsingLocation location = {0} ;

        g_return_val_if_fail (a_this
                              && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_str,
                              CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &init_pos);

        PEEK_BYTE (a_this, 1, &tab[0]);
        PEEK_BYTE (a_this, 2, &tab[1]);
        PEEK_BYTE (a_this, 3, &tab[2]);
        PEEK_BYTE (a_this, 4, &tab[3]);

        if (tab[0] != 'u' || tab[1] != 'r' || tab[2] != 'l' || tab[3] != '(') {
                status = CR_PARSING_ERROR;
                goto error;
        }
        /*
         *Here, we want to skip 4 bytes ('u''r''l''(').
         *But we also need to keep track of the parsing location
         *of the 'u'. So, we skip 1 byte, we record the parsing
         *location, then we skip the 3 remaining bytes.
         */
        SKIP_CHARS (a_this, 1);
        cr_tknzr_get_parsing_location (a_this, &location) ;
        SKIP_CHARS (a_this, 3);
        cr_tknzr_try_to_skip_spaces (a_this);

        /* First attempt: url\({w}{string}{w}\) */
        status = cr_tknzr_parse_string (a_this, a_str);
        if (status == CR_OK) {
                guint32 next_char = 0;

                status = cr_tknzr_parse_w (a_this, &tmp_ptr1,
                                           &tmp_ptr2, NULL);
                cr_tknzr_try_to_skip_spaces (a_this);
                PEEK_NEXT_CHAR (a_this, &next_char);
                if (next_char == ')') {
                        READ_NEXT_CHAR (a_this, &cur_char);
                        status = CR_OK;
                } else {
                        status = CR_PARSING_ERROR;
                }
        }

        /* Second attempt: the unquoted form. */
        if (status != CR_OK) {
                str = cr_string_new ();
                if (str == NULL) {
                        status = CR_OUT_OF_MEMORY_ERROR;
                        goto error;
                }
                for (;;) {
                        guint32 next_char = 0;

                        PEEK_NEXT_CHAR (a_this, &next_char);

                        /*
                         *[!#$%&*-~]|{nonascii}. Explicit comparisons
                         *are used because strchr() also matches the
                         *terminating NUL of its first argument, which
                         *made U+0000 pass for a valid url character.
                         */
                        if (next_char == '!'
                            || (next_char >= '#' && next_char <= '&')
                            || (next_char >= '*' && next_char <= '~')
                            || (cr_utils_is_nonascii (next_char) == TRUE)) {
                                READ_NEXT_CHAR (a_this, &cur_char);
                                g_string_append_unichar
                                        (str->stryng, cur_char);
                                status = CR_OK;
                        } else {
                                guint32 esc_code = 0;

                                status = cr_tknzr_parse_escape
                                        (a_this, &esc_code, NULL);
                                if (status == CR_OK) {
                                        g_string_append_unichar
                                                (str->stryng,
                                                 esc_code);
                                } else {
                                        /* End of the url characters. */
                                        status = CR_OK;
                                        break;
                                }
                        }
                }
                cr_tknzr_try_to_skip_spaces (a_this);
                READ_NEXT_CHAR (a_this, &cur_char);
                if (cur_char == ')') {
                        status = CR_OK;
                } else {
                        status = CR_PARSING_ERROR;
                        goto error;
                }

                if (*a_str == NULL) {
                        *a_str = str;
                        str = NULL;
                } else {
                        g_string_append_len
                                ((*a_str)->stryng,
                                 str->stryng->str,
                                 str->stryng->len);
                        cr_string_destroy (str);
                        str = NULL;
                }
        }

        cr_parsing_location_copy
                (&(*a_str)->location,
                 &location) ;
        return CR_OK ;

 error:
        if (str) {
                cr_string_destroy (str);
                str = NULL;
        }
        cr_tknzr_set_cur_pos (a_this, &init_pos);
        return status;
}
/**
 *Parses an RGB colour as defined in the css2 spec.
 *rgb: rgb '(' S* {num}%? S* ',' S* {num}%? S* ',' S* {num}%? S* ')'
 *
 *The "rgb" keyword is matched case-insensitively. If any of the
 *three components is followed by '%', the whole colour is flagged
 *as a percentage colour.
 *
 *@param a_this the "this pointer" of the current instance of
 *#CRTknzr.
 *@param a_rgb out parameter, must not be NULL. If *a_rgb is NULL a
 *new #CRRgb is allocated with cr_rgb_new_with_vals() and stored
 *there; otherwise the fields of the existing instance are
 *overwritten.
 *@return CR_OK upon successful completion, an error code otherwise.
 *On error the input position is restored to where it was on entry.
 */
static enum CRStatus
cr_tknzr_parse_rgb (CRTknzr * a_this, CRRgb ** a_rgb)
{
        enum CRStatus status = CR_OK;
        CRInputPos init_pos;
        CRNum *num = NULL;
        guchar next_bytes[3] = { 0 }, cur_byte = 0;
        glong red = 0,
                green = 0,
                blue = 0,
                i = 0;
        gboolean is_percentage = FALSE;
        CRParsingLocation location = {0} ;

        /* a_rgb is dereferenced below, so it has to be validated too. */
        g_return_val_if_fail (a_this && PRIVATE (a_this) && a_rgb,
                              CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &init_pos);

        PEEK_BYTE (a_this, 1, &next_bytes[0]);
        PEEK_BYTE (a_this, 2, &next_bytes[1]);
        PEEK_BYTE (a_this, 3, &next_bytes[2]);

        if (((next_bytes[0] == 'r') || (next_bytes[0] == 'R'))
            && ((next_bytes[1] == 'g') || (next_bytes[1] == 'G'))
            && ((next_bytes[2] == 'b') || (next_bytes[2] == 'B'))) {
                /*
                 *Skip the first char, record the parsing location,
                 *then skip the two remaining chars of the keyword.
                 */
                SKIP_CHARS (a_this, 1);
                cr_tknzr_get_parsing_location (a_this, &location) ;
                SKIP_CHARS (a_this, 2);
        } else {
                status = CR_PARSING_ERROR;
                goto error;
        }

        READ_NEXT_BYTE (a_this, &cur_byte);
        ENSURE_PARSING_COND (cur_byte == '(');

        /* First component: red. */
        cr_tknzr_try_to_skip_spaces (a_this);
        status = cr_tknzr_parse_num (a_this, &num);
        ENSURE_PARSING_COND ((status == CR_OK) && (num != NULL));

        if (num->val > G_MAXLONG) {
                status = CR_PARSING_ERROR;
                goto error;
        }

        red = num->val;
        cr_num_destroy (num);
        num = NULL;

        PEEK_BYTE (a_this, 1, &next_bytes[0]);
        if (next_bytes[0] == '%') {
                SKIP_CHARS (a_this, 1);
                is_percentage = TRUE;
        }
        cr_tknzr_try_to_skip_spaces (a_this);

        /* Second and third components: ',' followed by green, then blue. */
        for (i = 0; i < 2; i++) {
                READ_NEXT_BYTE (a_this, &cur_byte);
                ENSURE_PARSING_COND (cur_byte == ',');

                cr_tknzr_try_to_skip_spaces (a_this);
                status = cr_tknzr_parse_num (a_this, &num);
                ENSURE_PARSING_COND ((status == CR_OK) && (num != NULL));

                if (num->val > G_MAXLONG) {
                        status = CR_PARSING_ERROR;
                        goto error;
                }

                PEEK_BYTE (a_this, 1, &next_bytes[0]);
                if (next_bytes[0] == '%') {
                        SKIP_CHARS (a_this, 1);
                        is_percentage = TRUE;
                }

                if (i == 0) {
                        green = num->val;
                } else if (i == 1) {
                        blue = num->val;
                }

                cr_num_destroy (num);
                num = NULL;

                cr_tknzr_try_to_skip_spaces (a_this);
        }

        /*
         *The construct must be terminated by ')'. Without this check
         *any byte (e.g. the ';' ending the declaration) would be
         *silently swallowed as if it were the closing parenthesis.
         */
        READ_NEXT_BYTE (a_this, &cur_byte);
        ENSURE_PARSING_COND (cur_byte == ')');

        if (*a_rgb == NULL) {
                *a_rgb = cr_rgb_new_with_vals (red, green, blue,
                                               is_percentage);
                if (*a_rgb == NULL) {
                        status = CR_ERROR;
                        goto error;
                }
        } else {
                (*a_rgb)->red = red;
                (*a_rgb)->green = green;
                (*a_rgb)->blue = blue;
                (*a_rgb)->is_percentage = is_percentage;
        }

        cr_parsing_location_copy (&(*a_rgb)->location, &location) ;
        return CR_OK;

 error:
        if (num) {
                cr_num_destroy (num);
                num = NULL;
        }

        cr_tknzr_set_cur_pos (a_this, &init_pos);
        /* Report the failure instead of masking it with CR_OK. */
        return status;
}
/**
 *Parses an at-keyword as defined by the css spec in [4.1.1]:
 *ATKEYWORD ::= @{ident}
 *
 *@param a_this the "this pointer" of the current instance of
 *#CRTknzr.
 *
 *@param a_str in/out parameter. After the leading '@' has been read,
 *the identifier is parsed by cr_tknzr_parse_ident() into *a_str.
 *If *a_str is NULL on entry, a new #CRString is allocated here
 *first; that string is destroyed again (and *a_str reset to NULL)
 *when parsing fails. A string supplied by the caller is never
 *destroyed by this function. On success it is up to the caller to
 *free *a_str.
 *
 *@return CR_OK upon successful completion, an error code otherwise.
 *On error the input position is restored to where it was on entry.
 */
static enum CRStatus
cr_tknzr_parse_atkeyword (CRTknzr * a_this,
                          CRString ** a_str)
{
        enum CRStatus status = CR_OK;
        CRInputPos init_pos;
        guint32 first_char = 0;
        gboolean allocated_here = FALSE;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_str, CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &init_pos);

        /* An at-keyword has to open with '@'. */
        READ_NEXT_CHAR (a_this, &first_char);
        if (first_char != '@') {
                status = CR_PARSING_ERROR;
                goto error;
        }

        /* Remember whether the string is ours to free on failure. */
        if (*a_str == NULL) {
                *a_str = cr_string_new ();
                allocated_here = TRUE;
        }

        status = cr_tknzr_parse_ident (a_this, a_str);
        if (status == CR_OK) {
                return CR_OK;
        }

 error:
        if (allocated_here && *a_str != NULL) {
                cr_string_destroy (*a_str);
                *a_str = NULL;
        }

        cr_tknzr_set_cur_pos (a_this, &init_pos);
        return status;
}
/**
 *Parses the "!important" symbol: a '!' character, optional spaces,
 *then the exact lower-case bytes "important".
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_location optional out parameter (may be NULL). It is
 *filled once right after the '!' has been read and, on success,
 *filled again after the keyword has been consumed, so the value
 *seen by the caller on success is the later one.
 *@return CR_OK upon successful completion, an error code otherwise.
 *On error the input position is restored to where it was on entry.
 */
static enum CRStatus
cr_tknzr_parse_important (CRTknzr * a_this,
                          CRParsingLocation *a_location)
{
        static const char keyword[] = "important";
        enum CRStatus status = CR_OK;
        CRInputPos init_pos;
        guint32 bang = 0;
        int offset = 0;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input,
                              CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &init_pos);

        READ_NEXT_CHAR (a_this, &bang);
        ENSURE_PARSING_COND (bang == '!');

        if (a_location) {
                cr_tknzr_get_parsing_location (a_this,
                                               a_location) ;
        }

        cr_tknzr_try_to_skip_spaces (a_this);

        /*
         *Compare the nine upcoming bytes (offsets 1 to 9) against the
         *keyword, stopping at the first byte that does not match.
         */
        for (offset = 1; offset <= 9; offset++) {
                if (BYTE (PRIVATE (a_this)->input, offset, NULL)
                    != keyword[offset - 1]) {
                        break;
                }
        }

        if (offset > 9) {
                /* All nine bytes matched: consume the keyword. */
                SKIP_BYTES (a_this, 9);
                if (a_location) {
                        cr_tknzr_get_parsing_location (a_this,
                                                       a_location) ;
                }
                return CR_OK;
        }

        status = CR_PARSING_ERROR;

 error:
        cr_tknzr_set_cur_pos (a_this, &init_pos);
        return status;
}
/**
 *Parses a num as defined in the css spec [4.1.1]:
 *[0-9]+|[0-9]*\.[0-9]+
 *
 *The CSS specification says that numbers may be
 *preceded by '+' or '-' to indicate the sign.
 *Technically, the "num" construction as defined
 *by the tokenizer doesn't allow this, but we parse
 *it here for simplicity.
 *
 *A '.' that is not followed by at least one digit (e.g. "1.") and
 *a second '.' inside the same number both make the parse fail.
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_num out parameter, must not be NULL. If *a_num is NULL a
 *new #CRNum of type NUM_GENERIC is allocated and stored there;
 *otherwise the value and type of the existing instance are
 *overwritten.
 *@return CR_OK upon successful completion,
 *an error code otherwise. On error the input position is restored
 *to where it was on entry.
 */
static enum CRStatus
cr_tknzr_parse_num (CRTknzr * a_this,
                    CRNum ** a_num)
{
        enum CRStatus status = CR_PARSING_ERROR;
        enum CRNumType val_type = NUM_GENERIC;
        gboolean parsing_dec,  /* true iff seen decimal point. */
                parsed;  /* true iff the substring seen so far is a valid CSS
                            number, i.e. `[0-9]+|[0-9]*\.[0-9]+'. */
        guint32 cur_char = 0,
                next_char = 0;
        gdouble numerator = 0, denominator = 1;
        CRInputPos init_pos;
        CRParsingLocation location = {0} ;
        int sign = 1;

        /* a_num is dereferenced below, so it has to be validated too. */
        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_num,
                              CR_BAD_PARAM_ERROR);

        RECORD_INITIAL_POS (a_this, &init_pos);
        READ_NEXT_CHAR (a_this, &cur_char);

        /* Optional sign. */
        if (cur_char == '+' || cur_char == '-') {
                if (cur_char == '-') {
                        sign = -1;
                }
                READ_NEXT_CHAR (a_this, &cur_char);
        }

        /* The first significant char is either a digit or the point. */
        if (IS_NUM (cur_char)) {
                numerator = (cur_char - '0');
                parsing_dec = FALSE;
                parsed = TRUE;
        } else if (cur_char == '.') {
                numerator = 0;
                parsing_dec = TRUE;
                parsed = FALSE;
        } else {
                status = CR_PARSING_ERROR;
                goto error;
        }
        cr_tknzr_get_parsing_location (a_this, &location) ;

        /*
         *All digits are accumulated into the numerator; every digit
         *seen after the decimal point multiplies the denominator by 10.
         */
        for (;;) {
                status = cr_tknzr_peek_char (a_this, &next_char);
                if (status != CR_OK) {
                        /* Reaching the end of input ends the number. */
                        if (status == CR_END_OF_INPUT_ERROR)
                                status = CR_OK;
                        break;
                }
                if (next_char == '.') {
                        if (parsing_dec) {
                                status = CR_PARSING_ERROR;
                                goto error;
                        }

                        READ_NEXT_CHAR (a_this, &cur_char);
                        parsing_dec = TRUE;
                        parsed = FALSE;  /* In CSS, there must be at least
                                            one digit after `.'. */
                } else if (IS_NUM (next_char)) {
                        READ_NEXT_CHAR (a_this, &cur_char);
                        parsed = TRUE;

                        numerator = numerator * 10 + (cur_char - '0');
                        if (parsing_dec) {
                                denominator *= 10;
                        }
                } else {
                        break;
                }
        }

        if (!parsed) {
                status = CR_PARSING_ERROR;
        }

        /*
         *Now, set the output param values.
         */
        if (status == CR_OK) {
                gdouble val = (numerator / denominator) * sign;
                if (*a_num == NULL) {
                        *a_num = cr_num_new_with_val (val, val_type);

                        if (*a_num == NULL) {
                                status = CR_ERROR;
                                goto error;
                        }
                } else {
                        (*a_num)->val = val;
                        (*a_num)->type = val_type;
                }
                cr_parsing_location_copy (&(*a_num)->location,
                                          &location) ;
                return CR_OK;
        }

 error:
        cr_tknzr_set_cur_pos (a_this, &init_pos);
        return status;
}
/*********************************************
*PUBLIC methods
********************************************/
/**
 *Allocates a new, zero-initialised tokenizer.
 *@param a_input the input stream to tokenize, or NULL. When not
 *NULL it is attached with cr_tknzr_set_input().
 *@return the new tokenizer, or NULL if memory could not be
 *allocated.
 */
CRTknzr *
cr_tknzr_new (CRInput * a_input)
{
        CRTknzr *tknzr = g_try_malloc (sizeof (CRTknzr));

        if (tknzr == NULL) {
                cr_utils_trace_info ("Out of memory");
                return NULL;
        }
        memset (tknzr, 0, sizeof (CRTknzr));

        tknzr->priv = g_try_malloc (sizeof (CRTknzrPriv));
        if (tknzr->priv == NULL) {
                cr_utils_trace_info ("Out of memory");
                /* Do not leak the outer structure. */
                g_free (tknzr);
                return NULL;
        }
        memset (tknzr->priv, 0, sizeof (CRTknzrPriv));

        if (a_input) {
                cr_tknzr_set_input (tknzr, a_input);
        }

        return tknzr;
}
/**
 *Builds a tokenizer on top of an in-memory buffer.
 *All four arguments are forwarded unchanged to
 *cr_input_new_from_buf(), and the resulting input is handed to
 *cr_tknzr_new().
 *@param a_buf the buffer to tokenize.
 *@param a_len forwarded as the length argument of
 *cr_input_new_from_buf().
 *@param a_enc the encoding of the buffer.
 *@param a_free_at_destroy forwarded to cr_input_new_from_buf().
 *@return the new tokenizer, or NULL if the input or the tokenizer
 *could not be created.
 */
CRTknzr *
cr_tknzr_new_from_buf (guchar * a_buf, gulong a_len,
                       enum CREncoding a_enc,
                       gboolean a_free_at_destroy)
{
        CRInput *input = cr_input_new_from_buf (a_buf, a_len, a_enc,
                                                a_free_at_destroy);

        g_return_val_if_fail (input != NULL, NULL);

        return cr_tknzr_new (input);
}
/**
 *Builds a tokenizer on top of the resource designated by a uri.
 *The input is created with cr_input_new_from_uri() and handed to
 *cr_tknzr_new().
 *@param a_file_uri the uri of the resource to tokenize.
 *@param a_enc the encoding of that resource.
 *@return the new tokenizer, or NULL if the input or the tokenizer
 *could not be created.
 */
CRTknzr *
cr_tknzr_new_from_uri (const guchar * a_file_uri,
                       enum CREncoding a_enc)
{
        CRInput *input = cr_input_new_from_uri ((const gchar *) a_file_uri,
                                                a_enc);

        g_return_val_if_fail (input != NULL, NULL);

        return cr_tknzr_new (input);
}
/**
 *Increments the reference count of the tokenizer by one.
 *@param a_this the current instance of #CRTknzr.
 */
void
cr_tknzr_ref (CRTknzr * a_this)
{
        g_return_if_fail (a_this && PRIVATE (a_this));

        PRIVATE (a_this)->ref_count += 1;
}
/**
 *Decrements the reference count of the tokenizer (never below
 *zero) and destroys the tokenizer once the count is zero.
 *@param a_this the current instance of #CRTknzr.
 *@return TRUE if the tokenizer has been destroyed, FALSE otherwise.
 */
gboolean
cr_tknzr_unref (CRTknzr * a_this)
{
        g_return_val_if_fail (a_this && PRIVATE (a_this), FALSE);

        if (PRIVATE (a_this)->ref_count > 0) {
                PRIVATE (a_this)->ref_count -= 1;
        }

        /* Still referenced: nothing more to do. */
        if (PRIVATE (a_this)->ref_count != 0) {
                return FALSE;
        }

        cr_tknzr_destroy (a_this);
        return TRUE;
}
/**
 *Attaches an input stream to the tokenizer, replacing (and
 *unreferencing) the one previously attached, if any.
 *@param a_this the current instance of #CRTknzr.
 *@param a_input the new input stream. A reference is taken on it
 *with cr_input_ref().
 *@return CR_OK upon successful completion, CR_BAD_PARAM_ERROR if
 *a_this is invalid.
 */
enum CRStatus
cr_tknzr_set_input (CRTknzr * a_this, CRInput * a_input)
{
        g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);

        /*
         *Take the new reference before dropping the old one: if
         *a_input is the input already attached and we hold its last
         *reference, unreferencing first could destroy it before it
         *gets referenced again.
         */
        cr_input_ref (a_input);

        if (PRIVATE (a_this)->input) {
                cr_input_unref (PRIVATE (a_this)->input);
        }

        PRIVATE (a_this)->input = a_input;

        return CR_OK;
}
/**
 *Gets the input stream currently attached to the tokenizer.
 *No reference is taken on the returned input by this function.
 *@param a_this the current instance of #CRTknzr.
 *@param a_input out parameter, must not be NULL. Receives the
 *attached input stream (possibly NULL if none is attached).
 *@return CR_OK upon successful completion, CR_BAD_PARAM_ERROR if
 *one of the parameters is invalid.
 */
enum CRStatus
cr_tknzr_get_input (CRTknzr * a_this, CRInput ** a_input)
{
        /* a_input is written through, so it has to be validated too. */
        g_return_val_if_fail (a_this && PRIVATE (a_this) && a_input,
                              CR_BAD_PARAM_ERROR);

        *a_input = PRIVATE (a_this)->input;

        return CR_OK;
}
/*********************************
*Tokenizer input handling routines
*********************************/
/**
 *Reads the next byte from the tokenizer's input stream by
 *delegating to cr_input_read_byte().
 *@param a_this the "this pointer" of the current instance of
 *#CRTknzr.
 *@param a_byte out parameter. The place where to store the byte
 *read.
 *@return CR_OK upon successful completion, an error
 *code otherwise.
 */
enum CRStatus
cr_tknzr_read_byte (CRTknzr * a_this, guchar * a_byte)
{
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);

        status = cr_input_read_byte (PRIVATE (a_this)->input, a_byte);

        return status;
}
/**
 *Reads the next char from the tokenizer's input stream.
 *If a token is pending in the token cache, the input is first
 *rewound to the saved previous position and the cached token is
 *destroyed, so the char is read from that position.
 *@param a_this the current instance of #CRTknzr.
 *@param a_char out parameter. The read char.
 *@return CR_OK upon successful completion, an error code
 *otherwise.
 */
enum CRStatus
cr_tknzr_read_char (CRTknzr * a_this, guint32 * a_char)
{
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_char, CR_BAD_PARAM_ERROR);

        if (PRIVATE (a_this)->token_cache != NULL) {
                /* Drop the cached token and rewind the input. */
                cr_input_set_cur_pos (PRIVATE (a_this)->input,
                                      &PRIVATE (a_this)->prev_pos);
                cr_token_destroy (PRIVATE (a_this)->token_cache);
                PRIVATE (a_this)->token_cache = NULL;
        }

        status = cr_input_read_char (PRIVATE (a_this)->input, a_char);

        return status;
}
/**
 *Peeks a char from the tokenizer's input stream.
 *To "peek a char" means to read the next char without consuming it.
 *Subsequent calls to this function return the same char.
 *If a token is pending in the token cache, the input is first
 *rewound to the saved previous position and the cached token is
 *destroyed.
 *@param a_this the current instance of #CRTknzr.
 *@param a_char out parameter. The peeked char upon successful
 *completion.
 *@return CR_OK upon successful completion, an error code otherwise.
 */
enum CRStatus
cr_tknzr_peek_char (CRTknzr * a_this, guint32 * a_char)
{
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_char, CR_BAD_PARAM_ERROR);

        if (PRIVATE (a_this)->token_cache != NULL) {
                /* Drop the cached token and rewind the input. */
                cr_input_set_cur_pos (PRIVATE (a_this)->input,
                                      &PRIVATE (a_this)->prev_pos);
                cr_token_destroy (PRIVATE (a_this)->token_cache);
                PRIVATE (a_this)->token_cache = NULL;
        }

        status = cr_input_peek_char (PRIVATE (a_this)->input, a_char);

        return status;
}
/**
 *Peeks a byte ahead at a given position in the tokenizer's input
 *stream, relative to the current position (CR_SEEK_CUR).
 *If a token is pending in the token cache, the input is first
 *rewound to the saved previous position and the cached token is
 *destroyed.
 *@param a_this the current instance of #CRTknzr.
 *@param a_offset the offset of the peeked byte starting from the
 *current byte in the input stream.
 *@param a_byte out parameter. The peeked byte upon
 *successful completion.
 *@return CR_OK upon successful completion, an error code otherwise.
 */
enum CRStatus
cr_tknzr_peek_byte (CRTknzr * a_this, gulong a_offset, guchar * a_byte)
{
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input && a_byte,
                              CR_BAD_PARAM_ERROR);

        if (PRIVATE (a_this)->token_cache != NULL) {
                /* Drop the cached token and rewind the input. */
                cr_input_set_cur_pos (PRIVATE (a_this)->input,
                                      &PRIVATE (a_this)->prev_pos);
                cr_token_destroy (PRIVATE (a_this)->token_cache);
                PRIVATE (a_this)->token_cache = NULL;
        }

        status = cr_input_peek_byte (PRIVATE (a_this)->input,
                                     CR_SEEK_CUR, a_offset, a_byte);

        return status;
}
/**
 *Same as cr_tknzr_peek_byte() but this api returns the byte peeked.
 *It delegates directly to cr_input_peek_byte2() and, unlike
 *cr_tknzr_peek_byte(), does not touch the token cache.
 *@param a_this the current instance of #CRTknzr.
 *@param a_offset the offset of the peeked byte starting from the
 *current byte in the input stream.
 *@param a_eof out parameter. If not NULL, is set to TRUE if we
 *reached end of file, FALSE otherwise. If the caller sets it to
 *NULL, this parameter is just ignored.
 *@return the peeked byte, or 0 if a_this is invalid.
 */
guchar
cr_tknzr_peek_byte2 (CRTknzr * a_this, gulong a_offset, gboolean * a_eof)
{
        guchar peeked = 0;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input, 0);

        peeked = cr_input_peek_byte2 (PRIVATE (a_this)->input,
                                      a_offset, a_eof);

        return peeked;
}
/**
 *Gets the number of bytes left in the input stream
 *associated to this tokenizer.
 *If a token is pending in the token cache, the input is first
 *rewound to the saved previous position and the cached token is
 *destroyed.
 *@param a_this the current instance of #CRTknzr
 *@return the number of bytes left as reported by
 *cr_input_get_nb_bytes_left(), or -1 if a_this is invalid.
 */
glong
cr_tknzr_get_nb_bytes_left (CRTknzr * a_this)
{
        /*
         *Return -1, as documented, rather than an enum CRStatus
         *value that a caller would mistake for a byte count.
         */
        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input, -1);

        if (PRIVATE (a_this)->token_cache) {
                /* Drop the cached token and rewind the input. */
                cr_input_set_cur_pos (PRIVATE (a_this)->input,
                                      &PRIVATE (a_this)->prev_pos);
                cr_token_destroy (PRIVATE (a_this)->token_cache);
                PRIVATE (a_this)->token_cache = NULL;
        }

        return cr_input_get_nb_bytes_left (PRIVATE (a_this)->input);
}
/**
 *Gets the current position of the tokenizer's input stream.
 *If a token is pending in the token cache, the input is first
 *rewound to the saved previous position and the cached token is
 *destroyed, so the position reported is the rewound one.
 *@param a_this the current instance of #CRTknzr.
 *@param a_pos out parameter. The current input position.
 *@return CR_OK upon successful completion, an error code otherwise.
 */
enum CRStatus
cr_tknzr_get_cur_pos (CRTknzr * a_this, CRInputPos * a_pos)
{
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_pos, CR_BAD_PARAM_ERROR);

        if (PRIVATE (a_this)->token_cache != NULL) {
                /* Drop the cached token and rewind the input. */
                cr_input_set_cur_pos (PRIVATE (a_this)->input,
                                      &PRIVATE (a_this)->prev_pos);
                cr_token_destroy (PRIVATE (a_this)->token_cache);
                PRIVATE (a_this)->token_cache = NULL;
        }

        status = cr_input_get_cur_pos (PRIVATE (a_this)->input, a_pos);

        return status;
}
/**
 *Gets the current parsing location by delegating to
 *cr_input_get_parsing_location(). The token cache is left
 *untouched.
 *@param a_this the current instance of #CRTknzr.
 *@param a_loc out parameter. The current parsing location.
 *@return CR_OK upon successful completion, an error code otherwise.
 */
enum CRStatus
cr_tknzr_get_parsing_location (CRTknzr *a_this,
                               CRParsingLocation *a_loc)
{
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this
                              && PRIVATE (a_this)
                              && a_loc,
                              CR_BAD_PARAM_ERROR) ;

        status = cr_input_get_parsing_location
                (PRIVATE (a_this)->input, a_loc) ;

        return status;
}
/**
 *Gets the address of the current byte of the tokenizer's input
 *stream by delegating to cr_input_get_cur_byte_addr().
 *If a token is pending in the token cache, the input is first
 *rewound to the saved previous position and the cached token is
 *destroyed.
 *@param a_this the current instance of #CRTknzr.
 *@param a_addr out parameter, forwarded to
 *cr_input_get_cur_byte_addr().
 *@return CR_OK upon successful completion, an error code otherwise.
 */
enum CRStatus
cr_tknzr_get_cur_byte_addr (CRTknzr * a_this, guchar ** a_addr)
{
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);

        if (PRIVATE (a_this)->token_cache != NULL) {
                /* Drop the cached token and rewind the input. */
                cr_input_set_cur_pos (PRIVATE (a_this)->input,
                                      &PRIVATE (a_this)->prev_pos);
                cr_token_destroy (PRIVATE (a_this)->token_cache);
                PRIVATE (a_this)->token_cache = NULL;
        }

        status = cr_input_get_cur_byte_addr (PRIVATE (a_this)->input,
                                             a_addr);

        return status;
}
/**
 *Moves the current position of the tokenizer's input stream by
 *delegating to cr_input_seek_index().
 *If a token is pending in the token cache, the input is first
 *rewound to the saved previous position and the cached token is
 *destroyed.
 *@param a_this the current instance of #CRTknzr.
 *@param a_origin the origin the seek is relative to.
 *@param a_pos the position to seek to, relative to a_origin.
 *@return CR_OK upon successful completion, an error code otherwise.
 */
enum CRStatus
cr_tknzr_seek_index (CRTknzr * a_this, enum CRSeekPos a_origin, gint a_pos)
{
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);

        if (PRIVATE (a_this)->token_cache != NULL) {
                /* Drop the cached token and rewind the input. */
                cr_input_set_cur_pos (PRIVATE (a_this)->input,
                                      &PRIVATE (a_this)->prev_pos);
                cr_token_destroy (PRIVATE (a_this)->token_cache);
                PRIVATE (a_this)->token_cache = NULL;
        }

        status = cr_input_seek_index (PRIVATE (a_this)->input,
                                      a_origin, a_pos);

        return status;
}
/**
 *Consumes chars from the tokenizer's input stream by delegating to
 *cr_input_consume_chars().
 *If a token is pending in the token cache, the input is first
 *rewound to the saved previous position and the cached token is
 *destroyed.
 *@param a_this the current instance of #CRTknzr.
 *@param a_char the char to consume, forwarded to
 *cr_input_consume_chars().
 *@param a_nb_char in/out parameter, must not be NULL. Its value is
 *forwarded to cr_input_consume_chars() and it is overwritten with
 *the count that function leaves behind.
 *@return the status of cr_input_consume_chars(), or
 *CR_BAD_PARAM_ERROR if one of the parameters is invalid.
 */
enum CRStatus
cr_tknzr_consume_chars (CRTknzr * a_this, guint32 a_char, glong * a_nb_char)
{
        gulong consumed = 0;
        enum CRStatus status = CR_OK;

        /*
         *a_nb_char must be validated before it is read: dereferencing
         *it in an initializer would run ahead of this check.
         */
        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_nb_char, CR_BAD_PARAM_ERROR);

        consumed = (gulong) *a_nb_char;

        if (PRIVATE (a_this)->token_cache) {
                /* Drop the cached token and rewind the input. */
                cr_input_set_cur_pos (PRIVATE (a_this)->input,
                                      &PRIVATE (a_this)->prev_pos);
                cr_token_destroy (PRIVATE (a_this)->token_cache);
                PRIVATE (a_this)->token_cache = NULL;
        }

        status = cr_input_consume_chars (PRIVATE (a_this)->input,
                                         a_char, &consumed);
        *a_nb_char = (glong) consumed;
        return status;
}
/**
 *Sets the current position of the tokenizer's input stream.
 *A token pending in the token cache is destroyed (without any
 *rewinding) before the new position is applied.
 *@param a_this the current instance of #CRTknzr.
 *@param a_pos the new position, forwarded to
 *cr_input_set_cur_pos().
 *@return CR_OK upon successful completion, an error code otherwise.
 */
enum CRStatus
cr_tknzr_set_cur_pos (CRTknzr * a_this, CRInputPos * a_pos)
{
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR);

        if (PRIVATE (a_this)->token_cache != NULL) {
                cr_token_destroy (PRIVATE (a_this)->token_cache);
                PRIVATE (a_this)->token_cache = NULL;
        }

        status = cr_input_set_cur_pos (PRIVATE (a_this)->input, a_pos);

        return status;
}
/**
 *Puts a token back into the tokenizer. The token is stored in the
 *token cache, from where the next call to
 *cr_tknzr_get_next_token() returns it. Only one token can be
 *pending at a time: the call is rejected if the cache is not empty.
 *@param a_this the current instance of #CRTknzr.
 *@param a_token the token to put back. It is stored as is in the
 *token cache.
 *@return CR_OK upon successful completion, CR_BAD_PARAM_ERROR if
 *a_this is invalid or a token is already pending.
 */
enum CRStatus
cr_tknzr_unget_token (CRTknzr * a_this, CRToken * a_token)
{
        enum CRStatus status = CR_OK;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->token_cache == NULL,
                              CR_BAD_PARAM_ERROR);

        PRIVATE (a_this)->token_cache = a_token;

        return status;
}
/**
 *Returns the next token of the input stream.
 *This method is really central. Each parsing
 *method calls it.
 *
 *If a token has been put back with cr_tknzr_unget_token(), that
 *token is returned and nothing is read from the input. Otherwise
 *the next char is peeked and used to select the kind of token to
 *try; anything that matches no specific rule is returned as a
 *one-char delimiter token. On success the position at which the
 *token started is saved as the tokenizer's previous position.
 *
 *@param a_this the current tokenizer.
 *@param a_tk out parameter. The returned token.
 *For the sake of mem leak avoidance, *a_tk must
 *be NULL.
 *@return CR_OK upon successful completion, CR_END_OF_INPUT_ERROR
 *if the end of the input has been reached, another error code
 *otherwise. On error the input position is restored to where it
 *was on entry.
 */
enum CRStatus
cr_tknzr_get_next_token (CRTknzr * a_this, CRToken ** a_tk)
{
        enum CRStatus status = CR_OK;
        CRToken *token = NULL;
        CRInputPos init_pos;
        guint32 next_char = 0;
        guchar next_bytes[4] = { 0 };
        gboolean reached_eof = FALSE;
        CRInput *input = NULL;
        CRString *str = NULL;
        CRRgb *rgb = NULL;
        CRParsingLocation location = {0} ;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && a_tk && *a_tk == NULL
                              && PRIVATE (a_this)->input,
                              CR_BAD_PARAM_ERROR);

        /* A token that was put back is served first. */
        if (PRIVATE (a_this)->token_cache) {
                *a_tk = PRIVATE (a_this)->token_cache;
                PRIVATE (a_this)->token_cache = NULL;
                return CR_OK;
        }

        RECORD_INITIAL_POS (a_this, &init_pos);

        status = cr_input_get_end_of_file
                (PRIVATE (a_this)->input, &reached_eof);
        ENSURE_PARSING_COND (status == CR_OK);

        if (reached_eof == TRUE) {
                status = CR_END_OF_INPUT_ERROR;
                goto error;
        }

        input = PRIVATE (a_this)->input;

        PEEK_NEXT_CHAR (a_this, &next_char);
        token = cr_token_new ();
        ENSURE_PARSING_COND (token);

        switch (next_char) {
        case '@':
                {
                        /*
                         *Well-known at-rules get a dedicated token
                         *type; anything else is a generic atkeyword.
                         */
                        if (BYTE (input, 2, NULL) == 'f'
                            && BYTE (input, 3, NULL) == 'o'
                            && BYTE (input, 4, NULL) == 'n'
                            && BYTE (input, 5, NULL) == 't'
                            && BYTE (input, 6, NULL) == '-'
                            && BYTE (input, 7, NULL) == 'f'
                            && BYTE (input, 8, NULL) == 'a'
                            && BYTE (input, 9, NULL) == 'c'
                            && BYTE (input, 10, NULL) == 'e') {
                                SKIP_CHARS (a_this, 1);
                                cr_tknzr_get_parsing_location
                                        (a_this, &location) ;
                                SKIP_CHARS (a_this, 9);
                                status = cr_token_set_font_face_sym (token);
                                CHECK_PARSING_STATUS (status, TRUE);
                                cr_parsing_location_copy (&token->location,
                                                          &location) ;
                                goto done;
                        }

                        if (BYTE (input, 2, NULL) == 'c'
                            && BYTE (input, 3, NULL) == 'h'
                            && BYTE (input, 4, NULL) == 'a'
                            && BYTE (input, 5, NULL) == 'r'
                            && BYTE (input, 6, NULL) == 's'
                            && BYTE (input, 7, NULL) == 'e'
                            && BYTE (input, 8, NULL) == 't') {
                                SKIP_CHARS (a_this, 1);
                                cr_tknzr_get_parsing_location
                                        (a_this, &location) ;
                                SKIP_CHARS (a_this, 7);
                                status = cr_token_set_charset_sym (token);
                                CHECK_PARSING_STATUS (status, TRUE);
                                cr_parsing_location_copy (&token->location,
                                                          &location) ;
                                goto done;
                        }

                        if (BYTE (input, 2, NULL) == 'i'
                            && BYTE (input, 3, NULL) == 'm'
                            && BYTE (input, 4, NULL) == 'p'
                            && BYTE (input, 5, NULL) == 'o'
                            && BYTE (input, 6, NULL) == 'r'
                            && BYTE (input, 7, NULL) == 't') {
                                SKIP_CHARS (a_this, 1);
                                cr_tknzr_get_parsing_location
                                        (a_this, &location) ;
                                SKIP_CHARS (a_this, 6);
                                status = cr_token_set_import_sym (token);
                                CHECK_PARSING_STATUS (status, TRUE);
                                cr_parsing_location_copy (&token->location,
                                                          &location) ;
                                goto done;
                        }

                        if (BYTE (input, 2, NULL) == 'm'
                            && BYTE (input, 3, NULL) == 'e'
                            && BYTE (input, 4, NULL) == 'd'
                            && BYTE (input, 5, NULL) == 'i'
                            && BYTE (input, 6, NULL) == 'a') {
                                SKIP_CHARS (a_this, 1);
                                cr_tknzr_get_parsing_location (a_this,
                                                               &location) ;
                                SKIP_CHARS (a_this, 5);
                                status = cr_token_set_media_sym (token);
                                CHECK_PARSING_STATUS (status, TRUE);
                                cr_parsing_location_copy (&token->location,
                                                          &location) ;
                                goto done;
                        }

                        if (BYTE (input, 2, NULL) == 'p'
                            && BYTE (input, 3, NULL) == 'a'
                            && BYTE (input, 4, NULL) == 'g'
                            && BYTE (input, 5, NULL) == 'e') {
                                SKIP_CHARS (a_this, 1);
                                cr_tknzr_get_parsing_location (a_this,
                                                               &location) ;
                                SKIP_CHARS (a_this, 4);
                                status = cr_token_set_page_sym (token);
                                CHECK_PARSING_STATUS (status, TRUE);
                                cr_parsing_location_copy (&token->location,
                                                          &location) ;
                                goto done;
                        }

                        status = cr_tknzr_parse_atkeyword (a_this, &str);
                        if (status == CR_OK) {
                                status = cr_token_set_atkeyword (token, str);
                                CHECK_PARSING_STATUS (status, TRUE);
                                if (str) {
                                        cr_parsing_location_copy (&token->location,
                                                                  &str->location) ;
                                }
                                goto done;
                        }
                }
                break;

        case 'u':
                if (BYTE (input, 2, NULL) == 'r'
                    && BYTE (input, 3, NULL) == 'l'
                    && BYTE (input, 4, NULL) == '(') {
                        CRString *str2 = NULL;

                        status = cr_tknzr_parse_uri (a_this, &str2);
                        if (status == CR_OK) {
                                status = cr_token_set_uri (token, str2);
                                CHECK_PARSING_STATUS (status, TRUE);
                                if (str2) {
                                        cr_parsing_location_copy (&token->location,
                                                                  &str2->location) ;
                                }
                                goto done;
                        }
                }
                goto fallback;

        case 'r':
                if (BYTE (input, 2, NULL) == 'g'
                    && BYTE (input, 3, NULL) == 'b'
                    && BYTE (input, 4, NULL) == '(') {
                        status = cr_tknzr_parse_rgb (a_this, &rgb);
                        if (status == CR_OK && rgb) {
                                status = cr_token_set_rgb (token, rgb);
                                CHECK_PARSING_STATUS (status, TRUE);
                                if (rgb) {
                                        cr_parsing_location_copy (&token->location,
                                                                  &rgb->location) ;
                                }
                                rgb = NULL;
                                goto done;
                        }
                }
                goto fallback;

        case '<':
                if (BYTE (input, 2, NULL) == '!'
                    && BYTE (input, 3, NULL) == '-'
                    && BYTE (input, 4, NULL) == '-') {
                        SKIP_CHARS (a_this, 1);
                        cr_tknzr_get_parsing_location (a_this,
                                                       &location) ;
                        SKIP_CHARS (a_this, 3);
                        status = cr_token_set_cdo (token);
                        CHECK_PARSING_STATUS (status, TRUE);
                        cr_parsing_location_copy (&token->location,
                                                  &location) ;
                        goto done;
                }
                break;

        case '-':
                if (BYTE (input, 2, NULL) == '-'
                    && BYTE (input, 3, NULL) == '>') {
                        SKIP_CHARS (a_this, 1);
                        cr_tknzr_get_parsing_location (a_this,
                                                       &location) ;
                        SKIP_CHARS (a_this, 2);
                        status = cr_token_set_cdc (token);
                        CHECK_PARSING_STATUS (status, TRUE);
                        cr_parsing_location_copy (&token->location,
                                                  &location) ;
                        goto done;
                } else {
                        /* Try an identifier first, then a number. */
                        status = cr_tknzr_parse_ident
                                (a_this, &str);
                        if (status == CR_OK) {
                                cr_token_set_ident
                                        (token, str);
                                if (str) {
                                        cr_parsing_location_copy (&token->location,
                                                                  &str->location) ;
                                }
                                goto done;
                        } else {
                                goto parse_number;
                        }
                }
                break;

        case '~':
                if (BYTE (input, 2, NULL) == '=') {
                        SKIP_CHARS (a_this, 1);
                        cr_tknzr_get_parsing_location (a_this,
                                                       &location) ;
                        SKIP_CHARS (a_this, 1);
                        status = cr_token_set_includes (token);
                        CHECK_PARSING_STATUS (status, TRUE);
                        cr_parsing_location_copy (&token->location,
                                                  &location) ;
                        goto done;
                }
                break;

        case '|':
                if (BYTE (input, 2, NULL) == '=') {
                        SKIP_CHARS (a_this, 1);
                        cr_tknzr_get_parsing_location (a_this,
                                                       &location) ;
                        SKIP_CHARS (a_this, 1);
                        status = cr_token_set_dashmatch (token);
                        CHECK_PARSING_STATUS (status, TRUE);
                        cr_parsing_location_copy (&token->location,
                                                  &location) ;
                        goto done;
                }
                break;

        case '/':
                if (BYTE (input, 2, NULL) == '*') {
                        status = cr_tknzr_parse_comment (a_this, &str);

                        if (status == CR_OK) {
                                status = cr_token_set_comment (token, str);
                                CHECK_PARSING_STATUS (status, TRUE);
                                /*
                                 *Copy the location before forgetting
                                 *the string, as the hash and string
                                 *cases do; clearing str first made
                                 *this copy unreachable.
                                 */
                                if (str) {
                                        cr_parsing_location_copy (&token->location,
                                                                  &str->location) ;
                                }
                                str = NULL;
                                goto done;
                        }
                }
                break ;

        case ';':
                SKIP_CHARS (a_this, 1);
                cr_tknzr_get_parsing_location (a_this,
                                               &location) ;
                status = cr_token_set_semicolon (token);
                CHECK_PARSING_STATUS (status, TRUE);
                cr_parsing_location_copy (&token->location,
                                          &location) ;
                goto done;

        case '{':
                SKIP_CHARS (a_this, 1);
                cr_tknzr_get_parsing_location (a_this,
                                               &location) ;
                status = cr_token_set_cbo (token);
                CHECK_PARSING_STATUS (status, TRUE);
                /*
                 *Store the location in the token, as every other
                 *punctuation case does, instead of querying it a
                 *second time and dropping it.
                 */
                cr_parsing_location_copy (&token->location,
                                          &location) ;
                goto done;

        case '}':
                SKIP_CHARS (a_this, 1);
                cr_tknzr_get_parsing_location (a_this,
                                               &location) ;
                status = cr_token_set_cbc (token);
                CHECK_PARSING_STATUS (status, TRUE);
                cr_parsing_location_copy (&token->location,
                                          &location) ;
                goto done;

        case '(':
                SKIP_CHARS (a_this, 1);
                cr_tknzr_get_parsing_location (a_this,
                                               &location) ;
                status = cr_token_set_po (token);
                CHECK_PARSING_STATUS (status, TRUE);
                cr_parsing_location_copy (&token->location,
                                          &location) ;
                goto done;

        case ')':
                SKIP_CHARS (a_this, 1);
                cr_tknzr_get_parsing_location (a_this,
                                               &location) ;
                status = cr_token_set_pc (token);
                CHECK_PARSING_STATUS (status, TRUE);
                cr_parsing_location_copy (&token->location,
                                          &location) ;
                goto done;

        case '[':
                SKIP_CHARS (a_this, 1);
                cr_tknzr_get_parsing_location (a_this,
                                               &location) ;
                status = cr_token_set_bo (token);
                CHECK_PARSING_STATUS (status, TRUE);
                cr_parsing_location_copy (&token->location,
                                          &location) ;
                goto done;

        case ']':
                SKIP_CHARS (a_this, 1);
                cr_tknzr_get_parsing_location (a_this,
                                               &location) ;
                status = cr_token_set_bc (token);
                CHECK_PARSING_STATUS (status, TRUE);
                cr_parsing_location_copy (&token->location,
                                          &location) ;
                goto done;

        case ' ':
        case '\t':
        case '\n':
        case '\f':
        case '\r':
                {
                        guchar *start = NULL,
                                *end = NULL;

                        status = cr_tknzr_parse_w (a_this, &start,
                                                   &end, &location);
                        if (status == CR_OK) {
                                status = cr_token_set_s (token);
                                CHECK_PARSING_STATUS (status, TRUE);
                                /*
                                 *Store the location handed back by
                                 *cr_tknzr_parse_w() in the token
                                 *instead of overwriting the local
                                 *and dropping it.
                                 */
                                cr_parsing_location_copy (&token->location,
                                                          &location) ;
                                goto done;
                        }
                }
                break;

        case '#':
                {
                        status = cr_tknzr_parse_hash (a_this, &str);
                        if (status == CR_OK && str) {
                                status = cr_token_set_hash (token, str);
                                CHECK_PARSING_STATUS (status, TRUE);
                                if (str) {
                                        cr_parsing_location_copy (&token->location,
                                                                  &str->location) ;
                                }
                                str = NULL;
                                goto done;
                        }
                }
                break;

        case '\'':
        case '"':
                status = cr_tknzr_parse_string (a_this, &str);
                if (status == CR_OK && str) {
                        status = cr_token_set_string (token, str);
                        CHECK_PARSING_STATUS (status, TRUE);
                        if (str) {
                                cr_parsing_location_copy (&token->location,
                                                          &str->location) ;
                        }
                        str = NULL;
                        goto done;
                }
                break;

        case '!':
                status = cr_tknzr_parse_important (a_this, &location);
                if (status == CR_OK) {
                        status = cr_token_set_important_sym (token);
                        CHECK_PARSING_STATUS (status, TRUE);
                        cr_parsing_location_copy (&token->location,
                                                  &location) ;
                        goto done;
                }
                break;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case '.':
        case '+':
        /* '-' case is handled separately above for --> comments */
        parse_number:
                {
                        CRNum *num = NULL;

                        status = cr_tknzr_parse_num (a_this, &num);
                        if (status == CR_OK && num) {
                                /*
                                 *Look at the bytes that follow the
                                 *number to recognise a unit suffix.
                                 */
                                next_bytes[0] = BYTE (input, 1, NULL);
                                next_bytes[1] = BYTE (input, 2, NULL);
                                next_bytes[2] = BYTE (input, 3, NULL);
                                next_bytes[3] = BYTE (input, 4, NULL);

                                if (next_bytes[0] == 'e'
                                    && next_bytes[1] == 'm') {
                                        num->type = NUM_LENGTH_EM;
                                        status = cr_token_set_ems (token,
                                                                   num);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 2);
                                } else if (next_bytes[0] == 'e'
                                           && next_bytes[1] == 'x') {
                                        num->type = NUM_LENGTH_EX;
                                        status = cr_token_set_exs (token,
                                                                   num);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 2);
                                } else if (next_bytes[0] == 'p'
                                           && next_bytes[1] == 'x') {
                                        num->type = NUM_LENGTH_PX;
                                        status = cr_token_set_length
                                                (token, num, LENGTH_PX_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 2);
                                } else if (next_bytes[0] == 'c'
                                           && next_bytes[1] == 'm') {
                                        num->type = NUM_LENGTH_CM;
                                        status = cr_token_set_length
                                                (token, num, LENGTH_CM_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 2);
                                } else if (next_bytes[0] == 'm'
                                           && next_bytes[1] == 'm') {
                                        num->type = NUM_LENGTH_MM;
                                        status = cr_token_set_length
                                                (token, num, LENGTH_MM_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 2);
                                } else if (next_bytes[0] == 'i'
                                           && next_bytes[1] == 'n') {
                                        num->type = NUM_LENGTH_IN;
                                        status = cr_token_set_length
                                                (token, num, LENGTH_IN_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 2);
                                } else if (next_bytes[0] == 'p'
                                           && next_bytes[1] == 't') {
                                        num->type = NUM_LENGTH_PT;
                                        status = cr_token_set_length
                                                (token, num, LENGTH_PT_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 2);
                                } else if (next_bytes[0] == 'p'
                                           && next_bytes[1] == 'c') {
                                        num->type = NUM_LENGTH_PC;
                                        status = cr_token_set_length
                                                (token, num, LENGTH_PC_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 2);
                                } else if (next_bytes[0] == 'd'
                                           && next_bytes[1] == 'e'
                                           && next_bytes[2] == 'g') {
                                        num->type = NUM_ANGLE_DEG;
                                        status = cr_token_set_angle
                                                (token, num, ANGLE_DEG_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 3);
                                } else if (next_bytes[0] == 'r'
                                           && next_bytes[1] == 'a'
                                           && next_bytes[2] == 'd') {
                                        num->type = NUM_ANGLE_RAD;
                                        status = cr_token_set_angle
                                                (token, num, ANGLE_RAD_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 3);
                                } else if (next_bytes[0] == 'g'
                                           && next_bytes[1] == 'r'
                                           && next_bytes[2] == 'a'
                                           && next_bytes[3] == 'd') {
                                        num->type = NUM_ANGLE_GRAD;
                                        status = cr_token_set_angle
                                                (token, num, ANGLE_GRAD_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 4);
                                } else if (next_bytes[0] == 'm'
                                           && next_bytes[1] == 's') {
                                        num->type = NUM_TIME_MS;
                                        status = cr_token_set_time
                                                (token, num, TIME_MS_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 2);
                                } else if (next_bytes[0] == 's') {
                                        num->type = NUM_TIME_S;
                                        status = cr_token_set_time
                                                (token, num, TIME_S_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 1);
                                } else if (next_bytes[0] == 'H'
                                           && next_bytes[1] == 'z') {
                                        num->type = NUM_FREQ_HZ;
                                        status = cr_token_set_freq
                                                (token, num, FREQ_HZ_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 2);
                                } else if (next_bytes[0] == 'k'
                                           && next_bytes[1] == 'H'
                                           && next_bytes[2] == 'z') {
                                        num->type = NUM_FREQ_KHZ;
                                        status = cr_token_set_freq
                                                (token, num, FREQ_KHZ_ET);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 3);
                                } else if (next_bytes[0] == '%') {
                                        num->type = NUM_PERCENTAGE;
                                        status = cr_token_set_percentage
                                                (token, num);
                                        num = NULL;
                                        SKIP_CHARS (a_this, 1);
                                } else {
                                        /*
                                         *No known unit: an identifier
                                         *suffix makes a dimension,
                                         *otherwise it is a bare number.
                                         */
                                        status = cr_tknzr_parse_ident (a_this,
                                                                       &str);
                                        if (status == CR_OK && str) {
                                                num->type = NUM_UNKNOWN_TYPE;
                                                status = cr_token_set_dimen
                                                        (token, num, str);
                                                num = NULL;
                                                CHECK_PARSING_STATUS (status,
                                                                      TRUE);
                                                str = NULL;
                                        } else {
                                                status = cr_token_set_number
                                                        (token, num);
                                                num = NULL;
                                                CHECK_PARSING_STATUS (status, CR_OK);
                                                str = NULL;
                                        }
                                }
                                if (token && token->u.num) {
                                        cr_parsing_location_copy (&token->location,
                                                                  &token->u.num->location) ;
                                } else {
                                        status = CR_ERROR ;
                                }
                                goto done ;
                        }
                }
                break;

        default:
        fallback:
                /*process the fallback cases here */

                /*
                 *An identifier may start with an escape, a non-ascii
                 *char or a letter. The non-ascii test has to look at
                 *the peeked char: next_bytes is still zeroed whenever
                 *this point is reached, so testing it never matched.
                 */
                if (next_char == '\\'
                    || (cr_utils_is_nonascii (next_char) == TRUE)
                    || ((next_char >= 'a') && (next_char <= 'z'))
                    || ((next_char >= 'A') && (next_char <= 'Z'))) {
                        status = cr_tknzr_parse_ident (a_this, &str);
                        if (status == CR_OK && str) {
                                guint32 next_c = 0;

                                /* ident directly followed by '(' is a function. */
                                status = cr_input_peek_char
                                        (PRIVATE (a_this)->input, &next_c);

                                if (status == CR_OK && next_c == '(') {

                                        SKIP_CHARS (a_this, 1);
                                        status = cr_token_set_function
                                                (token, str);
                                        CHECK_PARSING_STATUS (status, TRUE);
                                        /*ownership is transferred
                                         *to token by cr_token_set_function.
                                         */
                                        if (str) {
                                                cr_parsing_location_copy (&token->location,
                                                                          &str->location) ;
                                        }
                                        str = NULL;
                                } else {
                                        status = cr_token_set_ident (token,
                                                                     str);
                                        CHECK_PARSING_STATUS (status, TRUE);
                                        if (str) {
                                                cr_parsing_location_copy (&token->location,
                                                                          &str->location) ;
                                        }
                                        str = NULL;
                                }
                                goto done;
                        } else {
                                if (str) {
                                        cr_string_destroy (str);
                                        str = NULL;
                                }
                        }
                }
                break;
        }

        /* Nothing more specific matched: emit a one-char delimiter. */
        READ_NEXT_CHAR (a_this, &next_char);
        cr_tknzr_get_parsing_location (a_this,
                                       &location) ;
        status = cr_token_set_delim (token, next_char);
        CHECK_PARSING_STATUS (status, TRUE);
        cr_parsing_location_copy (&token->location,
                                  &location) ;

 done:

        if (status == CR_OK && token) {
                *a_tk = token;
                /*
                 *store the previous position input stream pos.
                 */
                memmove (&PRIVATE (a_this)->prev_pos,
                         &init_pos, sizeof (CRInputPos));
                return CR_OK;
        }

 error:
        if (token) {
                cr_token_destroy (token);
                token = NULL;
        }

        if (str) {
                cr_string_destroy (str);
                str = NULL;
        }
        cr_tknzr_set_cur_pos (a_this, &init_pos);
        return status;
}
/**
 *Fetches the next token and, if it is of the requested kind, hands
 *its payload over to the caller.
 *
 *When the token does not match (wrong type, wrong extra type for
 *length/angle/time/frequency tokens, or a missing a_extra_res for a
 *dimension), the token is put back with cr_tknzr_unget_token() and
 *no output parameter is written.
 *
 *@param a_this the current instance of #CRTknzr.
 *@param a_type the type of token expected.
 *@param a_et the extra type expected; only compared for LENGTH_TK,
 *ANGLE_TK, TIME_TK and FREQ_TK.
 *@param a_res out parameter, must not be NULL. Depending on a_type
 *it is written as a CRString ** (string-like tokens), a CRNum **
 *(numeric tokens) or a guint32 * (DELIM_TK); ownership of the
 *string or number moves to the caller. It is left untouched for
 *token types that carry no payload.
 *@param a_extra_res out parameter, required for DIMEN_TK only, in
 *which case it is written as a CRString ** receiving the dimension
 *string.
 *@return CR_OK upon successful completion, CR_PARSING_ERROR if the
 *token does not match or its type is not supported here,
 *CR_BAD_PARAM_ERROR on invalid parameters, or the error returned by
 *cr_tknzr_get_next_token().
 */
enum CRStatus
cr_tknzr_parse_token (CRTknzr * a_this, enum CRTokenType a_type,
                      enum CRTokenExtraType a_et, gpointer a_res,
                      gpointer a_extra_res)
{
        enum CRStatus status = CR_OK;
        CRToken *token = NULL;

        g_return_val_if_fail (a_this && PRIVATE (a_this)
                              && PRIVATE (a_this)->input
                              && a_res, CR_BAD_PARAM_ERROR);

        status = cr_tknzr_get_next_token (a_this, &token);
        if (status != CR_OK)
                return status;
        if (token == NULL)
                return CR_PARSING_ERROR;

        if (token->type != a_type) {
                status = CR_PARSING_ERROR;
                goto unget;
        }

        switch (a_type) {
        case NO_TK:
        case S_TK:
        case CDO_TK:
        case CDC_TK:
        case INCLUDES_TK:
        case DASHMATCH_TK:
        case IMPORT_SYM_TK:
        case PAGE_SYM_TK:
        case MEDIA_SYM_TK:
        case FONT_FACE_SYM_TK:
        case CHARSET_SYM_TK:
        case IMPORTANT_SYM_TK:
                /* No payload to hand over. */
                status = CR_OK;
                break;

        case STRING_TK:
        case IDENT_TK:
        case HASH_TK:
        case ATKEYWORD_TK:
        case FUNCTION_TK:
        case COMMENT_TK:
        case URI_TK:
                *((CRString **) a_res) = token->u.str;
                token->u.str = NULL;
                status = CR_OK;
                break;

        case EMS_TK:
        case EXS_TK:
        case PERCENTAGE_TK:
        case NUMBER_TK:
                *((CRNum **) a_res) = token->u.num;
                token->u.num = NULL;
                status = CR_OK;
                break;

        case LENGTH_TK:
        case ANGLE_TK:
        case TIME_TK:
        case FREQ_TK:
                /*
                 *A unit mismatch is a mismatch like any other:
                 *report it and put the token back, instead of
                 *returning CR_OK with *a_res left unset.
                 */
                if (token->extra_type != a_et) {
                        status = CR_PARSING_ERROR;
                        goto unget;
                }
                *((CRNum **) a_res) = token->u.num;
                token->u.num = NULL;
                status = CR_OK;
                break;

        case DIMEN_TK:
                /*
                 *Validate before writing anything, so that on failure
                 *the caller is not left holding a pointer to a number
                 *still owned by the token that is put back.
                 */
                if (a_extra_res == NULL) {
                        status = CR_BAD_PARAM_ERROR;
                        goto unget;
                }
                *((CRNum **) a_res) = token->u.num;
                *((CRString **) a_extra_res) = token->dimen;
                token->u.num = NULL;
                token->dimen = NULL;
                status = CR_OK;
                break;

        case DELIM_TK:
                *((guint32 *) a_res) = token->u.unichar;
                status = CR_OK;
                break;

        case UNICODERANGE_TK:
        default:
                status = CR_PARSING_ERROR;
                break;
        }

        /* The payload (if any) has been detached; drop the shell. */
        cr_token_destroy (token);
        token = NULL;
        return status;

 unget:
        cr_tknzr_unget_token (a_this, token);
        token = NULL;
        return status;
}
/**
 *Releases the resources held by a tokenizer and frees the instance.
 *
 *Calls cr_input_unref() on the input stream, destroys the token
 *waiting in the token cache (if any), then frees the private data
 *and the #CRTknzr itself.
 *
 *Every access to the private data is done only when the private
 *pointer is non NULL, so an instance without private data is freed
 *without dereferencing NULL.
 *
 *@param a_this the instance of #CRTknzr to destroy. Must be non NULL.
 */
void
cr_tknzr_destroy (CRTknzr * a_this)
{
        g_return_if_fail (a_this);

        if (PRIVATE (a_this)) {
                if (PRIVATE (a_this)->input) {
                        /*
                         *forget the pointer only when cr_input_unref()
                         *reports TRUE.
                         */
                        if (cr_input_unref (PRIVATE (a_this)->input)
                            == TRUE) {
                                PRIVATE (a_this)->input = NULL;
                        }
                }

                if (PRIVATE (a_this)->token_cache) {
                        cr_token_destroy (PRIVATE (a_this)->token_cache);
                        PRIVATE (a_this)->token_cache = NULL;
                }

                g_free (PRIVATE (a_this));
                PRIVATE (a_this) = NULL;
        }

        g_free (a_this);
}