/* src/extra/tre/tre-match-utils.h - R - Git at Google */

 /*
   tre-match-utils.h - TRE matcher helper definitions

   This software is released under a BSD-style license.
   See the file LICENSE for details and copyright.

 */

 /* Casts the identifier `string' that is in scope at the expansion site to
    a pointer to a const tre_str_source.  The STR_USER branches of
    GET_NEXT_WCHAR() use it to reach the get_next_char callback and the
    context member. */
 #define str_source ((const tre_str_source*)string)

 #ifdef TRE_WCHAR

 #ifdef TRE_MULTIBYTE

 /* Wide character and multibyte support.

    GET_NEXT_WCHAR() advances the input by one character.  It is a
    statement macro that expands in the caller's scope and uses these names
    from that scope: type, len, pos, pos_add_next, prev_c, next_c,
    str_byte, str_wide, str_user_end, mbstate and (through str_source)
    string.

    On every expansion prev_c first receives the old value of next_c.
    What happens next depends on `type':

      STR_BYTE  pos is incremented.  next_c becomes '\0' when len >= 0 and
		pos >= len; otherwise it is the next byte, read as an
		unsigned char, and str_byte advances by one.
      STR_WIDE  Same as STR_BYTE, but reading from str_wide.
      STR_MBS   pos grows by pos_add_next.  A NULL str_byte gives L'\0'.
		Otherwise the byte budget is len - pos, or 32 when len is
		negative.  With no budget left next_c is L'\0' and
		pos_add_next is 1.  Else tre_mbrtowc() decodes into next_c,
		and pos_add_next and str_byte move by the number of bytes
		it reports.  A result of 0 while len >= 0 is counted as a
		one-byte character with value 0.
      STR_USER  pos grows by pos_add_next, then the get_next_char callback
		is called with &next_c, &pos_add_next and the context; its
		return value is stored in str_user_end.

    NOTE: if tre_mbrtowc() returns (size_t)-1 or (size_t)-2 the macro
    executes `return REG_NOMATCH;', i.e. it returns from the function it
    was expanded in.

    NOTE(review): a negative len presumably means the length is unknown and
    the input is NUL-terminated; in that case the STR_BYTE and STR_WIDE
    branches perform no end check themselves -- confirm against callers. */

 #define GET_NEXT_WCHAR()						      \
   do {									      \
     prev_c = next_c;							      \
     if (type == STR_BYTE)						      \
       {									      \
 	pos++;								      \
 	if (len >= 0 && pos >= len)					      \
 	  next_c = '\0';						      \
 	else								      \
 	  next_c = (unsigned char)(*str_byte++);			      \
       }									      \
     else if (type == STR_WIDE)						      \
       {									      \
 	pos++;								      \
 	if (len >= 0 && pos >= len)					      \
 	  next_c = L'\0';						      \
 	else								      \
 	  next_c = *str_wide++;						      \
       }									      \
     else if (type == STR_MBS)						      \
       {									      \
         pos += pos_add_next;					      	      \
 	if (str_byte == NULL)						      \
 	  next_c = L'\0';						      \
 	else								      \
 	  {								      \
 	    size_t w;							      \
 	    int max;							      \
 	    if (len >= 0)						      \
 	      max = len - pos;						      \
 	    else							      \
 	      max = 32;							      \
 	    if (max <= 0)						      \
 	      {								      \
 		next_c = L'\0';						      \
 		pos_add_next = 1;					      \
 	      }								      \
 	    else							      \
 	      {								      \
 		w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate);    \
 		if (w == (size_t)-1 || w == (size_t)-2)			      \
 		  return REG_NOMATCH;					      \
 		if (w == 0 && len >= 0)					      \
 		  {							      \
 		    pos_add_next = 1;					      \
 		    next_c = 0;						      \
 		    str_byte++;						      \
 		  }							      \
 		else							      \
 		  {							      \
 		    pos_add_next = (unsigned int)w;					      \
 		    str_byte += w;					      \
 		  }							      \
 	      }								      \
 	  }								      \
       }									      \
     else if (type == STR_USER)						      \
       {									      \
         pos += pos_add_next;					      	      \
 	str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                  str_source->context);	      \
       }									      \
   } while(/*CONSTCOND*/(void)0,0)

 #else /* !TRE_MULTIBYTE */

 /* Wide character support, no multibyte support.

    GET_NEXT_WCHAR() advances the input by one character.  It expands in
    the caller's scope and uses type, len, pos, pos_add_next, prev_c,
    next_c, str_byte, str_wide, str_user_end and (through str_source)
    string from that scope.

    prev_c always receives the old next_c.  For STR_BYTE and STR_WIDE pos
    is incremented and next_c becomes a NUL once len >= 0 and pos >= len;
    otherwise the next element is fetched (bytes are read as unsigned char)
    and the corresponding pointer advances.  For STR_USER pos grows by
    pos_add_next and the get_next_char callback supplies next_c and the
    next pos_add_next; its return value is kept in str_user_end. */

 #define GET_NEXT_WCHAR() \
   do { \
     prev_c = next_c; \
     if (type == STR_BYTE) \
       { \
         pos++; \
         next_c = (len >= 0 && pos >= len) \
                  ? '\0' \
                  : (unsigned char)(*str_byte++); \
       } \
     else if (type == STR_WIDE) \
       { \
         pos++; \
         next_c = (len >= 0 && pos >= len) ? L'\0' : *str_wide++; \
       } \
     else if (type == STR_USER) \
       { \
         pos += pos_add_next; \
         str_user_end = \
           str_source->get_next_char(&next_c, &pos_add_next, \
                                     str_source->context); \
       } \
   } while(/*CONSTCOND*/(void)0,0)

 #endif /* !TRE_MULTIBYTE */

 #else /* !TRE_WCHAR */

 /* No wide character or multibyte support.

    GET_NEXT_WCHAR() advances the input by one character.  It expands in
    the caller's scope and uses type, len, pos, pos_add_next, prev_c,
    next_c, str_byte, str_user_end and (through str_source) string from
    that scope.

    prev_c always receives the old next_c.  For STR_BYTE pos is incremented
    and next_c becomes '\0' once len >= 0 and pos >= len; otherwise the
    next byte is read as an unsigned char and str_byte advances.  For
    STR_USER pos grows by pos_add_next and the get_next_char callback
    supplies next_c and the next pos_add_next; its return value is kept in
    str_user_end. */

 #define GET_NEXT_WCHAR() \
   do { \
     prev_c = next_c; \
     if (type == STR_BYTE) \
       { \
         pos++; \
         next_c = (len >= 0 && pos >= len) \
                  ? '\0' \
                  : (unsigned char)(*str_byte++); \
       } \
     else if (type == STR_USER) \
       { \
         pos += pos_add_next; \
         str_user_end = \
           str_source->get_next_char(&next_c, &pos_add_next, \
                                     str_source->context); \
       } \
   } while(/*CONSTCOND*/(void)0,0)

 #endif /* !TRE_WCHAR */


 /* Word-constituent test: evaluates to 1 when `c' is an underscore or
    tre_isalnum(c) is nonzero, and to 0 otherwise.  `c' can be evaluated
    twice, so pass an expression without side effects. */
 #define IS_WORD_CHAR(c) \
   (((c) == L'_') || tre_isalnum(c))

 /* CHECK_ASSERTIONS(assertions) evaluates to nonzero when at least one
    assertion bit set in `assertions' is NOT satisfied at the current
    position, and to 0 when every requested assertion holds.  It reads pos,
    prev_c, next_c, reg_notbol, reg_noteol and reg_newline from the scope
    it is expanded in.

    Failure conditions per flag:
      ASSERT_AT_BOL     (pos > 0 or reg_notbol) and not (reg_newline with
			prev_c == '\n').
      ASSERT_AT_EOL     (next_c != NUL or reg_noteol) and not (reg_newline
			with next_c == '\n').
      ASSERT_AT_BOW     prev_c is a word character or next_c is not.
      ASSERT_AT_EOW     prev_c is not a word character or next_c is one.
      ASSERT_AT_WB      pos != 0, next_c != NUL, and prev_c and next_c are
			both word characters or both non-word characters.
      ASSERT_AT_WB_NEG  pos == 0, or next_c == NUL, or exactly one of
			prev_c and next_c is a word character.

    `assertions' is parenthesized at every use so that a compound argument
    such as (a | b) is masked as a whole.  It is evaluated several times;
    pass an expression without side effects. */
 #define CHECK_ASSERTIONS(assertions) \
   ((((assertions) & ASSERT_AT_BOL) \
     && (pos > 0 || reg_notbol) \
     && (prev_c != L'\n' || !reg_newline)) \
    || (((assertions) & ASSERT_AT_EOL) \
        && (next_c != L'\0' || reg_noteol) \
        && (next_c != L'\n' || !reg_newline)) \
    || (((assertions) & ASSERT_AT_BOW) \
        && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
    || (((assertions) & ASSERT_AT_EOW) \
        && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
    || (((assertions) & ASSERT_AT_WB) \
        && (pos != 0 && next_c != L'\0' \
            && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
    || (((assertions) & ASSERT_AT_WB_NEG) \
        && (pos == 0 || next_c == L'\0' \
            || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))

 /* CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) evaluates to nonzero when a
    character class assertion attached to transition `trans_i' is NOT
    satisfied by prev_c (read from the expansion scope), else to 0.

      ASSERT_CHAR_CLASS      fails when prev_c is not in trans_i->u.class.
			     If REG_ICASE is set in tnfa->cflags, it fails
			     only when neither tre_tolower(prev_c) nor
			     tre_toupper(prev_c) is in the class.
      ASSERT_CHAR_CLASS_NEG  fails when tre_neg_char_classes_match()
			     reports that prev_c is in one of
			     trans_i->neg_classes.

    `eflags' is accepted but not used by the expansion.  `trans_i' and
    `tnfa' are parenthesized before `->' so that any pointer-valued
    expression can be passed; both are evaluated several times, so avoid
    side effects. */
 #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
   ((((trans_i)->assertions & ASSERT_CHAR_CLASS) \
     && !((tnfa)->cflags & REG_ICASE) \
     && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class)) \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS) \
        && ((tnfa)->cflags & REG_ICASE) \
        && !tre_isctype(tre_tolower((tre_cint_t)prev_c), \
                        (trans_i)->u.class) \
        && !tre_isctype(tre_toupper((tre_cint_t)prev_c), \
                        (trans_i)->u.class)) \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG) \
        && tre_neg_char_classes_match((trans_i)->neg_classes, \
                                      (tre_cint_t)prev_c, \
                                      (tnfa)->cflags & REG_ICASE)))


 /* Decides which of two tag value arrays is preferred.  Tags are compared
    in index order and the first index whose values differ settles the
    result: for a TRE_TAG_MINIMIZE tag the smaller value wins, for any
    other direction the larger value wins.

    Returns 1 if `t1' wins over `t2'.  Returns 0 if `t2' wins or if all
    `num_tags' values are equal. */
 inline static int
 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
 	      int *t1, int *t2)
 {
   int tag = 0;

   while (tag < num_tags)
     {
       int lhs = t1[tag];
       int rhs = t2[tag];

       if (lhs != rhs)
 	{
 	  /* First differing tag: its direction picks the winner. */
 	  if (tag_directions[tag] == TRE_TAG_MINIMIZE)
 	    return lhs < rhs;
 	  return lhs > rhs;
 	}
       tag++;
     }

   /* Every tag compared equal, so `t1' does not win. */
   return 0;
 }

 /* Tests `wc' against a list of character classes that is terminated by a
    (tre_ctype_t)0 entry.  Returns 1 as soon as `wc' belongs to one of the
    classes and 0 if it belongs to none of them.  When `icase' is nonzero,
    tre_toupper(wc) and tre_tolower(wc) are tested instead of `wc'. */
 inline static int
 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
 {
   /* The arguments look like a printf argument list (DPRINT is defined
      elsewhere): %p expects a void pointer and %d an int, so convert
      explicitly. */
   DPRINT(("neg_char_classes_test: %p, %d, %d\n",
 	  (void *)classes, (int)wc, icase));

   for (; *classes != (tre_ctype_t)0; classes++)
     {
       if (icase)
 	{
 	  if (tre_isctype(tre_toupper(wc), *classes)
 	      || tre_isctype(tre_tolower(wc), *classes))
 	    return 1; /* Match. */
 	}
       else if (tre_isctype(wc, *classes))
 	{
 	  return 1; /* Match. */
 	}
     }
   return 0; /* No match. */
 }
	/*
	tre-match-utils.h - TRE matcher helper definitions

	This software is released under a BSD-style license.
	See the file LICENSE for details and copyright.

	*/

	/* Casts the identifier `string' that is in scope at the expansion site
	   to a pointer to a const tre_str_source.  The STR_USER branches of
	   GET_NEXT_WCHAR() use it to reach the get_next_char callback and the
	   context member. */
	#define str_source ((const tre_str_source*)string)

	#ifdef TRE_WCHAR

	#ifdef TRE_MULTIBYTE

	/* Wide character and multibyte support.

	   GET_NEXT_WCHAR() advances the input by one character.  It is a
	   statement macro that expands in the caller's scope and uses these
	   names from that scope: type, len, pos, pos_add_next, prev_c, next_c,
	   str_byte, str_wide, str_user_end, mbstate and (through str_source)
	   string.

	   On every expansion prev_c first receives the old value of next_c.
	   What happens next depends on `type':

	     STR_BYTE  pos is incremented.  next_c becomes '\0' when len >= 0
	               and pos >= len; otherwise it is the next byte, read as
	               an unsigned char, and str_byte advances by one.
	     STR_WIDE  Same as STR_BYTE, but reading from str_wide.
	     STR_MBS   pos grows by pos_add_next.  A NULL str_byte gives L'\0'.
	               Otherwise the byte budget is len - pos, or 32 when len
	               is negative.  With no budget left next_c is L'\0' and
	               pos_add_next is 1.  Else tre_mbrtowc() decodes into
	               next_c, and pos_add_next and str_byte move by the number
	               of bytes it reports.  A result of 0 while len >= 0 is
	               counted as a one-byte character with value 0.
	     STR_USER  pos grows by pos_add_next, then the get_next_char
	               callback is called with &next_c, &pos_add_next and the
	               context; its return value is stored in str_user_end.

	   NOTE: if tre_mbrtowc() returns (size_t)-1 or (size_t)-2 the macro
	   executes `return REG_NOMATCH;', i.e. it returns from the function it
	   was expanded in.

	   NOTE(review): a negative len presumably means the length is unknown
	   and the input is NUL-terminated -- confirm against callers. */

	#define GET_NEXT_WCHAR() \
	  do { \
	    prev_c = next_c; \
	    if (type == STR_BYTE) \
	      { \
	        pos++; \
	        if (len >= 0 && pos >= len) \
	          next_c = '\0'; \
	        else \
	          next_c = (unsigned char)(*str_byte++); \
	      } \
	    else if (type == STR_WIDE) \
	      { \
	        pos++; \
	        if (len >= 0 && pos >= len) \
	          next_c = L'\0'; \
	        else \
	          next_c = *str_wide++; \
	      } \
	    else if (type == STR_MBS) \
	      { \
	        pos += pos_add_next; \
	        if (str_byte == NULL) \
	          next_c = L'\0'; \
	        else \
	          { \
	            size_t w; \
	            int max; \
	            if (len >= 0) \
	              max = len - pos; \
	            else \
	              max = 32; \
	            if (max <= 0) \
	              { \
	                next_c = L'\0'; \
	                pos_add_next = 1; \
	              } \
	            else \
	              { \
	                w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate); \
	                if (w == (size_t)-1 || w == (size_t)-2) \
	                  return REG_NOMATCH; \
	                if (w == 0 && len >= 0) \
	                  { \
	                    pos_add_next = 1; \
	                    next_c = 0; \
	                    str_byte++; \
	                  } \
	                else \
	                  { \
	                    pos_add_next = (unsigned int)w; \
	                    str_byte += w; \
	                  } \
	              } \
	          } \
	      } \
	    else if (type == STR_USER) \
	      { \
	        pos += pos_add_next; \
	        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
	                                                 str_source->context); \
	      } \
	  } while(/*CONSTCOND*/(void)0,0)

	#else /* !TRE_MULTIBYTE */

	/* Wide character support, no multibyte support.

	   GET_NEXT_WCHAR() advances the input by one character.  It expands in
	   the caller's scope and uses type, len, pos, pos_add_next, prev_c,
	   next_c, str_byte, str_wide, str_user_end and (through str_source)
	   string from that scope.

	   prev_c always receives the old next_c.  For STR_BYTE and STR_WIDE
	   pos is incremented and next_c becomes a NUL once len >= 0 and
	   pos >= len; otherwise the next element is fetched (bytes are read as
	   unsigned char) and the corresponding pointer advances.  For STR_USER
	   pos grows by pos_add_next and the get_next_char callback supplies
	   next_c and the next pos_add_next; its return value is kept in
	   str_user_end. */

	#define GET_NEXT_WCHAR() \
	  do { \
	    prev_c = next_c; \
	    if (type == STR_BYTE) \
	      { \
	        pos++; \
	        if (len >= 0 && pos >= len) \
	          next_c = '\0'; \
	        else \
	          next_c = (unsigned char)(*str_byte++); \
	      } \
	    else if (type == STR_WIDE) \
	      { \
	        pos++; \
	        if (len >= 0 && pos >= len) \
	          next_c = L'\0'; \
	        else \
	          next_c = *str_wide++; \
	      } \
	    else if (type == STR_USER) \
	      { \
	        pos += pos_add_next; \
	        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
	                                                 str_source->context); \
	      } \
	  } while(/*CONSTCOND*/(void)0,0)

	#endif /* !TRE_MULTIBYTE */

	#else /* !TRE_WCHAR */

	/* No wide character or multibyte support.

	   GET_NEXT_WCHAR() advances the input by one character.  It expands in
	   the caller's scope and uses type, len, pos, pos_add_next, prev_c,
	   next_c, str_byte, str_user_end and (through str_source) string from
	   that scope.

	   prev_c always receives the old next_c.  For STR_BYTE pos is
	   incremented and next_c becomes '\0' once len >= 0 and pos >= len;
	   otherwise the next byte is read as an unsigned char and str_byte
	   advances.  For STR_USER pos grows by pos_add_next and the
	   get_next_char callback supplies next_c and the next pos_add_next;
	   its return value is kept in str_user_end. */

	#define GET_NEXT_WCHAR() \
	  do { \
	    prev_c = next_c; \
	    if (type == STR_BYTE) \
	      { \
	        pos++; \
	        if (len >= 0 && pos >= len) \
	          next_c = '\0'; \
	        else \
	          next_c = (unsigned char)(*str_byte++); \
	      } \
	    else if (type == STR_USER) \
	      { \
	        pos += pos_add_next; \
	        str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
	                                                 str_source->context); \
	      } \
	  } while(/*CONSTCOND*/(void)0,0)

	#endif /* !TRE_WCHAR */



	/* Word-constituent test: evaluates to 1 when `c' is an underscore or
	   tre_isalnum(c) is nonzero, and to 0 otherwise.  `c' can be evaluated
	   twice, so pass an expression without side effects. */
	#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))

	/* CHECK_ASSERTIONS(assertions) evaluates to nonzero when at least one
	   assertion bit set in `assertions' is NOT satisfied at the current
	   position, and to 0 when every requested assertion holds.  It reads
	   pos, prev_c, next_c, reg_notbol, reg_noteol and reg_newline from the
	   scope it is expanded in.

	   Failure conditions per flag:
	     ASSERT_AT_BOL     (pos > 0 or reg_notbol) and not (reg_newline
	                       with prev_c == '\n').
	     ASSERT_AT_EOL     (next_c != NUL or reg_noteol) and not
	                       (reg_newline with next_c == '\n').
	     ASSERT_AT_BOW     prev_c is a word character or next_c is not.
	     ASSERT_AT_EOW     prev_c is not a word character or next_c is one.
	     ASSERT_AT_WB      pos != 0, next_c != NUL, and prev_c and next_c
	                       are both word or both non-word characters.
	     ASSERT_AT_WB_NEG  pos == 0, or next_c == NUL, or exactly one of
	                       prev_c and next_c is a word character.

	   `assertions' is parenthesized at every use so that a compound
	   argument such as (a | b) is masked as a whole.  It is evaluated
	   several times; pass an expression without side effects. */
	#define CHECK_ASSERTIONS(assertions) \
	  ((((assertions) & ASSERT_AT_BOL) \
	    && (pos > 0 || reg_notbol) \
	    && (prev_c != L'\n' || !reg_newline)) \
	   || (((assertions) & ASSERT_AT_EOL) \
	       && (next_c != L'\0' || reg_noteol) \
	       && (next_c != L'\n' || !reg_newline)) \
	   || (((assertions) & ASSERT_AT_BOW) \
	       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
	   || (((assertions) & ASSERT_AT_EOW) \
	       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
	   || (((assertions) & ASSERT_AT_WB) \
	       && (pos != 0 && next_c != L'\0' \
	           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
	   || (((assertions) & ASSERT_AT_WB_NEG) \
	       && (pos == 0 || next_c == L'\0' \
	           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))

	/* CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) evaluates to nonzero when a
	   character class assertion attached to transition `trans_i' is NOT
	   satisfied by prev_c (read from the expansion scope), else to 0.

	     ASSERT_CHAR_CLASS      fails when prev_c is not in
	                            trans_i->u.class.  If REG_ICASE is set in
	                            tnfa->cflags, it fails only when neither
	                            tre_tolower(prev_c) nor tre_toupper(prev_c)
	                            is in the class.
	     ASSERT_CHAR_CLASS_NEG  fails when tre_neg_char_classes_match()
	                            reports that prev_c is in one of
	                            trans_i->neg_classes.

	   `eflags' is accepted but not used by the expansion.  `trans_i' and
	   `tnfa' are parenthesized before `->' so that any pointer-valued
	   expression can be passed; both are evaluated several times, so avoid
	   side effects. */
	#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
	  ((((trans_i)->assertions & ASSERT_CHAR_CLASS) \
	    && !((tnfa)->cflags & REG_ICASE) \
	    && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class)) \
	   || (((trans_i)->assertions & ASSERT_CHAR_CLASS) \
	       && ((tnfa)->cflags & REG_ICASE) \
	       && !tre_isctype(tre_tolower((tre_cint_t)prev_c), \
	                       (trans_i)->u.class) \
	       && !tre_isctype(tre_toupper((tre_cint_t)prev_c), \
	                       (trans_i)->u.class)) \
	   || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG) \
	       && tre_neg_char_classes_match((trans_i)->neg_classes, \
	                                     (tre_cint_t)prev_c, \
	                                     (tnfa)->cflags & REG_ICASE)))




	/* Decides which of two tag value arrays is preferred.  Tags are compared
	   in index order and the first index whose values differ settles the
	   result: for a TRE_TAG_MINIMIZE tag the smaller value wins, for any
	   other direction the larger value wins.

	   Returns 1 if `t1' wins over `t2'.  Returns 0 if `t2' wins or if all
	   `num_tags' values are equal.  `t1' and `t2' are indexed below, so
	   they must be pointers to at least `num_tags' ints. */
	inline static int
	tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
		      int *t1, int *t2)
	{
	  int i;
	  for (i = 0; i < num_tags; i++)
	    {
	      if (tag_directions[i] == TRE_TAG_MINIMIZE)
		{
		  if (t1[i] < t2[i])
		    return 1;
		  if (t1[i] > t2[i])
		    return 0;
		}
	      else
		{
		  if (t1[i] > t2[i])
		    return 1;
		  if (t1[i] < t2[i])
		    return 0;
		}
	    }
	  /* Every tag compared equal, so `t1' does not win. */
	  return 0;
	}

	/* Tests `wc' against a list of character classes that is terminated by a
	   (tre_ctype_t)0 entry.  Returns 1 as soon as `wc' belongs to one of the
	   classes and 0 if it belongs to none of them.  When `icase' is nonzero,
	   tre_toupper(wc) and tre_tolower(wc) are tested instead of `wc'. */
	inline static int
	tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
	{
	  /* The arguments look like a printf argument list (DPRINT is defined
	     elsewhere): %p expects a void pointer and %d an int, so convert
	     explicitly. */
	  DPRINT(("neg_char_classes_test: %p, %d, %d\n",
		  (void *)classes, (int)wc, icase));

	  for (; *classes != (tre_ctype_t)0; classes++)
	    {
	      if (icase)
		{
		  if (tre_isctype(tre_toupper(wc), *classes)
		      || tre_isctype(tre_tolower(wc), *classes))
		    return 1; /* Match. */
		}
	      else if (tre_isctype(wc, *classes))
		{
		  return 1; /* Match. */
		}
	    }
	  return 0; /* No match. */
	}