root/ext/mbstring/oniguruma/regparse.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. onig_null_warn
  2. onig_set_warn_func
  3. onig_set_verb_warn_func
  4. bbuf_free
  5. bbuf_clone
  6. bitset_set_range
  7. bitset_set_all
  8. bitset_invert
  9. bitset_invert_to
  10. bitset_and
  11. bitset_or
  12. bitset_copy
  13. onig_strncmp
  14. onig_strcpy
  15. strdup_with_null
  16. strcat_capa
  17. strcat_capa_from_static
  18. str_end_cmp
  19. str_end_hash
  20. onig_st_init_strend_table_with_size
  21. onig_st_lookup_strend
  22. onig_st_insert_strend
  23. i_print_name_entry
  24. onig_print_names
  25. i_free_name_entry
  26. names_clear
  27. onig_names_free
  28. name_find
  29. i_names
  30. onig_foreach_name
  31. i_renumber_name
  32. onig_renumber_name_table
  33. onig_number_of_names
  34. onig_print_names
  35. names_clear
  36. onig_names_free
  37. name_find
  38. onig_foreach_name
  39. onig_number_of_names
  40. name_add
  41. onig_name_to_group_numbers
  42. onig_name_to_backref_number
  43. onig_name_to_group_numbers
  44. onig_name_to_backref_number
  45. onig_foreach_name
  46. onig_number_of_names
  47. onig_noname_group_capture_is_active
  48. scan_env_clear
  49. scan_env_add_mem_entry
  50. scan_env_set_mem_node
  51. onig_node_free
  52. onig_free_node_list
  53. node_new
  54. initialize_cclass
  55. node_new_cclass
  56. node_new_cclass_by_codepoint_range
  57. node_new_ctype
  58. node_new_anychar
  59. node_new_list
  60. onig_node_new_list
  61. onig_node_list_add
  62. onig_node_new_alt
  63. onig_node_new_anchor
  64. node_new_backref
  65. node_new_call
  66. node_new_quantifier
  67. node_new_enclose
  68. onig_node_new_enclose
  69. node_new_enclose_memory
  70. node_new_option
  71. onig_node_str_cat
  72. onig_node_str_set
  73. node_str_cat_char
  74. onig_node_conv_to_str_node
  75. onig_node_str_clear
  76. node_new_str
  77. onig_node_new_str
  78. node_new_str_raw
  79. node_new_empty
  80. node_new_str_raw_char
  81. str_node_split_last_char
  82. str_node_can_be_split
  83. node_str_head_pad
  84. onig_scan_unsigned_number
  85. scan_unsigned_hexadecimal_number
  86. scan_unsigned_octal_number
  87. new_code_range
  88. add_code_range_to_buf
  89. add_code_range
  90. not_code_range_buf
  91. or_code_range_buf
  92. and_code_range1
  93. and_code_range_buf
  94. and_cclass
  95. or_cclass
  96. conv_backslash_value
  97. is_invalid_quantifier_target
  98. popular_quantifier_num
  99. onig_reduce_nested_quantifier
  100. fetch_range_quantifier
  101. fetch_escaped_value
  102. get_name_end_code_point
  103. fetch_name_with_level
  104. fetch_name
  105. fetch_name
  106. CC_ESC_WARN
  107. CLOSE_BRACKET_WITHOUT_ESC_WARN
  108. find_str_position
  109. str_exist_check_with_esc
  110. fetch_token_in_cc
  111. fetch_token
  112. add_ctype_to_cc_by_range
  113. add_ctype_to_cc
  114. parse_posix_bracket
  115. fetch_char_property_to_ctype
  116. parse_char_property
  117. next_state_class
  118. next_state_val
  119. code_exist_check
  120. parse_char_class
  121. parse_enclose
  122. set_quantifier
  123. type_cclass_cmp
  124. type_cclass_hash
  125. i_free_shared_class
  126. onig_free_shared_cclass_table
  127. clear_not_flag_cclass
  128. i_apply_case_fold
  129. parse_exp
  130. parse_branch
  131. parse_subexp
  132. parse_regexp
  133. onig_parse_make_tree
  134. onig_scan_env_set_error_string

   1 /**********************************************************************
   2   regparse.c -  Oniguruma (regular expression library)
   3 **********************************************************************/
   4 /*-
   5  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
   6  * All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  */
  29 
  30 #include "regparse.h"
  31 #include "st.h"
  32 
  33 #define WARN_BUFSIZE    256
  34 
  35 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
  36 
  37 
  38 OnigSyntaxType OnigSyntaxRuby = {   /* syntax table for Ruby-style patterns */
  39   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |   /* 1st member: ONIG_SYN_OP_* flags layered on SYN_GNU_REGEX_OP */
  40      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
  41      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
  42      ONIG_SYN_OP_ESC_C_CONTROL )
  43    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )   /* ...with the ESC_LTGT_WORD_BEGIN_END bit masked out */
  44   , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |   /* 2nd member: ONIG_SYN_OP2_* flags */
  45       ONIG_SYN_OP2_OPTION_RUBY |
  46       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
  47       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
  48       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
  49       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
  50       ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
  51       ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
  52       ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
  53       ONIG_SYN_OP2_ESC_H_XDIGIT )
  54   , ( SYN_GNU_REGEX_BV |    /* 3rd member: ONIG_SYN_* behavior flags layered on SYN_GNU_REGEX_BV */
  55       ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
  56       ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
  57       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
  58       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
  59       ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
  60       ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
  61       ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
  62   , ONIG_OPTION_NONE   /* 4th member: no option bits */
  63   ,
  64   {   /* 5th member: meta character table; only the escape character is given a real value */
  65       (OnigCodePoint )'\\'                       /* esc */
  66     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
  67     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
  68     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
  69     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
  70     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  71   }
  72 };
  73 
  74 OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_RUBY;   /* default syntax pointer; ONIG_SYNTAX_RUBY presumably expands to &OnigSyntaxRuby -- TODO confirm */
  75 
  76 extern void onig_null_warn(const char* s ARG_UNUSED) { }
  77 
  78 #ifdef DEFAULT_WARN_FUNCTION   /* build-time override for the ordinary warning hook */
  79 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
  80 #else
  81 static OnigWarnFunc onig_warn = onig_null_warn;   /* otherwise warnings are discarded until onig_set_warn_func() is called */
  82 #endif
  83 
  84 #ifdef DEFAULT_VERB_WARN_FUNCTION   /* build-time override for the second ("verb") warning hook */
  85 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
  86 #else
  87 static OnigWarnFunc onig_verb_warn = onig_null_warn;   /* replaced via onig_set_verb_warn_func() */
  88 #endif
  89 
  90 extern void onig_set_warn_func(OnigWarnFunc f)
  91 {
  92   onig_warn = f;
  93 }
  94 
  95 extern void onig_set_verb_warn_func(OnigWarnFunc f)
  96 {
  97   onig_verb_warn = f;
  98 }
  99 
 100 static void
 101 bbuf_free(BBuf* bbuf)
 102 {
 103   if (IS_NOT_NULL(bbuf)) {
 104     if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
 105     xfree(bbuf);
 106   }
 107 }
 108 
 109 static int
 110 bbuf_clone(BBuf** rto, BBuf* from)
 111 {
 112   int r;
 113   BBuf *to;
 114 
 115   *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
 116   CHECK_NULL_RETURN_MEMERR(to);
 117   r = BBUF_INIT(to, from->alloc);
 118   if (r != 0) return r;
 119   to->used = from->used;
 120   xmemcpy(to->p, from->p, from->used);
 121   return 0;
 122 }
 123 
 124 #define BACKREF_REL_TO_ABS(rel_no, env) /* absolute group number: env->num_mem + 1 + rel_no */ \
 125   ((env)->num_mem + 1 + (rel_no))
 126 
 127 #define ONOFF(v,f,negative)    (negative) ? ((v) &= ~(f)) : ((v) |= (f))   /* clear bits f in v when negative is true, set them otherwise; expansion is not parenthesized as a whole */
 128 
 129 #define MBCODE_START_POS(enc) /* 0 when the encoding's minimum char length exceeds 1, else 0x80 */ \
 130   (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
 131 
 132 #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) /* add the range [MBCODE_START_POS(enc), all-ones code point] to pbuf */ \
 133   add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
 134 
 135 #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do { /* uses a variable `r` of the enclosing function and returns from it on error */ \
 136   if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
 137     r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
 138     if (r) return r;\
 139   }\
 140 } while (0)
 141 
 142 
 143 #define BITSET_IS_EMPTY(bs,empty) do { /* sets `empty` to 1 when all BITSET_SIZE words of bs are zero, else 0 */ \
 144   int i;\
 145   empty = 1;\
 146   for (i = 0; i < (int )BITSET_SIZE; i++) {\
 147     if ((bs)[i] != 0) {\
 148       empty = 0; break;\
 149     }\
 150   }\
 151 } while (0)
 152 
 153 static void
 154 bitset_set_range(BitSetRef bs, int from, int to)
 155 {
 156   int i;
 157   for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
 158     BITSET_SET_BIT(bs, i);
 159   }
 160 }
 161 
 162 #if 0   /* compiled out: nothing in this build can call bitset_set_all */
 163 static void
 164 bitset_set_all(BitSetRef bs)
 165 {
 166   int i;
 167   for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }   /* fill every word with all-ones */
 168 }
 169 #endif
 170 
 171 static void
 172 bitset_invert(BitSetRef bs)
 173 {
 174   int i;
 175   for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
 176 }
 177 
 178 static void
 179 bitset_invert_to(BitSetRef from, BitSetRef to)
 180 {
 181   int i;
 182   for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
 183 }
 184 
 185 static void
 186 bitset_and(BitSetRef dest, BitSetRef bs)
 187 {
 188   int i;
 189   for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
 190 }
 191 
 192 static void
 193 bitset_or(BitSetRef dest, BitSetRef bs)
 194 {
 195   int i;
 196   for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
 197 }
 198 
 199 static void
 200 bitset_copy(BitSetRef dest, BitSetRef bs)
 201 {
 202   int i;
 203   for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
 204 }
 205 
 206 extern int
 207 onig_strncmp(const UChar* s1, const UChar* s2, int n)
 208 {
 209   int x;
 210 
 211   while (n-- > 0) {
 212     x = *s2++ - *s1++;
 213     if (x) return x;
 214   }
 215   return 0;
 216 }
 217 
 218 extern void
 219 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
 220 {
 221   int len = end - src;
 222   if (len > 0) {
 223     xmemcpy(dest, src, len);
 224     dest[len] = (UChar )0;
 225   }
 226 }
 227 
 228 #ifdef USE_NAMED_GROUP
 229 static UChar*
 230 strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
 231 {
 232   int slen, term_len, i;
 233   UChar *r;
 234 
 235   slen = end - s;
 236   term_len = ONIGENC_MBC_MINLEN(enc);
 237 
 238   r = (UChar* )xmalloc(slen + term_len);
 239   CHECK_NULL_RETURN(r);
 240   xmemcpy(r, s, slen);
 241 
 242   for (i = 0; i < term_len; i++)
 243     r[slen + i] = (UChar )0;
 244 
 245   return r;
 246 }
 247 #endif
 248 
 249 /* scan pattern methods: these macros operate on variables named p, end, enc (and pfetch_prev) of the enclosing function */
 250 #define PEND_VALUE   0   /* value PPEEK yields once the end of input is reached */
 251 
 252 #define PFETCH_READY  UChar* pfetch_prev   /* declares the saved position used by PINC/PFETCH/PUNFETCH */
 253 #define PEND         (p < end ?  0 : 1)   /* 1 when p has reached end */
 254 #define PUNFETCH     p = pfetch_prev   /* step back to the position saved by the last PINC/PFETCH */
 255 #define PINC       do { /* save p, then advance by one encoded character */ \
 256   pfetch_prev = p; \
 257   p += ONIGENC_MBC_ENC_LEN(enc, p); \
 258 } while (0)
 259 #define PFETCH(c)  do { /* decode the character at p into c, save p, then advance */ \
 260   c = ONIGENC_MBC_TO_CODE(enc, p, end); \
 261   pfetch_prev = p; \
 262   p += ONIGENC_MBC_ENC_LEN(enc, p); \
 263 } while (0)
 264 
 265 #define PINC_S     do { /* like PINC but does not record pfetch_prev */ \
 266   p += ONIGENC_MBC_ENC_LEN(enc, p); \
 267 } while (0)
 268 #define PFETCH_S(c) do { /* like PFETCH but does not record pfetch_prev */ \
 269   c = ONIGENC_MBC_TO_CODE(enc, p, end); \
 270   p += ONIGENC_MBC_ENC_LEN(enc, p); \
 271 } while (0)
 272 
 273 #define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)   /* decode without advancing */
 274 #define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )c)   /* true when the next character equals c */
 275 
 276 static UChar*
 277 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
 278               int capa)
 279 {
 280   UChar* r;
 281 
 282   if (dest)
 283     r = (UChar* )xrealloc(dest, capa + 1);
 284   else
 285     r = (UChar* )xmalloc(capa + 1);
 286 
 287   CHECK_NULL_RETURN(r);
 288   onig_strcpy(r + (dest_end - dest), src, src_end);
 289   return r;
 290 }
 291 
 292 /* dest on static area */
 293 static UChar*
 294 strcat_capa_from_static(UChar* dest, UChar* dest_end,
 295                         const UChar* src, const UChar* src_end, int capa)
 296 {
 297   UChar* r;
 298 
 299   r = (UChar* )xmalloc(capa + 1);
 300   CHECK_NULL_RETURN(r);
 301   onig_strcpy(r, dest, dest_end);
 302   onig_strcpy(r + (dest_end - dest), src, src_end);
 303   return r;
 304 }
 305 
 306 
 307 #ifdef USE_ST_LIBRARY
 308 
 309 typedef struct {   /* hash key describing a byte string by its two bounds */
 310   UChar* s;     /* first byte of the key */
 311   UChar* end;   /* one past the last byte of the key */
 312 } st_str_end_key;
 313 
 314 static int
 315 str_end_cmp(st_str_end_key* x, st_str_end_key* y)
 316 {
 317   UChar *p, *q;
 318   int c;
 319 
 320   if ((x->end - x->s) != (y->end - y->s))
 321     return 1;
 322 
 323   p = x->s;
 324   q = y->s;
 325   while (p < x->end) {
 326     c = (int )*p - (int )*q;
 327     if (c != 0) return c;
 328 
 329     p++; q++;
 330   }
 331 
 332   return 0;
 333 }
 334 
 335 static int
 336 str_end_hash(st_str_end_key* x)
 337 {
 338   UChar *p;
 339   int val = 0;
 340 
 341   p = x->s;
 342   while (p < x->end) {
 343     val = val * 997 + (int )*p++;
 344   }
 345 
 346   return val + (val >> 5);
 347 }
 348 
 349 extern hash_table_type*
 350 onig_st_init_strend_table_with_size(int size)
 351 {
 352   static struct st_hash_type hashType = {
 353     str_end_cmp,
 354     str_end_hash,
 355   };
 356 
 357   return (hash_table_type* )
 358            onig_st_init_table_with_size(&hashType, size);
 359 }
 360 
 361 extern int
 362 onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
 363                       const UChar* end_key, hash_data_type *value)
 364 {
 365   st_str_end_key key;
 366 
 367   key.s   = (UChar* )str_key;
 368   key.end = (UChar* )end_key;
 369 
 370   return onig_st_lookup(table, (st_data_t )(&key), value);
 371 }
 372 
 373 extern int
 374 onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
 375                       const UChar* end_key, hash_data_type value)
 376 {
 377   st_str_end_key* key;
 378   int result;
 379 
 380   key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
 381   key->s   = (UChar* )str_key;
 382   key->end = (UChar* )end_key;
 383   result = onig_st_insert(table, (st_data_t )key, value);
 384   if (result) {
 385     xfree(key);
 386   }
 387   return result;
 388 }
 389 
 390 #endif /* USE_ST_LIBRARY */
 391 
 392 
 393 #ifdef USE_NAMED_GROUP
 394 
 395 #define INIT_NAME_BACKREFS_ALLOC_NUM   8   /* initial capacity of NameEntry.back_refs */
 396 
 397 typedef struct {   /* one group name and the group numbers registered under it */
 398   UChar* name;       /* heap copy of the name, made by strdup_with_null() */
 399   int    name_len;   /* byte length */
 400   int    back_num;   /* number of backrefs */
 401   int    back_alloc; /* capacity of back_refs, in ints */
 402   int    back_ref1;  /* the group number while back_num == 1 */
 403   int*   back_refs;  /* heap array of group numbers, used once back_num > 1 */
 404 } NameEntry;
 405 
 406 #ifdef USE_ST_LIBRARY
 407 
 408 typedef st_table  NameTable;   /* with USE_ST_LIBRARY the name table is an st hash table */
 409 typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
 410 
 411 #define NAMEBUF_SIZE    24   /* not referenced in the visible part of this file */
 412 #define NAMEBUF_SIZE_1  25   /* NAMEBUF_SIZE + 1 */
 413 
 414 #ifdef ONIG_DEBUG
 415 static int
 416 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
 417 {
 418   int i;
 419   FILE* fp = (FILE* )arg;
 420 
 421   fprintf(fp, "%s: ", e->name);
 422   if (e->back_num == 0)
 423     fputs("-", fp);
 424   else if (e->back_num == 1)
 425     fprintf(fp, "%d", e->back_ref1);
 426   else {
 427     for (i = 0; i < e->back_num; i++) {
 428       if (i > 0) fprintf(fp, ", ");
 429       fprintf(fp, "%d", e->back_refs[i]);
 430     }
 431   }
 432   fputs("\n", fp);
 433   return ST_CONTINUE;
 434 }
 435 
 436 extern int
 437 onig_print_names(FILE* fp, regex_t* reg)
 438 {
 439   NameTable* t = (NameTable* )reg->name_table;
 440 
 441   if (IS_NOT_NULL(t)) {
 442     fprintf(fp, "name table\n");
 443     onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
 444     fputs("\n", fp);
 445   }
 446   return 0;
 447 }
 448 #endif /* ONIG_DEBUG */
 449 
 450 static int
 451 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
 452 {
 453   xfree(e->name);
 454   if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
 455   xfree(key);
 456   xfree(e);
 457   return ST_DELETE;
 458 }
 459 
 460 static int
 461 names_clear(regex_t* reg)
 462 {
 463   NameTable* t = (NameTable* )reg->name_table;
 464 
 465   if (IS_NOT_NULL(t)) {
 466     onig_st_foreach(t, i_free_name_entry, 0);
 467   }
 468   return 0;
 469 }
 470 
 471 extern int
 472 onig_names_free(regex_t* reg)
 473 {
 474   int r;
 475   NameTable* t;
 476 
 477   r = names_clear(reg);
 478   if (r) return r;
 479 
 480   t = (NameTable* )reg->name_table;
 481   if (IS_NOT_NULL(t)) onig_st_free_table(t);
 482   reg->name_table = (void* )NULL;
 483   return 0;
 484 }
 485 
 486 static NameEntry*
 487 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
 488 {
 489   NameEntry* e;
 490   NameTable* t = (NameTable* )reg->name_table;
 491 
 492   e = (NameEntry* )NULL;
 493   if (IS_NOT_NULL(t)) {
 494     onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
 495   }
 496   return e;
 497 }
 498 
 499 typedef struct {   /* context handed to i_names() through onig_st_foreach() */
 500   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);   /* user callback: name start, name end, group count, group numbers, reg, arg */
 501   regex_t* reg;   /* regex whose names are being enumerated */
 502   void* arg;      /* opaque user pointer forwarded to func */
 503   int ret;        /* first non-zero value returned by func, 0 otherwise */
 504   OnigEncoding enc;   /* set by onig_foreach_name(); not read by i_names() */
 505 } INamesArg;
 506 
 507 static int
 508 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
 509 {
 510   int r = (*(arg->func))(e->name,
 511                          e->name + e->name_len,
 512                          e->back_num,
 513                          (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
 514                          arg->reg, arg->arg);
 515   if (r != 0) {
 516     arg->ret = r;
 517     return ST_STOP;
 518   }
 519   return ST_CONTINUE;
 520 }
 521 
 522 extern int
 523 onig_foreach_name(regex_t* reg,
 524   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
 525 {
 526   INamesArg narg;
 527   NameTable* t = (NameTable* )reg->name_table;
 528 
 529   narg.ret = 0;
 530   if (IS_NOT_NULL(t)) {
 531     narg.func = func;
 532     narg.reg  = reg;
 533     narg.arg  = arg;
 534     narg.enc  = reg->enc; /* should be pattern encoding. */
 535     onig_st_foreach(t, i_names, (HashDataType )&narg);
 536   }
 537   return narg.ret;
 538 }
 539 
 540 static int
 541 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
 542 {
 543   int i;
 544 
 545   if (e->back_num > 1) {
 546     for (i = 0; i < e->back_num; i++) {
 547       e->back_refs[i] = map[e->back_refs[i]].new_val;
 548     }
 549   }
 550   else if (e->back_num == 1) {
 551     e->back_ref1 = map[e->back_ref1].new_val;
 552   }
 553 
 554   return ST_CONTINUE;
 555 }
 556 
 557 extern int
 558 onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
 559 {
 560   NameTable* t = (NameTable* )reg->name_table;
 561 
 562   if (IS_NOT_NULL(t)) {
 563     onig_st_foreach(t, i_renumber_name, (HashDataType )map);
 564   }
 565   return 0;
 566 }
 567 
 568 
 569 extern int
 570 onig_number_of_names(regex_t* reg)
 571 {
 572   NameTable* t = (NameTable* )reg->name_table;
 573 
 574   if (IS_NOT_NULL(t))
 575     return t->num_entries;
 576   else
 577     return 0;
 578 }
 579 
 580 #else  /* USE_ST_LIBRARY */
 581 
 582 #define INIT_NAMES_ALLOC_NUM    8   /* initial capacity of NameTable.e */
 583 
 584 typedef struct {   /* name table used when USE_ST_LIBRARY is not defined: a growable array */
 585   NameEntry* e;      /* heap array of entries */
 586   int        num;    /* number of entries in use */
 587   int        alloc;  /* capacity of e, in entries */
 588 } NameTable;
 589 
 590 #ifdef ONIG_DEBUG
 591 extern int
 592 onig_print_names(FILE* fp, regex_t* reg)
 593 {
 594   int i, j;
 595   NameEntry* e;
 596   NameTable* t = (NameTable* )reg->name_table;
 597 
 598   if (IS_NOT_NULL(t) && t->num > 0) {
 599     fprintf(fp, "name table\n");
 600     for (i = 0; i < t->num; i++) {
 601       e = &(t->e[i]);
 602       fprintf(fp, "%s: ", e->name);
 603       if (e->back_num == 0) {
 604         fputs("-", fp);
 605       }
 606       else if (e->back_num == 1) {
 607         fprintf(fp, "%d", e->back_ref1);
 608       }
 609       else {
 610         for (j = 0; j < e->back_num; j++) {
 611           if (j > 0) fprintf(fp, ", ");
 612           fprintf(fp, "%d", e->back_refs[j]);
 613         }
 614       }
 615       fputs("\n", fp);
 616     }
 617     fputs("\n", fp);
 618   }
 619   return 0;
 620 }
 621 #endif
 622 
 623 static int
 624 names_clear(regex_t* reg)
 625 {
 626   int i;
 627   NameEntry* e;
 628   NameTable* t = (NameTable* )reg->name_table;
 629 
 630   if (IS_NOT_NULL(t)) {
 631     for (i = 0; i < t->num; i++) {
 632       e = &(t->e[i]);
 633       if (IS_NOT_NULL(e->name)) {
 634         xfree(e->name);
 635         e->name       = NULL;
 636         e->name_len   = 0;
 637         e->back_num   = 0;
 638         e->back_alloc = 0;
 639         if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
 640         e->back_refs = (int* )NULL;
 641       }
 642     }
 643     if (IS_NOT_NULL(t->e)) {
 644       xfree(t->e);
 645       t->e = NULL;
 646     }
 647     t->num = 0;
 648   }
 649   return 0;
 650 }
 651 
 652 extern int
 653 onig_names_free(regex_t* reg)
 654 {
 655   int r;
 656   NameTable* t;
 657 
 658   r = names_clear(reg);
 659   if (r) return r;
 660 
 661   t = (NameTable* )reg->name_table;
 662   if (IS_NOT_NULL(t)) xfree(t);
 663   reg->name_table = NULL;
 664   return 0;
 665 }
 666 
 667 static NameEntry*
 668 name_find(regex_t* reg, UChar* name, UChar* name_end)
 669 {
 670   int i, len;
 671   NameEntry* e;
 672   NameTable* t = (NameTable* )reg->name_table;
 673 
 674   if (IS_NOT_NULL(t)) {
 675     len = name_end - name;
 676     for (i = 0; i < t->num; i++) {
 677       e = &(t->e[i]);
 678       if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
 679         return e;
 680     }
 681   }
 682   return (NameEntry* )NULL;
 683 }
 684 
 685 extern int
 686 onig_foreach_name(regex_t* reg,
 687   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
 688 {
 689   int i, r;
 690   NameEntry* e;
 691   NameTable* t = (NameTable* )reg->name_table;
 692 
 693   if (IS_NOT_NULL(t)) {
 694     for (i = 0; i < t->num; i++) {
 695       e = &(t->e[i]);
 696       r = (*func)(e->name, e->name + e->name_len, e->back_num,
 697                   (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
 698                   reg, arg);
 699       if (r != 0) return r;
 700     }
 701   }
 702   return 0;
 703 }
 704 
 705 extern int
 706 onig_number_of_names(regex_t* reg)
 707 {
 708   NameTable* t = (NameTable* )reg->name_table;
 709 
 710   if (IS_NOT_NULL(t))
 711     return t->num;
 712   else
 713     return 0;
 714 }
 715 
 716 #endif /* else USE_ST_LIBRARY */
 717 
 718 static int
 719 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
 720 {
 721   int alloc;
 722   NameEntry* e;
 723   NameTable* t = (NameTable* )reg->name_table;
 724 
 725   if (name_end - name <= 0)
 726     return ONIGERR_EMPTY_GROUP_NAME;
 727 
 728   e = name_find(reg, name, name_end);
 729   if (IS_NULL(e)) {
 730 #ifdef USE_ST_LIBRARY
 731     if (IS_NULL(t)) {
 732       t = onig_st_init_strend_table_with_size(5);
 733       reg->name_table = (void* )t;
 734     }
 735     e = (NameEntry* )xmalloc(sizeof(NameEntry));
 736     CHECK_NULL_RETURN_MEMERR(e);
 737 
 738     e->name = strdup_with_null(reg->enc, name, name_end);
 739     if (IS_NULL(e->name)) {
 740       xfree(e);  return ONIGERR_MEMORY;
 741     }
 742     onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
 743                           (HashDataType )e);
 744 
 745     e->name_len   = name_end - name;
 746     e->back_num   = 0;
 747     e->back_alloc = 0;
 748     e->back_refs  = (int* )NULL;
 749 
 750 #else
 751 
 752     if (IS_NULL(t)) {
 753       alloc = INIT_NAMES_ALLOC_NUM;
 754       t = (NameTable* )xmalloc(sizeof(NameTable));
 755       CHECK_NULL_RETURN_MEMERR(t);
 756       t->e     = NULL;
 757       t->alloc = 0;
 758       t->num   = 0;
 759 
 760       t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
 761       if (IS_NULL(t->e)) {
 762         xfree(t);
 763         return ONIGERR_MEMORY;
 764       }
 765       t->alloc = alloc;
 766       reg->name_table = t;
 767       goto clear;
 768     }
 769     else if (t->num == t->alloc) {
 770       int i;
 771 
 772       alloc = t->alloc * 2;
 773       t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
 774       CHECK_NULL_RETURN_MEMERR(t->e);
 775       t->alloc = alloc;
 776 
 777     clear:
 778       for (i = t->num; i < t->alloc; i++) {
 779         t->e[i].name       = NULL;
 780         t->e[i].name_len   = 0;
 781         t->e[i].back_num   = 0;
 782         t->e[i].back_alloc = 0;
 783         t->e[i].back_refs  = (int* )NULL;
 784       }
 785     }
 786     e = &(t->e[t->num]);
 787     t->num++;
 788     e->name = strdup_with_null(reg->enc, name, name_end);
 789     if (IS_NULL(e->name)) return ONIGERR_MEMORY;
 790     e->name_len = name_end - name;
 791 #endif
 792   }
 793 
 794   if (e->back_num >= 1 &&
 795       ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
 796     onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
 797                                     name, name_end);
 798     return ONIGERR_MULTIPLEX_DEFINED_NAME;
 799   }
 800 
 801   e->back_num++;
 802   if (e->back_num == 1) {
 803     e->back_ref1 = backref;
 804   }
 805   else {
 806     if (e->back_num == 2) {
 807       alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
 808       e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
 809       CHECK_NULL_RETURN_MEMERR(e->back_refs);
 810       e->back_alloc = alloc;
 811       e->back_refs[0] = e->back_ref1;
 812       e->back_refs[1] = backref;
 813     }
 814     else {
 815       if (e->back_num > e->back_alloc) {
 816         alloc = e->back_alloc * 2;
 817         e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
 818         CHECK_NULL_RETURN_MEMERR(e->back_refs);
 819         e->back_alloc = alloc;
 820       }
 821       e->back_refs[e->back_num - 1] = backref;
 822     }
 823   }
 824 
 825   return 0;
 826 }
 827 
 828 extern int
 829 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
 830                            const UChar* name_end, int** nums)
 831 {
 832   NameEntry* e = name_find(reg, name, name_end);
 833 
 834   if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
 835 
 836   switch (e->back_num) {
 837   case 0:
 838     break;
 839   case 1:
 840     *nums = &(e->back_ref1);
 841     break;
 842   default:
 843     *nums = e->back_refs;
 844     break;
 845   }
 846   return e->back_num;
 847 }
 848 
 849 extern int
 850 onig_name_to_backref_number(regex_t* reg, const UChar* name,
 851                             const UChar* name_end, OnigRegion *region)
 852 {
 853   int i, n, *nums;
 854 
 855   n = onig_name_to_group_numbers(reg, name, name_end, &nums);
 856   if (n < 0)
 857     return n;
 858   else if (n == 0)
 859     return ONIGERR_PARSER_BUG;
 860   else if (n == 1)
 861     return nums[0];
 862   else {
 863     if (IS_NOT_NULL(region)) {
 864       for (i = n - 1; i >= 0; i--) {
 865         if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
 866           return nums[i];
 867       }
 868     }
 869     return nums[n - 1];
 870   }
 871 }
 872 
 873 #else /* USE_NAMED_GROUP */
 874 
 875 extern int
 876 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
 877                            const UChar* name_end, int** nums)
 878 {
 879   return ONIG_NO_SUPPORT_CONFIG;
 880 }
 881 
 882 extern int
 883 onig_name_to_backref_number(regex_t* reg, const UChar* name,
 884                             const UChar* name_end, OnigRegion* region)
 885 {
 886   return ONIG_NO_SUPPORT_CONFIG;
 887 }
 888 
 889 extern int
 890 onig_foreach_name(regex_t* reg,
 891   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
 892 {
 893   return ONIG_NO_SUPPORT_CONFIG;
 894 }
 895 
 896 extern int
 897 onig_number_of_names(regex_t* reg)
 898 {
 899   return 0;
 900 }
 901 #endif /* else USE_NAMED_GROUP */
 902 
 903 extern int
 904 onig_noname_group_capture_is_active(regex_t* reg)
 905 {
 906   if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
 907     return 0;
 908 
 909 #ifdef USE_NAMED_GROUP
 910   if (onig_number_of_names(reg) > 0 &&
 911       IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
 912       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
 913     return 0;
 914   }
 915 #endif
 916 
 917   return 1;
 918 }
 919 
 920 
 921 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE   16
 922 
 923 static void
 924 scan_env_clear(ScanEnv* env)
 925 {
 926   int i;
 927 
 928   BIT_STATUS_CLEAR(env->capture_history);
 929   BIT_STATUS_CLEAR(env->bt_mem_start);
 930   BIT_STATUS_CLEAR(env->bt_mem_end);
 931   BIT_STATUS_CLEAR(env->backrefed_mem);
 932   env->error      = (UChar* )NULL;
 933   env->error_end  = (UChar* )NULL;
 934   env->num_call   = 0;
 935   env->num_mem    = 0;
 936 #ifdef USE_NAMED_GROUP
 937   env->num_named  = 0;
 938 #endif
 939   env->mem_alloc         = 0;
 940   env->mem_nodes_dynamic = (Node** )NULL;
 941 
 942   for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
 943     env->mem_nodes_static[i] = NULL_NODE;
 944 
 945 #ifdef USE_COMBINATION_EXPLOSION_CHECK
 946   env->num_comb_exp_check  = 0;
 947   env->comb_exp_max_regnum = 0;
 948   env->curr_max_regnum     = 0;
 949   env->has_recursion       = 0;
 950 #endif
 951 }
 952 
 953 static int
 954 scan_env_add_mem_entry(ScanEnv* env)
 955 {
 956   int i, need, alloc;
 957   Node** p;
 958 
 959   need = env->num_mem + 1;
 960   if (need >= SCANENV_MEMNODES_SIZE) {
 961     if (env->mem_alloc <= need) {
 962       if (IS_NULL(env->mem_nodes_dynamic)) {
 963         alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
 964         p = (Node** )xmalloc(sizeof(Node*) * alloc);
 965         xmemcpy(p, env->mem_nodes_static,
 966                 sizeof(Node*) * SCANENV_MEMNODES_SIZE);
 967       }
 968       else {
 969         alloc = env->mem_alloc * 2;
 970         p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
 971       }
 972       CHECK_NULL_RETURN_MEMERR(p);
 973 
 974       for (i = env->num_mem + 1; i < alloc; i++)
 975         p[i] = NULL_NODE;
 976 
 977       env->mem_nodes_dynamic = p;
 978       env->mem_alloc = alloc;
 979     }
 980   }
 981 
 982   env->num_mem++;
 983   return env->num_mem;
 984 }
 985 
 986 static int
 987 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
 988 {
 989   if (env->num_mem >= num)
 990     SCANENV_MEM_NODES(env)[num] = node;
 991   else
 992     return ONIGERR_PARSER_BUG;
 993   return 0;
 994 }
 995 
 996 
 997 #ifdef USE_PARSE_TREE_NODE_RECYCLE
 998 typedef struct _FreeNode {   /* overlay used to chain released parse-tree nodes */
 999   struct _FreeNode* next;    /* next released node, or NULL */
1000 } FreeNode;
1001 
1002 static FreeNode* FreeNodeList = (FreeNode* )NULL;   /* head of the list of released nodes; onig_node_free() pushes, node_new() pops */
1003 #endif
1004 
1005 extern void
1006 onig_node_free(Node* node)
1007 {
1008  start:
1009   if (IS_NULL(node)) return ;
1010 
1011   switch (NTYPE(node)) {
1012   case NT_STR:
1013     if (NSTR(node)->capa != 0 &&
1014         IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1015       xfree(NSTR(node)->s);
1016     }
1017     break;
1018 
1019   case NT_LIST:
1020   case NT_ALT:
1021     onig_node_free(NCAR(node));
1022     {
1023       Node* next_node = NCDR(node);
1024 
1025 #ifdef USE_PARSE_TREE_NODE_RECYCLE
1026       {
1027         FreeNode* n = (FreeNode* )node;
1028 
1029         THREAD_ATOMIC_START;
1030         n->next = FreeNodeList;
1031         FreeNodeList = n;
1032         THREAD_ATOMIC_END;
1033       }
1034 #else
1035       xfree(node);
1036 #endif
1037       node = next_node;
1038       goto start;
1039     }
1040     break;
1041 
1042   case NT_CCLASS:
1043     {
1044       CClassNode* cc = NCCLASS(node);
1045 
1046       if (IS_NCCLASS_SHARE(cc)) return ;
1047       if (cc->mbuf)
1048         bbuf_free(cc->mbuf);
1049     }
1050     break;
1051 
1052   case NT_QTFR:
1053     if (NQTFR(node)->target)
1054       onig_node_free(NQTFR(node)->target);
1055     break;
1056 
1057   case NT_ENCLOSE:
1058     if (NENCLOSE(node)->target)
1059       onig_node_free(NENCLOSE(node)->target);
1060     break;
1061 
1062   case NT_BREF:
1063     if (IS_NOT_NULL(NBREF(node)->back_dynamic))
1064       xfree(NBREF(node)->back_dynamic);
1065     break;
1066 
1067   case NT_ANCHOR:
1068     if (NANCHOR(node)->target)
1069       onig_node_free(NANCHOR(node)->target);
1070     break;
1071   }
1072 
1073 #ifdef USE_PARSE_TREE_NODE_RECYCLE
1074   {
1075     FreeNode* n = (FreeNode* )node;
1076 
1077     THREAD_ATOMIC_START;
1078     n->next = FreeNodeList;
1079     FreeNodeList = n;
1080     THREAD_ATOMIC_END;
1081   }
1082 #else
1083   xfree(node);
1084 #endif
1085 }
1086 
#ifdef USE_PARSE_TREE_NODE_RECYCLE
/*
 * Hand every node on the recycle list back to the allocator.
 * Always returns 0.  No locking is performed here.
 */
extern int
onig_free_node_list(void)
{
  FreeNode* cell;

  /* THREAD_ATOMIC_START; */
  for (cell = FreeNodeList; IS_NOT_NULL(cell); cell = FreeNodeList) {
    FreeNodeList = cell->next;
    xfree(cell);
  }
  /* THREAD_ATOMIC_END; */
  return 0;
}
#endif
1103 
1104 static Node*
1105 node_new(void)
1106 {
1107   Node* node;
1108 
1109 #ifdef USE_PARSE_TREE_NODE_RECYCLE
1110   THREAD_ATOMIC_START;
1111   if (IS_NOT_NULL(FreeNodeList)) {
1112     node = (Node* )FreeNodeList;
1113     FreeNodeList = FreeNodeList->next;
1114     THREAD_ATOMIC_END;
1115     return node;
1116   }
1117   THREAD_ATOMIC_END;
1118 #endif
1119 
1120   node = (Node* )xmalloc(sizeof(Node));
1121   /* xmemset(node, 0, sizeof(Node)); */
1122   return node;
1123 }
1124 
1125 
1126 static void
1127 initialize_cclass(CClassNode* cc)
1128 {
1129   BITSET_CLEAR(cc->bs);
1130   /* cc->base.flags = 0; */
1131   cc->flags = 0;
1132   cc->mbuf  = NULL;
1133 }
1134 
1135 static Node*
1136 node_new_cclass(void)
1137 {
1138   Node* node = node_new();
1139   CHECK_NULL_RETURN(node);
1140 
1141   SET_NTYPE(node, NT_CCLASS);
1142   initialize_cclass(NCCLASS(node));
1143   return node;
1144 }
1145 
1146 static Node*
1147 node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
1148                                    const OnigCodePoint ranges[])
1149 {
1150   int n, i;
1151   CClassNode* cc;
1152   OnigCodePoint j;
1153 
1154   Node* node = node_new_cclass();
1155   CHECK_NULL_RETURN(node);
1156 
1157   cc = NCCLASS(node);
1158   if (not != 0) NCCLASS_SET_NOT(cc);
1159 
1160   BITSET_CLEAR(cc->bs);
1161   if (sb_out > 0 && IS_NOT_NULL(ranges)) {
1162     n = ONIGENC_CODE_RANGE_NUM(ranges);
1163     for (i = 0; i < n; i++) {
1164       for (j  = ONIGENC_CODE_RANGE_FROM(ranges, i);
1165            j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
1166         if (j >= sb_out) goto sb_end;
1167 
1168         BITSET_SET_BIT(cc->bs, j);
1169       }
1170     }
1171   }
1172 
1173  sb_end:
1174   if (IS_NULL(ranges)) {
1175   is_null:
1176     cc->mbuf = NULL;
1177   }
1178   else {
1179     BBuf* bbuf;
1180 
1181     n = ONIGENC_CODE_RANGE_NUM(ranges);
1182     if (n == 0) goto is_null;
1183 
1184     bbuf = (BBuf* )xmalloc(sizeof(BBuf));
1185     CHECK_NULL_RETURN(bbuf);
1186     bbuf->alloc = n + 1;
1187     bbuf->used  = n + 1;
1188     bbuf->p     = (UChar* )((void* )ranges);
1189 
1190     cc->mbuf = bbuf;
1191   }
1192 
1193   return node;
1194 }
1195 
1196 static Node*
1197 node_new_ctype(int type, int not)
1198 {
1199   Node* node = node_new();
1200   CHECK_NULL_RETURN(node);
1201 
1202   SET_NTYPE(node, NT_CTYPE);
1203   NCTYPE(node)->ctype = type;
1204   NCTYPE(node)->not   = not;
1205   return node;
1206 }
1207 
1208 static Node*
1209 node_new_anychar(void)
1210 {
1211   Node* node = node_new();
1212   CHECK_NULL_RETURN(node);
1213 
1214   SET_NTYPE(node, NT_CANY);
1215   return node;
1216 }
1217 
1218 static Node*
1219 node_new_list(Node* left, Node* right)
1220 {
1221   Node* node = node_new();
1222   CHECK_NULL_RETURN(node);
1223 
1224   SET_NTYPE(node, NT_LIST);
1225   NCAR(node)  = left;
1226   NCDR(node) = right;
1227   return node;
1228 }
1229 
1230 extern Node*
1231 onig_node_new_list(Node* left, Node* right)
1232 {
1233   return node_new_list(left, right);
1234 }
1235 
1236 extern Node*
1237 onig_node_list_add(Node* list, Node* x)
1238 {
1239   Node *n;
1240 
1241   n = onig_node_new_list(x, NULL);
1242   if (IS_NULL(n)) return NULL_NODE;
1243 
1244   if (IS_NOT_NULL(list)) {
1245     while (IS_NOT_NULL(NCDR(list)))
1246       list = NCDR(list);
1247 
1248     NCDR(list) = n;
1249   }
1250 
1251   return n;
1252 }
1253 
1254 extern Node*
1255 onig_node_new_alt(Node* left, Node* right)
1256 {
1257   Node* node = node_new();
1258   CHECK_NULL_RETURN(node);
1259 
1260   SET_NTYPE(node, NT_ALT);
1261   NCAR(node)  = left;
1262   NCDR(node) = right;
1263   return node;
1264 }
1265 
1266 extern Node*
1267 onig_node_new_anchor(int type)
1268 {
1269   Node* node = node_new();
1270   CHECK_NULL_RETURN(node);
1271 
1272   SET_NTYPE(node, NT_ANCHOR);
1273   NANCHOR(node)->type     = type;
1274   NANCHOR(node)->target   = NULL;
1275   NANCHOR(node)->char_len = -1;
1276   return node;
1277 }
1278 
1279 static Node*
1280 node_new_backref(int back_num, int* backrefs, int by_name,
1281 #ifdef USE_BACKREF_WITH_LEVEL
1282                  int exist_level, int nest_level,
1283 #endif
1284                  ScanEnv* env)
1285 {
1286   int i;
1287   Node* node = node_new();
1288 
1289   CHECK_NULL_RETURN(node);
1290 
1291   SET_NTYPE(node, NT_BREF);
1292   NBREF(node)->state    = 0;
1293   NBREF(node)->back_num = back_num;
1294   NBREF(node)->back_dynamic = (int* )NULL;
1295   if (by_name != 0)
1296     NBREF(node)->state |= NST_NAME_REF;
1297 
1298 #ifdef USE_BACKREF_WITH_LEVEL
1299   if (exist_level != 0) {
1300     NBREF(node)->state |= NST_NEST_LEVEL;
1301     NBREF(node)->nest_level  = nest_level;
1302   }
1303 #endif
1304 
1305   for (i = 0; i < back_num; i++) {
1306     if (backrefs[i] <= env->num_mem &&
1307         IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
1308       NBREF(node)->state |= NST_RECURSION;   /* /...(\1).../ */
1309       break;
1310     }
1311   }
1312 
1313   if (back_num <= NODE_BACKREFS_SIZE) {
1314     for (i = 0; i < back_num; i++)
1315       NBREF(node)->back_static[i] = backrefs[i];
1316   }
1317   else {
1318     int* p = (int* )xmalloc(sizeof(int) * back_num);
1319     if (IS_NULL(p)) {
1320       onig_node_free(node);
1321       return NULL;
1322     }
1323     NBREF(node)->back_dynamic = p;
1324     for (i = 0; i < back_num; i++)
1325       p[i] = backrefs[i];
1326   }
1327   return node;
1328 }
1329 
#ifdef USE_SUBEXP_CALL
/*
 * Create a subexpression-call node.  The name pointers are stored as given
 * (not copied); gnum != 0 means the call is by group number.
 * Returns NULL on allocation failure.
 */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum)
{
  Node* call = node_new();

  if (IS_NULL(call)) return NULL;

  SET_NTYPE(call, NT_CALL);
  NCALL(call)->group_num = gnum;  /* call by number if gnum != 0 */
  NCALL(call)->name_end  = name_end;
  NCALL(call)->name      = name;
  NCALL(call)->target    = NULL_NODE;
  NCALL(call)->state     = 0;
  return call;
}
#endif
1346 
1347 static Node*
1348 node_new_quantifier(int lower, int upper, int by_number)
1349 {
1350   Node* node = node_new();
1351   CHECK_NULL_RETURN(node);
1352 
1353   SET_NTYPE(node, NT_QTFR);
1354   NQTFR(node)->state  = 0;
1355   NQTFR(node)->target = NULL;
1356   NQTFR(node)->lower  = lower;
1357   NQTFR(node)->upper  = upper;
1358   NQTFR(node)->greedy = 1;
1359   NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
1360   NQTFR(node)->head_exact        = NULL_NODE;
1361   NQTFR(node)->next_head_exact   = NULL_NODE;
1362   NQTFR(node)->is_refered        = 0;
1363   if (by_number != 0)
1364     NQTFR(node)->state |= NST_BY_NUMBER;
1365 
1366 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1367   NQTFR(node)->comb_exp_check_num = 0;
1368 #endif
1369 
1370   return node;
1371 }
1372 
1373 static Node*
1374 node_new_enclose(int type)
1375 {
1376   Node* node = node_new();
1377   CHECK_NULL_RETURN(node);
1378 
1379   SET_NTYPE(node, NT_ENCLOSE);
1380   NENCLOSE(node)->type      = type;
1381   NENCLOSE(node)->state     =  0;
1382   NENCLOSE(node)->regnum    =  0;
1383   NENCLOSE(node)->option    =  0;
1384   NENCLOSE(node)->target    = NULL;
1385   NENCLOSE(node)->call_addr = -1;
1386   NENCLOSE(node)->opt_count =  0;
1387   return node;
1388 }
1389 
1390 extern Node*
1391 onig_node_new_enclose(int type)
1392 {
1393   return node_new_enclose(type);
1394 }
1395 
1396 static Node*
1397 node_new_enclose_memory(OnigOptionType option, int is_named)
1398 {
1399   Node* node = node_new_enclose(ENCLOSE_MEMORY);
1400   CHECK_NULL_RETURN(node);
1401   if (is_named != 0)
1402     SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
1403 
1404 #ifdef USE_SUBEXP_CALL
1405   NENCLOSE(node)->option = option;
1406 #endif
1407   return node;
1408 }
1409 
1410 static Node*
1411 node_new_option(OnigOptionType option)
1412 {
1413   Node* node = node_new_enclose(ENCLOSE_OPTION);
1414   CHECK_NULL_RETURN(node);
1415   NENCLOSE(node)->option = option;
1416   return node;
1417 }
1418 
1419 extern int
1420 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
1421 {
1422   int addlen = end - s;
1423 
1424   if (addlen > 0) {
1425     int len  = NSTR(node)->end - NSTR(node)->s;
1426 
1427     if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
1428       UChar* p;
1429       int capa = len + addlen + NODE_STR_MARGIN;
1430 
1431       if (capa <= NSTR(node)->capa) {
1432         onig_strcpy(NSTR(node)->s + len, s, end);
1433       }
1434       else {
1435         if (NSTR(node)->s == NSTR(node)->buf)
1436           p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
1437                                       s, end, capa);
1438         else
1439           p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
1440 
1441         CHECK_NULL_RETURN_MEMERR(p);
1442         NSTR(node)->s    = p;
1443         NSTR(node)->capa = capa;
1444       }
1445     }
1446     else {
1447       onig_strcpy(NSTR(node)->s + len, s, end);
1448     }
1449     NSTR(node)->end = NSTR(node)->s + len + addlen;
1450   }
1451 
1452   return 0;
1453 }
1454 
1455 extern int
1456 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
1457 {
1458   onig_node_str_clear(node);
1459   return onig_node_str_cat(node, s, end);
1460 }
1461 
1462 static int
1463 node_str_cat_char(Node* node, UChar c)
1464 {
1465   UChar s[1];
1466 
1467   s[0] = c;
1468   return onig_node_str_cat(node, s, s + 1);
1469 }
1470 
1471 extern void
1472 onig_node_conv_to_str_node(Node* node, int flag)
1473 {
1474   SET_NTYPE(node, NT_STR);
1475   NSTR(node)->flag = flag;
1476   NSTR(node)->capa = 0;
1477   NSTR(node)->s    = NSTR(node)->buf;
1478   NSTR(node)->end  = NSTR(node)->buf;
1479 }
1480 
1481 extern void
1482 onig_node_str_clear(Node* node)
1483 {
1484   if (NSTR(node)->capa != 0 &&
1485       IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1486     xfree(NSTR(node)->s);
1487   }
1488 
1489   NSTR(node)->capa = 0;
1490   NSTR(node)->flag = 0;
1491   NSTR(node)->s    = NSTR(node)->buf;
1492   NSTR(node)->end  = NSTR(node)->buf;
1493 }
1494 
1495 static Node*
1496 node_new_str(const UChar* s, const UChar* end)
1497 {
1498   Node* node = node_new();
1499   CHECK_NULL_RETURN(node);
1500 
1501   SET_NTYPE(node, NT_STR);
1502   NSTR(node)->capa = 0;
1503   NSTR(node)->flag = 0;
1504   NSTR(node)->s    = NSTR(node)->buf;
1505   NSTR(node)->end  = NSTR(node)->buf;
1506   if (onig_node_str_cat(node, s, end)) {
1507     onig_node_free(node);
1508     return NULL;
1509   }
1510   return node;
1511 }
1512 
1513 extern Node*
1514 onig_node_new_str(const UChar* s, const UChar* end)
1515 {
1516   return node_new_str(s, end);
1517 }
1518 
1519 static Node*
1520 node_new_str_raw(UChar* s, UChar* end)
1521 {
1522   Node* node = node_new_str(s, end);
1523   NSTRING_SET_RAW(node);
1524   return node;
1525 }
1526 
1527 static Node*
1528 node_new_empty(void)
1529 {
1530   return node_new_str(NULL, NULL);
1531 }
1532 
1533 static Node*
1534 node_new_str_raw_char(UChar c)
1535 {
1536   UChar p[1];
1537 
1538   p[0] = c;
1539   return node_new_str_raw(p, p + 1);
1540 }
1541 
1542 static Node*
1543 str_node_split_last_char(StrNode* sn, OnigEncoding enc)
1544 {
1545   const UChar *p;
1546   Node* n = NULL_NODE;
1547 
1548   if (sn->end > sn->s) {
1549     p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
1550     if (p && p > sn->s) { /* can be splitted. */
1551       n = node_new_str(p, sn->end);
1552       if ((sn->flag & NSTR_RAW) != 0)
1553         NSTRING_SET_RAW(n);
1554       sn->end = (UChar* )p;
1555     }
1556   }
1557   return n;
1558 }
1559 
1560 static int
1561 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
1562 {
1563   if (sn->end > sn->s) {
1564     return ((enclen(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
1565   }
1566   return 0;
1567 }
1568 
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Insert `num` bytes of value `val` in front of the string's contents.
 * The existing contents are staged through a NODE_STR_BUF_SIZE-byte local
 * buffer, so the string is presumably expected to be short
 * (NOTE(review): no bounds check is done here; confirm at the call sites).
 * Always returns 0.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STR_BUF_SIZE];
  int i, len;

  len = sn->end - sn->s;
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }
  /* The function is declared int; falling off the end gave no defined
     value to a caller that reads the result. */
  return 0;
}
#endif
1586 
1587 extern int
1588 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
1589 {
1590   unsigned int num, val;
1591   OnigCodePoint c;
1592   UChar* p = *src;
1593   PFETCH_READY;
1594 
1595   num = 0;
1596   while (!PEND) {
1597     PFETCH(c);
1598     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
1599       val = (unsigned int )DIGITVAL(c);
1600       if ((INT_MAX_LIMIT - val) / 10UL < num)
1601         return -1;  /* overflow */
1602 
1603       num = num * 10 + val;
1604     }
1605     else {
1606       PUNFETCH;
1607       break;
1608     }
1609   }
1610   *src = p;
1611   return num;
1612 }
1613 
1614 static int
1615 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
1616                                  OnigEncoding enc)
1617 {
1618   OnigCodePoint c;
1619   unsigned int num, val;
1620   UChar* p = *src;
1621   PFETCH_READY;
1622 
1623   num = 0;
1624   while (!PEND && maxlen-- != 0) {
1625     PFETCH(c);
1626     if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
1627       val = (unsigned int )XDIGITVAL(enc,c);
1628       if ((INT_MAX_LIMIT - val) / 16UL < num)
1629         return -1;  /* overflow */
1630 
1631       num = (num << 4) + XDIGITVAL(enc,c);
1632     }
1633     else {
1634       PUNFETCH;
1635       break;
1636     }
1637   }
1638   *src = p;
1639   return num;
1640 }
1641 
1642 static int
1643 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
1644                            OnigEncoding enc)
1645 {
1646   OnigCodePoint c;
1647   unsigned int num, val;
1648   UChar* p = *src;
1649   PFETCH_READY;
1650 
1651   num = 0;
1652   while (!PEND && maxlen-- != 0) {
1653     PFETCH(c);
1654     if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
1655       val = ODIGITVAL(c);
1656       if ((INT_MAX_LIMIT - val) / 8UL < num)
1657         return -1;  /* overflow */
1658 
1659       num = (num << 3) + val;
1660     }
1661     else {
1662       PUNFETCH;
1663       break;
1664     }
1665   }
1666   *src = p;
1667   return num;
1668 }
1669 
1670 
/* Write one code point (SIZE_CODE_POINT bytes) into `bbuf` at byte offset
 * `pos`.  `code` must be an lvalue because its address is taken. */
1671 #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
1672     BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
1673 
1674 /* data format:
1675      [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
1676      (all data size is OnigCodePoint)
1677  */
1678 static int
1679 new_code_range(BBuf** pbuf)
1680 {
1681 #define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
1682   int r;
1683   OnigCodePoint n;
1684   BBuf* bbuf;
1685 
1686   bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
1687   CHECK_NULL_RETURN_MEMERR(*pbuf);
1688   r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
1689   if (r) return r;
1690 
1691   n = 0;
1692   BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1693   return 0;
1694 }
1695 
/*
 * Add the code-point range [from, to] to the range buffer *pbuf, creating
 * the buffer when *pbuf is NULL.  The bounds are swapped if from > to.
 *
 * Existing ranges that overlap or directly adjoin the new one are folded
 * into it, so the stored count changes by inc_n = low + 1 - high.
 * The binary searches rely on the stored ranges being in ascending order
 * (assumed invariant of this buffer format).
 *
 * Returns 0 on success, ONIGERR_TOO_MANY_MULTI_BYTE_RANGES when the count
 * would exceed ONIG_MAX_MULTI_BYTE_RANGES_NUM, or the error code from
 * new_code_range().  NOTE(review): the BBUF_* macros are defined elsewhere
 * and may return from this function on their own failure -- confirm.
 */
1696 static int
1697 add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
1698 {
1699   int r, inc_n, pos;
1700   int low, high, bound, x;
1701   OnigCodePoint n, *data;
1702   BBuf* bbuf;
1703 
1704   if (from > to) {
1705     n = from; from = to; to = n;
1706   }
1707 
1708   if (IS_NULL(*pbuf)) {
1709     r = new_code_range(pbuf);
1710     if (r) return r;
1711     bbuf = *pbuf;
1712     n = 0;
1713   }
1714   else {
1715     bbuf = *pbuf;
1716     GET_CODE_POINT(n, bbuf->p);
1717   }
/* Skip the leading count so that data[2*i] / data[2*i+1] are the bounds of
 * range i. */
1718   data = (OnigCodePoint* )(bbuf->p);
1719   data++;
1720 
/* low: index of the first stored range whose upper bound is >= from. */
1721   for (low = 0, bound = n; low < bound; ) {
1722     x = (low + bound) >> 1;
1723     if (from > data[x*2 + 1])
1724       low = x + 1;
1725     else
1726       bound = x;
1727   }
1728 
/* high: index just past the last stored range whose lower bound minus one
 * is <= to, i.e. the ranges that overlap or adjoin the new one are
 * [low, high). */
1729   for (high = low, bound = n; high < bound; ) {
1730     x = (high + bound) >> 1;
1731     if (to >= data[x*2] - 1)
1732       high = x + 1;
1733     else
1734       bound = x;
1735   }
1736 
1737   inc_n = low + 1 - high;
1738   if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
1739     return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
1740 
/* When at least one stored range is absorbed, widen the new range so that
 * it covers the absorbed ones. */
1741   if (inc_n != 1) {
1742     if (from > data[low*2])
1743       from = data[low*2];
1744     if (to < data[(high - 1)*2 + 1])
1745       to = data[(high - 1)*2 + 1];
1746   }
1747 
/* Shift the ranges that follow the merged span so that exactly one slot is
 * left at index low. */
1748   if (inc_n != 0 && (OnigCodePoint )high < n) {
1749     int from_pos = SIZE_CODE_POINT * (1 + high * 2);
1750     int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
1751     int size = (n - high) * 2 * SIZE_CODE_POINT;
1752 
1753     if (inc_n > 0) {
1754       BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
1755     }
1756     else {
1757       BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
1758     }
1759   }
1760 
/* Store the (possibly widened) range at slot low and update the count. */
1761   pos = SIZE_CODE_POINT * (1 + low * 2);
1762   BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
1763   BBUF_WRITE_CODE_POINT(bbuf, pos, from);
1764   BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
1765   n += inc_n;
1766   BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1767 
1768   return 0;
1769 }
1770 
1771 static int
1772 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
1773 {
1774   if (from > to) {
1775     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
1776       return 0;
1777     else
1778       return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
1779   }
1780 
1781   return add_code_range_to_buf(pbuf, from, to);
1782 }
1783 
/*
 * Build in *pbuf the complement of the ranges stored in `bbuf`.
 *
 * A NULL or empty input yields the full multibyte range
 * (SET_ALL_MULTI_BYTE_RANGE).  Otherwise the gaps between consecutive
 * ranges, starting at MBCODE_START_POS(enc) and ending at the largest
 * OnigCodePoint value, are added one by one.
 *
 * Returns 0 on success or the first error from add_code_range_to_buf().
 * *pbuf is reset to NULL on entry; on error it may hold a partial result.
 */
1784 static int
1785 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
1786 {
1787   int r, i, n;
1788   OnigCodePoint pre, from, *data, to = 0;
1789 
1790   *pbuf = (BBuf* )NULL;
1791   if (IS_NULL(bbuf)) {
1792   set_all:
1793     return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1794   }
1795 
1796   data = (OnigCodePoint* )(bbuf->p);
1797   GET_CODE_POINT(n, data);
1798   data++;
1799   if (n <= 0) goto set_all;
1800 
1801   r = 0;
/* pre is the first code point not yet covered by an input range. */
1802   pre = MBCODE_START_POS(enc);
1803   for (i = 0; i < n; i++) {
1804     from = data[i*2];
1805     to   = data[i*2+1];
1806     if (pre <= from - 1) {
1807       r = add_code_range_to_buf(pbuf, pre, from - 1);
1808       if (r != 0) return r;
1809     }
/* Stop before to + 1 would wrap around to 0. */
1810     if (to == ~((OnigCodePoint )0)) break;
1811     pre = to + 1;
1812   }
/* Add the tail after the last input range, if any. */
1813   if (to < ~((OnigCodePoint )0)) {
1814     r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
1815   }
1816   return r;
1817 }
1818 
/* Exchange two (buffer, negation flag) pairs.  Every argument must be an
 * lvalue; the do/while(0) wrapper makes the macro usable as one statement. */
1819 #define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
1820   BBuf *tbuf; \
1821   int  tnot; \
1822   tnot = not1;  not1  = not2;  not2  = tnot; \
1823   tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
1824 } while (0)
1825 
1826 static int
1827 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
1828                   BBuf* bbuf2, int not2, BBuf** pbuf)
1829 {
1830   int r;
1831   OnigCodePoint i, n1, *data1;
1832   OnigCodePoint from, to;
1833 
1834   *pbuf = (BBuf* )NULL;
1835   if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
1836     if (not1 != 0 || not2 != 0)
1837       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1838     return 0;
1839   }
1840 
1841   r = 0;
1842   if (IS_NULL(bbuf2))
1843     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1844 
1845   if (IS_NULL(bbuf1)) {
1846     if (not1 != 0) {
1847       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1848     }
1849     else {
1850       if (not2 == 0) {
1851         return bbuf_clone(pbuf, bbuf2);
1852       }
1853       else {
1854         return not_code_range_buf(enc, bbuf2, pbuf);
1855       }
1856     }
1857   }
1858 
1859   if (not1 != 0)
1860     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1861 
1862   data1 = (OnigCodePoint* )(bbuf1->p);
1863   GET_CODE_POINT(n1, data1);
1864   data1++;
1865 
1866   if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
1867     r = bbuf_clone(pbuf, bbuf2);
1868   }
1869   else if (not1 == 0) { /* 1 OR (not 2) */
1870     r = not_code_range_buf(enc, bbuf2, pbuf);
1871   }
1872   if (r != 0) return r;
1873 
1874   for (i = 0; i < n1; i++) {
1875     from = data1[i*2];
1876     to   = data1[i*2+1];
1877     r = add_code_range_to_buf(pbuf, from, to);
1878     if (r != 0) return r;
1879   }
1880   return 0;
1881 }
1882 
/*
 * Add to *pbuf the parts of [from1, to1] that are NOT covered by any of
 * the `n` ranges in `data` (pairs of from/to values).
 *
 * The working range [from1, to1] is trimmed as the ranges in `data` are
 * visited in order; pieces lying before a covering range are emitted as
 * soon as they are known, and whatever remains is emitted after the loop.
 * NOTE(review): the in-order trimming presumably relies on `data` being in
 * ascending order -- confirm.
 *
 * Returns 0 on success or the first error from add_code_range_to_buf().
 */
1883 static int
1884 and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
1885                 OnigCodePoint* data, int n)
1886 {
1887   int i, r;
1888   OnigCodePoint from2, to2;
1889 
1890   for (i = 0; i < n; i++) {
1891     from2 = data[i*2];
1892     to2   = data[i*2+1];
1893     if (from2 < from1) {
1894       if (to2 < from1) continue;
1895       else {
1896         from1 = to2 + 1;
1897       }
1898     }
1899     else if (from2 <= to1) {
1900       if (to2 < to1) {
1901         if (from1 <= from2 - 1) {
1902           r = add_code_range_to_buf(pbuf, from1, from2-1);
1903           if (r != 0) return r;
1904         }
1905         from1 = to2 + 1;
1906       }
1907       else {
1908         to1 = from2 - 1;
1909       }
1910     }
1911     else {
1912       from1 = from2;
1913     }
/* Nothing of the working range is left. */
1914     if (from1 > to1) break;
1915   }
1916   if (from1 <= to1) {
1917     r = add_code_range_to_buf(pbuf, from1, to1);
1918     if (r != 0) return r;
1919   }
1920   return 0;
1921 }
1922 
1923 static int
1924 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
1925 {
1926   int r;
1927   OnigCodePoint i, j, n1, n2, *data1, *data2;
1928   OnigCodePoint from, to, from1, to1, from2, to2;
1929 
1930   *pbuf = (BBuf* )NULL;
1931   if (IS_NULL(bbuf1)) {
1932     if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
1933       return bbuf_clone(pbuf, bbuf2);
1934     return 0;
1935   }
1936   else if (IS_NULL(bbuf2)) {
1937     if (not2 != 0)
1938       return bbuf_clone(pbuf, bbuf1);
1939     return 0;
1940   }
1941 
1942   if (not1 != 0)
1943     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1944 
1945   data1 = (OnigCodePoint* )(bbuf1->p);
1946   data2 = (OnigCodePoint* )(bbuf2->p);
1947   GET_CODE_POINT(n1, data1);
1948   GET_CODE_POINT(n2, data2);
1949   data1++;
1950   data2++;
1951 
1952   if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
1953     for (i = 0; i < n1; i++) {
1954       from1 = data1[i*2];
1955       to1   = data1[i*2+1];
1956       for (j = 0; j < n2; j++) {
1957         from2 = data2[j*2];
1958         to2   = data2[j*2+1];
1959         if (from2 > to1) break;
1960         if (to2 < from1) continue;
1961         from = MAX(from1, from2);
1962         to   = MIN(to1, to2);
1963         r = add_code_range_to_buf(pbuf, from, to);
1964         if (r != 0) return r;
1965       }
1966     }
1967   }
1968   else if (not1 == 0) { /* 1 AND (not 2) */
1969     for (i = 0; i < n1; i++) {
1970       from1 = data1[i*2];
1971       to1   = data1[i*2+1];
1972       r = and_code_range1(pbuf, from1, to1, data2, n2);
1973       if (r != 0) return r;
1974     }
1975   }
1976 
1977   return 0;
1978 }
1979 
1980 static int
1981 and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
1982 {
1983   int r, not1, not2;
1984   BBuf *buf1, *buf2, *pbuf;
1985   BitSetRef bsr1, bsr2;
1986   BitSet bs1, bs2;
1987 
1988   not1 = IS_NCCLASS_NOT(dest);
1989   bsr1 = dest->bs;
1990   buf1 = dest->mbuf;
1991   not2 = IS_NCCLASS_NOT(cc);
1992   bsr2 = cc->bs;
1993   buf2 = cc->mbuf;
1994 
1995   if (not1 != 0) {
1996     bitset_invert_to(bsr1, bs1);
1997     bsr1 = bs1;
1998   }
1999   if (not2 != 0) {
2000     bitset_invert_to(bsr2, bs2);
2001     bsr2 = bs2;
2002   }
2003   bitset_and(bsr1, bsr2);
2004   if (bsr1 != dest->bs) {
2005     bitset_copy(dest->bs, bsr1);
2006     bsr1 = dest->bs;
2007   }
2008   if (not1 != 0) {
2009     bitset_invert(dest->bs);
2010   }
2011 
2012   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2013     if (not1 != 0 && not2 != 0) {
2014       r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
2015     }
2016     else {
2017       r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
2018       if (r == 0 && not1 != 0) {
2019         BBuf *tbuf;
2020         r = not_code_range_buf(enc, pbuf, &tbuf);
2021         if (r != 0) {
2022           bbuf_free(pbuf);
2023           return r;
2024         }
2025         bbuf_free(pbuf);
2026         pbuf = tbuf;
2027       }
2028     }
2029     if (r != 0) return r;
2030 
2031     dest->mbuf = pbuf;
2032     bbuf_free(buf1);
2033     return r;
2034   }
2035   return 0;
2036 }
2037 
2038 static int
2039 or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
2040 {
2041   int r, not1, not2;
2042   BBuf *buf1, *buf2, *pbuf;
2043   BitSetRef bsr1, bsr2;
2044   BitSet bs1, bs2;
2045 
2046   not1 = IS_NCCLASS_NOT(dest);
2047   bsr1 = dest->bs;
2048   buf1 = dest->mbuf;
2049   not2 = IS_NCCLASS_NOT(cc);
2050   bsr2 = cc->bs;
2051   buf2 = cc->mbuf;
2052 
2053   if (not1 != 0) {
2054     bitset_invert_to(bsr1, bs1);
2055     bsr1 = bs1;
2056   }
2057   if (not2 != 0) {
2058     bitset_invert_to(bsr2, bs2);
2059     bsr2 = bs2;
2060   }
2061   bitset_or(bsr1, bsr2);
2062   if (bsr1 != dest->bs) {
2063     bitset_copy(dest->bs, bsr1);
2064     bsr1 = dest->bs;
2065   }
2066   if (not1 != 0) {
2067     bitset_invert(dest->bs);
2068   }
2069 
2070   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2071     if (not1 != 0 && not2 != 0) {
2072       r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
2073     }
2074     else {
2075       r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
2076       if (r == 0 && not1 != 0) {
2077         BBuf *tbuf;
2078         r = not_code_range_buf(enc, pbuf, &tbuf);
2079         if (r != 0) {
2080           bbuf_free(pbuf);
2081           return r;
2082         }
2083         bbuf_free(pbuf);
2084         pbuf = tbuf;
2085       }
2086     }
2087     if (r != 0) return r;
2088 
2089     dest->mbuf = pbuf;
2090     bbuf_free(buf1);
2091     return r;
2092   }
2093   else
2094     return 0;
2095 }
2096 
2097 static int
2098 conv_backslash_value(int c, ScanEnv* env)
2099 {
2100   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
2101     switch (c) {
2102     case 'n': return '\n';
2103     case 't': return '\t';
2104     case 'r': return '\r';
2105     case 'f': return '\f';
2106     case 'a': return '\007';
2107     case 'b': return '\010';
2108     case 'e': return '\033';
2109     case 'v':
2110       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
2111         return '\v';
2112       break;
2113 
2114     default:
2115       break;
2116     }
2117   }
2118   return c;
2119 }
2120 
2121 static int
2122 is_invalid_quantifier_target(Node* node)
2123 {
2124   switch (NTYPE(node)) {
2125   case NT_ANCHOR:
2126     return 1;
2127     break;
2128 
2129   case NT_ENCLOSE:
2130     /* allow enclosed elements */
2131     /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
2132     break;
2133 
2134   case NT_LIST:
2135     do {
2136       if (! is_invalid_quantifier_target(NCAR(node))) return 0;
2137     } while (IS_NOT_NULL(node = NCDR(node)));
2138     return 0;
2139     break;
2140 
2141   case NT_ALT:
2142     do {
2143       if (is_invalid_quantifier_target(NCAR(node))) return 1;
2144     } while (IS_NOT_NULL(node = NCDR(node)));
2145     break;
2146 
2147   default:
2148     break;
2149   }
2150   return 0;
2151 }
2152 
2153 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
2154 static int
2155 popular_quantifier_num(QtfrNode* q)
2156 {
2157   if (q->greedy) {
2158     if (q->lower == 0) {
2159       if (q->upper == 1) return 0;
2160       else if (IS_REPEAT_INFINITE(q->upper)) return 1;
2161     }
2162     else if (q->lower == 1) {
2163       if (IS_REPEAT_INFINITE(q->upper)) return 2;
2164     }
2165   }
2166   else {
2167     if (q->lower == 0) {
2168       if (q->upper == 1) return 3;
2169       else if (IS_REPEAT_INFINITE(q->upper)) return 4;
2170     }
2171     else if (q->lower == 1) {
2172       if (IS_REPEAT_INFINITE(q->upper)) return 5;
2173     }
2174   }
2175   return -1;
2176 }
2177 
2178 
/* How a quantifier directly nested inside another quantifier is simplified;
 * see onig_reduce_nested_quantifier(). */
2179 enum ReduceType {
2180   RQ_ASIS = 0, /* as is */
2181   RQ_DEL  = 1, /* delete parent */
2182   RQ_A,        /* to '*'    */
2183   RQ_AQ,       /* to '*?'   */
2184   RQ_QQ,       /* to '??'   */
2185   RQ_P_QQ,     /* to '+)??' */
2186   RQ_PQ_Q      /* to '+?)?' */
2187 };
2188 
/* Reduction to apply, indexed as [child][parent] with the numbers returned
 * by popular_quantifier_num() (?:0, *:1, +:2, ??:3, *?:4, +?:5).  Each row
 * is labelled with the child quantifier it describes. */
2189 static enum ReduceType ReduceTypeTable[6][6] = {
2190   {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
2191   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
2192   {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
2193   {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
2194   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
2195   {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
2196 };
2197 
2198 extern void
2199 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
2200 {
2201   int pnum, cnum;
2202   QtfrNode *p, *c;
2203 
2204   p = NQTFR(pnode);
2205   c = NQTFR(cnode);
2206   pnum = popular_quantifier_num(p);
2207   cnum = popular_quantifier_num(c);
2208   if (pnum < 0 || cnum < 0) return ;
2209 
2210   switch(ReduceTypeTable[cnum][pnum]) {
2211   case RQ_DEL:
2212     *pnode = *cnode;
2213     break;
2214   case RQ_A:
2215     p->target = c->target;
2216     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 1;
2217     break;
2218   case RQ_AQ:
2219     p->target = c->target;
2220     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 0;
2221     break;
2222   case RQ_QQ:
2223     p->target = c->target;
2224     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
2225     break;
2226   case RQ_P_QQ:
2227     p->target = cnode;
2228     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
2229     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 1;
2230     return ;
2231     break;
2232   case RQ_PQ_Q:
2233     p->target = cnode;
2234     p->lower  = 0;  p->upper = 1;  p->greedy = 1;
2235     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 0;
2236     return ;
2237     break;
2238   case RQ_ASIS:
2239     p->target = cnode;
2240     return ;
2241     break;
2242   }
2243 
2244   c->target = NULL_NODE;
2245   onig_node_free(cnode);
2246 }
2247 
2248 
/* Kinds of token stored in OnigToken.type.  The entries after the
 * "in cc" marker are the ones specific to character-class context. */
2249 enum TokenSyms {
2250   TK_EOT      = 0,   /* end of token */
2251   TK_RAW_BYTE = 1,
2252   TK_CHAR,
2253   TK_STRING,
2254   TK_CODE_POINT,
2255   TK_ANYCHAR,
2256   TK_CHAR_TYPE,
2257   TK_BACKREF,
2258   TK_CALL,
2259   TK_ANCHOR,
2260   TK_OP_REPEAT,
2261   TK_INTERVAL,
2262   TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */
2263   TK_ALT,
2264   TK_SUBEXP_OPEN,
2265   TK_SUBEXP_CLOSE,
2266   TK_CC_OPEN,
2267   TK_QUOTE_OPEN,
2268   TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */
2269   /* in cc */
2270   TK_CC_CLOSE,
2271   TK_CC_RANGE,
2272   TK_POSIX_BRACKET_OPEN,
2273   TK_CC_AND,             /* && */
2274   TK_CC_CC_OPEN          /* [ */
2275 };
2276 
2277 typedef struct {
2278   enum TokenSyms type;
2279   int escaped;
2280   int base;   /* is number: 8, 16 (used in [....]) */
2281   UChar* backp;
2282   union {
2283     UChar* s;
2284     int   c;
2285     OnigCodePoint code;
2286     int   anchor;
2287     int   subtype;
2288     struct {
2289       int lower;
2290       int upper;
2291       int greedy;
2292       int possessive;
2293     } repeat;
2294     struct {
2295       int  num;
2296       int  ref1;
2297       int* refs;
2298       int  by_name;
2299 #ifdef USE_BACKREF_WITH_LEVEL
2300       int  exist_level;
2301       int  level;   /* \k<name+n> */
2302 #endif
2303     } backref;
2304     struct {
2305       UChar* name;
2306       UChar* name_end;
2307       int    gnum;
2308     } call;
2309     struct {
2310       int ctype;
2311       int not;
2312     } prop;
2313   } u;
2314 } OnigToken;
2315 
2316 
2317 static int
2318 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
2319 {
2320   int low, up, syn_allow, non_low = 0;
2321   int r = 0;
2322   OnigCodePoint c;
2323   OnigEncoding enc = env->enc;
2324   UChar* p = *src;
2325   PFETCH_READY;
2326 
2327   syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
2328 
2329   if (PEND) {
2330     if (syn_allow)
2331       return 1;  /* "....{" : OK! */
2332     else
2333       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
2334   }
2335 
2336   if (! syn_allow) {
2337     c = PPEEK;
2338     if (c == ')' || c == '(' || c == '|') {
2339       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
2340     }
2341   }
2342 
2343   low = onig_scan_unsigned_number(&p, end, env->enc);
2344   if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2345   if (low > ONIG_MAX_REPEAT_NUM)
2346     return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2347 
2348   if (p == *src) { /* can't read low */
2349     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
2350       /* allow {,n} as {0,n} */
2351       low = 0;
2352       non_low = 1;
2353     }
2354     else
2355       goto invalid;
2356   }
2357 
2358   if (PEND) goto invalid;
2359   PFETCH(c);
2360   if (c == ',') {
2361     UChar* prev = p;
2362     up = onig_scan_unsigned_number(&p, end, env->enc);
2363     if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2364     if (up > ONIG_MAX_REPEAT_NUM)
2365       return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2366 
2367     if (p == prev) {
2368       if (non_low != 0)
2369         goto invalid;
2370       up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */
2371     }
2372   }
2373   else {
2374     if (non_low != 0)
2375       goto invalid;
2376 
2377     PUNFETCH;
2378     up = low;  /* {n} : exact n times */
2379     r = 2;     /* fixed */
2380   }
2381 
2382   if (PEND) goto invalid;
2383   PFETCH(c);
2384   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
2385     if (c != MC_ESC(env->syntax)) goto invalid;
2386     PFETCH(c);
2387   }
2388   if (c != '}') goto invalid;
2389 
2390   if (!IS_REPEAT_INFINITE(up) && low > up) {
2391     return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
2392   }
2393 
2394   tok->type = TK_INTERVAL;
2395   tok->u.repeat.lower = low;
2396   tok->u.repeat.upper = up;
2397   *src = p;
2398   return r; /* 0: normal {n,m}, 2: fixed {n} */
2399 
2400  invalid:
2401   if (syn_allow)
2402     return 1;  /* OK */
2403   else
2404     return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
2405 }
2406 
2407 /* \M-, \C-, \c, or \... */
2408 static int
2409 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
2410 {
2411   int v;
2412   OnigCodePoint c;
2413   OnigEncoding enc = env->enc;
2414   UChar* p = *src;
2415 
2416   if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
2417 
2418   PFETCH_S(c);
2419   switch (c) {
2420   case 'M':
2421     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
2422       if (PEND) return ONIGERR_END_PATTERN_AT_META;
2423       PFETCH_S(c);
2424       if (c != '-') return ONIGERR_META_CODE_SYNTAX;
2425       if (PEND) return ONIGERR_END_PATTERN_AT_META;
2426       PFETCH_S(c);
2427       if (c == MC_ESC(env->syntax)) {
2428         v = fetch_escaped_value(&p, end, env);
2429         if (v < 0) return v;
2430         c = (OnigCodePoint )v;
2431       }
2432       c = ((c & 0xff) | 0x80);
2433     }
2434     else
2435       goto backslash;
2436     break;
2437 
2438   case 'C':
2439     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
2440       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
2441       PFETCH_S(c);
2442       if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
2443       goto control;
2444     }
2445     else
2446       goto backslash;
2447 
2448   case 'c':
2449     if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
2450     control:
2451       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
2452       PFETCH_S(c);
2453       if (c == '?') {
2454         c = 0177;
2455       }
2456       else {
2457         if (c == MC_ESC(env->syntax)) {
2458           v = fetch_escaped_value(&p, end, env);
2459           if (v < 0) return v;
2460           c = (OnigCodePoint )v;
2461         }
2462         c &= 0x9f;
2463       }
2464       break;
2465     }
2466     /* fall through */
2467 
2468   default:
2469     {
2470     backslash:
2471       c = conv_backslash_value(c, env);
2472     }
2473     break;
2474   }
2475 
2476   *src = p;
2477   return c;
2478 }
2479 
2480 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
2481 
2482 static OnigCodePoint
2483 get_name_end_code_point(OnigCodePoint start)
2484 {
2485   switch (start) {
2486   case '<':  return (OnigCodePoint )'>'; break;
2487   case '\'': return (OnigCodePoint )'\''; break;
2488   default:
2489     break;
2490   }
2491 
2492   return (OnigCodePoint )0;
2493 }
2494 
2495 #ifdef USE_NAMED_GROUP
2496 #ifdef USE_BACKREF_WITH_LEVEL
2497 /*
2498    \k<name+n>, \k<name-n>
2499    \k<num+n>,  \k<num-n>
2500    \k<-num+n>, \k<-num-n>
2501 */
2502 static int
2503 fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
2504                       UChar** rname_end, ScanEnv* env,
2505                       int* rback_num, int* rlevel)
2506 {
2507   int r, sign, is_num, exist_level;
2508   OnigCodePoint end_code;
2509   OnigCodePoint c = 0;
2510   OnigEncoding enc = env->enc;
2511   UChar *name_end;
2512   UChar *pnum_head;
2513   UChar *p = *src;
2514   PFETCH_READY;
2515 
2516   *rback_num = 0;
2517   is_num = exist_level = 0;
2518   sign = 1;
2519   pnum_head = *src;
2520 
2521   end_code = get_name_end_code_point(start_code);
2522 
2523   name_end = end;
2524   r = 0;
2525   if (PEND) {
2526     return ONIGERR_EMPTY_GROUP_NAME;
2527   }
2528   else {
2529     PFETCH(c);
2530     if (c == end_code)
2531       return ONIGERR_EMPTY_GROUP_NAME;
2532 
2533     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2534       is_num = 1;
2535     }
2536     else if (c == '-') {
2537       is_num = 2;
2538       sign = -1;
2539       pnum_head = p;
2540     }
2541     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2542       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2543     }
2544   }
2545 
2546   while (!PEND) {
2547     name_end = p;
2548     PFETCH(c);
2549     if (c == end_code || c == ')' || c == '+' || c == '-') {
2550       if (is_num == 2)  r = ONIGERR_INVALID_GROUP_NAME;
2551       break;
2552     }
2553 
2554     if (is_num != 0) {
2555       if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2556         is_num = 1;
2557       }
2558       else {
2559         r = ONIGERR_INVALID_GROUP_NAME;
2560         is_num = 0;
2561       }
2562     }
2563     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2564       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2565     }
2566   }
2567 
2568   if (r == 0 && c != end_code) {
2569     if (c == '+' || c == '-') {
2570       int level;
2571       int flag = (c == '-' ? -1 : 1);
2572 
2573       PFETCH(c);
2574       if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
2575       PUNFETCH;
2576       level = onig_scan_unsigned_number(&p, end, enc);
2577       if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
2578       *rlevel = (level * flag);
2579       exist_level = 1;
2580 
2581       PFETCH(c);
2582       if (c == end_code)
2583         goto end;
2584     }
2585 
2586   err:
2587     r = ONIGERR_INVALID_GROUP_NAME;
2588     name_end = end;
2589   }
2590 
2591  end:
2592   if (r == 0) {
2593     if (is_num != 0) {
2594       *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2595       if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2596       else if (*rback_num == 0) goto err;
2597 
2598       *rback_num *= sign;
2599     }
2600 
2601     *rname_end = name_end;
2602     *src = p;
2603     return (exist_level ? 1 : 0);
2604   }
2605   else {
2606     onig_scan_env_set_error_string(env, r, *src, name_end);
2607     return r;
2608   }
2609 }
2610 #endif /* USE_BACKREF_WITH_LEVEL */
2611 
2612 /*
2613   def: 0 -> define name    (don't allow number name)
2614        1 -> reference name (allow number name)
2615 */
2616 static int
2617 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2618            UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2619 {
2620   int r, is_num, sign;
2621   OnigCodePoint end_code;
2622   OnigCodePoint c = 0;
2623   OnigEncoding enc = env->enc;
2624   UChar *name_end;
2625   UChar *pnum_head;
2626   UChar *p = *src;
2627 
2628   *rback_num = 0;
2629 
2630   end_code = get_name_end_code_point(start_code);
2631 
2632   name_end = end;
2633   pnum_head = *src;
2634   r = 0;
2635   is_num = 0;
2636   sign = 1;
2637   if (PEND) {
2638     return ONIGERR_EMPTY_GROUP_NAME;
2639   }
2640   else {
2641     PFETCH_S(c);
2642     if (c == end_code)
2643       return ONIGERR_EMPTY_GROUP_NAME;
2644 
2645     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2646       if (ref == 1)
2647         is_num = 1;
2648       else {
2649         r = ONIGERR_INVALID_GROUP_NAME;
2650         is_num = 0;
2651       }
2652     }
2653     else if (c == '-') {
2654       if (ref == 1) {
2655         is_num = 2;
2656         sign = -1;
2657         pnum_head = p;
2658       }
2659       else {
2660         r = ONIGERR_INVALID_GROUP_NAME;
2661         is_num = 0;
2662       }
2663     }
2664     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2665       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2666     }
2667   }
2668 
2669   if (r == 0) {
2670     while (!PEND) {
2671       name_end = p;
2672       PFETCH_S(c);
2673       if (c == end_code || c == ')') {
2674         if (is_num == 2)        r = ONIGERR_INVALID_GROUP_NAME;
2675         break;
2676       }
2677 
2678       if (is_num != 0) {
2679         if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2680           is_num = 1;
2681         }
2682         else {
2683           if (!ONIGENC_IS_CODE_WORD(enc, c))
2684             r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2685           else
2686             r = ONIGERR_INVALID_GROUP_NAME;
2687           is_num = 0;
2688         }
2689       }
2690       else {
2691         if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2692           r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2693         }
2694       }
2695     }
2696 
2697     if (c != end_code) {
2698       r = ONIGERR_INVALID_GROUP_NAME;
2699       name_end = end;
2700     }
2701 
2702     if (is_num != 0) {
2703       *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2704       if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2705       else if (*rback_num == 0) {
2706         r = ONIGERR_INVALID_GROUP_NAME;
2707         goto err;
2708       }
2709 
2710       *rback_num *= sign;
2711     }
2712 
2713     *rname_end = name_end;
2714     *src = p;
2715     return 0;
2716   }
2717   else {
2718     while (!PEND) {
2719       name_end = p;
2720       PFETCH_S(c);
2721       if (c == end_code || c == ')')
2722         break;
2723     }
2724     if (PEND)
2725       name_end = end;
2726 
2727   err:
2728     onig_scan_env_set_error_string(env, r, *src, name_end);
2729     return r;
2730   }
2731 }
2732 #else
2733 static int
2734 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2735            UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2736 {
2737   int r, is_num, sign;
2738   OnigCodePoint end_code;
2739   OnigCodePoint c = 0;
2740   UChar *name_end;
2741   OnigEncoding enc = env->enc;
2742   UChar *pnum_head;
2743   UChar *p = *src;
2744   PFETCH_READY;
2745 
2746   *rback_num = 0;
2747 
2748   end_code = get_name_end_code_point(start_code);
2749 
2750   *rname_end = name_end = end;
2751   r = 0;
2752   pnum_head = *src;
2753   is_num = 0;
2754   sign = 1;
2755 
2756   if (PEND) {
2757     return ONIGERR_EMPTY_GROUP_NAME;
2758   }
2759   else {
2760     PFETCH(c);
2761     if (c == end_code)
2762       return ONIGERR_EMPTY_GROUP_NAME;
2763 
2764     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2765       is_num = 1;
2766     }
2767     else if (c == '-') {
2768       is_num = 2;
2769       sign = -1;
2770       pnum_head = p;
2771     }
2772     else {
2773       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2774     }
2775   }
2776 
2777   while (!PEND) {
2778     name_end = p;
2779 
2780     PFETCH(c);
2781     if (c == end_code || c == ')') break;
2782     if (! ONIGENC_IS_CODE_DIGIT(enc, c))
2783       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
2784   }
2785   if (r == 0 && c != end_code) {
2786     r = ONIGERR_INVALID_GROUP_NAME;
2787     name_end = end;
2788   }
2789 
2790   if (r == 0) {
2791     *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2792     if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2793     else if (*rback_num == 0) {
2794       r = ONIGERR_INVALID_GROUP_NAME;
2795       goto err;
2796     }
2797     *rback_num *= sign;
2798 
2799     *rname_end = name_end;
2800     *src = p;
2801     return 0;
2802   }
2803   else {
2804   err:
2805     onig_scan_env_set_error_string(env, r, *src, name_end);
2806     return r;
2807   }
2808 }
2809 #endif /* USE_NAMED_GROUP */
2810 
2811 static void
2812 CC_ESC_WARN(ScanEnv* env, UChar *c)
2813 {
2814   if (onig_warn == onig_null_warn) return ;
2815 
2816   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
2817       IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
2818     UChar buf[WARN_BUFSIZE];
2819     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
2820                 env->pattern, env->pattern_end,
2821                 (UChar* )"character class has '%s' without escape", c);
2822     (*onig_warn)((char* )buf);
2823   }
2824 }
2825 
2826 static void
2827 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
2828 {
2829   if (onig_warn == onig_null_warn) return ;
2830 
2831   if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
2832     UChar buf[WARN_BUFSIZE];
2833     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
2834                 (env)->pattern, (env)->pattern_end,
2835                 (UChar* )"regular expression has '%s' without escape", c);
2836     (*onig_warn)((char* )buf);
2837   }
2838 }
2839 
2840 static UChar*
2841 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
2842                   UChar **next, OnigEncoding enc)
2843 {
2844   int i;
2845   OnigCodePoint x;
2846   UChar *q;
2847   UChar *p = from;
2848   
2849   while (p < to) {
2850     x = ONIGENC_MBC_TO_CODE(enc, p, to);
2851     q = p + enclen(enc, p);
2852     if (x == s[0]) {
2853       for (i = 1; i < n && q < to; i++) {
2854         x = ONIGENC_MBC_TO_CODE(enc, q, to);
2855         if (x != s[i]) break;
2856         q += enclen(enc, q);
2857       }
2858       if (i >= n) {
2859         if (IS_NOT_NULL(next))
2860           *next = q;
2861         return p;
2862       }
2863     }
2864     p = q;
2865   }
2866   return NULL_UCHARP;
2867 }
2868 
2869 static int
2870 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
2871                  OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
2872 {
2873   int i, in_esc;
2874   OnigCodePoint x;
2875   UChar *q;
2876   UChar *p = from;
2877 
2878   in_esc = 0;
2879   while (p < to) {
2880     if (in_esc) {
2881       in_esc = 0;
2882       p += enclen(enc, p);
2883     }
2884     else {
2885       x = ONIGENC_MBC_TO_CODE(enc, p, to);
2886       q = p + enclen(enc, p);
2887       if (x == s[0]) {
2888         for (i = 1; i < n && q < to; i++) {
2889           x = ONIGENC_MBC_TO_CODE(enc, q, to);
2890           if (x != s[i]) break;
2891           q += enclen(enc, q);
2892         }
2893         if (i >= n) return 1;
2894         p += enclen(enc, p);
2895       }
2896       else {
2897         x = ONIGENC_MBC_TO_CODE(enc, p, to);
2898         if (x == bad) return 0;
2899         else if (x == MC_ESC(syn)) in_esc = 1;
2900         p = q;
2901       }
2902     }
2903   }
2904   return 0;
2905 }
2906 
2907 static int
2908 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
2909 {
2910   int num;
2911   OnigCodePoint c, c2;
2912   OnigSyntaxType* syn = env->syntax;
2913   OnigEncoding enc = env->enc;
2914   UChar* prev;
2915   UChar* p = *src;
2916   PFETCH_READY;
2917 
2918   if (PEND) {
2919     tok->type = TK_EOT;
2920     return tok->type;
2921   }
2922 
2923   PFETCH(c);
2924   tok->type = TK_CHAR;
2925   tok->base = 0;
2926   tok->u.c  = c;
2927   tok->escaped = 0;
2928 
2929   if (c == ']') {
2930     tok->type = TK_CC_CLOSE;
2931   }
2932   else if (c == '-') {
2933     tok->type = TK_CC_RANGE;
2934   }
2935   else if (c == MC_ESC(syn)) {
2936     if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
2937       goto end;
2938 
2939     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
2940 
2941     PFETCH(c);
2942     tok->escaped = 1;
2943     tok->u.c = c;
2944     switch (c) {
2945     case 'w':
2946       tok->type = TK_CHAR_TYPE;
2947       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
2948       tok->u.prop.not   = 0;
2949       break;
2950     case 'W':
2951       tok->type = TK_CHAR_TYPE;
2952       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
2953       tok->u.prop.not   = 1;
2954       break;
2955     case 'd':
2956       tok->type = TK_CHAR_TYPE;
2957       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
2958       tok->u.prop.not   = 0;
2959       break;
2960     case 'D':
2961       tok->type = TK_CHAR_TYPE;
2962       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
2963       tok->u.prop.not   = 1;
2964       break;
2965     case 's':
2966       tok->type = TK_CHAR_TYPE;
2967       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
2968       tok->u.prop.not   = 0;
2969       break;
2970     case 'S':
2971       tok->type = TK_CHAR_TYPE;
2972       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
2973       tok->u.prop.not   = 1;
2974       break;
2975     case 'h':
2976       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
2977       tok->type = TK_CHAR_TYPE;
2978       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
2979       tok->u.prop.not   = 0;
2980       break;
2981     case 'H':
2982       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
2983       tok->type = TK_CHAR_TYPE;
2984       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
2985       tok->u.prop.not   = 1;
2986       break;
2987 
2988     case 'p':
2989     case 'P':
2990       c2 = PPEEK;
2991       if (c2 == '{' &&
2992           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
2993         PINC;
2994         tok->type = TK_CHAR_PROPERTY;
2995         tok->u.prop.not = (c == 'P' ? 1 : 0);
2996 
2997         if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
2998           PFETCH(c2);
2999           if (c2 == '^') {
3000             tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3001           }
3002           else
3003             PUNFETCH;
3004         }
3005       }
3006       break;
3007 
3008     case 'x':
3009       if (PEND) break;
3010 
3011       prev = p;
3012       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
3013         PINC;
3014         num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
3015         if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3016         if (!PEND) {
3017           c2 = PPEEK;
3018           if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
3019             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3020         }
3021 
3022         if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
3023           PINC;
3024           tok->type   = TK_CODE_POINT;
3025           tok->base   = 16;
3026           tok->u.code = (OnigCodePoint )num;
3027         }
3028         else {
3029           /* can't read nothing or invalid format */
3030           p = prev;
3031         }
3032       }
3033       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3034         num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
3035         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3036         if (p == prev) {  /* can't read nothing. */
3037           num = 0; /* but, it's not error */
3038         }
3039         tok->type = TK_RAW_BYTE;
3040         tok->base = 16;
3041         tok->u.c  = num;
3042       }
3043       break;
3044 
3045     case 'u':
3046       if (PEND) break;
3047 
3048       prev = p;
3049       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
3050         num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
3051         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3052         if (p == prev) {  /* can't read nothing. */
3053           num = 0; /* but, it's not error */
3054         }
3055         tok->type   = TK_CODE_POINT;
3056         tok->base   = 16;
3057         tok->u.code = (OnigCodePoint )num;
3058       }
3059       break;
3060 
3061     case '0':
3062     case '1': case '2': case '3': case '4': case '5': case '6': case '7':
3063       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
3064         PUNFETCH;
3065         prev = p;
3066         num = scan_unsigned_octal_number(&p, end, 3, enc);
3067         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3068         if (p == prev) {  /* can't read nothing. */
3069           num = 0; /* but, it's not error */
3070         }
3071         tok->type = TK_RAW_BYTE;
3072         tok->base = 8;
3073         tok->u.c  = num;
3074       }
3075       break;
3076 
3077     default:
3078       PUNFETCH;
3079       num = fetch_escaped_value(&p, end, env);
3080       if (num < 0) return num;
3081       if (tok->u.c != num) {
3082         tok->u.code = (OnigCodePoint )num;
3083         tok->type   = TK_CODE_POINT;
3084       }
3085       break;
3086     }
3087   }
3088   else if (c == '[') {
3089     if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
3090       OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
3091       tok->backp = p; /* point at '[' is readed */
3092       PINC;
3093       if (str_exist_check_with_esc(send, 2, p, end,
3094                                    (OnigCodePoint )']', enc, syn)) {
3095         tok->type = TK_POSIX_BRACKET_OPEN;
3096       }
3097       else {
3098         PUNFETCH;
3099         goto cc_in_cc;
3100       }
3101     }
3102     else {
3103     cc_in_cc:
3104       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
3105         tok->type = TK_CC_CC_OPEN;
3106       }
3107       else {
3108         CC_ESC_WARN(env, (UChar* )"[");
3109       }
3110     }
3111   }
3112   else if (c == '&') {
3113     if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
3114         !PEND && (PPEEK_IS('&'))) {
3115       PINC;
3116       tok->type = TK_CC_AND;
3117     }
3118   }
3119 
3120  end:
3121   *src = p;
3122   return tok->type;
3123 }
3124 
3125 static int
3126 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
3127 {
3128   int r, num;
3129   OnigCodePoint c;
3130   OnigEncoding enc = env->enc;
3131   OnigSyntaxType* syn = env->syntax;
3132   UChar* prev;
3133   UChar* p = *src;
3134   PFETCH_READY;
3135 
3136  start:
3137   if (PEND) {
3138     tok->type = TK_EOT;
3139     return tok->type;
3140   }
3141 
3142   tok->type  = TK_STRING;
3143   tok->base  = 0;
3144   tok->backp = p;
3145 
3146   PFETCH(c);
3147   if (IS_MC_ESC_CODE(c, syn)) {
3148     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
3149 
3150     tok->backp = p;
3151     PFETCH(c);
3152 
3153     tok->u.c = c;
3154     tok->escaped = 1;
3155     switch (c) {
3156     case '*':
3157       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
3158       tok->type = TK_OP_REPEAT;
3159       tok->u.repeat.lower = 0;
3160       tok->u.repeat.upper = REPEAT_INFINITE;
3161       goto greedy_check;
3162       break;
3163 
3164     case '+':
3165       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
3166       tok->type = TK_OP_REPEAT;
3167       tok->u.repeat.lower = 1;
3168       tok->u.repeat.upper = REPEAT_INFINITE;
3169       goto greedy_check;
3170       break;
3171 
3172     case '?':
3173       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
3174       tok->type = TK_OP_REPEAT;
3175       tok->u.repeat.lower = 0;
3176       tok->u.repeat.upper = 1;
3177     greedy_check:
3178       if (!PEND && PPEEK_IS('?') &&
3179           IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
3180         PFETCH(c);
3181         tok->u.repeat.greedy     = 0;
3182         tok->u.repeat.possessive = 0;
3183       }
3184       else {
3185       possessive_check:
3186         if (!PEND && PPEEK_IS('+') &&
3187             ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
3188               tok->type != TK_INTERVAL)  ||
3189              (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
3190               tok->type == TK_INTERVAL))) {
3191           PFETCH(c);
3192           tok->u.repeat.greedy     = 1;
3193           tok->u.repeat.possessive = 1;
3194         }
3195         else {
3196           tok->u.repeat.greedy     = 1;
3197           tok->u.repeat.possessive = 0;
3198         }
3199       }
3200       break;
3201 
3202     case '{':
3203       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
3204       r = fetch_range_quantifier(&p, end, tok, env);
3205       if (r < 0) return r;  /* error */
3206       if (r == 0) goto greedy_check;
3207       else if (r == 2) { /* {n} */
3208         if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
3209           goto possessive_check;
3210 
3211         goto greedy_check;
3212       }
3213       /* r == 1 : normal char */
3214       break;
3215 
3216     case '|':
3217       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
3218       tok->type = TK_ALT;
3219       break;
3220 
3221     case '(':
3222       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
3223       tok->type = TK_SUBEXP_OPEN;
3224       break;
3225 
3226     case ')':
3227       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
3228       tok->type = TK_SUBEXP_CLOSE;
3229       break;
3230 
3231     case 'w':
3232       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3233       tok->type = TK_CHAR_TYPE;
3234       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3235       tok->u.prop.not   = 0;
3236       break;
3237 
3238     case 'W':
3239       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3240       tok->type = TK_CHAR_TYPE;
3241       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3242       tok->u.prop.not   = 1;
3243       break;
3244 
3245     case 'b':
3246       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3247       tok->type = TK_ANCHOR;
3248       tok->u.anchor = ANCHOR_WORD_BOUND;
3249       break;
3250 
3251     case 'B':
3252       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3253       tok->type = TK_ANCHOR;
3254       tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
3255       break;
3256 
3257 #ifdef USE_WORD_BEGIN_END
3258     case '<':
3259       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
3260       tok->type = TK_ANCHOR;
3261       tok->u.anchor = ANCHOR_WORD_BEGIN;
3262       break;
3263 
3264     case '>':
3265       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
3266       tok->type = TK_ANCHOR;
3267       tok->u.anchor = ANCHOR_WORD_END;
3268       break;
3269 #endif
3270 
3271     case 's':
3272       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
3273       tok->type = TK_CHAR_TYPE;
3274       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3275       tok->u.prop.not   = 0;
3276       break;
3277 
3278     case 'S':
3279       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
3280       tok->type = TK_CHAR_TYPE;
3281       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3282       tok->u.prop.not   = 1;
3283       break;
3284 
3285     case 'd':
3286       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3287       tok->type = TK_CHAR_TYPE;
3288       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3289       tok->u.prop.not   = 0;
3290       break;
3291 
3292     case 'D':
3293       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3294       tok->type = TK_CHAR_TYPE;
3295       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3296       tok->u.prop.not   = 1;
3297       break;
3298 
3299     case 'h':
3300       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3301       tok->type = TK_CHAR_TYPE;
3302       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3303       tok->u.prop.not   = 0;
3304       break;
3305 
3306     case 'H':
3307       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3308       tok->type = TK_CHAR_TYPE;
3309       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3310       tok->u.prop.not   = 1;
3311       break;
3312 
3313     case 'A':
3314       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3315     begin_buf:
3316       tok->type = TK_ANCHOR;
3317       tok->u.subtype = ANCHOR_BEGIN_BUF;
3318       break;
3319 
3320     case 'Z':
3321       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3322       tok->type = TK_ANCHOR;
3323       tok->u.subtype = ANCHOR_SEMI_END_BUF;
3324       break;
3325 
3326     case 'z':
3327       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3328     end_buf:
3329       tok->type = TK_ANCHOR;
3330       tok->u.subtype = ANCHOR_END_BUF;
3331       break;
3332 
3333     case 'G':
3334       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
3335       tok->type = TK_ANCHOR;
3336       tok->u.subtype = ANCHOR_BEGIN_POSITION;
3337       break;
3338 
3339     case '`':
3340       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
3341       goto begin_buf;
3342       break;
3343 
3344     case '\'':
3345       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
3346       goto end_buf;
3347       break;
3348 
3349     case 'x':
3350       if (PEND) break;
3351 
3352       prev = p;
3353       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
3354         PINC;
3355         num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
3356         if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3357         if (!PEND) {
3358           if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
3359             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3360         }
3361 
3362         if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
3363           PINC;
3364           tok->type   = TK_CODE_POINT;
3365           tok->u.code = (OnigCodePoint )num;
3366         }
3367         else {
3368           /* can't read nothing or invalid format */
3369           p = prev;
3370         }
3371       }
3372       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3373         num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
3374         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3375         if (p == prev) {  /* can't read nothing. */
3376           num = 0; /* but, it's not error */
3377         }
3378         tok->type = TK_RAW_BYTE;
3379         tok->base = 16;
3380         tok->u.c  = num;
3381       }
3382       break;
3383 
3384     case 'u':
3385       if (PEND) break;
3386 
3387       prev = p;
3388       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
3389         num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
3390         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3391         if (p == prev) {  /* can't read nothing. */
3392           num = 0; /* but, it's not error */
3393         }
3394         tok->type   = TK_CODE_POINT;
3395         tok->base   = 16;
3396         tok->u.code = (OnigCodePoint )num;
3397       }
3398       break;
3399 
3400     case '1': case '2': case '3': case '4':
3401     case '5': case '6': case '7': case '8': case '9':
3402       PUNFETCH;
3403       prev = p;
3404       num = onig_scan_unsigned_number(&p, end, enc);
3405       if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
3406         goto skip_backref;
3407       }
3408 
3409       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && 
3410           (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
3411         if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
3412           if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
3413             return ONIGERR_INVALID_BACKREF;
3414         }
3415 
3416         tok->type = TK_BACKREF;
3417         tok->u.backref.num     = 1;
3418         tok->u.backref.ref1    = num;
3419         tok->u.backref.by_name = 0;
3420 #ifdef USE_BACKREF_WITH_LEVEL
3421         tok->u.backref.exist_level = 0;
3422 #endif
3423         break;
3424       }
3425 
3426     skip_backref:
3427       if (c == '8' || c == '9') {
3428         /* normal char */
3429         p = prev; PINC;
3430         break;
3431       }
3432 
3433       p = prev;
3434       /* fall through */
3435     case '0':
3436       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
3437         prev = p;
3438         num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
3439         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3440         if (p == prev) {  /* can't read nothing. */
3441           num = 0; /* but, it's not error */
3442         }
3443         tok->type = TK_RAW_BYTE;
3444         tok->base = 8;
3445         tok->u.c  = num;
3446       }
3447       else if (c != '0') {
3448         PINC;
3449       }
3450       break;
3451 
3452 #ifdef USE_NAMED_GROUP
3453     case 'k':
3454       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
3455         PFETCH(c);
3456         if (c == '<' || c == '\'') {
3457           UChar* name_end;
3458           int* backs;
3459           int back_num;
3460 
3461           prev = p;
3462 
3463 #ifdef USE_BACKREF_WITH_LEVEL
3464           name_end = NULL_UCHARP; /* no need. escape gcc warning. */
3465           r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
3466                                     env, &back_num, &tok->u.backref.level);
3467           if (r == 1) tok->u.backref.exist_level = 1;
3468           else        tok->u.backref.exist_level = 0;
3469 #else
3470           r = fetch_name(&p, end, &name_end, env, &back_num, 1);
3471 #endif
3472           if (r < 0) return r;
3473 
3474           if (back_num != 0) {
3475             if (back_num < 0) {
3476               back_num = BACKREF_REL_TO_ABS(back_num, env);
3477               if (back_num <= 0)
3478                 return ONIGERR_INVALID_BACKREF;
3479             }
3480 
3481             if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
3482               if (back_num > env->num_mem ||
3483                   IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
3484                 return ONIGERR_INVALID_BACKREF;
3485             }
3486             tok->type = TK_BACKREF;
3487             tok->u.backref.by_name = 0;
3488             tok->u.backref.num  = 1;
3489             tok->u.backref.ref1 = back_num;
3490           }
3491           else {
3492             num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
3493             if (num <= 0) {
3494               onig_scan_env_set_error_string(env,
3495                              ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
3496               return ONIGERR_UNDEFINED_NAME_REFERENCE;
3497             }
3498             if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
3499               int i;
3500               for (i = 0; i < num; i++) {
3501                 if (backs[i] > env->num_mem ||
3502                     IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
3503                   return ONIGERR_INVALID_BACKREF;
3504               }
3505             }
3506 
3507             tok->type = TK_BACKREF;
3508             tok->u.backref.by_name = 1;
3509             if (num == 1) {
3510               tok->u.backref.num  = 1;
3511               tok->u.backref.ref1 = backs[0];
3512             }
3513             else {
3514               tok->u.backref.num  = num;
3515               tok->u.backref.refs = backs;
3516             }
3517           }
3518         }
3519         else
3520           PUNFETCH;
3521       }
3522       break;
3523 #endif
3524 
3525 #ifdef USE_SUBEXP_CALL
3526     case 'g':
3527       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
3528         PFETCH(c);
3529         if (c == '<' || c == '\'') {
3530           int gnum;
3531           UChar* name_end;
3532 
3533           prev = p;
3534           r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
3535           if (r < 0) return r;
3536 
3537           tok->type = TK_CALL;
3538           tok->u.call.name     = prev;
3539           tok->u.call.name_end = name_end;
3540           tok->u.call.gnum     = gnum;
3541         }
3542         else
3543           PUNFETCH;
3544       }
3545       break;
3546 #endif
3547 
3548     case 'Q':
3549       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
3550         tok->type = TK_QUOTE_OPEN;
3551       }
3552       break;
3553 
3554     case 'p':
3555     case 'P':
3556       if (PPEEK_IS('{') &&
3557           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
3558         PINC;
3559         tok->type = TK_CHAR_PROPERTY;
3560         tok->u.prop.not = (c == 'P' ? 1 : 0);
3561 
3562         if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
3563           PFETCH(c);
3564           if (c == '^') {
3565             tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3566           }
3567           else
3568             PUNFETCH;
3569         }
3570       }
3571       break;
3572 
3573     default:
3574       PUNFETCH;
3575       num = fetch_escaped_value(&p, end, env);
3576       if (num < 0) return num;
3577       /* set_raw: */
3578       if (tok->u.c != num) {
3579         tok->type = TK_CODE_POINT;
3580         tok->u.code = (OnigCodePoint )num;
3581       }
3582       else { /* string */
3583         p = tok->backp + enclen(enc, tok->backp);
3584       }
3585       break;
3586     }
3587   }
3588   else {
3589     tok->u.c = c;
3590     tok->escaped = 0;
3591 
3592 #ifdef USE_VARIABLE_META_CHARS
3593     if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
3594         IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
3595       if (c == MC_ANYCHAR(syn))
3596         goto any_char;
3597       else if (c == MC_ANYTIME(syn))
3598         goto anytime;
3599       else if (c == MC_ZERO_OR_ONE_TIME(syn))
3600         goto zero_or_one_time;
3601       else if (c == MC_ONE_OR_MORE_TIME(syn))
3602         goto one_or_more_time;
3603       else if (c == MC_ANYCHAR_ANYTIME(syn)) {
3604         tok->type = TK_ANYCHAR_ANYTIME;
3605         goto out;
3606       }
3607     }
3608 #endif
3609 
3610     switch (c) {
3611     case '.':
3612       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
3613 #ifdef USE_VARIABLE_META_CHARS
3614     any_char:
3615 #endif
3616       tok->type = TK_ANYCHAR;
3617       break;
3618 
3619     case '*':
3620       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
3621 #ifdef USE_VARIABLE_META_CHARS
3622     anytime:
3623 #endif
3624       tok->type = TK_OP_REPEAT;
3625       tok->u.repeat.lower = 0;
3626       tok->u.repeat.upper = REPEAT_INFINITE;
3627       goto greedy_check;
3628       break;
3629 
3630     case '+':
3631       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
3632 #ifdef USE_VARIABLE_META_CHARS
3633     one_or_more_time:
3634 #endif
3635       tok->type = TK_OP_REPEAT;
3636       tok->u.repeat.lower = 1;
3637       tok->u.repeat.upper = REPEAT_INFINITE;
3638       goto greedy_check;
3639       break;
3640 
3641     case '?':
3642       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
3643 #ifdef USE_VARIABLE_META_CHARS
3644     zero_or_one_time:
3645 #endif
3646       tok->type = TK_OP_REPEAT;
3647       tok->u.repeat.lower = 0;
3648       tok->u.repeat.upper = 1;
3649       goto greedy_check;
3650       break;
3651 
3652     case '{':
3653       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
3654       r = fetch_range_quantifier(&p, end, tok, env);
3655       if (r < 0) return r;  /* error */
3656       if (r == 0) goto greedy_check;
3657       else if (r == 2) { /* {n} */
3658         if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
3659           goto possessive_check;
3660 
3661         goto greedy_check;
3662       }
3663       /* r == 1 : normal char */
3664       break;
3665 
3666     case '|':
3667       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
3668       tok->type = TK_ALT;
3669       break;
3670 
3671     case '(':
3672       if (PPEEK_IS('?') &&
3673           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
3674         PINC;
3675         if (PPEEK_IS('#')) {
3676           PFETCH(c);
3677           while (1) {
3678             if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
3679             PFETCH(c);
3680             if (c == MC_ESC(syn)) {
3681               if (!PEND) PFETCH(c);
3682             }
3683             else {
3684               if (c == ')') break;
3685             }
3686           }
3687           goto start;
3688         }
3689         PUNFETCH;
3690       }
3691 
3692       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3693       tok->type = TK_SUBEXP_OPEN;
3694       break;
3695 
3696     case ')':
3697       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3698       tok->type = TK_SUBEXP_CLOSE;
3699       break;
3700 
3701     case '^':
3702       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
3703       tok->type = TK_ANCHOR;
3704       tok->u.subtype = (IS_SINGLELINE(env->option)
3705                         ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
3706       break;
3707 
3708     case '$':
3709       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
3710       tok->type = TK_ANCHOR;
3711       tok->u.subtype = (IS_SINGLELINE(env->option)
3712                         ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
3713       break;
3714 
3715     case '[':
3716       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
3717       tok->type = TK_CC_OPEN;
3718       break;
3719 
3720     case ']':
3721       if (*src > env->pattern)   /* /].../ is allowed. */
3722         CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
3723       break;
3724 
3725     case '#':
3726       if (IS_EXTEND(env->option)) {
3727         while (!PEND) {
3728           PFETCH(c);
3729           if (ONIGENC_IS_CODE_NEWLINE(enc, c))
3730             break;
3731         }
3732         goto start;
3733         break;
3734       }
3735       break;
3736 
3737     case ' ': case '\t': case '\n': case '\r': case '\f':
3738       if (IS_EXTEND(env->option))
3739         goto start;
3740       break;
3741 
3742     default:
3743       /* string */
3744       break;
3745     }
3746   }
3747 
3748 #ifdef USE_VARIABLE_META_CHARS
3749  out:
3750 #endif
3751   *src = p;
3752   return tok->type;
3753 }
3754 
3755 static int
3756 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
3757                          OnigEncoding enc ARG_UNUSED,
3758                          OnigCodePoint sb_out, const OnigCodePoint mbr[])
3759 {
3760   int i, r;
3761   OnigCodePoint j;
3762 
3763   int n = ONIGENC_CODE_RANGE_NUM(mbr);
3764 
3765   if (not == 0) {
3766     for (i = 0; i < n; i++) {
3767       for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
3768            j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
3769         if (j >= sb_out) {
3770           if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;
3771           else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
3772             r = add_code_range_to_buf(&(cc->mbuf), j,
3773                                       ONIGENC_CODE_RANGE_TO(mbr, i));
3774             if (r != 0) return r;
3775             i++;
3776           }
3777 
3778           goto sb_end;
3779         }
3780         BITSET_SET_BIT(cc->bs, j);
3781       }
3782     }
3783 
3784   sb_end:
3785     for ( ; i < n; i++) {
3786       r = add_code_range_to_buf(&(cc->mbuf),
3787                                 ONIGENC_CODE_RANGE_FROM(mbr, i),
3788                                 ONIGENC_CODE_RANGE_TO(mbr, i));
3789       if (r != 0) return r;
3790     }
3791   }
3792   else {
3793     OnigCodePoint prev = 0;
3794 
3795     for (i = 0; i < n; i++) {
3796       for (j = prev;
3797            j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
3798         if (j >= sb_out) {
3799           goto sb_end2;
3800         }
3801         BITSET_SET_BIT(cc->bs, j);
3802       }
3803       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
3804     }
3805     for (j = prev; j < sb_out; j++) {
3806       BITSET_SET_BIT(cc->bs, j);
3807     }
3808 
3809   sb_end2:
3810     prev = sb_out;
3811 
3812     for (i = 0; i < n; i++) {
3813       if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
3814         r = add_code_range_to_buf(&(cc->mbuf), prev,
3815                                   ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
3816         if (r != 0) return r;
3817       }
3818       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
3819     }
3820     if (prev < 0x7fffffff) {
3821       r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
3822       if (r != 0) return r;
3823     }
3824   }
3825 
3826   return 0;
3827 }
3828 
3829 static int
3830 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
3831 {
3832   int c, r;
3833   const OnigCodePoint *ranges;
3834   OnigCodePoint sb_out;
3835   OnigEncoding enc = env->enc;
3836 
3837   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
3838   if (r == 0) {
3839     return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
3840   }
3841   else if (r != ONIG_NO_SUPPORT_CONFIG) {
3842     return r;
3843   }
3844 
3845   r = 0;
3846   switch (ctype) {
3847   case ONIGENC_CTYPE_ALPHA:
3848   case ONIGENC_CTYPE_BLANK:
3849   case ONIGENC_CTYPE_CNTRL:
3850   case ONIGENC_CTYPE_DIGIT:
3851   case ONIGENC_CTYPE_LOWER:
3852   case ONIGENC_CTYPE_PUNCT:
3853   case ONIGENC_CTYPE_SPACE:
3854   case ONIGENC_CTYPE_UPPER:
3855   case ONIGENC_CTYPE_XDIGIT:
3856   case ONIGENC_CTYPE_ASCII:
3857   case ONIGENC_CTYPE_ALNUM:
3858     if (not != 0) {
3859       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3860         if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
3861           BITSET_SET_BIT(cc->bs, c);
3862       }
3863       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
3864     }
3865     else {
3866       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3867         if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
3868           BITSET_SET_BIT(cc->bs, c);
3869       }
3870     }
3871     break;
3872 
3873   case ONIGENC_CTYPE_GRAPH:
3874   case ONIGENC_CTYPE_PRINT:
3875     if (not != 0) {
3876       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3877         if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
3878           BITSET_SET_BIT(cc->bs, c);
3879       }
3880     }
3881     else {
3882       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3883         if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
3884           BITSET_SET_BIT(cc->bs, c);
3885       }
3886       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
3887     }
3888     break;
3889 
3890   case ONIGENC_CTYPE_WORD:
3891     if (not == 0) {
3892       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3893         if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
3894       }
3895       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
3896     }
3897     else {
3898       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
3899         if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
3900             && ! ONIGENC_IS_CODE_WORD(enc, c))
3901           BITSET_SET_BIT(cc->bs, c);
3902       }
3903     }
3904     break;
3905 
3906   default:
3907     return ONIGERR_PARSER_BUG;
3908     break;
3909   }
3910 
3911   return r;
3912 }
3913 
3914 static int
3915 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
3916 {
3917 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20
3918 #define POSIX_BRACKET_NAME_MIN_LEN         4
3919 
3920   static PosixBracketEntryType PBS[] = {
3921     { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
3922     { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
3923     { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
3924     { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
3925     { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
3926     { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
3927     { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
3928     { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
3929     { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
3930     { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
3931     { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
3932     { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
3933     { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
3934     { (UChar* )"word",   ONIGENC_CTYPE_WORD,   4 },
3935     { (UChar* )NULL,     -1, 0 }
3936   };
3937 
3938   PosixBracketEntryType *pb;
3939   int not, i, r;
3940   OnigCodePoint c;
3941   OnigEncoding enc = env->enc;
3942   UChar *p = *src;
3943 
3944   if (PPEEK_IS('^')) {
3945     PINC_S;
3946     not = 1;
3947   }
3948   else
3949     not = 0;
3950 
3951   if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
3952     goto not_posix_bracket;
3953 
3954   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
3955     if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
3956       p = (UChar* )onigenc_step(enc, p, end, pb->len);
3957       if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
3958         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
3959 
3960       r = add_ctype_to_cc(cc, pb->ctype, not, env);
3961       if (r != 0) return r;
3962 
3963       PINC_S; PINC_S;
3964       *src = p;
3965       return 0;
3966     }
3967   }
3968 
3969  not_posix_bracket:
3970   c = 0;
3971   i = 0;
3972   while (!PEND && ((c = PPEEK) != ':') && c != ']') {
3973     PINC_S;
3974     if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
3975   }
3976   if (c == ':' && ! PEND) {
3977     PINC_S;
3978     if (! PEND) {
3979       PFETCH_S(c);
3980       if (c == ']')
3981         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
3982     }
3983   }
3984 
3985   return 1;  /* 1: is not POSIX bracket, but no error. */
3986 }
3987 
3988 static int
3989 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
3990 {
3991   int r;
3992   OnigCodePoint c;
3993   OnigEncoding enc = env->enc;
3994   UChar *prev, *start, *p = *src;
3995 
3996   r = 0;
3997   start = prev = p;
3998 
3999   while (!PEND) {
4000     prev = p;
4001     PFETCH_S(c);
4002     if (c == '}') {
4003       r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
4004       if (r < 0) break;
4005 
4006       *src = p;
4007       return r;
4008     }
4009     else if (c == '(' || c == ')' || c == '{' || c == '|') {
4010       r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
4011       break;
4012     }
4013   }
4014 
4015   onig_scan_env_set_error_string(env, r, *src, prev);
4016   return r;
4017 }
4018 
4019 static int
4020 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
4021                     ScanEnv* env)
4022 {
4023   int r, ctype;
4024   CClassNode* cc;
4025 
4026   ctype = fetch_char_property_to_ctype(src, end, env);
4027   if (ctype < 0) return ctype;
4028 
4029   *np = node_new_cclass();
4030   CHECK_NULL_RETURN_MEMERR(*np);
4031   cc = NCCLASS(*np);
4032   r = add_ctype_to_cc(cc, ctype, 0, env);
4033   if (r != 0) return r;
4034   if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
4035 
4036   return 0;
4037 }
4038 
4039 
/* State of the character-class parser between two items. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a single value is pending and not yet stored */
  CCS_RANGE    = 1,  /* "value-" has been read, the range end is expected */
  CCS_COMPLETE = 2,  /* a range has just been completed */
  CCS_START    = 3   /* nothing read yet (class start, or right after &&) */
};
4046 
/* Kind of the item most recently read inside a character class. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* single-byte value, recorded in the bit set */
  CCV_CODE_POINT = 1,  /* code point, recorded in the multi-byte buffer */
  CCV_CLASS      = 2   /* a whole class/type, nothing pending to store */
};
4052 
4053 static int
4054 next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
4055                  enum CCSTATE* state, ScanEnv* env)
4056 {
4057   int r;
4058 
4059   if (*state == CCS_RANGE)
4060     return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
4061 
4062   if (*state == CCS_VALUE && *type != CCV_CLASS) {
4063     if (*type == CCV_SB)
4064       BITSET_SET_BIT(cc->bs, (int )(*vs));
4065     else if (*type == CCV_CODE_POINT) {
4066       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4067       if (r < 0) return r;
4068     }
4069   }
4070 
4071   *state = CCS_VALUE;
4072   *type  = CCV_CLASS;
4073   return 0;
4074 }
4075 
4076 static int
4077 next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
4078                int* vs_israw, int v_israw,
4079                enum CCVALTYPE intype, enum CCVALTYPE* type,
4080                enum CCSTATE* state, ScanEnv* env)
4081 {
4082   int r;
4083 
4084   switch (*state) {
4085   case CCS_VALUE:
4086     if (*type == CCV_SB)
4087       BITSET_SET_BIT(cc->bs, (int )(*vs));
4088     else if (*type == CCV_CODE_POINT) {
4089       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4090       if (r < 0) return r;
4091     }
4092     break;
4093 
4094   case CCS_RANGE:
4095     if (intype == *type) {
4096       if (intype == CCV_SB) {
4097         if (*vs > 0xff || v > 0xff)
4098           return ONIGERR_INVALID_CODE_POINT_VALUE;
4099 
4100         if (*vs > v) {
4101           if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
4102             goto ccs_range_end;
4103           else
4104             return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
4105         }
4106         bitset_set_range(cc->bs, (int )*vs, (int )v);
4107       }
4108       else {
4109         r = add_code_range(&(cc->mbuf), env, *vs, v);
4110         if (r < 0) return r;
4111       }
4112     }
4113     else {
4114 #if 0
4115       if (intype == CCV_CODE_POINT && *type == CCV_SB) {
4116 #endif
4117         if (*vs > v) {
4118           if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
4119             goto ccs_range_end;
4120           else
4121             return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
4122         }
4123         bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
4124         r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
4125         if (r < 0) return r;
4126 #if 0
4127       }
4128       else
4129         return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
4130 #endif
4131     }
4132   ccs_range_end:
4133     *state = CCS_COMPLETE;
4134     break;
4135 
4136   case CCS_COMPLETE:
4137   case CCS_START:
4138     *state = CCS_VALUE;
4139     break;
4140 
4141   default:
4142     break;
4143   }
4144 
4145   *vs_israw = v_israw;
4146   *vs       = v;
4147   *type     = intype;
4148   return 0;
4149 }
4150 
4151 static int
4152 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
4153                  ScanEnv* env)
4154 {
4155   int in_esc;
4156   OnigCodePoint code;
4157   OnigEncoding enc = env->enc;
4158   UChar* p = from;
4159 
4160   in_esc = 0;
4161   while (! PEND) {
4162     if (ignore_escaped && in_esc) {
4163       in_esc = 0;
4164     }
4165     else {
4166       PFETCH_S(code);
4167       if (code == c) return 1;
4168       if (code == MC_ESC(env->syntax)) in_esc = 1;
4169     }
4170   }
4171   return 0;
4172 }
4173 
4174 static int
4175 parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
4176                  ScanEnv* env)
4177 {
4178   int r, neg, len, fetched, and_start;
4179   OnigCodePoint v, vs;
4180   UChar *p;
4181   Node* node;
4182   CClassNode *cc, *prev_cc;
4183   CClassNode work_cc;
4184 
4185   enum CCSTATE state;
4186   enum CCVALTYPE val_type, in_type;
4187   int val_israw, in_israw;
4188 
4189   prev_cc = (CClassNode* )NULL;
4190   *np = NULL_NODE;
4191   r = fetch_token_in_cc(tok, src, end, env);
4192   if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
4193     neg = 1;
4194     r = fetch_token_in_cc(tok, src, end, env);
4195   }
4196   else {
4197     neg = 0;
4198   }
4199 
4200   if (r < 0) return r;
4201   if (r == TK_CC_CLOSE) {
4202     if (! code_exist_check((OnigCodePoint )']',
4203                            *src, env->pattern_end, 1, env))
4204       return ONIGERR_EMPTY_CHAR_CLASS;
4205 
4206     CC_ESC_WARN(env, (UChar* )"]");
4207     r = tok->type = TK_CHAR;  /* allow []...] */
4208   }
4209 
4210   *np = node = node_new_cclass();
4211   CHECK_NULL_RETURN_MEMERR(node);
4212   cc = NCCLASS(node);
4213 
4214   and_start = 0;
4215   state = CCS_START;
4216   p = *src;
4217   while (r != TK_CC_CLOSE) {
4218     fetched = 0;
4219     switch (r) {
4220     case TK_CHAR:
4221       len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
4222       if (len > 1) {
4223         in_type = CCV_CODE_POINT;
4224       }
4225       else if (len < 0) {
4226         r = len;
4227         goto err;
4228       }
4229       else {
4230       sb_char:
4231         in_type = CCV_SB;
4232       }
4233       v = (OnigCodePoint )tok->u.c;
4234       in_israw = 0;
4235       goto val_entry2;
4236       break;
4237 
4238     case TK_RAW_BYTE:
4239       /* tok->base != 0 : octal or hexadec. */
4240       if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
4241         UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4242         UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
4243         UChar* psave = p;
4244         int i, base = tok->base;
4245 
4246         buf[0] = tok->u.c;
4247         for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
4248           r = fetch_token_in_cc(tok, &p, end, env);
4249           if (r < 0) goto err;
4250           if (r != TK_RAW_BYTE || tok->base != base) {
4251             fetched = 1;
4252             break;
4253           }
4254           buf[i] = tok->u.c;
4255         }
4256 
4257         if (i < ONIGENC_MBC_MINLEN(env->enc)) {
4258           r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
4259           goto err;
4260         }
4261 
4262         len = enclen(env->enc, buf);
4263         if (i < len) {
4264           r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
4265           goto err;
4266         }
4267         else if (i > len) { /* fetch back */
4268           p = psave;
4269           for (i = 1; i < len; i++) {
4270             r = fetch_token_in_cc(tok, &p, end, env);
4271           }
4272           fetched = 0;
4273         }
4274 
4275         if (i == 1) {
4276           v = (OnigCodePoint )buf[0];
4277           goto raw_single;
4278         }
4279         else {
4280           v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
4281           in_type = CCV_CODE_POINT;
4282         }
4283       }
4284       else {
4285         v = (OnigCodePoint )tok->u.c;
4286       raw_single:
4287         in_type = CCV_SB;
4288       }
4289       in_israw = 1;
4290       goto val_entry2;
4291       break;
4292 
4293     case TK_CODE_POINT:
4294       v = tok->u.code;
4295       in_israw = 1;
4296     val_entry:
4297       len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
4298       if (len < 0) {
4299         r = len;
4300         goto err;
4301       }
4302       in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
4303     val_entry2:
4304       r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
4305                          &state, env);
4306       if (r != 0) goto err;
4307       break;
4308 
4309     case TK_POSIX_BRACKET_OPEN:
4310       r = parse_posix_bracket(cc, &p, end, env);
4311       if (r < 0) goto err;
4312       if (r == 1) {  /* is not POSIX bracket */
4313         CC_ESC_WARN(env, (UChar* )"[");
4314         p = tok->backp;
4315         v = (OnigCodePoint )tok->u.c;
4316         in_israw = 0;
4317         goto val_entry;
4318       }
4319       goto next_class;
4320       break;
4321 
4322     case TK_CHAR_TYPE:
4323       r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
4324       if (r != 0) return r;
4325 
4326     next_class:
4327       r = next_state_class(cc, &vs, &val_type, &state, env);
4328       if (r != 0) goto err;
4329       break;
4330 
4331     case TK_CHAR_PROPERTY:
4332       {
4333         int ctype;
4334 
4335         ctype = fetch_char_property_to_ctype(&p, end, env);
4336         if (ctype < 0) return ctype;
4337         r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
4338         if (r != 0) return r;
4339         goto next_class;
4340       }
4341       break;
4342 
4343     case TK_CC_RANGE:
4344       if (state == CCS_VALUE) {
4345         r = fetch_token_in_cc(tok, &p, end, env);
4346         if (r < 0) goto err;
4347         fetched = 1;
4348         if (r == TK_CC_CLOSE) { /* allow [x-] */
4349         range_end_val:
4350           v = (OnigCodePoint )'-';
4351           in_israw = 0;
4352           goto val_entry;
4353         }
4354         else if (r == TK_CC_AND) {
4355           CC_ESC_WARN(env, (UChar* )"-");
4356           goto range_end_val;
4357         }
4358         state = CCS_RANGE;
4359       }
4360       else if (state == CCS_START) {
4361         /* [-xa] is allowed */
4362         v = (OnigCodePoint )tok->u.c;
4363         in_israw = 0;
4364 
4365         r = fetch_token_in_cc(tok, &p, end, env);
4366         if (r < 0) goto err;
4367         fetched = 1;
4368         /* [--x] or [a&&-x] is warned. */
4369         if (r == TK_CC_RANGE || and_start != 0)
4370           CC_ESC_WARN(env, (UChar* )"-");
4371 
4372         goto val_entry;
4373       }
4374       else if (state == CCS_RANGE) {
4375         CC_ESC_WARN(env, (UChar* )"-");
4376         goto sb_char;  /* [!--x] is allowed */
4377       }
4378       else { /* CCS_COMPLETE */
4379         r = fetch_token_in_cc(tok, &p, end, env);
4380         if (r < 0) goto err;
4381         fetched = 1;
4382         if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
4383         else if (r == TK_CC_AND) {
4384           CC_ESC_WARN(env, (UChar* )"-");
4385           goto range_end_val;
4386         }
4387         
4388         if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
4389           CC_ESC_WARN(env, (UChar* )"-");
4390           goto sb_char;   /* [0-9-a] is allowed as [0-9\-a] */
4391         }
4392         r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
4393         goto err;
4394       }
4395       break;
4396 
4397     case TK_CC_CC_OPEN: /* [ */
4398       {
4399         Node *anode;
4400         CClassNode* acc;
4401 
4402         r = parse_char_class(&anode, tok, &p, end, env);
4403         if (r != 0) goto cc_open_err;
4404         acc = NCCLASS(anode);
4405         r = or_cclass(cc, acc, env->enc);
4406 
4407         onig_node_free(anode);
4408       cc_open_err:
4409         if (r != 0) goto err;
4410       }
4411       break;
4412 
4413     case TK_CC_AND: /* && */
4414       {
4415         if (state == CCS_VALUE) {
4416           r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
4417                              &val_type, &state, env);
4418           if (r != 0) goto err;
4419         }
4420         /* initialize local variables */
4421         and_start = 1;
4422         state = CCS_START;
4423 
4424         if (IS_NOT_NULL(prev_cc)) {
4425           r = and_cclass(prev_cc, cc, env->enc);
4426           if (r != 0) goto err;
4427           bbuf_free(cc->mbuf);
4428         }
4429         else {
4430           prev_cc = cc;
4431           cc = &work_cc;
4432         }
4433         initialize_cclass(cc);
4434       }
4435       break;
4436 
4437     case TK_EOT:
4438       r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
4439       goto err;
4440       break;
4441     default:
4442       r = ONIGERR_PARSER_BUG;
4443       goto err;
4444       break;
4445     }
4446 
4447     if (fetched)
4448       r = tok->type;
4449     else {
4450       r = fetch_token_in_cc(tok, &p, end, env);
4451       if (r < 0) goto err;
4452     }
4453   }
4454 
4455   if (state == CCS_VALUE) {
4456     r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
4457                        &val_type, &state, env);
4458     if (r != 0) goto err;
4459   }
4460 
4461   if (IS_NOT_NULL(prev_cc)) {
4462     r = and_cclass(prev_cc, cc, env->enc);
4463     if (r != 0) goto err;
4464     bbuf_free(cc->mbuf);
4465     cc = prev_cc;
4466   }
4467 
4468   if (neg != 0)
4469     NCCLASS_SET_NOT(cc);
4470   else
4471     NCCLASS_CLEAR_NOT(cc);
4472   if (IS_NCCLASS_NOT(cc) &&
4473       IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
4474     int is_empty;
4475 
4476     is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
4477     if (is_empty != 0)
4478       BITSET_IS_EMPTY(cc->bs, is_empty);
4479 
4480     if (is_empty == 0) {
4481 #define NEWLINE_CODE    0x0a
4482 
4483       if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
4484         if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
4485           BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
4486         else
4487           add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
4488       }
4489     }
4490   }
4491   *src = p;
4492   return 0;
4493 
4494  err:
4495   if (cc != NCCLASS(*np))
4496     bbuf_free(cc->mbuf);
4497   onig_node_free(*np);
4498   return r;
4499 }
4500 
/* Forward declaration: parse_enclose() and parse_exp() below call
   parse_subexp() before its definition appears later in this file. */
static int parse_subexp(Node** top, OnigToken* tok, int term,
                        UChar** src, UChar* end, ScanEnv* env);
4503 
4504 static int
4505 parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
4506               ScanEnv* env)
4507 {
4508   int r, num;
4509   Node *target;
4510   OnigOptionType option;
4511   OnigCodePoint c;
4512   OnigEncoding enc = env->enc;
4513 
4514 #ifdef USE_NAMED_GROUP
4515   int list_capture;
4516 #endif
4517 
4518   UChar* p = *src;
4519   PFETCH_READY;
4520 
4521   *np = NULL;
4522   if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
4523 
4524   option = env->option;
4525   if (PPEEK_IS('?') &&
4526       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
4527     PINC;
4528     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
4529 
4530     PFETCH(c);
4531     switch (c) {
4532     case ':':   /* (?:...) grouping only */
4533     group:
4534       r = fetch_token(tok, &p, end, env);
4535       if (r < 0) return r;
4536       r = parse_subexp(np, tok, term, &p, end, env);
4537       if (r < 0) return r;
4538       *src = p;
4539       return 1; /* group */
4540       break;
4541 
4542     case '=':
4543       *np = onig_node_new_anchor(ANCHOR_PREC_READ);
4544       break;
4545     case '!':  /*         preceding read */
4546       *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
4547       break;
4548     case '>':            /* (?>...) stop backtrack */
4549       *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
4550       break;
4551 
4552 #ifdef USE_NAMED_GROUP
4553     case '\'':
4554       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
4555         goto named_group1;
4556       }
4557       else
4558         return ONIGERR_UNDEFINED_GROUP_OPTION;
4559       break;
4560 #endif
4561 
4562     case '<':   /* look behind (?<=...), (?<!...) */
4563       PFETCH(c);
4564       if (c == '=')
4565         *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
4566       else if (c == '!')
4567         *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
4568 #ifdef USE_NAMED_GROUP
4569       else {
4570         if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
4571           UChar *name;
4572           UChar *name_end;
4573 
4574           PUNFETCH;
4575           c = '<';
4576 
4577         named_group1:
4578           list_capture = 0;
4579 
4580         named_group2:
4581           name = p;
4582           r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
4583           if (r < 0) return r;
4584 
4585           num = scan_env_add_mem_entry(env);
4586           if (num < 0) return num;
4587           if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
4588             return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
4589 
4590           r = name_add(env->reg, name, name_end, num, env);
4591           if (r != 0) return r;
4592           *np = node_new_enclose_memory(env->option, 1);
4593           CHECK_NULL_RETURN_MEMERR(*np);
4594           NENCLOSE(*np)->regnum = num;
4595           if (list_capture != 0)
4596             BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
4597           env->num_named++;
4598         }
4599         else {
4600           return ONIGERR_UNDEFINED_GROUP_OPTION;
4601         }
4602       }
4603 #else
4604       else {
4605         return ONIGERR_UNDEFINED_GROUP_OPTION;
4606       }
4607 #endif
4608       break;
4609 
4610     case '@':
4611       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
4612 #ifdef USE_NAMED_GROUP
4613         if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
4614           PFETCH(c);
4615           if (c == '<' || c == '\'') {
4616             list_capture = 1;
4617             goto named_group2; /* (?@<name>...) */
4618           }
4619           PUNFETCH;
4620         }
4621 #endif
4622         *np = node_new_enclose_memory(env->option, 0);
4623         CHECK_NULL_RETURN_MEMERR(*np);
4624         num = scan_env_add_mem_entry(env);
4625         if (num < 0) {
4626           onig_node_free(*np);
4627           return num;
4628         }
4629         else if (num >= (int )BIT_STATUS_BITS_NUM) {
4630           onig_node_free(*np);
4631           return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
4632         }
4633         NENCLOSE(*np)->regnum = num;
4634         BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
4635       }
4636       else {
4637         return ONIGERR_UNDEFINED_GROUP_OPTION;
4638       }
4639       break;
4640 
4641 #ifdef USE_POSIXLINE_OPTION
4642     case 'p':
4643 #endif
4644     case '-': case 'i': case 'm': case 's': case 'x':
4645       {
4646         int neg = 0;
4647 
4648         while (1) {
4649           switch (c) {
4650           case ':':
4651           case ')':
4652           break;
4653 
4654           case '-':  neg = 1; break;
4655           case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;
4656           case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
4657           case 's':
4658             if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
4659               ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
4660             }
4661             else
4662               return ONIGERR_UNDEFINED_GROUP_OPTION;
4663             break;
4664 
4665           case 'm':
4666             if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
4667               ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
4668             }
4669             else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
4670               ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
4671             }
4672             else
4673               return ONIGERR_UNDEFINED_GROUP_OPTION;
4674             break;
4675 #ifdef USE_POSIXLINE_OPTION
4676           case 'p':
4677             ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
4678             break;
4679 #endif
4680           default:
4681             return ONIGERR_UNDEFINED_GROUP_OPTION;
4682           }
4683 
4684           if (c == ')') {
4685             *np = node_new_option(option);
4686             CHECK_NULL_RETURN_MEMERR(*np);
4687             *src = p;
4688             return 2; /* option only */
4689           }
4690           else if (c == ':') {
4691             OnigOptionType prev = env->option;
4692 
4693             env->option     = option;
4694             r = fetch_token(tok, &p, end, env);
4695             if (r < 0) return r;
4696             r = parse_subexp(&target, tok, term, &p, end, env);
4697             env->option = prev;
4698             if (r < 0) return r;
4699             *np = node_new_option(option);
4700             CHECK_NULL_RETURN_MEMERR(*np);
4701             NENCLOSE(*np)->target = target;
4702             *src = p;
4703             return 0;
4704           }
4705 
4706           if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
4707           PFETCH(c);
4708         }
4709       }
4710       break;
4711 
4712     default:
4713       return ONIGERR_UNDEFINED_GROUP_OPTION;
4714     }
4715   }
4716   else {
4717     if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
4718       goto group;
4719 
4720     *np = node_new_enclose_memory(env->option, 0);
4721     CHECK_NULL_RETURN_MEMERR(*np);
4722     num = scan_env_add_mem_entry(env);
4723     if (num < 0) return num;
4724     NENCLOSE(*np)->regnum = num;
4725   }
4726 
4727   CHECK_NULL_RETURN_MEMERR(*np);
4728   r = fetch_token(tok, &p, end, env);
4729   if (r < 0) return r;
4730   r = parse_subexp(&target, tok, term, &p, end, env);
4731   if (r < 0) return r;
4732 
4733   if (NTYPE(*np) == NT_ANCHOR)
4734     NANCHOR(*np)->target = target;
4735   else {
4736     NENCLOSE(*np)->target = target;
4737     if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
4738       /* Don't move this to previous of parse_subexp() */
4739       r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
4740       if (r != 0) return r;
4741     }
4742   }
4743 
4744   *src = p;
4745   return 0;
4746 }
4747 
/* Printable forms of the quantifiers, used by set_quantifier() in its
   "nested repeat operator" warning; indexed by the value returned from
   popular_quantifier_num(). */
static const char* PopularQStr[] = {
  "?", "*", "+", "??", "*?", "+?"
};

/* Printable form of the replacement reported by the same warning; indexed
   by an entry of ReduceTypeTable. */
static const char* ReduceQStr[] = {
  "", "", "*", "*?", "??", "+ and ??", "+? and ?"
};
4755 
4756 static int
4757 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
4758 {
4759   QtfrNode* qn;
4760 
4761   qn = NQTFR(qnode);
4762   if (qn->lower == 1 && qn->upper == 1) {
4763     return 1;
4764   }
4765 
4766   switch (NTYPE(target)) {
4767   case NT_STR:
4768     if (! group) {
4769       StrNode* sn = NSTR(target);
4770       if (str_node_can_be_split(sn, env->enc)) {
4771         Node* n = str_node_split_last_char(sn, env->enc);
4772         if (IS_NOT_NULL(n)) {
4773           qn->target = n;
4774           return 2;
4775         }
4776       }
4777     }
4778     break;
4779 
4780   case NT_QTFR:
4781     { /* check redundant double repeat. */
4782       /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
4783       QtfrNode* qnt   = NQTFR(target);
4784       int nestq_num   = popular_quantifier_num(qn);
4785       int targetq_num = popular_quantifier_num(qnt);
4786 
4787 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
4788       if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
4789           IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
4790         UChar buf[WARN_BUFSIZE];
4791 
4792         switch(ReduceTypeTable[targetq_num][nestq_num]) {
4793         case RQ_ASIS:
4794           break;
4795 
4796         case RQ_DEL:
4797           if (onig_verb_warn != onig_null_warn) {
4798             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
4799                                  env->pattern, env->pattern_end,
4800                                  (UChar* )"redundant nested repeat operator");
4801             (*onig_verb_warn)((char* )buf);
4802           }
4803           goto warn_exit;
4804           break;
4805 
4806         default:
4807           if (onig_verb_warn != onig_null_warn) {
4808             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
4809                                        env->pattern, env->pattern_end,
4810             (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
4811             PopularQStr[targetq_num], PopularQStr[nestq_num],
4812             ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
4813             (*onig_verb_warn)((char* )buf);
4814           }
4815           goto warn_exit;
4816           break;
4817         }
4818       }
4819 
4820     warn_exit:
4821 #endif
4822       if (targetq_num >= 0) {
4823         if (nestq_num >= 0) {
4824           onig_reduce_nested_quantifier(qnode, target);
4825           goto q_exit;
4826         }
4827         else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
4828           /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
4829           if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
4830             qn->upper = (qn->lower == 0 ? 1 : qn->lower);
4831           }
4832         }
4833       }
4834     }
4835     break;
4836 
4837   default:
4838     break;
4839   }
4840 
4841   qn->target = target;
4842  q_exit:
4843   return 0;
4844 }
4845 
4846 
4847 #ifdef USE_SHARED_CCLASS_TABLE
4848 
/* parse_exp() shares a ctype character class through the table below only
   when ONIGENC_CODE_RANGE_NUM() of its range data is at least this value. */
#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS     8

/* for ctype node hash table */

/* Lookup key of OnigTypeCClassTable. */
typedef struct {
  OnigEncoding enc;   /* encoding the class was built for */
  int not;            /* negation flag taken from the token */
  int type;           /* ctype value taken from the token */
} type_cclass_key;
4858 
4859 static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
4860 {
4861   if (x->type != y->type) return 1;
4862   if (x->enc  != y->enc)  return 1;
4863   if (x->not  != y->not)  return 1;
4864   return 0;
4865 }
4866 
4867 static int type_cclass_hash(type_cclass_key* key)
4868 {
4869   int i, val;
4870   UChar *p;
4871 
4872   val = 0;
4873 
4874   p = (UChar* )&(key->enc);
4875   for (i = 0; i < (int )sizeof(key->enc); i++) {
4876     val = val * 997 + (int )*p++;
4877   }
4878 
4879   p = (UChar* )(&key->type);
4880   for (i = 0; i < (int )sizeof(key->type); i++) {
4881     val = val * 997 + (int )*p++;
4882   }
4883 
4884   val += key->not;
4885   return val + (val >> 5);
4886 }
4887 
/* Compare/hash callbacks handed to onig_st_init_table_with_size() when
   parse_exp() creates OnigTypeCClassTable. */
static struct st_hash_type type_type_cclass_hash = {
    type_cclass_cmp,
    type_cclass_hash,
};

/* Table of shared ctype class nodes keyed by type_cclass_key; created on
   first use in parse_exp() and released by onig_free_shared_cclass_table(). */
static st_table* OnigTypeCClassTable;
4894 
4895 
4896 static int
4897 i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
4898 {
4899   if (IS_NOT_NULL(node)) {
4900     CClassNode* cc = NCCLASS(node);
4901     if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
4902     xfree(node);
4903   }
4904 
4905   if (IS_NOT_NULL(key)) xfree(key);
4906   return ST_DELETE;
4907 }
4908 
4909 extern int
4910 onig_free_shared_cclass_table(void)
4911 {
4912   if (IS_NOT_NULL(OnigTypeCClassTable)) {
4913     onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
4914     onig_st_free_table(OnigTypeCClassTable);
4915     OnigTypeCClassTable = NULL;
4916   }
4917 
4918   return 0;
4919 }
4920 
4921 #endif /* USE_SHARED_CCLASS_TABLE */
4922 
4923 
4924 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
4925 static int
4926 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
4927 {
4928   BBuf *tbuf;
4929   int r;
4930 
4931   if (IS_NCCLASS_NOT(cc)) {
4932     bitset_invert(cc->bs);
4933 
4934     if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4935       r = not_code_range_buf(enc, cc->mbuf, &tbuf);
4936       if (r != 0) return r;
4937 
4938       bbuf_free(cc->mbuf);
4939       cc->mbuf = tbuf;
4940     }
4941 
4942     NCCLASS_CLEAR_NOT(cc);
4943   }
4944 
4945   return 0;
4946 }
4947 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
4948 
/* State passed from parse_exp() to i_apply_case_fold() through the
   void* argument of ONIGENC_APPLY_ALL_CASE_FOLD(). */
typedef struct {
  ScanEnv*    env;       /* current parse environment */
  CClassNode* cc;        /* character class being extended */
  Node*       alt_root;  /* head of the alternatives built for multi-char folds */
  Node**      ptail;     /* slot where the next alternative gets linked */
} IApplyCaseFoldArg;
4955 
4956 static int
4957 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
4958                   int to_len, void* arg)
4959 {
4960   IApplyCaseFoldArg* iarg;
4961   ScanEnv* env;
4962   CClassNode* cc;
4963   BitSetRef bs;
4964 
4965   iarg = (IApplyCaseFoldArg* )arg;
4966   env = iarg->env;
4967   cc  = iarg->cc;
4968   bs = cc->bs;
4969 
4970   if (to_len == 1) {
4971     int is_in = onig_is_code_in_cc(env->enc, from, cc);
4972 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
4973     if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
4974         (is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
4975       if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
4976         add_code_range(&(cc->mbuf), env, *to, *to);
4977       }
4978       else {
4979         BITSET_SET_BIT(bs, *to);
4980       }
4981     }
4982 #else
4983     if (is_in != 0) {
4984       if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
4985         if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
4986         add_code_range(&(cc->mbuf), env, *to, *to);
4987       }
4988       else {
4989         if (IS_NCCLASS_NOT(cc)) {
4990           BITSET_CLEAR_BIT(bs, *to);
4991         }
4992         else
4993           BITSET_SET_BIT(bs, *to);
4994       }
4995     }
4996 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
4997   }
4998   else {
4999     int r, i, len;
5000     UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
5001     Node *snode = NULL_NODE;
5002 
5003     if (onig_is_code_in_cc(env->enc, from, cc)
5004 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5005         && !IS_NCCLASS_NOT(cc)
5006 #endif
5007         ) {
5008       for (i = 0; i < to_len; i++) {
5009         len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
5010         if (i == 0) {
5011           snode = onig_node_new_str(buf, buf + len);
5012           CHECK_NULL_RETURN_MEMERR(snode);
5013 
5014           /* char-class expanded multi-char only
5015              compare with string folded at match time. */
5016           NSTRING_SET_AMBIG(snode);
5017         }
5018         else {
5019           r = onig_node_str_cat(snode, buf, buf + len);
5020           if (r < 0) {
5021             onig_node_free(snode);
5022             return r;
5023           }
5024         }
5025       }
5026 
5027       *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
5028       CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
5029       iarg->ptail = &(NCDR((*(iarg->ptail))));
5030     }
5031   }
5032 
5033   return 0;
5034 }
5035 
5036 static int
5037 parse_exp(Node** np, OnigToken* tok, int term,
5038           UChar** src, UChar* end, ScanEnv* env)
5039 {
5040   int r, len, group = 0;
5041   Node* qn;
5042   Node** targetp;
5043 
5044   *np = NULL;
5045   if (tok->type == (enum TokenSyms )term)
5046     goto end_of_token;
5047 
5048   switch (tok->type) {
5049   case TK_ALT:
5050   case TK_EOT:
5051   end_of_token:
5052   *np = node_new_empty();
5053   return tok->type;
5054   break;
5055 
5056   case TK_SUBEXP_OPEN:
5057     r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
5058     if (r < 0) return r;
5059     if (r == 1) group = 1;
5060     else if (r == 2) { /* option only */
5061       Node* target;
5062       OnigOptionType prev = env->option;
5063 
5064       env->option = NENCLOSE(*np)->option;
5065       r = fetch_token(tok, src, end, env);
5066       if (r < 0) return r;
5067       r = parse_subexp(&target, tok, term, src, end, env);
5068       env->option = prev;
5069       if (r < 0) return r;
5070       NENCLOSE(*np)->target = target;   
5071       return tok->type;
5072     }
5073     break;
5074 
5075   case TK_SUBEXP_CLOSE:
5076     if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
5077       return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
5078 
5079     if (tok->escaped) goto tk_raw_byte;
5080     else goto tk_byte;
5081     break;
5082 
5083   case TK_STRING:
5084   tk_byte:
5085     {
5086       *np = node_new_str(tok->backp, *src);
5087       CHECK_NULL_RETURN_MEMERR(*np);
5088 
5089       while (1) {
5090         r = fetch_token(tok, src, end, env);
5091         if (r < 0) return r;
5092         if (r != TK_STRING) break;
5093 
5094         r = onig_node_str_cat(*np, tok->backp, *src);
5095         if (r < 0) return r;
5096       }
5097 
5098     string_end:
5099       targetp = np;
5100       goto repeat;
5101     }
5102     break;
5103 
5104   case TK_RAW_BYTE:
5105   tk_raw_byte:
5106     {
5107       *np = node_new_str_raw_char((UChar )tok->u.c);
5108       CHECK_NULL_RETURN_MEMERR(*np);
5109       len = 1;
5110       while (1) {
5111         if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
5112           if (len == enclen(env->enc, NSTR(*np)->s)) {
5113             r = fetch_token(tok, src, end, env);
5114             NSTRING_CLEAR_RAW(*np);
5115             goto string_end;
5116           }
5117         }
5118 
5119         r = fetch_token(tok, src, end, env);
5120         if (r < 0) return r;
5121         if (r != TK_RAW_BYTE) {
5122           /* Don't use this, it is wrong for little endian encodings. */
5123 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
5124           int rem;
5125           if (len < ONIGENC_MBC_MINLEN(env->enc)) {
5126             rem = ONIGENC_MBC_MINLEN(env->enc) - len;
5127             (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
5128             if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
5129               NSTRING_CLEAR_RAW(*np);
5130               goto string_end;
5131             }
5132           }
5133 #endif
5134           return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
5135         }
5136 
5137         r = node_str_cat_char(*np, (UChar )tok->u.c);
5138         if (r < 0) return r;
5139 
5140         len++;
5141       }
5142     }
5143     break;
5144 
5145   case TK_CODE_POINT:
5146     {
5147       UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
5148       int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
5149       if (num < 0) return num;
5150 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
5151       *np = node_new_str_raw(buf, buf + num);
5152 #else
5153       *np = node_new_str(buf, buf + num);
5154 #endif
5155       CHECK_NULL_RETURN_MEMERR(*np);
5156     }
5157     break;
5158 
5159   case TK_QUOTE_OPEN:
5160     {
5161       OnigCodePoint end_op[2];
5162       UChar *qstart, *qend, *nextp;
5163 
5164       end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
5165       end_op[1] = (OnigCodePoint )'E';
5166       qstart = *src;
5167       qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
5168       if (IS_NULL(qend)) {
5169         nextp = qend = end;
5170       }
5171       *np = node_new_str(qstart, qend);
5172       CHECK_NULL_RETURN_MEMERR(*np);
5173       *src = nextp;
5174     }
5175     break;
5176 
5177   case TK_CHAR_TYPE:
5178     {
5179       switch (tok->u.prop.ctype) {
5180       case ONIGENC_CTYPE_WORD:
5181         *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
5182         CHECK_NULL_RETURN_MEMERR(*np);
5183         break;
5184 
5185       case ONIGENC_CTYPE_SPACE:
5186       case ONIGENC_CTYPE_DIGIT:
5187       case ONIGENC_CTYPE_XDIGIT:
5188         {
5189           CClassNode* cc;
5190 
5191 #ifdef USE_SHARED_CCLASS_TABLE
5192           const OnigCodePoint *mbr;
5193           OnigCodePoint sb_out;
5194 
5195           r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
5196                                            &sb_out, &mbr);
5197           if (r == 0 &&
5198               ONIGENC_CODE_RANGE_NUM(mbr)
5199               >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
5200             type_cclass_key  key;
5201             type_cclass_key* new_key;
5202 
5203             key.enc  = env->enc;
5204             key.not  = tok->u.prop.not;
5205             key.type = tok->u.prop.ctype;
5206 
5207             THREAD_ATOMIC_START;
5208 
5209             if (IS_NULL(OnigTypeCClassTable)) {
5210               OnigTypeCClassTable
5211                 = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
5212               if (IS_NULL(OnigTypeCClassTable)) {
5213                 THREAD_ATOMIC_END;
5214                 return ONIGERR_MEMORY;
5215               }
5216             }
5217             else {
5218               if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
5219                                  (st_data_t* )np)) {
5220                 THREAD_ATOMIC_END;
5221                 break;
5222               }
5223             }
5224 
5225             *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
5226                                                      sb_out, mbr);
5227             if (IS_NULL(*np)) {
5228               THREAD_ATOMIC_END;
5229               return ONIGERR_MEMORY;
5230             }
5231 
5232             cc = NCCLASS(*np);
5233             NCCLASS_SET_SHARE(cc);
5234             new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
5235             xmemcpy(new_key, &key, sizeof(type_cclass_key));
5236             onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
5237                                (st_data_t )*np);
5238             
5239             THREAD_ATOMIC_END;
5240           }
5241           else {
5242 #endif
5243             *np = node_new_cclass();
5244             CHECK_NULL_RETURN_MEMERR(*np);
5245             cc = NCCLASS(*np);
5246             add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
5247             if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
5248 #ifdef USE_SHARED_CCLASS_TABLE
5249           }
5250 #endif
5251         }
5252         break;
5253 
5254       default:
5255         return ONIGERR_PARSER_BUG;
5256         break;
5257       }
5258     }
5259     break;
5260 
5261   case TK_CHAR_PROPERTY:
5262     r = parse_char_property(np, tok, src, end, env);
5263     if (r != 0) return r;
5264     break;
5265 
5266   case TK_CC_OPEN:
5267     {
5268       CClassNode* cc;
5269 
5270       r = parse_char_class(np, tok, src, end, env);
5271       if (r != 0) return r;
5272 
5273       cc = NCCLASS(*np);
5274       if (IS_IGNORECASE(env->option)) {
5275         IApplyCaseFoldArg iarg;
5276 
5277         iarg.env      = env;
5278         iarg.cc       = cc;
5279         iarg.alt_root = NULL_NODE;
5280         iarg.ptail    = &(iarg.alt_root);
5281 
5282         r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
5283                                         i_apply_case_fold, &iarg);
5284         if (r != 0) {
5285           onig_node_free(iarg.alt_root);
5286           return r;
5287         }
5288         if (IS_NOT_NULL(iarg.alt_root)) {
5289           Node* work = onig_node_new_alt(*np, iarg.alt_root);
5290           if (IS_NULL(work)) {
5291             onig_node_free(iarg.alt_root);
5292             return ONIGERR_MEMORY;
5293           }
5294           *np = work;
5295         }
5296       }
5297     }
5298     break;
5299 
5300   case TK_ANYCHAR:
5301     *np = node_new_anychar();
5302     CHECK_NULL_RETURN_MEMERR(*np);
5303     break;
5304 
5305   case TK_ANYCHAR_ANYTIME:
5306     *np = node_new_anychar();
5307     CHECK_NULL_RETURN_MEMERR(*np);
5308     qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
5309     CHECK_NULL_RETURN_MEMERR(qn);
5310     NQTFR(qn)->target = *np;
5311     *np = qn;
5312     break;
5313 
5314   case TK_BACKREF:
5315     len = tok->u.backref.num;
5316     *np = node_new_backref(len,
5317                    (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
5318                            tok->u.backref.by_name,
5319 #ifdef USE_BACKREF_WITH_LEVEL
5320                            tok->u.backref.exist_level,
5321                            tok->u.backref.level,
5322 #endif
5323                            env);
5324     CHECK_NULL_RETURN_MEMERR(*np);
5325     break;
5326 
5327 #ifdef USE_SUBEXP_CALL
5328   case TK_CALL:
5329     {
5330       int gnum = tok->u.call.gnum;
5331 
5332       if (gnum < 0) {
5333         gnum = BACKREF_REL_TO_ABS(gnum, env);
5334         if (gnum <= 0)
5335           return ONIGERR_INVALID_BACKREF;
5336       }
5337       *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
5338       CHECK_NULL_RETURN_MEMERR(*np);
5339       env->num_call++;
5340     }
5341     break;
5342 #endif
5343 
5344   case TK_ANCHOR:
5345     *np = onig_node_new_anchor(tok->u.anchor);
5346     break;
5347 
5348   case TK_OP_REPEAT:
5349   case TK_INTERVAL:
5350     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
5351       if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
5352         return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
5353       else
5354         *np = node_new_empty();
5355     }
5356     else {
5357       goto tk_byte;
5358     }
5359     break;
5360 
5361   default:
5362     return ONIGERR_PARSER_BUG;
5363     break;
5364   }
5365 
5366   {
5367     targetp = np;
5368 
5369   re_entry:
5370     r = fetch_token(tok, src, end, env);
5371     if (r < 0) return r;
5372 
5373   repeat:
5374     if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
5375       if (is_invalid_quantifier_target(*targetp))
5376         return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
5377 
5378       qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
5379                                (r == TK_INTERVAL ? 1 : 0));
5380       CHECK_NULL_RETURN_MEMERR(qn);
5381       NQTFR(qn)->greedy = tok->u.repeat.greedy;
5382       r = set_quantifier(qn, *targetp, group, env);
5383       if (r < 0) {
5384         onig_node_free(qn);
5385         return r;
5386       }
5387 
5388       if (tok->u.repeat.possessive != 0) {
5389         Node* en;
5390         en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
5391         if (IS_NULL(en)) {
5392           onig_node_free(qn);
5393           return ONIGERR_MEMORY;
5394         }
5395         NENCLOSE(en)->target = qn;
5396         qn = en;
5397       }
5398 
5399       if (r == 0) {
5400         *targetp = qn;
5401       }
5402       else if (r == 1) {
5403         onig_node_free(qn);
5404       }
5405       else if (r == 2) { /* split case: /abc+/ */
5406         Node *tmp;
5407 
5408         *targetp = node_new_list(*targetp, NULL);
5409         if (IS_NULL(*targetp)) {
5410           onig_node_free(qn);
5411           return ONIGERR_MEMORY;
5412         }
5413         tmp = NCDR(*targetp) = node_new_list(qn, NULL);
5414         if (IS_NULL(tmp)) {
5415           onig_node_free(qn);
5416           return ONIGERR_MEMORY;
5417         }
5418         targetp = &(NCAR(tmp));
5419       }
5420       goto re_entry;
5421     }
5422   }
5423 
5424   return r;
5425 }
5426 
5427 static int
5428 parse_branch(Node** top, OnigToken* tok, int term,
5429              UChar** src, UChar* end, ScanEnv* env)
5430 {
5431   int r;
5432   Node *node, **headp;
5433 
5434   *top = NULL;
5435   r = parse_exp(&node, tok, term, src, end, env);
5436   if (r < 0) return r;
5437 
5438   if (r == TK_EOT || r == term || r == TK_ALT) {
5439     *top = node;
5440   }
5441   else {
5442     *top  = node_new_list(node, NULL);
5443     headp = &(NCDR(*top));
5444     while (r != TK_EOT && r != term && r != TK_ALT) {
5445       r = parse_exp(&node, tok, term, src, end, env);
5446       if (r < 0) return r;
5447 
5448       if (NTYPE(node) == NT_LIST) {
5449         *headp = node;
5450         while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
5451         headp = &(NCDR(node));
5452       }
5453       else {
5454         *headp = node_new_list(node, NULL);
5455         headp = &(NCDR(*headp));
5456       }
5457     }
5458   }
5459 
5460   return r;
5461 }
5462 
5463 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
5464 static int
5465 parse_subexp(Node** top, OnigToken* tok, int term,
5466              UChar** src, UChar* end, ScanEnv* env)
5467 {
5468   int r;
5469   Node *node, **headp;
5470 
5471   *top = NULL;
5472   r = parse_branch(&node, tok, term, src, end, env);
5473   if (r < 0) {
5474     onig_node_free(node);
5475     return r;
5476   }
5477 
5478   if (r == term) {
5479     *top = node;
5480   }
5481   else if (r == TK_ALT) {
5482     *top  = onig_node_new_alt(node, NULL);
5483     headp = &(NCDR(*top));
5484     while (r == TK_ALT) {
5485       r = fetch_token(tok, src, end, env);
5486       if (r < 0) return r;
5487       r = parse_branch(&node, tok, term, src, end, env);
5488       if (r < 0) return r;
5489 
5490       *headp = onig_node_new_alt(node, NULL);
5491       headp = &(NCDR(*headp));
5492     }
5493 
5494     if (tok->type != (enum TokenSyms )term)
5495       goto err;
5496   }
5497   else {
5498   err:
5499     if (term == TK_SUBEXP_CLOSE)
5500       return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
5501     else
5502       return ONIGERR_PARSER_BUG;
5503   }
5504 
5505   return r;
5506 }
5507 
5508 static int
5509 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
5510 {
5511   int r;
5512   OnigToken tok;
5513 
5514   r = fetch_token(&tok, src, end, env);
5515   if (r < 0) return r;
5516   r = parse_subexp(top, &tok, TK_EOT, src, end, env);
5517   if (r < 0) return r;
5518   return 0;
5519 }
5520 
5521 extern int
5522 onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
5523                      regex_t* reg, ScanEnv* env)
5524 {
5525   int r;
5526   UChar* p;
5527 
5528 #ifdef USE_NAMED_GROUP
5529   names_clear(reg);
5530 #endif
5531 
5532   scan_env_clear(env);
5533   env->option         = reg->options;
5534   env->case_fold_flag = reg->case_fold_flag;
5535   env->enc            = reg->enc;
5536   env->syntax         = reg->syntax;
5537   env->pattern        = (UChar* )pattern;
5538   env->pattern_end    = (UChar* )end;
5539   env->reg            = reg;
5540 
5541   *root = NULL;
5542   p = (UChar* )pattern;
5543   r = parse_regexp(root, &p, (UChar* )end, env);
5544   reg->num_mem = env->num_mem;
5545   return r;
5546 }
5547 
5548 extern void
5549 onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
5550                                 UChar* arg, UChar* arg_end)
5551 {
5552   env->error     = arg;
5553   env->error_end = arg_end;
5554 }

/* [<][>][^][v][top][bottom][index][help] */