This source file includes the following definitions.
- find_error_text
- expand_workspace
- is_counted_repeat
- check_escape
- get_ucp
- read_repeat_counts
- first_significant_code
- find_fixedlength
- find_recurse
- could_be_empty_branch
- could_be_empty
- get_repeat_base
- check_char_prop
- get_chr_property_list
- compare_opcodes
- auto_possessify
- check_posix_syntax
- check_posix_name
- adjust_recurse
- auto_callout
- complete_callout
- get_othercase_range
- add_to_class
- add_list_to_class
- add_not_list_to_class
- compile_branch
- compile_regex
- is_anchored
- is_startline
- find_firstassertedchar
- add_name
- pcre_compile
- pcre_compile2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45 #include "config.h"
46
47 #define NLBLOCK cd
48 #define PSSTART start_pattern
49 #define PSEND end_pattern
50
51 #include "pcre_internal.h"
52
53
54
55
56
57
58
59 #ifdef PCRE_DEBUG
60
61 #define PCRE_INCLUDED
62 #include "pcre_printint.c"
63 #undef PCRE_INCLUDED
64 #endif
65
66
67
68
/* Set bit number b in the byte-array bitmap a. Both parameters and the whole
expansion are parenthesized, so that an expression argument (for example a
pointer sum) and use inside a larger expression expand as intended. The bit
number b is still evaluated twice, so it must not have side effects. */

#define SETBIT(a,b) ((a)[(b)/8] |= (1 << ((b)&7)))
70
71
72
73
74 /* A threshold set 20 below INT_MAX. Its uses lie outside this part of the */
75 /* file. NOTE(review): presumably a length-overflow guard - confirm. */
76 #define OFLOW_MAX (INT_MAX - 20)
77
78 /* Forward declarations of two static functions whose definitions appear */
79 /* further down this file. */
80 static int
81 add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *,
82 const pcre_uint32 *, unsigned int);
83
84 static BOOL
85 compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
86 pcre_uint32 *, pcre_int32 *, pcre_uint32 *, pcre_int32 *, branch_chain *,
87 compile_data *, int *);
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110 /* Workspace sizing. expand_workspace() never grows the workspace beyond */
111 /* COMPILE_WORK_SIZE_MAX, and it does not free a workspace that is no larger */
112 /* than COMPILE_WORK_SIZE (presumably the initial one is not heap memory). */
113 #define COMPILE_WORK_SIZE (2048*LINK_SIZE)
114 #define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE)
115
116
117
118
119 /* NOTE(review): not referenced in this part of the file; presumably the */
120 /* initial capacity of a list of named groups - confirm. */
121 #define NAMED_GROUP_LIST_SIZE 20
122
123
124 /* expand_workspace() gives up with ERR72 unless an expansion would gain at */
125 /* least this many units. */
126 #define WORK_SIZE_SAFETY_MARGIN (100)
127
128 /* NOTE(review): flag bits and sentinel values whose users lie outside this */
129 /* part of the file; their exact meaning should be confirmed there. */
130 #define REQ_CASELESS (1 << 0)
131 #define REQ_VARY (1 << 1)
132
133 #define REQ_UNSET (-2)
134 #define REQ_NONE (-1)
135
136
137 /* NOTE(review): not referenced in this part of the file. */
138 #define UTF_LENGTH 0x10000000l
139
140
141
142 /* Escape lookup table consulted by check_escape(). A zero entry means the */
143 /* character has no table-driven meaning; a positive entry is the data */
144 /* character the escape stands for; a negative entry is minus an ESC_ code. */
145 #ifndef EBCDIC
146
147 /* Non-EBCDIC build: the table is indexed by (character - CHAR_0) and runs */
148 /* from '0' to 'z'. Each row's comment names the two characters it covers. */
149 /* ESC_a, ESC_e, ESC_f, ESC_n, ESC_r and ESC_tee are stored un-negated. */
150 static const short int escapes[] = {
151 0, 0, /* 0 1 */
152 0, 0, /* 2 3 */
153 0, 0, /* 4 5 */
154 0, 0, /* 6 7 */
155 0, 0, /* 8 9 */
156 CHAR_COLON, CHAR_SEMICOLON, /* : ; */
157 CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, /* < = */
158 CHAR_GREATER_THAN_SIGN, CHAR_QUESTION_MARK, /* > ? */
159 CHAR_COMMERCIAL_AT, -ESC_A, /* @ A */
160 -ESC_B, -ESC_C, /* B C */
161 -ESC_D, -ESC_E, /* D E */
162 0, -ESC_G, /* F G */
163 -ESC_H, 0, /* H I */
164 0, -ESC_K, /* J K */
165 0, 0, /* L M */
166 -ESC_N, 0, /* N O */
167 -ESC_P, -ESC_Q, /* P Q */
168 -ESC_R, -ESC_S, /* R S */
169 0, 0, /* T U */
170 -ESC_V, -ESC_W, /* V W */
171 -ESC_X, 0, /* X Y */
172 -ESC_Z, CHAR_LEFT_SQUARE_BRACKET, /* Z [ */
173 CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET, /* backslash ] */
174 CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE, /* ^ _ */
175 CHAR_GRAVE_ACCENT, ESC_a, /* ` a */
176 -ESC_b, 0, /* b c */
177 -ESC_d, ESC_e, /* d e */
178 ESC_f, 0, /* f g */
179 -ESC_h, 0, /* h i */
180 0, -ESC_k, /* j k */
181 0, 0, /* l m */
182 ESC_n, 0, /* n o */
183 -ESC_p, 0, /* p q */
184 ESC_r, -ESC_s, /* r s */
185 ESC_tee, 0, /* t u */
186 -ESC_v, -ESC_w, /* v w */
187 0, 0, /* x y */
188 -ESC_z /* z */
189 };
190
191 #else
192 /* EBCDIC build: check_escape() indexes this table with (character - 0x48), */
193 /* so each row of eight starts at 0x48, 0x50, 0x58 and so on. The string */
194 /* ebcdic_escape_c after it maps a character's position to its \c value. */
195 static const short int escapes[] = {
196 0, 0, 0, '.', '<', '(', '+', '|',
197 '&', 0, 0, 0, 0, 0, 0, 0,
198 0, 0, '!', '$', '*', ')', ';', '~',
199 '-', '/', 0, 0, 0, 0, 0, 0,
200 0, 0, '|', ',', '%', '_', '>', '?',
201 0, 0, 0, 0, 0, 0, 0, 0,
202 0, '`', ':', '#', '@', '\'', '=', '"',
203 0, ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
204 -ESC_h, 0, 0, '{', 0, 0, 0, 0,
205 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p,
206 0, ESC_r, 0, '}', 0, 0, 0, 0,
207 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
208 0,-ESC_z, 0, 0, 0, '[', 0, 0,
209 0, 0, 0, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, ']', '=', '-',
211 '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
212 -ESC_H, 0, 0, 0, 0, 0, 0, 0,
213 '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P,
214 -ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
215 '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
216 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
217 0, 0, 0, 0, 0, 0, 0, 0,
218 0, 0, 0, 0, 0, 0, 0, 0
219 };
220
221
222
223
224 static unsigned char ebcdic_escape_c[] = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
225
226 #endif
227
228
229
230
231 /* Tables for the (*VERB) names. verbnames holds the names as consecutive */
232 /* zero-terminated strings, starting with an empty name, and verbs[] has */
233 /* one entry per name in the same order. */
234
235 typedef struct verbitem {
236 int len; /* length of the corresponding name in verbnames */
237 int op; /* NOTE(review): presumably the opcode used without an argument, -1 if none - confirm */
238 int op_arg; /* NOTE(review): presumably the opcode used with an argument, -1 if none - confirm */
239 } verbitem;
240
241 static const char verbnames[] =
242 "\0"
243 STRING_MARK0
244 STRING_ACCEPT0
245 STRING_COMMIT0
246 STRING_F0
247 STRING_FAIL0
248 STRING_PRUNE0
249 STRING_SKIP0
250 STRING_THEN;
251
252 static const verbitem verbs[] = {
253 { 0, -1, OP_MARK },
254 { 4, -1, OP_MARK },
255 { 6, OP_ACCEPT, -1 },
256 { 6, OP_COMMIT, -1 },
257 { 1, OP_FAIL, -1 },
258 { 4, OP_FAIL, -1 },
259 { 5, OP_PRUNE, OP_PRUNE_ARG },
260 { 4, OP_SKIP, OP_SKIP_ARG },
261 { 4, OP_THEN, OP_THEN_ARG }
262 };
263
264 static const int verbcount = sizeof(verbs)/sizeof(verbitem);
265
266 /* Zero-terminated pattern fragments. The first spells \b(?=\w) and the */
267 /* second spells \b(?<=\w). NOTE(review): their users lie outside this part */
268 /* of the file; presumably they replace start-of-word and end-of-word */
269 /* markers in the pattern - confirm. */
270 static const pcre_uchar sub_start_of_word[] = {
271 CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK,
272 CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, CHAR_RIGHT_PARENTHESIS, '\0' };
273
274 static const pcre_uchar sub_end_of_word[] = {
275 CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK,
276 CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w,
277 CHAR_RIGHT_PARENTHESIS, '\0' };
278
279
280
281
282 /* Tables for the POSIX class names. posix_names holds the fourteen names */
283 /* as consecutive zero-terminated strings; posix_name_lengths holds their */
284 /* lengths in the same order, ended by a zero. PC_GRAPH, PC_PRINT and */
285 /* PC_PUNCT are the positions of graph, print and punct in that order. */
286
287 static const char posix_names[] =
288 STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
289 STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
290 STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
291 STRING_word0 STRING_xdigit;
292
293 static const pcre_uint8 posix_name_lengths[] = {
294 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
295
296 #define PC_GRAPH 8
297 #define PC_PRINT 9
298 #define PC_PUNCT 10
299
300
301
302
303
304
305
306 /* Three integers per class, fourteen rows. NOTE(review): the rows are */
307 /* presumably in posix_names order, each giving a base cbit_ table, a */
308 /* second cbit_ table to combine with it (-1 for none), and a small code */
309 /* selecting how they are combined; the consumer lies outside this part */
310 /* of the file, so confirm there. */
311 static const int posix_class_maps[] = {
312 cbit_word, cbit_digit, -2,
313 cbit_lower, -1, 0,
314 cbit_upper, -1, 0,
315 cbit_word, -1, 2,
316 cbit_print, cbit_cntrl, 0,
317 cbit_space, -1, 1,
318 cbit_cntrl, -1, 0,
319 cbit_digit, -1, 0,
320 cbit_graph, -1, 0,
321 cbit_print, -1, 0,
322 cbit_punct, -1, 0,
323 cbit_space, -1, 0,
324 cbit_word, -1, 0,
325 cbit_xdigit,-1, 0
326 };
327 /* Unicode-property replacement strings, compiled only with SUPPORT_UCP. */
328 /* Each array spells a zero-terminated \p{..} or \P{..} item. NOTE(review): */
329 /* substitutes[] is presumably indexed by an escape-derived value to */
330 /* replace \D \d \S \s \W \w under PCRE_UCP - confirm against its user. */
331 #ifdef SUPPORT_UCP
332 static const pcre_uchar string_PNd[] = {
333 CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
334 CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
335 static const pcre_uchar string_pNd[] = {
336 CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
337 CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
338 static const pcre_uchar string_PXsp[] = {
339 CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
340 CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
341 static const pcre_uchar string_pXsp[] = {
342 CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
343 CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
344 static const pcre_uchar string_PXwd[] = {
345 CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
346 CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
347 static const pcre_uchar string_pXwd[] = {
348 CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
349 CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
350
351 static const pcre_uchar *substitutes[] = {
352 string_PNd,
353 string_pNd,
354 string_PXsp,
355 string_pXsp,
356 string_PXwd,
357 string_pXwd
358 };
359
360
361 /* Replacement strings for the POSIX classes. posix_substitutes[] has two */
362 /* groups of fourteen entries: the lower-case \p and \h forms first, then */
363 /* the upper-case \P and \H forms. NULL marks a class with no replacement. */
364 /* NOTE(review): each group is presumably in posix_names order, with the */
365 /* second group serving negated classes - confirm against its user. */
366 static const pcre_uchar string_pL[] = {
367 CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
368 CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
369 static const pcre_uchar string_pLl[] = {
370 CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
371 CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
372 static const pcre_uchar string_pLu[] = {
373 CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
374 CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
375 static const pcre_uchar string_pXan[] = {
376 CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
377 CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
378 static const pcre_uchar string_h[] = {
379 CHAR_BACKSLASH, CHAR_h, '\0' };
380 static const pcre_uchar string_pXps[] = {
381 CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
382 CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
383 static const pcre_uchar string_PL[] = {
384 CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
385 CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
386 static const pcre_uchar string_PLl[] = {
387 CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
388 CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
389 static const pcre_uchar string_PLu[] = {
390 CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
391 CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
392 static const pcre_uchar string_PXan[] = {
393 CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
394 CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
395 static const pcre_uchar string_H[] = {
396 CHAR_BACKSLASH, CHAR_H, '\0' };
397 static const pcre_uchar string_PXps[] = {
398 CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
399 CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
400
401 static const pcre_uchar *posix_substitutes[] = {
402 string_pL,
403 string_pLl,
404 string_pLu,
405 string_pXan,
406 NULL,
407 string_h,
408 NULL,
409 string_pNd,
410 NULL,
411 NULL,
412 NULL,
413 string_pXps,
414 string_pXwd,
415 NULL,
416
417 string_PL,
418 string_PLl,
419 string_PLu,
420 string_PXan,
421 NULL,
422 string_H,
423 NULL,
424 string_PNd,
425 NULL,
426 NULL,
427 NULL,
428 string_PXps,
429 string_PXwd,
430 NULL
431 };
432 #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
433 #endif
434 /* Two-step stringification: XSTRING expands its argument before STRING */
435 #define STRING(a) # a
436 #define XSTRING(s) STRING(s)
437 /* turns it into a string literal (used for the limits quoted below). */
438
439
440
441
442
443
444
445
446
447
448
449 /* The compile-time error messages, stored as one string with each message */
450 /* ended by a zero byte; find_error_text(n) returns message number n. The */
451 /* bare numbers below give the index of the message that follows them. */
452 static const char error_texts[] =
453 "no error\0"
454 "\\ at end of pattern\0"
455 "\\c at end of pattern\0"
456 "unrecognized character follows \\\0"
457 "numbers out of order in {} quantifier\0"
458 /* 5 */
459 "number too big in {} quantifier\0"
460 "missing terminating ] for character class\0"
461 "invalid escape sequence in character class\0"
462 "range out of order in character class\0"
463 "nothing to repeat\0"
464 /* 10 */
465 "internal error: invalid forward reference offset\0"
466 "internal error: unexpected repeat\0"
467 "unrecognized character after (? or (?-\0"
468 "POSIX named classes are supported only within a class\0"
469 "missing )\0"
470 /* 15 */
471 "reference to non-existent subpattern\0"
472 "erroffset passed as NULL\0"
473 "unknown option bit(s) set\0"
474 "missing ) after comment\0"
475 "parentheses nested too deeply\0"
476 /* 20 */
477 "regular expression is too large\0"
478 "failed to get memory\0"
479 "unmatched parentheses\0"
480 "internal error: code overflow\0"
481 "unrecognized character after (?<\0"
482 /* 25 */
483 "lookbehind assertion is not fixed length\0"
484 "malformed number or name after (?(\0"
485 "conditional group contains more than two branches\0"
486 "assertion expected after (?(\0"
487 "(?R or (?[+-]digits must be followed by )\0"
488 /* 30 */
489 "unknown POSIX class name\0"
490 "POSIX collating elements are not supported\0"
491 "this version of PCRE is compiled without UTF support\0"
492 "spare error\0"
493 "character value in \\x{} or \\o{} is too large\0"
494 /* 35 */
495 "invalid condition (?(0)\0"
496 "\\C not allowed in lookbehind assertion\0"
497 "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
498 "number after (?C is > 255\0"
499 "closing ) for (?C expected\0"
500 /* 40 */
501 "recursive call could loop indefinitely\0"
502 "unrecognized character after (?P\0"
503 "syntax error in subpattern name (missing terminator)\0"
504 "two named subpatterns have the same name\0"
505 "invalid UTF-8 string\0"
506 /* 45 */
507 "support for \\P, \\p, and \\X has not been compiled\0"
508 "malformed \\P or \\p sequence\0"
509 "unknown property name after \\P or \\p\0"
510 "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
511 "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
512 /* 50 */
513 "repeated subpattern is too long\0"
514 "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
515 "internal error: overran compiling workspace\0"
516 "internal error: previously-checked referenced subpattern not found\0"
517 "DEFINE group contains more than one branch\0"
518 /* 55 */
519 "repeating a DEFINE group is not allowed\0"
520 "inconsistent NEWLINE options\0"
521 "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
522 "a numbered reference must not be zero\0"
523 "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
524 /* 60 */
525 "(*VERB) not recognized or malformed\0"
526 "number is too big\0"
527 "subpattern name expected\0"
528 "digit expected after (?+\0"
529 "] is an invalid data character in JavaScript compatibility mode\0"
530 /* 65 */
531 "different names for subpatterns of the same number are not allowed\0"
532 "(*MARK) must have an argument\0"
533 "this version of PCRE is not compiled with Unicode property support\0"
534 #ifndef EBCDIC
535 "\\c must be followed by an ASCII character\0"
536 #else
537 "\\c must be followed by a letter or one of [\\]^_?\0"
538 #endif
539 "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
540 /* 70 */
541 "internal error: unknown opcode in find_fixedlength()\0"
542 "\\N is not supported in a class\0"
543 "too many forward references\0"
544 "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
545 "invalid UTF-16 string\0"
546 /* 75 */
547 "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
548 "character value in \\u.... sequence is too large\0"
549 "invalid UTF-32 string\0"
550 "setting UTF is disabled by the application\0"
551 "non-hex character in \\x{} (closing brace missing?)\0"
552 /* 80 */
553 "non-octal character in \\o{} (closing brace missing?)\0"
554 "missing opening brace after \\o\0"
555 "parentheses are too deeply nested\0"
556 "invalid range in character class\0"
557 "group name must start with a non-digit\0"
558 /* 85 */
559 "parentheses are too deeply nested (stack check)\0"
560 "digits missing in \\x{} or \\o{}\0"
561 ;
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580 /* Test for a decimal digit by comparing against CHAR_0 and CHAR_9. The */
581 /* argument is evaluated twice, so it must not have side effects. */
582
583 #define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
584
585 #ifndef EBCDIC
586
587 /* Digit table for non-EBCDIC builds: 256 entries indexed by character */
588 /* value. check_escape() tests entries against ctype_xdigit; the letters */
589 /* A-F and a-f carry 0x08 and the decimal digits carry 0x0c. */
590 static const pcre_uint8 digitab[] =
591 {
592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
595 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */
599 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
600 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
624
625 #else
626
627 /* Digit table for EBCDIC builds, laid out the same way: 0x08 entries at */
628 /* 0x81-0x86 and 0xC1-0xC6, 0x0c entries at 0xF0-0xF9. */
629 static const pcre_uint8 digitab[] =
630 {
631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
636 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
637 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
638 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
646 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
647 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00,
648 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
649 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
650 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
651 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
652 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
653 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
654 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00,
656 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
657 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
658 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
659 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
660 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
661 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,
662 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};
663 /* EBCDIC character-type table; check_escape() masks entries with 0x0E. */
664 static const pcre_uint8 ebcdic_chartab[] = {
665 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
666 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00,
667 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
668 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
670 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
672 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
673 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
674 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80,
675 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00,
677 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
678 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80,
679 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
680 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
681 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12,
682 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
683 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
684 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
685 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12,
686 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
687 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
688 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,
689 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12,
690 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
691 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
692 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
693 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12,
694 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
695 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,
696 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};
697 #endif
698
699
700
701
702
703
704
705
706
707
708
709 /* A table of 0/1 flags with one row per opcode in the range FIRST_AUTOTAB_OP */
710 /* to LAST_AUTOTAB_LEFT_OP and one column per opcode in the range */
711 /* FIRST_AUTOTAB_OP to LAST_AUTOTAB_RIGHT_OP. NOTE(review): its consumer is */
712 /* outside this part of the file; presumably auto-possessification - confirm. */
713 #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
714 #define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
715
716 static const pcre_uint8 autoposstab[APTROWS][APTCOLS] = {
717
718 { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },
719 { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },
720 { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },
721 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },
722 { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },
723 { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },
724 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },
725 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },
726 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },
727 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
728 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
729 { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },
730 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },
731 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },
732 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },
733 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },
734 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }
735 };
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764 /* A square table of small codes (0 to 17) with PT_TABSIZE rows and columns. */
765 /* NOTE(review): presumably indexed by a pair of PT_ property types, the */
766 /* code selecting how the two properties are compared; its consumer is */
767 /* outside this part of the file - confirm there. */
768 static const pcre_uint8 propposstab[PT_TABSIZE][PT_TABSIZE] = {
769
770 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
771 { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 },
772 { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 },
773 { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 },
774 { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 },
775 { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 },
776 { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 },
777 { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 },
778 { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 },
779 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
780 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 }
781 };
782
783
784
785
786 /* A 7 x 30 table of 0/1 flags; each row is 1 except for one contiguous run */
787 /* of zeros. NOTE(review): presumably rows are general Unicode categories */
788 /* and columns particular categories, with 0 where the particular category */
789 /* belongs to the general one; the consumer is outside this chunk - confirm. */
790 static const pcre_uint8 catposstab[7][30] = {
791
792 { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
793 { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
794 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
795 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
796 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
797 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },
798 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }
799 };
800
801
802
803
804
805
806
807
808 /* Three rows of four ucp_ category codes. NOTE(review): the consumer is */
809 /* outside this part of the file; presumably each row describes one of the */
810 /* special property types by listing general and particular categories */
811 /* that it covers - confirm there. */
812 static const pcre_uint8 posspropstab[3][4] = {
813 { ucp_L, ucp_N, ucp_N, ucp_Nl },
814 { ucp_Z, ucp_Z, ucp_C, ucp_Cc },
815 { ucp_L, ucp_N, ucp_P, ucp_Po }
816 };
817
818
819
820 /* A table whose non-zero entries are possessive opcodes (OP_POS..., */
821 /* OP_NOTPOS..., OP_TYPEPOS..., OP_CRPOS...). NOTE(review): presumably it is */
822 /* indexed by opcode and gives the possessive replacement for a repeat */
823 /* opcode, or 0 when there is none; the consumer is outside this chunk. */
824 static const pcre_uint8 opcode_possessify[] = {
825 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
826 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
827
828 0,
829 OP_POSSTAR, 0,
830 OP_POSPLUS, 0,
831 OP_POSQUERY, 0,
832 OP_POSUPTO, 0,
833 0,
834 0, 0, 0, 0,
835
836 OP_POSSTARI, 0,
837 OP_POSPLUSI, 0,
838 OP_POSQUERYI, 0,
839 OP_POSUPTOI, 0,
840 0,
841 0, 0, 0, 0,
842
843 OP_NOTPOSSTAR, 0,
844 OP_NOTPOSPLUS, 0,
845 OP_NOTPOSQUERY, 0,
846 OP_NOTPOSUPTO, 0,
847 0,
848 0, 0, 0, 0,
849
850 OP_NOTPOSSTARI, 0,
851 OP_NOTPOSPLUSI, 0,
852 OP_NOTPOSQUERYI, 0,
853 OP_NOTPOSUPTOI, 0,
854 0,
855 0, 0, 0, 0,
856
857 OP_TYPEPOSSTAR, 0,
858 OP_TYPEPOSPLUS, 0,
859 OP_TYPEPOSQUERY, 0,
860 OP_TYPEPOSUPTO, 0,
861 0,
862 0, 0, 0, 0,
863
864 OP_CRPOSSTAR, 0,
865 OP_CRPOSPLUS, 0,
866 OP_CRPOSQUERY, 0,
867 OP_CRPOSRANGE, 0,
868 0, 0, 0, 0,
869
870 0, 0, 0,
871 0, 0,
872 0, 0,
873 0, 0
874 };
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891 static const char *
892 find_error_text(int n)
893 {
894 const char *s = error_texts;
895 for (; n > 0; n--)
896 {
897 while (*s++ != CHAR_NULL) {};
898 if (*s == CHAR_NULL) return "Error text not found (please report)";
899 }
900 return s;
901 }
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918 static int
919 expand_workspace(compile_data *cd)
920 {
921 pcre_uchar *newspace;
922 int newsize = cd->workspace_size * 2;
923
924 if (newsize > COMPILE_WORK_SIZE_MAX) newsize = COMPILE_WORK_SIZE_MAX;
925 if (cd->workspace_size >= COMPILE_WORK_SIZE_MAX ||
926 newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN)
927 return ERR72;
928
929 newspace = (PUBL(malloc))(IN_UCHARS(newsize));
930 if (newspace == NULL) return ERR21;
931 memcpy(newspace, cd->start_workspace, cd->workspace_size * sizeof(pcre_uchar));
932 cd->hwm = (pcre_uchar *)newspace + (cd->hwm - cd->start_workspace);
933 if (cd->workspace_size > COMPILE_WORK_SIZE)
934 (PUBL(free))((void *)cd->start_workspace);
935 cd->start_workspace = newspace;
936 cd->workspace_size = newsize;
937 return 0;
938 }
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957 static BOOL
958 is_counted_repeat(const pcre_uchar *p)
959 {
960 if (!IS_DIGIT(*p)) return FALSE;
961 p++;
962 while (IS_DIGIT(*p)) p++;
963 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
964
965 if (*p++ != CHAR_COMMA) return FALSE;
966 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
967
968 if (!IS_DIGIT(*p)) return FALSE;
969 p++;
970 while (IS_DIGIT(*p)) p++;
971
972 return (*p == CHAR_RIGHT_CURLY_BRACKET);
973 }
974
975
976
977
978
979
980
981
982
983
984
985 /* Interpret the escape sequence that starts at the backslash *ptrptr points */
986 /* to. The result is either a data character, stored in *chptr with a return */
987 /* value of zero, or a non-zero return value: a positive ESC_ code for a */
988 /* special escape, or the negated group number for a back reference. */
989 /* */
990 /* Arguments: */
991 /*   ptrptr        points to the pointer to the backslash; on return the */
992 /*                 pointer is left on the last character that was used */
993 /*   chptr         receives the data character value */
994 /*   errorcodeptr  receives an ERRnn code on failure; not written otherwise */
995 /*   bracount      the highest group number a back reference may name */
996 /*   options       option bits; PCRE_UTF8, PCRE_JAVASCRIPT_COMPAT, PCRE_EXTRA */
997 /*                 and PCRE_UCP are the ones tested here */
998 /*   isclass       TRUE disables \g and back references; NOTE(review): */
999 /*                 presumably set when inside a character class - confirm */
1000 /* */
1001 /* Returns: zero => data character in *chptr; non-zero as described above */
1002 static int
1003 check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,
1004 int bracount, int options, BOOL isclass)
1005 {
1006 /* NOTE(review): only PCRE_UTF8 is tested; presumably the UTF option bits of the other widths share its value. */
1007 BOOL utf = (options & PCRE_UTF8) != 0;
1008 const pcre_uchar *ptr = *ptrptr + 1;
1009 pcre_uint32 c;
1010 int escape = 0;
1011 int i;
1012
1013 GETCHARINCTEST(c, ptr);
1014 ptr--;
1015 /* The fetch moved ptr past the character; stepping back leaves ptr[1] as the next unread item. */
1016
1017 /* A backslash at the very end of the pattern is an error. */
1018 if (c == CHAR_NULL) *errorcodeptr = ERR1;
1019
1020 /* Characters outside the table's range stand for themselves. Otherwise a */
1021 /* non-zero table entry settles the matter: a positive entry is the data */
1022 /* character, a negative entry is minus the escape code. */
1023
1024 #ifndef EBCDIC
1025
1026 else if (c < CHAR_0 || c > CHAR_z) {}
1027 else if ((i = escapes[c - CHAR_0]) != 0)
1028 { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
1029
1030 #else
1031
1032 else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
1033 else if ((i = escapes[c - 0x48]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
1034 #endif
1035
1036 /* A zero table entry arrives here with i == 0, which the counted loops for */
1037 /* \0 and unbraced \x below depend on. These escapes are decoded by hand. */
1038 else
1039 {
1040 const pcre_uchar *oldptr;
1041 BOOL braced, negated, overflow;
1042 int s;
1043
1044 switch (c)
1045 {
1046
1047 /* \l and \L are not supported. */
1048
1049 case CHAR_l:
1050 case CHAR_L:
1051 *errorcodeptr = ERR37;
1052 break;
1053
1054 case CHAR_u:
1055 if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
1056 {
1057 /* In JavaScript-compatible mode \u is special only when exactly four */
1058 /* hex digits follow; otherwise c is left as the letter u. */
1059 if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
1060 && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
1061 && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
1062 && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
1063 {
1064 c = 0;
1065 for (i = 0; i < 4; ++i)
1066 {
1067 register pcre_uint32 cc = *(++ptr);
1068 #ifndef EBCDIC
1069 if (cc >= CHAR_a) cc -= 32;
1070 c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
1071 #else
1072 if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;
1073 c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1074 #endif
1075 }
1076 /* Reject a value too large for the code unit width or for Unicode. */
1077 #if defined COMPILE_PCRE8
1078 if (c > (utf ? 0x10ffffU : 0xffU))
1079 #elif defined COMPILE_PCRE16
1080 if (c > (utf ? 0x10ffffU : 0xffffU))
1081 #elif defined COMPILE_PCRE32
1082 if (utf && c > 0x10ffffU)
1083 #endif
1084 {
1085 *errorcodeptr = ERR76;
1086 }
1087 else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1088 }
1089 }
1090 else
1091 *errorcodeptr = ERR37;
1092 break;
1093
1094 case CHAR_U:
1095 /* \U stays a literal U in JavaScript-compatible mode and is an error otherwise. */
1096 if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
1097 break;
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108 /* \g is ignored when isclass is set. Followed by < or ' it returns ESC_g. */
1109 /* \g{...} whose braces hold anything other than digits and minus signs */
1110 /* returns ESC_k; in both cases ptr is left on the g. */
1111 /* */
1112 /* Otherwise the forms \gN, \g-N, \g{N} and \g{-N} give a back reference, */
1113 /* returned as minus the group number. A negative N counts back from */
1114 /* bracount, so that -1 names group number bracount. */
1115
1116 case CHAR_g:
1117 if (isclass) break;
1118 if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
1119 {
1120 escape = ESC_g;
1121 break;
1122 }
1123
1124 /* Scan a braced item to see whether it is purely numeric. */
1125
1126 if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1127 {
1128 const pcre_uchar *p;
1129 for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
1130 if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
1131 if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET)
1132 {
1133 escape = ESC_k;
1134 break;
1135 }
1136 braced = TRUE;
1137 ptr++;
1138 }
1139 else braced = FALSE;
1140
1141 if (ptr[1] == CHAR_MINUS)
1142 {
1143 negated = TRUE;
1144 ptr++;
1145 }
1146 else negated = FALSE;
1147
1148 /* Read the number, stopping before it can overflow an int. */
1149 s = 0;
1150 overflow = FALSE;
1151 while (IS_DIGIT(ptr[1]))
1152 {
1153 if (s > INT_MAX / 10 - 1)
1154 {
1155 overflow = TRUE;
1156 break;
1157 }
1158 s = s * 10 + (int)(*(++ptr) - CHAR_0);
1159 }
1160 if (overflow)
1161 {
1162 while (IS_DIGIT(ptr[1]))
1163 ptr++;
1164 *errorcodeptr = ERR61;
1165 break;
1166 }
1167
1168 if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
1169 {
1170 *errorcodeptr = ERR57;
1171 break;
1172 }
1173
1174 if (s == 0)
1175 {
1176 *errorcodeptr = ERR58;
1177 break;
1178 }
1179
1180 if (negated)
1181 {
1182 if (s > bracount)
1183 {
1184 *errorcodeptr = ERR15;
1185 break;
1186 }
1187 s = bracount - (s - 1);
1188 }
1189
1190 escape = -s;
1191 break;
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203 /* A digit from 1 to 9. When isclass is not set the whole decimal number is */
1204 /* read; if it is less than 8, or no greater than bracount, it is a back */
1205 /* reference. Otherwise ptr is put back on the first digit and the escape */
1206 /* is handled as a character: 8 or 9 stands for itself, while 1 to 7 falls */
1207 /* through to the octal case below. */
1208
1209 case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
1210 case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
1211
1212 if (!isclass)
1213 {
1214 oldptr = ptr;
1215 /* Same overflow guard as for \g above. */
1216 s = (int)(c -CHAR_0);
1217 overflow = FALSE;
1218 while (IS_DIGIT(ptr[1]))
1219 {
1220 if (s > INT_MAX / 10 - 1)
1221 {
1222 overflow = TRUE;
1223 break;
1224 }
1225 s = s * 10 + (int)(*(++ptr) - CHAR_0);
1226 }
1227 if (overflow)
1228 {
1229 while (IS_DIGIT(ptr[1]))
1230 ptr++;
1231 *errorcodeptr = ERR61;
1232 break;
1233 }
1234 if (s < 8 || s <= bracount)
1235 {
1236 escape = -s;
1237 break;
1238 }
1239 ptr = oldptr;
1240 }
1241
1242
1243
1244
1245
1246 /* Not a back reference: 8 and 9 are returned as literal characters. */
1247 if ((c = *ptr) >= CHAR_8) break;
1248
1249
1250
1251
1252
1253
1254 /* Deliberate fall through for 1 to 7. The first octal digit is already in */
1255 /* c; up to two more are read (i is zero on entry, see above). In the */
1256 /* 8-bit library without UTF a value above 0xff is an error. */
1257 case CHAR_0:
1258 c -= CHAR_0;
1259 while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
1260 c = c * 8 + *(++ptr) - CHAR_0;
1261 #ifdef COMPILE_PCRE8
1262 if (!utf && c > 0xff) *errorcodeptr = ERR51;
1263 #endif
1264 break;
1265
1266
1267 /* \o must be followed by a braced, non-empty run of octal digits. Leading */
1268 /* zeros are skipped; the value is range-checked after every digit. */
1269 case CHAR_o:
1270 if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else
1271 if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR86; else
1272 {
1273 ptr += 2;
1274 c = 0;
1275 overflow = FALSE;
1276 while (*ptr >= CHAR_0 && *ptr <= CHAR_7)
1277 {
1278 register pcre_uint32 cc = *ptr++;
1279 if (c == 0 && cc == CHAR_0) continue;
1280 #ifdef COMPILE_PCRE32
1281 if (c >= 0x20000000l) { overflow = TRUE; break; }
1282 #endif
1283 c = (c << 3) + cc - CHAR_0 ;
1284 #if defined COMPILE_PCRE8
1285 if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1286 #elif defined COMPILE_PCRE16
1287 if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1288 #elif defined COMPILE_PCRE32
1289 if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1290 #endif
1291 }
1292 if (overflow)
1293 {
1294 while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++;
1295 *errorcodeptr = ERR34;
1296 }
1297 else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
1298 {
1299 if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1300 }
1301 else *errorcodeptr = ERR80;
1302 }
1303 break;
1304
1305
1306 /* \x in JavaScript-compatible mode is special only when exactly two hex */
1307 /* digits follow; otherwise c is left as the letter x. */
1308 case CHAR_x:
1309 if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
1310 {
1311 if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
1312 && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
1313 {
1314 c = 0;
1315 for (i = 0; i < 2; ++i)
1316 {
1317 register pcre_uint32 cc = *(++ptr);
1318 #ifndef EBCDIC
1319 if (cc >= CHAR_a) cc -= 32;
1320 c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
1321 #else
1322 if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;
1323 c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1324 #endif
1325 }
1326 }
1327 }
1328
1329
1330
1331
1332
1333 /* Outside JavaScript-compatible mode: \x{...} takes a non-empty run of hex */
1334 /* digits of any length, with leading zeros skipped and the value */
1335 /* range-checked after every digit. */
1336 else
1337 {
1338 if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1339 {
1340 ptr += 2;
1341 if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
1342 {
1343 *errorcodeptr = ERR86;
1344 break;
1345 }
1346 c = 0;
1347 overflow = FALSE;
1348 while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0)
1349 {
1350 register pcre_uint32 cc = *ptr++;
1351 if (c == 0 && cc == CHAR_0) continue;
1352
1353 #ifdef COMPILE_PCRE32
1354 if (c >= 0x10000000l) { overflow = TRUE; break; }
1355 #endif
1356
1357 #ifndef EBCDIC
1358 if (cc >= CHAR_a) cc -= 32;
1359 c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
1360 #else
1361 if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;
1362 c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1363 #endif
1364
1365 #if defined COMPILE_PCRE8
1366 if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1367 #elif defined COMPILE_PCRE16
1368 if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1369 #elif defined COMPILE_PCRE32
1370 if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1371 #endif
1372 }
1373
1374 if (overflow)
1375 {
1376 while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0) ptr++;
1377 *errorcodeptr = ERR34;
1378 }
1379
1380 else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
1381 {
1382 if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1383 }
1384
1385
1386
1387
1388
1389 /* The digits ended on something other than the closing brace. */
1390 else *errorcodeptr = ERR79;
1391 }
1392
1393 /* \x without a brace: up to two hex digits are read (i is zero on entry, */
1394 /* see above); with no digits at all the value is zero. */
1395 else
1396 {
1397 c = 0;
1398 while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
1399 {
1400 pcre_uint32 cc;
1401 cc = *(++ptr);
1402 #ifndef EBCDIC
1403 if (cc >= CHAR_a) cc -= 32;
1404 c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
1405 #else
1406 if (cc <= CHAR_z) cc += 64;
1407 c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
1408 #endif
1409 }
1410 }
1411 }
1412 break;
1413
1414
1415 /* \c takes the next character. Non-EBCDIC: it must not exceed 127; a */
1416 /* lower-case letter is upper-cased and then bit 0x40 is flipped. EBCDIC: */
1417 /* the value is the character's position in ebcdic_escape_c, with ? */
1418 /* handled as a special case. */
1419 case CHAR_c:
1420 c = *(++ptr);
1421 if (c == CHAR_NULL)
1422 {
1423 *errorcodeptr = ERR2;
1424 break;
1425 }
1426 #ifndef EBCDIC
1427 if (c > 127)
1428 {
1429 *errorcodeptr = ERR68;
1430 break;
1431 }
1432 if (c >= CHAR_a && c <= CHAR_z) c -= 32;
1433 c ^= 0x40;
1434 #else
1435 if (c >= CHAR_a && c <= CHAR_z) c += 64;
1436 if (c == CHAR_QUESTION_MARK)
1437 c = ('\\' == 188 && '`' == 74)? 0x5f : 0xff;
1438 else
1439 {
1440 for (i = 0; i < 32; i++)
1441 {
1442 if (c == ebcdic_escape_c[i]) break;
1443 }
1444 if (i < 32) c = i; else *errorcodeptr = ERR68;
1445 }
1446 #endif
1447 break;
1448
1449
1450
1451
1452 /* Any other character with a zero table entry: an error under PCRE_EXTRA, */
1453 /* otherwise the character stands for itself. */
1454
1455 default:
1456 if ((options & PCRE_EXTRA) != 0) switch(c)
1457 {
1458 default:
1459 *errorcodeptr = ERR3;
1460 break;
1461 }
1462 break;
1463 }
1464 }
1465
1466
1467 /* \N followed by { is accepted only when the brace begins a counted */
1468 /* repeat; anything else after the brace is rejected with ERR37. */
1469
1470 if (escape == ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&
1471 !is_counted_repeat(ptr+2))
1472 *errorcodeptr = ERR37;
1473
1474 /* With PCRE_UCP set, escape codes from ESC_D to ESC_w are shifted by the */
1475 /* distance between ESC_D and ESC_DU. */
1476 if ((options & PCRE_UCP) != 0 && escape >= ESC_D && escape <= ESC_w)
1477 escape += (ESC_DU - ESC_D);
1478
1479 /* Hand back the updated pointer and the character value. */
1480
1481 *ptrptr = ptr;
1482 *chptr = c;
1483 return escape;
1484 }
1485
1486
1487
1488 #ifdef SUPPORT_UCP
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508 static BOOL
1509 get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr,
1510 unsigned int *pdataptr, int *errorcodeptr)
1511 {
1512 pcre_uchar c;
1513 int i, bot, top;
1514 const pcre_uchar *ptr = *ptrptr;
1515 pcre_uchar name[32];
1516
1517 c = *(++ptr);
1518 if (c == CHAR_NULL) goto ERROR_RETURN;
1519
1520 *negptr = FALSE;
1521
1522
1523
1524
1525 if (c == CHAR_LEFT_CURLY_BRACKET)
1526 {
1527 if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
1528 {
1529 *negptr = TRUE;
1530 ptr++;
1531 }
1532 for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++)
1533 {
1534 c = *(++ptr);
1535 if (c == CHAR_NULL) goto ERROR_RETURN;
1536 if (c == CHAR_RIGHT_CURLY_BRACKET) break;
1537 name[i] = c;
1538 }
1539 if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
1540 name[i] = 0;
1541 }
1542
1543
1544
1545 else
1546 {
1547 name[0] = c;
1548 name[1] = 0;
1549 }
1550
1551 *ptrptr = ptr;
1552
1553
1554
1555 bot = 0;
1556 top = PRIV(utt_size);
1557
1558 while (bot < top)
1559 {
1560 int r;
1561 i = (bot + top) >> 1;
1562 r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
1563 if (r == 0)
1564 {
1565 *ptypeptr = PRIV(utt)[i].type;
1566 *pdataptr = PRIV(utt)[i].value;
1567 return TRUE;
1568 }
1569 if (r > 0) bot = i + 1; else top = i;
1570 }
1571
1572 *errorcodeptr = ERR47;
1573 *ptrptr = ptr;
1574 return FALSE;
1575
1576 ERROR_RETURN:
1577 *errorcodeptr = ERR46;
1578 *ptrptr = ptr;
1579 return FALSE;
1580 }
1581 #endif
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604 static const pcre_uchar *
1605 read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr)
1606 {
1607 int min = 0;
1608 int max = -1;
1609
1610 while (IS_DIGIT(*p))
1611 {
1612 min = min * 10 + (int)(*p++ - CHAR_0);
1613 if (min > 65535)
1614 {
1615 *errorcodeptr = ERR5;
1616 return p;
1617 }
1618 }
1619
1620 if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
1621 {
1622 if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
1623 {
1624 max = 0;
1625 while(IS_DIGIT(*p))
1626 {
1627 max = max * 10 + (int)(*p++ - CHAR_0);
1628 if (max > 65535)
1629 {
1630 *errorcodeptr = ERR5;
1631 return p;
1632 }
1633 }
1634 if (max < min)
1635 {
1636 *errorcodeptr = ERR4;
1637 return p;
1638 }
1639 }
1640 }
1641
1642 *minp = min;
1643 *maxp = max;
1644 return p;
1645 }
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666 static const pcre_uchar*
1667 first_significant_code(const pcre_uchar *code, BOOL skipassert)
1668 {
1669 for (;;)
1670 {
1671 switch ((int)*code)
1672 {
1673 case OP_ASSERT_NOT:
1674 case OP_ASSERTBACK:
1675 case OP_ASSERTBACK_NOT:
1676 if (!skipassert) return code;
1677 do code += GET(code, 1); while (*code == OP_ALT);
1678 code += PRIV(OP_lengths)[*code];
1679 break;
1680
1681 case OP_WORD_BOUNDARY:
1682 case OP_NOT_WORD_BOUNDARY:
1683 if (!skipassert) return code;
1684
1685
1686 case OP_CALLOUT:
1687 case OP_CREF:
1688 case OP_DNCREF:
1689 case OP_RREF:
1690 case OP_DNRREF:
1691 case OP_DEF:
1692 code += PRIV(OP_lengths)[*code];
1693 break;
1694
1695 default:
1696 return code;
1697 }
1698 }
1699
1700 }
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733 static int
1734 find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd,
1735 recurse_check *recurses)
1736 {
1737 int length = -1;
1738 recurse_check this_recurse;
1739 register int branchlength = 0;
1740 register pcre_uchar *cc = code + 1 + LINK_SIZE;
1741
1742
1743
1744
1745 for (;;)
1746 {
1747 int d;
1748 pcre_uchar *ce, *cs;
1749 register pcre_uchar op = *cc;
1750
1751 switch (op)
1752 {
1753
1754
1755
1756
1757
1758 case OP_CBRA:
1759 case OP_BRA:
1760 case OP_ONCE:
1761 case OP_ONCE_NC:
1762 case OP_COND:
1763 d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd,
1764 recurses);
1765 if (d < 0) return d;
1766 branchlength += d;
1767 do cc += GET(cc, 1); while (*cc == OP_ALT);
1768 cc += 1 + LINK_SIZE;
1769 break;
1770
1771
1772
1773
1774
1775
1776
1777 case OP_ALT:
1778 case OP_KET:
1779 case OP_END:
1780 case OP_ACCEPT:
1781 case OP_ASSERT_ACCEPT:
1782 if (length < 0) length = branchlength;
1783 else if (length != branchlength) return -1;
1784 if (*cc != OP_ALT) return length;
1785 cc += 1 + LINK_SIZE;
1786 branchlength = 0;
1787 break;
1788
1789
1790
1791
1792
1793 case OP_RECURSE:
1794 if (!atend) return -3;
1795 cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);
1796 do ce += GET(ce, 1); while (*ce == OP_ALT);
1797 if (cc > cs && cc < ce) return -1;
1798 else
1799 {
1800 recurse_check *r = recurses;
1801 for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
1802 if (r != NULL) return -1;
1803 }
1804 this_recurse.prev = recurses;
1805 this_recurse.group = cs;
1806 d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd, &this_recurse);
1807 if (d < 0) return d;
1808 branchlength += d;
1809 cc += 1 + LINK_SIZE;
1810 break;
1811
1812
1813
1814 case OP_ASSERT:
1815 case OP_ASSERT_NOT:
1816 case OP_ASSERTBACK:
1817 case OP_ASSERTBACK_NOT:
1818 do cc += GET(cc, 1); while (*cc == OP_ALT);
1819 cc += 1 + LINK_SIZE;
1820 break;
1821
1822
1823
1824 case OP_MARK:
1825 case OP_PRUNE_ARG:
1826 case OP_SKIP_ARG:
1827 case OP_THEN_ARG:
1828 cc += cc[1] + PRIV(OP_lengths)[*cc];
1829 break;
1830
1831 case OP_CALLOUT:
1832 case OP_CIRC:
1833 case OP_CIRCM:
1834 case OP_CLOSE:
1835 case OP_COMMIT:
1836 case OP_CREF:
1837 case OP_DEF:
1838 case OP_DNCREF:
1839 case OP_DNRREF:
1840 case OP_DOLL:
1841 case OP_DOLLM:
1842 case OP_EOD:
1843 case OP_EODN:
1844 case OP_FAIL:
1845 case OP_NOT_WORD_BOUNDARY:
1846 case OP_PRUNE:
1847 case OP_REVERSE:
1848 case OP_RREF:
1849 case OP_SET_SOM:
1850 case OP_SKIP:
1851 case OP_SOD:
1852 case OP_SOM:
1853 case OP_THEN:
1854 case OP_WORD_BOUNDARY:
1855 cc += PRIV(OP_lengths)[*cc];
1856 break;
1857
1858
1859
1860 case OP_CHAR:
1861 case OP_CHARI:
1862 case OP_NOT:
1863 case OP_NOTI:
1864 branchlength++;
1865 cc += 2;
1866 #ifdef SUPPORT_UTF
1867 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1868 #endif
1869 break;
1870
1871
1872
1873
1874 case OP_EXACT:
1875 case OP_EXACTI:
1876 case OP_NOTEXACT:
1877 case OP_NOTEXACTI:
1878 branchlength += (int)GET2(cc,1);
1879 cc += 2 + IMM2_SIZE;
1880 #ifdef SUPPORT_UTF
1881 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1882 #endif
1883 break;
1884
1885 case OP_TYPEEXACT:
1886 branchlength += GET2(cc,1);
1887 if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP)
1888 cc += 2;
1889 cc += 1 + IMM2_SIZE + 1;
1890 break;
1891
1892
1893
1894 case OP_PROP:
1895 case OP_NOTPROP:
1896 cc += 2;
1897
1898
1899 case OP_HSPACE:
1900 case OP_VSPACE:
1901 case OP_NOT_HSPACE:
1902 case OP_NOT_VSPACE:
1903 case OP_NOT_DIGIT:
1904 case OP_DIGIT:
1905 case OP_NOT_WHITESPACE:
1906 case OP_WHITESPACE:
1907 case OP_NOT_WORDCHAR:
1908 case OP_WORDCHAR:
1909 case OP_ANY:
1910 case OP_ALLANY:
1911 branchlength++;
1912 cc++;
1913 break;
1914
1915
1916
1917
1918 case OP_ANYBYTE:
1919 return -2;
1920
1921
1922
1923 case OP_CLASS:
1924 case OP_NCLASS:
1925 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1926 case OP_XCLASS:
1927
1928
1929 if (op == OP_XCLASS)
1930 cc += GET(cc, 1);
1931 else
1932 cc += PRIV(OP_lengths)[OP_CLASS];
1933 #else
1934 cc += PRIV(OP_lengths)[OP_CLASS];
1935 #endif
1936
1937 switch (*cc)
1938 {
1939 case OP_CRSTAR:
1940 case OP_CRMINSTAR:
1941 case OP_CRPLUS:
1942 case OP_CRMINPLUS:
1943 case OP_CRQUERY:
1944 case OP_CRMINQUERY:
1945 case OP_CRPOSSTAR:
1946 case OP_CRPOSPLUS:
1947 case OP_CRPOSQUERY:
1948 return -1;
1949
1950 case OP_CRRANGE:
1951 case OP_CRMINRANGE:
1952 case OP_CRPOSRANGE:
1953 if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
1954 branchlength += (int)GET2(cc,1);
1955 cc += 1 + 2 * IMM2_SIZE;
1956 break;
1957
1958 default:
1959 branchlength++;
1960 }
1961 break;
1962
1963
1964
1965 case OP_ANYNL:
1966 case OP_BRAMINZERO:
1967 case OP_BRAPOS:
1968 case OP_BRAPOSZERO:
1969 case OP_BRAZERO:
1970 case OP_CBRAPOS:
1971 case OP_EXTUNI:
1972 case OP_KETRMAX:
1973 case OP_KETRMIN:
1974 case OP_KETRPOS:
1975 case OP_MINPLUS:
1976 case OP_MINPLUSI:
1977 case OP_MINQUERY:
1978 case OP_MINQUERYI:
1979 case OP_MINSTAR:
1980 case OP_MINSTARI:
1981 case OP_MINUPTO:
1982 case OP_MINUPTOI:
1983 case OP_NOTMINPLUS:
1984 case OP_NOTMINPLUSI:
1985 case OP_NOTMINQUERY:
1986 case OP_NOTMINQUERYI:
1987 case OP_NOTMINSTAR:
1988 case OP_NOTMINSTARI:
1989 case OP_NOTMINUPTO:
1990 case OP_NOTMINUPTOI:
1991 case OP_NOTPLUS:
1992 case OP_NOTPLUSI:
1993 case OP_NOTPOSPLUS:
1994 case OP_NOTPOSPLUSI:
1995 case OP_NOTPOSQUERY:
1996 case OP_NOTPOSQUERYI:
1997 case OP_NOTPOSSTAR:
1998 case OP_NOTPOSSTARI:
1999 case OP_NOTPOSUPTO:
2000 case OP_NOTPOSUPTOI:
2001 case OP_NOTQUERY:
2002 case OP_NOTQUERYI:
2003 case OP_NOTSTAR:
2004 case OP_NOTSTARI:
2005 case OP_NOTUPTO:
2006 case OP_NOTUPTOI:
2007 case OP_PLUS:
2008 case OP_PLUSI:
2009 case OP_POSPLUS:
2010 case OP_POSPLUSI:
2011 case OP_POSQUERY:
2012 case OP_POSQUERYI:
2013 case OP_POSSTAR:
2014 case OP_POSSTARI:
2015 case OP_POSUPTO:
2016 case OP_POSUPTOI:
2017 case OP_QUERY:
2018 case OP_QUERYI:
2019 case OP_REF:
2020 case OP_REFI:
2021 case OP_DNREF:
2022 case OP_DNREFI:
2023 case OP_SBRA:
2024 case OP_SBRAPOS:
2025 case OP_SCBRA:
2026 case OP_SCBRAPOS:
2027 case OP_SCOND:
2028 case OP_SKIPZERO:
2029 case OP_STAR:
2030 case OP_STARI:
2031 case OP_TYPEMINPLUS:
2032 case OP_TYPEMINQUERY:
2033 case OP_TYPEMINSTAR:
2034 case OP_TYPEMINUPTO:
2035 case OP_TYPEPLUS:
2036 case OP_TYPEPOSPLUS:
2037 case OP_TYPEPOSQUERY:
2038 case OP_TYPEPOSSTAR:
2039 case OP_TYPEPOSUPTO:
2040 case OP_TYPEQUERY:
2041 case OP_TYPESTAR:
2042 case OP_TYPEUPTO:
2043 case OP_UPTO:
2044 case OP_UPTOI:
2045 return -1;
2046
2047
2048
2049
2050 default:
2051 return -4;
2052 }
2053 }
2054
2055 }
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077 const pcre_uchar *
2078 PRIV(find_bracket)(const pcre_uchar *code, BOOL utf, int number)
2079 {
2080 for (;;)
2081 {
2082 register pcre_uchar c = *code;
2083
2084 if (c == OP_END) return NULL;
2085
2086
2087
2088
2089
2090 if (c == OP_XCLASS) code += GET(code, 1);
2091
2092
2093
2094 else if (c == OP_REVERSE)
2095 {
2096 if (number < 0) return (pcre_uchar *)code;
2097 code += PRIV(OP_lengths)[c];
2098 }
2099
2100
2101
2102 else if (c == OP_CBRA || c == OP_SCBRA ||
2103 c == OP_CBRAPOS || c == OP_SCBRAPOS)
2104 {
2105 int n = (int)GET2(code, 1+LINK_SIZE);
2106 if (n == number) return (pcre_uchar *)code;
2107 code += PRIV(OP_lengths)[c];
2108 }
2109
2110
2111
2112
2113
2114
2115 else
2116 {
2117 switch(c)
2118 {
2119 case OP_TYPESTAR:
2120 case OP_TYPEMINSTAR:
2121 case OP_TYPEPLUS:
2122 case OP_TYPEMINPLUS:
2123 case OP_TYPEQUERY:
2124 case OP_TYPEMINQUERY:
2125 case OP_TYPEPOSSTAR:
2126 case OP_TYPEPOSPLUS:
2127 case OP_TYPEPOSQUERY:
2128 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
2129 break;
2130
2131 case OP_TYPEUPTO:
2132 case OP_TYPEMINUPTO:
2133 case OP_TYPEEXACT:
2134 case OP_TYPEPOSUPTO:
2135 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2136 code += 2;
2137 break;
2138
2139 case OP_MARK:
2140 case OP_PRUNE_ARG:
2141 case OP_SKIP_ARG:
2142 case OP_THEN_ARG:
2143 code += code[1];
2144 break;
2145 }
2146
2147
2148
2149 code += PRIV(OP_lengths)[c];
2150
2151
2152
2153
2154
2155 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2156 if (utf) switch(c)
2157 {
2158 case OP_CHAR:
2159 case OP_CHARI:
2160 case OP_NOT:
2161 case OP_NOTI:
2162 case OP_EXACT:
2163 case OP_EXACTI:
2164 case OP_NOTEXACT:
2165 case OP_NOTEXACTI:
2166 case OP_UPTO:
2167 case OP_UPTOI:
2168 case OP_NOTUPTO:
2169 case OP_NOTUPTOI:
2170 case OP_MINUPTO:
2171 case OP_MINUPTOI:
2172 case OP_NOTMINUPTO:
2173 case OP_NOTMINUPTOI:
2174 case OP_POSUPTO:
2175 case OP_POSUPTOI:
2176 case OP_NOTPOSUPTO:
2177 case OP_NOTPOSUPTOI:
2178 case OP_STAR:
2179 case OP_STARI:
2180 case OP_NOTSTAR:
2181 case OP_NOTSTARI:
2182 case OP_MINSTAR:
2183 case OP_MINSTARI:
2184 case OP_NOTMINSTAR:
2185 case OP_NOTMINSTARI:
2186 case OP_POSSTAR:
2187 case OP_POSSTARI:
2188 case OP_NOTPOSSTAR:
2189 case OP_NOTPOSSTARI:
2190 case OP_PLUS:
2191 case OP_PLUSI:
2192 case OP_NOTPLUS:
2193 case OP_NOTPLUSI:
2194 case OP_MINPLUS:
2195 case OP_MINPLUSI:
2196 case OP_NOTMINPLUS:
2197 case OP_NOTMINPLUSI:
2198 case OP_POSPLUS:
2199 case OP_POSPLUSI:
2200 case OP_NOTPOSPLUS:
2201 case OP_NOTPOSPLUSI:
2202 case OP_QUERY:
2203 case OP_QUERYI:
2204 case OP_NOTQUERY:
2205 case OP_NOTQUERYI:
2206 case OP_MINQUERY:
2207 case OP_MINQUERYI:
2208 case OP_NOTMINQUERY:
2209 case OP_NOTMINQUERYI:
2210 case OP_POSQUERY:
2211 case OP_POSQUERYI:
2212 case OP_NOTPOSQUERY:
2213 case OP_NOTPOSQUERYI:
2214 if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
2215 break;
2216 }
2217 #else
2218 (void)(utf);
2219 #endif
2220 }
2221 }
2222 }
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240 static const pcre_uchar *
2241 find_recurse(const pcre_uchar *code, BOOL utf)
2242 {
2243 for (;;)
2244 {
2245 register pcre_uchar c = *code;
2246 if (c == OP_END) return NULL;
2247 if (c == OP_RECURSE) return code;
2248
2249
2250
2251
2252
2253 if (c == OP_XCLASS) code += GET(code, 1);
2254
2255
2256
2257
2258
2259
2260 else
2261 {
2262 switch(c)
2263 {
2264 case OP_TYPESTAR:
2265 case OP_TYPEMINSTAR:
2266 case OP_TYPEPLUS:
2267 case OP_TYPEMINPLUS:
2268 case OP_TYPEQUERY:
2269 case OP_TYPEMINQUERY:
2270 case OP_TYPEPOSSTAR:
2271 case OP_TYPEPOSPLUS:
2272 case OP_TYPEPOSQUERY:
2273 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
2274 break;
2275
2276 case OP_TYPEPOSUPTO:
2277 case OP_TYPEUPTO:
2278 case OP_TYPEMINUPTO:
2279 case OP_TYPEEXACT:
2280 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2281 code += 2;
2282 break;
2283
2284 case OP_MARK:
2285 case OP_PRUNE_ARG:
2286 case OP_SKIP_ARG:
2287 case OP_THEN_ARG:
2288 code += code[1];
2289 break;
2290 }
2291
2292
2293
2294 code += PRIV(OP_lengths)[c];
2295
2296
2297
2298
2299
2300 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2301 if (utf) switch(c)
2302 {
2303 case OP_CHAR:
2304 case OP_CHARI:
2305 case OP_NOT:
2306 case OP_NOTI:
2307 case OP_EXACT:
2308 case OP_EXACTI:
2309 case OP_NOTEXACT:
2310 case OP_NOTEXACTI:
2311 case OP_UPTO:
2312 case OP_UPTOI:
2313 case OP_NOTUPTO:
2314 case OP_NOTUPTOI:
2315 case OP_MINUPTO:
2316 case OP_MINUPTOI:
2317 case OP_NOTMINUPTO:
2318 case OP_NOTMINUPTOI:
2319 case OP_POSUPTO:
2320 case OP_POSUPTOI:
2321 case OP_NOTPOSUPTO:
2322 case OP_NOTPOSUPTOI:
2323 case OP_STAR:
2324 case OP_STARI:
2325 case OP_NOTSTAR:
2326 case OP_NOTSTARI:
2327 case OP_MINSTAR:
2328 case OP_MINSTARI:
2329 case OP_NOTMINSTAR:
2330 case OP_NOTMINSTARI:
2331 case OP_POSSTAR:
2332 case OP_POSSTARI:
2333 case OP_NOTPOSSTAR:
2334 case OP_NOTPOSSTARI:
2335 case OP_PLUS:
2336 case OP_PLUSI:
2337 case OP_NOTPLUS:
2338 case OP_NOTPLUSI:
2339 case OP_MINPLUS:
2340 case OP_MINPLUSI:
2341 case OP_NOTMINPLUS:
2342 case OP_NOTMINPLUSI:
2343 case OP_POSPLUS:
2344 case OP_POSPLUSI:
2345 case OP_NOTPOSPLUS:
2346 case OP_NOTPOSPLUSI:
2347 case OP_QUERY:
2348 case OP_QUERYI:
2349 case OP_NOTQUERY:
2350 case OP_NOTQUERYI:
2351 case OP_MINQUERY:
2352 case OP_MINQUERYI:
2353 case OP_NOTMINQUERY:
2354 case OP_NOTMINQUERYI:
2355 case OP_POSQUERY:
2356 case OP_POSQUERYI:
2357 case OP_NOTPOSQUERY:
2358 case OP_NOTPOSQUERYI:
2359 if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
2360 break;
2361 }
2362 #else
2363 (void)(utf);
2364 #endif
2365 }
2366 }
2367 }
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393 static BOOL
2394 could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2395 BOOL utf, compile_data *cd, recurse_check *recurses)
2396 {
2397 register pcre_uchar c;
2398 recurse_check this_recurse;
2399
2400 for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
2401 code < endcode;
2402 code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
2403 {
2404 const pcre_uchar *ccode;
2405
2406 c = *code;
2407
2408
2409
2410
2411 if (c == OP_ASSERT)
2412 {
2413 do code += GET(code, 1); while (*code == OP_ALT);
2414 c = *code;
2415 continue;
2416 }
2417
2418
2419
2420
2421
2422
2423
2424
2425 if (c == OP_RECURSE)
2426 {
2427 const pcre_uchar *scode = cd->start_code + GET(code, 1);
2428 const pcre_uchar *endgroup = scode;
2429 BOOL empty_branch;
2430
2431
2432
2433
2434
2435 if (cd->start_workspace != NULL)
2436 {
2437 const pcre_uchar *tcode;
2438 for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
2439 if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
2440 if (GET(scode, 1) == 0) return TRUE;
2441 }
2442
2443
2444
2445
2446
2447
2448 do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
2449 if (code >= scode && code <= endgroup) continue;
2450 else
2451 {
2452 recurse_check *r = recurses;
2453 for (r = recurses; r != NULL; r = r->prev)
2454 if (r->group == scode) break;
2455 if (r != NULL) continue;
2456 }
2457
2458
2459
2460
2461 empty_branch = FALSE;
2462 this_recurse.prev = recurses;
2463 this_recurse.group = scode;
2464
2465 do
2466 {
2467 if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
2468 {
2469 empty_branch = TRUE;
2470 break;
2471 }
2472 scode += GET(scode, 1);
2473 }
2474 while (*scode == OP_ALT);
2475
2476 if (!empty_branch) return FALSE;
2477 continue;
2478 }
2479
2480
2481
2482 if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||
2483 c == OP_BRAPOSZERO)
2484 {
2485 code += PRIV(OP_lengths)[c];
2486 do code += GET(code, 1); while (*code == OP_ALT);
2487 c = *code;
2488 continue;
2489 }
2490
2491
2492
2493
2494 if (c == OP_SBRA || c == OP_SBRAPOS ||
2495 c == OP_SCBRA || c == OP_SCBRAPOS)
2496 {
2497 do code += GET(code, 1); while (*code == OP_ALT);
2498 c = *code;
2499 continue;
2500 }
2501
2502
2503
2504 if (c == OP_BRA || c == OP_BRAPOS ||
2505 c == OP_CBRA || c == OP_CBRAPOS ||
2506 c == OP_ONCE || c == OP_ONCE_NC ||
2507 c == OP_COND || c == OP_SCOND)
2508 {
2509 BOOL empty_branch;
2510 if (GET(code, 1) == 0) return TRUE;
2511
2512
2513
2514
2515
2516 if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
2517 code += GET(code, 1);
2518 else
2519 {
2520 empty_branch = FALSE;
2521 do
2522 {
2523 if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,
2524 recurses)) empty_branch = TRUE;
2525 code += GET(code, 1);
2526 }
2527 while (*code == OP_ALT);
2528 if (!empty_branch) return FALSE;
2529 }
2530
2531 c = *code;
2532 continue;
2533 }
2534
2535
2536
2537 switch (c)
2538 {
2539
2540
2541
2542
2543
2544
2545 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2546 case OP_XCLASS:
2547 ccode = code += GET(code, 1);
2548 goto CHECK_CLASS_REPEAT;
2549 #endif
2550
2551 case OP_CLASS:
2552 case OP_NCLASS:
2553 ccode = code + PRIV(OP_lengths)[OP_CLASS];
2554
2555 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2556 CHECK_CLASS_REPEAT:
2557 #endif
2558
2559 switch (*ccode)
2560 {
2561 case OP_CRSTAR:
2562 case OP_CRMINSTAR:
2563 case OP_CRQUERY:
2564 case OP_CRMINQUERY:
2565 case OP_CRPOSSTAR:
2566 case OP_CRPOSQUERY:
2567 break;
2568
2569 default:
2570 case OP_CRPLUS:
2571 case OP_CRMINPLUS:
2572 case OP_CRPOSPLUS:
2573 return FALSE;
2574
2575 case OP_CRRANGE:
2576 case OP_CRMINRANGE:
2577 case OP_CRPOSRANGE:
2578 if (GET2(ccode, 1) > 0) return FALSE;
2579 break;
2580 }
2581 break;
2582
2583
2584
2585 case OP_ANY:
2586 case OP_ALLANY:
2587 case OP_ANYBYTE:
2588
2589 case OP_PROP:
2590 case OP_NOTPROP:
2591 case OP_ANYNL:
2592
2593 case OP_NOT_HSPACE:
2594 case OP_HSPACE:
2595 case OP_NOT_VSPACE:
2596 case OP_VSPACE:
2597 case OP_EXTUNI:
2598
2599 case OP_NOT_DIGIT:
2600 case OP_DIGIT:
2601 case OP_NOT_WHITESPACE:
2602 case OP_WHITESPACE:
2603 case OP_NOT_WORDCHAR:
2604 case OP_WORDCHAR:
2605
2606 case OP_CHAR:
2607 case OP_CHARI:
2608 case OP_NOT:
2609 case OP_NOTI:
2610
2611 case OP_PLUS:
2612 case OP_PLUSI:
2613 case OP_MINPLUS:
2614 case OP_MINPLUSI:
2615
2616 case OP_NOTPLUS:
2617 case OP_NOTPLUSI:
2618 case OP_NOTMINPLUS:
2619 case OP_NOTMINPLUSI:
2620
2621 case OP_POSPLUS:
2622 case OP_POSPLUSI:
2623 case OP_NOTPOSPLUS:
2624 case OP_NOTPOSPLUSI:
2625
2626 case OP_EXACT:
2627 case OP_EXACTI:
2628 case OP_NOTEXACT:
2629 case OP_NOTEXACTI:
2630
2631 case OP_TYPEPLUS:
2632 case OP_TYPEMINPLUS:
2633 case OP_TYPEPOSPLUS:
2634 case OP_TYPEEXACT:
2635
2636 return FALSE;
2637
2638
2639
2640
2641 case OP_TYPESTAR:
2642 case OP_TYPEMINSTAR:
2643 case OP_TYPEPOSSTAR:
2644 case OP_TYPEQUERY:
2645 case OP_TYPEMINQUERY:
2646 case OP_TYPEPOSQUERY:
2647 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
2648 break;
2649
2650
2651
2652 case OP_TYPEUPTO:
2653 case OP_TYPEMINUPTO:
2654 case OP_TYPEPOSUPTO:
2655 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
2656 code += 2;
2657 break;
2658
2659
2660
2661 case OP_KET:
2662 case OP_KETRMAX:
2663 case OP_KETRMIN:
2664 case OP_KETRPOS:
2665 case OP_ALT:
2666 return TRUE;
2667
2668
2669
2670
2671
2672 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2673 case OP_STAR:
2674 case OP_STARI:
2675 case OP_NOTSTAR:
2676 case OP_NOTSTARI:
2677
2678 case OP_MINSTAR:
2679 case OP_MINSTARI:
2680 case OP_NOTMINSTAR:
2681 case OP_NOTMINSTARI:
2682
2683 case OP_POSSTAR:
2684 case OP_POSSTARI:
2685 case OP_NOTPOSSTAR:
2686 case OP_NOTPOSSTARI:
2687
2688 case OP_QUERY:
2689 case OP_QUERYI:
2690 case OP_NOTQUERY:
2691 case OP_NOTQUERYI:
2692
2693 case OP_MINQUERY:
2694 case OP_MINQUERYI:
2695 case OP_NOTMINQUERY:
2696 case OP_NOTMINQUERYI:
2697
2698 case OP_POSQUERY:
2699 case OP_POSQUERYI:
2700 case OP_NOTPOSQUERY:
2701 case OP_NOTPOSQUERYI:
2702
2703 if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
2704 break;
2705
2706 case OP_UPTO:
2707 case OP_UPTOI:
2708 case OP_NOTUPTO:
2709 case OP_NOTUPTOI:
2710
2711 case OP_MINUPTO:
2712 case OP_MINUPTOI:
2713 case OP_NOTMINUPTO:
2714 case OP_NOTMINUPTOI:
2715
2716 case OP_POSUPTO:
2717 case OP_POSUPTOI:
2718 case OP_NOTPOSUPTO:
2719 case OP_NOTPOSUPTOI:
2720
2721 if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
2722 break;
2723 #endif
2724
2725
2726
2727
2728 case OP_MARK:
2729 case OP_PRUNE_ARG:
2730 case OP_SKIP_ARG:
2731 case OP_THEN_ARG:
2732 code += code[1];
2733 break;
2734
2735
2736
2737 default:
2738 break;
2739 }
2740 }
2741
2742 return TRUE;
2743 }
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768 static BOOL
2769 could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
2770 branch_chain *bcptr, BOOL utf, compile_data *cd)
2771 {
2772 while (bcptr != NULL && bcptr->current_branch >= code)
2773 {
2774 if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
2775 return FALSE;
2776 bcptr = bcptr->outer;
2777 }
2778 return TRUE;
2779 }
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794 static pcre_uchar
2795 get_repeat_base(pcre_uchar c)
2796 {
2797 return (c > OP_TYPEPOSUPTO)? c :
2798 (c >= OP_TYPESTAR)? OP_TYPESTAR :
2799 (c >= OP_NOTSTARI)? OP_NOTSTARI :
2800 (c >= OP_NOTSTAR)? OP_NOTSTAR :
2801 (c >= OP_STARI)? OP_STARI :
2802 OP_STAR;
2803 }
2804
2805
2806
2807 #ifdef SUPPORT_UCP
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824 static BOOL
2825 check_char_prop(pcre_uint32 c, unsigned int ptype, unsigned int pdata,
2826 BOOL negated)
2827 {
2828 const pcre_uint32 *p;
2829 const ucd_record *prop = GET_UCD(c);
2830
2831 switch(ptype)
2832 {
2833 case PT_LAMP:
2834 return (prop->chartype == ucp_Lu ||
2835 prop->chartype == ucp_Ll ||
2836 prop->chartype == ucp_Lt) == negated;
2837
2838 case PT_GC:
2839 return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
2840
2841 case PT_PC:
2842 return (pdata == prop->chartype) == negated;
2843
2844 case PT_SC:
2845 return (pdata == prop->script) == negated;
2846
2847
2848
2849 case PT_ALNUM:
2850 return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2851 PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
2852
2853
2854
2855
2856
2857 case PT_SPACE:
2858 case PT_PXSPACE:
2859 switch(c)
2860 {
2861 HSPACE_CASES:
2862 VSPACE_CASES:
2863 return negated;
2864
2865 default:
2866 return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
2867 }
2868 break;
2869
2870 case PT_WORD:
2871 return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2872 PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2873 c == CHAR_UNDERSCORE) == negated;
2874
2875 case PT_CLIST:
2876 p = PRIV(ucd_caseless_sets) + prop->caseset;
2877 for (;;)
2878 {
2879 if (c < *p) return !negated;
2880 if (c == *p++) return negated;
2881 }
2882 break;
2883 }
2884
2885 return FALSE;
2886 }
2887 #endif
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912 static const pcre_uchar *
2913 get_chr_property_list(const pcre_uchar *code, BOOL utf,
2914 const pcre_uint8 *fcc, pcre_uint32 *list)
2915 {
2916 pcre_uchar c = *code;
2917 pcre_uchar base;
2918 const pcre_uchar *end;
2919 pcre_uint32 chr;
2920
2921 #ifdef SUPPORT_UCP
2922 pcre_uint32 *clist_dest;
2923 const pcre_uint32 *clist_src;
2924 #else
2925 utf = utf;
2926 #endif
2927
2928 list[0] = c;
2929 list[1] = FALSE;
2930 code++;
2931
2932 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
2933 {
2934 base = get_repeat_base(c);
2935 c -= (base - OP_STAR);
2936
2937 if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
2938 code += IMM2_SIZE;
2939
2940 list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT && c != OP_POSPLUS);
2941
2942 switch(base)
2943 {
2944 case OP_STAR:
2945 list[0] = OP_CHAR;
2946 break;
2947
2948 case OP_STARI:
2949 list[0] = OP_CHARI;
2950 break;
2951
2952 case OP_NOTSTAR:
2953 list[0] = OP_NOT;
2954 break;
2955
2956 case OP_NOTSTARI:
2957 list[0] = OP_NOTI;
2958 break;
2959
2960 case OP_TYPESTAR:
2961 list[0] = *code;
2962 code++;
2963 break;
2964 }
2965 c = list[0];
2966 }
2967
2968 switch(c)
2969 {
2970 case OP_NOT_DIGIT:
2971 case OP_DIGIT:
2972 case OP_NOT_WHITESPACE:
2973 case OP_WHITESPACE:
2974 case OP_NOT_WORDCHAR:
2975 case OP_WORDCHAR:
2976 case OP_ANY:
2977 case OP_ALLANY:
2978 case OP_ANYNL:
2979 case OP_NOT_HSPACE:
2980 case OP_HSPACE:
2981 case OP_NOT_VSPACE:
2982 case OP_VSPACE:
2983 case OP_EXTUNI:
2984 case OP_EODN:
2985 case OP_EOD:
2986 case OP_DOLL:
2987 case OP_DOLLM:
2988 return code;
2989
2990 case OP_CHAR:
2991 case OP_NOT:
2992 GETCHARINCTEST(chr, code);
2993 list[2] = chr;
2994 list[3] = NOTACHAR;
2995 return code;
2996
2997 case OP_CHARI:
2998 case OP_NOTI:
2999 list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
3000 GETCHARINCTEST(chr, code);
3001 list[2] = chr;
3002
3003 #ifdef SUPPORT_UCP
3004 if (chr < 128 || (chr < 256 && !utf))
3005 list[3] = fcc[chr];
3006 else
3007 list[3] = UCD_OTHERCASE(chr);
3008 #elif defined SUPPORT_UTF || !defined COMPILE_PCRE8
3009 list[3] = (chr < 256) ? fcc[chr] : chr;
3010 #else
3011 list[3] = fcc[chr];
3012 #endif
3013
3014
3015
3016 if (chr == list[3])
3017 list[3] = NOTACHAR;
3018 else
3019 list[4] = NOTACHAR;
3020 return code;
3021
3022 #ifdef SUPPORT_UCP
3023 case OP_PROP:
3024 case OP_NOTPROP:
3025 if (code[0] != PT_CLIST)
3026 {
3027 list[2] = code[0];
3028 list[3] = code[1];
3029 return code + 2;
3030 }
3031
3032
3033
3034 clist_src = PRIV(ucd_caseless_sets) + code[1];
3035 clist_dest = list + 2;
3036 code += 2;
3037
3038 do {
3039 if (clist_dest >= list + 8)
3040 {
3041
3042
3043 list[2] = code[0];
3044 list[3] = code[1];
3045 return code;
3046 }
3047 *clist_dest++ = *clist_src;
3048 }
3049 while(*clist_src++ != NOTACHAR);
3050
3051
3052
3053
3054 list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
3055 return code;
3056 #endif
3057
3058 case OP_NCLASS:
3059 case OP_CLASS:
3060 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3061 case OP_XCLASS:
3062 if (c == OP_XCLASS)
3063 end = code + GET(code, 0) - 1;
3064 else
3065 #endif
3066 end = code + 32 / sizeof(pcre_uchar);
3067
3068 switch(*end)
3069 {
3070 case OP_CRSTAR:
3071 case OP_CRMINSTAR:
3072 case OP_CRQUERY:
3073 case OP_CRMINQUERY:
3074 case OP_CRPOSSTAR:
3075 case OP_CRPOSQUERY:
3076 list[1] = TRUE;
3077 end++;
3078 break;
3079
3080 case OP_CRPLUS:
3081 case OP_CRMINPLUS:
3082 case OP_CRPOSPLUS:
3083 end++;
3084 break;
3085
3086 case OP_CRRANGE:
3087 case OP_CRMINRANGE:
3088 case OP_CRPOSRANGE:
3089 list[1] = (GET2(end, 1) == 0);
3090 end += 1 + 2 * IMM2_SIZE;
3091 break;
3092 }
3093 list[2] = (pcre_uint32)(end - code);
3094 return end;
3095 }
3096 return NULL;
3097 }
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117 static BOOL
3118 compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,
3119 const pcre_uint32 *base_list, const pcre_uchar *base_end, int *rec_limit)
3120 {
3121 pcre_uchar c;
3122 pcre_uint32 list[8];
3123 const pcre_uint32 *chr_ptr;
3124 const pcre_uint32 *ochr_ptr;
3125 const pcre_uint32 *list_ptr;
3126 const pcre_uchar *next_code;
3127 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3128 const pcre_uchar *xclass_flags;
3129 #endif
3130 const pcre_uint8 *class_bitset;
3131 const pcre_uint8 *set1, *set2, *set_end;
3132 pcre_uint32 chr;
3133 BOOL accepted, invert_bits;
3134 BOOL entered_a_group = FALSE;
3135
3136 if (*rec_limit == 0) return FALSE;
3137 --(*rec_limit);
3138
3139
3140
3141
3142
3143
3144 for(;;)
3145 {
3146
3147
3148
3149 c = *code;
3150
3151
3152
3153 if (c == OP_CALLOUT)
3154 {
3155 code += PRIV(OP_lengths)[c];
3156 continue;
3157 }
3158
3159 if (c == OP_ALT)
3160 {
3161 do code += GET(code, 1); while (*code == OP_ALT);
3162 c = *code;
3163 }
3164
3165 switch(c)
3166 {
3167 case OP_END:
3168 case OP_KETRPOS:
3169
3170
3171
3172
3173 return base_list[1] != 0;
3174
3175 case OP_KET:
3176
3177
3178
3179
3180 if (base_list[1] == 0) return FALSE;
3181
3182 switch(*(code - GET(code, 1)))
3183 {
3184 case OP_ASSERT:
3185 case OP_ASSERT_NOT:
3186 case OP_ASSERTBACK:
3187 case OP_ASSERTBACK_NOT:
3188 case OP_ONCE:
3189 case OP_ONCE_NC:
3190
3191
3192
3193
3194 return !entered_a_group;
3195 }
3196
3197 code += PRIV(OP_lengths)[c];
3198 continue;
3199
3200 case OP_ONCE:
3201 case OP_ONCE_NC:
3202 case OP_BRA:
3203 case OP_CBRA:
3204 next_code = code + GET(code, 1);
3205 code += PRIV(OP_lengths)[c];
3206
3207 while (*next_code == OP_ALT)
3208 {
3209 if (!compare_opcodes(code, utf, cd, base_list, base_end, rec_limit))
3210 return FALSE;
3211 code = next_code + 1 + LINK_SIZE;
3212 next_code += GET(next_code, 1);
3213 }
3214
3215 entered_a_group = TRUE;
3216 continue;
3217
3218 case OP_BRAZERO:
3219 case OP_BRAMINZERO:
3220
3221 next_code = code + 1;
3222 if (*next_code != OP_BRA && *next_code != OP_CBRA
3223 && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;
3224
3225 do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
3226
3227
3228
3229 next_code += 1 + LINK_SIZE;
3230 if (!compare_opcodes(next_code, utf, cd, base_list, base_end, rec_limit))
3231 return FALSE;
3232
3233 code += PRIV(OP_lengths)[c];
3234 continue;
3235
3236 default:
3237 break;
3238 }
3239
3240
3241
3242 code = get_chr_property_list(code, utf, cd->fcc, list);
3243 if (code == NULL) return FALSE;
3244
3245
3246
3247
3248 if (base_list[0] == OP_CHAR)
3249 {
3250 chr_ptr = base_list + 2;
3251 list_ptr = list;
3252 }
3253 else if (list[0] == OP_CHAR)
3254 {
3255 chr_ptr = list + 2;
3256 list_ptr = base_list;
3257 }
3258
3259
3260
3261 else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
3262 #ifdef COMPILE_PCRE8
3263
3264 || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
3265 #endif
3266 )
3267 {
3268 #ifdef COMPILE_PCRE8
3269 if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
3270 #else
3271 if (base_list[0] == OP_CLASS)
3272 #endif
3273 {
3274 set1 = (pcre_uint8 *)(base_end - base_list[2]);
3275 list_ptr = list;
3276 }
3277 else
3278 {
3279 set1 = (pcre_uint8 *)(code - list[2]);
3280 list_ptr = base_list;
3281 }
3282
3283 invert_bits = FALSE;
3284 switch(list_ptr[0])
3285 {
3286 case OP_CLASS:
3287 case OP_NCLASS:
3288 set2 = (pcre_uint8 *)
3289 ((list_ptr == list ? code : base_end) - list_ptr[2]);
3290 break;
3291
3292 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3293 case OP_XCLASS:
3294 xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
3295 if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
3296 if ((*xclass_flags & XCL_MAP) == 0)
3297 {
3298
3299 if (list[1] == 0) return TRUE;
3300
3301 continue;
3302 }
3303 set2 = (pcre_uint8 *)(xclass_flags + 1);
3304 break;
3305 #endif
3306
3307 case OP_NOT_DIGIT:
3308 invert_bits = TRUE;
3309
3310 case OP_DIGIT:
3311 set2 = (pcre_uint8 *)(cd->cbits + cbit_digit);
3312 break;
3313
3314 case OP_NOT_WHITESPACE:
3315 invert_bits = TRUE;
3316
3317 case OP_WHITESPACE:
3318 set2 = (pcre_uint8 *)(cd->cbits + cbit_space);
3319 break;
3320
3321 case OP_NOT_WORDCHAR:
3322 invert_bits = TRUE;
3323
3324 case OP_WORDCHAR:
3325 set2 = (pcre_uint8 *)(cd->cbits + cbit_word);
3326 break;
3327
3328 default:
3329 return FALSE;
3330 }
3331
3332
3333
3334 set_end = set1 + 32;
3335 if (invert_bits)
3336 {
3337 do
3338 {
3339 if ((*set1++ & ~(*set2++)) != 0) return FALSE;
3340 }
3341 while (set1 < set_end);
3342 }
3343 else
3344 {
3345 do
3346 {
3347 if ((*set1++ & *set2++) != 0) return FALSE;
3348 }
3349 while (set1 < set_end);
3350 }
3351
3352 if (list[1] == 0) return TRUE;
3353
3354 continue;
3355 }
3356
3357
3358
3359
3360 else
3361 {
3362 pcre_uint32 leftop, rightop;
3363
3364 leftop = base_list[0];
3365 rightop = list[0];
3366
3367 #ifdef SUPPORT_UCP
3368 accepted = FALSE;
3369 if (leftop == OP_PROP || leftop == OP_NOTPROP)
3370 {
3371 if (rightop == OP_EOD)
3372 accepted = TRUE;
3373 else if (rightop == OP_PROP || rightop == OP_NOTPROP)
3374 {
3375 int n;
3376 const pcre_uint8 *p;
3377 BOOL same = leftop == rightop;
3378 BOOL lisprop = leftop == OP_PROP;
3379 BOOL risprop = rightop == OP_PROP;
3380 BOOL bothprop = lisprop && risprop;
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391 n = propposstab[base_list[2]][list[2]];
3392 switch(n)
3393 {
3394 case 0: break;
3395 case 1: accepted = bothprop; break;
3396 case 2: accepted = (base_list[3] == list[3]) != same; break;
3397 case 3: accepted = !same; break;
3398
3399 case 4:
3400 accepted = risprop && catposstab[base_list[3]][list[3]] == same;
3401 break;
3402
3403 case 5:
3404 accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
3405 break;
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426 case 6:
3427 case 7:
3428 case 8:
3429 p = posspropstab[n-6];
3430 accepted = risprop && lisprop ==
3431 (list[3] != p[0] &&
3432 list[3] != p[1] &&
3433 (list[3] != p[2] || !lisprop));
3434 break;
3435
3436 case 9:
3437 case 10:
3438 case 11:
3439 p = posspropstab[n-9];
3440 accepted = lisprop && risprop ==
3441 (base_list[3] != p[0] &&
3442 base_list[3] != p[1] &&
3443 (base_list[3] != p[2] || !risprop));
3444 break;
3445
3446 case 12:
3447 case 13:
3448 case 14:
3449 p = posspropstab[n-12];
3450 accepted = risprop && lisprop ==
3451 (catposstab[p[0]][list[3]] &&
3452 catposstab[p[1]][list[3]] &&
3453 (list[3] != p[3] || !lisprop));
3454 break;
3455
3456 case 15:
3457 case 16:
3458 case 17:
3459 p = posspropstab[n-15];
3460 accepted = lisprop && risprop ==
3461 (catposstab[p[0]][base_list[3]] &&
3462 catposstab[p[1]][base_list[3]] &&
3463 (base_list[3] != p[3] || !risprop));
3464 break;
3465 }
3466 }
3467 }
3468
3469 else
3470 #endif
3471
3472 accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
3473 rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
3474 autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
3475
3476 if (!accepted) return FALSE;
3477
3478 if (list[1] == 0) return TRUE;
3479
3480 continue;
3481 }
3482
3483
3484
3485
3486 do
3487 {
3488 chr = *chr_ptr;
3489
3490 switch(list_ptr[0])
3491 {
3492 case OP_CHAR:
3493 ochr_ptr = list_ptr + 2;
3494 do
3495 {
3496 if (chr == *ochr_ptr) return FALSE;
3497 ochr_ptr++;
3498 }
3499 while(*ochr_ptr != NOTACHAR);
3500 break;
3501
3502 case OP_NOT:
3503 ochr_ptr = list_ptr + 2;
3504 do
3505 {
3506 if (chr == *ochr_ptr)
3507 break;
3508 ochr_ptr++;
3509 }
3510 while(*ochr_ptr != NOTACHAR);
3511 if (*ochr_ptr == NOTACHAR) return FALSE;
3512 break;
3513
3514
3515
3516
3517 case OP_DIGIT:
3518 if (chr < 256 && (cd->ctypes[chr] & ctype_digit) != 0) return FALSE;
3519 break;
3520
3521 case OP_NOT_DIGIT:
3522 if (chr > 255 || (cd->ctypes[chr] & ctype_digit) == 0) return FALSE;
3523 break;
3524
3525 case OP_WHITESPACE:
3526 if (chr < 256 && (cd->ctypes[chr] & ctype_space) != 0) return FALSE;
3527 break;
3528
3529 case OP_NOT_WHITESPACE:
3530 if (chr > 255 || (cd->ctypes[chr] & ctype_space) == 0) return FALSE;
3531 break;
3532
3533 case OP_WORDCHAR:
3534 if (chr < 255 && (cd->ctypes[chr] & ctype_word) != 0) return FALSE;
3535 break;
3536
3537 case OP_NOT_WORDCHAR:
3538 if (chr > 255 || (cd->ctypes[chr] & ctype_word) == 0) return FALSE;
3539 break;
3540
3541 case OP_HSPACE:
3542 switch(chr)
3543 {
3544 HSPACE_CASES: return FALSE;
3545 default: break;
3546 }
3547 break;
3548
3549 case OP_NOT_HSPACE:
3550 switch(chr)
3551 {
3552 HSPACE_CASES: break;
3553 default: return FALSE;
3554 }
3555 break;
3556
3557 case OP_ANYNL:
3558 case OP_VSPACE:
3559 switch(chr)
3560 {
3561 VSPACE_CASES: return FALSE;
3562 default: break;
3563 }
3564 break;
3565
3566 case OP_NOT_VSPACE:
3567 switch(chr)
3568 {
3569 VSPACE_CASES: break;
3570 default: return FALSE;
3571 }
3572 break;
3573
3574 case OP_DOLL:
3575 case OP_EODN:
3576 switch (chr)
3577 {
3578 case CHAR_CR:
3579 case CHAR_LF:
3580 case CHAR_VT:
3581 case CHAR_FF:
3582 case CHAR_NEL:
3583 #ifndef EBCDIC
3584 case 0x2028:
3585 case 0x2029:
3586 #endif
3587 return FALSE;
3588 }
3589 break;
3590
3591 case OP_EOD:
3592 break;
3593
3594 #ifdef SUPPORT_UCP
3595 case OP_PROP:
3596 case OP_NOTPROP:
3597 if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
3598 list_ptr[0] == OP_NOTPROP))
3599 return FALSE;
3600 break;
3601 #endif
3602
3603 case OP_NCLASS:
3604 if (chr > 255) return FALSE;
3605
3606
3607 case OP_CLASS:
3608 if (chr > 255) break;
3609 class_bitset = (pcre_uint8 *)
3610 ((list_ptr == list ? code : base_end) - list_ptr[2]);
3611 if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
3612 break;
3613
3614 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3615 case OP_XCLASS:
3616 if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
3617 list_ptr[2] + LINK_SIZE, utf)) return FALSE;
3618 break;
3619 #endif
3620
3621 default:
3622 return FALSE;
3623 }
3624
3625 chr_ptr++;
3626 }
3627 while(*chr_ptr != NOTACHAR);
3628
3629
3630
3631 if (list[1] == 0) return TRUE;
3632 }
3633
3634
3635
3636
3637 }
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656 static void
3657 auto_possessify(pcre_uchar *code, BOOL utf, const compile_data *cd)
3658 {
3659 register pcre_uchar c;
3660 const pcre_uchar *end;
3661 pcre_uchar *repeat_opcode;
3662 pcre_uint32 list[8];
3663 int rec_limit;
3664
3665 for (;;)
3666 {
3667 c = *code;
3668
3669
3670
3671
3672
3673
3674
3675 if (c >= OP_TABLE_LENGTH) return;
3676
3677 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
3678 {
3679 c -= get_repeat_base(c) - OP_STAR;
3680 end = (c <= OP_MINUPTO) ?
3681 get_chr_property_list(code, utf, cd->fcc, list) : NULL;
3682 list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
3683
3684 rec_limit = 1000;
3685 if (end != NULL && compare_opcodes(end, utf, cd, list, end, &rec_limit))
3686 {
3687 switch(c)
3688 {
3689 case OP_STAR:
3690 *code += OP_POSSTAR - OP_STAR;
3691 break;
3692
3693 case OP_MINSTAR:
3694 *code += OP_POSSTAR - OP_MINSTAR;
3695 break;
3696
3697 case OP_PLUS:
3698 *code += OP_POSPLUS - OP_PLUS;
3699 break;
3700
3701 case OP_MINPLUS:
3702 *code += OP_POSPLUS - OP_MINPLUS;
3703 break;
3704
3705 case OP_QUERY:
3706 *code += OP_POSQUERY - OP_QUERY;
3707 break;
3708
3709 case OP_MINQUERY:
3710 *code += OP_POSQUERY - OP_MINQUERY;
3711 break;
3712
3713 case OP_UPTO:
3714 *code += OP_POSUPTO - OP_UPTO;
3715 break;
3716
3717 case OP_MINUPTO:
3718 *code += OP_POSUPTO - OP_MINUPTO;
3719 break;
3720 }
3721 }
3722 c = *code;
3723 }
3724 else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
3725 {
3726 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3727 if (c == OP_XCLASS)
3728 repeat_opcode = code + GET(code, 1);
3729 else
3730 #endif
3731 repeat_opcode = code + 1 + (32 / sizeof(pcre_uchar));
3732
3733 c = *repeat_opcode;
3734 if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
3735 {
3736
3737 end = get_chr_property_list(code, utf, cd->fcc, list);
3738
3739 list[1] = (c & 1) == 0;
3740
3741 rec_limit = 1000;
3742 if (compare_opcodes(end, utf, cd, list, end, &rec_limit))
3743 {
3744 switch (c)
3745 {
3746 case OP_CRSTAR:
3747 case OP_CRMINSTAR:
3748 *repeat_opcode = OP_CRPOSSTAR;
3749 break;
3750
3751 case OP_CRPLUS:
3752 case OP_CRMINPLUS:
3753 *repeat_opcode = OP_CRPOSPLUS;
3754 break;
3755
3756 case OP_CRQUERY:
3757 case OP_CRMINQUERY:
3758 *repeat_opcode = OP_CRPOSQUERY;
3759 break;
3760
3761 case OP_CRRANGE:
3762 case OP_CRMINRANGE:
3763 *repeat_opcode = OP_CRPOSRANGE;
3764 break;
3765 }
3766 }
3767 }
3768 c = *code;
3769 }
3770
3771 switch(c)
3772 {
3773 case OP_END:
3774 return;
3775
3776 case OP_TYPESTAR:
3777 case OP_TYPEMINSTAR:
3778 case OP_TYPEPLUS:
3779 case OP_TYPEMINPLUS:
3780 case OP_TYPEQUERY:
3781 case OP_TYPEMINQUERY:
3782 case OP_TYPEPOSSTAR:
3783 case OP_TYPEPOSPLUS:
3784 case OP_TYPEPOSQUERY:
3785 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
3786 break;
3787
3788 case OP_TYPEUPTO:
3789 case OP_TYPEMINUPTO:
3790 case OP_TYPEEXACT:
3791 case OP_TYPEPOSUPTO:
3792 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
3793 code += 2;
3794 break;
3795
3796 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3797 case OP_XCLASS:
3798 code += GET(code, 1);
3799 break;
3800 #endif
3801
3802 case OP_MARK:
3803 case OP_PRUNE_ARG:
3804 case OP_SKIP_ARG:
3805 case OP_THEN_ARG:
3806 code += code[1];
3807 break;
3808 }
3809
3810
3811
3812 code += PRIV(OP_lengths)[c];
3813
3814
3815
3816
3817
3818 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3819 if (utf) switch(c)
3820 {
3821 case OP_CHAR:
3822 case OP_CHARI:
3823 case OP_NOT:
3824 case OP_NOTI:
3825 case OP_STAR:
3826 case OP_MINSTAR:
3827 case OP_PLUS:
3828 case OP_MINPLUS:
3829 case OP_QUERY:
3830 case OP_MINQUERY:
3831 case OP_UPTO:
3832 case OP_MINUPTO:
3833 case OP_EXACT:
3834 case OP_POSSTAR:
3835 case OP_POSPLUS:
3836 case OP_POSQUERY:
3837 case OP_POSUPTO:
3838 case OP_STARI:
3839 case OP_MINSTARI:
3840 case OP_PLUSI:
3841 case OP_MINPLUSI:
3842 case OP_QUERYI:
3843 case OP_MINQUERYI:
3844 case OP_UPTOI:
3845 case OP_MINUPTOI:
3846 case OP_EXACTI:
3847 case OP_POSSTARI:
3848 case OP_POSPLUSI:
3849 case OP_POSQUERYI:
3850 case OP_POSUPTOI:
3851 case OP_NOTSTAR:
3852 case OP_NOTMINSTAR:
3853 case OP_NOTPLUS:
3854 case OP_NOTMINPLUS:
3855 case OP_NOTQUERY:
3856 case OP_NOTMINQUERY:
3857 case OP_NOTUPTO:
3858 case OP_NOTMINUPTO:
3859 case OP_NOTEXACT:
3860 case OP_NOTPOSSTAR:
3861 case OP_NOTPOSPLUS:
3862 case OP_NOTPOSQUERY:
3863 case OP_NOTPOSUPTO:
3864 case OP_NOTSTARI:
3865 case OP_NOTMINSTARI:
3866 case OP_NOTPLUSI:
3867 case OP_NOTMINPLUSI:
3868 case OP_NOTQUERYI:
3869 case OP_NOTMINQUERYI:
3870 case OP_NOTUPTOI:
3871 case OP_NOTMINUPTOI:
3872 case OP_NOTEXACTI:
3873 case OP_NOTPOSSTARI:
3874 case OP_NOTPOSPLUSI:
3875 case OP_NOTPOSQUERYI:
3876 case OP_NOTPOSUPTOI:
3877 if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
3878 break;
3879 }
3880 #else
3881 (void)(utf);
3882 #endif
3883 }
3884 }
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930 static BOOL
3931 check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
3932 {
3933 pcre_uchar terminator;
3934 terminator = *(++ptr);
3935 for (++ptr; *ptr != CHAR_NULL; ptr++)
3936 {
3937 if (*ptr == CHAR_BACKSLASH &&
3938 (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET ||
3939 ptr[1] == CHAR_BACKSLASH))
3940 ptr++;
3941 else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) ||
3942 *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
3943 else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
3944 {
3945 *endptr = ptr;
3946 return TRUE;
3947 }
3948 }
3949 return FALSE;
3950 }
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969 static int
3970 check_posix_name(const pcre_uchar *ptr, int len)
3971 {
3972 const char *pn = posix_names;
3973 register int yield = 0;
3974 while (posix_name_lengths[yield] != 0)
3975 {
3976 if (len == posix_name_lengths[yield] &&
3977 STRNCMP_UC_C8(ptr, pn, (unsigned int)len) == 0) return yield;
3978 pn += posix_name_lengths[yield] + 1;
3979 yield++;
3980 }
3981 return -1;
3982 }
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017 static void
4018 adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
4019 size_t save_hwm_offset)
4020 {
4021 int offset;
4022 pcre_uchar *hc;
4023 pcre_uchar *ptr = group;
4024
4025 while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
4026 {
4027 for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
4028 hc += LINK_SIZE)
4029 {
4030 offset = (int)GET(hc, 0);
4031 if (cd->start_code + offset == ptr + 1) break;
4032 }
4033
4034
4035
4036
4037 if (hc >= cd->hwm)
4038 {
4039 offset = (int)GET(ptr, 1);
4040 if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);
4041 }
4042
4043 ptr += 1 + LINK_SIZE;
4044 }
4045
4046
4047
4048 for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
4049 hc += LINK_SIZE)
4050 {
4051 offset = (int)GET(hc, 0);
4052 PUT(hc, 0, offset + adjust);
4053 }
4054 }
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073 static pcre_uchar *
4074 auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd)
4075 {
4076 *code++ = OP_CALLOUT;
4077 *code++ = 255;
4078 PUT(code, 0, (int)(ptr - cd->start_pattern));
4079 PUT(code, LINK_SIZE, 0);
4080 return code + 2 * LINK_SIZE;
4081 }
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101 static void
4102 complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd)
4103 {
4104 int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
4105 PUT(previous_callout, 2 + LINK_SIZE, length);
4106 }
4107
4108
4109
4110 #ifdef SUPPORT_UCP
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133 static int
4134 get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr,
4135 pcre_uint32 *odptr)
4136 {
4137 pcre_uint32 c, othercase, next;
4138 unsigned int co;
4139
4140
4141
4142
4143 for (c = *cptr; c <= d; c++)
4144 {
4145 if ((co = UCD_CASESET(c)) != 0)
4146 {
4147 *ocptr = c++;
4148 *cptr = c;
4149 return (int)co;
4150 }
4151 if ((othercase = UCD_OTHERCASE(c)) != c) break;
4152 }
4153
4154 if (c > d) return -1;
4155
4156
4157
4158
4159
4160 *ocptr = othercase;
4161 next = othercase + 1;
4162
4163 for (++c; c <= d; c++)
4164 {
4165 if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break;
4166 next++;
4167 }
4168
4169 *odptr = next - 1;
4170 *cptr = c;
4171 return 0;
4172 }
4173 #endif
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198 static int
4199 add_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
4200 compile_data *cd, pcre_uint32 start, pcre_uint32 end)
4201 {
4202 pcre_uint32 c;
4203 pcre_uint32 classbits_end = (end <= 0xff ? end : 0xff);
4204 int n8 = 0;
4205
4206
4207
4208
4209
4210
4211 if ((options & PCRE_CASELESS) != 0)
4212 {
4213 #ifdef SUPPORT_UCP
4214 if ((options & PCRE_UTF8) != 0)
4215 {
4216 int rc;
4217 pcre_uint32 oc, od;
4218
4219 options &= ~PCRE_CASELESS;
4220 c = start;
4221
4222 while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
4223 {
4224
4225
4226 if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd,
4227 PRIV(ucd_caseless_sets) + rc, oc);
4228
4229
4230
4231 else if (oc >= start && od <= end) continue;
4232
4233
4234
4235
4236
4237 else if (oc < start && od >= start - 1) start = oc;
4238 else if (od > end && oc <= end + 1)
4239 {
4240 end = od;
4241 if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff);
4242 }
4243 else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od);
4244 }
4245 }
4246 else
4247 #endif
4248
4249
4250
4251 for (c = start; c <= classbits_end; c++)
4252 {
4253 SETBIT(classbits, cd->fcc[c]);
4254 n8++;
4255 }
4256 }
4257
4258
4259
4260
4261
4262 #if defined COMPILE_PCRE8
4263 #ifdef SUPPORT_UTF
4264 if ((options & PCRE_UTF8) == 0)
4265 #endif
4266 if (end > 0xff) end = 0xff;
4267
4268 #elif defined COMPILE_PCRE16
4269 #ifdef SUPPORT_UTF
4270 if ((options & PCRE_UTF16) == 0)
4271 #endif
4272 if (end > 0xffff) end = 0xffff;
4273
4274 #endif
4275
4276
4277
4278 for (c = start; c <= classbits_end; c++)
4279 {
4280
4281 SETBIT(classbits, c);
4282 n8++;
4283 }
4284
4285 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4286 if (start <= 0xff) start = 0xff + 1;
4287
4288 if (end >= start)
4289 {
4290 pcre_uchar *uchardata = *uchardptr;
4291 #ifdef SUPPORT_UTF
4292 if ((options & PCRE_UTF8) != 0)
4293 {
4294 if (start < end)
4295 {
4296 *uchardata++ = XCL_RANGE;
4297 uchardata += PRIV(ord2utf)(start, uchardata);
4298 uchardata += PRIV(ord2utf)(end, uchardata);
4299 }
4300 else if (start == end)
4301 {
4302 *uchardata++ = XCL_SINGLE;
4303 uchardata += PRIV(ord2utf)(start, uchardata);
4304 }
4305 }
4306 else
4307 #endif
4308
4309
4310
4311
4312 #ifdef COMPILE_PCRE8
4313 {}
4314 #else
4315 if (start < end)
4316 {
4317 *uchardata++ = XCL_RANGE;
4318 *uchardata++ = start;
4319 *uchardata++ = end;
4320 }
4321 else if (start == end)
4322 {
4323 *uchardata++ = XCL_SINGLE;
4324 *uchardata++ = start;
4325 }
4326 #endif
4327
4328 *uchardptr = uchardata;
4329 }
4330 #endif
4331
4332 return n8;
4333 }
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362 static int
4363 add_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
4364 compile_data *cd, const pcre_uint32 *p, unsigned int except)
4365 {
4366 int n8 = 0;
4367 while (p[0] < NOTACHAR)
4368 {
4369 int n = 0;
4370 if (p[0] != except)
4371 {
4372 while(p[n+1] == p[0] + n + 1) n++;
4373 n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]);
4374 }
4375 p += n + 1;
4376 }
4377 return n8;
4378 }
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400 static int
4401 add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr,
4402 int options, compile_data *cd, const pcre_uint32 *p)
4403 {
4404 BOOL utf = (options & PCRE_UTF8) != 0;
4405 int n8 = 0;
4406 if (p[0] > 0)
4407 n8 += add_to_class(classbits, uchardptr, options, cd, 0, p[0] - 1);
4408 while (p[0] < NOTACHAR)
4409 {
4410 while (p[1] == p[0] + 1) p++;
4411 n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,
4412 (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
4413 p++;
4414 }
4415 return n8;
4416 }
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449 static BOOL
4450 compile_branch(int *optionsptr, pcre_uchar **codeptr,
4451 const pcre_uchar **ptrptr, int *errorcodeptr,
4452 pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr,
4453 pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr,
4454 branch_chain *bcptr, int cond_depth,
4455 compile_data *cd, int *lengthptr)
4456 {
4457 int repeat_type, op_type;
4458 int repeat_min = 0, repeat_max = 0;
4459 int bravalue = 0;
4460 int greedy_default, greedy_non_default;
4461 pcre_uint32 firstchar, reqchar;
4462 pcre_int32 firstcharflags, reqcharflags;
4463 pcre_uint32 zeroreqchar, zerofirstchar;
4464 pcre_int32 zeroreqcharflags, zerofirstcharflags;
4465 pcre_int32 req_caseopt, reqvary, tempreqvary;
4466 int options = *optionsptr;
4467 int after_manual_callout = 0;
4468 int length_prevgroup = 0;
4469 register pcre_uint32 c;
4470 int escape;
4471 register pcre_uchar *code = *codeptr;
4472 pcre_uchar *last_code = code;
4473 pcre_uchar *orig_code = code;
4474 pcre_uchar *tempcode;
4475 BOOL inescq = FALSE;
4476 BOOL groupsetfirstchar = FALSE;
4477 const pcre_uchar *ptr = *ptrptr;
4478 const pcre_uchar *tempptr;
4479 const pcre_uchar *nestptr = NULL;
4480 pcre_uchar *previous = NULL;
4481 pcre_uchar *previous_callout = NULL;
4482 size_t item_hwm_offset = 0;
4483 pcre_uint8 classbits[32];
4484
4485
4486
4487
4488
4489 #ifdef SUPPORT_UTF
4490
4491 BOOL utf = (options & PCRE_UTF8) != 0;
4492 #ifndef COMPILE_PCRE32
4493 pcre_uchar utf_chars[6];
4494 #endif
4495 #else
4496 BOOL utf = FALSE;
4497 #endif
4498
4499
4500
4501
4502
4503
4504 pcre_uchar *class_uchardata;
4505 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4506 BOOL xclass;
4507 pcre_uchar *class_uchardata_base;
4508 #endif
4509
4510 #ifdef PCRE_DEBUG
4511 if (lengthptr != NULL) DPRINTF((">> start branch\n"));
4512 #endif
4513
4514
4515
4516 greedy_default = ((options & PCRE_UNGREEDY) != 0);
4517 greedy_non_default = greedy_default ^ 1;
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529 firstchar = reqchar = zerofirstchar = zeroreqchar = 0;
4530 firstcharflags = reqcharflags = zerofirstcharflags = zeroreqcharflags = REQ_UNSET;
4531
4532
4533
4534
4535
4536
4537
4538 req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
4539
4540
4541
4542 for (;; ptr++)
4543 {
4544 BOOL negate_class;
4545 BOOL should_flip_negation;
4546 BOOL possessive_quantifier;
4547 BOOL is_quantifier;
4548 BOOL is_recurse;
4549 BOOL reset_bracount;
4550 int class_has_8bitchar;
4551 int class_one_char;
4552 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4553 BOOL xclass_has_prop;
4554 #endif
4555 int newoptions;
4556 int recno;
4557 int refsign;
4558 int skipbytes;
4559 pcre_uint32 subreqchar, subfirstchar;
4560 pcre_int32 subreqcharflags, subfirstcharflags;
4561 int terminator;
4562 unsigned int mclength;
4563 unsigned int tempbracount;
4564 pcre_uint32 ec;
4565 pcre_uchar mcbuffer[8];
4566
4567
4568
4569 c = *ptr;
4570
4571
4572
4573
4574 if (c == CHAR_NULL && nestptr != NULL)
4575 {
4576 ptr = nestptr;
4577 nestptr = NULL;
4578 c = *ptr;
4579 }
4580
4581
4582
4583
4584 if (lengthptr != NULL)
4585 {
4586 #ifdef PCRE_DEBUG
4587 if (code > cd->hwm) cd->hwm = code;
4588 #endif
4589 if (code > cd->start_workspace + cd->workspace_size -
4590 WORK_SIZE_SAFETY_MARGIN)
4591 {
4592 *errorcodeptr = ERR52;
4593 goto FAILED;
4594 }
4595
4596
4597
4598
4599
4600
4601
4602 if (code < last_code) code = last_code;
4603
4604
4605
4606 if (OFLOW_MAX - *lengthptr < code - last_code)
4607 {
4608 *errorcodeptr = ERR20;
4609 goto FAILED;
4610 }
4611
4612 *lengthptr += (int)(code - last_code);
4613 DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr,
4614 (int)(code - last_code), c, c));
4615
4616
4617
4618
4619
4620 if (previous != NULL)
4621 {
4622 if (previous > orig_code)
4623 {
4624 memmove(orig_code, previous, IN_UCHARS(code - previous));
4625 code -= previous - orig_code;
4626 previous = orig_code;
4627 }
4628 }
4629 else code = orig_code;
4630
4631
4632
4633
4634 last_code = code;
4635 }
4636
4637
4638
4639
4640 else if (cd->hwm > cd->start_workspace + cd->workspace_size)
4641 {
4642 *errorcodeptr = ERR52;
4643 goto FAILED;
4644 }
4645
4646
4647
4648 if (inescq && c != CHAR_NULL)
4649 {
4650 if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
4651 {
4652 inescq = FALSE;
4653 ptr++;
4654 continue;
4655 }
4656 else
4657 {
4658 if (previous_callout != NULL)
4659 {
4660 if (lengthptr == NULL)
4661 complete_callout(previous_callout, ptr, cd);
4662 previous_callout = NULL;
4663 }
4664 if ((options & PCRE_AUTO_CALLOUT) != 0)
4665 {
4666 previous_callout = code;
4667 code = auto_callout(code, ptr, cd);
4668 }
4669 goto NORMAL_CHAR;
4670 }
4671
4672 }
4673
4674
4675
4676
4677 if ((options & PCRE_EXTENDED) != 0)
4678 {
4679 for (;;)
4680 {
4681 while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
4682 if (c != CHAR_NUMBER_SIGN) break;
4683 ptr++;
4684 while (*ptr != CHAR_NULL)
4685 {
4686 if (IS_NEWLINE(ptr))
4687 {
4688 ptr += cd->nllen;
4689 break;
4690 }
4691 ptr++;
4692 #ifdef SUPPORT_UTF
4693 if (utf) FORWARDCHAR(ptr);
4694 #endif
4695 }
4696 c = *ptr;
4697 }
4698 }
4699
4700
4701
4702 is_quantifier =
4703 c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
4704 (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
4705
4706
4707
4708
4709 if (!is_quantifier && previous_callout != NULL && nestptr == NULL &&
4710 after_manual_callout-- <= 0)
4711 {
4712 if (lengthptr == NULL)
4713 complete_callout(previous_callout, ptr, cd);
4714 previous_callout = NULL;
4715 }
4716
4717
4718
4719
4720 if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL)
4721 {
4722 previous_callout = code;
4723 code = auto_callout(code, ptr, cd);
4724 }
4725
4726
4727
4728 switch(c)
4729 {
4730
4731 case CHAR_NULL:
4732 case CHAR_VERTICAL_LINE:
4733 case CHAR_RIGHT_PARENTHESIS:
4734 *firstcharptr = firstchar;
4735 *firstcharflagsptr = firstcharflags;
4736 *reqcharptr = reqchar;
4737 *reqcharflagsptr = reqcharflags;
4738 *codeptr = code;
4739 *ptrptr = ptr;
4740 if (lengthptr != NULL)
4741 {
4742 if (OFLOW_MAX - *lengthptr < code - last_code)
4743 {
4744 *errorcodeptr = ERR20;
4745 goto FAILED;
4746 }
4747 *lengthptr += (int)(code - last_code);
4748 DPRINTF((">> end branch\n"));
4749 }
4750 return TRUE;
4751
4752
4753
4754
4755
4756
4757 case CHAR_CIRCUMFLEX_ACCENT:
4758 previous = NULL;
4759 if ((options & PCRE_MULTILINE) != 0)
4760 {
4761 if (firstcharflags == REQ_UNSET)
4762 zerofirstcharflags = firstcharflags = REQ_NONE;
4763 *code++ = OP_CIRCM;
4764 }
4765 else *code++ = OP_CIRC;
4766 break;
4767
4768 case CHAR_DOLLAR_SIGN:
4769 previous = NULL;
4770 *code++ = ((options & PCRE_MULTILINE) != 0)? OP_DOLLM : OP_DOLL;
4771 break;
4772
4773
4774
4775
4776 case CHAR_DOT:
4777 if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
4778 zerofirstchar = firstchar;
4779 zerofirstcharflags = firstcharflags;
4780 zeroreqchar = reqchar;
4781 zeroreqcharflags = reqcharflags;
4782 previous = code;
4783 item_hwm_offset = cd->hwm - cd->start_workspace;
4784 *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
4785 break;
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803 case CHAR_RIGHT_SQUARE_BRACKET:
4804 if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
4805 {
4806 *errorcodeptr = ERR64;
4807 goto FAILED;
4808 }
4809 goto NORMAL_CHAR;
4810
4811
4812
4813
4814
4815
4816
4817 case CHAR_LEFT_SQUARE_BRACKET:
4818 if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0)
4819 {
4820 nestptr = ptr + 7;
4821 ptr = sub_start_of_word - 1;
4822 continue;
4823 }
4824
4825 if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
4826 {
4827 nestptr = ptr + 7;
4828 ptr = sub_end_of_word - 1;
4829 continue;
4830 }
4831
4832
4833
4834 previous = code;
4835 item_hwm_offset = cd->hwm - cd->start_workspace;
4836
4837
4838
4839
4840 if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
4841 ptr[1] == CHAR_EQUALS_SIGN) &&
4842 check_posix_syntax(ptr, &tempptr))
4843 {
4844 *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
4845 goto FAILED;
4846 }
4847
4848
4849
4850
4851
4852 negate_class = FALSE;
4853 for (;;)
4854 {
4855 c = *(++ptr);
4856 if (c == CHAR_BACKSLASH)
4857 {
4858 if (ptr[1] == CHAR_E)
4859 ptr++;
4860 else if (STRNCMP_UC_C8(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0)
4861 ptr += 3;
4862 else
4863 break;
4864 }
4865 else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
4866 negate_class = TRUE;
4867 else break;
4868 }
4869
4870
4871
4872
4873
4874
4875 if (c == CHAR_RIGHT_SQUARE_BRACKET &&
4876 (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
4877 {
4878 *code++ = negate_class? OP_ALLANY : OP_FAIL;
4879 if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
4880 zerofirstchar = firstchar;
4881 zerofirstcharflags = firstcharflags;
4882 break;
4883 }
4884
4885
4886
4887
4888
4889 should_flip_negation = FALSE;
4890
4891
4892
4893
4894 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4895 xclass = FALSE;
4896 class_uchardata = code + LINK_SIZE + 2;
4897 class_uchardata_base = class_uchardata;
4898 #endif
4899
4900
4901
4902
4903
4904
4905
4906 class_has_8bitchar = 0;
4907 class_one_char = 0;
4908 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4909 xclass_has_prop = FALSE;
4910 #endif
4911
4912
4913
4914
4915
4916
4917 memset(classbits, 0, 32 * sizeof(pcre_uint8));
4918
4919
4920
4921
4922
4923 if (c != CHAR_NULL) do
4924 {
4925 const pcre_uchar *oldptr;
4926
4927 #ifdef SUPPORT_UTF
4928 if (utf && HAS_EXTRALEN(c))
4929 {
4930 GETCHARLEN(c, ptr, ptr);
4931 }
4932 #endif
4933
4934 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4935
4936
4937
4938
4939
4940
4941 if (class_uchardata > class_uchardata_base) xclass = TRUE;
4942
4943 if (lengthptr != NULL && class_uchardata > class_uchardata_base)
4944 {
4945 *lengthptr += (int)(class_uchardata - class_uchardata_base);
4946 class_uchardata = class_uchardata_base;
4947 }
4948 #endif
4949
4950
4951
4952 if (inescq)
4953 {
4954 if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
4955 {
4956 inescq = FALSE;
4957 ptr++;
4958 continue;
4959 }
4960 goto CHECK_RANGE;
4961 }
4962
4963
4964
4965
4966
4967
4968
4969 if (c == CHAR_LEFT_SQUARE_BRACKET &&
4970 (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
4971 ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
4972 {
4973 BOOL local_negate = FALSE;
4974 int posix_class, taboffset, tabopt;
4975 register const pcre_uint8 *cbits = cd->cbits;
4976 pcre_uint8 pbits[32];
4977
4978 if (ptr[1] != CHAR_COLON)
4979 {
4980 *errorcodeptr = ERR31;
4981 goto FAILED;
4982 }
4983
4984 ptr += 2;
4985 if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
4986 {
4987 local_negate = TRUE;
4988 should_flip_negation = TRUE;
4989 ptr++;
4990 }
4991
4992 posix_class = check_posix_name(ptr, (int)(tempptr - ptr));
4993 if (posix_class < 0)
4994 {
4995 *errorcodeptr = ERR30;
4996 goto FAILED;
4997 }
4998
4999
5000
5001
5002
5003 if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
5004 posix_class = 0;
5005
5006
5007
5008
5009
5010
5011 #ifdef SUPPORT_UCP
5012 if ((options & PCRE_UCP) != 0)
5013 {
5014 unsigned int ptype = 0;
5015 int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
5016
5017
5018
5019
5020 if (posix_substitutes[pc] != NULL)
5021 {
5022 nestptr = tempptr + 1;
5023 ptr = posix_substitutes[pc] - 1;
5024 continue;
5025 }
5026
5027
5028
5029
5030 else switch(posix_class)
5031 {
5032 case PC_GRAPH:
5033 ptype = PT_PXGRAPH;
5034
5035 case PC_PRINT:
5036 if (ptype == 0) ptype = PT_PXPRINT;
5037
5038 case PC_PUNCT:
5039 if (ptype == 0) ptype = PT_PXPUNCT;
5040 *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
5041 *class_uchardata++ = ptype;
5042 *class_uchardata++ = 0;
5043 xclass_has_prop = TRUE;
5044 ptr = tempptr + 1;
5045 continue;
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059 default:
5060 if (!negate_class && local_negate &&
5061 (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
5062 {
5063 *class_uchardata++ = XCL_RANGE;
5064 class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
5065 class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
5066 }
5067 break;
5068 }
5069 }
5070 #endif
5071
5072
5073
5074
5075
5076
5077 posix_class *= 3;
5078
5079
5080
5081 memcpy(pbits, cbits + posix_class_maps[posix_class],
5082 32 * sizeof(pcre_uint8));
5083
5084
5085
5086 taboffset = posix_class_maps[posix_class + 1];
5087 tabopt = posix_class_maps[posix_class + 2];
5088
5089 if (taboffset >= 0)
5090 {
5091 if (tabopt >= 0)
5092 for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset];
5093 else
5094 for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];
5095 }
5096
5097
5098
5099
5100 if (tabopt < 0) tabopt = -tabopt;
5101 if (tabopt == 1) pbits[1] &= ~0x3c;
5102 else if (tabopt == 2) pbits[11] &= 0x7f;
5103
5104
5105
5106
5107 if (local_negate)
5108 for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
5109 else
5110 for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
5111
5112 ptr = tempptr + 1;
5113
5114 class_has_8bitchar = 1;
5115
5116 class_one_char = 2;
5117 continue;
5118 }
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129 if (c == CHAR_BACKSLASH)
5130 {
5131 escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
5132 TRUE);
5133 if (*errorcodeptr != 0) goto FAILED;
5134 if (escape == 0) c = ec;
5135 else if (escape == ESC_b) c = CHAR_BS;
5136 else if (escape == ESC_N)
5137 {
5138 *errorcodeptr = ERR71;
5139 goto FAILED;
5140 }
5141 else if (escape == ESC_Q)
5142 {
5143 if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
5144 {
5145 ptr += 2;
5146 }
5147 else inescq = TRUE;
5148 continue;
5149 }
5150 else if (escape == ESC_E) continue;
5151
5152 else
5153 {
5154 register const pcre_uint8 *cbits = cd->cbits;
5155
5156 class_has_8bitchar++;
5157
5158 class_one_char += 2;
5159
5160 switch (escape)
5161 {
5162 #ifdef SUPPORT_UCP
5163 case ESC_du:
5164 case ESC_DU:
5165 case ESC_wu:
5166 case ESC_WU:
5167 case ESC_su:
5168 case ESC_SU:
5169 nestptr = ptr;
5170 ptr = substitutes[escape - ESC_DU] - 1;
5171 class_has_8bitchar--;
5172 continue;
5173 #endif
5174 case ESC_d:
5175 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];
5176 continue;
5177
5178 case ESC_D:
5179 should_flip_negation = TRUE;
5180 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
5181 continue;
5182
5183 case ESC_w:
5184 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word];
5185 continue;
5186
5187 case ESC_W:
5188 should_flip_negation = TRUE;
5189 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
5190 continue;
5191
5192
5193
5194
5195
5196
5197
5198
5199 case ESC_s:
5200 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
5201 continue;
5202
5203 case ESC_S:
5204 should_flip_negation = TRUE;
5205 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
5206 continue;
5207
5208
5209
5210 case ESC_h:
5211 (void)add_list_to_class(classbits, &class_uchardata, options, cd,
5212 PRIV(hspace_list), NOTACHAR);
5213 continue;
5214
5215 case ESC_H:
5216 (void)add_not_list_to_class(classbits, &class_uchardata, options,
5217 cd, PRIV(hspace_list));
5218 continue;
5219
5220 case ESC_v:
5221 (void)add_list_to_class(classbits, &class_uchardata, options, cd,
5222 PRIV(vspace_list), NOTACHAR);
5223 continue;
5224
5225 case ESC_V:
5226 (void)add_not_list_to_class(classbits, &class_uchardata, options,
5227 cd, PRIV(vspace_list));
5228 continue;
5229
5230 case ESC_p:
5231 case ESC_P:
5232 #ifdef SUPPORT_UCP
5233 {
5234 BOOL negated;
5235 unsigned int ptype = 0, pdata = 0;
5236 if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
5237 goto FAILED;
5238 *class_uchardata++ = ((escape == ESC_p) != negated)?
5239 XCL_PROP : XCL_NOTPROP;
5240 *class_uchardata++ = ptype;
5241 *class_uchardata++ = pdata;
5242 xclass_has_prop = TRUE;
5243 class_has_8bitchar--;
5244 continue;
5245 }
5246 #else
5247 *errorcodeptr = ERR45;
5248 goto FAILED;
5249 #endif
5250
5251
5252
5253
5254 default:
5255 if ((options & PCRE_EXTRA) != 0)
5256 {
5257 *errorcodeptr = ERR7;
5258 goto FAILED;
5259 }
5260 class_has_8bitchar--;
5261 class_one_char -= 2;
5262 c = *ptr;
5263 break;
5264 }
5265 }
5266
5267
5268
5269
5270 escape = 0;
5271
5272 }
5273
5274
5275
5276
5277
5278
5279 CHECK_RANGE:
5280 while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
5281 {
5282 inescq = FALSE;
5283 ptr += 2;
5284 }
5285 oldptr = ptr;
5286
5287
5288
5289 if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
5290
5291
5292
5293 if (!inescq && ptr[1] == CHAR_MINUS)
5294 {
5295 pcre_uint32 d;
5296 ptr += 2;
5297 while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
5298
5299
5300
5301
5302 while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
5303 {
5304 ptr += 2;
5305 if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
5306 { ptr += 2; continue; }
5307 inescq = TRUE;
5308 break;
5309 }
5310
5311
5312
5313
5314 if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
5315 {
5316 ptr = oldptr;
5317 goto CLASS_SINGLE_CHARACTER;
5318 }
5319
5320
5321
5322 #ifdef SUPPORT_UTF
5323 if (utf)
5324 {
5325 GETCHARLEN(d, ptr, ptr);
5326 }
5327 else
5328 #endif
5329 d = *ptr;
5330
5331
5332
5333
5334
5335
5336
5337 if (!inescq)
5338 {
5339 if (d == CHAR_BACKSLASH)
5340 {
5341 int descape;
5342 descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE);
5343 if (*errorcodeptr != 0) goto FAILED;
5344
5345
5346
5347
5348 if (descape != 0)
5349 {
5350 if (descape == ESC_b) d = CHAR_BS; else
5351 {
5352 *errorcodeptr = ERR83;
5353 goto FAILED;
5354 }
5355 }
5356 }
5357
5358
5359
5360 else if (d == CHAR_LEFT_SQUARE_BRACKET &&
5361 (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
5362 ptr[1] == CHAR_EQUALS_SIGN) &&
5363 check_posix_syntax(ptr, &tempptr))
5364 {
5365 *errorcodeptr = ERR83;
5366 goto FAILED;
5367 }
5368 }
5369
5370
5371
5372
5373 if (d < c)
5374 {
5375 *errorcodeptr = ERR8;
5376 goto FAILED;
5377 }
5378 if (d == c) goto CLASS_SINGLE_CHARACTER;
5379
5380
5381
5382
5383
5384 class_one_char = 2;
5385
5386
5387
5388 if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
5389
5390 class_has_8bitchar +=
5391 add_to_class(classbits, &class_uchardata, options, cd, c, d);
5392
5393 continue;
5394 }
5395
5396
5397
5398
5399
5400
5401
5402 CLASS_SINGLE_CHARACTER:
5403 if (class_one_char < 2) class_one_char++;
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414 if (!inescq &&
5415 #ifdef SUPPORT_UCP
5416 !xclass_has_prop &&
5417 #endif
5418 class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
5419 {
5420 ptr++;
5421 zeroreqchar = reqchar;
5422 zeroreqcharflags = reqcharflags;
5423
5424 if (negate_class)
5425 {
5426 #ifdef SUPPORT_UCP
5427 int d;
5428 #endif
5429 if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
5430 zerofirstchar = firstchar;
5431 zerofirstcharflags = firstcharflags;
5432
5433
5434
5435
5436
5437 #ifdef SUPPORT_UCP
5438 if (utf && (options & PCRE_CASELESS) != 0 &&
5439 (d = UCD_CASESET(c)) != 0)
5440 {
5441 *code++ = OP_NOTPROP;
5442 *code++ = PT_CLIST;
5443 *code++ = d;
5444 }
5445 else
5446 #endif
5447
5448
5449 {
5450 *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
5451 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
5452 if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
5453 code += PRIV(ord2utf)(c, code);
5454 else
5455 #endif
5456 *code++ = c;
5457 }
5458
5459
5460
5461 goto END_CLASS;
5462 }
5463
5464
5465
5466
5467 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
5468 if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
5469 mclength = PRIV(ord2utf)(c, mcbuffer);
5470 else
5471 #endif
5472 {
5473 mcbuffer[0] = c;
5474 mclength = 1;
5475 }
5476 goto ONE_CHAR;
5477 }
5478
5479
5480
5481
5482 class_has_8bitchar +=
5483 add_to_class(classbits, &class_uchardata, options, cd, c, c);
5484 }
5485
5486
5487
5488
5489
5490 while (((c = *(++ptr)) != CHAR_NULL ||
5491 (nestptr != NULL &&
5492 (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != CHAR_NULL)) &&
5493 (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
5494
5495
5496
5497 if (c == CHAR_NULL)
5498 {
5499 *errorcodeptr = ERR6;
5500 goto FAILED;
5501 }
5502
5503
5504
5505
5506
5507
5508
5509
5510
5511 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5512 if (class_uchardata > class_uchardata_base) xclass = TRUE;
5513 #endif
5514
5515
5516
5517
5518
5519 if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
5520 zerofirstchar = firstchar;
5521 zerofirstcharflags = firstcharflags;
5522 zeroreqchar = reqchar;
5523 zeroreqcharflags = reqcharflags;
5524
5525
5526
5527
5528
5529
5530
5531
5532
5533 #ifdef SUPPORT_UTF
5534 if (xclass && (xclass_has_prop || !should_flip_negation ||
5535 (options & PCRE_UCP) != 0))
5536 #elif !defined COMPILE_PCRE8
5537 if (xclass && (xclass_has_prop || !should_flip_negation))
5538 #endif
5539 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5540 {
5541 *class_uchardata++ = XCL_END;
5542 *code++ = OP_XCLASS;
5543 code += LINK_SIZE;
5544 *code = negate_class? XCL_NOT:0;
5545 if (xclass_has_prop) *code |= XCL_HASPROP;
5546
5547
5548
5549
5550 if (class_has_8bitchar > 0)
5551 {
5552 *code++ |= XCL_MAP;
5553 memmove(code + (32 / sizeof(pcre_uchar)), code,
5554 IN_UCHARS(class_uchardata - code));
5555 if (negate_class && !xclass_has_prop)
5556 for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
5557 memcpy(code, classbits, 32);
5558 code = class_uchardata + (32 / sizeof(pcre_uchar));
5559 }
5560 else code = class_uchardata;
5561
5562
5563
5564 PUT(previous, 1, (int)(code - previous));
5565 break;
5566 }
5567
5568
5569
5570
5571 if (lengthptr != NULL)
5572 *lengthptr += (int)(class_uchardata - class_uchardata_base);
5573 #endif
5574
5575
5576
5577
5578
5579
5580
5581 *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
5582 if (lengthptr == NULL)
5583 {
5584 if (negate_class)
5585 for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
5586 memcpy(code, classbits, 32);
5587 }
5588 code += 32 / sizeof(pcre_uchar);
5589
5590 END_CLASS:
5591 break;
5592
5593
5594
5595
5596
5597
5598 case CHAR_LEFT_CURLY_BRACKET:
5599 if (!is_quantifier) goto NORMAL_CHAR;
5600 ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
5601 if (*errorcodeptr != 0) goto FAILED;
5602 goto REPEAT;
5603
5604 case CHAR_ASTERISK:
5605 repeat_min = 0;
5606 repeat_max = -1;
5607 goto REPEAT;
5608
5609 case CHAR_PLUS:
5610 repeat_min = 1;
5611 repeat_max = -1;
5612 goto REPEAT;
5613
5614 case CHAR_QUESTION_MARK:
5615 repeat_min = 0;
5616 repeat_max = 1;
5617
5618 REPEAT:
5619 if (previous == NULL)
5620 {
5621 *errorcodeptr = ERR9;
5622 goto FAILED;
5623 }
5624
5625 if (repeat_min == 0)
5626 {
5627 firstchar = zerofirstchar;
5628 firstcharflags = zerofirstcharflags;
5629 reqchar = zeroreqchar;
5630 reqcharflags = zeroreqcharflags;
5631 }
5632
5633
5634
5635 reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
5636
5637 op_type = 0;
5638 possessive_quantifier = FALSE;
5639
5640
5641
5642
5643 tempcode = previous;
5644
5645
5646
5647
5648
5649 if ((options & PCRE_EXTENDED) != 0)
5650 {
5651 const pcre_uchar *p = ptr + 1;
5652 for (;;)
5653 {
5654 while (MAX_255(*p) && (cd->ctypes[*p] & ctype_space) != 0) p++;
5655 if (*p != CHAR_NUMBER_SIGN) break;
5656 p++;
5657 while (*p != CHAR_NULL)
5658 {
5659 if (IS_NEWLINE(p))
5660 {
5661 p += cd->nllen;
5662 break;
5663 }
5664 p++;
5665 #ifdef SUPPORT_UTF
5666 if (utf) FORWARDCHAR(p);
5667 #endif
5668 }
5669 }
5670 ptr = p - 1;
5671 }
5672
5673
5674
5675
5676
5677
5678
5679 if (ptr[1] == CHAR_PLUS)
5680 {
5681 repeat_type = 0;
5682 possessive_quantifier = TRUE;
5683 ptr++;
5684 }
5685 else if (ptr[1] == CHAR_QUESTION_MARK)
5686 {
5687 repeat_type = greedy_non_default;
5688 ptr++;
5689 }
5690 else repeat_type = greedy_default;
5691
5692
5693
5694
5695
5696
5697
5698 if (*previous == OP_RECURSE)
5699 {
5700 memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE));
5701 *previous = OP_ONCE;
5702 PUT(previous, 1, 2 + 2*LINK_SIZE);
5703 previous[2 + 2*LINK_SIZE] = OP_KET;
5704 PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
5705 code += 2 + 2 * LINK_SIZE;
5706 length_prevgroup = 3 + 3*LINK_SIZE;
5707
5708
5709
5710
5711 if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE)
5712 {
5713 int offset = GET(cd->hwm, -LINK_SIZE);
5714 if (offset == previous + 1 - cd->start_code)
5715 PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE);
5716 }
5717 }
5718
5719
5720
5721
5722
5723
5724
5725
5726
5727 if (*previous == OP_CHAR || *previous == OP_CHARI
5728 || *previous == OP_NOT || *previous == OP_NOTI)
5729 {
5730 switch (*previous)
5731 {
5732 default:
5733 case OP_CHAR: op_type = OP_STAR - OP_STAR; break;
5734 case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
5735 case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break;
5736 case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break;
5737 }
5738
5739
5740
5741
5742
5743
5744 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
5745 if (utf && NOT_FIRSTCHAR(code[-1]))
5746 {
5747 pcre_uchar *lastchar = code - 1;
5748 BACKCHAR(lastchar);
5749 c = (int)(code - lastchar);
5750 memcpy(utf_chars, lastchar, IN_UCHARS(c));
5751 c |= UTF_LENGTH;
5752 }
5753 else
5754 #endif
5755
5756
5757
5758 {
5759 c = code[-1];
5760 if (*previous <= OP_CHARI && repeat_min > 1)
5761 {
5762 reqchar = c;
5763 reqcharflags = req_caseopt | cd->req_varyopt;
5764 }
5765 }
5766
5767 goto OUTPUT_SINGLE_REPEAT;
5768 }
5769
5770
5771
5772
5773
5774
5775
5776
5777 else if (*previous < OP_EODN)
5778 {
5779 pcre_uchar *oldcode;
5780 int prop_type, prop_value;
5781 op_type = OP_TYPESTAR - OP_STAR;
5782 c = *previous;
5783
5784 OUTPUT_SINGLE_REPEAT:
5785 if (*previous == OP_PROP || *previous == OP_NOTPROP)
5786 {
5787 prop_type = previous[1];
5788 prop_value = previous[2];
5789 }
5790 else prop_type = prop_value = -1;
5791
5792 oldcode = code;
5793 code = previous;
5794
5795
5796
5797
5798 if (repeat_max == 0) goto END_REPEAT;
5799
5800
5801
5802 repeat_type += op_type;
5803
5804
5805
5806
5807 if (repeat_min == 0)
5808 {
5809 if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
5810 else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
5811 else
5812 {
5813 *code++ = OP_UPTO + repeat_type;
5814 PUT2INC(code, 0, repeat_max);
5815 }
5816 }
5817
5818
5819
5820
5821
5822
5823 else if (repeat_min == 1)
5824 {
5825 if (repeat_max == -1)
5826 *code++ = OP_PLUS + repeat_type;
5827 else
5828 {
5829 code = oldcode;
5830 if (repeat_max == 1) goto END_REPEAT;
5831 *code++ = OP_UPTO + repeat_type;
5832 PUT2INC(code, 0, repeat_max - 1);
5833 }
5834 }
5835
5836
5837
5838
5839 else
5840 {
5841 *code++ = OP_EXACT + op_type;
5842 PUT2INC(code, 0, repeat_min);
5843
5844
5845
5846
5847
5848
5849
5850 if (repeat_max < 0)
5851 {
5852 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
5853 if (utf && (c & UTF_LENGTH) != 0)
5854 {
5855 memcpy(code, utf_chars, IN_UCHARS(c & 7));
5856 code += c & 7;
5857 }
5858 else
5859 #endif
5860 {
5861 *code++ = c;
5862 if (prop_type >= 0)
5863 {
5864 *code++ = prop_type;
5865 *code++ = prop_value;
5866 }
5867 }
5868 *code++ = OP_STAR + repeat_type;
5869 }
5870
5871
5872
5873
5874
5875 else if (repeat_max != repeat_min)
5876 {
5877 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
5878 if (utf && (c & UTF_LENGTH) != 0)
5879 {
5880 memcpy(code, utf_chars, IN_UCHARS(c & 7));
5881 code += c & 7;
5882 }
5883 else
5884 #endif
5885 *code++ = c;
5886 if (prop_type >= 0)
5887 {
5888 *code++ = prop_type;
5889 *code++ = prop_value;
5890 }
5891 repeat_max -= repeat_min;
5892
5893 if (repeat_max == 1)
5894 {
5895 *code++ = OP_QUERY + repeat_type;
5896 }
5897 else
5898 {
5899 *code++ = OP_UPTO + repeat_type;
5900 PUT2INC(code, 0, repeat_max);
5901 }
5902 }
5903 }
5904
5905
5906
5907 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
5908 if (utf && (c & UTF_LENGTH) != 0)
5909 {
5910 memcpy(code, utf_chars, IN_UCHARS(c & 7));
5911 code += c & 7;
5912 }
5913 else
5914 #endif
5915 *code++ = c;
5916
5917
5918
5919
5920 #ifdef SUPPORT_UCP
5921 if (prop_type >= 0)
5922 {
5923 *code++ = prop_type;
5924 *code++ = prop_value;
5925 }
5926 #endif
5927 }
5928
5929
5930
5931
5932 else if (*previous == OP_CLASS || *previous == OP_NCLASS ||
5933 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5934 *previous == OP_XCLASS ||
5935 #endif
5936 *previous == OP_REF || *previous == OP_REFI ||
5937 *previous == OP_DNREF || *previous == OP_DNREFI)
5938 {
5939 if (repeat_max == 0)
5940 {
5941 code = previous;
5942 goto END_REPEAT;
5943 }
5944
5945 if (repeat_min == 0 && repeat_max == -1)
5946 *code++ = OP_CRSTAR + repeat_type;
5947 else if (repeat_min == 1 && repeat_max == -1)
5948 *code++ = OP_CRPLUS + repeat_type;
5949 else if (repeat_min == 0 && repeat_max == 1)
5950 *code++ = OP_CRQUERY + repeat_type;
5951 else
5952 {
5953 *code++ = OP_CRRANGE + repeat_type;
5954 PUT2INC(code, 0, repeat_min);
5955 if (repeat_max == -1) repeat_max = 0;
5956 PUT2INC(code, 0, repeat_max);
5957 }
5958 }
5959
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969 else if (*previous >= OP_ASSERT && *previous <= OP_COND)
5970 {
5971 register int i;
5972 int len = (int)(code - previous);
5973 size_t base_hwm_offset = item_hwm_offset;
5974 pcre_uchar *bralink = NULL;
5975 pcre_uchar *brazeroptr = NULL;
5976
5977
5978
5979
5980 if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
5981 goto END_REPEAT;
5982
5983
5984
5985
5986
5987
5988 if (*previous < OP_ONCE)
5989 {
5990 if (repeat_min > 0) goto END_REPEAT;
5991 if (repeat_max < 0 || repeat_max > 1) repeat_max = 1;
5992 }
5993
5994
5995
5996
5997
5998
5999
6000
6001 if (repeat_min == 0)
6002 {
6003
6004
6005
6006
6007
6008
6009
6010
6011
6012
6013
6014
6015
6016
6017
6018
6019
6020
6021
6022
6023
6024
6025 if (repeat_max <= 1)
6026 {
6027 *code = OP_END;
6028 adjust_recurse(previous, 1, utf, cd, item_hwm_offset);
6029 memmove(previous + 1, previous, IN_UCHARS(len));
6030 code++;
6031 if (repeat_max == 0)
6032 {
6033 *previous++ = OP_SKIPZERO;
6034 goto END_REPEAT;
6035 }
6036 brazeroptr = previous;
6037 *previous++ = OP_BRAZERO + repeat_type;
6038 }
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048 else
6049 {
6050 int offset;
6051 *code = OP_END;
6052 adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, item_hwm_offset);
6053 memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
6054 code += 2 + LINK_SIZE;
6055 *previous++ = OP_BRAZERO + repeat_type;
6056 *previous++ = OP_BRA;
6057
6058
6059
6060
6061 offset = (bralink == NULL)? 0 : (int)(previous - bralink);
6062 bralink = previous;
6063 PUTINC(previous, 0, offset);
6064 }
6065
6066 repeat_max--;
6067 }
6068
6069
6070
6071
6072
6073
6074
6075
6076 else
6077 {
6078 if (repeat_min > 1)
6079 {
6080
6081
6082
6083
6084
6085 if (lengthptr != NULL)
6086 {
6087 int delta = (repeat_min - 1)*length_prevgroup;
6088 if ((INT64_OR_DOUBLE)(repeat_min - 1)*
6089 (INT64_OR_DOUBLE)length_prevgroup >
6090 (INT64_OR_DOUBLE)INT_MAX ||
6091 OFLOW_MAX - *lengthptr < delta)
6092 {
6093 *errorcodeptr = ERR20;
6094 goto FAILED;
6095 }
6096 *lengthptr += delta;
6097 }
6098
6099
6100
6101
6102
6103
6104 else
6105 {
6106 if (groupsetfirstchar && reqcharflags < 0)
6107 {
6108 reqchar = firstchar;
6109 reqcharflags = firstcharflags;
6110 }
6111
6112 for (i = 1; i < repeat_min; i++)
6113 {
6114 pcre_uchar *hc;
6115 size_t this_hwm_offset = cd->hwm - cd->start_workspace;
6116 memcpy(code, previous, IN_UCHARS(len));
6117
6118 while (cd->hwm > cd->start_workspace + cd->workspace_size -
6119 WORK_SIZE_SAFETY_MARGIN -
6120 (this_hwm_offset - base_hwm_offset))
6121 {
6122 *errorcodeptr = expand_workspace(cd);
6123 if (*errorcodeptr != 0) goto FAILED;
6124 }
6125
6126 for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset;
6127 hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
6128 hc += LINK_SIZE)
6129 {
6130 PUT(cd->hwm, 0, GET(hc, 0) + len);
6131 cd->hwm += LINK_SIZE;
6132 }
6133 base_hwm_offset = this_hwm_offset;
6134 code += len;
6135 }
6136 }
6137 }
6138
6139 if (repeat_max > 0) repeat_max -= repeat_min;
6140 }
6141
6142
6143
6144
6145
6146
6147
6148
6149 if (repeat_max >= 0)
6150 {
6151
6152
6153
6154
6155
6156
6157
6158 if (lengthptr != NULL && repeat_max > 0)
6159 {
6160 int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
6161 2 - 2*LINK_SIZE;
6162 if ((INT64_OR_DOUBLE)repeat_max *
6163 (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
6164 > (INT64_OR_DOUBLE)INT_MAX ||
6165 OFLOW_MAX - *lengthptr < delta)
6166 {
6167 *errorcodeptr = ERR20;
6168 goto FAILED;
6169 }
6170 *lengthptr += delta;
6171 }
6172
6173
6174
6175 else for (i = repeat_max - 1; i >= 0; i--)
6176 {
6177 pcre_uchar *hc;
6178 size_t this_hwm_offset = cd->hwm - cd->start_workspace;
6179
6180 *code++ = OP_BRAZERO + repeat_type;
6181
6182
6183
6184
6185 if (i != 0)
6186 {
6187 int offset;
6188 *code++ = OP_BRA;
6189 offset = (bralink == NULL)? 0 : (int)(code - bralink);
6190 bralink = code;
6191 PUTINC(code, 0, offset);
6192 }
6193
6194 memcpy(code, previous, IN_UCHARS(len));
6195
6196
6197
6198
6199 while (cd->hwm > cd->start_workspace + cd->workspace_size -
6200 WORK_SIZE_SAFETY_MARGIN -
6201 (this_hwm_offset - base_hwm_offset))
6202 {
6203 *errorcodeptr = expand_workspace(cd);
6204 if (*errorcodeptr != 0) goto FAILED;
6205 }
6206
6207 for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset;
6208 hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
6209 hc += LINK_SIZE)
6210 {
6211 PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
6212 cd->hwm += LINK_SIZE;
6213 }
6214 base_hwm_offset = this_hwm_offset;
6215 code += len;
6216 }
6217
6218
6219
6220
6221 while (bralink != NULL)
6222 {
6223 int oldlinkoffset;
6224 int offset = (int)(code - bralink + 1);
6225 pcre_uchar *bra = code - offset;
6226 oldlinkoffset = GET(bra, 1);
6227 bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
6228 *code++ = OP_KET;
6229 PUTINC(code, 0, offset);
6230 PUT(bra, 1, offset);
6231 }
6232 }
6233
6234
6235
6236
6237
6238
6239
6240
6241
6242
6243
6244
6245
6246
6247
6248
6249
6250
6251
6252
6253
6254
6255
6256
6257
6258
6259 else
6260 {
6261 pcre_uchar *ketcode = code - 1 - LINK_SIZE;
6262 pcre_uchar *bracode = ketcode - GET(ketcode, 1);
6263
6264
6265
6266 if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
6267 possessive_quantifier) *bracode = OP_BRA;
6268
6269
6270
6271
6272 if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
6273 *ketcode = OP_KETRMAX + repeat_type;
6274
6275
6276
6277
6278 else
6279 {
6280
6281
6282 if (lengthptr == NULL)
6283 {
6284 pcre_uchar *scode = bracode;
6285 do
6286 {
6287 if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
6288 {
6289 *bracode += OP_SBRA - OP_BRA;
6290 break;
6291 }
6292 scode += GET(scode, 1);
6293 }
6294 while (*scode == OP_ALT);
6295 }
6296
6297
6298
6299
6300 if (*bracode == OP_COND && bracode[GET(bracode,1)] != OP_ALT)
6301 *bracode = OP_SCOND;
6302
6303
6304
6305 if (possessive_quantifier)
6306 {
6307
6308
6309
6310
6311
6312 if (*bracode == OP_COND || *bracode == OP_SCOND)
6313 {
6314 int nlen = (int)(code - bracode);
6315 *code = OP_END;
6316 adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6317 memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
6318 code += 1 + LINK_SIZE;
6319 nlen += 1 + LINK_SIZE;
6320 *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
6321 *code++ = OP_KETRPOS;
6322 PUTINC(code, 0, nlen);
6323 PUT(bracode, 1, nlen);
6324 }
6325
6326
6327
6328 else
6329 {
6330 *bracode += 1;
6331 *ketcode = OP_KETRPOS;
6332 }
6333
6334
6335
6336
6337 if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
6338 if (repeat_min < 2) possessive_quantifier = FALSE;
6339 }
6340
6341
6342
6343 else *ketcode = OP_KETRMAX + repeat_type;
6344 }
6345 }
6346 }
6347
6348
6349
6350
6351
6352
6353 else if (*previous == OP_FAIL) goto END_REPEAT;
6354
6355
6356
6357 else
6358 {
6359 *errorcodeptr = ERR11;
6360 goto FAILED;
6361 }
6362
6363
6364
6365
6366
6367
6368
6369
6370
6371
6372
6373
6374
6375 if (possessive_quantifier)
6376 {
6377 int len;
6378
6379
6380
6381
6382
6383
6384
6385 switch(*tempcode)
6386 {
6387 case OP_TYPEEXACT:
6388 tempcode += PRIV(OP_lengths)[*tempcode] +
6389 ((tempcode[1 + IMM2_SIZE] == OP_PROP
6390 || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
6391 break;
6392
6393
6394
6395 case OP_CHAR:
6396 case OP_CHARI:
6397 case OP_NOT:
6398 case OP_NOTI:
6399 case OP_EXACT:
6400 case OP_EXACTI:
6401 case OP_NOTEXACT:
6402 case OP_NOTEXACTI:
6403 tempcode += PRIV(OP_lengths)[*tempcode];
6404 #ifdef SUPPORT_UTF
6405 if (utf && HAS_EXTRALEN(tempcode[-1]))
6406 tempcode += GET_EXTRALEN(tempcode[-1]);
6407 #endif
6408 break;
6409
6410
6411
6412
6413 case OP_CLASS:
6414 case OP_NCLASS:
6415 tempcode += 1 + 32/sizeof(pcre_uchar);
6416 break;
6417
6418 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6419 case OP_XCLASS:
6420 tempcode += GET(tempcode, 1);
6421 break;
6422 #endif
6423 }
6424
6425
6426
6427
6428
6429
6430
6431 len = (int)(code - tempcode);
6432 if (len > 0)
6433 {
6434 unsigned int repcode = *tempcode;
6435
6436
6437
6438
6439
6440 if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0)
6441 *tempcode = opcode_possessify[repcode];
6442
6443
6444
6445
6446
6447 else
6448 {
6449 *code = OP_END;
6450 adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6451 memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6452 code += 1 + LINK_SIZE;
6453 len += 1 + LINK_SIZE;
6454 tempcode[0] = OP_ONCE;
6455 *code++ = OP_KET;
6456 PUTINC(code, 0, len);
6457 PUT(tempcode, 1, len);
6458 }
6459 }
6460
6461 #ifdef NEVER
6462 if (len > 0) switch (*tempcode)
6463 {
6464 case OP_STAR: *tempcode = OP_POSSTAR; break;
6465 case OP_PLUS: *tempcode = OP_POSPLUS; break;
6466 case OP_QUERY: *tempcode = OP_POSQUERY; break;
6467 case OP_UPTO: *tempcode = OP_POSUPTO; break;
6468
6469 case OP_STARI: *tempcode = OP_POSSTARI; break;
6470 case OP_PLUSI: *tempcode = OP_POSPLUSI; break;
6471 case OP_QUERYI: *tempcode = OP_POSQUERYI; break;
6472 case OP_UPTOI: *tempcode = OP_POSUPTOI; break;
6473
6474 case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break;
6475 case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break;
6476 case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
6477 case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break;
6478
6479 case OP_NOTSTARI: *tempcode = OP_NOTPOSSTARI; break;
6480 case OP_NOTPLUSI: *tempcode = OP_NOTPOSPLUSI; break;
6481 case OP_NOTQUERYI: *tempcode = OP_NOTPOSQUERYI; break;
6482 case OP_NOTUPTOI: *tempcode = OP_NOTPOSUPTOI; break;
6483
6484 case OP_TYPESTAR: *tempcode = OP_TYPEPOSSTAR; break;
6485 case OP_TYPEPLUS: *tempcode = OP_TYPEPOSPLUS; break;
6486 case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break;
6487 case OP_TYPEUPTO: *tempcode = OP_TYPEPOSUPTO; break;
6488
6489 case OP_CRSTAR: *tempcode = OP_CRPOSSTAR; break;
6490 case OP_CRPLUS: *tempcode = OP_CRPOSPLUS; break;
6491 case OP_CRQUERY: *tempcode = OP_CRPOSQUERY; break;
6492 case OP_CRRANGE: *tempcode = OP_CRPOSRANGE; break;
6493
6494
6495
6496
6497 default:
6498 *code = OP_END;
6499 adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6500 memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6501 code += 1 + LINK_SIZE;
6502 len += 1 + LINK_SIZE;
6503 tempcode[0] = OP_ONCE;
6504 *code++ = OP_KET;
6505 PUTINC(code, 0, len);
6506 PUT(tempcode, 1, len);
6507 break;
6508 }
6509 #endif
6510 }
6511
6512
6513
6514
6515
6516 END_REPEAT:
6517 previous = NULL;
6518 cd->req_varyopt |= reqvary;
6519 break;
6520
6521
6522
6523
6524
6525
6526
6527 case CHAR_LEFT_PARENTHESIS:
6528 ptr++;
6529
6530
6531
6532
6533 if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
6534 {
6535 ptr += 2;
6536 while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
6537 if (*ptr == CHAR_NULL)
6538 {
6539 *errorcodeptr = ERR18;
6540 goto FAILED;
6541 }
6542 continue;
6543 }
6544
6545
6546
6547 if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
6548 || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0))))
6549 {
6550 int i, namelen;
6551 int arglen = 0;
6552 const char *vn = verbnames;
6553 const pcre_uchar *name = ptr + 1;
6554 const pcre_uchar *arg = NULL;
6555 previous = NULL;
6556 ptr++;
6557 while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
6558 namelen = (int)(ptr - name);
6559
6560
6561
6562
6563
6564 if (*ptr == CHAR_COLON)
6565 {
6566 arg = ++ptr;
6567 while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
6568 arglen = (int)(ptr - arg);
6569 if ((unsigned int)arglen > MAX_MARK)
6570 {
6571 *errorcodeptr = ERR75;
6572 goto FAILED;
6573 }
6574 }
6575
6576 if (*ptr != CHAR_RIGHT_PARENTHESIS)
6577 {
6578 *errorcodeptr = ERR60;
6579 goto FAILED;
6580 }
6581
6582
6583
6584 for (i = 0; i < verbcount; i++)
6585 {
6586 if (namelen == verbs[i].len &&
6587 STRNCMP_UC_C8(name, vn, namelen) == 0)
6588 {
6589 int setverb;
6590
6591
6592
6593
6594 if (verbs[i].op == OP_ACCEPT)
6595 {
6596 open_capitem *oc;
6597 if (arglen != 0)
6598 {
6599 *errorcodeptr = ERR59;
6600 goto FAILED;
6601 }
6602 cd->had_accept = TRUE;
6603 for (oc = cd->open_caps; oc != NULL; oc = oc->next)
6604 {
6605 *code++ = OP_CLOSE;
6606 PUT2INC(code, 0, oc->number);
6607 }
6608 setverb = *code++ =
6609 (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
6610
6611
6612 if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
6613 }
6614
6615
6616
6617 else if (arglen == 0)
6618 {
6619 if (verbs[i].op < 0)
6620 {
6621 *errorcodeptr = ERR66;
6622 goto FAILED;
6623 }
6624 setverb = *code++ = verbs[i].op;
6625 }
6626
6627 else
6628 {
6629 if (verbs[i].op_arg < 0)
6630 {
6631 *errorcodeptr = ERR59;
6632 goto FAILED;
6633 }
6634 setverb = *code++ = verbs[i].op_arg;
6635 if (lengthptr != NULL)
6636 {
6637 *lengthptr += arglen;
6638 *code++ = 0;
6639 }
6640 else
6641 {
6642 *code++ = arglen;
6643 memcpy(code, arg, IN_UCHARS(arglen));
6644 code += arglen;
6645 }
6646 *code++ = 0;
6647 }
6648
6649 switch (setverb)
6650 {
6651 case OP_THEN:
6652 case OP_THEN_ARG:
6653 cd->external_flags |= PCRE_HASTHEN;
6654 break;
6655
6656 case OP_PRUNE:
6657 case OP_PRUNE_ARG:
6658 case OP_SKIP:
6659 case OP_SKIP_ARG:
6660 cd->had_pruneorskip = TRUE;
6661 break;
6662 }
6663
6664 break;
6665 }
6666
6667 vn += verbs[i].len + 1;
6668 }
6669
6670 if (i < verbcount) continue;
6671 *errorcodeptr = ERR60;
6672 goto FAILED;
6673 }
6674
6675
6676
6677 newoptions = options;
6678 skipbytes = 0;
6679 bravalue = OP_CBRA;
6680 item_hwm_offset = cd->hwm - cd->start_workspace;
6681 reset_bracount = FALSE;
6682
6683
6684
6685
6686 if (*ptr == CHAR_QUESTION_MARK)
6687 {
6688 int i, set, unset, namelen;
6689 int *optset;
6690 const pcre_uchar *name;
6691 pcre_uchar *slot;
6692
6693 switch (*(++ptr))
6694 {
6695
6696 case CHAR_VERTICAL_LINE:
6697 reset_bracount = TRUE;
6698 cd->dupgroups = TRUE;
6699
6700
6701
6702 case CHAR_COLON:
6703 bravalue = OP_BRA;
6704 ptr++;
6705 break;
6706
6707
6708
6709 case CHAR_LEFT_PARENTHESIS:
6710 bravalue = OP_COND;
6711 tempptr = ptr;
6712
6713
6714
6715
6716
6717
6718
6719
6720
6721
6722
6723
6724
6725
6726
6727
6728
6729
6730 if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
6731 {
6732 for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
6733 if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
6734 tempptr += i + 1;
6735 }
6736
6737
6738
6739
6740
6741 if (tempptr[1] == CHAR_QUESTION_MARK &&
6742 (tempptr[2] == CHAR_EQUALS_SIGN ||
6743 tempptr[2] == CHAR_EXCLAMATION_MARK ||
6744 (tempptr[2] == CHAR_LESS_THAN_SIGN &&
6745 (tempptr[3] == CHAR_EQUALS_SIGN ||
6746 tempptr[3] == CHAR_EXCLAMATION_MARK))))
6747 {
6748 cd->iscondassert = TRUE;
6749 break;
6750 }
6751
6752
6753
6754
6755 code[1+LINK_SIZE] = OP_CREF;
6756 skipbytes = 1+IMM2_SIZE;
6757 refsign = -1;
6758 namelen = -1;
6759 name = NULL;
6760 recno = 0;
6761
6762
6763
6764 ptr++;
6765 if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
6766 {
6767 terminator = -1;
6768 ptr += 2;
6769 code[1+LINK_SIZE] = OP_RREF;
6770 }
6771
6772
6773
6774
6775
6776 else if (*ptr == CHAR_LESS_THAN_SIGN)
6777 {
6778 terminator = CHAR_GREATER_THAN_SIGN;
6779 ptr++;
6780 }
6781 else if (*ptr == CHAR_APOSTROPHE)
6782 {
6783 terminator = CHAR_APOSTROPHE;
6784 ptr++;
6785 }
6786 else
6787 {
6788 terminator = CHAR_NULL;
6789 if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++;
6790 else if (IS_DIGIT(*ptr)) refsign = 0;
6791 }
6792
6793
6794
6795 if (refsign >= 0)
6796 {
6797 while (IS_DIGIT(*ptr))
6798 {
6799 if (recno > INT_MAX / 10 - 1)
6800 {
6801 while (IS_DIGIT(*ptr)) ptr++;
6802 *errorcodeptr = ERR61;
6803 goto FAILED;
6804 }
6805 recno = recno * 10 + (int)(*ptr - CHAR_0);
6806 ptr++;
6807 }
6808 }
6809
6810
6811
6812
6813
6814
6815 else
6816 {
6817 if (IS_DIGIT(*ptr))
6818 {
6819 *errorcodeptr = ERR84;
6820 goto FAILED;
6821 }
6822 if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_word) == 0)
6823 {
6824 *errorcodeptr = ERR28;
6825 goto FAILED;
6826 }
6827 name = ptr++;
6828 while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)
6829 {
6830 ptr++;
6831 }
6832 namelen = (int)(ptr - name);
6833 if (lengthptr != NULL) skipbytes += IMM2_SIZE;
6834 }
6835
6836
6837
6838 if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||
6839 *ptr++ != CHAR_RIGHT_PARENTHESIS)
6840 {
6841 ptr--;
6842 *errorcodeptr = ERR26;
6843 goto FAILED;
6844 }
6845
6846
6847
6848 if (lengthptr != NULL) break;
6849
6850
6851
6852
6853
6854 if (refsign >= 0)
6855 {
6856 if (recno <= 0)
6857 {
6858 *errorcodeptr = ERR35;
6859 goto FAILED;
6860 }
6861 if (refsign != 0) recno = (refsign == CHAR_MINUS)?
6862 cd->bracount - recno + 1 : recno + cd->bracount;
6863 if (recno <= 0 || recno > cd->final_bracount)
6864 {
6865 *errorcodeptr = ERR15;
6866 goto FAILED;
6867 }
6868 PUT2(code, 2+LINK_SIZE, recno);
6869 if (recno > cd->top_backref) cd->top_backref = recno;
6870 break;
6871 }
6872
6873
6874
6875 slot = cd->name_table;
6876 for (i = 0; i < cd->names_found; i++)
6877 {
6878 if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break;
6879 slot += cd->name_entry_size;
6880 }
6881
6882
6883
6884
6885
6886
6887 if (i < cd->names_found)
6888 {
6889 int offset = i++;
6890 int count = 1;
6891 recno = GET2(slot, 0);
6892 if (recno > cd->top_backref) cd->top_backref = recno;
6893 for (; i < cd->names_found; i++)
6894 {
6895 slot += cd->name_entry_size;
6896 if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0 ||
6897 (slot+IMM2_SIZE)[namelen] != 0) break;
6898 count++;
6899 }
6900
6901 if (count > 1)
6902 {
6903 PUT2(code, 2+LINK_SIZE, offset);
6904 PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count);
6905 skipbytes += IMM2_SIZE;
6906 code[1+LINK_SIZE]++;
6907 }
6908 else
6909 {
6910 PUT2(code, 2+LINK_SIZE, recno);
6911 }
6912 }
6913
6914
6915
6916
6917
6918
6919
6920 else if (terminator != CHAR_NULL)
6921 {
6922 *errorcodeptr = ERR15;
6923 goto FAILED;
6924 }
6925
6926
6927
6928
6929 else if (*name == CHAR_R)
6930 {
6931 recno = 0;
6932 for (i = 1; i < namelen; i++)
6933 {
6934 if (!IS_DIGIT(name[i]))
6935 {
6936 *errorcodeptr = ERR15;
6937 goto FAILED;
6938 }
6939 if (recno > INT_MAX / 10 - 1)
6940 {
6941 *errorcodeptr = ERR61;
6942 goto FAILED;
6943 }
6944 recno = recno * 10 + name[i] - CHAR_0;
6945 }
6946 if (recno == 0) recno = RREF_ANY;
6947 code[1+LINK_SIZE] = OP_RREF;
6948 PUT2(code, 2+LINK_SIZE, recno);
6949 }
6950
6951
6952
6953
6954 else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0)
6955 {
6956 code[1+LINK_SIZE] = OP_DEF;
6957 skipbytes = 1;
6958 }
6959
6960
6961
6962 else
6963 {
6964 *errorcodeptr = ERR15;
6965 goto FAILED;
6966 }
6967 break;
6968
6969
6970
6971 case CHAR_EQUALS_SIGN:
6972 bravalue = OP_ASSERT;
6973 cd->assert_depth += 1;
6974 ptr++;
6975 break;
6976
6977
6978
6979
6980
6981
6982
6983
6984 case CHAR_EXCLAMATION_MARK:
6985 ptr++;
6986 if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK &&
6987 ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK &&
6988 (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2)))
6989 {
6990 *code++ = OP_FAIL;
6991 previous = NULL;
6992 continue;
6993 }
6994 bravalue = OP_ASSERT_NOT;
6995 cd->assert_depth += 1;
6996 break;
6997
6998
6999
7000 case CHAR_LESS_THAN_SIGN:
7001 switch (ptr[1])
7002 {
7003 case CHAR_EQUALS_SIGN:
7004 bravalue = OP_ASSERTBACK;
7005 cd->assert_depth += 1;
7006 ptr += 2;
7007 break;
7008
7009 case CHAR_EXCLAMATION_MARK:
7010 bravalue = OP_ASSERTBACK_NOT;
7011 cd->assert_depth += 1;
7012 ptr += 2;
7013 break;
7014
7015 default:
7016 if (MAX_255(ptr[1]) && (cd->ctypes[ptr[1]] & ctype_word) != 0)
7017 goto DEFINE_NAME;
7018 ptr++;
7019 *errorcodeptr = ERR24;
7020 goto FAILED;
7021 }
7022 break;
7023
7024
7025
7026 case CHAR_GREATER_THAN_SIGN:
7027 bravalue = OP_ONCE;
7028 ptr++;
7029 break;
7030
7031
7032
7033 case CHAR_C:
7034 previous_callout = code;
7035 after_manual_callout = 1;
7036 *code++ = OP_CALLOUT;
7037 {
7038 int n = 0;
7039 ptr++;
7040 while(IS_DIGIT(*ptr))
7041 n = n * 10 + *ptr++ - CHAR_0;
7042 if (*ptr != CHAR_RIGHT_PARENTHESIS)
7043 {
7044 *errorcodeptr = ERR39;
7045 goto FAILED;
7046 }
7047 if (n > 255)
7048 {
7049 *errorcodeptr = ERR38;
7050 goto FAILED;
7051 }
7052 *code++ = n;
7053 PUT(code, 0, (int)(ptr - cd->start_pattern + 1));
7054 PUT(code, LINK_SIZE, 0);
7055 code += 2 * LINK_SIZE;
7056 }
7057 previous = NULL;
7058 continue;
7059
7060
7061
7062 case CHAR_P:
7063 if (*(++ptr) == CHAR_EQUALS_SIGN ||
7064 *ptr == CHAR_GREATER_THAN_SIGN)
7065 {
7066 is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
7067 terminator = CHAR_RIGHT_PARENTHESIS;
7068 goto NAMED_REF_OR_RECURSE;
7069 }
7070 else if (*ptr != CHAR_LESS_THAN_SIGN)
7071 {
7072 *errorcodeptr = ERR41;
7073 goto FAILED;
7074 }
7075
7076
7077
7078
7079 DEFINE_NAME:
7080 case CHAR_APOSTROPHE:
7081 terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
7082 CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
7083 name = ++ptr;
7084 if (IS_DIGIT(*ptr))
7085 {
7086 *errorcodeptr = ERR84;
7087 goto FAILED;
7088 }
7089 while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
7090 namelen = (int)(ptr - name);
7091
7092
7093
7094
7095
7096
7097
7098 if (lengthptr != NULL)
7099 {
7100 named_group *ng;
7101 pcre_uint32 number = cd->bracount + 1;
7102
7103 if (*ptr != (pcre_uchar)terminator)
7104 {
7105 *errorcodeptr = ERR42;
7106 goto FAILED;
7107 }
7108
7109 if (cd->names_found >= MAX_NAME_COUNT)
7110 {
7111 *errorcodeptr = ERR49;
7112 goto FAILED;
7113 }
7114
7115 if (namelen + IMM2_SIZE + 1 > cd->name_entry_size)
7116 {
7117 cd->name_entry_size = namelen + IMM2_SIZE + 1;
7118 if (namelen > MAX_NAME_SIZE)
7119 {
7120 *errorcodeptr = ERR48;
7121 goto FAILED;
7122 }
7123 }
7124
7125
7126
7127
7128
7129
7130
7131
7132 ng = cd->named_groups;
7133 for (i = 0; i < cd->names_found; i++, ng++)
7134 {
7135 if (namelen == ng->length &&
7136 STRNCMP_UC_UC(name, ng->name, namelen) == 0)
7137 {
7138 if (ng->number == number) break;
7139 if ((options & PCRE_DUPNAMES) == 0)
7140 {
7141 *errorcodeptr = ERR43;
7142 goto FAILED;
7143 }
7144 cd->dupnames = TRUE;
7145 }
7146 else if (ng->number == number)
7147 {
7148 *errorcodeptr = ERR65;
7149 goto FAILED;
7150 }
7151 }
7152
7153 if (i >= cd->names_found)
7154 {
7155
7156
7157 if (cd->names_found >= cd->named_group_list_size)
7158 {
7159 int newsize = cd->named_group_list_size * 2;
7160 named_group *newspace = (PUBL(malloc))
7161 (newsize * sizeof(named_group));
7162
7163 if (newspace == NULL)
7164 {
7165 *errorcodeptr = ERR21;
7166 goto FAILED;
7167 }
7168
7169 memcpy(newspace, cd->named_groups,
7170 cd->named_group_list_size * sizeof(named_group));
7171 if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
7172 (PUBL(free))((void *)cd->named_groups);
7173 cd->named_groups = newspace;
7174 cd->named_group_list_size = newsize;
7175 }
7176
7177 cd->named_groups[cd->names_found].name = name;
7178 cd->named_groups[cd->names_found].length = namelen;
7179 cd->named_groups[cd->names_found].number = number;
7180 cd->names_found++;
7181 }
7182 }
7183
7184 ptr++;
7185 goto NUMBERED_GROUP;
7186
7187
7188
7189 case CHAR_AMPERSAND:
7190 terminator = CHAR_RIGHT_PARENTHESIS;
7191 is_recurse = TRUE;
7192
7193
7194
7195
7196
7197
7198
7199
7200 NAMED_REF_OR_RECURSE:
7201 name = ++ptr;
7202 if (IS_DIGIT(*ptr))
7203 {
7204 *errorcodeptr = ERR84;
7205 goto FAILED;
7206 }
7207 while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
7208 namelen = (int)(ptr - name);
7209
7210
7211
7212
7213
7214
7215
7216
7217 if (lengthptr != NULL)
7218 {
7219 named_group *ng;
7220 recno = 0;
7221
7222 if (namelen == 0)
7223 {
7224 *errorcodeptr = ERR62;
7225 goto FAILED;
7226 }
7227 if (*ptr != (pcre_uchar)terminator)
7228 {
7229 *errorcodeptr = ERR42;
7230 goto FAILED;
7231 }
7232 if (namelen > MAX_NAME_SIZE)
7233 {
7234 *errorcodeptr = ERR48;
7235 goto FAILED;
7236 }
7237
7238
7239
7240 if (!is_recurse) cd->namedrefcount++;
7241
7242
7243
7244
7245
7246 *lengthptr += IMM2_SIZE;
7247
7248
7249
7250
7251
7252
7253
7254
7255
7256
7257
7258
7259 *lengthptr += 2 + 2*LINK_SIZE;
7260
7261
7262
7263
7264
7265
7266
7267
7268 if (cd->dupgroups) *lengthptr += 4 + 4*LINK_SIZE;
7269
7270
7271
7272
7273
7274
7275 else
7276 {
7277 ng = cd->named_groups;
7278 for (i = 0; i < cd->names_found; i++, ng++)
7279 {
7280 if (namelen == ng->length &&
7281 STRNCMP_UC_UC(name, ng->name, namelen) == 0)
7282 {
7283 open_capitem *oc;
7284 recno = ng->number;
7285 if (is_recurse) break;
7286 for (oc = cd->open_caps; oc != NULL; oc = oc->next)
7287 {
7288 if (oc->number == recno)
7289 {
7290 oc->flag = TRUE;
7291 break;
7292 }
7293 }
7294 }
7295 }
7296 }
7297 }
7298
7299
7300
7301
7302
7303
7304 else
7305 {
7306 slot = cd->name_table;
7307 for (i = 0; i < cd->names_found; i++)
7308 {
7309 if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
7310 slot[IMM2_SIZE+namelen] == 0)
7311 break;
7312 slot += cd->name_entry_size;
7313 }
7314
7315 if (i < cd->names_found)
7316 {
7317 recno = GET2(slot, 0);
7318 }
7319 else
7320 {
7321 *errorcodeptr = ERR15;
7322 goto FAILED;
7323 }
7324 }
7325
7326
7327
7328
7329 if (is_recurse) goto HANDLE_RECURSION;
7330
7331
7332
7333
7334 if (lengthptr == NULL && cd->dupnames)
7335 {
7336 int count = 1;
7337 unsigned int index = i;
7338 pcre_uchar *cslot = slot + cd->name_entry_size;
7339
7340 for (i++; i < cd->names_found; i++)
7341 {
7342 if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
7343 count++;
7344 cslot += cd->name_entry_size;
7345 }
7346
7347 if (count > 1)
7348 {
7349 if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
7350 previous = code;
7351 item_hwm_offset = cd->hwm - cd->start_workspace;
7352 *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
7353 PUT2INC(code, 0, index);
7354 PUT2INC(code, 0, count);
7355
7356
7357
7358 for (; slot < cslot; slot += cd->name_entry_size)
7359 {
7360 open_capitem *oc;
7361 recno = GET2(slot, 0);
7362 cd->backref_map |= (recno < 32)? (1 << recno) : 1;
7363 if (recno > cd->top_backref) cd->top_backref = recno;
7364
7365
7366
7367
7368
7369 for (oc = cd->open_caps; oc != NULL; oc = oc->next)
7370 {
7371 if (oc->number == recno)
7372 {
7373 oc->flag = TRUE;
7374 break;
7375 }
7376 }
7377 }
7378
7379 continue;
7380 }
7381 }
7382
7383
7384
7385 goto HANDLE_REFERENCE;
7386
7387
7388
7389 case CHAR_R:
7390 recno = 0;
7391 if (*(++ptr) != CHAR_RIGHT_PARENTHESIS)
7392 {
7393 *errorcodeptr = ERR29;
7394 goto FAILED;
7395 }
7396 goto HANDLE_RECURSION;
7397
7398
7399
7400 case CHAR_MINUS: case CHAR_PLUS:
7401 case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
7402 case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
7403 {
7404 const pcre_uchar *called;
7405 terminator = CHAR_RIGHT_PARENTHESIS;
7406
7407
7408
7409
7410
7411
7412
7413 HANDLE_NUMERICAL_RECURSION:
7414
7415 if ((refsign = *ptr) == CHAR_PLUS)
7416 {
7417 ptr++;
7418 if (!IS_DIGIT(*ptr))
7419 {
7420 *errorcodeptr = ERR63;
7421 goto FAILED;
7422 }
7423 }
7424 else if (refsign == CHAR_MINUS)
7425 {
7426 if (!IS_DIGIT(ptr[1]))
7427 goto OTHER_CHAR_AFTER_QUERY;
7428 ptr++;
7429 }
7430
7431 recno = 0;
7432 while(IS_DIGIT(*ptr))
7433 {
7434 if (recno > INT_MAX / 10 - 1)
7435 {
7436 while (IS_DIGIT(*ptr)) ptr++;
7437 *errorcodeptr = ERR61;
7438 goto FAILED;
7439 }
7440 recno = recno * 10 + *ptr++ - CHAR_0;
7441 }
7442
7443 if (*ptr != (pcre_uchar)terminator)
7444 {
7445 *errorcodeptr = ERR29;
7446 goto FAILED;
7447 }
7448
7449 if (refsign == CHAR_MINUS)
7450 {
7451 if (recno == 0)
7452 {
7453 *errorcodeptr = ERR58;
7454 goto FAILED;
7455 }
7456 recno = cd->bracount - recno + 1;
7457 if (recno <= 0)
7458 {
7459 *errorcodeptr = ERR15;
7460 goto FAILED;
7461 }
7462 }
7463 else if (refsign == CHAR_PLUS)
7464 {
7465 if (recno == 0)
7466 {
7467 *errorcodeptr = ERR58;
7468 goto FAILED;
7469 }
7470 recno += cd->bracount;
7471 }
7472
7473
7474
7475 HANDLE_RECURSION:
7476
7477 previous = code;
7478 item_hwm_offset = cd->hwm - cd->start_workspace;
7479 called = cd->start_code;
7480
7481
7482
7483
7484
7485
7486
7487
7488 if (lengthptr == NULL)
7489 {
7490 *code = OP_END;
7491 if (recno != 0)
7492 called = PRIV(find_bracket)(cd->start_code, utf, recno);
7493
7494
7495
7496 if (called == NULL)
7497 {
7498 if (recno > cd->final_bracount)
7499 {
7500 *errorcodeptr = ERR15;
7501 goto FAILED;
7502 }
7503
7504
7505
7506
7507
7508 called = cd->start_code + recno;
7509 if (cd->hwm >= cd->start_workspace + cd->workspace_size -
7510 WORK_SIZE_SAFETY_MARGIN)
7511 {
7512 *errorcodeptr = expand_workspace(cd);
7513 if (*errorcodeptr != 0) goto FAILED;
7514 }
7515 PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));
7516 }
7517
7518
7519
7520
7521
7522
7523
7524
7525
7526
7527 else if (GET(called, 1) == 0 && cond_depth <= 0 &&
7528 could_be_empty(called, code, bcptr, utf, cd))
7529 {
7530 *errorcodeptr = ERR40;
7531 goto FAILED;
7532 }
7533 }
7534
7535
7536
7537
7538
7539 *code = OP_RECURSE;
7540 PUT(code, 1, (int)(called - cd->start_code));
7541 code += 1 + LINK_SIZE;
7542 groupsetfirstchar = FALSE;
7543 }
7544
7545
7546
7547 if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
7548 continue;
7549
7550
7551
7552 default:
7553 OTHER_CHAR_AFTER_QUERY:
7554 set = unset = 0;
7555 optset = &set;
7556
7557 while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
7558 {
7559 switch (*ptr++)
7560 {
7561 case CHAR_MINUS: optset = &unset; break;
7562
7563 case CHAR_J:
7564 *optset |= PCRE_DUPNAMES;
7565 cd->external_flags |= PCRE_JCHANGED;
7566 break;
7567
7568 case CHAR_i: *optset |= PCRE_CASELESS; break;
7569 case CHAR_m: *optset |= PCRE_MULTILINE; break;
7570 case CHAR_s: *optset |= PCRE_DOTALL; break;
7571 case CHAR_x: *optset |= PCRE_EXTENDED; break;
7572 case CHAR_U: *optset |= PCRE_UNGREEDY; break;
7573 case CHAR_X: *optset |= PCRE_EXTRA; break;
7574
7575 default: *errorcodeptr = ERR12;
7576 ptr--;
7577 goto FAILED;
7578 }
7579 }
7580
7581
7582
7583 newoptions = (options | set) & (~unset);
7584
7585
7586
7587
7588
7589
7590
7591
7592
7593
7594
7595
7596
7597
7598
7599
7600
7601
7602
7603
7604
7605
7606 if (*ptr == CHAR_RIGHT_PARENTHESIS)
7607 {
7608 if (code == cd->start_code + 1 + LINK_SIZE &&
7609 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
7610 {
7611 cd->external_options = newoptions;
7612 }
7613 else
7614 {
7615 greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
7616 greedy_non_default = greedy_default ^ 1;
7617 req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
7618 }
7619
7620
7621
7622
7623 *optionsptr = options = newoptions;
7624 previous = NULL;
7625 continue;
7626 }
7627
7628
7629
7630
7631
7632
7633 bravalue = OP_BRA;
7634 ptr++;
7635 }
7636 }
7637
7638
7639
7640
7641
7642 else if ((options & PCRE_NO_AUTO_CAPTURE) != 0)
7643 {
7644 bravalue = OP_BRA;
7645 }
7646
7647
7648
7649 else
7650 {
7651 NUMBERED_GROUP:
7652 cd->bracount += 1;
7653 PUT2(code, 1+LINK_SIZE, cd->bracount);
7654 skipbytes = IMM2_SIZE;
7655 }
7656
7657
7658
7659
7660 if ((cd->parens_depth += 1) > PARENS_NEST_LIMIT)
7661 {
7662 *errorcodeptr = ERR82;
7663 goto FAILED;
7664 }
7665
7666
7667
7668
7669
7670
7671
7672
7673
7674 if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT &&
7675 cd->iscondassert)
7676 {
7677 previous = NULL;
7678 cd->iscondassert = FALSE;
7679 }
7680 else
7681 {
7682 previous = code;
7683 item_hwm_offset = cd->hwm - cd->start_workspace;
7684 }
7685
7686 *code = bravalue;
7687 tempcode = code;
7688 tempreqvary = cd->req_varyopt;
7689 tempbracount = cd->bracount;
7690 length_prevgroup = 0;
7691
7692 if (!compile_regex(
7693 newoptions,
7694 &tempcode,
7695 &ptr,
7696 errorcodeptr,
7697 (bravalue == OP_ASSERTBACK ||
7698 bravalue == OP_ASSERTBACK_NOT),
7699 reset_bracount,
7700 skipbytes,
7701 cond_depth +
7702 ((bravalue == OP_COND)?1:0),
7703 &subfirstchar,
7704 &subfirstcharflags,
7705 &subreqchar,
7706 &subreqcharflags,
7707 bcptr,
7708 cd,
7709 (lengthptr == NULL)? NULL :
7710 &length_prevgroup
7711 ))
7712 goto FAILED;
7713
7714 cd->parens_depth -= 1;
7715
7716
7717
7718
7719 if (bravalue == OP_ONCE && cd->bracount <= tempbracount)
7720 *code = OP_ONCE_NC;
7721
7722 if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
7723 cd->assert_depth -= 1;
7724
7725
7726
7727
7728
7729
7730
7731
7732
7733
7734 if (bravalue == OP_COND && lengthptr == NULL)
7735 {
7736 pcre_uchar *tc = code;
7737 int condcount = 0;
7738
7739 do {
7740 condcount++;
7741 tc += GET(tc,1);
7742 }
7743 while (*tc != OP_KET);
7744
7745
7746
7747
7748 if (code[LINK_SIZE+1] == OP_DEF)
7749 {
7750 if (condcount > 1)
7751 {
7752 *errorcodeptr = ERR54;
7753 goto FAILED;
7754 }
7755 bravalue = OP_DEF;
7756 }
7757
7758
7759
7760
7761
7762 else
7763 {
7764 if (condcount > 2)
7765 {
7766 *errorcodeptr = ERR27;
7767 goto FAILED;
7768 }
7769 if (condcount == 1) subfirstcharflags = subreqcharflags = REQ_NONE;
7770 }
7771 }
7772
7773
7774
7775 if (*ptr != CHAR_RIGHT_PARENTHESIS)
7776 {
7777 *errorcodeptr = ERR14;
7778 goto FAILED;
7779 }
7780
7781
7782
7783
7784
7785
7786 if (lengthptr != NULL)
7787 {
7788 if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
7789 {
7790 *errorcodeptr = ERR20;
7791 goto FAILED;
7792 }
7793 *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
7794 code++;
7795 PUTINC(code, 0, 1 + LINK_SIZE);
7796 *code++ = OP_KET;
7797 PUTINC(code, 0, 1 + LINK_SIZE);
7798 break;
7799 }
7800
7801
7802
7803 code = tempcode;
7804
7805
7806
7807
7808 if (bravalue == OP_DEF) break;
7809
7810
7811
7812
7813
7814
7815
7816
7817 zeroreqchar = reqchar;
7818 zeroreqcharflags = reqcharflags;
7819 zerofirstchar = firstchar;
7820 zerofirstcharflags = firstcharflags;
7821 groupsetfirstchar = FALSE;
7822
7823 if (bravalue >= OP_ONCE)
7824 {
7825
7826
7827
7828
7829
7830
7831 if (firstcharflags == REQ_UNSET)
7832 {
7833 if (subfirstcharflags >= 0)
7834 {
7835 firstchar = subfirstchar;
7836 firstcharflags = subfirstcharflags;
7837 groupsetfirstchar = TRUE;
7838 }
7839 else firstcharflags = REQ_NONE;
7840 zerofirstcharflags = REQ_NONE;
7841 }
7842
7843
7844
7845
7846
7847 else if (subfirstcharflags >= 0 && subreqcharflags < 0)
7848 {
7849 subreqchar = subfirstchar;
7850 subreqcharflags = subfirstcharflags | tempreqvary;
7851 }
7852
7853
7854
7855
7856 if (subreqcharflags >= 0)
7857 {
7858 reqchar = subreqchar;
7859 reqcharflags = subreqcharflags;
7860 }
7861 }
7862
7863
7864
7865
7866
7867
7868
7869
7870
7871 else if (bravalue == OP_ASSERT && subreqcharflags >= 0)
7872 {
7873 reqchar = subreqchar;
7874 reqcharflags = subreqcharflags;
7875 }
7876 break;
7877
7878
7879
7880
7881
7882
7883
7884
7885
7886
7887
7888 case CHAR_BACKSLASH:
7889 tempptr = ptr;
7890 escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
7891 if (*errorcodeptr != 0) goto FAILED;
7892
7893 if (escape == 0)
7894 c = ec;
7895 else
7896 {
7897 if (escape == ESC_Q)
7898 {
7899 if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
7900 ptr += 2;
7901 else inescq = TRUE;
7902 continue;
7903 }
7904
7905 if (escape == ESC_E) continue;
7906
7907
7908
7909
7910 if (firstcharflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z)
7911 firstcharflags = REQ_NONE;
7912
7913
7914
7915 zerofirstchar = firstchar;
7916 zerofirstcharflags = firstcharflags;
7917 zeroreqchar = reqchar;
7918 zeroreqcharflags = reqcharflags;
7919
7920
7921
7922
7923
7924
7925
7926
7927 if (escape == ESC_g)
7928 {
7929 const pcre_uchar *p;
7930 pcre_uint32 cf;
7931
7932 item_hwm_offset = cd->hwm - cd->start_workspace;
7933 terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
7934 CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
7935
7936
7937
7938
7939
7940
7941 skipbytes = 0;
7942 reset_bracount = FALSE;
7943
7944
7945
7946 cf = ptr[1];
7947 if (cf != CHAR_PLUS && cf != CHAR_MINUS && !IS_DIGIT(cf))
7948 {
7949 is_recurse = TRUE;
7950 goto NAMED_REF_OR_RECURSE;
7951 }
7952
7953
7954
7955
7956 p = ptr + 2;
7957 while (IS_DIGIT(*p)) p++;
7958 if (*p != (pcre_uchar)terminator)
7959 {
7960 *errorcodeptr = ERR57;
7961 goto FAILED;
7962 }
7963 ptr++;
7964 goto HANDLE_NUMERICAL_RECURSION;
7965 }
7966
7967
7968
7969
7970 if (escape == ESC_k)
7971 {
7972 if ((ptr[1] != CHAR_LESS_THAN_SIGN &&
7973 ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
7974 {
7975 *errorcodeptr = ERR69;
7976 goto FAILED;
7977 }
7978 is_recurse = FALSE;
7979 terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
7980 CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
7981 CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
7982 goto NAMED_REF_OR_RECURSE;
7983 }
7984
7985
7986
7987
7988
7989 if (escape < 0)
7990 {
7991 open_capitem *oc;
7992 recno = -escape;
7993
7994
7995
7996
7997 HANDLE_REFERENCE:
7998 if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
7999 previous = code;
8000 item_hwm_offset = cd->hwm - cd->start_workspace;
8001 *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
8002 PUT2INC(code, 0, recno);
8003 cd->backref_map |= (recno < 32)? (1 << recno) : 1;
8004 if (recno > cd->top_backref) cd->top_backref = recno;
8005
8006
8007
8008
8009
8010 for (oc = cd->open_caps; oc != NULL; oc = oc->next)
8011 {
8012 if (oc->number == recno)
8013 {
8014 oc->flag = TRUE;
8015 break;
8016 }
8017 }
8018 }
8019
8020
8021
8022 #ifdef SUPPORT_UCP
8023 else if (escape == ESC_P || escape == ESC_p)
8024 {
8025 BOOL negated;
8026 unsigned int ptype = 0, pdata = 0;
8027 if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
8028 goto FAILED;
8029 previous = code;
8030 item_hwm_offset = cd->hwm - cd->start_workspace;
8031 *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
8032 *code++ = ptype;
8033 *code++ = pdata;
8034 }
8035 #else
8036
8037
8038
8039
8040 else if (escape == ESC_X || escape == ESC_P || escape == ESC_p)
8041 {
8042 *errorcodeptr = ERR45;
8043 goto FAILED;
8044 }
8045 #endif
8046
8047
8048
8049
8050
8051
8052
8053 else
8054 {
8055 if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
8056 cd->max_lookbehind == 0)
8057 cd->max_lookbehind = 1;
8058 #ifdef SUPPORT_UCP
8059 if (escape >= ESC_DU && escape <= ESC_wu)
8060 {
8061 nestptr = ptr + 1;
8062 ptr = substitutes[escape - ESC_DU] - 1;
8063 }
8064 else
8065 #endif
8066
8067
8068
8069 {
8070 previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
8071 item_hwm_offset = cd->hwm - cd->start_workspace;
8072 *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
8073 }
8074 }
8075 continue;
8076 }
8077
8078
8079
8080
8081
8082 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
8083 if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
8084 mclength = PRIV(ord2utf)(c, mcbuffer);
8085 else
8086 #endif
8087
8088 {
8089 mcbuffer[0] = c;
8090 mclength = 1;
8091 }
8092 goto ONE_CHAR;
8093
8094
8095
8096
8097
8098
8099
8100 default:
8101 NORMAL_CHAR:
8102 mclength = 1;
8103 mcbuffer[0] = c;
8104
8105 #ifdef SUPPORT_UTF
8106 if (utf && HAS_EXTRALEN(c))
8107 ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
8108 #endif
8109
8110
8111
8112
8113 ONE_CHAR:
8114 previous = code;
8115 item_hwm_offset = cd->hwm - cd->start_workspace;
8116
8117
8118
8119
8120
8121 #ifdef SUPPORT_UCP
8122 if (utf && (options & PCRE_CASELESS) != 0)
8123 {
8124 GETCHAR(c, mcbuffer);
8125 if ((c = UCD_CASESET(c)) != 0)
8126 {
8127 *code++ = OP_PROP;
8128 *code++ = PT_CLIST;
8129 *code++ = c;
8130 if (firstcharflags == REQ_UNSET)
8131 firstcharflags = zerofirstcharflags = REQ_NONE;
8132 break;
8133 }
8134 }
8135 #endif
8136
8137
8138
8139 *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR;
8140 for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
8141
8142
8143
8144 if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
8145 cd->external_flags |= PCRE_HASCRORLF;
8146
8147
8148
8149
8150
8151
8152 if (firstcharflags == REQ_UNSET)
8153 {
8154 zerofirstcharflags = REQ_NONE;
8155 zeroreqchar = reqchar;
8156 zeroreqcharflags = reqcharflags;
8157
8158
8159
8160
8161 if (mclength == 1 || req_caseopt == 0)
8162 {
8163 firstchar = mcbuffer[0] | req_caseopt;
8164 firstchar = mcbuffer[0];
8165 firstcharflags = req_caseopt;
8166
8167 if (mclength != 1)
8168 {
8169 reqchar = code[-1];
8170 reqcharflags = cd->req_varyopt;
8171 }
8172 }
8173 else firstcharflags = reqcharflags = REQ_NONE;
8174 }
8175
8176
8177
8178
8179 else
8180 {
8181 zerofirstchar = firstchar;
8182 zerofirstcharflags = firstcharflags;
8183 zeroreqchar = reqchar;
8184 zeroreqcharflags = reqcharflags;
8185 if (mclength == 1 || req_caseopt == 0)
8186 {
8187 reqchar = code[-1];
8188 reqcharflags = req_caseopt | cd->req_varyopt;
8189 }
8190 }
8191
8192 break;
8193 }
8194 }
8195
8196
8197
8198
8199
8200
8201 FAILED:
8202 *ptrptr = ptr;
8203 return FALSE;
8204 }
8205
8206
8207
8208
8209
8210
8211
8212
8213
8214 /* Compile a bracketed sequence of alternatives (branch | branch | ...). */
8215 /* On entry *codeptr addresses the opening bracket opcode, which the */
8216 /* caller has already stored. Each branch is compiled by compile_branch(); */
8217 /* every branch after the first is introduced by OP_ALT and the group is */
8218 /* closed with OP_KET. In a lookbehind each branch starts with OP_REVERSE. */
8219
8220 /* Arguments: */
8221 /*   options            option bits; handed to compile_branch by address */
8222 /*   codeptr            in/out: current position in the compiled code */
8223 /*   ptrptr             in/out: current position in the pattern */
8224 /*   errorcodeptr       receives the error code when FALSE is returned */
8225 /*   lookbehind         TRUE if the group is a lookbehind assertion */
8226 /*   reset_bracount     TRUE to restart cd->bracount at every branch */
8227 /*   skipbytes          code units to skip after the bracket's link field */
8228 /*   cond_depth         conditional nesting depth, passed to compile_branch */
8229 /*   firstcharptr       out: first-character value for the group */
8230 /*   firstcharflagsptr  out: its flags (negative REQ_xxx value if none) */
8231 /*   reqcharptr         out: required-character value for the group */
8232 /*   reqcharflagsptr    out: its flags (negative REQ_xxx value if none) */
8233 /*   bcptr              branch chain entry of the enclosing group */
8234 /*   cd                 shared compile state */
8235 /*   lengthptr          NULL for a real compile; otherwise this is the */
8236 /*                      measuring pass and the length is added to *lengthptr */
8237
8238 /* Returns: TRUE on success, FALSE on error (*errorcodeptr is set). */
8239
8240 static BOOL
8241 compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
8242 int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
8243 int cond_depth,
8244 pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr,
8245 pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr,
8246 branch_chain *bcptr, compile_data *cd, int *lengthptr)
8247 {
8248 const pcre_uchar *ptr = *ptrptr;
8249 pcre_uchar *code = *codeptr;
8250 pcre_uchar *last_branch = code; /* start of the most recent branch */
8251 pcre_uchar *start_bracket = code; /* start of the whole group */
8252 pcre_uchar *reverse_count = NULL; /* length field of the current OP_REVERSE */
8253 open_capitem capitem;
8254 int capnumber = 0;
8255 pcre_uint32 firstchar, reqchar;
8256 pcre_int32 firstcharflags, reqcharflags;
8257 pcre_uint32 branchfirstchar, branchreqchar;
8258 pcre_int32 branchfirstcharflags, branchreqcharflags;
8259 int length;
8260 unsigned int orig_bracount;
8261 unsigned int max_bracount;
8262 branch_chain bc;
8263 size_t save_hwm_offset;
8264
8265 /* This function recurses through compile_branch. If a stack_guard hook */
8266 /* is installed and it returns non-zero, give up with ERR85. */
8267 if (PUBL(stack_guard) != NULL && PUBL(stack_guard)())
8268 {
8269 *errorcodeptr= ERR85;
8270 return FALSE;
8271 }
8272
8273 /* Link this group into the chain of enclosing branches. */
8274
8275 bc.outer = bcptr;
8276 bc.current_branch = code;
8277
8278 firstchar = reqchar = 0;
8279 firstcharflags = reqcharflags = REQ_UNSET;
8280
8281 save_hwm_offset = cd->hwm - cd->start_workspace;
8282
8283 /* The workspace offset saved above is handed to adjust_recurse() if the */
8284 /* group has to be wrapped in OP_ONCE at the end. */
8285
8286 /* Start the length with the fixed overhead of a group: the opening */
8287 /* opcode and its link, the closing OP_KET and its link, and whatever */
8288 /* follows the opening link (skipbytes). */
8289
8290 length = 2 + 2*LINK_SIZE + skipbytes;
8291
8292
8293
8294
8295
8296
8297
8298 /* For a capturing group (OP_CBRA), push an entry onto cd->open_caps while */
8299 /* the group is being compiled. compile_branch sets the entry's flag when */
8300 /* it meets a back reference to a group that is still open; the flag is */
8301 /* examined after the closing OP_KET has been written. */
8302
8303 if (*code == OP_CBRA)
8304 {
8305 capnumber = GET2(code, 1 + LINK_SIZE);
8306 capitem.number = capnumber;
8307 capitem.next = cd->open_caps;
8308 capitem.flag = FALSE;
8309 cd->open_caps = &capitem;
8310 }
8311
8312 /* Zero the first branch's link (this is what stops the link-reversal */
8313 /* loop below), then step over the bracket header. */
8314 PUT(code, 1, 0);
8315 code += 1 + LINK_SIZE + skipbytes;
8316
8317
8318 /* Loop once per alternative branch. */
8319 orig_bracount = max_bracount = cd->bracount;
8320 for (;;)
8321 {
8322
8323 /* With reset_bracount, every branch numbers its captures from the */
8324 /* count that was current when the group was entered. */
8325 if (reset_bracount) cd->bracount = orig_bracount;
8326
8327 /* A lookbehind branch begins with OP_REVERSE and a zero placeholder; */
8328 /* the real length is filled in once the branch has been compiled. */
8329 if (lookbehind)
8330 {
8331 *code++ = OP_REVERSE;
8332 reverse_count = code;
8333 PUTINC(code, 0, 0);
8334 length += 1 + LINK_SIZE;
8335 }
8336
8337
8338 /* Compile the branch. On failure, report how far the pattern was read. */
8339
8340 if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,
8341 &branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc,
8342 cond_depth, cd, (lengthptr == NULL)? NULL : &length))
8343 {
8344 *ptrptr = ptr;
8345 return FALSE;
8346 }
8347
8348
8349 /* Keep the highest capture count reached by any branch. */
8350
8351 if (cd->bracount > max_bracount) max_bracount = cd->bracount;
8352
8353 /* The first/required character bookkeeping and the lookbehind length */
8354 /* check are done only in the real compile. */
8355 if (lengthptr == NULL)
8356 {
8357
8358 /* First branch (last_branch is still the bracket, not an OP_ALT): */
8359 /* its values become the group's values. */
8360 if (*last_branch != OP_ALT)
8361 {
8362 firstchar = branchfirstchar;
8363 firstcharflags = branchfirstcharflags;
8364 reqchar = branchreqchar;
8365 reqcharflags = branchreqcharflags;
8366 }
8367
8368
8369
8370
8371
8372 /* Later branch: merge its values with those of the earlier branches. */
8373 else
8374 {
8375
8376 /* A first character that this branch does not share is dropped. If no */
8377 /* required character is recorded yet, the old first character is */
8378 /* kept in that role instead. */
8379 if (firstcharflags >= 0 &&
8380 (firstcharflags != branchfirstcharflags || firstchar != branchfirstchar))
8381 {
8382 if (reqcharflags < 0)
8383 {
8384 reqchar = firstchar;
8385 reqcharflags = firstcharflags;
8386 }
8387 firstcharflags = REQ_NONE;
8388 }
8389
8390 /* With no group first character, a branch that has a first character */
8391 /* but no required character uses the former as the latter. */
8392
8393 if (firstcharflags < 0 && branchfirstcharflags >= 0 && branchreqcharflags < 0)
8394 {
8395 branchreqchar = branchfirstchar;
8396 branchreqcharflags = branchfirstcharflags;
8397 }
8398
8399 /* The required character survives only if this branch agrees with it; */
8400 /* REQ_VARY is ignored when comparing and the flags are then OR-ed. */
8401 if (((reqcharflags & ~REQ_VARY) != (branchreqcharflags & ~REQ_VARY)) ||
8402 reqchar != branchreqchar)
8403 reqcharflags = REQ_NONE;
8404 else
8405 {
8406 reqchar = branchreqchar;
8407 reqcharflags |= branchreqcharflags;
8408 }
8409 }
8410
8411
8412 /* For a lookbehind, mark the end of the branch with OP_END and measure */
8413 /* it with find_fixedlength(). A result of -3 only sets */
8414 /* cd->check_lookbehind (presumably so the check is repeated later); */
8415 /* any other negative result is an error: -2 gives ERR36, -4 gives */
8416 /* ERR70, anything else ERR25. A non-negative length is stored in the */
8417 /* OP_REVERSE placeholder and raises cd->max_lookbehind if larger. */
8418
8419 if (lookbehind)
8420 {
8421 int fixed_length;
8422 *code = OP_END;
8423 fixed_length = find_fixedlength(last_branch, (options & PCRE_UTF8) != 0,
8424 FALSE, cd, NULL);
8425 DPRINTF(("fixed length = %d\n", fixed_length));
8426 if (fixed_length == -3)
8427 {
8428 cd->check_lookbehind = TRUE;
8429 }
8430 else if (fixed_length < 0)
8431 {
8432 *errorcodeptr = (fixed_length == -2)? ERR36 :
8433 (fixed_length == -4)? ERR70: ERR25;
8434 *ptrptr = ptr;
8435 return FALSE;
8436 }
8437 else
8438 {
8439 if (fixed_length > cd->max_lookbehind)
8440 cd->max_lookbehind = fixed_length;
8441 PUT(reverse_count, 0, fixed_length);
8442 }
8443 }
8444 }
8445
8446 /* No '|' follows, so the group is complete. In the real compile, walk */
8447 /* back over the branches: so far each branch's link holds the distance */
8448 /* back to the previous branch, and it is replaced by the distance */
8449 /* forward to the next branch (or, for the last one, to the OP_KET). */
8450 /* The zero stored in the first branch's link ends the loop. */
8451
8452
8453 if (*ptr != CHAR_VERTICAL_LINE)
8454 {
8455 if (lengthptr == NULL)
8456 {
8457 int branch_length = (int)(code - last_branch);
8458 do
8459 {
8460 int prev_length = GET(last_branch, 1);
8461 PUT(last_branch, 1, branch_length);
8462 branch_length = prev_length;
8463 last_branch -= branch_length;
8464 }
8465 while (branch_length > 0);
8466 }
8467
8468 /* Close the group; OP_KET's link is the distance back to the bracket. */
8469
8470 *code = OP_KET;
8471 PUT(code, 1, (int)(code - start_bracket));
8472 code += 1 + LINK_SIZE;
8473
8474 /* If this capturing group's open_caps flag was set, wrap the group in */
8475 /* OP_ONCE ... OP_KET: adjust_recurse() is called first (presumably to */
8476 /* fix offsets affected by the move - confirm), the group is moved up */
8477 /* by one header, and the new header and OP_KET are added to length. */
8478 /* Either way the group's entry is then popped from cd->open_caps. */
8479 if (capnumber > 0)
8480 {
8481 if (cd->open_caps->flag)
8482 {
8483 *code = OP_END;
8484 adjust_recurse(start_bracket, 1 + LINK_SIZE,
8485 (options & PCRE_UTF8) != 0, cd, save_hwm_offset);
8486 memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
8487 IN_UCHARS(code - start_bracket));
8488 *start_bracket = OP_ONCE;
8489 code += 1 + LINK_SIZE;
8490 PUT(start_bracket, 1, (int)(code - start_bracket));
8491 *code = OP_KET;
8492 PUT(code, 1, (int)(code - start_bracket));
8493 code += 1 + LINK_SIZE;
8494 length += 2 + 2*LINK_SIZE;
8495 }
8496 cd->open_caps = cd->open_caps->next;
8497 }
8498
8499 /* Leave cd->bracount at the highest value any branch reached. */
8500
8501 cd->bracount = max_bracount;
8502
8503 /* Hand the results back to the caller. */
8504
8505 *codeptr = code;
8506 *ptrptr = ptr;
8507 *firstcharptr = firstchar;
8508 *firstcharflagsptr = firstcharflags;
8509 *reqcharptr = reqchar;
8510 *reqcharflagsptr = reqcharflags;
8511 if (lengthptr != NULL) /* measuring pass: add this group's length */
8512 {
8513 if (OFLOW_MAX - *lengthptr < length) /* total would exceed OFLOW_MAX */
8514 {
8515 *errorcodeptr = ERR20;
8516 return FALSE;
8517 }
8518 *lengthptr += length;
8519 }
8520 return TRUE;
8521 }
8522
8523
8524
8525
8526 /* Another branch follows. In the measuring pass the code pointer is */
8527 /* rewound to just after the bracket header, so the same buffer space is */
8528 /* reused, and only the OP_ALT and its link are counted. In the real */
8529 /* compile an OP_ALT is emitted whose link is, for now, the distance */
8530 /* back to the previous branch; it becomes the current branch. */
8531
8532 if (lengthptr != NULL)
8533 {
8534 code = *codeptr + 1 + LINK_SIZE + skipbytes;
8535 length += 1 + LINK_SIZE;
8536 }
8537 else
8538 {
8539 *code = OP_ALT;
8540 PUT(code, 1, (int)(code - last_branch));
8541 bc.current_branch = last_branch = code;
8542 code += 1 + LINK_SIZE;
8543 }
8544 /* Step over the '|'. */
8545 ptr++;
8546 }
8547 /* Not reached: the loop above is left only by a return. */
8548 }
8549
8550
8551
8552
8553
8554
8555
8556
8557
8558
8559
8560
8561
8562
8563
8564
8565
8566
8567
8568
8569
8570
8571
8572
8573
8574
8575
8576
8577
8578
8579
8580
8581
8582
8583
8584
8585
8586
8587
8588
8589
8590
8591
8592
8593
8594 static BOOL
8595 is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
8596 compile_data *cd, int atomcount)
8597 {
8598 do {
8599 const pcre_uchar *scode = first_significant_code(
8600 code + PRIV(OP_lengths)[*code], FALSE);
8601 register int op = *scode;
8602
8603
8604
8605 if (op == OP_BRA || op == OP_BRAPOS ||
8606 op == OP_SBRA || op == OP_SBRAPOS)
8607 {
8608 if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
8609 }
8610
8611
8612
8613 else if (op == OP_CBRA || op == OP_CBRAPOS ||
8614 op == OP_SCBRA || op == OP_SCBRAPOS)
8615 {
8616 int n = GET2(scode, 1+LINK_SIZE);
8617 int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
8618 if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE;
8619 }
8620
8621
8622
8623 else if (op == OP_ASSERT || op == OP_COND)
8624 {
8625 if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE;
8626 }
8627
8628
8629
8630 else if (op == OP_ONCE || op == OP_ONCE_NC)
8631 {
8632 if (!is_anchored(scode, bracket_map, cd, atomcount + 1))
8633 return FALSE;
8634 }
8635
8636
8637
8638
8639
8640 else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
8641 op == OP_TYPEPOSSTAR))
8642 {
8643 if (scode[1] != OP_ALLANY || (bracket_map & cd->backref_map) != 0 ||
8644 atomcount > 0 || cd->had_pruneorskip)
8645 return FALSE;
8646 }
8647
8648
8649
8650 else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE;
8651
8652 code += GET(code, 1);
8653 }
8654 while (*code == OP_ALT);
8655 return TRUE;
8656 }
8657
8658
8659
8660
8661
8662
8663
8664
8665
8666
8667
8668
8669
8670
8671
8672
8673
8674
8675
8676
8677
8678
8679
8680
8681
8682
8683
8684 static BOOL
8685 is_startline(const pcre_uchar *code, unsigned int bracket_map,
8686 compile_data *cd, int atomcount)
8687 {
8688 do {
8689 const pcre_uchar *scode = first_significant_code(
8690 code + PRIV(OP_lengths)[*code], FALSE);
8691 register int op = *scode;
8692
8693
8694
8695
8696
8697
8698 if (op == OP_COND)
8699 {
8700 scode += 1 + LINK_SIZE;
8701 if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
8702 switch (*scode)
8703 {
8704 case OP_CREF:
8705 case OP_DNCREF:
8706 case OP_RREF:
8707 case OP_DNRREF:
8708 case OP_DEF:
8709 case OP_FAIL:
8710 return FALSE;
8711
8712 default:
8713 if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
8714 do scode += GET(scode, 1); while (*scode == OP_ALT);
8715 scode += 1 + LINK_SIZE;
8716 break;
8717 }
8718 scode = first_significant_code(scode, FALSE);
8719 op = *scode;
8720 }
8721
8722
8723
8724 if (op == OP_BRA || op == OP_BRAPOS ||
8725 op == OP_SBRA || op == OP_SBRAPOS)
8726 {
8727 if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
8728 }
8729
8730
8731
8732 else if (op == OP_CBRA || op == OP_CBRAPOS ||
8733 op == OP_SCBRA || op == OP_SCBRAPOS)
8734 {
8735 int n = GET2(scode, 1+LINK_SIZE);
8736 int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
8737 if (!is_startline(scode, new_map, cd, atomcount)) return FALSE;
8738 }
8739
8740
8741
8742 else if (op == OP_ASSERT)
8743 {
8744 if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
8745 }
8746
8747
8748
8749 else if (op == OP_ONCE || op == OP_ONCE_NC)
8750 {
8751 if (!is_startline(scode, bracket_map, cd, atomcount + 1)) return FALSE;
8752 }
8753
8754
8755
8756
8757
8758
8759
8760 else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
8761 {
8762 if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 ||
8763 atomcount > 0 || cd->had_pruneorskip)
8764 return FALSE;
8765 }
8766
8767
8768
8769
8770
8771
8772 else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;
8773
8774
8775
8776 code += GET(code, 1);
8777 }
8778 while (*code == OP_ALT);
8779 return TRUE;
8780 }
8781
8782
8783
8784
8785
8786
8787
8788
8789
8790
8791
8792
8793
8794
8795
8796
8797
8798
8799
8800
8801
8802
8803
8804
8805 static pcre_uint32
8806 find_firstassertedchar(const pcre_uchar *code, pcre_int32 *flags,
8807 BOOL inassert)
8808 {
8809 register pcre_uint32 c = 0;
8810 int cflags = REQ_NONE;
8811
8812 *flags = REQ_NONE;
8813 do {
8814 pcre_uint32 d;
8815 int dflags;
8816 int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
8817 *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
8818 const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
8819 TRUE);
8820 register pcre_uchar op = *scode;
8821
8822 switch(op)
8823 {
8824 default:
8825 return 0;
8826
8827 case OP_BRA:
8828 case OP_BRAPOS:
8829 case OP_CBRA:
8830 case OP_SCBRA:
8831 case OP_CBRAPOS:
8832 case OP_SCBRAPOS:
8833 case OP_ASSERT:
8834 case OP_ONCE:
8835 case OP_ONCE_NC:
8836 d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);
8837 if (dflags < 0)
8838 return 0;
8839 if (cflags < 0) { c = d; cflags = dflags; } else if (c != d || cflags != dflags) return 0;
8840 break;
8841
8842 case OP_EXACT:
8843 scode += IMM2_SIZE;
8844
8845
8846 case OP_CHAR:
8847 case OP_PLUS:
8848 case OP_MINPLUS:
8849 case OP_POSPLUS:
8850 if (!inassert) return 0;
8851 if (cflags < 0) { c = scode[1]; cflags = 0; }
8852 else if (c != scode[1]) return 0;
8853 break;
8854
8855 case OP_EXACTI:
8856 scode += IMM2_SIZE;
8857
8858
8859 case OP_CHARI:
8860 case OP_PLUSI:
8861 case OP_MINPLUSI:
8862 case OP_POSPLUSI:
8863 if (!inassert) return 0;
8864 if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
8865 else if (c != scode[1]) return 0;
8866 break;
8867 }
8868
8869 code += GET(code, 1);
8870 }
8871 while (*code == OP_ALT);
8872
8873 *flags = cflags;
8874 return c;
8875 }
8876
8877
8878
8879
8880
8881
8882
8883
8884
8885
8886
8887
8888
8889
8890
8891
8892
8893
8894
8895
8896 static void
8897 add_name(compile_data *cd, const pcre_uchar *name, int length,
8898 unsigned int groupno)
8899 {
8900 int i;
8901 pcre_uchar *slot = cd->name_table;
8902
8903 for (i = 0; i < cd->names_found; i++)
8904 {
8905 int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(length));
8906 if (crc == 0 && slot[IMM2_SIZE+length] != 0)
8907 crc = -1;
8908
8909
8910
8911
8912
8913
8914 if (crc < 0)
8915 {
8916 memmove(slot + cd->name_entry_size, slot,
8917 IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
8918 break;
8919 }
8920
8921
8922
8923 slot += cd->name_entry_size;
8924 }
8925
8926 PUT2(slot, 0, groupno);
8927 memcpy(slot + IMM2_SIZE, name, IN_UCHARS(length));
8928 slot[IMM2_SIZE + length] = 0;
8929 cd->names_found++;
8930 }
8931
8932
8933
8934
8935
8936
8937
8938
8939
8940
8941
8942
8943
8944
8945
8946
8947
8948
8949
8950
8951
8952
8953
8954
8955
/*************************************************
*        Compile a Regular Expression            *
*************************************************/

/* Basic compile entry point. It simply forwards to the matching "compile2"
function for the code-unit width being built, passing NULL for the
error-code pointer, so only the error text and offset are reported.

Arguments:
  pattern       the regular expression
  options       option bits
  errorptr      where to put a pointer to an error message
  erroroffset   where to put the offset of the error in the pattern
  tables        character tables, or NULL

Returns:        whatever the compile2 function returns
*/

#if defined COMPILE_PCRE8

PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile(const char *pattern, int options, const char **errorptr,
  int *erroroffset, const unsigned char *tables)
{
return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}

#elif defined COMPILE_PCRE16

PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
  int *erroroffset, const unsigned char *tables)
{
return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}

#elif defined COMPILE_PCRE32

PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION
pcre32_compile(PCRE_SPTR32 pattern, int options, const char **errorptr,
  int *erroroffset, const unsigned char *tables)
{
return pcre32_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}

#endif
8978
8979
8980 #if defined COMPILE_PCRE8
8981 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
8982 pcre_compile2(const char *pattern, int options, int *errorcodeptr,
8983 const char **errorptr, int *erroroffset, const unsigned char *tables)
8984 #elif defined COMPILE_PCRE16
8985 PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
8986 pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
8987 const char **errorptr, int *erroroffset, const unsigned char *tables)
8988 #elif defined COMPILE_PCRE32
8989 PCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION
8990 pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr,
8991 const char **errorptr, int *erroroffset, const unsigned char *tables)
8992 #endif
8993 {
8994 REAL_PCRE *re;
8995 int length = 1;
8996 pcre_int32 firstcharflags, reqcharflags;
8997 pcre_uint32 firstchar, reqchar;
8998 pcre_uint32 limit_match = PCRE_UINT32_MAX;
8999 pcre_uint32 limit_recursion = PCRE_UINT32_MAX;
9000 int newline;
9001 int errorcode = 0;
9002 int skipatstart = 0;
9003 BOOL utf;
9004 BOOL never_utf = FALSE;
9005 size_t size;
9006 pcre_uchar *code;
9007 const pcre_uchar *codestart;
9008 const pcre_uchar *ptr;
9009 compile_data compile_block;
9010 compile_data *cd = &compile_block;
9011
9012
9013
9014
9015
9016
9017
9018
9019 pcre_uchar cworkspace[COMPILE_WORK_SIZE];
9020
9021
9022
9023
9024 named_group named_groups[NAMED_GROUP_LIST_SIZE];
9025
9026
9027
9028 ptr = (const pcre_uchar *)pattern;
9029
9030
9031
9032
9033
9034 if (errorptr == NULL)
9035 {
9036 if (errorcodeptr != NULL) *errorcodeptr = 99;
9037 return NULL;
9038 }
9039
9040 *errorptr = NULL;
9041 if (errorcodeptr != NULL) *errorcodeptr = ERR0;
9042
9043
9044
9045 if (erroroffset == NULL)
9046 {
9047 errorcode = ERR16;
9048 goto PCRE_EARLY_ERROR_RETURN2;
9049 }
9050
9051 *erroroffset = 0;
9052
9053
9054
9055 if (tables == NULL) tables = PRIV(default_tables);
9056 cd->lcc = tables + lcc_offset;
9057 cd->fcc = tables + fcc_offset;
9058 cd->cbits = tables + cbits_offset;
9059 cd->ctypes = tables + ctypes_offset;
9060
9061
9062
9063 if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
9064 {
9065 errorcode = ERR17;
9066 goto PCRE_EARLY_ERROR_RETURN;
9067 }
9068
9069
9070
9071 if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
9072
9073
9074
9075
9076 cd->external_flags = 0;
9077
9078 while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
9079 ptr[skipatstart+1] == CHAR_ASTERISK)
9080 {
9081 int newnl = 0;
9082 int newbsr = 0;
9083
9084
9085
9086
9087
9088 #ifdef COMPILE_PCRE8
9089 if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
9090 { skipatstart += 7; options |= PCRE_UTF8; continue; }
9091 #endif
9092 #ifdef COMPILE_PCRE16
9093 if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF16_RIGHTPAR, 6) == 0)
9094 { skipatstart += 8; options |= PCRE_UTF16; continue; }
9095 #endif
9096 #ifdef COMPILE_PCRE32
9097 if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF32_RIGHTPAR, 6) == 0)
9098 { skipatstart += 8; options |= PCRE_UTF32; continue; }
9099 #endif
9100
9101 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 4) == 0)
9102 { skipatstart += 6; options |= PCRE_UTF8; continue; }
9103 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
9104 { skipatstart += 6; options |= PCRE_UCP; continue; }
9105 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_AUTO_POSSESS_RIGHTPAR, 16) == 0)
9106 { skipatstart += 18; options |= PCRE_NO_AUTO_POSSESS; continue; }
9107 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
9108 { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
9109
9110 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0)
9111 {
9112 pcre_uint32 c = 0;
9113 int p = skipatstart + 14;
9114 while (isdigit(ptr[p]))
9115 {
9116 if (c > PCRE_UINT32_MAX / 10 - 1) break;
9117 c = c*10 + ptr[p++] - CHAR_0;
9118 }
9119 if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
9120 if (c < limit_match)
9121 {
9122 limit_match = c;
9123 cd->external_flags |= PCRE_MLSET;
9124 }
9125 skipatstart = p;
9126 continue;
9127 }
9128
9129 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0)
9130 {
9131 pcre_uint32 c = 0;
9132 int p = skipatstart + 18;
9133 while (isdigit(ptr[p]))
9134 {
9135 if (c > PCRE_UINT32_MAX / 10 - 1) break;
9136 c = c*10 + ptr[p++] - CHAR_0;
9137 }
9138 if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
9139 if (c < limit_recursion)
9140 {
9141 limit_recursion = c;
9142 cd->external_flags |= PCRE_RLSET;
9143 }
9144 skipatstart = p;
9145 continue;
9146 }
9147
9148 if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
9149 { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
9150 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0)
9151 { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
9152 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5) == 0)
9153 { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
9154 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0)
9155 { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
9156 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0)
9157 { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
9158
9159 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
9160 { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
9161 else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
9162 { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
9163
9164 if (newnl != 0)
9165 options = (options & ~PCRE_NEWLINE_BITS) | newnl;
9166 else if (newbsr != 0)
9167 options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
9168 else break;
9169 }
9170
9171
9172 utf = (options & PCRE_UTF8) != 0;
9173 if (utf && never_utf)
9174 {
9175 errorcode = ERR78;
9176 goto PCRE_EARLY_ERROR_RETURN2;
9177 }
9178
9179
9180
9181
9182
9183
9184 #ifdef SUPPORT_UTF
9185 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
9186 (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
9187 {
9188 #if defined COMPILE_PCRE8
9189 errorcode = ERR44;
9190 #elif defined COMPILE_PCRE16
9191 errorcode = ERR74;
9192 #elif defined COMPILE_PCRE32
9193 errorcode = ERR77;
9194 #endif
9195 goto PCRE_EARLY_ERROR_RETURN2;
9196 }
9197 #else
9198 if (utf)
9199 {
9200 errorcode = ERR32;
9201 goto PCRE_EARLY_ERROR_RETURN;
9202 }
9203 #endif
9204
9205
9206
9207 #ifndef SUPPORT_UCP
9208 if ((options & PCRE_UCP) != 0)
9209 {
9210 errorcode = ERR67;
9211 goto PCRE_EARLY_ERROR_RETURN;
9212 }
9213 #endif
9214
9215
9216
9217 if ((options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) ==
9218 (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
9219 {
9220 errorcode = ERR56;
9221 goto PCRE_EARLY_ERROR_RETURN;
9222 }
9223
9224
9225
9226
9227
9228 switch (options & PCRE_NEWLINE_BITS)
9229 {
9230 case 0: newline = NEWLINE; break;
9231 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
9232 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
9233 case PCRE_NEWLINE_CR+
9234 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
9235 case PCRE_NEWLINE_ANY: newline = -1; break;
9236 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
9237 default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
9238 }
9239
9240 if (newline == -2)
9241 {
9242 cd->nltype = NLTYPE_ANYCRLF;
9243 }
9244 else if (newline < 0)
9245 {
9246 cd->nltype = NLTYPE_ANY;
9247 }
9248 else
9249 {
9250 cd->nltype = NLTYPE_FIXED;
9251 if (newline > 255)
9252 {
9253 cd->nllen = 2;
9254 cd->nl[0] = (newline >> 8) & 255;
9255 cd->nl[1] = newline & 255;
9256 }
9257 else
9258 {
9259 cd->nllen = 1;
9260 cd->nl[0] = newline;
9261 }
9262 }
9263
9264
9265
9266
9267
9268 cd->top_backref = 0;
9269 cd->backref_map = 0;
9270
9271
9272
9273 DPRINTF(("------------------------------------------------------------------\n"));
9274 #ifdef PCRE_DEBUG
9275 print_puchar(stdout, (PCRE_PUCHAR)pattern);
9276 #endif
9277 DPRINTF(("\n"));
9278
9279
9280
9281
9282
9283
9284
9285
9286 cd->bracount = cd->final_bracount = 0;
9287 cd->names_found = 0;
9288 cd->name_entry_size = 0;
9289 cd->name_table = NULL;
9290 cd->dupnames = FALSE;
9291 cd->dupgroups = FALSE;
9292 cd->namedrefcount = 0;
9293 cd->start_code = cworkspace;
9294 cd->hwm = cworkspace;
9295 cd->iscondassert = FALSE;
9296 cd->start_workspace = cworkspace;
9297 cd->workspace_size = COMPILE_WORK_SIZE;
9298 cd->named_groups = named_groups;
9299 cd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
9300 cd->start_pattern = (const pcre_uchar *)pattern;
9301 cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
9302 cd->req_varyopt = 0;
9303 cd->parens_depth = 0;
9304 cd->assert_depth = 0;
9305 cd->max_lookbehind = 0;
9306 cd->external_options = options;
9307 cd->open_caps = NULL;
9308
9309
9310
9311
9312
9313
9314
9315 ptr += skipatstart;
9316 code = cworkspace;
9317 *code = OP_BRA;
9318
9319 (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
9320 FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL,
9321 cd, &length);
9322 if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
9323
9324 DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
9325 (int)(cd->hwm - cworkspace)));
9326
9327 if (length > MAX_PATTERN_SIZE)
9328 {
9329 errorcode = ERR20;
9330 goto PCRE_EARLY_ERROR_RETURN;
9331 }
9332
9333
9334
9335
9336
9337 size = sizeof(REAL_PCRE) +
9338 (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
9339
9340
9341
9342 re = (REAL_PCRE *)(PUBL(malloc))(size);
9343 if (re == NULL)
9344 {
9345 errorcode = ERR21;
9346 goto PCRE_EARLY_ERROR_RETURN;
9347 }
9348
9349
9350
9351
9352
9353
9354
9355 re->magic_number = MAGIC_NUMBER;
9356 re->size = (int)size;
9357 re->options = cd->external_options;
9358 re->flags = cd->external_flags;
9359 re->limit_match = limit_match;
9360 re->limit_recursion = limit_recursion;
9361 re->first_char = 0;
9362 re->req_char = 0;
9363 re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
9364 re->name_entry_size = cd->name_entry_size;
9365 re->name_count = cd->names_found;
9366 re->ref_count = 0;
9367 re->tables = (tables == PRIV(default_tables))? NULL : tables;
9368 re->nullpad = NULL;
9369 #ifdef COMPILE_PCRE32
9370 re->dummy = 0;
9371 #else
9372 re->dummy1 = re->dummy2 = re->dummy3 = 0;
9373 #endif
9374
9375
9376
9377
9378
9379
9380
9381
9382 cd->final_bracount = cd->bracount;
9383 cd->parens_depth = 0;
9384 cd->assert_depth = 0;
9385 cd->bracount = 0;
9386 cd->max_lookbehind = 0;
9387 cd->name_table = (pcre_uchar *)re + re->name_table_offset;
9388 codestart = cd->name_table + re->name_entry_size * re->name_count;
9389 cd->start_code = codestart;
9390 cd->hwm = (pcre_uchar *)(cd->start_workspace);
9391 cd->iscondassert = FALSE;
9392 cd->req_varyopt = 0;
9393 cd->had_accept = FALSE;
9394 cd->had_pruneorskip = FALSE;
9395 cd->check_lookbehind = FALSE;
9396 cd->open_caps = NULL;
9397
9398
9399
9400
9401 if (cd->names_found > 0)
9402 {
9403 int i = cd->names_found;
9404 named_group *ng = cd->named_groups;
9405 cd->names_found = 0;
9406 for (; i > 0; i--, ng++)
9407 add_name(cd, ng->name, ng->length, ng->number);
9408 if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
9409 (PUBL(free))((void *)cd->named_groups);
9410 }
9411
9412
9413
9414
9415
9416 ptr = (const pcre_uchar *)pattern + skipatstart;
9417 code = (pcre_uchar *)codestart;
9418 *code = OP_BRA;
9419 (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
9420 &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, cd, NULL);
9421 re->top_bracket = cd->bracount;
9422 re->top_backref = cd->top_backref;
9423 re->max_lookbehind = cd->max_lookbehind;
9424 re->flags = cd->external_flags | PCRE_MODE;
9425
9426 if (cd->had_accept)
9427 {
9428 reqchar = 0;
9429 reqcharflags = REQ_NONE;
9430 }
9431
9432
9433
9434 if (errorcode == 0 && *ptr != CHAR_NULL) errorcode = ERR22;
9435
9436
9437
9438
9439 *code++ = OP_END;
9440
9441 #ifndef PCRE_DEBUG
9442 if (code - codestart > length) errorcode = ERR23;
9443 #endif
9444
9445 #ifdef SUPPORT_VALGRIND
9446
9447
9448
9449 VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar));
9450 #endif
9451
9452
9453
9454
9455 if (cd->hwm > cd->start_workspace)
9456 {
9457 int prev_recno = -1;
9458 const pcre_uchar *groupptr = NULL;
9459 while (errorcode == 0 && cd->hwm > cd->start_workspace)
9460 {
9461 int offset, recno;
9462 cd->hwm -= LINK_SIZE;
9463 offset = GET(cd->hwm, 0);
9464
9465
9466
9467
9468 if (offset == 0 || codestart[offset-1] != OP_RECURSE)
9469 {
9470 errorcode = ERR10;
9471 break;
9472 }
9473
9474 recno = GET(codestart, offset);
9475 if (recno != prev_recno)
9476 {
9477 groupptr = PRIV(find_bracket)(codestart, utf, recno);
9478 prev_recno = recno;
9479 }
9480 if (groupptr == NULL) errorcode = ERR53;
9481 else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
9482 }
9483 }
9484
9485
9486
9487
9488 if (cd->workspace_size > COMPILE_WORK_SIZE)
9489 (PUBL(free))((void *)cd->start_workspace);
9490 cd->start_workspace = NULL;
9491
9492
9493
9494
9495 if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
9496
9497
9498
9499
9500
9501
9502
9503
9504 if (errorcode == 0 && (options & PCRE_NO_AUTO_POSSESS) == 0)
9505 {
9506 pcre_uchar *temp = (pcre_uchar *)codestart;
9507 auto_possessify(temp, utf, cd);
9508 }
9509
9510
9511
9512
9513
9514
9515
9516
9517
9518 if (errorcode == 0 && cd->check_lookbehind)
9519 {
9520 pcre_uchar *cc = (pcre_uchar *)codestart;
9521
9522
9523
9524
9525
9526
9527 for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf, -1);
9528 cc != NULL;
9529 cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf, -1))
9530 {
9531 if (GET(cc, 1) == 0)
9532 {
9533 int fixed_length;
9534 pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
9535 int end_op = *be;
9536 *be = OP_END;
9537 fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
9538 cd, NULL);
9539 *be = end_op;
9540 DPRINTF(("fixed length = %d\n", fixed_length));
9541 if (fixed_length < 0)
9542 {
9543 errorcode = (fixed_length == -2)? ERR36 :
9544 (fixed_length == -4)? ERR70 : ERR25;
9545 break;
9546 }
9547 if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length;
9548 PUT(cc, 1, fixed_length);
9549 }
9550 cc += 1 + LINK_SIZE;
9551 }
9552 }
9553
9554
9555
9556 if (errorcode != 0)
9557 {
9558 (PUBL(free))(re);
9559 PCRE_EARLY_ERROR_RETURN:
9560 *erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
9561 PCRE_EARLY_ERROR_RETURN2:
9562 *errorptr = find_error_text(errorcode);
9563 if (errorcodeptr != NULL) *errorcodeptr = errorcode;
9564 return NULL;
9565 }
9566
9567
9568
9569
9570
9571
9572
9573
9574
9575
9576
9577
9578 if ((re->options & PCRE_ANCHORED) == 0)
9579 {
9580 if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED;
9581 else
9582 {
9583 if (firstcharflags < 0)
9584 firstchar = find_firstassertedchar(codestart, &firstcharflags, FALSE);
9585 if (firstcharflags >= 0)
9586 {
9587 #if defined COMPILE_PCRE8
9588 re->first_char = firstchar & 0xff;
9589 #elif defined COMPILE_PCRE16
9590 re->first_char = firstchar & 0xffff;
9591 #elif defined COMPILE_PCRE32
9592 re->first_char = firstchar;
9593 #endif
9594 if ((firstcharflags & REQ_CASELESS) != 0)
9595 {
9596 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
9597
9598 if (utf)
9599 {
9600 if (re->first_char < 128)
9601 {
9602 if (cd->fcc[re->first_char] != re->first_char)
9603 re->flags |= PCRE_FCH_CASELESS;
9604 }
9605 else if (UCD_OTHERCASE(re->first_char) != re->first_char)
9606 re->flags |= PCRE_FCH_CASELESS;
9607 }
9608 else
9609 #endif
9610 if (MAX_255(re->first_char)
9611 && cd->fcc[re->first_char] != re->first_char)
9612 re->flags |= PCRE_FCH_CASELESS;
9613 }
9614
9615 re->flags |= PCRE_FIRSTSET;
9616 }
9617
9618 else if (is_startline(codestart, 0, cd, 0)) re->flags |= PCRE_STARTLINE;
9619 }
9620 }
9621
9622
9623
9624
9625
9626 if (reqcharflags >= 0 &&
9627 ((re->options & PCRE_ANCHORED) == 0 || (reqcharflags & REQ_VARY) != 0))
9628 {
9629 #if defined COMPILE_PCRE8
9630 re->req_char = reqchar & 0xff;
9631 #elif defined COMPILE_PCRE16
9632 re->req_char = reqchar & 0xffff;
9633 #elif defined COMPILE_PCRE32
9634 re->req_char = reqchar;
9635 #endif
9636 if ((reqcharflags & REQ_CASELESS) != 0)
9637 {
9638 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
9639
9640 if (utf)
9641 {
9642 if (re->req_char < 128)
9643 {
9644 if (cd->fcc[re->req_char] != re->req_char)
9645 re->flags |= PCRE_RCH_CASELESS;
9646 }
9647 else if (UCD_OTHERCASE(re->req_char) != re->req_char)
9648 re->flags |= PCRE_RCH_CASELESS;
9649 }
9650 else
9651 #endif
9652 if (MAX_255(re->req_char) && cd->fcc[re->req_char] != re->req_char)
9653 re->flags |= PCRE_RCH_CASELESS;
9654 }
9655
9656 re->flags |= PCRE_REQCHSET;
9657 }
9658
9659
9660
9661
9662 #ifdef PCRE_DEBUG
9663 printf("Length = %d top_bracket = %d top_backref = %d\n",
9664 length, re->top_bracket, re->top_backref);
9665
9666 printf("Options=%08x\n", re->options);
9667
9668 if ((re->flags & PCRE_FIRSTSET) != 0)
9669 {
9670 pcre_uchar ch = re->first_char;
9671 const char *caseless =
9672 ((re->flags & PCRE_FCH_CASELESS) == 0)? "" : " (caseless)";
9673 if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless);
9674 else printf("First char = \\x%02x%s\n", ch, caseless);
9675 }
9676
9677 if ((re->flags & PCRE_REQCHSET) != 0)
9678 {
9679 pcre_uchar ch = re->req_char;
9680 const char *caseless =
9681 ((re->flags & PCRE_RCH_CASELESS) == 0)? "" : " (caseless)";
9682 if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless);
9683 else printf("Req char = \\x%02x%s\n", ch, caseless);
9684 }
9685
9686 #if defined COMPILE_PCRE8
9687 pcre_printint((pcre *)re, stdout, TRUE);
9688 #elif defined COMPILE_PCRE16
9689 pcre16_printint((pcre *)re, stdout, TRUE);
9690 #elif defined COMPILE_PCRE32
9691 pcre32_printint((pcre *)re, stdout, TRUE);
9692 #endif
9693
9694
9695
9696
9697 if (code - codestart > length)
9698 {
9699 (PUBL(free))(re);
9700 *errorptr = find_error_text(ERR23);
9701 *erroroffset = ptr - (pcre_uchar *)pattern;
9702 if (errorcodeptr != NULL) *errorcodeptr = ERR23;
9703 return NULL;
9704 }
9705 #endif
9706
9707
9708
9709
9710 do
9711 {
9712 if (could_be_empty_branch(codestart, code, utf, cd, NULL))
9713 {
9714 re->flags |= PCRE_MATCH_EMPTY;
9715 break;
9716 }
9717 codestart += GET(codestart, 1);
9718 }
9719 while (*codestart == OP_ALT);
9720
9721 #if defined COMPILE_PCRE8
9722 return (pcre *)re;
9723 #elif defined COMPILE_PCRE16
9724 return (pcre16 *)re;
9725 #elif defined COMPILE_PCRE32
9726 return (pcre32 *)re;
9727 #endif
9728 }
9729
9730