This source file includes the following definitions.
- find_minlength
- set_table_bit
- set_type_bits
- set_nottype_bits
- set_start_bits
- pcre_study
- pcre_free_study
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45 #include "config.h"
46
47 #include "pcre_internal.h"
48
/* Set the bit for character code c in the bit map named start_bits that is in
scope at the point of use (one bit per code, 8 codes per byte). The argument is
parenthesised so that expressions such as SET_BIT(a + b) expand correctly; note
that it is still evaluated twice, so it must not have side effects. */

#define SET_BIT(c) start_bits[(c)/8] |= (1 << ((c)&7))

/* Result codes of set_start_bits():
  SSB_FAIL     an item was met for which no starting set can be built
  SSB_DONE     every alternative ended on an item that fixed its first char
  SSB_CONTINUE the end of a branch or group was reached without such an item
  SSB_UNKNOWN  an opcode that the scanner does not know was encountered */

enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80 static int
81 find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
82 const pcre_uchar *startcode, int options, recurse_check *recurses,
83 int *countptr)
84 {
85 int length = -1;
86
87 BOOL utf = (options & PCRE_UTF8) != 0;
88 BOOL had_recurse = FALSE;
89 recurse_check this_recurse;
90 register int branchlength = 0;
91 register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
92
93 if ((*countptr)++ > 1000) return -1;
94
95 if (*code == OP_CBRA || *code == OP_SCBRA ||
96 *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
97
98
99
100
101 for (;;)
102 {
103 int d, min;
104 pcre_uchar *cs, *ce;
105 register pcre_uchar op = *cc;
106
107 switch (op)
108 {
109 case OP_COND:
110 case OP_SCOND:
111
112
113
114
115
116 cs = cc + GET(cc, 1);
117 if (*cs != OP_ALT)
118 {
119 cc = cs + 1 + LINK_SIZE;
120 break;
121 }
122
123
124
125
126 case OP_CBRA:
127 case OP_SCBRA:
128 case OP_BRA:
129 case OP_SBRA:
130 case OP_CBRAPOS:
131 case OP_SCBRAPOS:
132 case OP_BRAPOS:
133 case OP_SBRAPOS:
134 case OP_ONCE:
135 case OP_ONCE_NC:
136 d = find_minlength(re, cc, startcode, options, recurses, countptr);
137 if (d < 0) return d;
138 branchlength += d;
139 do cc += GET(cc, 1); while (*cc == OP_ALT);
140 cc += 1 + LINK_SIZE;
141 break;
142
143
144
145 case OP_ACCEPT:
146 case OP_ASSERT_ACCEPT:
147 return -1;
148
149
150
151
152
153
154
155 case OP_ALT:
156 case OP_KET:
157 case OP_KETRMAX:
158 case OP_KETRMIN:
159 case OP_KETRPOS:
160 case OP_END:
161 if (length < 0 || (!had_recurse && branchlength < length))
162 length = branchlength;
163 if (op != OP_ALT) return length;
164 cc += 1 + LINK_SIZE;
165 branchlength = 0;
166 had_recurse = FALSE;
167 break;
168
169
170
171 case OP_ASSERT:
172 case OP_ASSERT_NOT:
173 case OP_ASSERTBACK:
174 case OP_ASSERTBACK_NOT:
175 do cc += GET(cc, 1); while (*cc == OP_ALT);
176
177
178
179
180 case OP_REVERSE:
181 case OP_CREF:
182 case OP_DNCREF:
183 case OP_RREF:
184 case OP_DNRREF:
185 case OP_DEF:
186 case OP_CALLOUT:
187 case OP_SOD:
188 case OP_SOM:
189 case OP_EOD:
190 case OP_EODN:
191 case OP_CIRC:
192 case OP_CIRCM:
193 case OP_DOLL:
194 case OP_DOLLM:
195 case OP_NOT_WORD_BOUNDARY:
196 case OP_WORD_BOUNDARY:
197 cc += PRIV(OP_lengths)[*cc];
198 break;
199
200
201
202 case OP_BRAZERO:
203 case OP_BRAMINZERO:
204 case OP_BRAPOSZERO:
205 case OP_SKIPZERO:
206 cc += PRIV(OP_lengths)[*cc];
207 do cc += GET(cc, 1); while (*cc == OP_ALT);
208 cc += 1 + LINK_SIZE;
209 break;
210
211
212
213 case OP_CHAR:
214 case OP_CHARI:
215 case OP_NOT:
216 case OP_NOTI:
217 case OP_PLUS:
218 case OP_PLUSI:
219 case OP_MINPLUS:
220 case OP_MINPLUSI:
221 case OP_POSPLUS:
222 case OP_POSPLUSI:
223 case OP_NOTPLUS:
224 case OP_NOTPLUSI:
225 case OP_NOTMINPLUS:
226 case OP_NOTMINPLUSI:
227 case OP_NOTPOSPLUS:
228 case OP_NOTPOSPLUSI:
229 branchlength++;
230 cc += 2;
231 #ifdef SUPPORT_UTF
232 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
233 #endif
234 break;
235
236 case OP_TYPEPLUS:
237 case OP_TYPEMINPLUS:
238 case OP_TYPEPOSPLUS:
239 branchlength++;
240 cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
241 break;
242
243
244
245
246 case OP_EXACT:
247 case OP_EXACTI:
248 case OP_NOTEXACT:
249 case OP_NOTEXACTI:
250 branchlength += GET2(cc,1);
251 cc += 2 + IMM2_SIZE;
252 #ifdef SUPPORT_UTF
253 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
254 #endif
255 break;
256
257 case OP_TYPEEXACT:
258 branchlength += GET2(cc,1);
259 cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
260 || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
261 break;
262
263
264
265 case OP_PROP:
266 case OP_NOTPROP:
267 cc += 2;
268
269
270 case OP_NOT_DIGIT:
271 case OP_DIGIT:
272 case OP_NOT_WHITESPACE:
273 case OP_WHITESPACE:
274 case OP_NOT_WORDCHAR:
275 case OP_WORDCHAR:
276 case OP_ANY:
277 case OP_ALLANY:
278 case OP_EXTUNI:
279 case OP_HSPACE:
280 case OP_NOT_HSPACE:
281 case OP_VSPACE:
282 case OP_NOT_VSPACE:
283 branchlength++;
284 cc++;
285 break;
286
287
288
289
290 case OP_ANYNL:
291 branchlength += 1;
292 cc++;
293 break;
294
295
296
297
298
299 case OP_ANYBYTE:
300 #ifdef SUPPORT_UTF
301 if (utf) return -1;
302 #endif
303 branchlength++;
304 cc++;
305 break;
306
307
308
309
310 case OP_TYPESTAR:
311 case OP_TYPEMINSTAR:
312 case OP_TYPEQUERY:
313 case OP_TYPEMINQUERY:
314 case OP_TYPEPOSSTAR:
315 case OP_TYPEPOSQUERY:
316 if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
317 cc += PRIV(OP_lengths)[op];
318 break;
319
320 case OP_TYPEUPTO:
321 case OP_TYPEMINUPTO:
322 case OP_TYPEPOSUPTO:
323 if (cc[1 + IMM2_SIZE] == OP_PROP
324 || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
325 cc += PRIV(OP_lengths)[op];
326 break;
327
328
329
330 case OP_CLASS:
331 case OP_NCLASS:
332 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
333 case OP_XCLASS:
334
335
336 if (op == OP_XCLASS)
337 cc += GET(cc, 1);
338 else
339 cc += PRIV(OP_lengths)[OP_CLASS];
340 #else
341 cc += PRIV(OP_lengths)[OP_CLASS];
342 #endif
343
344 switch (*cc)
345 {
346 case OP_CRPLUS:
347 case OP_CRMINPLUS:
348 case OP_CRPOSPLUS:
349 branchlength++;
350
351
352 case OP_CRSTAR:
353 case OP_CRMINSTAR:
354 case OP_CRQUERY:
355 case OP_CRMINQUERY:
356 case OP_CRPOSSTAR:
357 case OP_CRPOSQUERY:
358 cc++;
359 break;
360
361 case OP_CRRANGE:
362 case OP_CRMINRANGE:
363 case OP_CRPOSRANGE:
364 branchlength += GET2(cc,1);
365 cc += 1 + 2 * IMM2_SIZE;
366 break;
367
368 default:
369 branchlength++;
370 break;
371 }
372 break;
373
374
375
376
377
378
379
380
381
382
383
384
385
386 case OP_DNREF:
387 case OP_DNREFI:
388 if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
389 {
390 int count = GET2(cc, 1+IMM2_SIZE);
391 pcre_uchar *slot = (pcre_uchar *)re +
392 re->name_table_offset + GET2(cc, 1) * re->name_entry_size;
393 d = INT_MAX;
394 while (count-- > 0)
395 {
396 ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
397 if (cs == NULL) return -2;
398 do ce += GET(ce, 1); while (*ce == OP_ALT);
399 if (cc > cs && cc < ce)
400 {
401 d = 0;
402 had_recurse = TRUE;
403 break;
404 }
405 else
406 {
407 recurse_check *r = recurses;
408 for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
409 if (r != NULL)
410 {
411 d = 0;
412 had_recurse = TRUE;
413 break;
414 }
415 else
416 {
417 int dd;
418 this_recurse.prev = recurses;
419 this_recurse.group = cs;
420 dd = find_minlength(re, cs, startcode, options, &this_recurse,
421 countptr);
422 if (dd < d) d = dd;
423 }
424 }
425 slot += re->name_entry_size;
426 }
427 }
428 else d = 0;
429 cc += 1 + 2*IMM2_SIZE;
430 goto REPEAT_BACK_REFERENCE;
431
432 case OP_REF:
433 case OP_REFI:
434 if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
435 {
436 ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
437 if (cs == NULL) return -2;
438 do ce += GET(ce, 1); while (*ce == OP_ALT);
439 if (cc > cs && cc < ce)
440 {
441 d = 0;
442 had_recurse = TRUE;
443 }
444 else
445 {
446 recurse_check *r = recurses;
447 for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
448 if (r != NULL)
449 {
450 d = 0;
451 had_recurse = TRUE;
452 }
453 else
454 {
455 this_recurse.prev = recurses;
456 this_recurse.group = cs;
457 d = find_minlength(re, cs, startcode, options, &this_recurse,
458 countptr);
459 }
460 }
461 }
462 else d = 0;
463 cc += 1 + IMM2_SIZE;
464
465
466
467 REPEAT_BACK_REFERENCE:
468 switch (*cc)
469 {
470 case OP_CRSTAR:
471 case OP_CRMINSTAR:
472 case OP_CRQUERY:
473 case OP_CRMINQUERY:
474 case OP_CRPOSSTAR:
475 case OP_CRPOSQUERY:
476 min = 0;
477 cc++;
478 break;
479
480 case OP_CRPLUS:
481 case OP_CRMINPLUS:
482 case OP_CRPOSPLUS:
483 min = 1;
484 cc++;
485 break;
486
487 case OP_CRRANGE:
488 case OP_CRMINRANGE:
489 case OP_CRPOSRANGE:
490 min = GET2(cc, 1);
491 cc += 1 + 2 * IMM2_SIZE;
492 break;
493
494 default:
495 min = 1;
496 break;
497 }
498
499 branchlength += min * d;
500 break;
501
502
503
504
505 case OP_RECURSE:
506 cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
507 do ce += GET(ce, 1); while (*ce == OP_ALT);
508 if (cc > cs && cc < ce)
509 had_recurse = TRUE;
510 else
511 {
512 recurse_check *r = recurses;
513 for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
514 if (r != NULL)
515 had_recurse = TRUE;
516 else
517 {
518 this_recurse.prev = recurses;
519 this_recurse.group = cs;
520 branchlength += find_minlength(re, cs, startcode, options,
521 &this_recurse, countptr);
522 }
523 }
524 cc += 1 + LINK_SIZE;
525 break;
526
527
528
529
530
531
532
533
534
535 case OP_UPTO:
536 case OP_UPTOI:
537 case OP_NOTUPTO:
538 case OP_NOTUPTOI:
539 case OP_MINUPTO:
540 case OP_MINUPTOI:
541 case OP_NOTMINUPTO:
542 case OP_NOTMINUPTOI:
543 case OP_POSUPTO:
544 case OP_POSUPTOI:
545 case OP_NOTPOSUPTO:
546 case OP_NOTPOSUPTOI:
547
548 case OP_STAR:
549 case OP_STARI:
550 case OP_NOTSTAR:
551 case OP_NOTSTARI:
552 case OP_MINSTAR:
553 case OP_MINSTARI:
554 case OP_NOTMINSTAR:
555 case OP_NOTMINSTARI:
556 case OP_POSSTAR:
557 case OP_POSSTARI:
558 case OP_NOTPOSSTAR:
559 case OP_NOTPOSSTARI:
560
561 case OP_QUERY:
562 case OP_QUERYI:
563 case OP_NOTQUERY:
564 case OP_NOTQUERYI:
565 case OP_MINQUERY:
566 case OP_MINQUERYI:
567 case OP_NOTMINQUERY:
568 case OP_NOTMINQUERYI:
569 case OP_POSQUERY:
570 case OP_POSQUERYI:
571 case OP_NOTPOSQUERY:
572 case OP_NOTPOSQUERYI:
573
574 cc += PRIV(OP_lengths)[op];
575 #ifdef SUPPORT_UTF
576 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
577 #endif
578 break;
579
580
581
582 case OP_MARK:
583 case OP_PRUNE_ARG:
584 case OP_SKIP_ARG:
585 case OP_THEN_ARG:
586 cc += PRIV(OP_lengths)[op] + cc[1];
587 break;
588
589
590
591 case OP_CLOSE:
592 case OP_COMMIT:
593 case OP_FAIL:
594 case OP_PRUNE:
595 case OP_SET_SOM:
596 case OP_SKIP:
597 case OP_THEN:
598 cc += PRIV(OP_lengths)[op];
599 break;
600
601
602
603
604 default:
605 return -3;
606 }
607 }
608
609 }
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632 static const pcre_uchar *
633 set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
634 compile_data *cd, BOOL utf)
635 {
636 pcre_uint32 c = *p;
637
638 #ifdef COMPILE_PCRE8
639 SET_BIT(c);
640
641 #ifdef SUPPORT_UTF
642 if (utf && c > 127)
643 {
644 GETCHARINC(c, p);
645 #ifdef SUPPORT_UCP
646 if (caseless)
647 {
648 pcre_uchar buff[6];
649 c = UCD_OTHERCASE(c);
650 (void)PRIV(ord2utf)(c, buff);
651 SET_BIT(buff[0]);
652 }
653 #endif
654 return p;
655 }
656 #else
657 (void)(utf);
658 #endif
659
660
661
662 if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
663 return p + 1;
664 #endif
665
666 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
667 if (c > 0xff)
668 {
669 c = 0xff;
670 caseless = FALSE;
671 }
672 SET_BIT(c);
673
674 #ifdef SUPPORT_UTF
675 if (utf && c > 127)
676 {
677 GETCHARINC(c, p);
678 #ifdef SUPPORT_UCP
679 if (caseless)
680 {
681 c = UCD_OTHERCASE(c);
682 if (c > 0xff)
683 c = 0xff;
684 SET_BIT(c);
685 }
686 #endif
687 return p;
688 }
689 #else
690 (void)(utf);
691 #endif
692
693 if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
694 return p + 1;
695 #endif
696 }
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720 static void
721 set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
722 compile_data *cd)
723 {
724 register pcre_uint32 c;
725 for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
726 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
727 if (table_limit == 32) return;
728 for (c = 128; c < 256; c++)
729 {
730 if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
731 {
732 pcre_uchar buff[6];
733 (void)PRIV(ord2utf)(c, buff);
734 SET_BIT(buff[0]);
735 }
736 }
737 #endif
738 }
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762 static void
763 set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
764 compile_data *cd)
765 {
766 register pcre_uint32 c;
767 for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
768 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
769 if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
770 #endif
771 }
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799 static int
800 set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
801 compile_data *cd)
802 {
803 register pcre_uint32 c;
804 int yield = SSB_DONE;
805 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
806 int table_limit = utf? 16:32;
807 #else
808 int table_limit = 32;
809 #endif
810
811 #if 0
812
813
814
815
816
817
818
819
820
821
822
823
824 volatile int dummy;
825
826 #endif
827
828 do
829 {
830 BOOL try_next = TRUE;
831 const pcre_uchar *tcode = code + 1 + LINK_SIZE;
832
833 if (*code == OP_CBRA || *code == OP_SCBRA ||
834 *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
835
836 while (try_next)
837 {
838 int rc;
839
840 switch(*tcode)
841 {
842
843
844
845
846 default:
847 return SSB_UNKNOWN;
848
849
850
851 case OP_ACCEPT:
852 case OP_ASSERT_ACCEPT:
853 case OP_ALLANY:
854 case OP_ANY:
855 case OP_ANYBYTE:
856 case OP_CIRC:
857 case OP_CIRCM:
858 case OP_CLOSE:
859 case OP_COMMIT:
860 case OP_COND:
861 case OP_CREF:
862 case OP_DEF:
863 case OP_DNCREF:
864 case OP_DNREF:
865 case OP_DNREFI:
866 case OP_DNRREF:
867 case OP_DOLL:
868 case OP_DOLLM:
869 case OP_END:
870 case OP_EOD:
871 case OP_EODN:
872 case OP_EXTUNI:
873 case OP_FAIL:
874 case OP_MARK:
875 case OP_NOT:
876 case OP_NOTEXACT:
877 case OP_NOTEXACTI:
878 case OP_NOTI:
879 case OP_NOTMINPLUS:
880 case OP_NOTMINPLUSI:
881 case OP_NOTMINQUERY:
882 case OP_NOTMINQUERYI:
883 case OP_NOTMINSTAR:
884 case OP_NOTMINSTARI:
885 case OP_NOTMINUPTO:
886 case OP_NOTMINUPTOI:
887 case OP_NOTPLUS:
888 case OP_NOTPLUSI:
889 case OP_NOTPOSPLUS:
890 case OP_NOTPOSPLUSI:
891 case OP_NOTPOSQUERY:
892 case OP_NOTPOSQUERYI:
893 case OP_NOTPOSSTAR:
894 case OP_NOTPOSSTARI:
895 case OP_NOTPOSUPTO:
896 case OP_NOTPOSUPTOI:
897 case OP_NOTPROP:
898 case OP_NOTQUERY:
899 case OP_NOTQUERYI:
900 case OP_NOTSTAR:
901 case OP_NOTSTARI:
902 case OP_NOTUPTO:
903 case OP_NOTUPTOI:
904 case OP_NOT_HSPACE:
905 case OP_NOT_VSPACE:
906 case OP_PRUNE:
907 case OP_PRUNE_ARG:
908 case OP_RECURSE:
909 case OP_REF:
910 case OP_REFI:
911 case OP_REVERSE:
912 case OP_RREF:
913 case OP_SCOND:
914 case OP_SET_SOM:
915 case OP_SKIP:
916 case OP_SKIP_ARG:
917 case OP_SOD:
918 case OP_SOM:
919 case OP_THEN:
920 case OP_THEN_ARG:
921 return SSB_FAIL;
922
923
924
925
926
927
928 case OP_PROP:
929 if (tcode[1] != PT_CLIST) return SSB_FAIL;
930 {
931 const pcre_uint32 *p = PRIV(ucd_caseless_sets) + tcode[2];
932 while ((c = *p++) < NOTACHAR)
933 {
934 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
935 if (utf)
936 {
937 pcre_uchar buff[6];
938 (void)PRIV(ord2utf)(c, buff);
939 c = buff[0];
940 }
941 #endif
942 if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
943 }
944 }
945 try_next = FALSE;
946 break;
947
948
949
950 case OP_WORD_BOUNDARY:
951 case OP_NOT_WORD_BOUNDARY:
952 tcode++;
953 break;
954
955
956
957
958
959
960 case OP_BRA:
961 case OP_SBRA:
962 case OP_CBRA:
963 case OP_SCBRA:
964 case OP_BRAPOS:
965 case OP_SBRAPOS:
966 case OP_CBRAPOS:
967 case OP_SCBRAPOS:
968 case OP_ONCE:
969 case OP_ONCE_NC:
970 case OP_ASSERT:
971 rc = set_start_bits(tcode, start_bits, utf, cd);
972 if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
973 if (rc == SSB_DONE) try_next = FALSE; else
974 {
975 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
976 tcode += 1 + LINK_SIZE;
977 }
978 break;
979
980
981
982
983
984
985
986
987 case OP_ALT:
988 yield = SSB_CONTINUE;
989 try_next = FALSE;
990 break;
991
992 case OP_KET:
993 case OP_KETRMAX:
994 case OP_KETRMIN:
995 case OP_KETRPOS:
996 return SSB_CONTINUE;
997
998
999
1000 case OP_CALLOUT:
1001 tcode += 2 + 2*LINK_SIZE;
1002 break;
1003
1004
1005
1006 case OP_ASSERT_NOT:
1007 case OP_ASSERTBACK:
1008 case OP_ASSERTBACK_NOT:
1009 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
1010 tcode += 1 + LINK_SIZE;
1011 break;
1012
1013
1014
1015 case OP_BRAZERO:
1016 case OP_BRAMINZERO:
1017 case OP_BRAPOSZERO:
1018 rc = set_start_bits(++tcode, start_bits, utf, cd);
1019 if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
1020
1021
1022
1023
1024
1025 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
1026 tcode += 1 + LINK_SIZE;
1027 break;
1028
1029
1030
1031 case OP_SKIPZERO:
1032 tcode++;
1033 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
1034 tcode += 1 + LINK_SIZE;
1035 break;
1036
1037
1038
1039 case OP_STAR:
1040 case OP_MINSTAR:
1041 case OP_POSSTAR:
1042 case OP_QUERY:
1043 case OP_MINQUERY:
1044 case OP_POSQUERY:
1045 tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
1046 break;
1047
1048 case OP_STARI:
1049 case OP_MINSTARI:
1050 case OP_POSSTARI:
1051 case OP_QUERYI:
1052 case OP_MINQUERYI:
1053 case OP_POSQUERYI:
1054 tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
1055 break;
1056
1057
1058
1059 case OP_UPTO:
1060 case OP_MINUPTO:
1061 case OP_POSUPTO:
1062 tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf);
1063 break;
1064
1065 case OP_UPTOI:
1066 case OP_MINUPTOI:
1067 case OP_POSUPTOI:
1068 tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf);
1069 break;
1070
1071
1072
1073 case OP_EXACT:
1074 tcode += IMM2_SIZE;
1075
1076 case OP_CHAR:
1077 case OP_PLUS:
1078 case OP_MINPLUS:
1079 case OP_POSPLUS:
1080 (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
1081 try_next = FALSE;
1082 break;
1083
1084 case OP_EXACTI:
1085 tcode += IMM2_SIZE;
1086
1087 case OP_CHARI:
1088 case OP_PLUSI:
1089 case OP_MINPLUSI:
1090 case OP_POSPLUSI:
1091 (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
1092 try_next = FALSE;
1093 break;
1094
1095
1096
1097
1098
1099
1100
1101 case OP_HSPACE:
1102 SET_BIT(CHAR_HT);
1103 SET_BIT(CHAR_SPACE);
1104 #ifdef SUPPORT_UTF
1105 if (utf)
1106 {
1107 #ifdef COMPILE_PCRE8
1108 SET_BIT(0xC2);
1109 SET_BIT(0xE1);
1110 SET_BIT(0xE2);
1111 SET_BIT(0xE3);
1112 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1113 SET_BIT(0xA0);
1114 SET_BIT(0xFF);
1115 #endif
1116 }
1117 else
1118 #endif
1119 {
1120 #ifndef EBCDIC
1121 SET_BIT(0xA0);
1122 #endif
1123 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1124 SET_BIT(0xFF);
1125 #endif
1126 }
1127 try_next = FALSE;
1128 break;
1129
1130 case OP_ANYNL:
1131 case OP_VSPACE:
1132 SET_BIT(CHAR_LF);
1133 SET_BIT(CHAR_VT);
1134 SET_BIT(CHAR_FF);
1135 SET_BIT(CHAR_CR);
1136 #ifdef SUPPORT_UTF
1137 if (utf)
1138 {
1139 #ifdef COMPILE_PCRE8
1140 SET_BIT(0xC2);
1141 SET_BIT(0xE2);
1142 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1143 SET_BIT(CHAR_NEL);
1144 SET_BIT(0xFF);
1145 #endif
1146 }
1147 else
1148 #endif
1149 {
1150 SET_BIT(CHAR_NEL);
1151 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1152 SET_BIT(0xFF);
1153 #endif
1154 }
1155 try_next = FALSE;
1156 break;
1157
1158
1159
1160
1161
1162
1163 case OP_NOT_DIGIT:
1164 set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
1165 try_next = FALSE;
1166 break;
1167
1168 case OP_DIGIT:
1169 set_type_bits(start_bits, cbit_digit, table_limit, cd);
1170 try_next = FALSE;
1171 break;
1172
1173
1174
1175
1176
1177 case OP_NOT_WHITESPACE:
1178 set_nottype_bits(start_bits, cbit_space, table_limit, cd);
1179 try_next = FALSE;
1180 break;
1181
1182 case OP_WHITESPACE:
1183 set_type_bits(start_bits, cbit_space, table_limit, cd);
1184 try_next = FALSE;
1185 break;
1186
1187 case OP_NOT_WORDCHAR:
1188 set_nottype_bits(start_bits, cbit_word, table_limit, cd);
1189 try_next = FALSE;
1190 break;
1191
1192 case OP_WORDCHAR:
1193 set_type_bits(start_bits, cbit_word, table_limit, cd);
1194 try_next = FALSE;
1195 break;
1196
1197
1198
1199
1200 case OP_TYPEPLUS:
1201 case OP_TYPEMINPLUS:
1202 case OP_TYPEPOSPLUS:
1203 tcode++;
1204 break;
1205
1206 case OP_TYPEEXACT:
1207 tcode += 1 + IMM2_SIZE;
1208 break;
1209
1210
1211
1212
1213 case OP_TYPEUPTO:
1214 case OP_TYPEMINUPTO:
1215 case OP_TYPEPOSUPTO:
1216 tcode += IMM2_SIZE;
1217
1218 case OP_TYPESTAR:
1219 case OP_TYPEMINSTAR:
1220 case OP_TYPEPOSSTAR:
1221 case OP_TYPEQUERY:
1222 case OP_TYPEMINQUERY:
1223 case OP_TYPEPOSQUERY:
1224 switch(tcode[1])
1225 {
1226 default:
1227 case OP_ANY:
1228 case OP_ALLANY:
1229 return SSB_FAIL;
1230
1231 case OP_HSPACE:
1232 SET_BIT(CHAR_HT);
1233 SET_BIT(CHAR_SPACE);
1234 #ifdef SUPPORT_UTF
1235 if (utf)
1236 {
1237 #ifdef COMPILE_PCRE8
1238 SET_BIT(0xC2);
1239 SET_BIT(0xE1);
1240 SET_BIT(0xE2);
1241 SET_BIT(0xE3);
1242 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1243 SET_BIT(0xA0);
1244 SET_BIT(0xFF);
1245 #endif
1246 }
1247 else
1248 #endif
1249 #ifndef EBCDIC
1250 SET_BIT(0xA0);
1251 #endif
1252 break;
1253
1254 case OP_ANYNL:
1255 case OP_VSPACE:
1256 SET_BIT(CHAR_LF);
1257 SET_BIT(CHAR_VT);
1258 SET_BIT(CHAR_FF);
1259 SET_BIT(CHAR_CR);
1260 #ifdef SUPPORT_UTF
1261 if (utf)
1262 {
1263 #ifdef COMPILE_PCRE8
1264 SET_BIT(0xC2);
1265 SET_BIT(0xE2);
1266 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1267 SET_BIT(CHAR_NEL);
1268 SET_BIT(0xFF);
1269 #endif
1270 }
1271 else
1272 #endif
1273 SET_BIT(CHAR_NEL);
1274 break;
1275
1276 case OP_NOT_DIGIT:
1277 set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
1278 break;
1279
1280 case OP_DIGIT:
1281 set_type_bits(start_bits, cbit_digit, table_limit, cd);
1282 break;
1283
1284
1285
1286
1287
1288 case OP_NOT_WHITESPACE:
1289 set_nottype_bits(start_bits, cbit_space, table_limit, cd);
1290 break;
1291
1292 case OP_WHITESPACE:
1293 set_type_bits(start_bits, cbit_space, table_limit, cd);
1294 break;
1295
1296 case OP_NOT_WORDCHAR:
1297 set_nottype_bits(start_bits, cbit_word, table_limit, cd);
1298 break;
1299
1300 case OP_WORDCHAR:
1301 set_type_bits(start_bits, cbit_word, table_limit, cd);
1302 break;
1303 }
1304
1305 tcode += 2;
1306 break;
1307
1308
1309
1310
1311
1312
1313
1314 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1315 case OP_XCLASS:
1316 if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0)
1317 return SSB_FAIL;
1318
1319 if ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0 && (tcode[1 + LINK_SIZE] & XCL_NOT) != 0)
1320 return SSB_FAIL;
1321 #endif
1322
1323
1324 case OP_NCLASS:
1325 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1326 if (utf)
1327 {
1328 start_bits[24] |= 0xf0;
1329 memset(start_bits+25, 0xff, 7);
1330 }
1331 #endif
1332 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1333 SET_BIT(0xFF);
1334 #endif
1335
1336
1337 case OP_CLASS:
1338 {
1339 pcre_uint8 *map;
1340 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1341 map = NULL;
1342 if (*tcode == OP_XCLASS)
1343 {
1344 if ((tcode[1 + LINK_SIZE] & XCL_MAP) != 0)
1345 map = (pcre_uint8 *)(tcode + 1 + LINK_SIZE + 1);
1346 tcode += GET(tcode, 1);
1347 }
1348 else
1349 #endif
1350 {
1351 tcode++;
1352 map = (pcre_uint8 *)tcode;
1353 tcode += 32 / sizeof(pcre_uchar);
1354 }
1355
1356
1357
1358
1359
1360
1361
1362 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1363 if (map != NULL)
1364 #endif
1365 {
1366 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1367 if (utf)
1368 {
1369 for (c = 0; c < 16; c++) start_bits[c] |= map[c];
1370 for (c = 128; c < 256; c++)
1371 {
1372 if ((map[c/8] && (1 << (c&7))) != 0)
1373 {
1374 int d = (c >> 6) | 0xc0;
1375 start_bits[d/8] |= (1 << (d&7));
1376 c = (c & 0xc0) + 0x40 - 1;
1377 }
1378 }
1379 }
1380 else
1381 #endif
1382 {
1383
1384 for (c = 0; c < 32; c++) start_bits[c] |= map[c];
1385 }
1386 }
1387
1388
1389
1390
1391 switch (*tcode)
1392 {
1393 case OP_CRSTAR:
1394 case OP_CRMINSTAR:
1395 case OP_CRQUERY:
1396 case OP_CRMINQUERY:
1397 case OP_CRPOSSTAR:
1398 case OP_CRPOSQUERY:
1399 tcode++;
1400 break;
1401
1402 case OP_CRRANGE:
1403 case OP_CRMINRANGE:
1404 case OP_CRPOSRANGE:
1405 if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
1406 else try_next = FALSE;
1407 break;
1408
1409 default:
1410 try_next = FALSE;
1411 break;
1412 }
1413 }
1414 break;
1415
1416 }
1417 }
1418
1419 code += GET(code, 1);
1420 }
1421 while (*code == OP_ALT);
1422 return yield;
1423 }
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448 #if defined COMPILE_PCRE8
1449 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
1450 pcre_study(const pcre *external_re, int options, const char **errorptr)
1451 #elif defined COMPILE_PCRE16
1452 PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
1453 pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
1454 #elif defined COMPILE_PCRE32
1455 PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION
1456 pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
1457 #endif
1458 {
1459 int min;
1460 int count = 0;
1461 BOOL bits_set = FALSE;
1462 pcre_uint8 start_bits[32];
1463 PUBL(extra) *extra = NULL;
1464 pcre_study_data *study;
1465 const pcre_uint8 *tables;
1466 pcre_uchar *code;
1467 compile_data compile_block;
1468 const REAL_PCRE *re = (const REAL_PCRE *)external_re;
1469
1470
1471 *errorptr = NULL;
1472
1473 if (re == NULL || re->magic_number != MAGIC_NUMBER)
1474 {
1475 *errorptr = "argument is not a compiled regular expression";
1476 return NULL;
1477 }
1478
1479 if ((re->flags & PCRE_MODE) == 0)
1480 {
1481 #if defined COMPILE_PCRE8
1482 *errorptr = "argument not compiled in 8 bit mode";
1483 #elif defined COMPILE_PCRE16
1484 *errorptr = "argument not compiled in 16 bit mode";
1485 #elif defined COMPILE_PCRE32
1486 *errorptr = "argument not compiled in 32 bit mode";
1487 #endif
1488 return NULL;
1489 }
1490
1491 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
1492 {
1493 *errorptr = "unknown or incorrect option bit(s) set";
1494 return NULL;
1495 }
1496
1497 code = (pcre_uchar *)re + re->name_table_offset +
1498 (re->name_count * re->name_entry_size);
1499
1500
1501
1502
1503
1504 if ((re->options & PCRE_ANCHORED) == 0 &&
1505 (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
1506 {
1507 int rc;
1508
1509
1510
1511 tables = re->tables;
1512
1513 #if defined COMPILE_PCRE8
1514 if (tables == NULL)
1515 (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1516 (void *)(&tables));
1517 #elif defined COMPILE_PCRE16
1518 if (tables == NULL)
1519 (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1520 (void *)(&tables));
1521 #elif defined COMPILE_PCRE32
1522 if (tables == NULL)
1523 (void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1524 (void *)(&tables));
1525 #endif
1526
1527 compile_block.lcc = tables + lcc_offset;
1528 compile_block.fcc = tables + fcc_offset;
1529 compile_block.cbits = tables + cbits_offset;
1530 compile_block.ctypes = tables + ctypes_offset;
1531
1532
1533
1534 memset(start_bits, 0, 32 * sizeof(pcre_uint8));
1535 rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
1536 &compile_block);
1537 bits_set = rc == SSB_DONE;
1538 if (rc == SSB_UNKNOWN)
1539 {
1540 *errorptr = "internal error: opcode not recognized";
1541 return NULL;
1542 }
1543 }
1544
1545
1546
1547 switch(min = find_minlength(re, code, code, re->options, NULL, &count))
1548 {
1549 case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
1550 case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
1551 default: break;
1552 }
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563 if (bits_set || min > 0 || (options & (
1564 #ifdef SUPPORT_JIT
1565 PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |
1566 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE |
1567 #endif
1568 PCRE_STUDY_EXTRA_NEEDED)) != 0)
1569 {
1570 extra = (PUBL(extra) *)(PUBL(malloc))
1571 (sizeof(PUBL(extra)) + sizeof(pcre_study_data));
1572 if (extra == NULL)
1573 {
1574 *errorptr = "failed to get memory";
1575 return NULL;
1576 }
1577
1578 study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra)));
1579 extra->flags = PCRE_EXTRA_STUDY_DATA;
1580 extra->study_data = study;
1581
1582 study->size = sizeof(pcre_study_data);
1583 study->flags = 0;
1584
1585
1586
1587
1588
1589 if (bits_set)
1590 {
1591 study->flags |= PCRE_STUDY_MAPPED;
1592 memcpy(study->start_bits, start_bits, sizeof(start_bits));
1593 }
1594 else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
1595
1596 #ifdef PCRE_DEBUG
1597 if (bits_set)
1598 {
1599 pcre_uint8 *ptr = start_bits;
1600 int i;
1601
1602 printf("Start bits:\n");
1603 for (i = 0; i < 32; i++)
1604 printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n");
1605 }
1606 #endif
1607
1608
1609
1610
1611
1612
1613 if (min > 0)
1614 {
1615 study->flags |= PCRE_STUDY_MINLEN;
1616 study->minlength = min;
1617 }
1618 else study->minlength = 0;
1619
1620
1621
1622
1623
1624
1625 #ifdef SUPPORT_JIT
1626 extra->executable_jit = NULL;
1627 if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
1628 PRIV(jit_compile)(re, extra, JIT_COMPILE);
1629 if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
1630 PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
1631 if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
1632 PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
1633
1634 if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0 &&
1635 (options & PCRE_STUDY_EXTRA_NEEDED) == 0)
1636 {
1637 #if defined COMPILE_PCRE8
1638 pcre_free_study(extra);
1639 #elif defined COMPILE_PCRE16
1640 pcre16_free_study(extra);
1641 #elif defined COMPILE_PCRE32
1642 pcre32_free_study(extra);
1643 #endif
1644 extra = NULL;
1645 }
1646 #endif
1647 }
1648
1649 return extra;
1650 }
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663 #if defined COMPILE_PCRE8
1664 PCRE_EXP_DEFN void
1665 pcre_free_study(pcre_extra *extra)
1666 #elif defined COMPILE_PCRE16
1667 PCRE_EXP_DEFN void
1668 pcre16_free_study(pcre16_extra *extra)
1669 #elif defined COMPILE_PCRE32
1670 PCRE_EXP_DEFN void
1671 pcre32_free_study(pcre32_extra *extra)
1672 #endif
1673 {
1674 if (extra == NULL)
1675 return;
1676 #ifdef SUPPORT_JIT
1677 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
1678 extra->executable_jit != NULL)
1679 PRIV(jit_free)(extra->executable_jit);
1680 #endif
1681 PUBL(free)(extra);
1682 }
1683
1684