This source file includes the following definitions.
- grapheme_register_constants
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- strstr_common_handler
- PHP_FUNCTION
- PHP_FUNCTION
- grapheme_extract_charcount_iter
- grapheme_extract_bytecount_iter
- grapheme_extract_count_iter
- PHP_FUNCTION
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <php.h>
23 #include "grapheme.h"
24 #include "grapheme_util.h"
25
26 #include <unicode/utypes.h>
27 #include <unicode/ucol.h>
28 #include <unicode/ustring.h>
29 #include <unicode/ubrk.h>
30
31 #include "ext/standard/php_string.h"
32
33
34
/*
 * Extraction modes accepted by grapheme_extract(). They are exported to PHP
 * as the GRAPHEME_EXTR_* constants and are also used as indices into the
 * grapheme_extract_iters[] dispatch table, so the numeric values must stay
 * in step with that table's order.
 */
#define GRAPHEME_EXTRACT_TYPE_COUNT 0
#define GRAPHEME_EXTRACT_TYPE_MAXBYTES 1
#define GRAPHEME_EXTRACT_TYPE_MAXCHARS 2
/* Inclusive bounds used to validate the extract-type argument. */
#define GRAPHEME_EXTRACT_TYPE_MIN GRAPHEME_EXTRACT_TYPE_COUNT
#define GRAPHEME_EXTRACT_TYPE_MAX GRAPHEME_EXTRACT_TYPE_MAXCHARS
40
41
42
43
44
45 void grapheme_register_constants( INIT_FUNC_ARGS )
46 {
47 REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_COUNT", GRAPHEME_EXTRACT_TYPE_COUNT, CONST_CS | CONST_PERSISTENT);
48 REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXBYTES", GRAPHEME_EXTRACT_TYPE_MAXBYTES, CONST_CS | CONST_PERSISTENT);
49 REGISTER_LONG_CONSTANT("GRAPHEME_EXTR_MAXCHARS", GRAPHEME_EXTRACT_TYPE_MAXCHARS, CONST_CS | CONST_PERSISTENT);
50 }
51
52
53
54
55 PHP_FUNCTION(grapheme_strlen)
56 {
57 char* string;
58 size_t string_len;
59 UChar* ustring = NULL;
60 int ustring_len = 0;
61 zend_long ret_len;
62 UErrorCode status;
63
64 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &string, &string_len) == FAILURE) {
65 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
66 "grapheme_strlen: unable to parse input param", 0 );
67 RETURN_FALSE;
68 }
69
70 ret_len = grapheme_ascii_check((unsigned char *)string, string_len);
71
72 if ( ret_len >= 0 )
73 RETURN_LONG(string_len);
74
75
76 status = U_ZERO_ERROR;
77 intl_convert_utf8_to_utf16(&ustring, &ustring_len, string, string_len, &status );
78
79 if ( U_FAILURE( status ) ) {
80
81 intl_error_set_code( NULL, status );
82
83
84 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
85 if (ustring) {
86 efree( ustring );
87 }
88 RETURN_NULL();
89 }
90
91 ret_len = grapheme_split_string(ustring, ustring_len, NULL, 0 );
92
93 if (ustring) {
94 efree( ustring );
95 }
96
97 if (ret_len >= 0) {
98 RETVAL_LONG(ret_len);
99 } else {
100 RETVAL_FALSE;
101 }
102 }
103
104
105
106
107 PHP_FUNCTION(grapheme_strpos)
108 {
109 char *haystack, *needle;
110 size_t haystack_len, needle_len;
111 const char *found;
112 zend_long loffset = 0;
113 int32_t offset = 0;
114 zend_long ret_pos;
115
116 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
117 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
118 "grapheme_strpos: unable to parse input param", 0 );
119 RETURN_FALSE;
120 }
121
122 if ( OUTSIDE_STRING(loffset, haystack_len) ) {
123 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 );
124 RETURN_FALSE;
125 }
126
127
128 offset = (int32_t) loffset;
129
130
131
132 if (needle_len == 0) {
133 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
134 RETURN_FALSE;
135 }
136
137
138
139
140
141 found = php_memnstr(haystack + offset, needle, needle_len, haystack + haystack_len);
142
143
144 if (!found) {
145 RETURN_FALSE;
146 }
147
148
149 if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
150 RETURN_LONG(found - haystack);
151 }
152
153
154 ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 , 0 );
155
156 if ( ret_pos >= 0 ) {
157 RETURN_LONG(ret_pos);
158 } else {
159 RETURN_FALSE;
160 }
161
162 }
163
164
165
166
167 PHP_FUNCTION(grapheme_stripos)
168 {
169 char *haystack, *needle, *haystack_dup, *needle_dup;
170 size_t haystack_len, needle_len;
171 const char *found;
172 zend_long loffset = 0;
173 int32_t offset = 0;
174 zend_long ret_pos;
175 int is_ascii;
176
177 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
178 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
179 "grapheme_stripos: unable to parse input param", 0 );
180 RETURN_FALSE;
181 }
182
183 if ( OUTSIDE_STRING(loffset, haystack_len) ) {
184 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Offset not contained in string", 1 );
185 RETURN_FALSE;
186 }
187
188
189 offset = (int32_t) loffset;
190
191
192
193 if (needle_len == 0) {
194 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_stripos: Empty delimiter", 1 );
195 RETURN_FALSE;
196 }
197
198
199 is_ascii = ( grapheme_ascii_check((unsigned char*)haystack, haystack_len) >= 0 );
200
201 if ( is_ascii ) {
202 needle_dup = estrndup(needle, needle_len);
203 php_strtolower(needle_dup, needle_len);
204 haystack_dup = estrndup(haystack, haystack_len);
205 php_strtolower(haystack_dup, haystack_len);
206
207 found = php_memnstr(haystack_dup + offset, needle_dup, needle_len, haystack_dup + haystack_len);
208
209 efree(haystack_dup);
210 efree(needle_dup);
211
212 if (found) {
213 RETURN_LONG(found - haystack_dup);
214 }
215
216
217 if ( grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
218 RETURN_FALSE;
219 }
220 }
221
222
223 ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 , 0 );
224
225 if ( ret_pos >= 0 ) {
226 RETURN_LONG(ret_pos);
227 } else {
228 RETURN_FALSE;
229 }
230
231 }
232
233
234
235
236 PHP_FUNCTION(grapheme_strrpos)
237 {
238 char *haystack, *needle;
239 size_t haystack_len, needle_len;
240 zend_long loffset = 0;
241 int32_t offset = 0;
242 zend_long ret_pos;
243 int is_ascii;
244
245 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
246 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
247 "grapheme_strrpos: unable to parse input param", 0 );
248 RETURN_FALSE;
249 }
250
251 if ( OUTSIDE_STRING(loffset, haystack_len) ) {
252 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 );
253 RETURN_FALSE;
254 }
255
256
257 offset = (int32_t) loffset;
258
259
260
261 if (needle_len == 0) {
262 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
263 RETURN_FALSE;
264 }
265
266 is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0;
267
268 if ( is_ascii ) {
269
270 ret_pos = grapheme_strrpos_ascii(haystack, haystack_len, needle, needle_len, offset);
271
272 if ( ret_pos >= 0 ) {
273 RETURN_LONG(ret_pos);
274 }
275
276
277
278 if ( grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
279 RETURN_FALSE;
280 }
281
282
283 }
284
285 ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 , 1);
286
287 if ( ret_pos >= 0 ) {
288 RETURN_LONG(ret_pos);
289 } else {
290 RETURN_FALSE;
291 }
292
293
294 }
295
296
297
298
299 PHP_FUNCTION(grapheme_strripos)
300 {
301 char *haystack, *needle;
302 size_t haystack_len, needle_len;
303 zend_long loffset = 0;
304 int32_t offset = 0;
305 zend_long ret_pos;
306 int is_ascii;
307
308 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &haystack, &haystack_len, &needle, &needle_len, &loffset) == FAILURE) {
309 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
310 "grapheme_strrpos: unable to parse input param", 0 );
311 RETURN_FALSE;
312 }
313
314 if ( OUTSIDE_STRING(loffset, haystack_len) ) {
315 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 );
316 RETURN_FALSE;
317 }
318
319
320 offset = (int32_t) loffset;
321
322
323
324 if (needle_len == 0) {
325 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
326 RETURN_FALSE;
327 }
328
329 is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0;
330
331 if ( is_ascii ) {
332 char *needle_dup, *haystack_dup;
333
334 needle_dup = estrndup(needle, needle_len);
335 php_strtolower(needle_dup, needle_len);
336 haystack_dup = estrndup(haystack, haystack_len);
337 php_strtolower(haystack_dup, haystack_len);
338
339 ret_pos = grapheme_strrpos_ascii(haystack_dup, haystack_len, needle_dup, needle_len, offset);
340
341 efree(haystack_dup);
342 efree(needle_dup);
343
344 if ( ret_pos >= 0 ) {
345 RETURN_LONG(ret_pos);
346 }
347
348
349
350 if ( grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
351 RETURN_FALSE;
352 }
353
354
355 }
356
357 ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 , 1 );
358
359 if ( ret_pos >= 0 ) {
360 RETURN_LONG(ret_pos);
361 } else {
362 RETURN_FALSE;
363 }
364
365
366 }
367
368
369
370
371 PHP_FUNCTION(grapheme_substr)
372 {
373 char *str;
374 zend_string *u8_sub_str;
375 UChar *ustr;
376 size_t str_len;
377 int32_t ustr_len;
378 zend_long lstart = 0, length = 0;
379 int32_t start = 0;
380 int iter_val;
381 UErrorCode status;
382 unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
383 UBreakIterator* bi = NULL;
384 int sub_str_start_pos, sub_str_end_pos;
385 int32_t (*iter_func)(UBreakIterator *);
386 zend_bool no_length = 1;
387
388 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!", &str, &str_len, &lstart, &length, &no_length) == FAILURE) {
389 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
390 "grapheme_substr: unable to parse input param", 0 );
391 RETURN_FALSE;
392 }
393
394 if ( OUTSIDE_STRING(lstart, str_len)) {
395 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
396 RETURN_FALSE;
397 }
398
399
400 start = (int32_t) lstart;
401
402 if(no_length) {
403 length = str_len;
404 }
405
406 if(length < INT32_MIN) {
407 length = INT32_MIN;
408 } else if(length > INT32_MAX) {
409 length = INT32_MAX;
410 }
411
412
413
414 if ( grapheme_ascii_check((unsigned char *)str, str_len) >= 0 ) {
415 int32_t asub_str_len;
416 char *sub_str;
417 grapheme_substr_ascii(str, str_len, start, (int32_t)length, &sub_str, &asub_str_len);
418
419 if ( NULL == sub_str ) {
420 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: invalid parameters", 1 );
421 RETURN_FALSE;
422 }
423
424 RETURN_STRINGL(sub_str, asub_str_len);
425 }
426
427 ustr = NULL;
428 ustr_len = 0;
429 status = U_ZERO_ERROR;
430 intl_convert_utf8_to_utf16(&ustr, &ustr_len, str, str_len, &status);
431
432 if ( U_FAILURE( status ) ) {
433
434 intl_error_set_code( NULL, status );
435
436
437 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
438 if (ustr) {
439 efree( ustr );
440 }
441 RETURN_FALSE;
442 }
443
444 bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status );
445
446 if( U_FAILURE(status) ) {
447 RETURN_FALSE;
448 }
449
450 ubrk_setText(bi, ustr, ustr_len, &status);
451
452 if ( start < 0 ) {
453 iter_func = ubrk_previous;
454 ubrk_last(bi);
455 iter_val = 1;
456 }
457 else {
458 iter_func = ubrk_next;
459 iter_val = -1;
460 }
461
462 sub_str_start_pos = 0;
463
464 while ( start ) {
465 sub_str_start_pos = iter_func(bi);
466
467 if ( UBRK_DONE == sub_str_start_pos ) {
468 break;
469 }
470
471 start += iter_val;
472 }
473
474 if ( 0 != start || sub_str_start_pos >= ustr_len ) {
475
476 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
477
478 if (ustr) {
479 efree(ustr);
480 }
481 ubrk_close(bi);
482 RETURN_FALSE;
483 }
484
485
486 if (length >= (int32_t)str_len) {
487
488
489
490 status = U_ZERO_ERROR;
491 u8_sub_str = intl_convert_utf16_to_utf8(ustr + sub_str_start_pos, ustr_len - sub_str_start_pos, &status);
492
493 if (ustr) {
494 efree( ustr );
495 }
496 ubrk_close( bi );
497
498 if ( !u8_sub_str ) {
499
500 intl_error_set_code( NULL, status );
501
502
503 intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
504
505 RETURN_FALSE;
506 }
507
508
509 RETVAL_NEW_STR(u8_sub_str);
510 return;
511 }
512
513 if(length == 0) {
514
515 if (ustr) {
516 efree(ustr);
517 }
518 ubrk_close(bi);
519 RETURN_EMPTY_STRING();
520 }
521
522
523
524 if ( length < 0 ) {
525 iter_func = ubrk_previous;
526 ubrk_last(bi);
527 iter_val = 1;
528 }
529 else {
530 iter_func = ubrk_next;
531 iter_val = -1;
532 }
533
534 sub_str_end_pos = 0;
535
536 while ( length ) {
537 sub_str_end_pos = iter_func(bi);
538
539 if ( UBRK_DONE == sub_str_end_pos ) {
540 break;
541 }
542
543 length += iter_val;
544 }
545
546 ubrk_close(bi);
547
548 if ( UBRK_DONE == sub_str_end_pos) {
549 if(length < 0) {
550 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length not contained in string", 1 );
551
552 efree(ustr);
553 RETURN_FALSE;
554 } else {
555 sub_str_end_pos = ustr_len;
556 }
557 }
558
559 if(sub_str_start_pos > sub_str_end_pos) {
560 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length is beyond start", 1 );
561
562 efree(ustr);
563 RETURN_FALSE;
564 }
565
566 status = U_ZERO_ERROR;
567 u8_sub_str = intl_convert_utf16_to_utf8(ustr + sub_str_start_pos, ( sub_str_end_pos - sub_str_start_pos ), &status);
568
569 efree( ustr );
570
571 if ( !u8_sub_str ) {
572
573 intl_error_set_code( NULL, status );
574
575
576 intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
577
578 RETURN_FALSE;
579 }
580
581
582 RETVAL_NEW_STR(u8_sub_str);
583 }
584
585
586
587 static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
588 {
589 char *haystack, *needle;
590 const char *found;
591 size_t haystack_len, needle_len;
592 int32_t ret_pos, uchar_pos;
593 zend_bool part = 0;
594
595 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|b", &haystack, &haystack_len, &needle, &needle_len, &part) == FAILURE) {
596
597 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
598 "grapheme_strstr: unable to parse input param", 0 );
599
600 RETURN_FALSE;
601 }
602
603 if (needle_len == 0) {
604
605 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Empty delimiter", 1 );
606
607 RETURN_FALSE;
608 }
609
610
611 if ( !f_ignore_case ) {
612
613
614
615
616 found = php_memnstr(haystack, needle, needle_len, haystack + haystack_len);
617
618
619 if ( !found ) {
620 RETURN_FALSE;
621 }
622
623
624 if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
625 size_t found_offset = found - haystack;
626
627 if (part) {
628 RETURN_STRINGL(haystack, found_offset);
629 } else {
630 RETURN_STRINGL(found, haystack_len - found_offset);
631 }
632 }
633
634 }
635
636
637 ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 );
638
639 if ( ret_pos < 0 ) {
640 RETURN_FALSE;
641 }
642
643
644
645 ret_pos = 0;
646 U8_FWD_N(haystack, ret_pos, haystack_len, uchar_pos);
647
648 if (part) {
649 RETURN_STRINGL(haystack, ret_pos);
650 } else {
651 RETURN_STRINGL(haystack + ret_pos, haystack_len - ret_pos);
652 }
653
654 }
655
656
657
658
659 PHP_FUNCTION(grapheme_strstr)
660 {
661 strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0 );
662 }
663
664
665
666
667 PHP_FUNCTION(grapheme_stristr)
668 {
669 strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1 );
670 }
671
672
673
674 static inline int32_t
675 grapheme_extract_charcount_iter(UBreakIterator *bi, int32_t csize, unsigned char *pstr, int32_t str_len)
676 {
677 int pos = 0, prev_pos = 0;
678 int ret_pos = 0, prev_ret_pos = 0;
679
680 while ( 1 ) {
681 pos = ubrk_next(bi);
682
683 if ( UBRK_DONE == pos ) {
684 break;
685 }
686
687
688 if ( pos > csize ) {
689 break;
690 }
691
692
693
694
695 prev_ret_pos = ret_pos;
696 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);
697
698 if ( prev_ret_pos == ret_pos ) {
699
700 break;
701 }
702
703 prev_pos = pos;
704 }
705
706 return ret_pos;
707 }
708
709
710
711 static inline int32_t
712 grapheme_extract_bytecount_iter(UBreakIterator *bi, int32_t bsize, unsigned char *pstr, int32_t str_len)
713 {
714 int pos = 0, prev_pos = 0;
715 int ret_pos = 0, prev_ret_pos = 0;
716
717 while ( 1 ) {
718 pos = ubrk_next(bi);
719
720 if ( UBRK_DONE == pos ) {
721 break;
722 }
723
724 prev_ret_pos = ret_pos;
725 U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);
726
727 if ( ret_pos > bsize ) {
728 ret_pos = prev_ret_pos;
729 break;
730 }
731
732 if ( prev_ret_pos == ret_pos ) {
733
734 break;
735 }
736
737 prev_pos = pos;
738 }
739
740 return ret_pos;
741 }
742
743
744
745 static inline int32_t
746 grapheme_extract_count_iter(UBreakIterator *bi, int32_t size, unsigned char *pstr, int32_t str_len)
747 {
748 int pos = 0, next_pos = 0;
749 int ret_pos = 0;
750
751 while ( size ) {
752 next_pos = ubrk_next(bi);
753
754 if ( UBRK_DONE == next_pos ) {
755 break;
756 }
757 pos = next_pos;
758 size--;
759 }
760
761
762
763
764
765 U8_FWD_N(pstr, ret_pos, str_len, pos);
766
767 return ret_pos;
768 }
769
770
771
772 typedef int32_t (*grapheme_extract_iter)(UBreakIterator * , int32_t , unsigned char * , int32_t );
773
774 static grapheme_extract_iter grapheme_extract_iters[] = {
775 &grapheme_extract_count_iter,
776 &grapheme_extract_bytecount_iter,
777 &grapheme_extract_charcount_iter,
778 };
779
780
781
782
783 PHP_FUNCTION(grapheme_extract)
784 {
785 char *str, *pstr;
786 UChar *ustr;
787 size_t str_len;
788 int32_t ustr_len;
789 zend_long size;
790 zend_long lstart = 0;
791 int32_t start = 0;
792 zend_long extract_type = GRAPHEME_EXTRACT_TYPE_COUNT;
793 UErrorCode status;
794 unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
795 UBreakIterator* bi = NULL;
796 int ret_pos;
797 zval *next = NULL;
798
799 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|llz", &str, &str_len, &size, &extract_type, &lstart, &next) == FAILURE) {
800 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
801 "grapheme_extract: unable to parse input param", 0 );
802 RETURN_FALSE;
803 }
804
805 if ( NULL != next ) {
806 if ( !Z_ISREF_P(next) ) {
807 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
808 "grapheme_extract: 'next' was not passed by reference", 0 );
809 RETURN_FALSE;
810 } else {
811 ZVAL_DEREF(next);
812
813 SEPARATE_ZVAL_NOREF(next);
814 zval_dtor(next);
815 ZVAL_LONG(next, lstart);
816 }
817 }
818
819 if ( extract_type < GRAPHEME_EXTRACT_TYPE_MIN || extract_type > GRAPHEME_EXTRACT_TYPE_MAX ) {
820 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
821 "grapheme_extract: unknown extract type param", 0 );
822 RETURN_FALSE;
823 }
824
825 if ( lstart > INT32_MAX || lstart < 0 || lstart >= str_len ) {
826 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: start not contained in string", 0 );
827 RETURN_FALSE;
828 }
829
830 if ( size > INT32_MAX || size < 0) {
831 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: size is invalid", 0 );
832 RETURN_FALSE;
833 }
834 if (size == 0) {
835 RETURN_EMPTY_STRING();
836 }
837
838
839 start = (int32_t) lstart;
840
841 pstr = str + start;
842
843
844 if ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
845 char *str_end = str + str_len;
846
847 while ( !UTF8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
848 pstr++;
849 if ( pstr >= str_end ) {
850 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
851 "grapheme_extract: invalid input string", 0 );
852
853 RETURN_FALSE;
854 }
855 }
856 }
857
858 str_len -= (pstr - str);
859
860
861
862
863
864 if ( -1 != grapheme_ascii_check((unsigned char *)pstr, MIN(size + 1, str_len)) ) {
865 size_t nsize = MIN(size, str_len);
866 if ( NULL != next ) {
867 ZVAL_LONG(next, start+nsize);
868 }
869 RETURN_STRINGL(pstr, nsize);
870 }
871
872
873 ustr = NULL;
874 ustr_len = 0;
875 status = U_ZERO_ERROR;
876 intl_convert_utf8_to_utf16(&ustr, &ustr_len, pstr, str_len, &status );
877
878 if ( U_FAILURE( status ) ) {
879
880 intl_error_set_code( NULL, status );
881
882
883 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
884
885 if ( NULL != ustr )
886 efree( ustr );
887
888 RETURN_FALSE;
889 }
890
891 bi = NULL;
892 status = U_ZERO_ERROR;
893 bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status );
894
895 ubrk_setText(bi, ustr, ustr_len, &status);
896
897
898
899
900
901
902 ret_pos = (*grapheme_extract_iters[extract_type])(bi, size, (unsigned char *)pstr, (int32_t)str_len);
903
904 if (ustr) {
905 efree(ustr);
906 }
907 ubrk_close(bi);
908
909 if ( NULL != next ) {
910 ZVAL_LONG(next, start+ret_pos);
911 }
912
913 RETURN_STRINGL(((char *)pstr), ret_pos);
914 }
915
916
917
918
919
920
921
922
923
924
925
926