root/ext/mbstring/php_mbregex.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ZEND_EXTERN_MODULE_GLOBALS
  2. php_mb_regex_free_cache
  3. _php_mb_regex_globals_ctor
  4. _php_mb_regex_globals_dtor
  5. php_mb_regex_globals_alloc
  6. php_mb_regex_globals_free
  7. PHP_MINIT_FUNCTION
  8. PHP_MSHUTDOWN_FUNCTION
  9. PHP_RINIT_FUNCTION
  10. PHP_RSHUTDOWN_FUNCTION
  11. PHP_MINFO_FUNCTION
  12. _php_mb_regex_name2mbctype
  13. _php_mb_regex_mbctype2name
  14. php_mb_regex_set_mbctype
  15. php_mb_regex_set_default_mbctype
  16. php_mb_regex_get_mbctype
  17. php_mb_regex_get_default_mbctype
  18. php_mbregex_compile_pattern
  19. _php_mb_regex_get_option_string
  20. _php_mb_regex_init_options
  21. PHP_FUNCTION
  22. _php_mb_regex_ereg_exec
  23. PHP_FUNCTION
  24. PHP_FUNCTION
  25. _php_mb_regex_ereg_replace_exec
  26. PHP_FUNCTION
  27. PHP_FUNCTION
  28. PHP_FUNCTION
  29. PHP_FUNCTION
  30. PHP_FUNCTION
  31. _php_mb_regex_ereg_search_exec
  32. PHP_FUNCTION
  33. PHP_FUNCTION
  34. PHP_FUNCTION
  35. PHP_FUNCTION
  36. PHP_FUNCTION
  37. PHP_FUNCTION
  38. PHP_FUNCTION
  39. _php_mb_regex_set_options
  40. PHP_FUNCTION

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 7                                                        |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 1997-2016 The PHP Group                                |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
  16    +----------------------------------------------------------------------+
  17  */
  18 
  19 /* $Id$ */
  20 
  21 
  22 #ifdef HAVE_CONFIG_H
  23 #include "config.h"
  24 #endif
  25 
  26 #include "php.h"
  27 #include "php_ini.h"
  28 
  29 #if HAVE_MBREGEX
  30 
  31 #include "zend_smart_str.h"
  32 #include "ext/standard/info.h"
  33 #include "php_mbregex.h"
  34 #include "mbstring.h"
  35 
  36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
  37 #include <oniguruma.h>
  38 #undef UChar
  39 
  40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
  41 
   42 struct _zend_mb_regex_globals { /* state of the mbregex functions; fields are reached through MBREX() */
   43         OnigEncoding default_mbctype; /* copied back into current_mbctype at request shutdown */
   44         OnigEncoding current_mbctype; /* encoding passed to the pattern compiler */
   45         HashTable ht_rc; /* cache of compiled regexes, keyed by the pattern bytes */
   46         zval search_str; /* UNDEF when unset; released at request shutdown */
   47         zval *search_str_val; /* NOTE(review): not initialised by the ctor and unused in the visible code */
   48         unsigned int search_pos; /* reset to 0 at request shutdown */
   49         php_mb_regex_t *search_re; /* NULL after construction */
   50         OnigRegion *search_regs; /* NULL when unset; freed at request shutdown */
   51         OnigOptionType regex_default_options; /* options used when a caller passes no option string */
   52         OnigSyntaxType *regex_default_syntax; /* syntax used when a caller passes no option string */
   53 };
  54 
   55 #define MBREX(g) (MBSTRG(mb_regex_globals)->g) /* field g of the regex globals struct that MBSTRG(mb_regex_globals) points to */
  56 
   57 /* {{{ static void php_mb_regex_free_cache()
       Element destructor of the compiled-regex cache (it is the destructor
       handed to zend_hash_init() for ht_rc): frees the regex stored as the
       element's pointer value. */
   58 static void php_mb_regex_free_cache(zval *el) {
   59         onig_free((php_mb_regex_t *)Z_PTR_P(el));
   60 }
   61 /* }}} */
  62 
   63 /* {{{ _php_mb_regex_globals_ctor
       Fill a globals struct with its defaults: UTF-8 for both encodings, an
       empty regex cache, no saved search state, and MULTILINE|SINGLELINE
       with Ruby syntax as the default options. Always returns SUCCESS.
       NOTE(review): search_str_val is not assigned here. */
   64 static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals)
   65 {
   66         pglobals->default_mbctype = ONIG_ENCODING_UTF8;
   67         pglobals->current_mbctype = ONIG_ENCODING_UTF8;
   68         zend_hash_init(&(pglobals->ht_rc), 0, NULL, php_mb_regex_free_cache, 1); /* entries are released by php_mb_regex_free_cache; the trailing 1 is zend_hash_init()'s persistent flag */
   69         ZVAL_UNDEF(&pglobals->search_str);
   70         pglobals->search_re = (php_mb_regex_t*)NULL;
   71         pglobals->search_pos = 0;
   72         pglobals->search_regs = (OnigRegion*)NULL;
   73         pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; /* the combination the option-string helpers spell as 'p' */
   74         pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
   75         return SUCCESS;
   76 }
   77 /* }}} */
  78 
   79 /* {{{ _php_mb_regex_globals_dtor
       Destroy the compiled-regex cache of a globals struct. Only ht_rc is
       touched; search_str and search_regs are released by the request
       shutdown handler instead. */
   80 static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals)
   81 {
   82         zend_hash_destroy(&pglobals->ht_rc);
   83 }
   84 /* }}} */
  85 
   86 /* {{{ php_mb_regex_globals_alloc
       Allocate a globals struct with pemalloc(..., 1) and run the
       constructor on it. Returns the new struct, or NULL when the
       allocation or the constructor fails (the memory is released again in
       the latter case). Pair with php_mb_regex_globals_free(). */
   87 zend_mb_regex_globals *php_mb_regex_globals_alloc(void)
   88 {
   89         zend_mb_regex_globals *pglobals = pemalloc(
   90                         sizeof(zend_mb_regex_globals), 1);
   91         if (!pglobals) {
   92                 return NULL;
   93         }
   94         if (SUCCESS != _php_mb_regex_globals_ctor(pglobals)) {
   95                 pefree(pglobals, 1);
   96                 return NULL;
   97         }
   98         return pglobals;
   99 }
  100 /* }}} */
 101 
  102 /* {{{ php_mb_regex_globals_free
       Run the destructor on a globals struct and release its memory with
       pefree(..., 1). A NULL argument is accepted and ignored. */
  103 void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals)
  104 {
  105         if (!pglobals) {
  106                 return;
  107         }
  108         _php_mb_regex_globals_dtor(pglobals);
  109         pefree(pglobals, 1);
  110 }
  111 /* }}} */
 112 
  113 /* {{{ PHP_MINIT_FUNCTION(mb_regex)
       Module startup: initialise the Oniguruma library. Always returns
       SUCCESS. */
  114 PHP_MINIT_FUNCTION(mb_regex)
  115 {
  116         onig_init();
  117         return SUCCESS;
  118 }
  119 /* }}} */
 120 
  121 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex)
       Module shutdown: counterpart of the startup handler, calls
       onig_end(). Always returns SUCCESS. */
  122 PHP_MSHUTDOWN_FUNCTION(mb_regex)
  123 {
  124         onig_end();
  125         return SUCCESS;
  126 }
  127 /* }}} */
 128 
  129 /* {{{ PHP_RINIT_FUNCTION(mb_regex)
       Request startup: returns SUCCESS when the regex globals pointer is
       set and FAILURE when it is NULL. Nothing else is initialised here. */
  130 PHP_RINIT_FUNCTION(mb_regex)
  131 {
  132         return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
  133 }
  134 /* }}} */
 135 
  136 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex)
       Request shutdown: restore the default encoding, release the saved
       search string and match regions, reset the search position and empty
       the compiled-regex cache. Always returns SUCCESS. */
  137 PHP_RSHUTDOWN_FUNCTION(mb_regex)
  138 {
  139         MBREX(current_mbctype) = MBREX(default_mbctype);
  140
  141         if (!Z_ISUNDEF(MBREX(search_str))) {
  142                 zval_ptr_dtor(&MBREX(search_str));
  143                 ZVAL_UNDEF(&MBREX(search_str));
  144         }
  145         MBREX(search_pos) = 0;
  146
  147         if (MBREX(search_regs) != NULL) {
  148                 onig_region_free(MBREX(search_regs), 1); /* second argument 1: free the region struct itself as well */
  149                 MBREX(search_regs) = (OnigRegion *)NULL;
  150         }
  151         zend_hash_clean(&MBREX(ht_rc)); /* NOTE(review): search_re is not reset in this function; confirm it cannot keep pointing at a regex released by this call */
  152
  153         return SUCCESS;
  154 }
  155 /* }}} */
 156 
  157 /* {{{ PHP_MINFO_FUNCTION(mb_regex)
       Print the mbregex section of the module info table: an "enabled"
       row, a backtrack-check row (only when PHP_ONIG_BUNDLED is defined)
       and the Oniguruma version built from the ONIGURUMA_VERSION_*
       macros. */
  158 PHP_MINFO_FUNCTION(mb_regex)
  159 {
  160         char buf[32];
  161         php_info_print_table_start();
  162         php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
  163         snprintf(buf, sizeof(buf), "%d.%d.%d",
  164                         ONIGURUMA_VERSION_MAJOR,
  165                         ONIGURUMA_VERSION_MINOR,
  166                         ONIGURUMA_VERSION_TEENY);
  167 #ifdef PHP_ONIG_BUNDLED
  168 #ifdef USE_COMBINATION_EXPLOSION_CHECK
  169         php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
  170 #else   /* USE_COMBINATION_EXPLOSION_CHECK */
  171         php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
  172 #endif  /* USE_COMBINATION_EXPLOSION_CHECK */
  173 #endif /* PHP_ONIG_BUNDLED */
  174         php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
  175         php_info_print_table_end();
  176 }
  177 /* }}} */
 178 
 179 /*
 180  * encoding name resolver
 181  */
 182 
  183 /* {{{ encoding name map
       Table that ties Oniguruma encodings to the names accepted for them.
       Each entry is compiled in only when its ONIG_ENCODING_* macro is
       defined; the table ends with an entry whose names pointer is NULL. */
  184 typedef struct _php_mb_regex_enc_name_map_t {
  185         const char *names; /* aliases packed back to back, each NUL-terminated; an empty string ends the list. Read as a plain C string this is the first alias. */
  186         OnigEncoding code; /* encoding selected by any of the aliases */
  187 } php_mb_regex_enc_name_map_t;
  188
  189 php_mb_regex_enc_name_map_t enc_name_map[] = {
  190 #ifdef ONIG_ENCODING_EUC_JP
  191         {
  192                 "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0", /* "EUCJP" is listed twice; the repeat has no effect on lookups */
  193                 ONIG_ENCODING_EUC_JP
  194         },
  195 #endif
  196 #ifdef ONIG_ENCODING_UTF8
  197         {
  198                 "UTF-8\0UTF8\0",
  199                 ONIG_ENCODING_UTF8
  200         },
  201 #endif
  202 #ifdef ONIG_ENCODING_UTF16_BE
  203         {
  204                 "UTF-16\0UTF-16BE\0",
  205                 ONIG_ENCODING_UTF16_BE
  206         },
  207 #endif
  208 #ifdef ONIG_ENCODING_UTF16_LE
  209         {
  210                 "UTF-16LE\0",
  211                 ONIG_ENCODING_UTF16_LE
  212         },
  213 #endif
  214 #ifdef ONIG_ENCODING_UTF32_BE
  215         {
  216                 "UCS-4\0UTF-32\0UTF-32BE\0",
  217                 ONIG_ENCODING_UTF32_BE
  218         },
  219 #endif
  220 #ifdef ONIG_ENCODING_UTF32_LE
  221         {
  222                 "UCS-4LE\0UTF-32LE\0",
  223                 ONIG_ENCODING_UTF32_LE
  224         },
  225 #endif
  226 #ifdef ONIG_ENCODING_SJIS
  227         {
  228                 "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
  229                 ONIG_ENCODING_SJIS
  230         },
  231 #endif
  232 #ifdef ONIG_ENCODING_BIG5
  233         {
  234                 "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
  235                 ONIG_ENCODING_BIG5
  236         },
  237 #endif
  238 #ifdef ONIG_ENCODING_EUC_CN
  239         {
  240                 "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
  241                 ONIG_ENCODING_EUC_CN
  242         },
  243 #endif
  244 #ifdef ONIG_ENCODING_EUC_TW
  245         {
  246                 "EUC-TW\0EUCTW\0EUC_TW\0",
  247                 ONIG_ENCODING_EUC_TW
  248         },
  249 #endif
  250 #ifdef ONIG_ENCODING_EUC_KR
  251         {
  252                 "EUC-KR\0EUCKR\0EUC_KR\0",
  253                 ONIG_ENCODING_EUC_KR
  254         },
  255 #endif
  256 #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
  257         {
  258                 "KOI8\0KOI-8\0",
  259                 ONIG_ENCODING_KOI8
  260         },
  261 #endif
  262 #ifdef ONIG_ENCODING_KOI8_R
  263         {
  264                 "KOI8R\0KOI8-R\0KOI-8R\0",
  265                 ONIG_ENCODING_KOI8_R
  266         },
  267 #endif
  268 #ifdef ONIG_ENCODING_ISO_8859_1
  269         {
  270                 "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
  271                 ONIG_ENCODING_ISO_8859_1
  272         },
  273 #endif
  274 #ifdef ONIG_ENCODING_ISO_8859_2
  275         {
  276                 "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
  277                 ONIG_ENCODING_ISO_8859_2
  278         },
  279 #endif
  280 #ifdef ONIG_ENCODING_ISO_8859_3
  281         {
  282                 "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
  283                 ONIG_ENCODING_ISO_8859_3
  284         },
  285 #endif
  286 #ifdef ONIG_ENCODING_ISO_8859_4
  287         {
  288                 "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
  289                 ONIG_ENCODING_ISO_8859_4
  290         },
  291 #endif
  292 #ifdef ONIG_ENCODING_ISO_8859_5
  293         {
  294                 "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
  295                 ONIG_ENCODING_ISO_8859_5
  296         },
  297 #endif
  298 #ifdef ONIG_ENCODING_ISO_8859_6
  299         {
  300                 "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
  301                 ONIG_ENCODING_ISO_8859_6
  302         },
  303 #endif
  304 #ifdef ONIG_ENCODING_ISO_8859_7
  305         {
  306                 "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
  307                 ONIG_ENCODING_ISO_8859_7
  308         },
  309 #endif
  310 #ifdef ONIG_ENCODING_ISO_8859_8
  311         {
  312                 "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
  313                 ONIG_ENCODING_ISO_8859_8
  314         },
  315 #endif
  316 #ifdef ONIG_ENCODING_ISO_8859_9
  317         {
  318                 "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
  319                 ONIG_ENCODING_ISO_8859_9
  320         },
  321 #endif
  322 #ifdef ONIG_ENCODING_ISO_8859_10
  323         {
  324                 "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
  325                 ONIG_ENCODING_ISO_8859_10
  326         },
  327 #endif
  328 #ifdef ONIG_ENCODING_ISO_8859_11
  329         {
  330                 "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
  331                 ONIG_ENCODING_ISO_8859_11
  332         },
  333 #endif
  334 #ifdef ONIG_ENCODING_ISO_8859_13
  335         {
  336                 "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
  337                 ONIG_ENCODING_ISO_8859_13
  338         },
  339 #endif
  340 #ifdef ONIG_ENCODING_ISO_8859_14
  341         {
  342                 "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
  343                 ONIG_ENCODING_ISO_8859_14
  344         },
  345 #endif
  346 #ifdef ONIG_ENCODING_ISO_8859_15
  347         {
  348                 "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
  349                 ONIG_ENCODING_ISO_8859_15
  350         },
  351 #endif
  352 #ifdef ONIG_ENCODING_ISO_8859_16
  353         {
  354                 "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
  355                 ONIG_ENCODING_ISO_8859_16
  356         },
  357 #endif
  358 #ifdef ONIG_ENCODING_ASCII
  359         {
  360                 "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
  361                 ONIG_ENCODING_ASCII
  362         },
  363 #endif
  364         { NULL, ONIG_ENCODING_UNDEF } /* sentinel: the lookup loops stop at names == NULL */
  365 };
  366 /* }}} */
 367 
  368 /* {{{ _php_mb_regex_name2mbctype
       Resolve an encoding name to its Oniguruma encoding by comparing it
       case-insensitively against every alias in enc_name_map. Returns
       ONIG_ENCODING_UNDEF for NULL, for an empty string and for names that
       match no alias. */
  369 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
  370 {
  371         const char *p;
  372         php_mb_regex_enc_name_map_t *mapping;
  373
  374         if (pname == NULL || !*pname) {
  375                 return ONIG_ENCODING_UNDEF;
  376         }
  377
  378         for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  379                 for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) { /* step to the next packed alias; an empty alias ends the list */
  380                         if (strcasecmp(p, pname) == 0) {
  381                                 return mapping->code;
  382                         }
  383                 }
  384         }
  385
  386         return ONIG_ENCODING_UNDEF;
  387 }
  388 /* }}} */
 389 
  390 /* {{{ _php_mb_regex_mbctype2name
       Reverse lookup: return the alias list of the first enc_name_map entry
       whose encoding equals mbctype, or NULL when none does. The result
       points into the static table; used as a C string it is the entry's
       first alias. */
  391 static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
  392 {
  393         php_mb_regex_enc_name_map_t *mapping;
  394
  395         for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
  396                 if (mapping->code == mbctype) {
  397                         return mapping->names;
  398                 }
  399         }
  400
  401         return NULL;
  402 }
  403 /* }}} */
 404 
  405 /* {{{ php_mb_regex_set_mbctype
       Set the current regex encoding by name. Returns SUCCESS, or FAILURE
       (leaving the current encoding unchanged) when the name is not
       recognised. */
  406 int php_mb_regex_set_mbctype(const char *encname)
  407 {
  408         OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
  409         if (mbctype == ONIG_ENCODING_UNDEF) {
  410                 return FAILURE;
  411         }
  412         MBREX(current_mbctype) = mbctype;
  413         return SUCCESS;
  414 }
  415 /* }}} */
 416 
  417 /* {{{ php_mb_regex_set_default_mbctype
       Set the default regex encoding by name. Returns SUCCESS, or FAILURE
       (leaving the default unchanged) when the name is not recognised. The
       current encoding is not modified by this call. */
  418 int php_mb_regex_set_default_mbctype(const char *encname)
  419 {
  420         OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
  421         if (mbctype == ONIG_ENCODING_UNDEF) {
  422                 return FAILURE;
  423         }
  424         MBREX(default_mbctype) = mbctype;
  425         return SUCCESS;
  426 }
  427 /* }}} */
 428 
  429 /* {{{ php_mb_regex_get_mbctype
       Return the name of the current regex encoding, or NULL when it has no
       entry in the name table. The string is not owned by the caller. */
  430 const char *php_mb_regex_get_mbctype(void)
  431 {
  432         return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  433 }
  434 /* }}} */
 435 
  436 /* {{{ php_mb_regex_get_default_mbctype
       Return the name of the default regex encoding, or NULL when it has no
       entry in the name table. The string is not owned by the caller. */
  437 const char *php_mb_regex_get_default_mbctype(void)
  438 {
  439         return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
  440 }
  441 /* }}} */
 442 
 443 /*
 444  * regex cache
 445  */
  446 /* {{{ php_mbregex_compile_pattern
       Return a compiled regex for pattern, using the ht_rc cache.
       The cache is keyed by the pattern bytes alone. A cached regex is
       reused only when its options, encoding and syntax all equal the
       requested ones; otherwise the pattern is compiled with onig_new()
       and stored under the same key.
       Returns NULL after raising an E_WARNING when compilation fails.
       The returned regex is held by the cache, whose destructor frees it.
       NOTE(review): storing under an existing key presumably releases the
       previous regex through that destructor; confirm that no caller still
       holds it.
       NOTE(review): patlen is an int while the visible callers pass size_t
       lengths. */
  447 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax)
  448 {
  449         int err_code = 0;
  450         php_mb_regex_t *retval = NULL, *rc = NULL;
  451         OnigErrorInfo err_info;
  452         OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
  453
  454         rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
  455         if (!rc || rc->options != options || rc->enc != enc || rc->syntax != syntax) { /* cache miss, or hit compiled with different settings */
  456                 if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
  457                         onig_error_code_to_str(err_str, err_code, err_info);
  458                         php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
  459                         retval = NULL;
  460                         goto out;
  461                 }
  462                 zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
  463         } else if (rc) { /* rc is always non-NULL on this branch; the test is redundant */
  464                 retval = rc;
  465         }
  466 out:
  467         return retval;
  468 }
  469 /* }}} */
 470 
  471 /* {{{ _php_mb_regex_get_option_string
       Render an option mask and a syntax as the letter string understood by
       _php_mb_regex_init_options(), writing at most len bytes to str.
       Letters are emitted in the order i, x, p (or m and/or s), l, n, then
       one syntax letter if the syntax is a known one, then a NUL.
       Returns 0 when everything including the NUL fitted. Otherwise returns
       the number of bytes required; in that case the output is truncated
       and not NUL-terminated. */
  472 static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
  473 {
  474         size_t len_left = len; /* bytes still free in str */
  475         size_t len_req = 0; /* bytes needed regardless of what fitted */
  476         char *p = str;
  477         char c;
  478
  479         if ((option & ONIG_OPTION_IGNORECASE) != 0) {
  480                 if (len_left > 0) {
  481                         --len_left;
  482                         *(p++) = 'i';
  483                 }
  484                 ++len_req;
  485         }
  486
  487         if ((option & ONIG_OPTION_EXTEND) != 0) {
  488                 if (len_left > 0) {
  489                         --len_left;
  490                         *(p++) = 'x';
  491                 }
  492                 ++len_req;
  493         }
  494
  495         if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
  496                         (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) { /* both bits set: emit the combined letter 'p' instead of "ms" */
  497                 if (len_left > 0) {
  498                         --len_left;
  499                         *(p++) = 'p';
  500                 }
  501                 ++len_req;
  502         } else {
  503                 if ((option & ONIG_OPTION_MULTILINE) != 0) {
  504                         if (len_left > 0) {
  505                                 --len_left;
  506                                 *(p++) = 'm';
  507                         }
  508                         ++len_req;
  509                 }
  510
  511                 if ((option & ONIG_OPTION_SINGLELINE) != 0) {
  512                         if (len_left > 0) {
  513                                 --len_left;
  514                                 *(p++) = 's';
  515                         }
  516                         ++len_req;
  517                 }
  518         }
  519         if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
  520                 if (len_left > 0) {
  521                         --len_left;
  522                         *(p++) = 'l';
  523                 }
  524                 ++len_req;
  525         }
  526         if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
  527                 if (len_left > 0) {
  528                         --len_left;
  529                         *(p++) = 'n';
  530                 }
  531                 ++len_req;
  532         }
  533
  534         c = 0; /* stays 0 when the syntax is none of the ones listed below */
  535
  536         if (syntax == ONIG_SYNTAX_JAVA) {
  537                 c = 'j';
  538         } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
  539                 c = 'u';
  540         } else if (syntax == ONIG_SYNTAX_GREP) {
  541                 c = 'g';
  542         } else if (syntax == ONIG_SYNTAX_EMACS) {
  543                 c = 'c';
  544         } else if (syntax == ONIG_SYNTAX_RUBY) {
  545                 c = 'r';
  546         } else if (syntax == ONIG_SYNTAX_PERL) {
  547                 c = 'z';
  548         } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
  549                 c = 'b';
  550         } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
  551                 c = 'd';
  552         }
  553
  554         if (c != 0) {
  555                 if (len_left > 0) {
  556                         --len_left;
  557                         *(p++) = c;
  558                 }
  559                 ++len_req;
  560         }
  561
  562
  563         if (len_left > 0) { /* terminating NUL, counted in len_req even when it does not fit */
  564                 --len_left;
  565                 *(p++) = '\0';
  566         }
  567         ++len_req;
  568         if (len < len_req) {
  569                 return len_req;
  570         }
  571
  572         return 0;
  573 }
  574 /* }}} */
 575 
  576 /* {{{ _php_mb_regex_init_options
       Parse the first narg characters of the option string parg.
       - *syntax is always reset to ONIG_SYNTAX_RUBY first and then
         overridden by the last syntax letter seen (j u g c r z b d).
       - Flag letters (i x m s p l n) are collected and OR-ed into *option
         when option is non-NULL; bits already set there are kept.
       - 'e' stores 1 in *eval when eval is non-NULL; *eval is never
         cleared here.
       - Any other character is ignored.
       When parg is NULL only the *syntax reset happens. */
  577 static void
  578 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
  579 {
  580         int n;
  581         char c;
  582         int optm = 0;
  583
  584         *syntax = ONIG_SYNTAX_RUBY;
  585
  586         if (parg != NULL) {
  587                 n = 0;
  588                 while(n < narg) {
  589                         c = parg[n++];
  590                         switch (c) {
  591                                 case 'i':
  592                                         optm |= ONIG_OPTION_IGNORECASE;
  593                                         break;
  594                                 case 'x':
  595                                         optm |= ONIG_OPTION_EXTEND;
  596                                         break;
  597                                 case 'm':
  598                                         optm |= ONIG_OPTION_MULTILINE;
  599                                         break;
  600                                 case 's':
  601                                         optm |= ONIG_OPTION_SINGLELINE;
  602                                         break;
  603                                 case 'p':
  604                                         optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
  605                                         break;
  606                                 case 'l':
  607                                         optm |= ONIG_OPTION_FIND_LONGEST;
  608                                         break;
  609                                 case 'n':
  610                                         optm |= ONIG_OPTION_FIND_NOT_EMPTY;
  611                                         break;
  612                                 case 'j':
  613                                         *syntax = ONIG_SYNTAX_JAVA;
  614                                         break;
  615                                 case 'u':
  616                                         *syntax = ONIG_SYNTAX_GNU_REGEX;
  617                                         break;
  618                                 case 'g':
  619                                         *syntax = ONIG_SYNTAX_GREP;
  620                                         break;
  621                                 case 'c':
  622                                         *syntax = ONIG_SYNTAX_EMACS;
  623                                         break;
  624                                 case 'r':
  625                                         *syntax = ONIG_SYNTAX_RUBY;
  626                                         break;
  627                                 case 'z':
  628                                         *syntax = ONIG_SYNTAX_PERL;
  629                                         break;
  630                                 case 'b':
  631                                         *syntax = ONIG_SYNTAX_POSIX_BASIC;
  632                                         break;
  633                                 case 'd':
  634                                         *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
  635                                         break;
  636                                 case 'e':
  637                                         if (eval != NULL) *eval = 1;
  638                                         break;
  639                                 default:
  640                                         break;
  641                         }
  642                 }
  643                 if (option != NULL) *option|=optm;
  644         }
  645 }
  646 /* }}} */
 647 
 648 /*
 649  * php functions
 650  */
 651 
  652 /* {{{ proto string mb_regex_encoding([string encoding])
  653    Without an argument, returns the name of the current regex encoding
       (FALSE when it has no entry in the name table). With an argument, makes
       that encoding current and returns TRUE, or raises an E_WARNING and
       returns FALSE when the name is unknown. */
  654 PHP_FUNCTION(mb_regex_encoding)
  655 {
  656         char *encoding = NULL;
  657         size_t encoding_len;
  658         OnigEncoding mbctype;
  659
  660         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) {
  661                 return;
  662         }
  663
  664         if (!encoding) { /* getter form: no argument was passed */
  665                 const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
  666
  667                 if (retval == NULL) {
  668                         RETURN_FALSE;
  669                 }
  670
  671                 RETURN_STRING((char *)retval);
  672         } else { /* setter form */
  673                 mbctype = _php_mb_regex_name2mbctype(encoding);
  674
  675                 if (mbctype == ONIG_ENCODING_UNDEF) {
  676                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
  677                         RETURN_FALSE;
  678                 }
  679
  680                 MBREX(current_mbctype) = mbctype;
  681                 RETURN_TRUE;
  682         }
  683 }
  684 /* }}} */
 685 
  686 /* {{{ _php_mb_regex_ereg_exec
       Shared implementation of mb_ereg() (icase == 0) and mb_eregi()
       (icase != 0).
       The pattern is compiled with the default options (plus IGNORECASE
       when icase is set), the current encoding and the default syntax,
       then searched for in the whole subject string.
       Return value: FALSE when argument parsing fails, the pattern is
       empty (with an E_WARNING), compilation fails or onig_search()
       returns a negative value. Otherwise an integer: the byte length of
       the whole match when the registers argument was passed, with a
       length of 0 reported as 1; or 1 when it was not passed.
       When the registers argument is passed and a match is found, it is
       replaced by an array indexed by group number. */
  687 static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
  688 {
  689         zval *arg_pattern, *array = NULL;
  690         char *string;
  691         size_t string_len;
  692         php_mb_regex_t *re;
  693         OnigRegion *regs = NULL;
  694         int i, match_len, beg, end;
  695         OnigOptionType options;
  696         char *str;
  697
  698         if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z/", &arg_pattern, &string, &string_len, &array) == FAILURE) {
  699                 RETURN_FALSE;
  700         }
  701
  702         options = MBREX(regex_default_options);
  703         if (icase) {
  704                 options |= ONIG_OPTION_IGNORECASE;
  705         }
  706
  707         /* compile the regular expression from the supplied regex */
  708         if (Z_TYPE_P(arg_pattern) != IS_STRING) {
  709                 /* we convert numbers to integers and treat them as a string */
  710                 if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) {
  711                         convert_to_long_ex(arg_pattern);        /* get rid of decimal places */
  712                 }
  713                 convert_to_string_ex(arg_pattern);
  714                 /* don't bother doing an extended regex with just a number */
  715         }
  716
  717         if (Z_STRLEN_P(arg_pattern) == 0) {
  718                 php_error_docref(NULL, E_WARNING, "empty pattern");
  719                 RETVAL_FALSE;
  720                 goto out;
  721         }
  722
  723         re = php_mbregex_compile_pattern(Z_STRVAL_P(arg_pattern), Z_STRLEN_P(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax));
  724         if (re == NULL) {
  725                 RETVAL_FALSE;
  726                 goto out;
  727         }
  728
  729         regs = onig_region_new(); /* NOTE(review): the result is not checked for NULL before use */
  730
  731         /* actually execute the regular expression */
  732         if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) { /* any negative result is reported as FALSE */
  733                 RETVAL_FALSE;
  734                 goto out;
  735         }
  736
  737         match_len = 1;
  738         str = string;
  739         if (array != NULL) {
  740                 zval_dtor(array); /* discard whatever the caller's variable held */
  741                 array_init(array);
  742
  743                 match_len = regs->end[0] - regs->beg[0];
  744                 for (i = 0; i < regs->num_regs; i++) {
  745                         beg = regs->beg[i];
  746                         end = regs->end[i];
  747                         if (beg >= 0 && beg < end && end <= string_len) { /* beg < end: zero-length groups take the else branch, like unmatched ones */
  748                                 add_index_stringl(array, i, (char *)&str[beg], end - beg);
  749                         } else {
  750                                 add_index_bool(array, i, 0);
  751                         }
  752                 }
  753         }
  754
  755         if (match_len == 0) {
  756                 match_len = 1;
  757         }
  758         RETVAL_LONG(match_len);
  759 out:
  760         if (regs != NULL) {
  761                 onig_region_free(regs, 1);
  762         }
  763 }
  764 /* }}} */
 765 
  766 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
  767    Regular expression match for multibyte string. Forwards to
       _php_mb_regex_ereg_exec() with icase = 0 (case-sensitive). */
  768 PHP_FUNCTION(mb_ereg)
  769 {
  770         _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
  771 }
  772 /* }}} */
 773 
  774 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
  775    Case-insensitive regular expression match for multibyte string.
       Forwards to _php_mb_regex_ereg_exec() with icase = 1, which adds
       ONIG_OPTION_IGNORECASE to the default options. */
  776 PHP_FUNCTION(mb_eregi)
  777 {
  778         _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
  779 }
  780 /* }}} */
 781 
 782 /* {{{ _php_mb_regex_ereg_replace_exec */
 783 static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
 784 {
 785         zval *arg_pattern_zval;
 786 
 787         char *arg_pattern;
 788         size_t arg_pattern_len;
 789 
 790         char *replace;
 791         size_t replace_len;
 792 
 793         zend_fcall_info arg_replace_fci;
 794         zend_fcall_info_cache arg_replace_fci_cache;
 795 
 796         char *string;
 797         size_t string_len;
 798 
 799         char *p;
 800         php_mb_regex_t *re;
 801         OnigSyntaxType *syntax;
 802         OnigRegion *regs = NULL;
 803         smart_str out_buf = {0};
 804         smart_str eval_buf = {0};
 805         smart_str *pbuf;
 806         int i, err, eval, n;
 807         OnigUChar *pos;
 808         OnigUChar *string_lim;
 809         char *description = NULL;
 810         char pat_buf[4];
 811 
 812         const mbfl_encoding *enc;
 813 
 814         {
 815                 const char *current_enc_name;
 816                 current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
 817                 if (current_enc_name == NULL ||
 818                         (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
 819                         php_error_docref(NULL, E_WARNING, "Unknown error");
 820                         RETURN_FALSE;
 821                 }
 822         }
 823         eval = 0;
 824         {
 825                 char *option_str = NULL;
 826                 size_t option_str_len = 0;
 827 
 828                 if (!is_callable) {
 829                         if (zend_parse_parameters(ZEND_NUM_ARGS(), "zss|s",
 830                                                 &arg_pattern_zval,
 831                                                 &replace, &replace_len,
 832                                                 &string, &string_len,
 833                                                 &option_str, &option_str_len) == FAILURE) {
 834                                 RETURN_FALSE;
 835                         }
 836                 } else {
 837                         if (zend_parse_parameters(ZEND_NUM_ARGS(), "zfs|s",
 838                                                 &arg_pattern_zval,
 839                                                 &arg_replace_fci, &arg_replace_fci_cache,
 840                                                 &string, &string_len,
 841                                                 &option_str, &option_str_len) == FAILURE) {
 842                                 RETURN_FALSE;
 843                         }
 844                 }
 845 
 846                 if (option_str != NULL) {
 847                         _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
 848                 } else {
 849                         options |= MBREX(regex_default_options);
 850                         syntax = MBREX(regex_default_syntax);
 851                 }
 852         }
 853         if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) {
 854                 arg_pattern = Z_STRVAL_P(arg_pattern_zval);
 855                 arg_pattern_len = Z_STRLEN_P(arg_pattern_zval);
 856         } else {
 857                 /* FIXME: this code is not multibyte aware! */
 858                 convert_to_long_ex(arg_pattern_zval);
 859                 pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval);
 860                 pat_buf[1] = '\0';
 861                 pat_buf[2] = '\0';
 862                 pat_buf[3] = '\0';
 863 
 864                 arg_pattern = pat_buf;
 865                 arg_pattern_len = 1;
 866         }
 867         /* create regex pattern buffer */
 868         re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax);
 869         if (re == NULL) {
 870                 RETURN_FALSE;
 871         }
 872 
 873         if (eval || is_callable) {
 874                 pbuf = &eval_buf;
 875                 description = zend_make_compiled_string_description("mbregex replace");
 876         } else {
 877                 pbuf = &out_buf;
 878                 description = NULL;
 879         }
 880 
 881         if (is_callable) {
 882                 if (eval) {
 883                         php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback");
 884                         RETURN_FALSE;
 885                 }
 886         }
 887 
 888         /* do the actual work */
 889         err = 0;
 890         pos = (OnigUChar *)string;
 891         string_lim = (OnigUChar*)(string + string_len);
 892         regs = onig_region_new();
 893         while (err >= 0) {
 894                 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
 895                 if (err <= -2) {
 896                         OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
 897                         onig_error_code_to_str(err_str, err);
 898                         php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
 899                         break;
 900                 }
 901                 if (err >= 0) {
 902 #if moriyoshi_0
 903                         if (regs->beg[0] == regs->end[0]) {
 904                                 php_error_docref(NULL, E_WARNING, "Empty regular expression");
 905                                 break;
 906                         }
 907 #endif
 908                         /* copy the part of the string before the match */
 909                         smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
 910 
 911                         if (!is_callable) {
 912                                 /* copy replacement and backrefs */
 913                                 i = 0;
 914                                 p = replace;
 915                                 while (i < replace_len) {
 916                                         int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
 917                                         n = -1;
 918                                         if ((replace_len - i) >= 2 && fwd == 1 &&
 919                                         p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
 920                                                 n = p[1] - '0';
 921                                         }
 922                                         if (n >= 0 && n < regs->num_regs) {
 923                                                 if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
 924                                                         smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
 925                                                 }
 926                                                 p += 2;
 927                                                 i += 2;
 928                                         } else {
 929                                                 smart_str_appendl(pbuf, p, fwd);
 930                                                 p += fwd;
 931                                                 i += fwd;
 932                                         }
 933                                 }
 934                         }
 935 
 936                         if (eval) {
 937                                 zval v;
 938                                 /* null terminate buffer */
 939                                 smart_str_0(&eval_buf);
 940                                 /* do eval */
 941                                 if (zend_eval_stringl(ZSTR_VAL(eval_buf.s), ZSTR_LEN(eval_buf.s), &v, description) == FAILURE) {
 942                                         efree(description);
 943                                         php_error_docref(NULL,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, ZSTR_VAL(eval_buf.s));
 944                                         /* zend_error() does not return in this case */
 945                                 }
 946 
 947                                 /* result of eval */
 948                                 convert_to_string(&v);
 949                                 smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
 950                                 /* Clean up */
 951                                 ZSTR_LEN(eval_buf.s) = 0;
 952                                 zval_dtor(&v);
 953                         } else if (is_callable) {
 954                                 zval args[1];
 955                                 zval subpats, retval;
 956                                 int i;
 957 
 958                                 array_init(&subpats);
 959                                 for (i = 0; i < regs->num_regs; i++) {
 960                                         add_next_index_stringl(&subpats, string + regs->beg[i], regs->end[i] - regs->beg[i]);
 961                                 }
 962 
 963                                 ZVAL_COPY_VALUE(&args[0], &subpats);
 964                                 /* null terminate buffer */
 965                                 smart_str_0(&eval_buf);
 966 
 967                                 arg_replace_fci.param_count = 1;
 968                                 arg_replace_fci.params = args;
 969                                 arg_replace_fci.retval = &retval;
 970                                 if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
 971                                                 !Z_ISUNDEF(retval)) {
 972                                         convert_to_string_ex(&retval);
 973                                         smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
 974                                         if (eval_buf.s) {
 975                                                 ZSTR_LEN(eval_buf.s) = 0;
 976                                         }
 977                                         zval_ptr_dtor(&retval);
 978                                 } else {
 979                                         efree(description);
 980                                         if (!EG(exception)) {
 981                                                 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
 982                                         }
 983                                 }
 984                                 zval_ptr_dtor(&subpats);
 985                         }
 986 
 987                         n = regs->end[0];
 988                         if ((pos - (OnigUChar *)string) < n) {
 989                                 pos = (OnigUChar *)string + n;
 990                         } else {
 991                                 if (pos < string_lim) {
 992                                         smart_str_appendl(&out_buf, (char *)pos, 1);
 993                                 }
 994                                 pos++;
 995                         }
 996                 } else { /* nomatch */
 997                         /* stick that last bit of string on our output */
 998                         if (string_lim - pos > 0) {
 999                                 smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
1000                         }
1001                 }
1002                 onig_region_free(regs, 0);
1003         }
1004 
1005         if (description) {
1006                 efree(description);
1007         }
1008         if (regs != NULL) {
1009                 onig_region_free(regs, 1);
1010         }
1011         smart_str_free(&eval_buf);
1012 
1013         if (err <= -2) {
1014                 smart_str_free(&out_buf);
1015                 RETVAL_FALSE;
1016         } else if (out_buf.s) {
1017                 smart_str_0(&out_buf);
1018                 RETVAL_STR(out_buf.s);
1019         } else {
1020                 RETVAL_EMPTY_STRING();
1021         }
1022 }
1023 /* }}} */
1024 
1025 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
1026    Replace regular expression for multibyte string */
1027 PHP_FUNCTION(mb_ereg_replace)
1028 {
1029         _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1030 }
1031 /* }}} */
1032 
1033 /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
1034    Case insensitive replace regular expression for multibyte string */
1035 PHP_FUNCTION(mb_eregi_replace)
1036 {
1037         _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
1038 }
1039 /* }}} */
1040 
1041 /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
1042     regular expression for multibyte string using replacement callback */
1043 PHP_FUNCTION(mb_ereg_replace_callback)
1044 {
1045         _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1046 }
1047 /* }}} */
1048 
1049 /* {{{ proto array mb_split(string pattern, string string [, int limit])
1050    split multibyte string into array by regular expression */
1051 PHP_FUNCTION(mb_split)
1052 {
1053         char *arg_pattern;
1054         size_t arg_pattern_len;
1055         php_mb_regex_t *re;
1056         OnigRegion *regs = NULL;
1057         char *string;
1058         OnigUChar *pos, *chunk_pos;
1059         size_t string_len;
1060 
1061         int n, err;
1062         zend_long count = -1;
1063 
1064         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1065                 RETURN_FALSE;
1066         }
1067 
1068         if (count > 0) {
1069                 count--;
1070         }
1071 
1072         /* create regex pattern buffer */
1073         if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1074                 RETURN_FALSE;
1075         }
1076 
1077         array_init(return_value);
1078 
1079         chunk_pos = pos = (OnigUChar *)string;
1080         err = 0;
1081         regs = onig_region_new();
1082         /* churn through str, generating array entries as we go */
1083         while (count != 0 && (pos - (OnigUChar *)string) < (ptrdiff_t)string_len) {
1084                 int beg, end;
1085                 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1086                 if (err < 0) {
1087                         break;
1088                 }
1089                 beg = regs->beg[0], end = regs->end[0];
1090                 /* add it to the array */
1091                 if ((pos - (OnigUChar *)string) < end) {
1092                         if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1093                                 add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
1094                                 --count;
1095                         } else {
1096                                 err = -2;
1097                                 break;
1098                         }
1099                         /* point at our new starting point */
1100                         chunk_pos = pos = (OnigUChar *)string + end;
1101                 } else {
1102                         pos++;
1103                 }
1104                 onig_region_free(regs, 0);
1105         }
1106 
1107         onig_region_free(regs, 1);
1108 
1109         /* see if we encountered an error */
1110         if (err <= -2) {
1111                 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1112                 onig_error_code_to_str(err_str, err);
1113                 php_error_docref(NULL, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1114                 zval_dtor(return_value);
1115                 RETURN_FALSE;
1116         }
1117 
1118         /* otherwise we just have one last element to add to the array */
1119         n = ((OnigUChar *)(string + string_len) - chunk_pos);
1120         if (n > 0) {
1121                 add_next_index_stringl(return_value, (char *)chunk_pos, n);
1122         } else {
1123                 add_next_index_stringl(return_value, "", 0);
1124         }
1125 }
1126 /* }}} */
1127 
1128 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1129    Regular expression match for multibyte string */
1130 PHP_FUNCTION(mb_ereg_match)
1131 {
1132         char *arg_pattern;
1133         size_t arg_pattern_len;
1134 
1135         char *string;
1136         size_t string_len;
1137 
1138         php_mb_regex_t *re;
1139         OnigSyntaxType *syntax;
1140         OnigOptionType option = 0;
1141         int err;
1142 
1143         {
1144                 char *option_str = NULL;
1145                 size_t option_str_len = 0;
1146 
1147                 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s",
1148                                           &arg_pattern, &arg_pattern_len, &string, &string_len,
1149                                           &option_str, &option_str_len)==FAILURE) {
1150                         RETURN_FALSE;
1151                 }
1152 
1153                 if (option_str != NULL) {
1154                         _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1155                 } else {
1156                         option |= MBREX(regex_default_options);
1157                         syntax = MBREX(regex_default_syntax);
1158                 }
1159         }
1160 
1161         if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1162                 RETURN_FALSE;
1163         }
1164 
1165         /* match */
1166         err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1167         if (err >= 0) {
1168                 RETVAL_TRUE;
1169         } else {
1170                 RETVAL_FALSE;
1171         }
1172 }
1173 /* }}} */
1174 
1175 /* regex search */
1176 /* {{{ _php_mb_regex_ereg_search_exec */
1177 static void
1178 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1179 {
1180         char *arg_pattern = NULL, *arg_options = NULL;
1181         size_t arg_pattern_len, arg_options_len;
1182         int n, i, err, pos, len, beg, end;
1183         OnigOptionType option;
1184         OnigUChar *str;
1185         OnigSyntaxType *syntax;
1186 
1187         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1188                 return;
1189         }
1190 
1191         option = MBREX(regex_default_options);
1192 
1193         if (arg_options) {
1194                 option = 0;
1195                 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1196         }
1197 
1198         if (arg_pattern) {
1199                 /* create regex pattern buffer */
1200                 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax))) == NULL) {
1201                         RETURN_FALSE;
1202                 }
1203         }
1204 
1205         pos = MBREX(search_pos);
1206         str = NULL;
1207         len = 0;
1208         if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
1209                 str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1210                 len = Z_STRLEN(MBREX(search_str));
1211         }
1212 
1213         if (MBREX(search_re) == NULL) {
1214                 php_error_docref(NULL, E_WARNING, "No regex given");
1215                 RETURN_FALSE;
1216         }
1217 
1218         if (str == NULL) {
1219                 php_error_docref(NULL, E_WARNING, "No string given");
1220                 RETURN_FALSE;
1221         }
1222 
1223         if (MBREX(search_regs)) {
1224                 onig_region_free(MBREX(search_regs), 1);
1225         }
1226         MBREX(search_regs) = onig_region_new();
1227 
1228         err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
1229         if (err == ONIG_MISMATCH) {
1230                 MBREX(search_pos) = len;
1231                 RETVAL_FALSE;
1232         } else if (err <= -2) {
1233                 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1234                 onig_error_code_to_str(err_str, err);
1235                 php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1236                 RETVAL_FALSE;
1237         } else {
1238                 if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
1239                         php_error_docref(NULL, E_WARNING, "Empty regular expression");
1240                 }
1241                 switch (mode) {
1242                 case 1:
1243                         array_init(return_value);
1244                         beg = MBREX(search_regs)->beg[0];
1245                         end = MBREX(search_regs)->end[0];
1246                         add_next_index_long(return_value, beg);
1247                         add_next_index_long(return_value, end - beg);
1248                         break;
1249                 case 2:
1250                         array_init(return_value);
1251                         n = MBREX(search_regs)->num_regs;
1252                         for (i = 0; i < n; i++) {
1253                                 beg = MBREX(search_regs)->beg[i];
1254                                 end = MBREX(search_regs)->end[i];
1255                                 if (beg >= 0 && beg <= end && end <= len) {
1256                                         add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1257                                 } else {
1258                                         add_index_bool(return_value, i, 0);
1259                                 }
1260                         }
1261                         break;
1262                 default:
1263                         RETVAL_TRUE;
1264                         break;
1265                 }
1266                 end = MBREX(search_regs)->end[0];
1267                 if (pos < end) {
1268                         MBREX(search_pos) = end;
1269                 } else {
1270                         MBREX(search_pos) = pos + 1;
1271                 }
1272         }
1273 
1274         if (err < 0) {
1275                 onig_region_free(MBREX(search_regs), 1);
1276                 MBREX(search_regs) = (OnigRegion *)NULL;
1277         }
1278 }
1279 /* }}} */
1280 
1281 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1282    Regular expression search for multibyte string */
1283 PHP_FUNCTION(mb_ereg_search)
1284 {
1285         _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1286 }
1287 /* }}} */
1288 
1289 /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
1290    Regular expression search for multibyte string */
1291 PHP_FUNCTION(mb_ereg_search_pos)
1292 {
1293         _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1294 }
1295 /* }}} */
1296 
1297 /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
1298    Regular expression search for multibyte string */
1299 PHP_FUNCTION(mb_ereg_search_regs)
1300 {
1301         _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1302 }
1303 /* }}} */
1304 
1305 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1306    Initialize string and regular expression for search. */
1307 PHP_FUNCTION(mb_ereg_search_init)
1308 {
1309         size_t argc = ZEND_NUM_ARGS();
1310         zval *arg_str;
1311         char *arg_pattern = NULL, *arg_options = NULL;
1312         size_t arg_pattern_len = 0, arg_options_len = 0;
1313         OnigSyntaxType *syntax = NULL;
1314         OnigOptionType option;
1315 
1316         if (zend_parse_parameters(argc, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1317                 return;
1318         }
1319 
1320         if (argc > 1 && arg_pattern_len == 0) {
1321                 php_error_docref(NULL, E_WARNING, "Empty pattern");
1322                 RETURN_FALSE;
1323         }
1324 
1325         option = MBREX(regex_default_options);
1326         syntax = MBREX(regex_default_syntax);
1327 
1328         if (argc == 3) {
1329                 option = 0;
1330                 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1331         }
1332 
1333         if (argc > 1) {
1334                 /* create regex pattern buffer */
1335                 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax)) == NULL) {
1336                         RETURN_FALSE;
1337                 }
1338         }
1339 
1340         if (!Z_ISNULL(MBREX(search_str))) {
1341                 zval_ptr_dtor(&MBREX(search_str));
1342         }
1343 
1344         ZVAL_DUP(&MBREX(search_str), arg_str);
1345 
1346         MBREX(search_pos) = 0;
1347 
1348         if (MBREX(search_regs) != NULL) {
1349                 onig_region_free(MBREX(search_regs), 1);
1350                 MBREX(search_regs) = NULL;
1351         }
1352 
1353         RETURN_TRUE;
1354 }
1355 /* }}} */
1356 
1357 /* {{{ proto array mb_ereg_search_getregs(void)
1358    Get matched substring of the last time */
1359 PHP_FUNCTION(mb_ereg_search_getregs)
1360 {
1361         int n, i, len, beg, end;
1362         OnigUChar *str;
1363 
1364         if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
1365                 array_init(return_value);
1366 
1367                 str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1368                 len = Z_STRLEN(MBREX(search_str));
1369                 n = MBREX(search_regs)->num_regs;
1370                 for (i = 0; i < n; i++) {
1371                         beg = MBREX(search_regs)->beg[i];
1372                         end = MBREX(search_regs)->end[i];
1373                         if (beg >= 0 && beg <= end && end <= len) {
1374                                 add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1375                         } else {
1376                                 add_index_bool(return_value, i, 0);
1377                         }
1378                 }
1379         } else {
1380                 RETVAL_FALSE;
1381         }
1382 }
1383 /* }}} */
1384 
1385 /* {{{ proto int mb_ereg_search_getpos(void)
1386    Get search start position */
1387 PHP_FUNCTION(mb_ereg_search_getpos)
1388 {
1389         RETVAL_LONG(MBREX(search_pos));
1390 }
1391 /* }}} */
1392 
1393 /* {{{ proto bool mb_ereg_search_setpos(int position)
1394    Set search start position */
1395 PHP_FUNCTION(mb_ereg_search_setpos)
1396 {
1397         zend_long position;
1398 
1399         if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) {
1400                 return;
1401         }
1402 
1403         if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position >= Z_STRLEN(MBREX(search_str)))) {
1404                 php_error_docref(NULL, E_WARNING, "Position is out of range");
1405                 MBREX(search_pos) = 0;
1406                 RETURN_FALSE;
1407         }
1408 
1409         MBREX(search_pos) = position;
1410         RETURN_TRUE;
1411 }
1412 /* }}} */
1413 
1414 /* {{{ php_mb_regex_set_options */
1415 static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
1416 {
1417         if (prev_options != NULL) {
1418                 *prev_options = MBREX(regex_default_options);
1419         }
1420         if (prev_syntax != NULL) {
1421                 *prev_syntax = MBREX(regex_default_syntax);
1422         }
1423         MBREX(regex_default_options) = options;
1424         MBREX(regex_default_syntax) = syntax;
1425 }
1426 /* }}} */
1427 
1428 /* {{{ proto string mb_regex_set_options([string options])
1429    Set or get the default options for mbregex functions */
1430 PHP_FUNCTION(mb_regex_set_options)
1431 {
1432         OnigOptionType opt;
1433         OnigSyntaxType *syntax;
1434         char *string = NULL;
1435         size_t string_len;
1436         char buf[16];
1437 
1438         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s",
1439                                   &string, &string_len) == FAILURE) {
1440                 RETURN_FALSE;
1441         }
1442         if (string != NULL) {
1443                 opt = 0;
1444                 syntax = NULL;
1445                 _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1446                 _php_mb_regex_set_options(opt, syntax, NULL, NULL);
1447         } else {
1448                 opt = MBREX(regex_default_options);
1449                 syntax = MBREX(regex_default_syntax);
1450         }
1451         _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1452 
1453         RETVAL_STRING(buf);
1454 }
1455 /* }}} */
1456 
1457 #endif  /* HAVE_MBREGEX */
1458 
1459 /*
1460  * Local variables:
1461  * tab-width: 4
1462  * c-basic-offset: 4
1463  * End:
1464  * vim600: fdm=marker
1465  * vim: noet sw=4 ts=4
1466  */

/* [<][>][^][v][top][bottom][index][help] */