root/ext/mbstring/mbstring.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ZEND_DECLARE_MODULE_GLOBALS
  2. ZEND_TSRMLS_CACHE_DEFINE
  3. get_input_encoding
  4. get_output_encoding
  5. _php_mb_allocators_malloc
  6. _php_mb_allocators_realloc
  7. _php_mb_allocators_calloc
  8. _php_mb_allocators_free
  9. _php_mb_allocators_pmalloc
  10. _php_mb_allocators_prealloc
  11. _php_mb_allocators_pfree
  12. php_mb_parse_encoding_list
  13. php_mb_parse_encoding_array
  14. php_mb_zend_encoding_fetcher
  15. php_mb_zend_encoding_name_getter
  16. php_mb_zend_encoding_lexer_compatibility_checker
  17. php_mb_zend_encoding_detector
  18. php_mb_zend_encoding_converter
  19. php_mb_zend_encoding_list_parser
  20. php_mb_zend_internal_encoding_getter
  21. php_mb_zend_internal_encoding_setter
  22. _php_mb_compile_regex
  23. _php_mb_match_regex
  24. _php_mb_free_regex
  25. _php_mb_compile_regex
  26. _php_mb_match_regex
  27. _php_mb_free_regex
  28. php_mb_nls_get_default_detect_order_list
  29. php_mb_rfc1867_substring_conf
  30. php_mb_rfc1867_getword
  31. php_mb_rfc1867_getword_conf
  32. php_mb_rfc1867_basename
  33. PHP_INI_MH
  34. PHP_INI_MH
  35. PHP_INI_MH
  36. PHP_INI_MH
  37. _php_mb_ini_mbstring_internal_encoding_set
  38. PHP_INI_MH
  39. PHP_INI_MH
  40. PHP_INI_MH
  41. PHP_INI_MH
  42. PHP_INI_BEGIN
  43. PHP_GSHUTDOWN_FUNCTION
  44. PHP_MINIT_FUNCTION
  45. PHP_MSHUTDOWN_FUNCTION
  46. PHP_RINIT_FUNCTION
  47. PHP_RSHUTDOWN_FUNCTION
  48. PHP_MINFO_FUNCTION
  49. PHP_FUNCTION
  50. PHP_FUNCTION
  51. PHP_FUNCTION
  52. PHP_FUNCTION
  53. PHP_FUNCTION
  54. PHP_FUNCTION
  55. PHP_FUNCTION
  56. PHP_FUNCTION
  57. PHP_FUNCTION
  58. PHP_FUNCTION
  59. PHP_FUNCTION
  60. PHP_FUNCTION
  61. PHP_FUNCTION
  62. PHP_FUNCTION
  63. PHP_FUNCTION
  64. PHP_FUNCTION
  65. PHP_FUNCTION
  66. PHP_FUNCTION
  67. PHP_FUNCTION
  68. PHP_FUNCTION
  69. PHP_FUNCTION
  70. PHP_FUNCTION
  71. PHP_FUNCTION
  72. php_mb_convert_encoding
  73. PHP_FUNCTION
  74. PHP_FUNCTION
  75. PHP_FUNCTION
  76. PHP_FUNCTION
  77. PHP_FUNCTION
  78. PHP_FUNCTION
  79. PHP_FUNCTION
  80. PHP_FUNCTION
  81. PHP_FUNCTION
  82. PHP_FUNCTION
  83. PHP_FUNCTION
  84. php_mb_numericentity_exec
  85. PHP_FUNCTION
  86. PHP_FUNCTION
  87. _php_mbstr_parse_mail_headers
  88. PHP_FUNCTION
  89. PHP_FUNCTION
  90. PHP_FUNCTION
  91. php_mb_populate_current_detect_order_list
  92. php_mb_encoding_translation
  93. php_mb_mbchar_bytes_ex
  94. php_mb_mbchar_bytes
  95. php_mb_safe_strrchr_ex
  96. php_mb_safe_strrchr
  97. php_mb_stripos
  98. php_mb_gpc_get_detect_order
  99. php_mb_gpc_set_input_encoding

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 7                                                        |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 1997-2016 The PHP Group                                |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
  16    |         Rui Hirokawa <hirokawa@php.net>                              |
  17    +----------------------------------------------------------------------+
  18  */
  19 
  20 /* $Id$ */
  21 
  22 /*
  23  * PHP 4 Multibyte String module "mbstring"
  24  *
  25  * History:
  26  *   2000.5.19  Release php-4.0RC2_jstring-1.0
  27  *   2001.4.1   Release php4_jstring-1.0.91
  28  *   2001.4.30  Release php4_jstring-1.1 (contribute to The PHP Group)
  29  *   2001.5.1   Renamed from jstring to mbstring (hirokawa@php.net)
  30  */
  31 
  32 /*
  33  * PHP3 Internationalization support program.
  34  *
  35  * Copyright (c) 1999,2000 by the PHP3 internationalization team.
  36  * All rights reserved.
  37  *
  38  * See README_PHP3-i18n-ja for more detail.
  39  *
  40  * Authors:
  41  *    Hironori Sato <satoh@jpnnet.com>
  42  *    Shigeru Kanemoto <sgk@happysize.co.jp>
  43  *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
  44  *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
  45  */
  46 
  47 /* {{{ includes */
  48 #ifdef HAVE_CONFIG_H
  49 #include "config.h"
  50 #endif
  51 
  52 #include "php.h"
  53 #include "php_ini.h"
  54 #include "php_variables.h"
  55 #include "mbstring.h"
  56 #include "ext/standard/php_string.h"
  57 #include "ext/standard/php_mail.h"
  58 #include "ext/standard/exec.h"
  59 #include "ext/standard/url.h"
  60 #include "main/php_output.h"
  61 #include "ext/standard/info.h"
  62 
  63 #include "libmbfl/mbfl/mbfl_allocators.h"
  64 #include "libmbfl/mbfl/mbfilter_pass.h"
  65 
  66 #include "php_variables.h"
  67 #include "php_globals.h"
  68 #include "rfc1867.h"
  69 #include "php_content_types.h"
  70 #include "SAPI.h"
  71 #include "php_unicode.h"
  72 #include "TSRM.h"
  73 
  74 #include "mb_gpc.h"
  75 
  76 #if HAVE_MBREGEX
  77 #include "php_mbregex.h"
  78 #endif
  79 
  80 #include "zend_multibyte.h"
  81 
  82 #if HAVE_ONIG
  83 #include "php_onig_compat.h"
  84 #include <oniguruma.h>
  85 #undef UChar
  86 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
  87 #include "ext/pcre/php_pcre.h"
  88 #endif
  89 /* }}} */
  90 
  91 #if HAVE_MBSTRING
  92 
   93 /* {{{ prototypes
       * Declares the module globals and forward-declares the static routines
       * below so they can be named before their definitions.
       */
   94 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
   95 
   96 static PHP_GINIT_FUNCTION(mbstring);
   97 static PHP_GSHUTDOWN_FUNCTION(mbstring);
   98 
   99 static void php_mb_populate_current_detect_order_list(void);
  100 
  101 static int php_mb_encoding_translation(void);
  102 
  103 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
  104 
  105 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
  106 
  107 /* }}} */
 108 
 109 /* {{{ php_mb_default_identify_list */
  110 typedef struct _php_mb_nls_ident_list { /* one language -> encoding-list association */
  111         enum mbfl_no_language lang; /* language the entry belongs to */
  112         const enum mbfl_no_encoding *list; /* encodings associated with that language */
  113         size_t list_size; /* number of elements in list */
  114 } php_mb_nls_ident_list;
 115 
  116 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = { /* Japanese */
      /* NOTE(review): the element order in these per-language arrays is presumably
       * the priority used when detecting encodings -- confirm before reordering. */
  117         mbfl_no_encoding_ascii,
  118         mbfl_no_encoding_jis,
  119         mbfl_no_encoding_utf8,
  120         mbfl_no_encoding_euc_jp,
  121         mbfl_no_encoding_sjis
  122 };
  123 
  124 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = { /* Simplified Chinese */
  125         mbfl_no_encoding_ascii,
  126         mbfl_no_encoding_utf8,
  127         mbfl_no_encoding_euc_cn,
  128         mbfl_no_encoding_cp936
  129 };
  130 
  131 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = { /* Traditional Chinese */
  132         mbfl_no_encoding_ascii,
  133         mbfl_no_encoding_utf8,
  134         mbfl_no_encoding_euc_tw,
  135         mbfl_no_encoding_big5
  136 };
  137 
  138 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = { /* Korean */
  139         mbfl_no_encoding_ascii,
  140         mbfl_no_encoding_utf8,
  141         mbfl_no_encoding_euc_kr,
  142         mbfl_no_encoding_uhc
  143 };
  144 
  145 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = { /* Russian */
  146         mbfl_no_encoding_ascii,
  147         mbfl_no_encoding_utf8,
  148         mbfl_no_encoding_koi8r,
  149         mbfl_no_encoding_cp1251,
  150         mbfl_no_encoding_cp866
  151 };
  152 
  153 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = { /* Armenian */
  154         mbfl_no_encoding_ascii,
  155         mbfl_no_encoding_utf8,
  156         mbfl_no_encoding_armscii8
  157 };
  158 
  159 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = { /* Turkish */
  160         mbfl_no_encoding_ascii,
  161         mbfl_no_encoding_utf8,
  162         mbfl_no_encoding_cp1254,
  163         mbfl_no_encoding_8859_9
  164 };
  165 
  166 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = { /* Ukrainian */
  167         mbfl_no_encoding_ascii,
  168         mbfl_no_encoding_utf8,
  169         mbfl_no_encoding_koi8u
  170 };
  171 
  172 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = { /* language-neutral */
  173         mbfl_no_encoding_ascii,
  174         mbfl_no_encoding_utf8
  175 };
 176 
 177 
  178 static const php_mb_nls_ident_list php_mb_default_identify_list[] = { /* rows: { language, encoding array, element count } */
      /* Each count is computed with sizeof(array) / sizeof(array[0]) so it follows
       * the array definition automatically. */
  179         { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
  180         { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
  181         { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
  182         { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
  183         { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
  184         { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
  185         { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
  186         { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
  187         { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
  188 };
 189 
 190 /* }}} */
 191 
  192 /* {{{ mb_overload_def mb_ovld[]
       * Table pairing standard functions with their mbstring counterparts. Each
       * row holds, in order: an MB_OVERLOAD_* flag, the standard function name,
       * the mb_* function name, and an "mb_orig_*" name.
       * NOTE(review): judging by the names, the last column is presumably the
       * alias under which the original function stays reachable while the
       * overload is active -- confirm against the code that walks this table.
       * The final row is all zero/NULL (presumably the end-of-table marker).
       */
  193 static const struct mb_overload_def mb_ovld[] = {
  194         {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
  195         {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
  196         {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
  197         {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
  198         {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
  199         {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
  200         {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
  201         {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
  202         {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
  203         {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
  204         {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
  205         {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
  206         {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
  207         {0, NULL, NULL, NULL}
  208 };
  209 /* }}} */
 210 
  211 /* {{{ arginfo
       * Argument metadata blocks, one per userland function. In
       * ZEND_BEGIN_ARG_INFO_EX the last macro argument is the number of required
       * parameters; in ZEND_ARG_INFO the first argument is the pass-by-reference
       * flag and the second is the parameter name. The arginfo_mb_* blocks up to
       * mb_check_encoding are referenced from mbstring_functions[] further down.
       * NOTE(review): the mb_regex_* / mb_ereg* / mb_split blocks are not
       * referenced in the visible part of this file; presumably they are
       * consumed through PHP_MBREGEX_FUNCTION_ENTRIES -- confirm.
       */
  212 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
  213         ZEND_ARG_INFO(0, language)
  214 ZEND_END_ARG_INFO()
  215 
  216 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
  217         ZEND_ARG_INFO(0, encoding)
  218 ZEND_END_ARG_INFO()
  219 
  220 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
  221         ZEND_ARG_INFO(0, type)
  222 ZEND_END_ARG_INFO()
  223 
  224 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
  225         ZEND_ARG_INFO(0, encoding)
  226 ZEND_END_ARG_INFO()
  227 
  228 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
  229         ZEND_ARG_INFO(0, encoding)
  230 ZEND_END_ARG_INFO()
  231 
  232 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
  233         ZEND_ARG_INFO(0, substchar)
  234 ZEND_END_ARG_INFO()
  235 
  236 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
  237         ZEND_ARG_INFO(0, encoding)
  238 ZEND_END_ARG_INFO()
  239 
  240 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
  241         ZEND_ARG_INFO(0, encoded_string)
  242         ZEND_ARG_INFO(1, result) /* by reference */
  243 ZEND_END_ARG_INFO()
  244 
  245 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
  246         ZEND_ARG_INFO(0, contents)
  247         ZEND_ARG_INFO(0, status)
  248 ZEND_END_ARG_INFO()
  249 
  250 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
  251         ZEND_ARG_INFO(0, str)
  252         ZEND_ARG_INFO(0, encoding)
  253 ZEND_END_ARG_INFO()
  254 
  255 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
  256         ZEND_ARG_INFO(0, haystack)
  257         ZEND_ARG_INFO(0, needle)
  258         ZEND_ARG_INFO(0, offset)
  259         ZEND_ARG_INFO(0, encoding)
  260 ZEND_END_ARG_INFO()
  261 
  262 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
  263         ZEND_ARG_INFO(0, haystack)
  264         ZEND_ARG_INFO(0, needle)
  265         ZEND_ARG_INFO(0, offset)
  266         ZEND_ARG_INFO(0, encoding)
  267 ZEND_END_ARG_INFO()
  268 
  269 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
  270         ZEND_ARG_INFO(0, haystack)
  271         ZEND_ARG_INFO(0, needle)
  272         ZEND_ARG_INFO(0, offset)
  273         ZEND_ARG_INFO(0, encoding)
  274 ZEND_END_ARG_INFO()
  275 
  276 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
  277         ZEND_ARG_INFO(0, haystack)
  278         ZEND_ARG_INFO(0, needle)
  279         ZEND_ARG_INFO(0, offset)
  280         ZEND_ARG_INFO(0, encoding)
  281 ZEND_END_ARG_INFO()
  282 
  283 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
  284         ZEND_ARG_INFO(0, haystack)
  285         ZEND_ARG_INFO(0, needle)
  286         ZEND_ARG_INFO(0, part)
  287         ZEND_ARG_INFO(0, encoding)
  288 ZEND_END_ARG_INFO()
  289 
  290 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
  291         ZEND_ARG_INFO(0, haystack)
  292         ZEND_ARG_INFO(0, needle)
  293         ZEND_ARG_INFO(0, part)
  294         ZEND_ARG_INFO(0, encoding)
  295 ZEND_END_ARG_INFO()
  296 
  297 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
  298         ZEND_ARG_INFO(0, haystack)
  299         ZEND_ARG_INFO(0, needle)
  300         ZEND_ARG_INFO(0, part)
  301         ZEND_ARG_INFO(0, encoding)
  302 ZEND_END_ARG_INFO()
  303 
  304 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
  305         ZEND_ARG_INFO(0, haystack)
  306         ZEND_ARG_INFO(0, needle)
  307         ZEND_ARG_INFO(0, part)
  308         ZEND_ARG_INFO(0, encoding)
  309 ZEND_END_ARG_INFO()
  310 
  311 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
  312         ZEND_ARG_INFO(0, haystack)
  313         ZEND_ARG_INFO(0, needle)
  314         ZEND_ARG_INFO(0, encoding)
  315 ZEND_END_ARG_INFO()
  316 
  317 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
  318         ZEND_ARG_INFO(0, str)
  319         ZEND_ARG_INFO(0, start)
  320         ZEND_ARG_INFO(0, length)
  321         ZEND_ARG_INFO(0, encoding)
  322 ZEND_END_ARG_INFO()
  323 
  324 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
  325         ZEND_ARG_INFO(0, str)
  326         ZEND_ARG_INFO(0, start)
  327         ZEND_ARG_INFO(0, length)
  328         ZEND_ARG_INFO(0, encoding)
  329 ZEND_END_ARG_INFO()
  330 
  331 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
  332         ZEND_ARG_INFO(0, str)
  333         ZEND_ARG_INFO(0, encoding)
  334 ZEND_END_ARG_INFO()
  335 
  336 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
  337         ZEND_ARG_INFO(0, str)
  338         ZEND_ARG_INFO(0, start)
  339         ZEND_ARG_INFO(0, width)
  340         ZEND_ARG_INFO(0, trimmarker)
  341         ZEND_ARG_INFO(0, encoding)
  342 ZEND_END_ARG_INFO()
  343 
  344 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
  345         ZEND_ARG_INFO(0, str)
  346         ZEND_ARG_INFO(0, to)
  347         ZEND_ARG_INFO(0, from)
  348 ZEND_END_ARG_INFO()
  349 
  350 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
  351         ZEND_ARG_INFO(0, sourcestring)
  352         ZEND_ARG_INFO(0, mode)
  353         ZEND_ARG_INFO(0, encoding)
  354 ZEND_END_ARG_INFO()
  355 
  356 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
  357         ZEND_ARG_INFO(0, sourcestring)
  358         ZEND_ARG_INFO(0, encoding)
  359 ZEND_END_ARG_INFO()
  360 
  361 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
  362         ZEND_ARG_INFO(0, sourcestring)
  363         ZEND_ARG_INFO(0, encoding)
  364 ZEND_END_ARG_INFO()
  365 
  366 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
  367         ZEND_ARG_INFO(0, str)
  368         ZEND_ARG_INFO(0, encoding_list)
  369         ZEND_ARG_INFO(0, strict)
  370 ZEND_END_ARG_INFO()
  371 
  372 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
  373 ZEND_END_ARG_INFO()
  374 
  375 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
  376         ZEND_ARG_INFO(0, encoding)
  377 ZEND_END_ARG_INFO()
  378 
  379 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
  380         ZEND_ARG_INFO(0, str)
  381         ZEND_ARG_INFO(0, charset)
  382         ZEND_ARG_INFO(0, transfer)
  383         ZEND_ARG_INFO(0, linefeed)
  384         ZEND_ARG_INFO(0, indent)
  385 ZEND_END_ARG_INFO()
  386 
  387 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
  388         ZEND_ARG_INFO(0, string)
  389 ZEND_END_ARG_INFO()
  390 
  391 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
  392         ZEND_ARG_INFO(0, str)
  393         ZEND_ARG_INFO(0, option)
  394         ZEND_ARG_INFO(0, encoding)
  395 ZEND_END_ARG_INFO()
  396 
  397 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
  398         ZEND_ARG_INFO(0, to)
  399         ZEND_ARG_INFO(0, from)
  400         ZEND_ARG_VARIADIC_INFO(1, vars) /* variadic, by reference */
  401 ZEND_END_ARG_INFO()
  402 
  403 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
  404         ZEND_ARG_INFO(0, string)
  405         ZEND_ARG_INFO(0, convmap)
  406         ZEND_ARG_INFO(0, encoding)
  407         ZEND_ARG_INFO(0, is_hex)
  408 ZEND_END_ARG_INFO()
  409 
  410 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
  411         ZEND_ARG_INFO(0, string)
  412         ZEND_ARG_INFO(0, convmap)
  413         ZEND_ARG_INFO(0, encoding)
  414 ZEND_END_ARG_INFO()
  415 
  416 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
  417         ZEND_ARG_INFO(0, to)
  418         ZEND_ARG_INFO(0, subject)
  419         ZEND_ARG_INFO(0, message)
  420         ZEND_ARG_INFO(0, additional_headers)
  421         ZEND_ARG_INFO(0, additional_parameters)
  422 ZEND_END_ARG_INFO()
  423 
  424 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
  425         ZEND_ARG_INFO(0, type)
  426 ZEND_END_ARG_INFO()
  427 
  428 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
  429         ZEND_ARG_INFO(0, var)
  430         ZEND_ARG_INFO(0, encoding)
  431 ZEND_END_ARG_INFO()
  432 
  433 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
  434         ZEND_ARG_INFO(0, encoding)
  435 ZEND_END_ARG_INFO()
  436 
  437 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
  438         ZEND_ARG_INFO(0, pattern)
  439         ZEND_ARG_INFO(0, string)
  440         ZEND_ARG_INFO(1, registers) /* by reference */
  441 ZEND_END_ARG_INFO()
  442 
  443 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
  444         ZEND_ARG_INFO(0, pattern)
  445         ZEND_ARG_INFO(0, string)
  446         ZEND_ARG_INFO(1, registers) /* by reference */
  447 ZEND_END_ARG_INFO()
  448 
  449 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
  450         ZEND_ARG_INFO(0, pattern)
  451         ZEND_ARG_INFO(0, replacement)
  452         ZEND_ARG_INFO(0, string)
  453         ZEND_ARG_INFO(0, option)
  454 ZEND_END_ARG_INFO()
  455 
  456 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
  457         ZEND_ARG_INFO(0, pattern)
  458         ZEND_ARG_INFO(0, replacement)
  459         ZEND_ARG_INFO(0, string)
  460 ZEND_END_ARG_INFO()
  461 
  462 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
  463         ZEND_ARG_INFO(0, pattern)
  464         ZEND_ARG_INFO(0, callback)
  465         ZEND_ARG_INFO(0, string)
  466         ZEND_ARG_INFO(0, option)
  467 ZEND_END_ARG_INFO()
  468 
  469 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
  470         ZEND_ARG_INFO(0, pattern)
  471         ZEND_ARG_INFO(0, string)
  472         ZEND_ARG_INFO(0, limit)
  473 ZEND_END_ARG_INFO()
  474 
  475 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
  476         ZEND_ARG_INFO(0, pattern)
  477         ZEND_ARG_INFO(0, string)
  478         ZEND_ARG_INFO(0, option)
  479 ZEND_END_ARG_INFO()
  480 
  481 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
  482         ZEND_ARG_INFO(0, pattern)
  483         ZEND_ARG_INFO(0, option)
  484 ZEND_END_ARG_INFO()
  485 
  486 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
  487         ZEND_ARG_INFO(0, pattern)
  488         ZEND_ARG_INFO(0, option)
  489 ZEND_END_ARG_INFO()
  490 
  491 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
  492         ZEND_ARG_INFO(0, pattern)
  493         ZEND_ARG_INFO(0, option)
  494 ZEND_END_ARG_INFO()
  495 
  496 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
  497         ZEND_ARG_INFO(0, string)
  498         ZEND_ARG_INFO(0, pattern)
  499         ZEND_ARG_INFO(0, option)
  500 ZEND_END_ARG_INFO()
  501 
  502 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
  503 ZEND_END_ARG_INFO()
  504 
  505 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
  506 ZEND_END_ARG_INFO()
  507 
  508 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
  509         ZEND_ARG_INFO(0, position)
  510 ZEND_END_ARG_INFO()
  511 
  512 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
  513         ZEND_ARG_INFO(0, options)
  514 ZEND_END_ARG_INFO()
  515 /* }}} */
 516 
  517 /* {{{ zend_function_entry mbstring_functions[]
       * Function table referenced by mbstring_module_entry. Each PHP_FE row pairs
       * a function name with the arginfo block of the same name defined above.
       * When HAVE_MBREGEX is set, the rows supplied by PHP_MBREGEX_FUNCTION_ENTRIES
       * are appended before the PHP_FE_END terminator.
       */
  518 const zend_function_entry mbstring_functions[] = {
  519         PHP_FE(mb_convert_case,                 arginfo_mb_convert_case)
  520         PHP_FE(mb_strtoupper,                   arginfo_mb_strtoupper)
  521         PHP_FE(mb_strtolower,                   arginfo_mb_strtolower)
  522         PHP_FE(mb_language,                             arginfo_mb_language)
  523         PHP_FE(mb_internal_encoding,    arginfo_mb_internal_encoding)
  524         PHP_FE(mb_http_input,                   arginfo_mb_http_input)
  525         PHP_FE(mb_http_output,                  arginfo_mb_http_output)
  526         PHP_FE(mb_detect_order,                 arginfo_mb_detect_order)
  527         PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
  528         PHP_FE(mb_parse_str,                    arginfo_mb_parse_str)
  529         PHP_FE(mb_output_handler,               arginfo_mb_output_handler)
  530         PHP_FE(mb_preferred_mime_name,  arginfo_mb_preferred_mime_name)
  531         PHP_FE(mb_strlen,                               arginfo_mb_strlen)
  532         PHP_FE(mb_strpos,                               arginfo_mb_strpos)
  533         PHP_FE(mb_strrpos,                              arginfo_mb_strrpos)
  534         PHP_FE(mb_stripos,                              arginfo_mb_stripos)
  535         PHP_FE(mb_strripos,                             arginfo_mb_strripos)
  536         PHP_FE(mb_strstr,                               arginfo_mb_strstr)
  537         PHP_FE(mb_strrchr,                              arginfo_mb_strrchr)
  538         PHP_FE(mb_stristr,                              arginfo_mb_stristr)
  539         PHP_FE(mb_strrichr,                             arginfo_mb_strrichr)
  540         PHP_FE(mb_substr_count,                 arginfo_mb_substr_count)
  541         PHP_FE(mb_substr,                               arginfo_mb_substr)
  542         PHP_FE(mb_strcut,                               arginfo_mb_strcut)
  543         PHP_FE(mb_strwidth,                             arginfo_mb_strwidth)
  544         PHP_FE(mb_strimwidth,                   arginfo_mb_strimwidth)
  545         PHP_FE(mb_convert_encoding,             arginfo_mb_convert_encoding)
  546         PHP_FE(mb_detect_encoding,              arginfo_mb_detect_encoding)
  547         PHP_FE(mb_list_encodings,               arginfo_mb_list_encodings)
  548         PHP_FE(mb_encoding_aliases,             arginfo_mb_encoding_aliases)
  549         PHP_FE(mb_convert_kana,                 arginfo_mb_convert_kana)
  550         PHP_FE(mb_encode_mimeheader,    arginfo_mb_encode_mimeheader)
  551         PHP_FE(mb_decode_mimeheader,    arginfo_mb_decode_mimeheader)
  552         PHP_FE(mb_convert_variables,    arginfo_mb_convert_variables)
  553         PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
  554         PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
  555         PHP_FE(mb_send_mail,                    arginfo_mb_send_mail)
  556         PHP_FE(mb_get_info,                             arginfo_mb_get_info)
  557         PHP_FE(mb_check_encoding,               arginfo_mb_check_encoding)
  558 #if HAVE_MBREGEX
  559         PHP_MBREGEX_FUNCTION_ENTRIES
  560 #endif
  561         PHP_FE_END
  562 };
  563 /* }}} */
 564 
  565 /* {{{ zend_module_entry mbstring_module_entry
       * Module descriptor for the extension. The initializer is positional, so
       * the order of the rows below must not be changed.
       */
  566 zend_module_entry mbstring_module_entry = {
  567         STANDARD_MODULE_HEADER,
  568         "mbstring", /* extension name */
  569         mbstring_functions, /* function table defined above */
  570         PHP_MINIT(mbstring), /* module startup */
  571         PHP_MSHUTDOWN(mbstring), /* module shutdown */
  572         PHP_RINIT(mbstring), /* request startup */
  573         PHP_RSHUTDOWN(mbstring), /* request shutdown */
  574         PHP_MINFO(mbstring), /* module info output */
  575         PHP_MBSTRING_VERSION,
  576         PHP_MODULE_GLOBALS(mbstring),
  577         PHP_GINIT(mbstring), /* globals constructor */
  578         PHP_GSHUTDOWN(mbstring), /* globals destructor */
  579         NULL, /* no post-deactivate callback */
  580         STANDARD_MODULE_PROPERTIES_EX
  581 };
  582 /* }}} */
 583 
  584 /* {{{ static sapi_post_entry php_post_entries[]
       * POST content-type table using the stock handlers. It differs from
       * mbstr_post_entries[] (further down) only in the handler of the first row:
       * php_std_post_handler here, php_mb_post_handler there.
       * Row layout: { content type, its length, reader, handler }; the all-NULL
       * row closes the table.
       */
  585 static sapi_post_entry php_post_entries[] = {
  586         { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
  587         { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
  588         { NULL, 0, NULL, NULL }
  589 };
  590 /* }}} */
 591 
  592 #ifdef COMPILE_DL_MBSTRING /* only when COMPILE_DL_MBSTRING is defined (presumably the shared-module build) */
  593 #ifdef ZTS /* thread-safe builds additionally define the TSRMLS cache */
  594 ZEND_TSRMLS_CACHE_DEFINE()
  595 #endif
  596 ZEND_GET_MODULE(mbstring)
  597 #endif
 598 
 599 static char *get_internal_encoding(void) {
 600         if (PG(internal_encoding) && PG(internal_encoding)[0]) {
 601                 return PG(internal_encoding);
 602         } else if (SG(default_charset)) {
 603                 return SG(default_charset);
 604         }
 605         return "";
 606 }
 607 
 608 static char *get_input_encoding(void) {
 609         if (PG(input_encoding) && PG(input_encoding)[0]) {
 610                 return PG(input_encoding);
 611         } else if (SG(default_charset)) {
 612                 return SG(default_charset);
 613         }
 614         return "";
 615 }
 616 
 617 static char *get_output_encoding(void) {
 618         if (PG(output_encoding) && PG(output_encoding)[0]) {
 619                 return PG(output_encoding);
 620         } else if (SG(default_charset)) {
 621                 return SG(default_charset);
 622         }
 623         return "";
 624 }
 625 
 626 
  627 /* {{{ allocators
       * Allocation hooks collected in the _php_mb_allocators table at the end of
       * this section. malloc/realloc/calloc/free forward to emalloc(), erealloc(),
       * ecalloc() and efree(); the p* variants forward to pemalloc(), perealloc()
       * and pefree() with the persistent flag fixed to 1. All sizes are passed
       * as unsigned int.
       * NOTE(review): the table is presumably installed into libmbfl elsewhere in
       * this file -- confirm where it is assigned.
       */
  628 static void *_php_mb_allocators_malloc(unsigned int sz)
  629 {
  630         return emalloc(sz);
  631 }
  632 
  633 static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
  634 {
  635         return erealloc(ptr, sz);
  636 }
  637 
  638 static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
  639 {
  640         return ecalloc(nelems, szelem);
  641 }
  642 
  643 static void _php_mb_allocators_free(void *ptr)
  644 {
  645         efree(ptr);
  646 }
  647 
  648 static void *_php_mb_allocators_pmalloc(unsigned int sz)
  649 {
  650         return pemalloc(sz, 1); /* 1 = persistent */
  651 }
  652 
  653 static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
  654 {
  655         return perealloc(ptr, sz, 1); /* 1 = persistent */
  656 }
  657 
  658 static void _php_mb_allocators_pfree(void *ptr)
  659 {
  660         pefree(ptr, 1); /* 1 = persistent */
  661 }
  662 
  663 static mbfl_allocators _php_mb_allocators = { /* positional: keep in this order */
  664         _php_mb_allocators_malloc,
  665         _php_mb_allocators_realloc,
  666         _php_mb_allocators_calloc,
  667         _php_mb_allocators_free,
  668         _php_mb_allocators_pmalloc,
  669         _php_mb_allocators_prealloc,
  670         _php_mb_allocators_pfree
  671 };
  672 /* }}} */
 673 
  674 /* {{{ static sapi_post_entry mbstr_post_entries[]
       * POST content-type table that routes DEFAULT_POST_CONTENT_TYPE bodies to
       * php_mb_post_handler; the multipart row is the same as in
       * php_post_entries[] above.
       * Row layout: { content type, its length, reader, handler }; the all-NULL
       * row closes the table.
       */
  675 static sapi_post_entry mbstr_post_entries[] = {
  676         { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
  677         { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
  678         { NULL, 0, NULL, NULL }
  679 };
  680 /* }}} */
 681 
 682 /* {{{ static int php_mb_parse_encoding_list()
 683  *  Return 0 if input contains any illegal encoding, otherwise 1.
 684  *  Even if any illegal encoding is detected the result may contain a list
 685  *  of parsed encodings.
 686  */
 687 static int
 688 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
 689 {
 690         int size, bauto, ret = SUCCESS;
 691         size_t n;
 692         char *p, *p1, *p2, *endp, *tmpstr;
 693         const mbfl_encoding **entry, **list;
 694 
 695         list = NULL;
 696         if (value == NULL || value_length <= 0) {
 697                 if (return_list) {
 698                         *return_list = NULL;
 699                 }
 700                 if (return_size) {
 701                         *return_size = 0;
 702                 }
 703                 return FAILURE;
 704         } else {
 705                 /* copy the value string for work */
 706                 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
 707                         tmpstr = (char *)estrndup(value+1, value_length-2);
 708                         value_length -= 2;
 709                 }
 710                 else
 711                         tmpstr = (char *)estrndup(value, value_length);
 712                 if (tmpstr == NULL) {
 713                         return FAILURE;
 714                 }
 715                 /* count the number of listed encoding names */
 716                 endp = tmpstr + value_length;
 717                 n = 1;
 718                 p1 = tmpstr;
 719                 while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
 720                         p1 = p2 + 1;
 721                         n++;
 722                 }
 723                 size = n + MBSTRG(default_detect_order_list_size);
 724                 /* make list */
 725                 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
 726                 if (list != NULL) {
 727                         entry = list;
 728                         n = 0;
 729                         bauto = 0;
 730                         p1 = tmpstr;
 731                         do {
 732                                 p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
 733                                 if (p == NULL) {
 734                                         p = endp;
 735                                 }
 736                                 *p = '\0';
 737                                 /* trim spaces */
 738                                 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
 739                                         p1++;
 740                                 }
 741                                 p--;
 742                                 while (p > p1 && (*p == ' ' || *p == '\t')) {
 743                                         *p = '\0';
 744                                         p--;
 745                                 }
 746                                 /* convert to the encoding number and check encoding */
 747                                 if (strcasecmp(p1, "auto") == 0) {
 748                                         if (!bauto) {
 749                                                 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
 750                                                 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
 751                                                 size_t i;
 752                                                 bauto = 1;
 753                                                 for (i = 0; i < identify_list_size; i++) {
 754                                                         *entry++ = mbfl_no2encoding(*src++);
 755                                                         n++;
 756                                                 }
 757                                         }
 758                                 } else {
 759                                         const mbfl_encoding *encoding = mbfl_name2encoding(p1);
 760                                         if (encoding) {
 761                                                 *entry++ = encoding;
 762                                                 n++;
 763                                         } else {
 764                                                 ret = 0;
 765                                         }
 766                                 }
 767                                 p1 = p2 + 1;
 768                         } while (n < size && p2 != NULL);
 769                         if (n > 0) {
 770                                 if (return_list) {
 771                                         *return_list = list;
 772                                 } else {
 773                                         pefree(list, persistent);
 774                                 }
 775                         } else {
 776                                 pefree(list, persistent);
 777                                 if (return_list) {
 778                                         *return_list = NULL;
 779                                 }
 780                                 ret = 0;
 781                         }
 782                         if (return_size) {
 783                                 *return_size = n;
 784                         }
 785                 } else {
 786                         if (return_list) {
 787                                 *return_list = NULL;
 788                         }
 789                         if (return_size) {
 790                                 *return_size = 0;
 791                         }
 792                         ret = 0;
 793                 }
 794                 efree(tmpstr);
 795         }
 796 
 797         return ret;
 798 }
 799 /* }}} */
 800 
 801 /* {{{ static int php_mb_parse_encoding_array()
 802  *  Return 0 if input contains any illegal encoding, otherwise 1.
 803  *  Even if any illegal encoding is detected the result may contain a list
 804  *  of parsed encodings.
 805  */
 806 static int
 807 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent)
 808 {
 809         zval *hash_entry;
 810         HashTable *target_hash;
 811         int i, n, size, bauto, ret = SUCCESS;
 812         const mbfl_encoding **list, **entry;
 813 
 814         list = NULL;
 815         if (Z_TYPE_P(array) == IS_ARRAY) {
 816                 target_hash = Z_ARRVAL_P(array);
 817                 i = zend_hash_num_elements(target_hash);
 818                 size = i + MBSTRG(default_detect_order_list_size);
 819                 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
 820                 if (list != NULL) {
 821                         entry = list;
 822                         bauto = 0;
 823                         n = 0;
 824                         ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
 825                                 convert_to_string_ex(hash_entry);
 826                                 if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
 827                                         if (!bauto) {
 828                                                 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
 829                                                 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
 830                                                 size_t j;
 831 
 832                                                 bauto = 1;
 833                                                 for (j = 0; j < identify_list_size; j++) {
 834                                                         *entry++ = mbfl_no2encoding(*src++);
 835                                                         n++;
 836                                                 }
 837                                         }
 838                                 } else {
 839                                         const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
 840                                         if (encoding) {
 841                                                 *entry++ = encoding;
 842                                                 n++;
 843                                         } else {
 844                                                 ret = FAILURE;
 845                                         }
 846                                 }
 847                                 i--;
 848                         } ZEND_HASH_FOREACH_END();
 849                         if (n > 0) {
 850                                 if (return_list) {
 851                                         *return_list = list;
 852                                 } else {
 853                                         pefree(list, persistent);
 854                                 }
 855                         } else {
 856                                 pefree(list, persistent);
 857                                 if (return_list) {
 858                                         *return_list = NULL;
 859                                 }
 860                                 ret = FAILURE;
 861                         }
 862                         if (return_size) {
 863                                 *return_size = n;
 864                         }
 865                 } else {
 866                         if (return_list) {
 867                                 *return_list = NULL;
 868                         }
 869                         if (return_size) {
 870                                 *return_size = 0;
 871                         }
 872                         ret = FAILURE;
 873                 }
 874         }
 875 
 876         return ret;
 877 }
 878 /* }}} */
 879 
 880 /* {{{ zend_multibyte interface */
 881 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
 882 {
 883         return (const zend_encoding*)mbfl_name2encoding(encoding_name);
 884 }
 885 
 886 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
 887 {
 888         return ((const mbfl_encoding *)encoding)->name;
 889 }
 890 
 891 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
 892 {
 893         const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
 894         if (encoding->flag & MBFL_ENCTYPE_SBCS) {
 895                 return 1;
 896         }
 897         if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
 898                 return 1;
 899         }
 900         return 0;
 901 }
 902 
 903 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
 904 {
 905         mbfl_string string;
 906 
 907         if (!list) {
 908                 list = (const zend_encoding **)MBSTRG(current_detect_order_list);
 909                 list_size = MBSTRG(current_detect_order_list_size);
 910         }
 911 
 912         mbfl_string_init(&string);
 913         string.no_language = MBSTRG(language);
 914         string.val = (unsigned char *)arg_string;
 915         string.len = arg_length;
 916         return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
 917 }
 918 
 919 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
 920 {
 921         mbfl_string string, result;
 922         mbfl_buffer_converter *convd;
 923         int status, loc;
 924 
 925         /* new encoding */
 926         /* initialize string */
 927         mbfl_string_init(&string);
 928         mbfl_string_init(&result);
 929         string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
 930         string.no_language = MBSTRG(language);
 931         string.val = (unsigned char*)from;
 932         string.len = from_length;
 933 
 934         /* initialize converter */
 935         convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
 936         if (convd == NULL) {
 937                 return -1;
 938         }
 939         mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
 940         mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
 941 
 942         /* do it */
 943         status = mbfl_buffer_converter_feed2(convd, &string, &loc);
 944         if (status) {
 945                 mbfl_buffer_converter_delete(convd);
 946                 return (size_t)-1;
 947         }
 948 
 949         mbfl_buffer_converter_flush(convd);
 950         if (!mbfl_buffer_converter_result(convd, &result)) {
 951                 mbfl_buffer_converter_delete(convd);
 952                 return (size_t)-1;
 953         }
 954 
 955         *to = result.val;
 956         *to_length = result.len;
 957 
 958         mbfl_buffer_converter_delete(convd);
 959 
 960         return loc;
 961 }
 962 
 963 static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent)
 964 {
 965         return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent);
 966 }
 967 
 968 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
 969 {
 970         return (const zend_encoding *)MBSTRG(internal_encoding);
 971 }
 972 
 973 static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
 974 {
 975         MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
 976         return SUCCESS;
 977 }
 978 
 979 static zend_multibyte_functions php_mb_zend_multibyte_functions = { /* mbstring's callbacks for the zend_multibyte interface; the initializer is positional, so order matters */
 980         "mbstring", /* presumably the provider name -- field names are not visible here */
 981         php_mb_zend_encoding_fetcher, /* name -> encoding via mbfl_name2encoding() */
 982         php_mb_zend_encoding_name_getter, /* encoding -> its name string */
 983         php_mb_zend_encoding_lexer_compatibility_checker, /* 1 for SBCS, or MBCS without GL_UNSAFE */
 984         php_mb_zend_encoding_detector, /* identifies a buffer's encoding from a candidate list */
 985         php_mb_zend_encoding_converter, /* converts a buffer between two encodings */
 986         php_mb_zend_encoding_list_parser, /* wraps php_mb_parse_encoding_list() */
 987         php_mb_zend_internal_encoding_getter, /* reads MBSTRG(internal_encoding) */
 988         php_mb_zend_internal_encoding_setter /* writes MBSTRG(internal_encoding) */
 989 };
 990 /* }}} */
 991 
 992 static void *_php_mb_compile_regex(const char *pattern);
 993 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
 994 static void _php_mb_free_regex(void *opaque);
 995 
 996 #if HAVE_ONIG
 997 /* {{{ _php_mb_compile_regex */
 998 static void *_php_mb_compile_regex(const char *pattern)
 999 {
1000         php_mb_regex_t *retval;
1001         OnigErrorInfo err_info;
1002         int err_code;
1003 
1004         if ((err_code = onig_new(&retval,
1005                         (const OnigUChar *)pattern,
1006                         (const OnigUChar *)pattern + strlen(pattern),
1007                         ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1008                         ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1009                 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1010                 onig_error_code_to_str(err_str, err_code, err_info);
1011                 php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
1012                 retval = NULL;
1013         }
1014         return retval;
1015 }
1016 /* }}} */
1017 
1018 /* {{{ _php_mb_match_regex */
1019 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1020 {
1021         return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1022                         (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1023                         (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1024 }
1025 /* }}} */
1026 
1027 /* {{{ _php_mb_free_regex */
1028 static void _php_mb_free_regex(void *opaque)
1029 {
1030         onig_free((php_mb_regex_t *)opaque);
1031 }
1032 /* }}} */
1033 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1034 /* {{{ _php_mb_compile_regex */
1035 static void *_php_mb_compile_regex(const char *pattern)
1036 {
1037         pcre *retval;
1038         const char *err_str;
1039         int err_offset;
1040 
1041         if (!(retval = pcre_compile(pattern,
1042                         PCRE_CASELESS, &err_str, &err_offset, NULL))) {
1043                 php_error_docref(NULL, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
1044         }
1045         return retval;
1046 }
1047 /* }}} */
1048 
1049 /* {{{ _php_mb_match_regex */
1050 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1051 {
1052         return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1053                         0, NULL, 0) >= 0;
1054 }
1055 /* }}} */
1056 
1057 /* {{{ _php_mb_free_regex */
/* Release a PCRE pattern obtained from _php_mb_compile_regex(). */
static void _php_mb_free_regex(void *opaque)
{
	void *compiled = opaque;

	pcre_free(compiled);
}
1062 /* }}} */
1063 #endif
1064 
1065 /* {{{ php_mb_nls_get_default_detect_order_list */
1066 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1067 {
1068         size_t i;
1069 
1070         *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1071         *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1072 
1073         for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1074                 if (php_mb_default_identify_list[i].lang == lang) {
1075                         *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1076                         *plist_size = php_mb_default_identify_list[i].list_size;
1077                         return 1;
1078                 }
1079         }
1080         return 0;
1081 }
1082 /* }}} */
1083 
1084 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote)
1085 {
1086         char *result = emalloc(len + 2);
1087         char *resp = result;
1088         int i;
1089 
1090         for (i = 0; i < len && start[i] != quote; ++i) {
1091                 if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1092                         *resp++ = start[++i];
1093                 } else {
1094                         size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1095 
1096                         while (j-- > 0 && i < len) {
1097                                 *resp++ = start[i++];
1098                         }
1099                         --i;
1100                 }
1101         }
1102 
1103         *resp = '\0';
1104         return result;
1105 }
1106 
1107 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
1108 {
1109         char *pos = *line, quote;
1110         char *res;
1111 
1112         while (*pos && *pos != stop) {
1113                 if ((quote = *pos) == '"' || quote == '\'') {
1114                         ++pos;
1115                         while (*pos && *pos != quote) {
1116                                 if (*pos == '\\' && pos[1] && pos[1] == quote) {
1117                                         pos += 2;
1118                                 } else {
1119                                         ++pos;
1120                                 }
1121                         }
1122                         if (*pos) {
1123                                 ++pos;
1124                         }
1125                 } else {
1126                         pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1127 
1128                 }
1129         }
1130         if (*pos == '\0') {
1131                 res = estrdup(*line);
1132                 *line += strlen(*line);
1133                 return res;
1134         }
1135 
1136         res = estrndup(*line, pos - *line);
1137 
1138         while (*pos == stop) {
1139                 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1140         }
1141 
1142         *line = pos;
1143         return res;
1144 }
1145 /* }}} */
1146 
1147 static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
1148 {
1149         while (*str && isspace(*(unsigned char *)str)) {
1150                 ++str;
1151         }
1152 
1153         if (!*str) {
1154                 return estrdup("");
1155         }
1156 
1157         if (*str == '"' || *str == '\'') {
1158                 char quote = *str;
1159 
1160                 str++;
1161                 return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote);
1162         } else {
1163                 char *strend = str;
1164 
1165                 while (*strend && !isspace(*(unsigned char *)strend)) {
1166                         ++strend;
1167                 }
1168                 return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0);
1169         }
1170 }
1171 /* }}} */
1172 
1173 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
1174 {
1175         char *s, *s2;
1176         const size_t filename_len = strlen(filename);
1177 
1178         /* The \ check should technically be needed for win32 systems only where
1179          * it is a valid path separator. However, IE in all it's wisdom always sends
1180          * the full path of the file on the user's filesystem, which means that unless
1181          * the user does basename() they get a bogus file name. Until IE's user base drops
1182          * to nill or problem is fixed this code must remain enabled for all systems. */
1183         s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1184         s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1185 
1186         if (s && s2) {
1187                 if (s > s2) {
1188                         return ++s;
1189                 } else {
1190                         return ++s2;
1191                 }
1192         } else if (s) {
1193                 return ++s;
1194         } else if (s2) {
1195                 return ++s2;
1196         } else {
1197                 return filename;
1198         }
1199 }
1200 /* }}} */
1201 
1202 /* {{{ php.ini directive handler */
1203 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
1204 static PHP_INI_MH(OnUpdate_mbstring_language)
1205 {
1206         enum mbfl_no_language no_language;
1207 
1208         no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
1209         if (no_language == mbfl_no_language_invalid) {
1210                 MBSTRG(language) = mbfl_no_language_neutral;
1211                 return FAILURE;
1212         }
1213         MBSTRG(language) = no_language;
1214         php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1215         return SUCCESS;
1216 }
1217 /* }}} */
1218 
1219 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
1220 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1221 {
1222         const mbfl_encoding **list;
1223         size_t size;
1224 
1225         if (!new_value) {
1226                 if (MBSTRG(detect_order_list)) {
1227                         pefree(MBSTRG(detect_order_list), 1);
1228                 }
1229                 MBSTRG(detect_order_list) = NULL;
1230                 MBSTRG(detect_order_list_size) = 0;
1231                 return SUCCESS;
1232         }
1233 
1234         if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1235                 return FAILURE;
1236         }
1237 
1238         if (MBSTRG(detect_order_list)) {
1239                 pefree(MBSTRG(detect_order_list), 1);
1240         }
1241         MBSTRG(detect_order_list) = list;
1242         MBSTRG(detect_order_list_size) = size;
1243         return SUCCESS;
1244 }
1245 /* }}} */
1246 
1247 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
1248 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1249 {
1250         const mbfl_encoding **list;
1251         size_t size;
1252 
1253         if (!new_value) {
1254                 if (MBSTRG(http_input_list)) {
1255                         pefree(MBSTRG(http_input_list), 1);
1256                 }
1257                 if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(), strlen(get_input_encoding())+1, &list, &size, 1)) {
1258                         MBSTRG(http_input_list) = list;
1259                         MBSTRG(http_input_list_size) = size;
1260                         return SUCCESS;
1261                 }
1262                 MBSTRG(http_input_list) = NULL;
1263                 MBSTRG(http_input_list_size) = 0;
1264                 return SUCCESS;
1265         }
1266 
1267         if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) {
1268                 return FAILURE;
1269         }
1270 
1271         if (MBSTRG(http_input_list)) {
1272                 pefree(MBSTRG(http_input_list), 1);
1273         }
1274         MBSTRG(http_input_list) = list;
1275         MBSTRG(http_input_list_size) = size;
1276 
1277         if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1278                 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1279         }
1280 
1281         return SUCCESS;
1282 }
1283 /* }}} */
1284 
1285 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
1286 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1287 {
1288         const mbfl_encoding *encoding;
1289 
1290         if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
1291                 encoding = mbfl_name2encoding(get_output_encoding());
1292                 if (!encoding) {
1293                         MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1294                         MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1295                         return SUCCESS;
1296                 }
1297         } else {
1298                 encoding = mbfl_name2encoding(ZSTR_VAL(new_value));
1299                 if (!encoding) {
1300                         MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1301                         MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1302                         return FAILURE;
1303                 }
1304         }
1305         MBSTRG(http_output_encoding) = encoding;
1306         MBSTRG(current_http_output_encoding) = encoding;
1307 
1308         if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1309                 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1310         }
1311 
1312         return SUCCESS;
1313 }
1314 /* }}} */
1315 
1316 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
1317 int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length)
1318 {
1319         const mbfl_encoding *encoding;
1320 
1321         if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) {
1322                 /* falls back to UTF-8 if an unknown encoding name is given */
1323                 encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1324         }
1325         MBSTRG(internal_encoding) = encoding;
1326         MBSTRG(current_internal_encoding) = encoding;
1327 #if HAVE_MBREGEX
1328         {
1329                 const char *enc_name = new_value;
1330                 if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
1331                         /* falls back to UTF-8 if an unknown encoding name is given */
1332                         enc_name = "UTF-8";
1333                         php_mb_regex_set_default_mbctype(enc_name);
1334                 }
1335                 php_mb_regex_set_mbctype(new_value);
1336         }
1337 #endif
1338         return SUCCESS;
1339 }
1340 /* }}} */
1341 
1342 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
1343 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1344 {
1345         if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1346                 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1347         }
1348 
1349         if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
1350                 return FAILURE;
1351         }
1352 
1353         if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1354                 if (new_value && ZSTR_LEN(new_value)) {
1355                         return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1356                 } else {
1357                         return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1);
1358                 }
1359         } else {
1360                 /* the corresponding mbstring globals needs to be set according to the
1361                  * ini value in the later stage because it never falls back to the
1362                  * default value if 1. no value for mbstring.internal_encoding is given,
1363                  * 2. mbstring.language directive is processed in per-dir or runtime
1364                  * context and 3. call to the handler for mbstring.language is done
1365                  * after mbstring.internal_encoding is handled. */
1366                 return SUCCESS;
1367         }
1368 }
1369 /* }}} */
1370 
1371 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
1372 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1373 {
1374         int c;
1375         char *endptr = NULL;
1376 
1377         if (new_value != NULL) {
1378                 if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
1379                         MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1380                         MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1381                 } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
1382                         MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1383                         MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1384                 } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
1385                         MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1386                         MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1387                 } else {
1388                         MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1389                         MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1390                         if (ZSTR_LEN(new_value) > 0) {
1391                                 c = strtol(ZSTR_VAL(new_value), &endptr, 0);
1392                                 if (*endptr == '\0') {
1393                                         MBSTRG(filter_illegal_substchar) = c;
1394                                         MBSTRG(current_filter_illegal_substchar) = c;
1395                                 }
1396                         }
1397                 }
1398         } else {
1399                 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1400                 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1401                 MBSTRG(filter_illegal_substchar) = 0x3f;        /* '?' */
1402                 MBSTRG(current_filter_illegal_substchar) = 0x3f;        /* '?' */
1403         }
1404 
1405         return SUCCESS;
1406 }
1407 /* }}} */
1408 
1409 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
1410 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1411 {
1412         if (new_value == NULL) {
1413                 return FAILURE;
1414         }
1415 
1416         OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1417 
1418         if (MBSTRG(encoding_translation)) {
1419                 sapi_unregister_post_entry(php_post_entries);
1420                 sapi_register_post_entries(mbstr_post_entries);
1421         } else {
1422                 sapi_unregister_post_entry(mbstr_post_entries);
1423                 sapi_register_post_entries(php_post_entries);
1424         }
1425 
1426         return SUCCESS;
1427 }
1428 /* }}} */
1429 
1430 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
1431 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1432 {
1433         zend_string *tmp;
1434         void *re = NULL;
1435 
1436         if (!new_value) {
1437                 new_value = entry->orig_value;
1438         }
1439         tmp = php_trim(new_value, NULL, 0, 3);
1440 
1441         if (ZSTR_LEN(tmp) > 0) {
1442                 if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
1443                         zend_string_release(tmp);
1444                         return FAILURE;
1445                 }
1446         }
1447 
1448         if (MBSTRG(http_output_conv_mimetypes)) {
1449                 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1450         }
1451 
1452         MBSTRG(http_output_conv_mimetypes) = re;
1453 
1454         zend_string_release(tmp);
1455         return SUCCESS;
1456 }
1457 /* }}} */
1458 /* }}} */
1459 
1460 /* {{{ php.ini directive registration */
1461 PHP_INI_BEGIN() /* mbstring's php.ini directives: name, default value, access flags, update handler */
1462         PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language) /* default "neutral" */
1463         PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order) /* no default; handler clears the list when unset */
1464         PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input) /* no default; handler falls back to get_input_encoding() */
1465         PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output) /* no default; handler falls back to get_output_encoding() */
1466         STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals) /* raw string is kept in the internal_encoding_name global */
1467         PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character) /* no default; handler uses '?' when unset */
1468         STD_PHP_INI_ENTRY("mbstring.func_overload", "0", /* default "0"; stored in the func_overload global */
1469         PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
1470 
1471         STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0", /* default off */
1472                 PHP_INI_SYSTEM | PHP_INI_PERDIR,
1473                 OnUpdate_mbstring_encoding_translation,
1474                 encoding_translation, zend_mbstring_globals, mbstring_globals)
1475         PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
1476                 "^(text/|application/xhtml\\+xml)", /* default regex; the handler compiles it */
1477                 PHP_INI_ALL,
1478                 OnUpdate_mbstring_http_output_conv_mimetypes)
1479 
1480         STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0", /* declared as a boolean entry but stored through OnUpdateLong */
1481                 PHP_INI_ALL,
1482                 OnUpdateLong,
1483                 strict_detection, zend_mbstring_globals, mbstring_globals)
1484 PHP_INI_END()
1485 /* }}} */
1486 
1487 /* {{{ module global initialize handler */
1488 static PHP_GINIT_FUNCTION(mbstring)
1489 {
1490 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1491 ZEND_TSRMLS_CACHE_UPDATE();
1492 #endif
1493 
1494         mbstring_globals->language = mbfl_no_language_uni;
1495         mbstring_globals->internal_encoding = NULL;
1496         mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1497         mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1498         mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1499         mbstring_globals->http_input_identify = NULL;
1500         mbstring_globals->http_input_identify_get = NULL;
1501         mbstring_globals->http_input_identify_post = NULL;
1502         mbstring_globals->http_input_identify_cookie = NULL;
1503         mbstring_globals->http_input_identify_string = NULL;
1504         mbstring_globals->http_input_list = NULL;
1505         mbstring_globals->http_input_list_size = 0;
1506         mbstring_globals->detect_order_list = NULL;
1507         mbstring_globals->detect_order_list_size = 0;
1508         mbstring_globals->current_detect_order_list = NULL;
1509         mbstring_globals->current_detect_order_list_size = 0;
1510         mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1511         mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1512         mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1513         mbstring_globals->filter_illegal_substchar = 0x3f;      /* '?' */
1514         mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1515         mbstring_globals->current_filter_illegal_substchar = 0x3f;      /* '?' */
1516         mbstring_globals->illegalchars = 0;
1517         mbstring_globals->func_overload = 0;
1518         mbstring_globals->encoding_translation = 0;
1519         mbstring_globals->strict_detection = 0;
1520         mbstring_globals->outconv = NULL;
1521         mbstring_globals->http_output_conv_mimetypes = NULL;
1522 #if HAVE_MBREGEX
1523         mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1524 #endif
1525 }
1526 /* }}} */
1527 
1528 /* {{{ PHP_GSHUTDOWN_FUNCTION */
1529 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1530 {
1531         if (mbstring_globals->http_input_list) {
1532                 free(mbstring_globals->http_input_list);
1533         }
1534         if (mbstring_globals->detect_order_list) {
1535                 free(mbstring_globals->detect_order_list);
1536         }
1537         if (mbstring_globals->http_output_conv_mimetypes) {
1538                 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1539         }
1540 #if HAVE_MBREGEX
1541         php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1542 #endif
1543 }
1544 /* }}} */
1545 
1546 /* {{{ PHP_MINIT_FUNCTION(mbstring)
 * Module startup. Installs the libmbfl allocator table, registers the INI
 * entries, the treat-data handler, the MB_* constants, the Zend multibyte
 * function table and the RFC 1867 multibyte callbacks.
 * Returns FAILURE only when zend_multibyte_set_functions() fails; otherwise
 * SUCCESS.
 */
1547 PHP_MINIT_FUNCTION(mbstring)
1548 {
1549 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1550 ZEND_TSRMLS_CACHE_UPDATE();
1551 #endif
1552         __mbfl_allocators = &_php_mb_allocators;
1553 
        /* Must run before MBSTRG(encoding_translation) is consulted below. */
1554         REGISTER_INI_ENTRIES();
1555 
1556         /* This is a global handler. Should not be set in a per-request handler. */
1557         sapi_register_treat_data(mbstr_treat_data);
1558 
1559         /* Post handlers are stored in the thread-local context. */
1560         if (MBSTRG(encoding_translation)) {
1561                 sapi_register_post_entries(mbstr_post_entries);
1562         }
1563 
1564         REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1565         REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1566         REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1567 
1568         REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1569         REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1570         REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1571 
        /* NOTE(review): the result of the mb_regex startup hook is ignored here. */
1572 #if HAVE_MBREGEX
1573         PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1574 #endif
1575 
1576         if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
1577                 return FAILURE;
1578         }
1579 
1580         php_rfc1867_set_multibyte_callbacks(
1581                 php_mb_encoding_translation,
1582                 php_mb_gpc_get_detect_order,
1583                 php_mb_gpc_set_input_encoding,
1584                 php_mb_rfc1867_getword,
1585                 php_mb_rfc1867_getword_conf,
1586                 php_mb_rfc1867_basename);
1587 
1588         return SUCCESS;
1589 }
1590 /* }}} */
1591 
1592 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring)
 * Module shutdown: unregisters the INI entries and, when built with mbregex,
 * runs the mb_regex shutdown hook. Always returns SUCCESS.
 */
1593 PHP_MSHUTDOWN_FUNCTION(mbstring)
1594 {
1595         UNREGISTER_INI_ENTRIES();
1596 
1597 #if HAVE_MBREGEX
1598         PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1599 #endif
1600 
1601         return SUCCESS;
1602 }
1603 /* }}} */
1604 
1605 /* {{{ PHP_RINIT_FUNCTION(mbstring)
 * Request startup. Resets the per-request ("current_*") settings from the
 * configured ones, clears the illegal-character counter, rebuilds the
 * current detect-order list and, when mbstring.func_overload is non-zero,
 * replaces the selected functions with their mbstring counterparts.
 * Returns FAILURE when an original function cannot be found or replaced,
 * SUCCESS otherwise.
 */
1606 PHP_RINIT_FUNCTION(mbstring)
1607 {
1608         zend_function *func, *orig;
1609         const struct mb_overload_def *p;
1610 
1611         MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1612         MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1613         MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1614         MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1615 
1616         MBSTRG(illegalchars) = 0;
1617 
1618         php_mb_populate_current_detect_order_list();
1619 
1620         /* override original function. */
1621         if (MBSTRG(func_overload)){
1622                 p = &(mb_ovld[0]);
1623 
1624                 CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
                /* The table walk stops at the first entry whose type is not positive. */
1625                 while (p->type > 0) {
                        /* Act only when every bit of p->type is enabled and no
                         * entry named save_func exists in the function table yet. */
1626                         if ((MBSTRG(func_overload) & p->type) == p->type &&
1627                                 !zend_hash_str_exists(EG(function_table), p->save_func, strlen(p->save_func))
1628                         ) {
                                /* NOTE(review): func is not checked for NULL before it is
                                 * copied below; presumably ovld_func is always registered -
                                 * confirm. */
1629                                 func = zend_hash_str_find_ptr(EG(function_table), p->ovld_func, strlen(p->ovld_func));
1630 
1631                                 if ((orig = zend_hash_str_find_ptr(EG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
1632                                         php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1633                                         return FAILURE;
1634                                 } else {
1635                                         ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
                                        /* Keep a copy of the original under save_func ... */
1636                                         zend_hash_str_add_mem(EG(function_table), p->save_func, strlen(p->save_func), orig, sizeof(zend_internal_function));
1637                                         function_add_ref(orig);
1638 
                                        /* ... then overwrite the orig_func entry with a copy of func. */
1639                                         if (zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), func, sizeof(zend_internal_function)) == NULL) {
1640                                                 php_error_docref("ref.mbstring", E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1641                                                 return FAILURE;
1642                                         }
1643 
1644                                         function_add_ref(func);
1645                                 }
1646                         }
1647                         p++;
1648                 }
1649         }
1650 #if HAVE_MBREGEX
1651         PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1652 #endif
1653         zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1654 
1655         return SUCCESS;
1656 }
1657 /* }}} */
1658 
1659 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring)
 * Request shutdown. Frees the per-request detect-order list and output
 * converter, clears the identified HTTP input encodings and, when
 * mbstring.func_overload is non-zero, puts the saved original functions
 * back in place. Always returns SUCCESS.
 */
1660 PHP_RSHUTDOWN_FUNCTION(mbstring)
1661 {
1662         const struct mb_overload_def *p;
1663         zend_function *orig;
1664 
1665         if (MBSTRG(current_detect_order_list) != NULL) {
1666                 efree(MBSTRG(current_detect_order_list));
1667                 MBSTRG(current_detect_order_list) = NULL;
1668                 MBSTRG(current_detect_order_list_size) = 0;
1669         }
1670         if (MBSTRG(outconv) != NULL) {
                /* Add the converter's illegal-character count to the total before deleting it. */
1671                 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1672                 mbfl_buffer_converter_delete(MBSTRG(outconv));
1673                 MBSTRG(outconv) = NULL;
1674         }
1675 
1676         /* clear http input identification. */
1677         MBSTRG(http_input_identify) = NULL;
1678         MBSTRG(http_input_identify_post) = NULL;
1679         MBSTRG(http_input_identify_get) = NULL;
1680         MBSTRG(http_input_identify_cookie) = NULL;
1681         MBSTRG(http_input_identify_string) = NULL;
1682 
1683         /*  clear overloaded function. */
1684         if (MBSTRG(func_overload)){
1685                 p = &(mb_ovld[0]);
1686                 while (p->type > 0) {
                        /* Only entries that are enabled and have a saved copy are restored. */
1687                         if ((MBSTRG(func_overload) & p->type) == p->type &&
1688                                 (orig = zend_hash_str_find_ptr(EG(function_table), p->save_func, strlen(p->save_func)))) {
1689 
                                /* Copy the saved function back under its original name,
                                 * then remove the save_func entry. */
1690                                 zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
1691                                 function_add_ref(orig);
1692                                 zend_hash_str_del(EG(function_table), p->save_func, strlen(p->save_func));
1693                         }
1694                         p++;
1695                 }
                /* Clear the compiler flag again (the same bit is cleared here as the
                 * request-startup code in this file sets when overloading is on). */
1696                 CG(compiler_options) &= ~ZEND_COMPILE_NO_BUILTIN_STRLEN;
1697         }
1698 
1699 #if HAVE_MBREGEX
1700         PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1701 #endif
1702 
1703         return SUCCESS;
1704 }
1705 /* }}} */
1706 
1707 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
1708 PHP_MINFO_FUNCTION(mbstring)
1709 {
1710         php_info_print_table_start();
1711         php_info_print_table_row(2, "Multibyte Support", "enabled");
1712         php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1713         php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1714         {
1715                 char tmp[256];
1716                 snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1717                 php_info_print_table_row(2, "libmbfl version", tmp);
1718         }
1719 #if HAVE_ONIG
1720         {
1721                 char tmp[256];
1722                 snprintf(tmp, sizeof(tmp), "%d.%d.%d", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
1723                 php_info_print_table_row(2, "oniguruma version", tmp);
1724         }
1725 #endif
1726         php_info_print_table_end();
1727 
1728         php_info_print_table_start();
1729         php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1730         php_info_print_table_end();
1731 
1732 #if HAVE_MBREGEX
1733         PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1734 #endif
1735 
1736         DISPLAY_INI_ENTRIES();
1737 }
1738 /* }}} */
1739 
1740 /* {{{ proto string mb_language([string language])
1741    Sets the current language or Returns the current language as a string */
1742 PHP_FUNCTION(mb_language)
1743 {
1744         zend_string *name = NULL;
1745 
1746         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S", &name) == FAILURE) {
1747                 return;
1748         }
1749         if (name == NULL) {
1750                 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1751         } else {
1752                 zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1753                 if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1754                         php_error_docref(NULL, E_WARNING, "Unknown language \"%s\"", ZSTR_VAL(name));
1755                         RETVAL_FALSE;
1756                 } else {
1757                         RETVAL_TRUE;
1758                 }
1759                 zend_string_release(ini_name);
1760         }
1761 }
1762 /* }}} */
1763 
1764 /* {{{ proto string mb_internal_encoding([string encoding])
1765    Sets the current internal encoding or Returns the current internal encoding as a string */
1766 PHP_FUNCTION(mb_internal_encoding)
1767 {
1768         const char *name = NULL;
1769         size_t name_len;
1770         const mbfl_encoding *encoding;
1771 
1772         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1773                 return;
1774         }
1775         if (name == NULL) {
1776                 name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1777                 if (name != NULL) {
1778                         RETURN_STRING(name);
1779                 } else {
1780                         RETURN_FALSE;
1781                 }
1782         } else {
1783                 encoding = mbfl_name2encoding(name);
1784                 if (!encoding) {
1785                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1786                         RETURN_FALSE;
1787                 } else {
1788                         MBSTRG(current_internal_encoding) = encoding;
1789                         RETURN_TRUE;
1790                 }
1791         }
1792 }
1793 /* }}} */
1794 
1795 /* {{{ proto mixed mb_http_input([string type])
1796    Returns the input encoding */
1797 PHP_FUNCTION(mb_http_input)
1798 {
1799         char *typ = NULL;
1800         size_t typ_len;
1801         int retname;
1802         char *list, *temp;
1803         const mbfl_encoding *result = NULL;
1804 
1805         retname = 1;
1806         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
1807                 return;
1808         }
1809         if (typ == NULL) {
1810                 result = MBSTRG(http_input_identify);
1811         } else {
1812                 switch (*typ) {
1813                 case 'G':
1814                 case 'g':
1815                         result = MBSTRG(http_input_identify_get);
1816                         break;
1817                 case 'P':
1818                 case 'p':
1819                         result = MBSTRG(http_input_identify_post);
1820                         break;
1821                 case 'C':
1822                 case 'c':
1823                         result = MBSTRG(http_input_identify_cookie);
1824                         break;
1825                 case 'S':
1826                 case 's':
1827                         result = MBSTRG(http_input_identify_string);
1828                         break;
1829                 case 'I':
1830                 case 'i':
1831                         {
1832                                 const mbfl_encoding **entry = MBSTRG(http_input_list);
1833                                 const size_t n = MBSTRG(http_input_list_size);
1834                                 size_t i;
1835                                 array_init(return_value);
1836                                 for (i = 0; i < n; i++) {
1837                                         add_next_index_string(return_value, (*entry)->name);
1838                                         entry++;
1839                                 }
1840                                 retname = 0;
1841                         }
1842                         break;
1843                 case 'L':
1844                 case 'l':
1845                         {
1846                                 const mbfl_encoding **entry = MBSTRG(http_input_list);
1847                                 const size_t n = MBSTRG(http_input_list_size);
1848                                 size_t i;
1849                                 list = NULL;
1850                                 for (i = 0; i < n; i++) {
1851                                         if (list) {
1852                                                 temp = list;
1853                                                 spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1854                                                 efree(temp);
1855                                                 if (!list) {
1856                                                         break;
1857                                                 }
1858                                         } else {
1859                                                 list = estrdup((*entry)->name);
1860                                         }
1861                                         entry++;
1862                                 }
1863                         }
1864                         if (!list) {
1865                                 RETURN_FALSE;
1866                         }
1867                         RETVAL_STRING(list);
1868                         efree(list);
1869                         retname = 0;
1870                         break;
1871                 default:
1872                         result = MBSTRG(http_input_identify);
1873                         break;
1874                 }
1875         }
1876 
1877         if (retname) {
1878                 if (result) {
1879                         RETVAL_STRING(result->name);
1880                 } else {
1881                         RETVAL_FALSE;
1882                 }
1883         }
1884 }
1885 /* }}} */
1886 
1887 /* {{{ proto string mb_http_output([string encoding])
1888    Sets the current output_encoding or returns the current output_encoding as a string */
1889 PHP_FUNCTION(mb_http_output)
1890 {
1891         const char *name = NULL;
1892         size_t name_len;
1893         const mbfl_encoding *encoding;
1894 
1895         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &name, &name_len) == FAILURE) {
1896                 return;
1897         }
1898 
1899         if (name == NULL) {
1900                 name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1901                 if (name != NULL) {
1902                         RETURN_STRING(name);
1903                 } else {
1904                         RETURN_FALSE;
1905                 }
1906         } else {
1907                 encoding = mbfl_name2encoding(name);
1908                 if (!encoding) {
1909                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
1910                         RETURN_FALSE;
1911                 } else {
1912                         MBSTRG(current_http_output_encoding) = encoding;
1913                         RETURN_TRUE;
1914                 }
1915         }
1916 }
1917 /* }}} */
1918 
1919 /* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1920    Sets the current detect_order or Return the current detect_order as a array */
1921 PHP_FUNCTION(mb_detect_order)
1922 {
1923         zval *arg1 = NULL;
1924 
1925         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1926                 return;
1927         }
1928 
1929         if (!arg1) {
1930                 size_t i;
1931                 size_t n = MBSTRG(current_detect_order_list_size);
1932                 const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1933                 array_init(return_value);
1934                 for (i = 0; i < n; i++) {
1935                         add_next_index_string(return_value, (*entry)->name);
1936                         entry++;
1937                 }
1938         } else {
1939                 const mbfl_encoding **list = NULL;
1940                 size_t size = 0;
1941                 switch (Z_TYPE_P(arg1)) {
1942                         case IS_ARRAY:
1943                                 if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0)) {
1944                                         if (list) {
1945                                                 efree(list);
1946                                         }
1947                                         RETURN_FALSE;
1948                                 }
1949                                 break;
1950                         default:
1951                                 convert_to_string_ex(arg1);
1952                                 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0)) {
1953                                         if (list) {
1954                                                 efree(list);
1955                                         }
1956                                         RETURN_FALSE;
1957                                 }
1958                                 break;
1959                 }
1960 
1961                 if (list == NULL) {
1962                         RETURN_FALSE;
1963                 }
1964 
1965                 if (MBSTRG(current_detect_order_list)) {
1966                         efree(MBSTRG(current_detect_order_list));
1967                 }
1968                 MBSTRG(current_detect_order_list) = list;
1969                 MBSTRG(current_detect_order_list_size) = size;
1970                 RETURN_TRUE;
1971         }
1972 }
1973 /* }}} */
1974 
1975 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1976    Sets the current substitute_character or returns the current substitute_character */
1977 PHP_FUNCTION(mb_substitute_character)
1978 {
1979         zval *arg1 = NULL;
1980 
1981         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1982                 return;
1983         }
1984 
1985         if (!arg1) {
1986                 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1987                         RETURN_STRING("none");
1988                 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1989                         RETURN_STRING("long");
1990                 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1991                         RETURN_STRING("entity");
1992                 } else {
1993                         RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1994                 }
1995         } else {
1996                 RETVAL_TRUE;
1997 
1998                 switch (Z_TYPE_P(arg1)) {
1999                         case IS_STRING:
2000                                 if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2001                                         MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
2002                                 } else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2003                                         MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
2004                                 } else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
2005                                         MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
2006                                 } else {
2007                                         convert_to_long_ex(arg1);
2008 
2009                                         if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2010                                                 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2011                                                 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2012                                         } else {
2013                                                 php_error_docref(NULL, E_WARNING, "Unknown character.");
2014                                                 RETURN_FALSE;
2015                                         }
2016                                 }
2017                                 break;
2018                         default:
2019                                 convert_to_long_ex(arg1);
2020                                 if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2021                                         MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2022                                         MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2023                                 } else {
2024                                         php_error_docref(NULL, E_WARNING, "Unknown character.");
2025                                         RETURN_FALSE;
2026                                 }
2027                                 break;
2028                 }
2029         }
2030 }
2031 /* }}} */
2032 
2033 /* {{{ proto string mb_preferred_mime_name(string encoding)
2034    Return the preferred MIME name (charset) as a string */
2035 PHP_FUNCTION(mb_preferred_mime_name)
2036 {
2037         enum mbfl_no_encoding no_encoding;
2038         char *name = NULL;
2039         size_t name_len;
2040 
2041         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
2042                 return;
2043         } else {
2044                 no_encoding = mbfl_name2no_encoding(name);
2045                 if (no_encoding == mbfl_no_encoding_invalid) {
2046                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
2047                         RETVAL_FALSE;
2048                 } else {
2049                         const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2050                         if (preferred_name == NULL || *preferred_name == '\0') {
2051                                 php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2052                                 RETVAL_FALSE;
2053                         } else {
2054                                 RETVAL_STRING((char *)preferred_name);
2055                         }
2056                 }
2057         }
2058 }
2059 /* }}} */
2060 
/* Byte-range predicates that evaluate to 1 or 0. IS_SJIS1 accepts
 * 0x81-0x9f and 0xe0-0xf5; IS_SJIS2 accepts 0x40-0x7e and 0x80-0xfc.
 * Presumably these are the Shift_JIS first- and second-byte ranges - TODO
 * confirm. The argument is evaluated more than once, so it must not have
 * side effects. */
2061 #define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0)
2062 #define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0)
2063 
2064 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2065    Parses GET/POST/COOKIE data and sets global variables */
2066 PHP_FUNCTION(mb_parse_str)
2067 {
2068         zval *track_vars_array = NULL;
2069         char *encstr = NULL;
2070         size_t encstr_len;
2071         php_mb_encoding_handler_info_t info;
2072         const mbfl_encoding *detected;
2073 
2074         track_vars_array = NULL;
2075         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z/", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2076                 return;
2077         }
2078 
2079         if (track_vars_array != NULL) {
2080                 /* Clear out the array */
2081                 zval_dtor(track_vars_array);
2082                 array_init(track_vars_array);
2083         }
2084 
2085         encstr = estrndup(encstr, encstr_len);
2086 
2087         info.data_type              = PARSE_STRING;
2088         info.separator              = PG(arg_separator).input;
2089         info.report_errors          = 1;
2090         info.to_encoding            = MBSTRG(current_internal_encoding);
2091         info.to_language            = MBSTRG(language);
2092         info.from_encodings         = MBSTRG(http_input_list);
2093         info.num_from_encodings     = MBSTRG(http_input_list_size);
2094         info.from_language          = MBSTRG(language);
2095 
2096         if (track_vars_array != NULL) {
2097                 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
2098         } else {
2099                 zval tmp;
2100                 zend_array *symbol_table = zend_rebuild_symbol_table();
2101 
2102                 ZVAL_ARR(&tmp, symbol_table);
2103                 detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr);
2104         }
2105 
2106         MBSTRG(http_input_identify) = detected;
2107 
2108         RETVAL_BOOL(detected);
2109 
2110         if (encstr != NULL) efree(encstr);
2111 }
2112 /* }}} */
2113 
2114 /* {{{ proto string mb_output_handler(string contents, int status)
2115    Returns string in output buffer converted to the http_output encoding */
2116 PHP_FUNCTION(mb_output_handler)
2117 {
2118         char *arg_string;
2119         size_t arg_string_len;
2120         zend_long arg_status;
2121         mbfl_string string, result;
2122         const char *charset;
2123         char *p;
2124         const mbfl_encoding *encoding;
2125         int last_feed, len;
2126         unsigned char send_text_mimetype = 0;
2127         char *s, *mimetype = NULL;
2128 
2129         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2130                 return;
2131         }
2132 
2133         encoding = MBSTRG(current_http_output_encoding);
2134 
2135         /* start phase only */
2136         if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2137                 /* delete the converter just in case. */
2138                 if (MBSTRG(outconv)) {
2139                         MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2140                         mbfl_buffer_converter_delete(MBSTRG(outconv));
2141                         MBSTRG(outconv) = NULL;
2142                 }
2143                 if (encoding == &mbfl_encoding_pass) {
2144                         RETURN_STRINGL(arg_string, arg_string_len);
2145                 }
2146 
2147                 /* analyze mime type */
2148                 if (SG(sapi_headers).mimetype &&
2149                         _php_mb_match_regex(
2150                                 MBSTRG(http_output_conv_mimetypes),
2151                                 SG(sapi_headers).mimetype,
2152                                 strlen(SG(sapi_headers).mimetype))) {
2153                         if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2154                                 mimetype = estrdup(SG(sapi_headers).mimetype);
2155                         } else {
2156                                 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2157                         }
2158                         send_text_mimetype = 1;
2159                 } else if (SG(sapi_headers).send_default_content_type) {
2160                         mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2161                 }
2162 
2163                 /* if content-type is not yet set, set it and activate the converter */
2164                 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2165                         charset = encoding->mime_name;
2166                         if (charset) {
2167                                 len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
2168                                 if (sapi_add_header(p, len, 0) != FAILURE) {
2169                                         SG(sapi_headers).send_default_content_type = 0;
2170                                 }
2171                         }
2172                         /* activate the converter */
2173                         MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2174                         if (send_text_mimetype){
2175                                 efree(mimetype);
2176                         }
2177                 }
2178         }
2179 
2180         /* just return if the converter is not activated. */
2181         if (MBSTRG(outconv) == NULL) {
2182                 RETURN_STRINGL(arg_string, arg_string_len);
2183         }
2184 
2185         /* flag */
2186         last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2187         /* mode */
2188         mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2189         mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2190 
2191         /* feed the string */
2192         mbfl_string_init(&string);
2193         /* these are not needed. convd has encoding info.
2194         string.no_language = MBSTRG(language);
2195         string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2196         */
2197         string.val = (unsigned char *)arg_string;
2198         string.len = arg_string_len;
2199         mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2200         if (last_feed) {
2201                 mbfl_buffer_converter_flush(MBSTRG(outconv));
2202         }
2203         /* get the converter output, and return it */
2204         mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2205         // TODO: avoid reallocation ???
2206         RETVAL_STRINGL((char *)result.val, result.len);         /* the string is already strdup()'ed */
2207         efree(result.val);
2208 
2209         /* delete the converter if it is the last feed. */
2210         if (last_feed) {
2211                 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2212                 mbfl_buffer_converter_delete(MBSTRG(outconv));
2213                 MBSTRG(outconv) = NULL;
2214         }
2215 }
2216 /* }}} */
2217 
2218 /* {{{ proto int mb_strlen(string str [, string encoding])
2219    Get character numbers of a string */
2220 PHP_FUNCTION(mb_strlen)
2221 {
2222         int n;
2223         mbfl_string string;
2224         char *enc_name = NULL;
2225         size_t enc_name_len, string_len;
2226 
2227         mbfl_string_init(&string);
2228 
2229         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
2230                 return;
2231         }
2232 
2233         if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
2234                         php_error_docref(NULL, E_WARNING, "String overflows the max allowed length of %u", UINT_MAX);
2235                         return;
2236         }
2237 
2238         string.len = (uint32_t)string_len;
2239 
2240         string.no_language = MBSTRG(language);
2241         if (enc_name == NULL) {
2242                 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2243         } else {
2244                 string.no_encoding = mbfl_name2no_encoding(enc_name);
2245                 if (string.no_encoding == mbfl_no_encoding_invalid) {
2246                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2247                         RETURN_FALSE;
2248                 }
2249         }
2250 
2251         n = mbfl_strlen(&string);
2252         if (n >= 0) {
2253                 RETVAL_LONG(n);
2254         } else {
2255                 RETVAL_FALSE;
2256         }
2257 }
2258 /* }}} */
2259 
2260 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2261    Find position of first occurrence of a string within another */
2262 PHP_FUNCTION(mb_strpos)
2263 {
2264         int n, reverse = 0;
2265         zend_long offset = 0;
2266         mbfl_string haystack, needle;
2267         char *enc_name = NULL;
2268         size_t enc_name_len, haystack_len, needle_len;
2269 
2270         mbfl_string_init(&haystack);
2271         mbfl_string_init(&needle);
2272         haystack.no_language = MBSTRG(language);
2273         haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2274         needle.no_language = MBSTRG(language);
2275         needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2276 
2277         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2278                 return;
2279         }
2280 
2281         if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2282                         php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2283                         return;
2284         } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2285                         php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2286                         return;
2287         }
2288 
2289         haystack.len = (uint32_t)haystack_len;
2290         needle.len = (uint32_t)needle_len;
2291 
2292         if (enc_name != NULL) {
2293                 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2294                 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2295                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2296                         RETURN_FALSE;
2297                 }
2298         }
2299 
2300         if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2301                 php_error_docref(NULL, E_WARNING, "Offset not contained in string");
2302                 RETURN_FALSE;
2303         }
2304         if (needle.len == 0) {
2305                 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2306                 RETURN_FALSE;
2307         }
2308 
2309         n = mbfl_strpos(&haystack, &needle, offset, reverse);
2310         if (n >= 0) {
2311                 RETVAL_LONG(n);
2312         } else {
2313                 switch (-n) {
2314                 case 1:
2315                         break;
2316                 case 2:
2317                         php_error_docref(NULL, E_WARNING, "Needle has not positive length");
2318                         break;
2319                 case 4:
2320                         php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
2321                         break;
2322                 case 8:
2323                         php_error_docref(NULL, E_NOTICE, "Argument is empty");
2324                         break;
2325                 default:
2326                         php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
2327                         break;
2328                 }
2329                 RETVAL_FALSE;
2330         }
2331 }
2332 /* }}} */
2333 
2334 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2335    Find position of last occurrence of a string within another */
2336 PHP_FUNCTION(mb_strrpos)
2337 {
2338         int n;
2339         mbfl_string haystack, needle;
2340         char *enc_name = NULL;
2341         size_t enc_name_len, haystack_len, needle_len;
2342         zval *zoffset = NULL;
2343         long offset = 0, str_flg;
2344         char *enc_name2 = NULL;
2345         int enc_name_len2;
2346 
2347         mbfl_string_init(&haystack);
2348         mbfl_string_init(&needle);
2349         haystack.no_language = MBSTRG(language);
2350         haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2351         needle.no_language = MBSTRG(language);
2352         needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2353 
2354         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|zs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2355                 return;
2356         }
2357 
2358         if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2359                         php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2360                         return;
2361         } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2362                         php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2363                         return;
2364         }
2365 
2366         haystack.len = (uint32_t)haystack_len;
2367         needle.len = (uint32_t)needle_len;
2368 
2369         if (zoffset) {
2370                 if (Z_TYPE_P(zoffset) == IS_STRING) {
2371                         enc_name2     = Z_STRVAL_P(zoffset);
2372                         enc_name_len2 = Z_STRLEN_P(zoffset);
2373                         str_flg       = 1;
2374 
2375                         if (enc_name2 != NULL) {
2376                                 switch (*enc_name2) {
2377                                         case '0':
2378                                         case '1':
2379                                         case '2':
2380                                         case '3':
2381                                         case '4':
2382                                         case '5':
2383                                         case '6':
2384                                         case '7':
2385                                         case '8':
2386                                         case '9':
2387                                         case ' ':
2388                                         case '-':
2389                                         case '.':
2390                                                 break;
2391                                         default :
2392                                                 str_flg = 0;
2393                                                 break;
2394                                 }
2395                         }
2396 
2397                         if (str_flg) {
2398                                 convert_to_long_ex(zoffset);
2399                                 offset   = Z_LVAL_P(zoffset);
2400                         } else {
2401                                 enc_name     = enc_name2;
2402                                 enc_name_len = enc_name_len2;
2403                         }
2404                 } else {
2405                         convert_to_long_ex(zoffset);
2406                         offset = Z_LVAL_P(zoffset);
2407                 }
2408         }
2409 
2410         if (enc_name != NULL) {
2411                 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2412                 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2413                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2414                         RETURN_FALSE;
2415                 }
2416         }
2417 
2418         if (haystack.len <= 0) {
2419                 RETURN_FALSE;
2420         }
2421         if (needle.len <= 0) {
2422                 RETURN_FALSE;
2423         }
2424 
2425         {
2426                 int haystack_char_len = mbfl_strlen(&haystack);
2427                 if ((offset > 0 && offset > haystack_char_len) ||
2428                         (offset < 0 && -offset > haystack_char_len)) {
2429                         php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
2430                         RETURN_FALSE;
2431                 }
2432         }
2433 
2434         n = mbfl_strpos(&haystack, &needle, offset, 1);
2435         if (n >= 0) {
2436                 RETVAL_LONG(n);
2437         } else {
2438                 RETVAL_FALSE;
2439         }
2440 }
2441 /* }}} */
2442 
2443 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2444    Finds position of first occurrence of a string within another, case insensitive */
2445 PHP_FUNCTION(mb_stripos)
2446 {
2447         int n = -1;
2448         zend_long offset = 0;
2449         mbfl_string haystack, needle;
2450         const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2451         size_t from_encoding_len, haystack_len, needle_len;
2452 
2453         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2454                 return;
2455         }
2456 
2457         if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2458                         php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2459                         return;
2460         } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2461                         php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2462                         return;
2463         }
2464 
2465         haystack.len = (uint32_t)haystack_len;
2466         needle.len = (uint32_t)needle_len;
2467 
2468         if (needle.len == 0) {
2469                 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2470                 RETURN_FALSE;
2471         }
2472         n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2473 
2474         if (n >= 0) {
2475                 RETVAL_LONG(n);
2476         } else {
2477                 RETVAL_FALSE;
2478         }
2479 }
2480 /* }}} */
2481 
2482 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2483    Finds position of last occurrence of a string within another, case insensitive */
2484 PHP_FUNCTION(mb_strripos)
2485 {
2486         int n = -1;
2487         zend_long offset = 0;
2488         mbfl_string haystack, needle;
2489         const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2490         size_t from_encoding_len, haystack_len, needle_len;
2491 
2492         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|ls", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2493                 return;
2494         }
2495 
2496         if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2497                         php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2498                         return;
2499         } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2500                         php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2501                         return;
2502         }
2503 
2504         haystack.len = (uint32_t)haystack_len;
2505         needle.len = (uint32_t)needle_len;
2506 
2507         n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2508 
2509         if (n >= 0) {
2510                 RETVAL_LONG(n);
2511         } else {
2512                 RETVAL_FALSE;
2513         }
2514 }
2515 /* }}} */
2516 
2517 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2518    Finds first occurrence of a string within another */
2519 PHP_FUNCTION(mb_strstr)
2520 {
2521         int n, len, mblen;
2522         mbfl_string haystack, needle, result, *ret = NULL;
2523         char *enc_name = NULL;
2524         size_t enc_name_len, haystack_len, needle_len;
2525         zend_bool part = 0;
2526 
2527         mbfl_string_init(&haystack);
2528         mbfl_string_init(&needle);
2529         haystack.no_language = MBSTRG(language);
2530         haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2531         needle.no_language = MBSTRG(language);
2532         needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2533 
2534         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
2535                 return;
2536         }
2537 
2538         if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2539                         php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2540                         return;
2541         } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2542                         php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2543                         return;
2544         }
2545 
2546         haystack.len = (uint32_t)haystack_len;
2547         needle.len = (uint32_t)needle_len;
2548 
2549         if (enc_name != NULL) {
2550                 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2551                 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2552                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2553                         RETURN_FALSE;
2554                 }
2555         }
2556 
2557         if (needle.len <= 0) {
2558                 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2559                 RETURN_FALSE;
2560         }
2561         n = mbfl_strpos(&haystack, &needle, 0, 0);
2562         if (n >= 0) {
2563                 mblen = mbfl_strlen(&haystack);
2564                 if (part) {
2565                         ret = mbfl_substr(&haystack, &result, 0, n);
2566                         if (ret != NULL) {
2567                                 // TODO: avoid reallocation ???
2568                                 RETVAL_STRINGL((char *)ret->val, ret->len);
2569                                 efree(ret->val);
2570                         } else {
2571                                 RETVAL_FALSE;
2572                         }
2573                 } else {
2574                         len = (mblen - n);
2575                         ret = mbfl_substr(&haystack, &result, n, len);
2576                         if (ret != NULL) {
2577                                 // TODO: avoid reallocation ???
2578                                 RETVAL_STRINGL((char *)ret->val, ret->len);
2579                                 efree(ret->val);
2580                         } else {
2581                                 RETVAL_FALSE;
2582                         }
2583                 }
2584         } else {
2585                 RETVAL_FALSE;
2586         }
2587 }
2588 /* }}} */
2589 
2590 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2591    Finds the last occurrence of a character in a string within another */
2592 PHP_FUNCTION(mb_strrchr)
2593 {
2594         int n, len, mblen;
2595         mbfl_string haystack, needle, result, *ret = NULL;
2596         char *enc_name = NULL;
2597         size_t enc_name_len, haystack_len, needle_len;
2598         zend_bool part = 0;
2599 
2600         mbfl_string_init(&haystack);
2601         mbfl_string_init(&needle);
2602         haystack.no_language = MBSTRG(language);
2603         haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2604         needle.no_language = MBSTRG(language);
2605         needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2606 
2607         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &enc_name, &enc_name_len) == FAILURE) {
2608                 return;
2609         }
2610 
2611         if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2612                         php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2613                         return;
2614         } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2615                         php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2616                         return;
2617         }
2618 
2619         haystack.len = (uint32_t)haystack_len;
2620         needle.len = (uint32_t)needle_len;
2621 
2622         if (enc_name != NULL) {
2623                 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2624                 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2625                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2626                         RETURN_FALSE;
2627                 }
2628         }
2629 
2630         if (haystack.len <= 0) {
2631                 RETURN_FALSE;
2632         }
2633         if (needle.len <= 0) {
2634                 RETURN_FALSE;
2635         }
2636         n = mbfl_strpos(&haystack, &needle, 0, 1);
2637         if (n >= 0) {
2638                 mblen = mbfl_strlen(&haystack);
2639                 if (part) {
2640                         ret = mbfl_substr(&haystack, &result, 0, n);
2641                         if (ret != NULL) {
2642                                 // TODO: avoid reallocation ???
2643                                 RETVAL_STRINGL((char *)ret->val, ret->len);
2644                                 efree(ret->val);
2645                         } else {
2646                                 RETVAL_FALSE;
2647                         }
2648                 } else {
2649                         len = (mblen - n);
2650                         ret = mbfl_substr(&haystack, &result, n, len);
2651                         if (ret != NULL) {
2652                                 // TODO: avoid reallocation ???
2653                                 RETVAL_STRINGL((char *)ret->val, ret->len);
2654                                 efree(ret->val);
2655                         } else {
2656                                 RETVAL_FALSE;
2657                         }
2658                 }
2659         } else {
2660                 RETVAL_FALSE;
2661         }
2662 }
2663 /* }}} */
2664 
2665 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2666    Finds first occurrence of a string within another, case insensitive */
2667 PHP_FUNCTION(mb_stristr)
2668 {
2669         zend_bool part = 0;
2670         size_t from_encoding_len, len, mblen, haystack_len, needle_len;
2671         int n;
2672         mbfl_string haystack, needle, result, *ret = NULL;
2673         const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2674         mbfl_string_init(&haystack);
2675         mbfl_string_init(&needle);
2676         haystack.no_language = MBSTRG(language);
2677         haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2678         needle.no_language = MBSTRG(language);
2679         needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2680 
2681 
2682         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2683                 return;
2684         }
2685 
2686         if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2687                         php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2688                         return;
2689         } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2690                         php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2691                         return;
2692         }
2693 
2694         haystack.len = (uint32_t)haystack_len;
2695         needle.len = (uint32_t)needle_len;
2696 
2697         if (!needle.len) {
2698                 php_error_docref(NULL, E_WARNING, "Empty delimiter");
2699                 RETURN_FALSE;
2700         }
2701 
2702         haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2703         if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2704                 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2705                 RETURN_FALSE;
2706         }
2707 
2708         n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2709 
2710         if (n <0) {
2711                 RETURN_FALSE;
2712         }
2713 
2714         mblen = mbfl_strlen(&haystack);
2715 
2716         if (part) {
2717                 ret = mbfl_substr(&haystack, &result, 0, n);
2718                 if (ret != NULL) {
2719                         // TODO: avoid reallocation ???
2720                         RETVAL_STRINGL((char *)ret->val, ret->len);
2721                         efree(ret->val);
2722                 } else {
2723                         RETVAL_FALSE;
2724                 }
2725         } else {
2726                 len = (mblen - n);
2727                 ret = mbfl_substr(&haystack, &result, n, len);
2728                 if (ret != NULL) {
2729                         // TODO: avoid reallocaton ???
2730                         RETVAL_STRINGL((char *)ret->val, ret->len);
2731                         efree(ret->val);
2732                 } else {
2733                         RETVAL_FALSE;
2734                 }
2735         }
2736 }
2737 /* }}} */
2738 
2739 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2740    Finds the last occurrence of a character in a string within another, case insensitive */
2741 PHP_FUNCTION(mb_strrichr)
2742 {
2743         zend_bool part = 0;
2744         int n, len, mblen;
2745         size_t from_encoding_len, haystack_len, needle_len;
2746         mbfl_string haystack, needle, result, *ret = NULL;
2747         const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2748         mbfl_string_init(&haystack);
2749         mbfl_string_init(&needle);
2750         haystack.no_language = MBSTRG(language);
2751         haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2752         needle.no_language = MBSTRG(language);
2753         needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2754 
2755 
2756         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bs", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2757                 return;
2758         }
2759 
2760         if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2761                         php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2762                         return;
2763         } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2764                         php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2765                         return;
2766         }
2767 
2768         haystack.len = (uint32_t)haystack_len;
2769         needle.len = (uint32_t)needle_len;
2770 
2771         haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2772         if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2773                 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2774                 RETURN_FALSE;
2775         }
2776 
2777         n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2778 
2779         if (n <0) {
2780                 RETURN_FALSE;
2781         }
2782 
2783         mblen = mbfl_strlen(&haystack);
2784 
2785         if (part) {
2786                 ret = mbfl_substr(&haystack, &result, 0, n);
2787                 if (ret != NULL) {
2788                         // TODO: avoid reallocation ???
2789                         RETVAL_STRINGL((char *)ret->val, ret->len);
2790                         efree(ret->val);
2791                 } else {
2792                         RETVAL_FALSE;
2793                 }
2794         } else {
2795                 len = (mblen - n);
2796                 ret = mbfl_substr(&haystack, &result, n, len);
2797                 if (ret != NULL) {
2798                         // TODO: avoid reallocation ???
2799                         RETVAL_STRINGL((char *)ret->val, ret->len);
2800                         efree(ret->val);
2801                 } else {
2802                         RETVAL_FALSE;
2803                 }
2804         }
2805 }
2806 /* }}} */
2807 
2808 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2809    Count the number of substring occurrences */
2810 PHP_FUNCTION(mb_substr_count)
2811 {
2812         int n;
2813         mbfl_string haystack, needle;
2814         char *enc_name = NULL;
2815         size_t enc_name_len, haystack_len, needle_len;
2816 
2817         mbfl_string_init(&haystack);
2818         mbfl_string_init(&needle);
2819         haystack.no_language = MBSTRG(language);
2820         haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2821         needle.no_language = MBSTRG(language);
2822         needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2823 
2824         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s", (char **)&haystack.val, &haystack_len, (char **)&needle.val, &needle_len, &enc_name, &enc_name_len) == FAILURE) {
2825                 return;
2826         }
2827 
2828         if (ZEND_SIZE_T_UINT_OVFL(haystack_len)) {
2829                         php_error_docref(NULL, E_WARNING, "Haystack length overflows the max allowed length of %u", UINT_MAX);
2830                         return;
2831         } else if (ZEND_SIZE_T_UINT_OVFL(needle_len)) {
2832                         php_error_docref(NULL, E_WARNING, "Needle length overflows the max allowed length of %u", UINT_MAX);
2833                         return;
2834         }
2835 
2836         haystack.len = (uint32_t)haystack_len;
2837         needle.len = (uint32_t)needle_len;
2838 
2839         if (enc_name != NULL) {
2840                 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2841                 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2842                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2843                         RETURN_FALSE;
2844                 }
2845         }
2846 
2847         if (needle.len <= 0) {
2848                 php_error_docref(NULL, E_WARNING, "Empty substring");
2849                 RETURN_FALSE;
2850         }
2851 
2852         n = mbfl_substr_count(&haystack, &needle);
2853         if (n >= 0) {
2854                 RETVAL_LONG(n);
2855         } else {
2856                 RETVAL_FALSE;
2857         }
2858 }
2859 /* }}} */
2860 
2861 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2862    Returns part of a string */
2863 PHP_FUNCTION(mb_substr)
2864 {
2865         char *str, *encoding = NULL;
2866         zend_long from, len;
2867         int mblen;
2868         size_t str_len, encoding_len;
2869         zend_bool len_is_null = 1;
2870         mbfl_string string, result, *ret;
2871 
2872         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", &str, &str_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2873                 return;
2874         }
2875 
2876         mbfl_string_init(&string);
2877         string.no_language = MBSTRG(language);
2878         string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2879 
2880         if (encoding) {
2881                 string.no_encoding = mbfl_name2no_encoding(encoding);
2882                 if (string.no_encoding == mbfl_no_encoding_invalid) {
2883                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2884                         RETURN_FALSE;
2885                 }
2886         }
2887 
2888         string.val = (unsigned char *)str;
2889         string.len = str_len;
2890 
2891         if (len_is_null) {
2892                 len = str_len;
2893         }
2894 
2895         /* measures length */
2896         mblen = 0;
2897         if (from < 0 || len < 0) {
2898                 mblen = mbfl_strlen(&string);
2899         }
2900 
2901         /* if "from" position is negative, count start position from the end
2902          * of the string
2903          */
2904         if (from < 0) {
2905                 from = mblen + from;
2906                 if (from < 0) {
2907                         from = 0;
2908                 }
2909         }
2910 
2911         /* if "length" position is negative, set it to the length
2912          * needed to stop that many chars from the end of the string
2913          */
2914         if (len < 0) {
2915                 len = (mblen - from) + len;
2916                 if (len < 0) {
2917                         len = 0;
2918                 }
2919         }
2920 
2921         if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2922                 && (from >= mbfl_strlen(&string))) {
2923                 RETURN_FALSE;
2924         }
2925 
2926         ret = mbfl_substr(&string, &result, from, len);
2927         if (NULL == ret) {
2928                 RETURN_FALSE;
2929         }
2930 
2931         // TODO: avoid reallocation ???
2932         RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2933         efree(ret->val);
2934 }
2935 /* }}} */
2936 
2937 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2938    Returns part of a string */
2939 PHP_FUNCTION(mb_strcut)
2940 {
2941         char *encoding = NULL;
2942         zend_long from, len;
2943         size_t encoding_len, string_len;
2944         zend_bool len_is_null = 1;
2945         mbfl_string string, result, *ret;
2946 
2947         mbfl_string_init(&string);
2948         string.no_language = MBSTRG(language);
2949         string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2950 
2951         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!s", (char **)&string.val, &string_len, &from, &len, &len_is_null, &encoding, &encoding_len) == FAILURE) {
2952                 return;
2953         }
2954 
2955         if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
2956                         php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
2957                         return;
2958         }
2959 
2960         string.len = (uint32_t)string_len;
2961 
2962         if (encoding) {
2963                 string.no_encoding = mbfl_name2no_encoding(encoding);
2964                 if (string.no_encoding == mbfl_no_encoding_invalid) {
2965                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
2966                         RETURN_FALSE;
2967                 }
2968         }
2969 
2970         if (len_is_null) {
2971                 len = string.len;
2972         }
2973 
2974         /* if "from" position is negative, count start position from the end
2975          * of the string
2976          */
2977         if (from < 0) {
2978                 from = string.len + from;
2979                 if (from < 0) {
2980                         from = 0;
2981                 }
2982         }
2983 
2984         /* if "length" position is negative, set it to the length
2985          * needed to stop that many chars from the end of the string
2986          */
2987         if (len < 0) {
2988                 len = (string.len - from) + len;
2989                 if (len < 0) {
2990                         len = 0;
2991                 }
2992         }
2993 
2994         if ((unsigned int)from > string.len) {
2995                 RETURN_FALSE;
2996         }
2997 
2998         ret = mbfl_strcut(&string, &result, from, len);
2999         if (ret == NULL) {
3000                 RETURN_FALSE;
3001         }
3002 
3003         // TODO: avoid reallocation ???
3004         RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3005         efree(ret->val);
3006 }
3007 /* }}} */
3008 
3009 /* {{{ proto int mb_strwidth(string str [, string encoding])
3010    Gets terminal width of a string */
3011 PHP_FUNCTION(mb_strwidth)
3012 {
3013         int n;
3014         mbfl_string string;
3015         char *enc_name = NULL;
3016         size_t enc_name_len, string_len;
3017 
3018         mbfl_string_init(&string);
3019 
3020         string.no_language = MBSTRG(language);
3021         string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3022 
3023         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", (char **)&string.val, &string_len, &enc_name, &enc_name_len) == FAILURE) {
3024                 return;
3025         }
3026 
3027         if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3028                         php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3029                         return;
3030         }
3031 
3032         string.len = (uint32_t)string_len;
3033 
3034         if (enc_name != NULL) {
3035                 string.no_encoding = mbfl_name2no_encoding(enc_name);
3036                 if (string.no_encoding == mbfl_no_encoding_invalid) {
3037                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc_name);
3038                         RETURN_FALSE;
3039                 }
3040         }
3041 
3042         n = mbfl_strwidth(&string);
3043         if (n >= 0) {
3044                 RETVAL_LONG(n);
3045         } else {
3046                 RETVAL_FALSE;
3047         }
3048 }
3049 /* }}} */
3050 
3051 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
3052    Trim the string in terminal width */
3053 PHP_FUNCTION(mb_strimwidth)
3054 {
3055         char *str, *trimmarker = NULL, *encoding = NULL;
3056         zend_long from, width;
3057         size_t str_len, trimmarker_len, encoding_len;
3058         mbfl_string string, result, marker, *ret;
3059 
3060         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
3061                 return;
3062         }
3063 
3064         mbfl_string_init(&string);
3065         mbfl_string_init(&marker);
3066         string.no_language = MBSTRG(language);
3067         string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3068         marker.no_language = MBSTRG(language);
3069         marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3070         marker.val = NULL;
3071         marker.len = 0;
3072 
3073         if (encoding) {
3074                 string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
3075                 if (string.no_encoding == mbfl_no_encoding_invalid) {
3076                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3077                         RETURN_FALSE;
3078                 }
3079         }
3080 
3081         string.val = (unsigned char *)str;
3082         string.len = str_len;
3083 
3084         if (from < 0 || (size_t)from > str_len) {
3085                 php_error_docref(NULL, E_WARNING, "Start position is out of range");
3086                 RETURN_FALSE;
3087         }
3088 
3089         if (width < 0) {
3090                 php_error_docref(NULL, E_WARNING, "Width is negative value");
3091                 RETURN_FALSE;
3092         }
3093 
3094         if (trimmarker) {
3095                 marker.val = (unsigned char *)trimmarker;
3096                 marker.len = trimmarker_len;
3097         }
3098 
3099         ret = mbfl_strimwidth(&string, &marker, &result, from, width);
3100 
3101         if (ret == NULL) {
3102                 RETURN_FALSE;
3103         }
3104         // TODO: avoid reallocation ???
3105         RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3106         efree(ret->val);
3107 }
3108 /* }}} */
3109 
3110 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
3111 MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
3112 {
3113         mbfl_string string, result, *ret;
3114         const mbfl_encoding *from_encoding, *to_encoding;
3115         mbfl_buffer_converter *convd;
3116         size_t size;
3117         const mbfl_encoding **list;
3118         char *output=NULL;
3119 
3120         if (output_len) {
3121                 *output_len = 0;
3122         }
3123         if (!input) {
3124                 return NULL;
3125         }
3126         /* new encoding */
3127         if (_to_encoding && strlen(_to_encoding)) {
3128                 to_encoding = mbfl_name2encoding(_to_encoding);
3129                 if (!to_encoding) {
3130                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
3131                         return NULL;
3132                 }
3133         } else {
3134                 to_encoding = MBSTRG(current_internal_encoding);
3135         }
3136 
3137         /* initialize string */
3138         mbfl_string_init(&string);
3139         mbfl_string_init(&result);
3140         from_encoding = MBSTRG(current_internal_encoding);
3141         string.no_encoding = from_encoding->no_encoding;
3142         string.no_language = MBSTRG(language);
3143         string.val = (unsigned char *)input;
3144         string.len = length;
3145 
3146         /* pre-conversion encoding */
3147         if (_from_encodings) {
3148                 list = NULL;
3149                 size = 0;
3150                 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0);
3151                 if (size == 1) {
3152                         from_encoding = *list;
3153                         string.no_encoding = from_encoding->no_encoding;
3154                 } else if (size > 1) {
3155                         /* auto detect */
3156                         from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
3157                         if (from_encoding) {
3158                                 string.no_encoding = from_encoding->no_encoding;
3159                         } else {
3160                                 php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
3161                                 from_encoding = &mbfl_encoding_pass;
3162                                 to_encoding = from_encoding;
3163                                 string.no_encoding = from_encoding->no_encoding;
3164                         }
3165                 } else {
3166                         php_error_docref(NULL, E_WARNING, "Illegal character encoding specified");
3167                 }
3168                 if (list != NULL) {
3169                         efree((void *)list);
3170                 }
3171         }
3172 
3173         /* initialize converter */
3174         convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3175         if (convd == NULL) {
3176                 php_error_docref(NULL, E_WARNING, "Unable to create character encoding converter");
3177                 return NULL;
3178         }
3179         mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3180         mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3181 
3182         /* do it */
3183         ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3184         if (ret) {
3185                 if (output_len) {
3186                         *output_len = ret->len;
3187                 }
3188                 output = (char *)ret->val;
3189         }
3190 
3191         MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3192         mbfl_buffer_converter_delete(convd);
3193         return output;
3194 }
3195 /* }}} */
3196 
3197 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3198    Returns converted string in desired encoding */
3199 PHP_FUNCTION(mb_convert_encoding)
3200 {
3201         char *arg_str, *arg_new;
3202         size_t str_len, new_len;
3203         zval *arg_old = NULL;
3204         size_t size, l, n;
3205         char *_from_encodings = NULL, *ret, *s_free = NULL;
3206 
3207         zval *hash_entry;
3208         HashTable *target_hash;
3209 
3210         if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3211                 return;
3212         }
3213 
3214         if (arg_old) {
3215                 switch (Z_TYPE_P(arg_old)) {
3216                         case IS_ARRAY:
3217                                 target_hash = Z_ARRVAL_P(arg_old);
3218                                 _from_encodings = NULL;
3219 
3220                                 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3221 
3222                                         convert_to_string_ex(hash_entry);
3223 
3224                                         if ( _from_encodings) {
3225                                                 l = strlen(_from_encodings);
3226                                                 n = strlen(Z_STRVAL_P(hash_entry));
3227                                                 _from_encodings = erealloc(_from_encodings, l+n+2);
3228                                                 memcpy(_from_encodings + l, ",", 1);
3229                                                 memcpy(_from_encodings + l + 1, Z_STRVAL_P(hash_entry), Z_STRLEN_P(hash_entry) + 1);
3230                                         } else {
3231                                                 _from_encodings = estrdup(Z_STRVAL_P(hash_entry));
3232                                         }
3233                                 } ZEND_HASH_FOREACH_END();
3234 
3235                                 if (_from_encodings != NULL && !strlen(_from_encodings)) {
3236                                         efree(_from_encodings);
3237                                         _from_encodings = NULL;
3238                                 }
3239                                 s_free = _from_encodings;
3240                                 break;
3241                         default:
3242                                 convert_to_string(arg_old);
3243                                 _from_encodings = Z_STRVAL_P(arg_old);
3244                                 break;
3245                         }
3246         }
3247 
3248         /* new encoding */
3249         ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size);
3250         if (ret != NULL) {
3251                 // TODO: avoid reallocation ???
3252                 RETVAL_STRINGL(ret, size);              /* the string is already strdup()'ed */
3253                 efree(ret);
3254         } else {
3255                 RETVAL_FALSE;
3256         }
3257 
3258         if ( s_free) {
3259                 efree(s_free);
3260         }
3261 }
3262 /* }}} */
3263 
3264 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3265    Returns a case-folded version of sourcestring */
3266 PHP_FUNCTION(mb_convert_case)
3267 {
3268         const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3269         char *str;
3270         size_t str_len, from_encoding_len;
3271         zend_long case_mode = 0;
3272         char *newstr;
3273         size_t ret_len;
3274 
3275         RETVAL_FALSE;
3276         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len,
3277                                 &case_mode, &from_encoding, &from_encoding_len) == FAILURE) {
3278                 return;
3279         }
3280 
3281         newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding);
3282 
3283         if (newstr) {
3284                 // TODO: avoid reallocation ???
3285                 RETVAL_STRINGL(newstr, ret_len);
3286                 efree(newstr);
3287         }
3288 }
3289 /* }}} */
3290 
3291 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3292  *  Returns a uppercased version of sourcestring
3293  */
3294 PHP_FUNCTION(mb_strtoupper)
3295 {
3296         const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3297         char *str;
3298         size_t str_len, from_encoding_len;
3299         char *newstr;
3300         size_t ret_len;
3301 
3302         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3303                                 &from_encoding, &from_encoding_len) == FAILURE) {
3304                 return;
3305         }
3306         newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding);
3307 
3308         if (newstr) {
3309                 // TODO: avoid reallocation ???
3310                 RETVAL_STRINGL(newstr, ret_len);
3311                 efree(newstr);
3312                 return;
3313         }
3314         RETURN_FALSE;
3315 }
3316 /* }}} */
3317 
3318 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3319  *  Returns a lowercased version of sourcestring
3320  */
3321 PHP_FUNCTION(mb_strtolower)
3322 {
3323         const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3324         char *str;
3325         size_t str_len, from_encoding_len;
3326         char *newstr;
3327         size_t ret_len;
3328 
3329         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
3330                                 &from_encoding, &from_encoding_len) == FAILURE) {
3331                 return;
3332         }
3333         newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding);
3334 
3335         if (newstr) {
3336                 // TODO: avoid reallocation ???
3337                 RETVAL_STRINGL(newstr, ret_len);
3338                 efree(newstr);
3339                 return;
3340         }
3341         RETURN_FALSE;
3342 }
3343 /* }}} */
3344 
3345 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3346    Encodings of the given string is returned (as a string) */
3347 PHP_FUNCTION(mb_detect_encoding)
3348 {
3349         char *str;
3350         size_t str_len;
3351         zend_bool strict=0;
3352         zval *encoding_list = NULL;
3353 
3354         mbfl_string string;
3355         const mbfl_encoding *ret;
3356         const mbfl_encoding **elist, **list;
3357         size_t size;
3358 
3359         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|z!b", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3360                 return;
3361         }
3362 
3363         /* make encoding list */
3364         list = NULL;
3365         size = 0;
3366         if (encoding_list) {
3367                 switch (Z_TYPE_P(encoding_list)) {
3368                 case IS_ARRAY:
3369                         if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0)) {
3370                                 if (list) {
3371                                         efree(list);
3372                                         list = NULL;
3373                                         size = 0;
3374                                 }
3375                         }
3376                         break;
3377                 default:
3378                         convert_to_string(encoding_list);
3379                         if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0)) {
3380                                 if (list) {
3381                                         efree(list);
3382                                         list = NULL;
3383                                         size = 0;
3384                                 }
3385                         }
3386                         break;
3387                 }
3388                 if (size <= 0) {
3389                         php_error_docref(NULL, E_WARNING, "Illegal argument");
3390                 }
3391         }
3392 
3393         if (ZEND_NUM_ARGS() < 3) {
3394                 strict = (zend_bool)MBSTRG(strict_detection);
3395         }
3396 
3397         if (size > 0 && list != NULL) {
3398                 elist = list;
3399         } else {
3400                 elist = MBSTRG(current_detect_order_list);
3401                 size = MBSTRG(current_detect_order_list_size);
3402         }
3403 
3404         mbfl_string_init(&string);
3405         string.no_language = MBSTRG(language);
3406         string.val = (unsigned char *)str;
3407         string.len = str_len;
3408         ret = mbfl_identify_encoding2(&string, elist, size, strict);
3409 
3410         if (list != NULL) {
3411                 efree((void *)list);
3412         }
3413 
3414         if (ret == NULL) {
3415                 RETURN_FALSE;
3416         }
3417 
3418         RETVAL_STRING((char *)ret->name);
3419 }
3420 /* }}} */
3421 
3422 /* {{{ proto mixed mb_list_encodings()
3423    Returns an array of all supported entity encodings */
3424 PHP_FUNCTION(mb_list_encodings)
3425 {
3426         const mbfl_encoding **encodings;
3427         const mbfl_encoding *encoding;
3428         int i;
3429 
3430         array_init(return_value);
3431         i = 0;
3432         encodings = mbfl_get_supported_encodings();
3433         while ((encoding = encodings[i++]) != NULL) {
3434                 add_next_index_string(return_value, (char *) encoding->name);
3435         }
3436 }
3437 /* }}} */
3438 
3439 /* {{{ proto array mb_encoding_aliases(string encoding)
3440    Returns an array of the aliases of a given encoding name */
3441 PHP_FUNCTION(mb_encoding_aliases)
3442 {
3443         const mbfl_encoding *encoding;
3444         char *name = NULL;
3445         size_t name_len;
3446 
3447         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
3448                 return;
3449         }
3450 
3451         encoding = mbfl_name2encoding(name);
3452         if (!encoding) {
3453                 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name);
3454                 RETURN_FALSE;
3455         }
3456 
3457         array_init(return_value);
3458         if (encoding->aliases != NULL) {
3459                 const char **alias;
3460                 for (alias = *encoding->aliases; *alias; ++alias) {
3461                         add_next_index_string(return_value, (char *)*alias);
3462                 }
3463         }
3464 }
3465 /* }}} */
3466 
3467 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3468    Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
3469 PHP_FUNCTION(mb_encode_mimeheader)
3470 {
3471         enum mbfl_no_encoding charset, transenc;
3472         mbfl_string  string, result, *ret;
3473         char *charset_name = NULL;
3474         size_t charset_name_len;
3475         char *trans_enc_name = NULL;
3476         size_t trans_enc_name_len;
3477         char *linefeed = "\r\n";
3478         size_t linefeed_len, string_len;
3479         zend_long indent = 0;
3480 
3481         mbfl_string_init(&string);
3482         string.no_language = MBSTRG(language);
3483         string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3484 
3485         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|sssl", (char **)&string.val, &string_len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3486                 return;
3487         }
3488 
3489         if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3490                         php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3491                         return;
3492         }
3493 
3494         string.len = (uint32_t)string_len;
3495 
3496         charset = mbfl_no_encoding_pass;
3497         transenc = mbfl_no_encoding_base64;
3498 
3499         if (charset_name != NULL) {
3500                 charset = mbfl_name2no_encoding(charset_name);
3501                 if (charset == mbfl_no_encoding_invalid) {
3502                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3503                         RETURN_FALSE;
3504                 }
3505         } else {
3506                 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3507                 if (lang != NULL) {
3508                         charset = lang->mail_charset;
3509                         transenc = lang->mail_header_encoding;
3510                 }
3511         }
3512 
3513         if (trans_enc_name != NULL) {
3514                 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3515                         transenc = mbfl_no_encoding_base64;
3516                 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3517                         transenc = mbfl_no_encoding_qprint;
3518                 }
3519         }
3520 
3521         mbfl_string_init(&result);
3522         ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3523         if (ret != NULL) {
3524                 // TODO: avoid reallocation ???
3525                 RETVAL_STRINGL((char *)ret->val, ret->len);     /* the string is already strdup()'ed */
3526                 efree(ret->val);
3527         } else {
3528                 RETVAL_FALSE;
3529         }
3530 }
3531 /* }}} */
3532 
3533 /* {{{ proto string mb_decode_mimeheader(string string)
3534    Decodes the MIME "encoded-word" in the string */
3535 PHP_FUNCTION(mb_decode_mimeheader)
3536 {
3537         mbfl_string string, result, *ret;
3538         size_t string_len;
3539 
3540         mbfl_string_init(&string);
3541         string.no_language = MBSTRG(language);
3542         string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3543 
3544         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string_len) == FAILURE) {
3545                 return;
3546         }
3547 
3548         if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3549                         php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3550                         return;
3551         }
3552 
3553         string.len = (uint32_t)string_len;
3554 
3555         mbfl_string_init(&result);
3556         ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3557         if (ret != NULL) {
3558                 // TODO: avoid reallocation ???
3559                 RETVAL_STRINGL((char *)ret->val, ret->len);     /* the string is already strdup()'ed */
3560                 efree(ret->val);
3561         } else {
3562                 RETVAL_FALSE;
3563         }
3564 }
3565 /* }}} */
3566 
3567 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3568    Conversion between full-width character and half-width character (Japanese) */
3569 PHP_FUNCTION(mb_convert_kana)
3570 {
3571         int opt, i;
3572         mbfl_string string, result, *ret;
3573         char *optstr = NULL;
3574         size_t optstr_len;
3575         char *encname = NULL;
3576         size_t encname_len, string_len;
3577 
3578         mbfl_string_init(&string);
3579         string.no_language = MBSTRG(language);
3580         string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3581 
3582         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|ss", (char **)&string.val, &string_len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3583                 return;
3584         }
3585 
3586         if (ZEND_SIZE_T_UINT_OVFL(string_len)) {
3587                         php_error_docref(NULL, E_WARNING, "String length overflows the max allowed length of %u", UINT_MAX);
3588                         return;
3589         }
3590 
3591         string.len = (uint32_t)string_len;
3592 
3593         /* option */
3594         if (optstr != NULL) {
3595                 char *p = optstr;
3596                 int n = optstr_len;
3597                 i = 0;
3598                 opt = 0;
3599                 while (i < n) {
3600                         i++;
3601                         switch (*p++) {
3602                         case 'A':
3603                                 opt |= 0x1;
3604                                 break;
3605                         case 'a':
3606                                 opt |= 0x10;
3607                                 break;
3608                         case 'R':
3609                                 opt |= 0x2;
3610                                 break;
3611                         case 'r':
3612                                 opt |= 0x20;
3613                                 break;
3614                         case 'N':
3615                                 opt |= 0x4;
3616                                 break;
3617                         case 'n':
3618                                 opt |= 0x40;
3619                                 break;
3620                         case 'S':
3621                                 opt |= 0x8;
3622                                 break;
3623                         case 's':
3624                                 opt |= 0x80;
3625                                 break;
3626                         case 'K':
3627                                 opt |= 0x100;
3628                                 break;
3629                         case 'k':
3630                                 opt |= 0x1000;
3631                                 break;
3632                         case 'H':
3633                                 opt |= 0x200;
3634                                 break;
3635                         case 'h':
3636                                 opt |= 0x2000;
3637                                 break;
3638                         case 'V':
3639                                 opt |= 0x800;
3640                                 break;
3641                         case 'C':
3642                                 opt |= 0x10000;
3643                                 break;
3644                         case 'c':
3645                                 opt |= 0x20000;
3646                                 break;
3647                         case 'M':
3648                                 opt |= 0x100000;
3649                                 break;
3650                         case 'm':
3651                                 opt |= 0x200000;
3652                                 break;
3653                         }
3654                 }
3655         } else {
3656                 opt = 0x900;
3657         }
3658 
3659         /* encoding */
3660         if (encname != NULL) {
3661                 string.no_encoding = mbfl_name2no_encoding(encname);
3662                 if (string.no_encoding == mbfl_no_encoding_invalid) {
3663                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encname);
3664                         RETURN_FALSE;
3665                 }
3666         }
3667 
3668         ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3669         if (ret != NULL) {
3670                 // TODO: avoid reallocation ???
3671                 RETVAL_STRINGL((char *)ret->val, ret->len);             /* the string is already strdup()'ed */
3672                 efree(ret->val);
3673         } else {
3674                 RETVAL_FALSE;
3675         }
3676 }
3677 /* }}} */
3678 
3679 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3680 
3681 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3682    Converts the string resource in variables to desired encoding */
3683 PHP_FUNCTION(mb_convert_variables)
3684 {
             /*
              * Overview:
              *  1. Parse (to-encoding, from-encoding, vars...) and resolve the target
              *     encoding; an unknown name raises a warning and returns FALSE.
              *  2. Build the candidate source-encoding list from the second argument
              *     (array, or anything else converted to a string list).
              *  3. No candidate -> "pass" encoding (nothing is converted); one
              *     candidate -> use it; several -> feed every string found in the
              *     variables to an encoding detector and take its verdict.
              *  4. Unless the source is "pass", walk the variables again and replace
              *     every string in place with its converted form.
              *  5. Return the name of the source encoding that was used.
              *
              * Nested arrays/objects are walked iteratively: the container being
              * visited is saved on a heap-allocated stack before descending, and is
              * resumed later from its hash table's internal pointer.
              */
3685         zval *args, *stack, *var, *hash_entry, *hash_entry_ptr, *zfrom_enc;
3686         HashTable *target_hash;
3687         mbfl_string string, result, *ret;
3688         const mbfl_encoding *from_encoding, *to_encoding;
3689         mbfl_encoding_detector *identd;
3690         mbfl_buffer_converter *convd;
3691         int n, argc, stack_level, stack_max;
3692         size_t to_enc_len;
3693         size_t elistsz;
3694         const mbfl_encoding **elist;
3695         char *to_enc;
3696         void *ptmp;
3697 
3698         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3699                 return;
3700         }
3701 
3702         /* new encoding */
3703         to_encoding = mbfl_name2encoding(to_enc);
3704         if (!to_encoding) {
3705                 php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3706                 RETURN_FALSE;
3707         }
3708 
3709         /* initialize string */
3710         mbfl_string_init(&string);
3711         mbfl_string_init(&result);
3712         from_encoding = MBSTRG(current_internal_encoding);
3713         string.no_encoding = from_encoding->no_encoding;
3714         string.no_language = MBSTRG(language);
3715 
3716         /* pre-conversion encoding */
3717         elist = NULL;
3718         elistsz = 0;
3719         switch (Z_TYPE_P(zfrom_enc)) {
3720                 case IS_ARRAY:
3721                         php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0);
3722                         break;
3723                 default:
                             /* any non-array value is converted to a string and parsed as an encoding list */
3724                         convert_to_string_ex(zfrom_enc);
3725                         php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0);
3726                         break;
3727         }
3728 
             /* elistsz is unsigned, so "<= 0" is effectively "== 0": no usable candidate */
3729         if (elistsz <= 0) {
3730                 from_encoding = &mbfl_encoding_pass;
3731         } else if (elistsz == 1) {
3732                 from_encoding = *elist;
3733         } else {
3734                 /* auto detect */
3735                 from_encoding = NULL;
3736                 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3737                 stack = (zval *)safe_emalloc(stack_max, sizeof(zval), 0);
3738                 stack_level = 0;
3739                 identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3740                 if (identd != NULL) {
3741                         n = 0;
                             /* run until every argument is consumed and no saved container is pending */
3742                         while (n < argc || stack_level > 0) {
3743                                 if (stack_level <= 0) {
                                             /* nothing pending: start on the next top-level argument */
3744                                         var = &args[n++];
3745                                         ZVAL_DEREF(var);
3746                                         SEPARATE_ZVAL_NOREF(var);
3747                                         if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3748                                                 target_hash = HASH_OF(var);
3749                                                 if (target_hash != NULL) {
3750                                                         zend_hash_internal_pointer_reset(target_hash);
3751                                                 }
3752                                         }
3753                                 } else {
                                             /* resume a saved container; its internal pointer is NOT reset,
                                              * so iteration continues after the entry that was descended into */
3754                                         stack_level--;
3755                                         var = &stack[stack_level];
3756                                 }
3757                                 if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3758                                         target_hash = HASH_OF(var);
3759                                         if (target_hash != NULL) {
3760                                                 while ((hash_entry = zend_hash_get_current_data(target_hash)) != NULL) {
                                                             /* advance first so the position is already correct if this container is suspended below */
3761                                                         zend_hash_move_forward(target_hash);
3762                                                         if (Z_TYPE_P(hash_entry) == IS_INDIRECT) {
3763                                                                 hash_entry = Z_INDIRECT_P(hash_entry);
3764                                                         }
3765                                                         ZVAL_DEREF(hash_entry);
3766                                                         if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
                                                                     /* grow the stack one block at a time when it is full */
3767                                                                 if (stack_level >= stack_max) {
3768                                                                         stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3769                                                                         ptmp = erealloc(stack, sizeof(zval) * stack_max);
3770                                                                         stack = (zval *)ptmp;
3771                                                                 }
                                                                     /* save the current container, then descend into the nested one */
3772                                                                 ZVAL_COPY_VALUE(&stack[stack_level], var);
3773                                                                 stack_level++;
3774                                                                 var = hash_entry;
3775                                                                 target_hash = HASH_OF(var);
3776                                                                 if (target_hash != NULL) {
3777                                                                         zend_hash_internal_pointer_reset(target_hash);
                                                                             /* the inner while now iterates the nested table */
3778                                                                         continue;
3779                                                                 }
                                                                     /* NOTE(review): if HASH_OF() returned NULL here the loop condition
                                                                      * would be evaluated with target_hash == NULL - confirm this cannot happen */
3780                                                         } else if (Z_TYPE_P(hash_entry) == IS_STRING) {
3781                                                                 string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
3782                                                                 string.len = Z_STRLEN_P(hash_entry);
3783                                                                 if (mbfl_encoding_detector_feed(identd, &string)) {
3784                                                                         goto detect_end;                /* complete detecting */
3785                                                                 }
3786                                                         }
3787                                                 }
3788                                         }
3789                                 } else if (Z_TYPE_P(var) == IS_STRING) {
3790                                         string.val = (unsigned char *)Z_STRVAL_P(var);
3791                                         string.len = Z_STRLEN_P(var);
3792                                         if (mbfl_encoding_detector_feed(identd, &string)) {
3793                                                 goto detect_end;                /* complete detecting */
3794                                         }
3795                                 }
3796                         }
3797 detect_end:
                             /* reached both on early completion and after all input has been fed */
3798                         from_encoding = mbfl_encoding_detector_judge2(identd);
3799                         mbfl_encoding_detector_delete(identd);
3800                 }
3801                 efree(stack);
3802 
                     /* detection failure is not fatal: warn and leave the variables unconverted */
3803                 if (!from_encoding) {
3804                         php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
3805                         from_encoding = &mbfl_encoding_pass;
3806                 }
3807         }
3808         if (elist != NULL) {
3809                 efree((void *)elist);
3810         }
3811         /* create converter */
3812         convd = NULL;
3813         if (from_encoding != &mbfl_encoding_pass) {
3814                 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3815                 if (convd == NULL) {
3816                         php_error_docref(NULL, E_WARNING, "Unable to create converter");
3817                         RETURN_FALSE;
3818                 }
3819                 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3820                 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3821         }
3822 
3823         /* convert */
             /* same stack-based walk as the detection pass, but strings are rewritten in place */
3824         if (convd != NULL) {
3825                 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3826                 stack = (zval*)safe_emalloc(stack_max, sizeof(zval), 0);
3827                 stack_level = 0;
3828                 n = 0;
3829                 while (n < argc || stack_level > 0) {
3830                         if (stack_level <= 0) {
3831                                 var = &args[n++];
3832                                 ZVAL_DEREF(var);
3833                                 SEPARATE_ZVAL_NOREF(var);
3834                                 if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3835                                         target_hash = HASH_OF(var);
3836                                         if (target_hash != NULL) {
3837                                                 zend_hash_internal_pointer_reset(target_hash);
3838                                         }
3839                                 }
3840                         } else {
3841                                 stack_level--;
3842                                 var = &stack[stack_level];
3843                         }
3844                         if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
3845                                 target_hash = HASH_OF(var);
3846                                 if (target_hash != NULL) {
3847                                         while ((hash_entry_ptr = zend_hash_get_current_data(target_hash)) != NULL) {
3848                                                 zend_hash_move_forward(target_hash);
3849                                                 if (Z_TYPE_P(hash_entry_ptr) == IS_INDIRECT) {
3850                                                         hash_entry_ptr = Z_INDIRECT_P(hash_entry_ptr);
3851                                                 }
                                                     /* hash_entry_ptr keeps the slot itself; hash_entry is its dereferenced value */
3852                                                 hash_entry = hash_entry_ptr;
3853                                                 ZVAL_DEREF(hash_entry);
3854                                                 if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
3855                                                         if (stack_level >= stack_max) {
3856                                                                 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3857                                                                 ptmp = erealloc(stack, sizeof(zval) * stack_max);
3858                                                                 stack = (zval *)ptmp;
3859                                                         }
3860                                                         ZVAL_COPY_VALUE(&stack[stack_level], var);
3861                                                         stack_level++;
3862                                                         var = hash_entry;
                                                             /* unlike the detection pass, the nested value is separated before it is modified */
3863                                                         SEPARATE_ZVAL(hash_entry);
3864                                                         target_hash = HASH_OF(var);
3865                                                         if (target_hash != NULL) {
3866                                                                 zend_hash_internal_pointer_reset(target_hash);
3867                                                                 continue;
3868                                                         }
3869                                                 } else if (Z_TYPE_P(hash_entry) == IS_STRING) {
3870                                                         string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
3871                                                         string.len = Z_STRLEN_P(hash_entry);
3872                                                         ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3873                                                         if (ret != NULL) {
                                                                     /* the slot (hash_entry_ptr), not the dereferenced value, is destroyed and overwritten */
3874                                                                 zval_ptr_dtor(hash_entry_ptr);
3875                                                                 // TODO: avoid reallocation ???
3876                                                                 ZVAL_STRINGL(hash_entry_ptr, (char *)ret->val, ret->len);
3877                                                                 efree(ret->val);
3878                                                         }
3879                                                 }
3880                                         }
3881                                 }
3882                         } else if (Z_TYPE_P(var) == IS_STRING) {
3883                                 string.val = (unsigned char *)Z_STRVAL_P(var);
3884                                 string.len = Z_STRLEN_P(var);
3885                                 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3886                                 if (ret != NULL) {
3887                                         zval_ptr_dtor(var);
3888                                         // TODO: avoid reallocation ???
3889                                         ZVAL_STRINGL(var, (char *)ret->val, ret->len);
3890                                         efree(ret->val);
3891                                 }
3892                         }
3893                 }
3894                 efree(stack);
3895 
                     /* add this converter's illegal-character count to the module-wide counter */
3896                 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3897                 mbfl_buffer_converter_delete(convd);
3898         }
3899 
             /* the return value is the source encoding's name, including the "pass" encoding when nothing was converted */
3900         if (from_encoding) {
3901                 RETURN_STRING(from_encoding->name);
3902         } else {
3903                 RETURN_FALSE;
3904         }
3905 }
3906 /* }}} */
3907 
3908 /* {{{ HTML numeric entity */
3909 /* {{{ static void php_mb_numericentity_exec() */
3910 static void
3911 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3912 {
3913         char *str, *encoding = NULL;
3914         size_t str_len, encoding_len;
3915         zval *zconvmap, *hash_entry;
3916         HashTable *target_hash;
3917         int i, *convmap, *mapelm, mapsize=0;
3918         zend_bool is_hex = 0;
3919         mbfl_string string, result, *ret;
3920         enum mbfl_no_encoding no_encoding;
3921 
3922         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3923                 return;
3924         }
3925 
3926         mbfl_string_init(&string);
3927         string.no_language = MBSTRG(language);
3928         string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3929         string.val = (unsigned char *)str;
3930         string.len = str_len;
3931 
3932         /* encoding */
3933         if (encoding && encoding_len > 0) {
3934                 no_encoding = mbfl_name2no_encoding(encoding);
3935                 if (no_encoding == mbfl_no_encoding_invalid) {
3936                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", encoding);
3937                         RETURN_FALSE;
3938                 } else {
3939                         string.no_encoding = no_encoding;
3940                 }
3941         }
3942 
3943         if (type == 0 && is_hex) {
3944                 type = 2; /* output in hex format */
3945         }
3946 
3947         /* conversion map */
3948         convmap = NULL;
3949         if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3950                 target_hash = Z_ARRVAL_P(zconvmap);
3951                 i = zend_hash_num_elements(target_hash);
3952                 if (i > 0) {
3953                         convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3954                         mapelm = convmap;
3955                         mapsize = 0;
3956                         ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3957                                 convert_to_long_ex(hash_entry);
3958                                 *mapelm++ = Z_LVAL_P(hash_entry);
3959                                 mapsize++;
3960                         } ZEND_HASH_FOREACH_END();
3961                 }
3962         }
3963         if (convmap == NULL) {
3964                 RETURN_FALSE;
3965         }
3966         mapsize /= 4;
3967 
3968         ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3969         if (ret != NULL) {
3970                 // TODO: avoid reallocation ???
3971                 RETVAL_STRINGL((char *)ret->val, ret->len);
3972                 efree(ret->val);
3973         } else {
3974                 RETVAL_FALSE;
3975         }
3976         efree((void *)convmap);
3977 }
3978 /* }}} */
3979 
3980 /* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
3981    Converts specified characters to HTML numeric entities */
3982 PHP_FUNCTION(mb_encode_numericentity)
3983 {
             /* thin wrapper: forwards all PHP arguments to the shared implementation; type 0 = encode */
3984         php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
3985 }
3986 /* }}} */
3987 
3988 /* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
3989    Converts HTML numeric entities to character code */
3990 PHP_FUNCTION(mb_decode_numericentity)
3991 {
             /* thin wrapper: forwards all PHP arguments to the shared implementation; type 1 = decode */
3992         php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
3993 }
3994 /* }}} */
3995 /* }}} */
3996 
3997 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3998  *  Sends an email message with MIME scheme
3999  */
4000 
/*
 * Skips a folded-header separator inside a scanning loop.
 *
 * If str[pos] starts "\r\n" followed by a space or tab, pos is advanced past
 * the CRLF and then further while the character after pos is a space or tab,
 * and the enclosing loop is restarted with `continue`.
 *
 * The expansion contains a bare `continue`, so it is only valid inside a loop
 * body. It reads str[pos + 1] and str[pos + 2] without any length check;
 * presumably callers pass a NUL-terminated buffer - verify at each use site.
 */
4001 #define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)                                                                         \
4002         if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) {        \
4003                 pos += 2;                                                                                       \
4004                 while (str[pos + 1] == ' ' || str[pos + 1] == '\t') {                                                   \
4005                         pos++;                                                                                  \
4006                 }                                                                                               \
4007                 continue;                                                                                       \
4008         }
4009 
/*
 * Replaces every NUL byte in the len bytes starting at str with a space,
 * modifying the buffer in place.
 *
 * The expansion assigns to `pp` and `ee`, which it does not declare: the
 * calling scope must provide both as character pointers. str and len are each
 * evaluated once.
 */
4010 #define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)                    \
4011         pp = str;                                       \
4012         ee = pp + len;                                  \
4013         while ((pp = memchr(pp, '\0', (ee - pp)))) {    \
4014                 *pp = ' ';                              \
4015         }                                               \
4016 
4017 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
4018 {
4019         const char *ps;
4020         size_t icnt;
4021         int state = 0;
4022         int crlf_state = -1;
4023         char *token = NULL;
4024         size_t token_pos = 0;
4025         zend_string *fld_name, *fld_val;
4026 
4027         ps = str;
4028         icnt = str_len;
4029         fld_name = fld_val = NULL;
4030 
4031         /*
4032          *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
4033          *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
4034          *      state  0            1           2          3
4035          *
4036          *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
4037          *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
4038          * crlf_state -1                       0                     1 -1
4039          *
4040          */
4041 
4042         while (icnt > 0) {
4043                 switch (*ps) {
4044                         case ':':
4045                                 if (crlf_state == 1) {
4046                                         token_pos++;
4047                                 }
4048 
4049                                 if (state == 0 || state == 1) {
4050                                         if(token && token_pos > 0) {
4051                                                 fld_name = zend_string_init(token, token_pos, 0);
4052                                         }
4053                                         state = 2;
4054                                 } else {
4055                                         token_pos++;
4056                                 }
4057 
4058                                 crlf_state = 0;
4059                                 break;
4060 
4061                         case '\n':
4062                                 if (crlf_state == -1) {
4063                                         goto out;
4064                                 }
4065                                 crlf_state = -1;
4066                                 break;
4067 
4068                         case '\r':
4069                                 if (crlf_state == 1) {
4070                                         token_pos++;
4071                                 } else {
4072                                         crlf_state = 1;
4073                                 }
4074                                 break;
4075 
4076                         case ' ': case '\t':
4077                                 if (crlf_state == -1) {
4078                                         if (state == 3) {
4079                                                 /* continuing from the previous line */
4080                                                 state = 4;
4081                                         } else {
4082                                                 /* simply skipping this new line */
4083                                                 state = 5;
4084                                         }
4085                                 } else {
4086                                         if (crlf_state == 1) {
4087                                                 token_pos++;
4088                                         }
4089                                         if (state == 1 || state == 3) {
4090                                                 token_pos++;
4091                                         }
4092                                 }
4093                                 crlf_state = 0;
4094                                 break;
4095 
4096                         default:
4097                                 switch (state) {
4098                                         case 0:
4099                                                 token = (char*)ps;
4100                                                 token_pos = 0;
4101                                                 state = 1;
4102                                                 break;
4103 
4104                                         case 2:
4105                                                 if (crlf_state != -1) {
4106                                                         token = (char*)ps;
4107                                                         token_pos = 0;
4108 
4109                                                         state = 3;
4110                                                         break;
4111                                                 }
4112                                                 /* break is missing intentionally */
4113 
4114                                         case 3:
4115                                                 if (crlf_state == -1) {
4116                                                         if(token && token_pos > 0) {
4117                                                                 fld_val = zend_string_init(token, token_pos, 0);
4118                                                         }
4119 
4120                                                         if (fld_name != NULL && fld_val != NULL) {
4121                                                                 zval val;
4122                                                                 /* FIXME: some locale free implementation is
4123                                                                  * really required here,,, */
4124                                                                 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4125                                                                 ZVAL_STR(&val, fld_val);
4126 
4127                                                                 zend_hash_update(ht, fld_name, &val);
4128 
4129                                                                 zend_string_release(fld_name);
4130                                                         }
4131 
4132                                                         fld_name = fld_val = NULL;
4133                                                         token = (char*)ps;
4134                                                         token_pos = 0;
4135 
4136                                                         state = 1;
4137                                                 }
4138                                                 break;
4139 
4140                                         case 4:
4141                                                 token_pos++;
4142                                                 state = 3;
4143                                                 break;
4144                                 }
4145 
4146                                 if (crlf_state == 1) {
4147                                         token_pos++;
4148                                 }
4149 
4150                                 token_pos++;
4151 
4152                                 crlf_state = 0;
4153                                 break;
4154                 }
4155                 ps++, icnt--;
4156         }
4157 out:
4158         if (state == 2) {
4159                 token = "";
4160                 token_pos = 0;
4161 
4162                 state = 3;
4163         }
4164         if (state == 3) {
4165                 if(token && token_pos > 0) {
4166                         fld_val = zend_string_init(token, token_pos, 0);
4167                 }
4168                 if (fld_name != NULL && fld_val != NULL) {
4169                         zval val;
4170                         /* FIXME: some locale free implementation is
4171                          * really required here,,, */
4172                         php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
4173                         ZVAL_STR(&val, fld_val);
4174 
4175                         zend_hash_update(ht, fld_name, &val);
4176 
4177                         zend_string_release(fld_name);
4178                 }
4179         }
4180         return state;
4181 }
4182 
4183 PHP_FUNCTION(mb_send_mail)
4184 {
4185         int n;
4186         char *to = NULL;
4187         size_t to_len;
4188         char *message = NULL;
4189         size_t message_len;
4190         char *headers = NULL;
4191         size_t headers_len;
4192         char *subject = NULL;
4193         zend_string *extra_cmd = NULL;
4194         size_t subject_len;
4195         int i;
4196         char *to_r = NULL;
4197         char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4198         struct {
4199                 int cnt_type:1;
4200                 int cnt_trans_enc:1;
4201         } suppressed_hdrs = { 0, 0 };
4202 
4203         char *message_buf = NULL, *subject_buf = NULL, *p;
4204         mbfl_string orig_str, conv_str;
4205         mbfl_string *pstr;      /* pointer to mbfl string for return value */
4206         enum mbfl_no_encoding
4207                 tran_cs,        /* transfar text charset */
4208                 head_enc,       /* header transfar encoding */
4209                 body_enc;       /* body transfar encoding */
4210         mbfl_memory_device device;      /* automatic allocateable buffer for additional header */
4211         const mbfl_language *lang;
4212         int err = 0;
4213         HashTable ht_headers;
4214         zval *s;
4215         extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4216         char *pp, *ee;
4217 
4218         /* initialize */
4219         mbfl_memory_device_init(&device, 0, 0);
4220         mbfl_string_init(&orig_str);
4221         mbfl_string_init(&conv_str);
4222 
4223         /* character-set, transfer-encoding */
4224         tran_cs = mbfl_no_encoding_utf8;
4225         head_enc = mbfl_no_encoding_base64;
4226         body_enc = mbfl_no_encoding_base64;
4227         lang = mbfl_no2language(MBSTRG(language));
4228         if (lang != NULL) {
4229                 tran_cs = lang->mail_charset;
4230                 head_enc = lang->mail_header_encoding;
4231                 body_enc = lang->mail_body_encoding;
4232         }
4233 
4234         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|sS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd) == FAILURE) {
4235                 return;
4236         }
4237 
4238         /* ASCIIZ check */
4239         MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4240         MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4241         MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4242         if (headers) {
4243                 MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
4244         }
4245         if (extra_cmd) {
4246                 MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
4247         }
4248 
4249         zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
4250 
4251         if (headers != NULL) {
4252                 _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
4253         }
4254 
4255         if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
4256                 char *tmp;
4257                 char *param_name;
4258                 char *charset = NULL;
4259 
4260                 ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4261                 p = strchr(Z_STRVAL_P(s), ';');
4262 
4263                 if (p != NULL) {
4264                         /* skipping the padded spaces */
4265                         do {
4266                                 ++p;
4267                         } while (*p == ' ' || *p == '\t');
4268 
4269                         if (*p != '\0') {
4270                                 if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4271                                         if (strcasecmp(param_name, "charset") == 0) {
4272                                                 enum mbfl_no_encoding _tran_cs = tran_cs;
4273 
4274                                                 charset = php_strtok_r(NULL, "= \"", &tmp);
4275                                                 if (charset != NULL) {
4276                                                         _tran_cs = mbfl_name2no_encoding(charset);
4277                                                 }
4278 
4279                                                 if (_tran_cs == mbfl_no_encoding_invalid) {
4280                                                         php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4281                                                         _tran_cs = mbfl_no_encoding_ascii;
4282                                                 }
4283                                                 tran_cs = _tran_cs;
4284                                         }
4285                                 }
4286                         }
4287                 }
4288                 suppressed_hdrs.cnt_type = 1;
4289         }
4290 
4291         if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
4292                 enum mbfl_no_encoding _body_enc;
4293 
4294                 ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
4295                 _body_enc = mbfl_name2no_encoding(Z_STRVAL_P(s));
4296                 switch (_body_enc) {
4297                         case mbfl_no_encoding_base64:
4298                         case mbfl_no_encoding_7bit:
4299                         case mbfl_no_encoding_8bit:
4300                                 body_enc = _body_enc;
4301                                 break;
4302 
4303                         default:
4304                                 php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
4305                                 body_enc =      mbfl_no_encoding_8bit;
4306                                 break;
4307                 }
4308                 suppressed_hdrs.cnt_trans_enc = 1;
4309         }
4310 
4311         /* To: */
4312         if (to != NULL) {
4313                 if (to_len > 0) {
4314                         to_r = estrndup(to, to_len);
4315                         for (; to_len; to_len--) {
4316                                 if (!isspace((unsigned char) to_r[to_len - 1])) {
4317                                         break;
4318                                 }
4319                                 to_r[to_len - 1] = '\0';
4320                         }
4321                         for (i = 0; to_r[i]; i++) {
4322                         if (iscntrl((unsigned char) to_r[i])) {
4323                                 /* According to RFC 822, section 3.1.1 long headers may be separated into
4324                                  * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4325                                  * To prevent these separators from being replaced with a space, we use the
4326                                  * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4327                                  */
4328                                 SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4329                                 to_r[i] = ' ';
4330                         }
4331                         }
4332                 } else {
4333                         to_r = to;
4334                 }
4335         } else {
4336                 php_error_docref(NULL, E_WARNING, "Missing To: field");
4337                 err = 1;
4338         }
4339 
4340         /* Subject: */
4341         if (subject != NULL) {
4342                 orig_str.no_language = MBSTRG(language);
4343                 orig_str.val = (unsigned char *)subject;
4344                 orig_str.len = subject_len;
4345                 orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4346                 if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4347                         const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4348                         orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4349                 }
4350                 pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4351                 if (pstr != NULL) {
4352                         subject_buf = subject = (char *)pstr->val;
4353                 }
4354         } else {
4355                 php_error_docref(NULL, E_WARNING, "Missing Subject: field");
4356                 err = 1;
4357         }
4358 
4359         /* message body */
4360         if (message != NULL) {
4361                 orig_str.no_language = MBSTRG(language);
4362                 orig_str.val = (unsigned char *)message;
4363                 orig_str.len = (unsigned int)message_len;
4364                 orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4365 
4366                 if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4367                         const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4368                         orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4369                 }
4370 
4371                 pstr = NULL;
4372                 {
4373                         mbfl_string tmpstr;
4374 
4375                         if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4376                                 tmpstr.no_encoding=mbfl_no_encoding_8bit;
4377                                 pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4378                                 efree(tmpstr.val);
4379                         }
4380                 }
4381                 if (pstr != NULL) {
4382                         message_buf = message = (char *)pstr->val;
4383                 }
4384         } else {
4385                 /* this is not really an error, so it is allowed. */
4386                 php_error_docref(NULL, E_WARNING, "Empty message body");
4387                 message = NULL;
4388         }
4389 
4390         /* other headers */
4391 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4392 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4393 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4394 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4395         if (headers != NULL) {
4396                 p = headers;
4397                 n = headers_len;
4398                 mbfl_memory_device_strncat(&device, p, n);
4399                 if (n > 0 && p[n - 1] != '\n') {
4400                         mbfl_memory_device_strncat(&device, "\n", 1);
4401                 }
4402         }
4403 
4404         if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4405                 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4406                 mbfl_memory_device_strncat(&device, "\n", 1);
4407         }
4408 
4409         if (!suppressed_hdrs.cnt_type) {
4410                 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4411 
4412                 p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4413                 if (p != NULL) {
4414                         mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4415                         mbfl_memory_device_strcat(&device, p);
4416                 }
4417                 mbfl_memory_device_strncat(&device, "\n", 1);
4418         }
4419         if (!suppressed_hdrs.cnt_trans_enc) {
4420                 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4421                 p = (char *)mbfl_no2preferred_mime_name(body_enc);
4422                 if (p == NULL) {
4423                         p = "7bit";
4424                 }
4425                 mbfl_memory_device_strcat(&device, p);
4426                 mbfl_memory_device_strncat(&device, "\n", 1);
4427         }
4428 
4429         mbfl_memory_device_unput(&device);
4430         mbfl_memory_device_output('\0', &device);
4431         headers = (char *)device.buffer;
4432 
4433         if (force_extra_parameters) {
4434                 extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4435         } else if (extra_cmd) {
4436                 extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
4437         }
4438 
4439         if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
4440                 RETVAL_TRUE;
4441         } else {
4442                 RETVAL_FALSE;
4443         }
4444 
4445         if (extra_cmd) {
4446                 zend_string_release(extra_cmd);
4447         }
4448 
4449         if (to_r != to) {
4450                 efree(to_r);
4451         }
4452         if (subject_buf) {
4453                 efree((void *)subject_buf);
4454         }
4455         if (message_buf) {
4456                 efree((void *)message_buf);
4457         }
4458         mbfl_memory_device_clear(&device);
4459         zend_hash_destroy(&ht_headers);
4460 }
4461 
4462 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4463 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4464 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4465 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4466 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4467 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4468 /* }}} */
4469 
4470 /* {{{ proto mixed mb_get_info([string type])
4471    Returns the current settings of mbstring */
4472 PHP_FUNCTION(mb_get_info)
4473 {
4474         char *typ = NULL;
4475         size_t typ_len;
4476         size_t n;
4477         char *name;
4478         const struct mb_overload_def *over_func;
4479         zval row1, row2;
4480         const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4481         const mbfl_encoding **entry;
4482 
4483         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
4484                 return;
4485         }
4486 
4487         if (!typ || !strcasecmp("all", typ)) {
4488                 array_init(return_value);
4489                 if (MBSTRG(current_internal_encoding)) {
4490                         add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
4491                 }
4492                 if (MBSTRG(http_input_identify)) {
4493                         add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
4494                 }
4495                 if (MBSTRG(current_http_output_encoding)) {
4496                         add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
4497                 }
4498                 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4499                         add_assoc_string(return_value, "http_output_conv_mimetypes", name);
4500                 }
4501                 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4502                 if (MBSTRG(func_overload)){
4503                         over_func = &(mb_ovld[0]);
4504                         array_init(&row1);
4505                         while (over_func->type > 0) {
4506                                 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4507                                         add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
4508                                 }
4509                                 over_func++;
4510                         }
4511                         add_assoc_zval(return_value, "func_overload_list", &row1);
4512                 } else {
4513                         add_assoc_string(return_value, "func_overload_list", "no overload");
4514                 }
4515                 if (lang != NULL) {
4516                         if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4517                                 add_assoc_string(return_value, "mail_charset", name);
4518                         }
4519                         if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4520                                 add_assoc_string(return_value, "mail_header_encoding", name);
4521                         }
4522                         if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4523                                 add_assoc_string(return_value, "mail_body_encoding", name);
4524                         }
4525                 }
4526                 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4527                 if (MBSTRG(encoding_translation)) {
4528                         add_assoc_string(return_value, "encoding_translation", "On");
4529                 } else {
4530                         add_assoc_string(return_value, "encoding_translation", "Off");
4531                 }
4532                 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4533                         add_assoc_string(return_value, "language", name);
4534                 }
4535                 n = MBSTRG(current_detect_order_list_size);
4536                 entry = MBSTRG(current_detect_order_list);
4537                 if (n > 0) {
4538                         size_t i;
4539                         array_init(&row2);
4540                         for (i = 0; i < n; i++) {
4541                                 add_next_index_string(&row2, (*entry)->name);
4542                                 entry++;
4543                         }
4544                         add_assoc_zval(return_value, "detect_order", &row2);
4545                 }
4546                 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4547                         add_assoc_string(return_value, "substitute_character", "none");
4548                 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4549                         add_assoc_string(return_value, "substitute_character", "long");
4550                 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4551                         add_assoc_string(return_value, "substitute_character", "entity");
4552                 } else {
4553                         add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4554                 }
4555                 if (MBSTRG(strict_detection)) {
4556                         add_assoc_string(return_value, "strict_detection", "On");
4557                 } else {
4558                         add_assoc_string(return_value, "strict_detection", "Off");
4559                 }
4560         } else if (!strcasecmp("internal_encoding", typ)) {
4561                 if (MBSTRG(current_internal_encoding)) {
4562                         RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
4563                 }
4564         } else if (!strcasecmp("http_input", typ)) {
4565                 if (MBSTRG(http_input_identify)) {
4566                         RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
4567                 }
4568         } else if (!strcasecmp("http_output", typ)) {
4569                 if (MBSTRG(current_http_output_encoding)) {
4570                         RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
4571                 }
4572         } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4573                 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4574                         RETVAL_STRING(name);
4575                 }
4576         } else if (!strcasecmp("func_overload", typ)) {
4577                 RETVAL_LONG(MBSTRG(func_overload));
4578         } else if (!strcasecmp("func_overload_list", typ)) {
4579                 if (MBSTRG(func_overload)){
4580                                 over_func = &(mb_ovld[0]);
4581                                 array_init(return_value);
4582                                 while (over_func->type > 0) {
4583                                         if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4584                                                 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
4585                                         }
4586                                         over_func++;
4587                                 }
4588                 } else {
4589                         RETVAL_STRING("no overload");
4590                 }
4591         } else if (!strcasecmp("mail_charset", typ)) {
4592                 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4593                         RETVAL_STRING(name);
4594                 }
4595         } else if (!strcasecmp("mail_header_encoding", typ)) {
4596                 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4597                         RETVAL_STRING(name);
4598                 }
4599         } else if (!strcasecmp("mail_body_encoding", typ)) {
4600                 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4601                         RETVAL_STRING(name);
4602                 }
4603         } else if (!strcasecmp("illegal_chars", typ)) {
4604                 RETVAL_LONG(MBSTRG(illegalchars));
4605         } else if (!strcasecmp("encoding_translation", typ)) {
4606                 if (MBSTRG(encoding_translation)) {
4607                         RETVAL_STRING("On");
4608                 } else {
4609                         RETVAL_STRING("Off");
4610                 }
4611         } else if (!strcasecmp("language", typ)) {
4612                 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4613                         RETVAL_STRING(name);
4614                 }
4615         } else if (!strcasecmp("detect_order", typ)) {
4616                 n = MBSTRG(current_detect_order_list_size);
4617                 entry = MBSTRG(current_detect_order_list);
4618                 if (n > 0) {
4619                         size_t i;
4620                         array_init(return_value);
4621                         for (i = 0; i < n; i++) {
4622                                 add_next_index_string(return_value, (*entry)->name);
4623                                 entry++;
4624                         }
4625                 }
4626         } else if (!strcasecmp("substitute_character", typ)) {
4627                 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4628                         RETVAL_STRING("none");
4629                 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4630                         RETVAL_STRING("long");
4631                 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4632                         RETVAL_STRING("entity");
4633                 } else {
4634                         RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4635                 }
4636         } else if (!strcasecmp("strict_detection", typ)) {
4637                 if (MBSTRG(strict_detection)) {
4638                         RETVAL_STRING("On");
4639                 } else {
4640                         RETVAL_STRING("Off");
4641                 }
4642         } else {
4643                 RETURN_FALSE;
4644         }
4645 }
4646 /* }}} */
4647 
4648 /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4649    Check if the string is valid for the specified encoding */
4650 PHP_FUNCTION(mb_check_encoding)
4651 {
4652         char *var = NULL;
4653         size_t var_len;
4654         char *enc = NULL;
4655         size_t enc_len;
4656         mbfl_buffer_converter *convd;
4657         const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4658         mbfl_string string, result, *ret = NULL;
4659         long illegalchars = 0;
4660 
4661         if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4662                 return;
4663         }
4664 
4665         if (var == NULL) {
4666                 RETURN_BOOL(MBSTRG(illegalchars) == 0);
4667         }
4668 
4669         if (enc != NULL) {
4670                 encoding = mbfl_name2encoding(enc);
4671                 if (!encoding || encoding == &mbfl_encoding_pass) {
4672                         php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc);
4673                         RETURN_FALSE;
4674                 }
4675         }
4676 
4677         convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4678         if (convd == NULL) {
4679                 php_error_docref(NULL, E_WARNING, "Unable to create converter");
4680                 RETURN_FALSE;
4681         }
4682         mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4683         mbfl_buffer_converter_illegal_substchar(convd, 0);
4684 
4685         /* initialize string */
4686         mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4687         mbfl_string_init(&result);
4688 
4689         string.val = (unsigned char *)var;
4690         string.len = var_len;
4691         ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4692         illegalchars = mbfl_buffer_illegalchars(convd);
4693         mbfl_buffer_converter_delete(convd);
4694 
4695         RETVAL_FALSE;
4696         if (ret != NULL) {
4697                 if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4698                         RETVAL_TRUE;
4699                 }
4700                 mbfl_string_clear(&result);
4701         }
4702 }
4703 /* }}} */
4704 
4705 /* {{{ php_mb_populate_current_detect_order_list */
4706 static void php_mb_populate_current_detect_order_list(void)
4707 {
4708         const mbfl_encoding **entry = 0;
4709         size_t nentries;
4710 
4711         if (MBSTRG(current_detect_order_list)) {
4712                 return;
4713         }
4714 
4715         if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4716                 nentries = MBSTRG(detect_order_list_size);
4717                 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4718                 memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4719         } else {
4720                 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4721                 size_t i;
4722                 nentries = MBSTRG(default_detect_order_list_size);
4723                 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4724                 for (i = 0; i < nentries; i++) {
4725                         entry[i] = mbfl_no2encoding(src[i]);
4726                 }
4727         }
4728         MBSTRG(current_detect_order_list) = entry;
4729         MBSTRG(current_detect_order_list_size) = nentries;
4730 }
4731 /* }}} */
4732 
4733 /* {{{ static int php_mb_encoding_translation() */
4734 static int php_mb_encoding_translation(void)
4735 {
4736         return MBSTRG(encoding_translation);
4737 }
4738 /* }}} */
4739 
4740 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
4741 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4742 {
4743         if (enc != NULL) {
4744                 if (enc->flag & MBFL_ENCTYPE_MBCS) {
4745                         if (enc->mblen_table != NULL) {
4746                                 if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4747                         }
4748                 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4749                         return 2;
4750                 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4751                         return 4;
4752                 }
4753         }
4754         return 1;
4755 }
4756 /* }}} */
4757 
4758 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
4759 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
4760 {
4761         return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4762 }
4763 /* }}} */
4764 
4765 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
4766 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4767 {
4768         register const char *p = s;
4769         char *last=NULL;
4770 
4771         if (nbytes == (size_t)-1) {
4772                 size_t nb = 0;
4773 
4774                 while (*p != '\0') {
4775                         if (nb == 0) {
4776                                 if ((unsigned char)*p == (unsigned char)c) {
4777                                         last = (char *)p;
4778                                 }
4779                                 nb = php_mb_mbchar_bytes_ex(p, enc);
4780                                 if (nb == 0) {
4781                                         return NULL; /* something is going wrong! */
4782                                 }
4783                         }
4784                         --nb;
4785                         ++p;
4786                 }
4787         } else {
4788                 register size_t bcnt = nbytes;
4789                 register size_t nbytes_char;
4790                 while (bcnt > 0) {
4791                         if ((unsigned char)*p == (unsigned char)c) {
4792                                 last = (char *)p;
4793                         }
4794                         nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4795                         if (bcnt < nbytes_char) {
4796                                 return NULL;
4797                         }
4798                         p += nbytes_char;
4799                         bcnt -= nbytes_char;
4800                 }
4801         }
4802         return last;
4803 }
4804 /* }}} */
4805 
4806 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
4807 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
4808 {
4809         return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4810 }
4811 /* }}} */
4812 
4813 /* {{{ MBSTRING_API int php_mb_stripos()
4814  */
4815 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding)
4816 {
4817         int n;
4818         mbfl_string haystack, needle;
4819         n = -1;
4820 
4821         mbfl_string_init(&haystack);
4822         mbfl_string_init(&needle);
4823         haystack.no_language = MBSTRG(language);
4824         haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4825         needle.no_language = MBSTRG(language);
4826         needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4827 
4828         do {
4829                 size_t len = 0;
4830                 haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding);
4831                 haystack.len = len;
4832 
4833                 if (!haystack.val) {
4834                         break;
4835                 }
4836 
4837                 if (haystack.len <= 0) {
4838                         break;
4839                 }
4840 
4841                 needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding);
4842                 needle.len = len;
4843 
4844                 if (!needle.val) {
4845                         break;
4846                 }
4847 
4848                 if (needle.len <= 0) {
4849                         break;
4850                 }
4851 
4852                 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4853                 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4854                         php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4855                         break;
4856                 }
4857 
4858                 {
4859                         int haystack_char_len = mbfl_strlen(&haystack);
4860 
4861                         if (mode) {
4862                                 if ((offset > 0 && offset > haystack_char_len) ||
4863                                         (offset < 0 && -offset > haystack_char_len)) {
4864                                         php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
4865                                         break;
4866                                 }
4867                         } else {
4868                                 if (offset < 0 || offset > haystack_char_len) {
4869                                         php_error_docref(NULL, E_WARNING, "Offset not contained in string");
4870                                         break;
4871                                 }
4872                         }
4873                 }
4874 
4875                 n = mbfl_strpos(&haystack, &needle, offset, mode);
4876         } while(0);
4877 
4878         if (haystack.val) {
4879                 efree(haystack.val);
4880         }
4881 
4882         if (needle.val) {
4883                 efree(needle.val);
4884         }
4885 
4886         return n;
4887 }
4888 /* }}} */
4889 
4890 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
4891 {
4892         *list = (const zend_encoding **)MBSTRG(http_input_list);
4893         *list_size = MBSTRG(http_input_list_size);
4894 }
4895 /* }}} */
4896 
4897 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
4898 {
4899         MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
4900 }
4901 /* }}} */
4902 
4903 #endif  /* HAVE_MBSTRING */
4904 
4905 /*
4906  * Local variables:
4907  * tab-width: 4
4908  * c-basic-offset: 4
4909  * End:
4910  * vim600: fdm=marker
4911  * vim: noet sw=4 ts=4
4912  */

/* [<][>][^][v][top][bottom][index][help] */