root/ext/intl/idn/idn.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. idn_register_constants
  2. php_intl_idn_check_status
  3. php_intl_bad_args
  4. php_intl_idn_to_46
  5. php_intl_idn_to
  6. php_intl_idn_handoff
  7. PHP_FUNCTION
  8. PHP_FUNCTION

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 7                                                        |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 2009 The PHP Group                                     |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15    | Author: Pierre A. Joye <pierre@php.net>                              |
  16    |         Gustavo Lopes  <cataphract@php.net>                          |
  17    +----------------------------------------------------------------------+
  18  */
  19 /* $Id$ */
  20 
  21 /* {{{ includes */
  22 #ifdef HAVE_CONFIG_H
  23 #include "config.h"
  24 #endif
  25 
  26 #include <php.h>
  27 
  28 #include <unicode/uidna.h>
  29 #include <unicode/ustring.h>
  30 #include "ext/standard/php_string.h"
  31 
  32 #include "intl_error.h"
  33 #include "intl_convert.h"
  34 /* }}} */
  35 
  36 #ifdef UIDNA_INFO_INITIALIZER
  37 #define HAVE_46_API 1 /* has UTS#46 API (introduced in ICU 4.6) */
  38 #endif
  39 
  40 enum {
  41         INTL_IDN_VARIANT_2003 = 0,
  42         INTL_IDN_VARIANT_UTS46
  43 };
  44 
  45 /* {{{ grapheme_register_constants
  46  * Register API constants
  47  */
  48 void idn_register_constants( INIT_FUNC_ARGS )
  49 {
  50         /* OPTIONS */
  51 
  52         /* Option to prohibit processing of unassigned codepoints in the input and
  53            do not check if the input conforms to STD-3 ASCII rules. */
  54         REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
  55 
  56         /* Option to allow processing of unassigned codepoints in the input */
  57         REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
  58 
  59         /* Option to check if input conforms to STD-3 ASCII rules */
  60         REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
  61 
  62 #ifdef HAVE_46_API
  63 
  64         /* Option to check for whether the input conforms to the BiDi rules.
  65          * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
  66         REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
  67 
  68         /* Option to check for whether the input conforms to the CONTEXTJ rules.
  69          * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
  70         REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
  71 
  72         /* Option for nontransitional processing in ToASCII().
  73          * By default, ToASCII() uses transitional processing.
  74          * Ignored by the IDNA2003 implementation. */
  75         REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
  76 
  77         /* Option for nontransitional processing in ToUnicode().
  78          * By default, ToUnicode() uses transitional processing.
  79          * Ignored by the IDNA2003 implementation. */
  80         REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
  81 #endif
  82 
  83         /* VARIANTS */
  84         REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
  85 #ifdef HAVE_46_API
  86         REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
  87 #endif
  88 
  89 #ifdef HAVE_46_API
  90         /* PINFO ERROR CODES */
  91         REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
  92         REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
  93         REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
  94         REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
  95         REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
  96         REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
  97         REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
  98         REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
  99         REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
 100         REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
 101         REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
 102         REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
 103         REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
 104 #endif
 105 }
 106 /* }}} */
 107 
 108 enum {
 109         INTL_IDN_TO_ASCII = 0,
 110         INTL_IDN_TO_UTF8
 111 };
 112 
 113 /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
 114 static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode)
 115 {
 116         intl_error_set_code(NULL, err);
 117         if (U_FAILURE(err)) {
 118                 char *buff;
 119                 spprintf(&buff, 0, "%s: %s",
 120                         mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8",
 121                         msg);
 122                 intl_error_set_custom_msg(NULL, buff, 1);
 123                 efree(buff);
 124                 return FAILURE;
 125         }
 126 
 127         return SUCCESS;
 128 }
 129 
 130 static inline void php_intl_bad_args(const char *msg, int mode)
 131 {
 132         php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg, mode);
 133 }
 134 
 135 #ifdef HAVE_46_API
 136 static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
 137                 const char *domain, int32_t domain_len, uint32_t option, int mode, zval *idna_info)
 138 {
 139         UErrorCode        status = U_ZERO_ERROR;
 140         UIDNA             *uts46;
 141         int32_t           len;
 142         int32_t           buffer_capac = 255; /* no domain name may exceed this */
 143         zend_string       *buffer = zend_string_alloc(buffer_capac, 0);
 144         UIDNAInfo         info = UIDNA_INFO_INITIALIZER;
 145         int                       buffer_used = 0;
 146 
 147         uts46 = uidna_openUTS46(option, &status);
 148         if (php_intl_idn_check_status(status, "failed to open UIDNA instance",
 149                         mode) == FAILURE) {
 150                 zend_string_free(buffer);
 151                 RETURN_FALSE;
 152         }
 153 
 154         if (mode == INTL_IDN_TO_ASCII) {
 155                 len = uidna_nameToASCII_UTF8(uts46, domain, domain_len,
 156                                 ZSTR_VAL(buffer), buffer_capac, &info, &status);
 157         } else {
 158                 len = uidna_nameToUnicodeUTF8(uts46, domain, domain_len,
 159                                 ZSTR_VAL(buffer), buffer_capac, &info, &status);
 160         }
 161         if (php_intl_idn_check_status(status, "failed to convert name",
 162                         mode) == FAILURE) {
 163                 uidna_close(uts46);
 164                 zend_string_free(buffer);
 165                 RETURN_FALSE;
 166         }
 167         if (len >= 255) {
 168                 php_error_docref(NULL, E_ERROR, "ICU returned an unexpected length");
 169         }
 170 
 171         ZSTR_VAL(buffer)[len] = '\0';
 172         ZSTR_LEN(buffer) = len;
 173 
 174         if (info.errors == 0) {
 175                 RETVAL_STR(buffer);
 176                 buffer_used = 1;
 177         } else {
 178                 RETVAL_FALSE;
 179         }
 180 
 181         if (idna_info) {
 182                 if (buffer_used) { /* used in return_value then */
 183                         zval_addref_p(return_value);
 184                         add_assoc_zval_ex(idna_info, "result", sizeof("result")-1, return_value);
 185                 } else {
 186                         zval zv;
 187                         ZVAL_NEW_STR(&zv, buffer);
 188                         buffer_used = 1;
 189                         add_assoc_zval_ex(idna_info, "result", sizeof("result")-1, &zv);
 190                 }
 191                 add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
 192                                 sizeof("isTransitionalDifferent")-1, info.isTransitionalDifferent);
 193                 add_assoc_long_ex(idna_info, "errors", sizeof("errors")-1, (zend_long)info.errors);
 194         }
 195 
 196         if (!buffer_used) {
 197                 zend_string_free(buffer);
 198         }
 199 
 200         uidna_close(uts46);
 201 }
 202 #endif
 203 
 204 static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
 205                 const char *domain, int32_t domain_len, uint32_t option, int mode)
 206 {
 207         UChar* ustring = NULL;
 208         int ustring_len = 0;
 209         UErrorCode status;
 210         zend_string *u8str;
 211         UChar     converted[MAXPATHLEN];
 212         int32_t   converted_ret_len;
 213 
 214         /* convert the string to UTF-16. */
 215         status = U_ZERO_ERROR;
 216         intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status);
 217 
 218         if (U_FAILURE(status)) {
 219                 intl_error_set_code(NULL, status);
 220 
 221                 /* Set error messages. */
 222                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
 223                 if (ustring) {
 224                         efree(ustring);
 225                 }
 226                 RETURN_FALSE;
 227         } else {
 228                 UParseError parse_error;
 229 
 230                 status = U_ZERO_ERROR;
 231                 if (mode == INTL_IDN_TO_ASCII) {
 232                         converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
 233                 } else {
 234                         converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
 235                 }
 236                 efree(ustring);
 237 
 238                 if (U_FAILURE(status)) {
 239                         intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 );
 240                         RETURN_FALSE;
 241                 }
 242 
 243                 status = U_ZERO_ERROR;
 244                 u8str = intl_convert_utf16_to_utf8(converted, converted_ret_len, &status);
 245 
 246                 if (!u8str) {
 247                         /* Set global error code. */
 248                         intl_error_set_code(NULL, status);
 249 
 250                         /* Set error messages. */
 251                         intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
 252                         RETURN_FALSE;
 253                 }
 254         }
 255 
 256         /* return the allocated string, not a duplicate */
 257         RETVAL_NEW_STR(u8str);
 258 }
 259 
 260 static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
 261 {
 262         char *domain;
 263         size_t domain_len;
 264         zend_long option = 0,
 265                  variant = INTL_IDN_VARIANT_2003;
 266         zval *idna_info = NULL;
 267 
 268         intl_error_reset(NULL);
 269 
 270         if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|llz/",
 271                         &domain, &domain_len, &option, &variant, &idna_info) == FAILURE) {
 272                 php_intl_bad_args("bad arguments", mode);
 273                 RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
 274         }
 275 
 276 #ifdef HAVE_46_API
 277         if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
 278                 php_intl_bad_args("invalid variant, must be one of {"
 279                         "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}", mode);
 280                 RETURN_FALSE;
 281         }
 282 #else
 283         if (variant != INTL_IDN_VARIANT_2003) {
 284                 php_intl_bad_args("invalid variant, PHP was compiled against "
 285                         "an old version of ICU and only supports INTL_IDN_VARIANT_2003",
 286                         mode);
 287                 RETURN_FALSE;
 288         }
 289 #endif
 290 
 291         if (domain_len < 1) {
 292                 php_intl_bad_args("empty domain name", mode);
 293                 RETURN_FALSE;
 294         }
 295         if (domain_len > INT32_MAX - 1) {
 296                 php_intl_bad_args("domain name too large", mode);
 297                 RETURN_FALSE;
 298         }
 299         /* don't check options; it wasn't checked before */
 300 
 301         if (idna_info != NULL) {
 302                 if (variant == INTL_IDN_VARIANT_2003) {
 303                         php_error_docref0(NULL, E_NOTICE,
 304                                 "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
 305                                 "takes 3 - extra argument ignored");
 306                 } else {
 307                         zval_dtor(idna_info);
 308                         array_init(idna_info);
 309                 }
 310         }
 311 
 312         if (variant == INTL_IDN_VARIANT_2003) {
 313                 php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU,
 314                                 domain, (int32_t)domain_len, (uint32_t)option, mode);
 315         }
 316 #ifdef HAVE_46_API
 317         else {
 318                 php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, (int32_t)domain_len,
 319                                 (uint32_t)option, mode, idna_info);
 320         }
 321 #endif
 322 }
 323 
 324 /* {{{ proto int idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
 325    Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
 326 PHP_FUNCTION(idn_to_ascii)
 327 {
 328         php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
 329 }
 330 /* }}} */
 331 
 332 
 333 /* {{{ proto int idn_to_utf8(string domain[, int options[, int variant[, array &idna_info]]])
 334    Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
 335 PHP_FUNCTION(idn_to_utf8)
 336 {
 337         php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
 338 }
 339 /* }}} */
 340 
 341 
 342 /*
 343  * Local variables:
 344  * tab-width: 4
 345  * c-basic-offset: 4
 346  * End:
 347  * vim600: fdm=marker
 348  * vim: noet sw=4 ts=4
 349  */

/* [<][>][^][v][top][bottom][index][help] */