root/ext/intl/normalizer/normalizer_normalize.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. PHP_FUNCTION
  2. PHP_FUNCTION

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 7                                                                                                                |
   4    +----------------------------------------------------------------------+
   5    | This source file is subject to version 3.01 of the PHP license,      |
   6    | that is bundled with this package in the file LICENSE, and is                |
   7    | available through the world-wide-web at the following url:                   |
   8    | http://www.php.net/license/3_01.txt                                                                  |
   9    | If you did not receive a copy of the PHP license and are unable to   |
  10    | obtain it through the world-wide-web, please send a note to                  |
  11    | license@php.net so we can mail you a copy immediately.                               |
  12    +----------------------------------------------------------------------+
  13    | Authors: Ed Batutis <ed@batutis.com>                                                                 |
  14    +----------------------------------------------------------------------+
  15  */
  16 
  17 #ifdef HAVE_CONFIG_H
  18 #include "config.h"
  19 #endif
  20 
  21 #include "php_intl.h"
  22 #include "unicode/unorm.h"
  23 #include "normalizer.h"
  24 #include "normalizer_class.h"
  25 #include "normalizer_normalize.h"
  26 #include "intl_convert.h"
  27 
  28 /* {{{ proto string Normalizer::normalize( string $input [, string $form = FORM_C] )
  29  * Normalize a string. }}} */
  30 /* {{{ proto string normalizer_normalize( string $input [, string $form = FORM_C] )
  31  * Normalize a string.
  32  */
  33 PHP_FUNCTION( normalizer_normalize )
  34 {
  35         char*                   input = NULL;
  36         /* form is optional, defaults to FORM_C */
  37         zend_long           form = NORMALIZER_DEFAULT;
  38         size_t                  input_len = 0;
  39 
  40         UChar*                  uinput = NULL;
  41         int32_t             uinput_len = 0;
  42         int                         expansion_factor = 1;
  43         UErrorCode              status = U_ZERO_ERROR;
  44 
  45         UChar*                  uret_buf = NULL;
  46         int32_t                 uret_len = 0;
  47 
  48         zend_string*    u8str;
  49 
  50         int32_t                 size_needed;
  51 
  52         intl_error_reset( NULL );
  53 
  54         /* Parse parameters. */
  55         if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "s|l",
  56                                 &input, &input_len, &form ) == FAILURE )
  57         {
  58                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  59                                                  "normalizer_normalize: unable to parse input params", 0 );
  60 
  61                 RETURN_FALSE;
  62         }
  63 
  64         expansion_factor = 1;
  65 
  66         switch(form) {
  67                 case NORMALIZER_NONE:
  68                         break;
  69                 case NORMALIZER_FORM_D:
  70                         expansion_factor = 3;
  71                         break;
  72                 case NORMALIZER_FORM_KD:
  73                         expansion_factor = 3;
  74                         break;
  75                 case NORMALIZER_FORM_C:
  76                 case NORMALIZER_FORM_KC:
  77                         break;
  78                 default:
  79                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
  80                                                 "normalizer_normalize: illegal normalization form", 0 );
  81                         RETURN_FALSE;
  82         }
  83 
  84         /*
  85          * Normalize string (converting it to UTF-16 first).
  86          */
  87 
  88         /* First convert the string to UTF-16. */
  89         intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );
  90 
  91         if( U_FAILURE( status ) )
  92         {
  93                 /* Set global error code. */
  94                 intl_error_set_code( NULL, status );
  95 
  96                 /* Set error messages. */
  97                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
  98                 if (uinput) {
  99                         efree( uinput );
 100                 }
 101                 RETURN_FALSE;
 102         }
 103 
 104 
 105         /* Allocate memory for the destination buffer for normalization */
 106         uret_len = uinput_len * expansion_factor;
 107         uret_buf = eumalloc( uret_len + 1 );
 108 
 109         /* normalize */
 110         size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);
 111 
 112         /* Bail out if an unexpected error occurred.
 113          * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
 114          * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
 115          */
 116         if( U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR && status != U_STRING_NOT_TERMINATED_WARNING ) {
 117                 efree( uret_buf );
 118                 efree( uinput );
 119                 RETURN_NULL();
 120         }
 121 
 122         if ( size_needed > uret_len ) {
 123                 /* realloc does not seem to work properly - memory is corrupted
 124                  * uret_buf =  eurealloc(uret_buf, size_needed + 1);
 125                  */
 126                 efree( uret_buf );
 127                 uret_buf = eumalloc( size_needed + 1 );
 128                 uret_len = size_needed;
 129 
 130                 status = U_ZERO_ERROR;
 131 
 132                 /* try normalize again */
 133                 size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);
 134 
 135                 /* Bail out if an unexpected error occurred. */
 136                 if( U_FAILURE(status)  ) {
 137                         /* Set error messages. */
 138                         intl_error_set_custom_msg( NULL,"Error normalizing string", 0 );
 139                         efree( uret_buf );
 140                         efree( uinput );
 141                         RETURN_FALSE;
 142                 }
 143         }
 144 
 145         efree( uinput );
 146 
 147         /* the buffer we actually used */
 148         uret_len = size_needed;
 149 
 150         /* Convert normalized string from UTF-16 to UTF-8. */
 151         u8str = intl_convert_utf16_to_utf8(uret_buf, uret_len, &status );
 152         efree( uret_buf );
 153         if( !u8str )
 154         {
 155                 intl_error_set( NULL, status,
 156                                 "normalizer_normalize: error converting normalized text UTF-8", 0 );
 157                 RETURN_FALSE;
 158         }
 159 
 160         /* Return it. */
 161         RETVAL_NEW_STR( u8str );
 162 }
 163 /* }}} */
 164 
 165 /* {{{ proto bool Normalizer::isNormalized( string $input [, string $form = FORM_C] )
 166  * Test if a string is in a given normalization form. }}} */
 167 /* {{{ proto bool normalizer_is_normalize( string $input [, string $form = FORM_C] )
 168  * Test if a string is in a given normalization form.
 169  */
 170 PHP_FUNCTION( normalizer_is_normalized )
 171 {
 172         char*           input = NULL;
 173         /* form is optional, defaults to FORM_C */
 174         zend_long               form = NORMALIZER_DEFAULT;
 175         size_t          input_len = 0;
 176 
 177         UChar*          uinput = NULL;
 178         int             uinput_len = 0;
 179         UErrorCode      status = U_ZERO_ERROR;
 180 
 181         UBool           uret = FALSE;
 182 
 183         intl_error_reset( NULL );
 184 
 185         /* Parse parameters. */
 186         if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "s|l",
 187                                 &input, &input_len, &form) == FAILURE )
 188         {
 189                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
 190                                 "normalizer_is_normalized: unable to parse input params", 0 );
 191 
 192                 RETURN_FALSE;
 193         }
 194 
 195         switch(form) {
 196                 /* case NORMALIZER_NONE: not allowed - doesn't make sense */
 197 
 198                 case NORMALIZER_FORM_D:
 199                 case NORMALIZER_FORM_KD:
 200                 case NORMALIZER_FORM_C:
 201                 case NORMALIZER_FORM_KC:
 202                         break;
 203                 default:
 204                         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
 205                                                 "normalizer_normalize: illegal normalization form", 0 );
 206                         RETURN_FALSE;
 207         }
 208 
 209 
 210         /*
 211          * Test normalization of string (converting it to UTF-16 first).
 212          */
 213 
 214         /* First convert the string to UTF-16. */
 215         intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );
 216 
 217         if( U_FAILURE( status ) )
 218         {
 219                 /* Set global error code. */
 220                 intl_error_set_code( NULL, status );
 221 
 222                 /* Set error messages. */
 223                 intl_error_set_custom_msg( NULL, "Error converting string to UTF-16.", 0 );
 224                 if (uinput) {
 225                         efree( uinput );
 226                 }
 227                 RETURN_FALSE;
 228         }
 229 
 230 
 231         /* test string */
 232         uret = unorm_isNormalizedWithOptions( uinput, uinput_len, form, (int32_t) 0 /* options */, &status);
 233 
 234         efree( uinput );
 235 
 236         /* Bail out if an unexpected error occurred. */
 237         if( U_FAILURE(status)  ) {
 238                 /* Set error messages. */
 239                 intl_error_set_custom_msg( NULL,"Error testing if string is the given normalization form.", 0 );
 240                 RETURN_FALSE;
 241         }
 242 
 243         if ( uret )
 244                 RETURN_TRUE;
 245 
 246         RETURN_FALSE;
 247 }
 248 /* }}} */
 249 
 250 /*
 251  * Local variables:
 252  * tab-width: 4
 253  * c-basic-offset: 4
 254  * End:
 255  * vim600: noet sw=4 ts=4 fdm=marker
 256  * vim<600: noet sw=4 ts=4
 257  */

/* [<][>][^][v][top][bottom][index][help] */