root/ext/standard/cyr_convert.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. php_convert_cyr_string
  2. PHP_FUNCTION

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 7                                                        |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 1997-2016 The PHP Group                                |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15    | Author: Kirill Maximov <kir@rus.net>                                 |
  16    +----------------------------------------------------------------------+
  17  */
  18 
  19 /* $Id$ */
  20 
  21 #include <stdlib.h>
  22 
  23 #ifdef HAVE_UNISTD_H
  24 #include <unistd.h>
  25 #endif
  26 #include <string.h>
  27 #include <errno.h>
  28 
  29 #include "php.h"
  30 #include "cyr_convert.h"
  31 
  32 #include <stdio.h>
  33 
  34 /*****************************************************************************
  35 * These are the code tables for different Cyrillic charsets (relative to koi8-r).
  36 * Each table maps all 256 byte values; only bytes 128-255 are actually remapped.
  37 * First 256 entries are for conversion from the corresponding charset to koi8-r,
  38 * second 256 entries are for reverse conversion, from koi8-r to the charset.
  39 *
  40 * Here we have the following tables:
  41 * _cyr_win1251   - for windows-1251 charset
  42 * _cyr_iso88595  - for iso8859-5 charset
  43 * _cyr_cp866     - for x-cp866 charset
  44 * _cyr_mac       - for x-mac-cyrillic charset
  45 *
  46 *****************************************************************************/
  47 
  48 typedef unsigned char _cyr_charset_table[512]; /* two 256-entry byte maps stored back to back */
  49 
  50 /* {{{ static const _cyr_charset_table _cyr_win1251, _cyr_cp866, _cyr_iso88595, _cyr_mac
  51  * Entries 0-255 map a charset byte to koi8-r; entries 256-511 map a koi8-r byte to that charset. */
  52 static const _cyr_charset_table _cyr_win1251 = { /* windows-1251, label 'w' */
  53 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* entries 0-255: windows-1251 -> koi8-r */
  54 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  55 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  56 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  57 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  58 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  59 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  60 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  61 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
  62 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
  63 154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183,
  64 46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167,
  65 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
  66 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
  67 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
  68 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
  69 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* entries 256-511: koi8-r -> windows-1251 */
  70 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  71 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  72 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  73 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  74 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  75 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  76 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  77 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  78 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
  79 32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32,
  80 32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169,
  81 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
  82 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
  83 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
  84 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218,
  85 },
  86 _cyr_cp866 = { /* x-cp866, labels 'a' and 'd' */
  87 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* entries 0-255: x-cp866 -> koi8-r */
  88 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  89 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  90 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  91 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  92 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  93 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  94 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  95 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
  96 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
  97 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
  98 35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43,
  99 43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45,
 100 45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35,
 101 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
 102 179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154,
 103 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* entries 256-511: koi8-r -> x-cp866 */
 104 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
 105 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
 106 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
 107 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
 108 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
 109 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
 110 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
 111 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
 112 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
 113 205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198,
 114 199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32,
 115 238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174,
 116 175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234,
 117 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
 118 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
 119 },
 120 _cyr_iso88595 = { /* iso8859-5, label 'i' */
 121 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* entries 0-255: iso8859-5 -> koi8-r */
 122 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
 123 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
 124 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
 125 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
 126 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
 127 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
 128 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
 129 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
 130 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
 131 32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
 132 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
 133 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
 134 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
 135 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
 136 32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
 137 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* entries 256-511: koi8-r -> iso8859-5 */
 138 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
 139 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
 140 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
 141 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
 142 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
 143 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
 144 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
 145 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
 146 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
 147 32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32,
 148 32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32,
 149 238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222,
 150 223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234,
 151 206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190,
 152 191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202,
 153 },
 154 _cyr_mac = { /* x-mac-cyrillic, label 'm' */
 155 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* entries 0-255: x-mac-cyrillic -> koi8-r */
 156 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
 157 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
 158 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
 159 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
 160 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
 161 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
 162 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
 163 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
 164 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
 165 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
 166 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
 167 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
 168 144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209,
 169 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
 170 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255,
 171 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, /* entries 256-511: koi8-r -> x-mac-cyrillic */
 172 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
 173 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
 174 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
 175 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
 176 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
 177 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
 178 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
 179 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
 180 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
 181 160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175,
 182 176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191,
 183 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
 184 239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
 185 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
 186 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
 187 };
 188 /* }}} */
 189 
 190 /* {{{ static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to)
 191 * This is the function that performs real in-place conversion of the string
 192 * between charsets.
 193 * Parameters:
 194 *    str - string to be converted
 195 *    from,to - one-symbol label of source and destination charset
 196 * The following symbols are used as labels:
 197 *    k - koi8-r
 198 *    w - windows-1251
 199 *    i - iso8859-5
 200 *    a - x-cp866
 201 *    d - x-cp866
 202 *    m - x-mac-cyrillic
 203 *****************************************************************************/
 204 static char * php_convert_cyr_string(unsigned char *str, size_t length, char from, char to)
 205 {
 206         const unsigned char *from_table, *to_table;
 207         unsigned char tmp;
 208         size_t i;
 209 
 210         from_table = NULL;
 211         to_table   = NULL;
 212 
 213         switch (toupper((int)(unsigned char)from))
 214         {
 215                 case 'W':
 216                         from_table = _cyr_win1251;
 217                         break;
 218                 case 'A':
 219                 case 'D':
 220                         from_table = _cyr_cp866;
 221                         break;
 222                 case 'I':
 223                         from_table = _cyr_iso88595;
 224                         break;
 225                 case 'M':
 226                         from_table = _cyr_mac;
 227                         break;
 228                 case 'K':
 229                         break;
 230                 default:
 231                         php_error_docref(NULL, E_WARNING, "Unknown source charset: %c", from);
 232                         break;
 233         }
 234 
 235         switch (toupper((int)(unsigned char)to))
 236         {
 237                 case 'W':
 238                         to_table = _cyr_win1251;
 239                         break;
 240                 case 'A':
 241                 case 'D':
 242                         to_table = _cyr_cp866;
 243                         break;
 244                 case 'I':
 245                         to_table = _cyr_iso88595;
 246                         break;
 247                 case 'M':
 248                         to_table = _cyr_mac;
 249                         break;
 250                 case 'K':
 251                         break;
 252                 default:
 253                         php_error_docref(NULL, E_WARNING, "Unknown destination charset: %c", to);
 254                         break;
 255         }
 256 
 257 
 258         if (!str)
 259                 return (char *)str;
 260 
 261         for (i = 0; i < length; i++) {
 262                 tmp = (from_table == NULL)? str[i] : from_table[ str[i] ];
 263                 str[i] = (to_table == NULL) ? tmp : to_table[tmp + 256];
 264         }
 265         return (char *)str;
 266 }
 267 /* }}} */
 268 
 269 /* {{{ proto string convert_cyr_string(string str, string from, string to)
 270    Convert from one Cyrillic character set to another */
 271 PHP_FUNCTION(convert_cyr_string)
 272 {
 273         char *input, *fr_cs, *to_cs;
 274         size_t input_len, fr_cs_len, to_cs_len;
 275         zend_string *str;
 276 
 277         if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss", &input, &input_len, &fr_cs, &fr_cs_len, &to_cs, &to_cs_len) == FAILURE) {
 278                 return;
 279         }
 280 
 281         str = zend_string_init(input, input_len, 0);
 282 
 283         php_convert_cyr_string((unsigned char *) ZSTR_VAL(str), ZSTR_LEN(str), fr_cs[0], to_cs[0]);
 284         RETVAL_NEW_STR(str);
 285 }
 286 /* }}} */
 287 
 288 /*
 289  * Local variables:
 290  * tab-width: 4
 291  * c-basic-offset: 4
 292  * End:
 293  * vim600: sw=4 ts=4 fdm=marker
 294  * vim<600: sw=4 ts=4
 295  */

/* [<][>][^][v][top][bottom][index][help] */