root/ext/mbstring/libmbfl/filters/mbfilter_utf7.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_filt_conv_utf7_wchar
  2. mbfl_filt_conv_wchar_utf7
  3. mbfl_filt_conv_wchar_utf7_flush
  4. mbfl_filt_ident_utf7

   1 /*
   2  * "streamable kanji code filter and converter"
   3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
   4  *
   5  * LICENSE NOTICES
   6  *
   7  * This file is part of "streamable kanji code filter and converter",
   8  * which is distributed under the terms of GNU Lesser General Public
   9  * License (version 2) as published by the Free Software Foundation.
  10  *
  11  * This software is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with "streamable kanji code filter and converter";
  18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19  * Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  * The author of this file:
  22  *
  23  */
  24 /*
  25  * The source code included in this file was separated from mbfilter.c
  26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
  27  *
  28  */
  29 
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33 
  34 #include "mbfilter.h"
  35 #include "mbfilter_utf7.h"
  36 
  37 static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter);
  38 
  39 static const unsigned char mbfl_base64_table[] = {
  40  /* 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', */
  41    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,
  42  /* 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', */
  43    0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,
  44  /* 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', */
  45    0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,
  46  /* 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', */
  47    0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,
  48  /* '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0' */
  49    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x2b,0x2f,0x00
  50 };
  51 
  52 static const char *mbfl_encoding_utf7_aliases[] = {"utf7", NULL};
  53 
  54 const mbfl_encoding mbfl_encoding_utf7 = {
  55         mbfl_no_encoding_utf7,
  56         "UTF-7",
  57         "UTF-7",
  58         (const char *(*)[])&mbfl_encoding_utf7_aliases,
  59         NULL,
  60         MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
  61 };
  62 
  63 const struct mbfl_identify_vtbl vtbl_identify_utf7 = {
  64         mbfl_no_encoding_utf7,
  65         mbfl_filt_ident_common_ctor,
  66         mbfl_filt_ident_common_dtor,
  67         mbfl_filt_ident_utf7
  68 };
  69 
  70 const struct mbfl_convert_vtbl vtbl_utf7_wchar = {
  71         mbfl_no_encoding_utf7,
  72         mbfl_no_encoding_wchar,
  73         mbfl_filt_conv_common_ctor,
  74         mbfl_filt_conv_common_dtor,
  75         mbfl_filt_conv_utf7_wchar,
  76         mbfl_filt_conv_common_flush
  77 };
  78 
  79 const struct mbfl_convert_vtbl vtbl_wchar_utf7 = {
  80         mbfl_no_encoding_wchar,
  81         mbfl_no_encoding_utf7,
  82         mbfl_filt_conv_common_ctor,
  83         mbfl_filt_conv_common_dtor,
  84         mbfl_filt_conv_wchar_utf7,
  85         mbfl_filt_conv_wchar_utf7_flush
  86 };
  87 
  88 
  89 #define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)
  90 
  91 /*
  92  * UTF-7 => wchar
  93  */
  94 int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
  95 {
  96         int s, n;
  97 
  98         n = -1;
  99         if (filter->status != 0) {              /* Modified Base64 */
 100                 if (c >= 0x41 && c <= 0x5a) {           /* A - Z */
 101                         n = c - 65;
 102                 } else if (c >= 0x61 && c <= 0x7a) {    /* a - z */
 103                         n = c - 71;
 104                 } else if (c >= 0x30 && c <= 0x39) {    /* 0 - 9 */
 105                         n = c + 4;
 106                 } else if (c == 0x2b) {                 /* '+' */
 107                         n = 62;
 108                 } else if (c == 0x2f) {                 /* '/' */
 109                         n = 63;
 110                 }
 111                 if (n < 0 || n > 63) {
 112                         if (c == 0x2d) {
 113                                 if (filter->status == 1) {              /* "+-" -> "+" */
 114                                         CK((*filter->output_function)(0x2b, filter->data));
 115                                 }
 116                         } else if (c >= 0 && c < 0x80) {        /* ASCII exclude '-' */
 117                                 CK((*filter->output_function)(c, filter->data));
 118                         } else {                /* illegal character */
 119                                 s = c & MBFL_WCSGROUP_MASK;
 120                                 s |= MBFL_WCSGROUP_THROUGH;
 121                                 CK((*filter->output_function)(s, filter->data));
 122                         }
 123                         filter->cache = 0;
 124                         filter->status = 0;
 125                         return c;
 126                 }
 127         }
 128 
 129         switch (filter->status) {
 130         /* directly encoded characters */
 131         case 0:
 132                 if (c == 0x2b) {        /* '+'  shift character */
 133                         filter->status = 1;
 134                 } else if (c >= 0 && c < 0x80) {        /* ASCII */
 135                         CK((*filter->output_function)(c, filter->data));
 136                 } else {                /* illegal character */
 137                         s = c & MBFL_WCSGROUP_MASK;
 138                         s |= MBFL_WCSGROUP_THROUGH;
 139                         CK((*filter->output_function)(s, filter->data));
 140                 }
 141                 break;
 142 
 143         /* decode Modified Base64 */
 144         case 1:
 145         case 2:
 146                 filter->cache |= n << 10;
 147                 filter->status = 3;
 148                 break;
 149         case 3:
 150                 filter->cache |= n << 4;
 151                 filter->status = 4;
 152                 break;
 153         case 4:
 154                 s = ((n >> 2) & 0xf) | (filter->cache & 0xffff);
 155                 n = (n & 0x3) << 14;
 156                 filter->status = 5;
 157                 if (s >= 0xd800 && s < 0xdc00) {
 158                         s = (((s & 0x3ff) << 16) + 0x400000) | n;
 159                         filter->cache = s;
 160                 } else if (s >= 0xdc00 && s < 0xe000) {
 161                         s &= 0x3ff;
 162                         s |= (filter->cache & 0xfff0000) >> 6;
 163                         filter->cache = n;
 164                         if (s >= MBFL_WCSPLANE_SUPMIN && s < MBFL_WCSPLANE_SUPMAX) {
 165                                 CK((*filter->output_function)(s, filter->data));
 166                         } else {                /* illegal character */
 167                                 s &= MBFL_WCSGROUP_MASK;
 168                                 s |= MBFL_WCSGROUP_THROUGH;
 169                                 CK((*filter->output_function)(s, filter->data));
 170                         }
 171                 } else {
 172                         filter->cache = n;
 173                         CK((*filter->output_function)(s, filter->data));
 174                 }
 175                 break;
 176 
 177         case 5:
 178                 filter->cache |= n << 8;
 179                 filter->status = 6;
 180                 break;
 181         case 6:
 182                 filter->cache |= n << 2;
 183                 filter->status = 7;
 184                 break;
 185         case 7:
 186                 s = ((n >> 4) & 0x3) | (filter->cache & 0xffff);
 187                 n = (n & 0xf) << 12;
 188                 filter->status = 8;
 189                 if (s >= 0xd800 && s < 0xdc00) {
 190                         s = (((s & 0x3ff) << 16) + 0x400000) | n;
 191                         filter->cache = s;
 192                 } else if (s >= 0xdc00 && s < 0xe000) {
 193                         s &= 0x3ff;
 194                         s |= (filter->cache & 0xfff0000) >> 6;
 195                         filter->cache = n;
 196                         if (s >= MBFL_WCSPLANE_SUPMIN && s < MBFL_WCSPLANE_SUPMAX) {
 197                                 CK((*filter->output_function)(s, filter->data));
 198                         } else {                /* illegal character */
 199                                 s &= MBFL_WCSGROUP_MASK;
 200                                 s |= MBFL_WCSGROUP_THROUGH;
 201                                 CK((*filter->output_function)(s, filter->data));
 202                         }
 203                 } else {
 204                         filter->cache = n;
 205                         CK((*filter->output_function)(s, filter->data));
 206                 }
 207                 break;
 208 
 209         case 8:
 210                 filter->cache |= n << 6;
 211                 filter->status = 9;
 212                 break;
 213         case 9:
 214                 s = n | (filter->cache & 0xffff);
 215                 filter->status = 2;
 216                 if (s >= 0xd800 && s < 0xdc00) {
 217                         s = (((s & 0x3ff) << 16) + 0x400000);
 218                         filter->cache = s;
 219                 } else if (s >= 0xdc00 && s < 0xe000) {
 220                         s &= 0x3ff;
 221                         s |= (filter->cache & 0xfff0000) >> 6;
 222                         filter->cache = 0;
 223                         if (s >= MBFL_WCSPLANE_SUPMIN && s < MBFL_WCSPLANE_SUPMAX) {
 224                                 CK((*filter->output_function)(s, filter->data));
 225                         } else {                /* illegal character */
 226                                 s &= MBFL_WCSGROUP_MASK;
 227                                 s |= MBFL_WCSGROUP_THROUGH;
 228                                 CK((*filter->output_function)(s, filter->data));
 229                         }
 230                 } else {
 231                         filter->cache = 0;
 232                         CK((*filter->output_function)(s, filter->data));
 233                 }
 234                 break;
 235 
 236         default:
 237                 filter->status = 0;
 238                 break;
 239         }
 240 
 241         return c;
 242 }
 243 
 244 /*
 245  * wchar => UTF-7
 246  */
 247 int mbfl_filt_conv_wchar_utf7(int c, mbfl_convert_filter *filter)
 248 {
 249         int s, n;
 250 
 251         n = 0;
 252         if (c >= 0 && c < 0x80) {       /* ASCII */
 253                 if (c >= 0x41 && c <= 0x5a) {           /* A - Z */
 254                         n = 1;
 255                 } else if (c >= 0x61 && c <= 0x7a) {    /* a - z */
 256                         n = 1;
 257                 } else if (c >= 0x30 && c <= 0x39) {    /* 0 - 9 */
 258                         n = 1;
 259                 } else if (c == '\0') {                 /* '\0' */
 260                         n = 1;
 261                 } else if (c == 0x2f) {                 /* '/' */
 262                         n = 1;
 263                 } else if (c == 0x2d) {                 /* '-' */
 264                         n = 1;
 265                 } else if (c == 0x20) {                 /* SPACE */
 266                         n = 2;
 267                 } else if (c == 0x09) {                 /* HTAB */
 268                         n = 2;
 269                 } else if (c == 0x0d) {                 /* CR */
 270                         n = 2;
 271                 } else if (c == 0x0a) {                 /* LF */
 272                         n = 2;
 273                 } else if (c == 0x27) {                 /* "'" */
 274                         n = 2;
 275                 } else if (c == 0x28) {                 /* '(' */
 276                         n = 2;
 277                 } else if (c == 0x29) {                 /* ')' */
 278                         n = 2;
 279                 } else if (c == 0x2c) {                 /* ',' */
 280                         n = 2;
 281                 } else if (c == 0x2e) {                 /* '.' */
 282                         n = 2;
 283                 } else if (c == 0x3a) {                 /* ':' */
 284                         n = 2;
 285                 } else if (c == 0x3f) {                 /* '?' */
 286                         n = 2;
 287                 }
 288         } else if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
 289                 ;
 290         } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
 291                 s = ((c >> 10) - 0x40) | 0xd800;
 292                 CK((*filter->filter_function)(s, filter));
 293                 s = (c & 0x3ff) | 0xdc00;
 294                 CK((*filter->filter_function)(s, filter));
 295                 return c;
 296         } else {
 297                 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
 298                         CK(mbfl_filt_conv_illegal_output(c, filter));
 299                 }
 300                 return c;
 301         }
 302 
 303         switch (filter->status) {
 304         case 0:
 305                 if (n != 0) {   /* directly encode characters */
 306                         CK((*filter->output_function)(c, filter->data));
 307                 } else {        /* Modified Base64 */
 308                         CK((*filter->output_function)(0x2b, filter->data));             /* '+' */
 309                         filter->status++;
 310                         filter->cache = c;
 311                 }
 312                 break;
 313 
 314         /* encode Modified Base64 */
 315         case 1:
 316                 s = filter->cache;
 317                 CK((*filter->output_function)(mbfl_base64_table[(s >> 10) & 0x3f], filter->data));
 318                 CK((*filter->output_function)(mbfl_base64_table[(s >> 4) & 0x3f], filter->data));
 319                 if (n != 0) {
 320                         CK((*filter->output_function)(mbfl_base64_table[(s << 2) & 0x3c], filter->data));
 321                         if (n == 1) {
 322                                 CK((*filter->output_function)(0x2d, filter->data));             /* '-' */
 323                         }
 324                         CK((*filter->output_function)(c, filter->data));
 325                         filter->status = 0;
 326                 } else {
 327                         filter->status++;
 328                         filter->cache = ((s & 0xf) << 16) | c;
 329                 }
 330                 break;
 331 
 332         case 2:
 333                 s = filter->cache;
 334                 CK((*filter->output_function)(mbfl_base64_table[(s >> 14) & 0x3f], filter->data));
 335                 CK((*filter->output_function)(mbfl_base64_table[(s >> 8) & 0x3f], filter->data));
 336                 CK((*filter->output_function)(mbfl_base64_table[(s >> 2) & 0x3f], filter->data));
 337                 if (n != 0) {
 338                         CK((*filter->output_function)(mbfl_base64_table[(s << 4) & 0x30], filter->data));
 339                         if (n == 1) {
 340                                 CK((*filter->output_function)(0x2d, filter->data));             /* '-' */
 341                         }
 342                         CK((*filter->output_function)(c, filter->data));
 343                         filter->status = 0;
 344                 } else {
 345                         filter->status++;
 346                         filter->cache = ((s & 0x3) << 16) | c;
 347                 }
 348                 break;
 349 
 350         case 3:
 351                 s = filter->cache;
 352                 CK((*filter->output_function)(mbfl_base64_table[(s >> 12) & 0x3f], filter->data));
 353                 CK((*filter->output_function)(mbfl_base64_table[(s >> 6) & 0x3f], filter->data));
 354                 CK((*filter->output_function)(mbfl_base64_table[s & 0x3f], filter->data));
 355                 if (n != 0) {
 356                         if (n == 1) {
 357                                 CK((*filter->output_function)(0x2d, filter->data));             /* '-' */
 358                         }
 359                         CK((*filter->output_function)(c, filter->data));
 360                         filter->status = 0;
 361                 } else {
 362                         filter->status = 1;
 363                         filter->cache = c;
 364                 }
 365                 break;
 366 
 367         default:
 368                 filter->status = 0;
 369                 break;
 370         }
 371 
 372         return c;
 373 
 374 }
 375 
 376 int mbfl_filt_conv_wchar_utf7_flush(mbfl_convert_filter *filter)
 377 {
 378         int status, cache;
 379 
 380         status = filter->status;
 381         cache = filter->cache;
 382         filter->status = 0;
 383         filter->cache = 0;
 384         /* flush fragments */
 385         switch (status) {
 386         case 1:
 387                 CK((*filter->output_function)(mbfl_base64_table[(cache >> 10) & 0x3f], filter->data));
 388                 CK((*filter->output_function)(mbfl_base64_table[(cache >> 4) & 0x3f], filter->data));
 389                 CK((*filter->output_function)(mbfl_base64_table[(cache << 2) & 0x3c], filter->data));
 390                 CK((*filter->output_function)(0x2d, filter->data));             /* '-' */
 391                 break;
 392 
 393         case 2:
 394                 CK((*filter->output_function)(mbfl_base64_table[(cache >> 14) & 0x3f], filter->data));
 395                 CK((*filter->output_function)(mbfl_base64_table[(cache >> 8) & 0x3f], filter->data));
 396                 CK((*filter->output_function)(mbfl_base64_table[(cache >> 2) & 0x3f], filter->data));
 397                 CK((*filter->output_function)(mbfl_base64_table[(cache << 4) & 0x30], filter->data));
 398                 CK((*filter->output_function)(0x2d, filter->data));             /* '-' */
 399                 break;
 400 
 401         case 3:
 402                 CK((*filter->output_function)(mbfl_base64_table[(cache >> 12) & 0x3f], filter->data));
 403                 CK((*filter->output_function)(mbfl_base64_table[(cache >> 6) & 0x3f], filter->data));
 404                 CK((*filter->output_function)(mbfl_base64_table[cache & 0x3f], filter->data));
 405                 CK((*filter->output_function)(0x2d, filter->data));             /* '-' */
 406                 break;
 407         }
 408 
 409         if (filter->flush_function != NULL) {
 410                 (*filter->flush_function)(filter->data);
 411         }
 412 
 413         return 0;
 414 }
 415 
 416 static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter)
 417 {
 418         int n;
 419 
 420         switch (filter->status) {
 421         /* directly encoded characters */
 422         case 0:
 423                 if (c == 0x2b) {        /* '+'  shift character */
 424                         filter->status++;
 425                 } else if (c == 0x5c || c == 0x7e || c < 0 || c > 0x7f) {       /* illegal character */
 426                         filter->flag = 1;       /* bad */
 427                 }
 428                 break;
 429 
 430         /* Modified Base64 */
 431         case 1:
 432         case 2:
 433                 n = 0;
 434                 if (c >= 0x41 && c <= 0x5a) {           /* A - Z */
 435                         n = 1;
 436                 } else if (c >= 0x61 && c <= 0x7a) {    /* a - z */
 437                         n = 1;
 438                 } else if (c >= 0x30 && c <= 0x39) {    /* 0 - 9 */
 439                         n = 1;
 440                 } else if (c == 0x2b) {                 /* '+' */
 441                         n = 1;
 442                 } else if (c == 0x2f) {                 /* '/' */
 443                         n = 1;
 444                 }
 445                 if (n <= 0) {
 446                         if (filter->status == 1 && c != 0x2d) {
 447                                 filter->flag = 1;       /* bad */
 448                         } else if (c < 0 || c > 0x7f) {
 449                                 filter->flag = 1;       /* bad */
 450                         }
 451                         filter->status = 0;
 452                 } else {
 453                         filter->status = 2;
 454                 }
 455                 break;
 456 
 457         default:
 458                 filter->status = 0;
 459                 break;
 460         }
 461 
 462         return c;
 463 }
 464 
 465 

/* [<][>][^][v][top][bottom][index][help] */