root/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_filt_conv_2022jpms_wchar
  2. cp932ext3_cp932ext2_jis
  3. mbfl_filt_conv_wchar_2022jpms
  4. mbfl_filt_conv_any_2022jpms_flush
  5. mbfl_filt_ident_2022jpms

   1 /*
   2  * "streamable kanji code filter and converter"
   3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
   4  *
   5  * LICENSE NOTICES
   6  *
   7  * This file is part of "streamable kanji code filter and converter",
   8  * which is distributed under the terms of GNU Lesser General Public
   9  * License (version 2) as published by the Free Software Foundation.
  10  *
  11  * This software is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with "streamable kanji code filter and converter";
  18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19  * Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  * The author of this file:
  22  *
  23  */
  24 /*
  25  * The source code included in this file was separated from mbfilter_ja.c
  26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
  27  *
  28  */
  29 
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33 
  34 #include "mbfilter.h"
  35 #include "mbfilter_iso2022_jp_ms.h"
  36 
  37 #include "unicode_table_cp932_ext.h"
  38 #include "unicode_table_jis.h"
  39 #include "cp932_table.h"
  40 
  41 int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter);
  42 
  43 static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL};
  44 
  45 const mbfl_encoding mbfl_encoding_2022jpms = {
  46         mbfl_no_encoding_2022jpms,
  47         "ISO-2022-JP-MS",
  48         "ISO-2022-JP",
  49         (const char *(*)[])&mbfl_encoding_2022jpms_aliases,
  50         NULL,
  51         MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
  52 };
  53 
  54 const struct mbfl_identify_vtbl vtbl_identify_2022jpms = {
  55         mbfl_no_encoding_2022jpms,
  56         mbfl_filt_ident_common_ctor,
  57         mbfl_filt_ident_common_dtor,
  58         mbfl_filt_ident_2022jpms
  59 };
  60 
  61 const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = {
  62         mbfl_no_encoding_2022jpms,
  63         mbfl_no_encoding_wchar,
  64         mbfl_filt_conv_common_ctor,
  65         mbfl_filt_conv_common_dtor,
  66         mbfl_filt_conv_2022jpms_wchar,
  67         mbfl_filt_conv_common_flush
  68 };
  69 
  70 const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = {
  71         mbfl_no_encoding_wchar,
  72         mbfl_no_encoding_2022jpms,
  73         mbfl_filt_conv_common_ctor,
  74         mbfl_filt_conv_common_dtor,
  75         mbfl_filt_conv_wchar_2022jpms,
  76         mbfl_filt_conv_any_2022jpms_flush
  77 };
  78 
  79 #define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)
  80 
  81 #define sjistoidx(c1, c2) \
  82         (((c1) > 0x9f) \
  83         ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \
  84         : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)))
  85 #define idxtojis1(c) (((c) / 94) + 0x21)
  86 #define idxtojis2(c) (((c) % 94) + 0x21)
  87 
  88 /*
  89  * ISO-2022-JP-MS => wchar
  90  */
  91 int
  92 mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter)
  93 {
  94         int c1, s, w;
  95 
  96 retry:
  97         switch (filter->status & 0xf) {
  98 /*      case 0x00:       ASCII */
  99 /*      case 0x10:       X 0201 latin */
 100 /*      case 0x20:       X 0201 kana */
 101 /*      case 0x80:       X 0208 */
 102 /*      case 0xa0:       UDC */
 103         case 0:
 104                 if (c == 0x1b) {
 105                         filter->status += 2;
 106                 } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) {            /* kana */
 107                         CK((*filter->output_function)(0xff40 + c, filter->data));
 108                 } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) {                /* kanji first char */
 109                         filter->cache = c;
 110                         filter->status += 1;
 111                 } else if (c >= 0 && c < 0x80) {                /* latin, CTLs */
 112                         CK((*filter->output_function)(c, filter->data));
 113                 } else if (c > 0xa0 && c < 0xe0) {      /* GR kana */
 114                         CK((*filter->output_function)(0xfec0 + c, filter->data));
 115                 } else {
 116                         w = c & MBFL_WCSGROUP_MASK;
 117                         w |= MBFL_WCSGROUP_THROUGH;
 118                         CK((*filter->output_function)(w, filter->data));
 119                 }
 120                 break;
 121 
 122 /*      case 0x81:       X 0208 second char */
 123 /*      case 0xa1:       UDC second char */
 124         case 1:
 125                 w = 0;
 126                 filter->status &= ~0xf;
 127                 c1 = filter->cache;
 128                 if (c > 0x20 && c < 0x7f) {
 129                         s = (c1 - 0x21)*94 + c - 0x21;
 130                         if (filter->status == 0x80) {
 131                                 if (s <= 137) {
 132                                         if (s == 31) {
 133                                                 w = 0xff3c;                     /* FULLWIDTH REVERSE SOLIDUS */
 134                                         } else if (s == 32) {
 135                                                 w = 0xff5e;                     /* FULLWIDTH TILDE */
 136                                         } else if (s == 33) {
 137                                                 w = 0x2225;                     /* PARALLEL TO */
 138                                         } else if (s == 60) {
 139                                                 w = 0xff0d;                     /* FULLWIDTH HYPHEN-MINUS */
 140                                         } else if (s == 80) {
 141                                                 w = 0xffe0;                     /* FULLWIDTH CENT SIGN */
 142                                         } else if (s == 81) {
 143                                                 w = 0xffe1;                     /* FULLWIDTH POUND SIGN */
 144                                         } else if (s == 137) {
 145                                                 w = 0xffe2;                     /* FULLWIDTH NOT SIGN */
 146                                         }
 147                                 }
 148                                 if (w == 0) {
 149                                         if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {              /* vendor ext1 (13ku) */
 150                                                 w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
 151                                         } else if (s >= 0 && s < jisx0208_ucs_table_size) {
 152                                                 w = jisx0208_ucs_table[s];
 153                                         } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {               /* vendor ext2 (89ku - 92ku) */
 154                                                 w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
 155                                         } else {
 156                                                 w = 0;
 157                                         }
 158                                 }
 159                                 if (w <= 0) {
 160                                         w = (c1 << 8) | c;
 161                                         w &= MBFL_WCSPLANE_MASK;
 162                                         w |= MBFL_WCSPLANE_JIS0208;
 163                                 }
 164                                 CK((*filter->output_function)(w, filter->data));
 165                         } else {
 166                                 if (c1 > 0x20 && c1 < 0x35) {
 167                                         w = 0xe000 + (c1 - 0x21)*94 + c - 0x21;
 168                                 }
 169                                 if (w <= 0) {
 170                                         w = (((c1 - 0x21) + 0x7f) << 8) | c;
 171                                         w &= MBFL_WCSPLANE_MASK;
 172                                         w |= MBFL_WCSPLANE_JIS0208;
 173                                 }
 174                                 CK((*filter->output_function)(w, filter->data));
 175                         }
 176                 } else if (c == 0x1b) {
 177                         filter->status += 2;
 178                 } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
 179                         CK((*filter->output_function)(c, filter->data));
 180                 } else {
 181                         w = (c1 << 8) | c;
 182                         w &= MBFL_WCSGROUP_MASK;
 183                         w |= MBFL_WCSGROUP_THROUGH;
 184                         CK((*filter->output_function)(w, filter->data));
 185                 }
 186                 break;
 187 
 188         /* ESC */
 189 /*      case 0x02:      */
 190 /*      case 0x12:      */
 191 /*      case 0x22:      */
 192 /*      case 0x82:      */
 193 /*      case 0xa2:      */
 194         case 2:
 195                 if (c == 0x24) {                /* '$' */
 196                         filter->status++;
 197                 } else if (c == 0x28) {         /* '(' */
 198                         filter->status += 3;
 199                 } else {
 200                         filter->status &= ~0xf;
 201                         CK((*filter->output_function)(0x1b, filter->data));
 202                         goto retry;
 203                 }
 204                 break;
 205 
 206         /* ESC $ */
 207 /*      case 0x03:      */
 208 /*      case 0x13:      */
 209 /*      case 0x23:      */
 210 /*      case 0x83:      */
 211 /*      case 0xa3:      */
 212         case 3:
 213                 if (c == 0x40 || c == 0x42) {   /* '@' or 'B' */
 214                         filter->status = 0x80;
 215                 } else if (c == 0x28) {     /* '(' */
 216                         filter->status++;
 217                 } else {
 218                         filter->status &= ~0xf;
 219                         CK((*filter->output_function)(0x1b, filter->data));
 220                         CK((*filter->output_function)(0x24, filter->data));
 221                         goto retry;
 222                 }
 223                 break;
 224 
 225         /* ESC $ ( */
 226 /*      case 0x04:      */
 227 /*      case 0x14:      */
 228 /*      case 0x24:      */
 229 /*      case 0x84:      */
 230 /*      case 0xa4:      */
 231         case 4:
 232                 if (c == 0x40 || c == 0x42) {   /* '@' or 'B' */
 233                         filter->status = 0x80;
 234                 } else if (c == 0x3f) {                 /* '?' */
 235                         filter->status = 0xa0;
 236                 } else {
 237                         filter->status &= ~0xf;
 238                         CK((*filter->output_function)(0x1b, filter->data));
 239                         CK((*filter->output_function)(0x24, filter->data));
 240                         CK((*filter->output_function)(0x28, filter->data));
 241                         goto retry;
 242                 }
 243                 break;
 244 
 245         /* ESC ( */
 246 /*      case 0x05:      */
 247 /*      case 0x15:      */
 248 /*      case 0x25:      */
 249 /*      case 0x85:      */
 250 /*      case 0xa5:      */
 251         case 5:
 252                 if (c == 0x42) {                /* 'B' */
 253                         filter->status = 0;
 254                 } else if (c == 0x4a) {         /* 'J' */
 255                         filter->status = 0;
 256                 } else if (c == 0x49) {         /* 'I' */
 257                         filter->status = 0x20;
 258                 } else {
 259                         filter->status &= ~0xf;
 260                         CK((*filter->output_function)(0x1b, filter->data));
 261                         CK((*filter->output_function)(0x28, filter->data));
 262                         goto retry;
 263                 }
 264                 break;
 265 
 266         default:
 267                 filter->status = 0;
 268                 break;
 269         }
 270 
 271         return c;
 272 }
 273 
 274 static int
 275 cp932ext3_cp932ext2_jis(int c)
 276 {
 277         int idx;
 278 
 279         idx = sjistoidx(0xfa, 0x40) + c;
 280         if (idx >= sjistoidx(0xfa, 0x5c))
 281                 idx -=  sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40);
 282         else if (idx >= sjistoidx(0xfa, 0x55))
 283                 idx -=  sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa);
 284         else if (idx >= sjistoidx(0xfa, 0x40))
 285                 idx -=  sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef);
 286         return idxtojis1(idx) << 8 | idxtojis2(idx);
 287 }
 288 
 289 /*
 290  * wchar => ISO-2022-JP-MS
 291  */
 292 int
 293 mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter)
 294 {
 295         int c1, c2, s1, s2;
 296 
 297         s1 = 0;
 298         s2 = 0;
 299         if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
 300                 s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
 301         } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
 302                 s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
 303         } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
 304                 s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
 305         } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
 306                 s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
 307         } else if (c >= 0xe000 && c < (0xe000 + 20*94)) {       /* user  (95ku - 114ku) */
 308                 s1 = c - 0xe000;
 309                 c1 = s1/94 + 0x7f;
 310                 c2 = s1%94 + 0x21;
 311                 s1 = (c1 << 8) | c2;
 312         }
 313         if (s1 <= 0) {
 314                 c1 = c & ~MBFL_WCSPLANE_MASK;
 315                 if (c1 == MBFL_WCSPLANE_WINCP932) {
 316                         s1 = c & MBFL_WCSPLANE_MASK;
 317                         s2 = 1;
 318                 } else if (c1 == MBFL_WCSPLANE_JIS0208) {
 319                         s1 = c & MBFL_WCSPLANE_MASK;
 320                 } else if (c1 == MBFL_WCSPLANE_JIS0212) {
 321                         s1 = c & MBFL_WCSPLANE_MASK;
 322                         s1 |= 0x8080;
 323                 } else if (c == 0xa5) {         /* YEN SIGN */
 324                         s1 = 0x216f;                /* FULLWIDTH YEN SIGN */
 325                 } else if (c == 0x203e) {       /* OVER LINE */
 326                         s1 = 0x2131;    /* FULLWIDTH MACRON */
 327                 } else if (c == 0xff3c) {       /* FULLWIDTH REVERSE SOLIDUS */
 328                         s1 = 0x2140;
 329                 } else if (c == 0xff5e) {       /* FULLWIDTH TILDE */
 330                         s1 = 0x2141;
 331                 } else if (c == 0x2225) {       /* PARALLEL TO */
 332                         s1 = 0x2142;
 333                 } else if (c == 0xff0d) {       /* FULLWIDTH HYPHEN-MINUS */
 334                         s1 = 0x215d;
 335                 } else if (c == 0xffe0) {       /* FULLWIDTH CENT SIGN */
 336                         s1 = 0x2171;
 337                 } else if (c == 0xffe1) {       /* FULLWIDTH POUND SIGN */
 338                         s1 = 0x2172;
 339                 } else if (c == 0xffe2) {       /* FULLWIDTH NOT SIGN */
 340                         s1 = 0x224c;
 341                 }
 342         }
 343         if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */
 344                 s1 = -1;
 345                 c1 = 0;
 346                 c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
 347                 while (c1 < c2) {               /* CP932 vendor ext1 (13ku) */
 348                         if (c == cp932ext1_ucs_table[c1]) {
 349                                 s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
 350                                 break;
 351                         }
 352                         c1++;
 353                 }
 354                 if (s1 <= 0) {
 355                         c1 = 0;
 356                         c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
 357                         while (c1 < c2) {               /* CP932 vendor ext3 (115ku - 119ku) */
 358                                 if (c == cp932ext3_ucs_table[c1]) {
 359                                         s1 = cp932ext3_cp932ext2_jis(c1);
 360                                         break;
 361                                 }
 362                                 c1++;
 363                         }
 364                 }
 365                 if (c == 0) {
 366                         s1 = 0;
 367                 } else if (s1 <= 0) {
 368                         s1 = -1;
 369                 }
 370         }
 371         if (s1 >= 0) {
 372                 if (s1 < 0x80) { /* latin */
 373                         if ((filter->status & 0xff00) != 0) {
 374                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 375                                 CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 376                                 CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
 377                         }
 378                         CK((*filter->output_function)(s1, filter->data));
 379                         filter->status = 0;
 380                 } else if (s1 > 0xa0 && s1 < 0xe0) { /* kana */
 381                         if ((filter->status & 0xff00) != 0x100) {
 382                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 383                                 CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 384                                 CK((*filter->output_function)(0x49, filter->data));             /* 'I' */
 385                         }
 386                         filter->status = 0x100;
 387                         CK((*filter->output_function)(s1 & 0x7f, filter->data));
 388                 } else if (s1 < 0x7e7f) { /* X 0208 */
 389                         if ((filter->status & 0xff00) != 0x200) {
 390                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 391                                 CK((*filter->output_function)(0x24, filter->data));             /* '$' */
 392                                 CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
 393                         }
 394                         filter->status = 0x200;
 395                         CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data));
 396                         CK((*filter->output_function)(s1 & 0x7f, filter->data));
 397                 } else if (s1 < 0x927f) { /* UDC */
 398                         if ((filter->status & 0xff00) != 0x800) {
 399                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 400                                 CK((*filter->output_function)(0x24, filter->data));             /* '$' */
 401                                 CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 402                                 CK((*filter->output_function)(0x3f, filter->data));             /* '?' */
 403                         }
 404                         filter->status = 0x800;
 405                         CK((*filter->output_function)(((s1 >> 8) - 0x5e) & 0x7f, filter->data));
 406                         CK((*filter->output_function)(s1 & 0x7f, filter->data));
 407                 }
 408         } else {
 409                 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
 410                         CK(mbfl_filt_conv_illegal_output(c, filter));
 411                 }
 412         }
 413 
 414         return c;
 415 }
 416 
 417 int
 418 mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter)
 419 {
 420         /* back to latin */
 421         if ((filter->status & 0xff00) != 0) {
 422                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 423                 CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 424                 CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
 425         }
 426 
 427         filter->status &= 0xff;
 428 
 429         if (filter->flush_function != NULL) {
 430                 return (*filter->flush_function)(filter->data);
 431         }
 432 
 433         return 0;
 434 }
 435 
 436 int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter)
 437 {
 438 retry:
 439         switch (filter->status & 0xf) {
 440 /*      case 0x00:       ASCII */
 441 /*      case 0x10:       X 0201 latin */
 442 /*      case 0x20:       X 0201 kana */
 443 /*      case 0x80:       X 0208 */
 444 /*      case 0xa0:       X UDC */
 445         case 0:
 446                 if (c == 0x1b) {
 447                         filter->status += 2;
 448                 } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) {                /* kanji first char */
 449                         filter->status += 1;
 450                 } else if (c >= 0 && c < 0x80) {                /* latin, CTLs */
 451                         ;
 452                 } else {
 453                         filter->flag = 1;       /* bad */
 454                 }
 455                 break;
 456 
 457 /*      case 0x81:       X 0208 second char */
 458 /*      case 0xa1:       UDC second char */
 459         case 1:
 460                 filter->status &= ~0xf;
 461                 if (c == 0x1b) {
 462                         goto retry;
 463                 } else if (c < 0x21 || c > 0x7e) {              /* bad */
 464                         filter->flag = 1;
 465                 }
 466                 break;
 467 
 468         /* ESC */
 469         case 2:
 470                 if (c == 0x24) {                /* '$' */
 471                         filter->status++;
 472                 } else if (c == 0x28) {         /* '(' */
 473                         filter->status += 3;
 474                 } else {
 475                         filter->flag = 1;       /* bad */
 476                         filter->status &= ~0xf;
 477                         goto retry;
 478                 }
 479                 break;
 480 
 481         /* ESC $ */
 482         case 3:
 483                 if (c == 0x40 || c == 0x42) {           /* '@' or 'B' */
 484                         filter->status = 0x80;
 485                 } else if (c == 0x28) {     /* '(' */
 486                         filter->status++;
 487                 } else {
 488                         filter->flag = 1;       /* bad */
 489                         filter->status &= ~0xf;
 490                         goto retry;
 491                 }
 492                 break;
 493 
 494         /* ESC $ ( */
 495         case 4:
 496                 if (c == 0x40 || c == 0x42) {           /* '@' or 'B' */
 497                         filter->status = 0x80;
 498                 } else if (c == 0x3f) {         /* '?' */
 499                         filter->status = 0xa0;
 500                 } else {
 501                         filter->flag = 1;       /* bad */
 502                         filter->status &= ~0xf;
 503                         goto retry;
 504                 }
 505                 break;
 506 
 507         /* ESC ( */
 508         case 5:
 509                 if (c == 0x42) {                /* 'B' */
 510                         filter->status = 0;
 511                 } else if (c == 0x4a) {         /* 'J' */
 512                         filter->status = 0;
 513                 } else if (c == 0x49) {         /* 'I' */
 514                         filter->status = 0x20;
 515                 } else {
 516                         filter->flag = 1;       /* bad */
 517                         filter->status &= ~0xf;
 518                         goto retry;
 519                 }
 520                 break;
 521 
 522         default:
 523                 filter->status = 0;
 524                 break;
 525         }
 526 
 527         return c;
 528 }

/* [<][>][^][v][top][bottom][index][help] */