root/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_filt_conv_2022jp_mobile_wchar
  2. mbfl_filt_conv_wchar_2022jp_mobile

   1 /*
   2  * "streamable kanji code filter and converter"
   3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
   4  *
   5  * LICENSE NOTICES
   6  *
   7  * This file is part of "streamable kanji code filter and converter",
   8  * which is distributed under the terms of GNU Lesser General Public
   9  * License (version 2) as published by the Free Software Foundation.
  10  *
  11  * This software is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with "streamable kanji code filter and converter";
  18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19  * Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  * The author of this file:
  22  *
  23  */
  24 /*
  25  * The source code included in this file was separated from mbfilter_iso2022_jp_ms.c
  26  * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011.
  27  *
  28  */
  29 
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33 
  34 #include "mbfilter.h"
  35 #include "mbfilter_iso2022jp_mobile.h"
  36 #include "mbfilter_sjis_mobile.h"
  37 
  38 #include "unicode_table_cp932_ext.h"
  39 #include "unicode_table_jis.h"
  40 #include "cp932_table.h"
  41 
  42 extern int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter); /* defined outside this file; used as the last entry of vtbl_wchar_2022jp_kddi */
  43 extern int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter); /* defined outside this file; used as the last entry of vtbl_identify_2022jp_kddi */
  44 
  45 static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL}; /* alias name list, terminated by NULL; referenced by mbfl_encoding_2022jp_kddi */
  46 
  47 const mbfl_encoding mbfl_encoding_2022jp_kddi = { /* descriptor for the KDDI variant handled by this file; positional initializer, field order comes from mbfl_encoding (declared outside this file) */
  48         mbfl_no_encoding_2022jp_kddi, /* encoding id; also compared against filter->from / filter->to in the converters below */
  49         "ISO-2022-JP-MOBILE#KDDI", /* presumably the canonical encoding name -- TODO confirm against the mbfl_encoding declaration */
  50         "ISO-2022-JP", /* presumably the MIME name -- TODO confirm against the mbfl_encoding declaration */
  51         &mbfl_encoding_2022jp_kddi_aliases, /* pointer to the NULL-terminated alias array defined above */
  52         NULL,
  53         MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE /* encoding-type flags OR-ed together */
  54 };
  55 
  56 const struct mbfl_identify_vtbl vtbl_identify_2022jp_kddi = { /* identify-filter table for the KDDI variant; positional initializer */
  57         mbfl_no_encoding_2022jp_kddi, /* encoding this table belongs to */
  58         mbfl_filt_ident_common_ctor, /* shared constructor (declared outside this file) */
  59         mbfl_filt_ident_common_dtor, /* shared destructor (declared outside this file) */
  60         mbfl_filt_ident_2022jpms /* identify routine declared extern above */
  61 };
  62 
  63 const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = { /* conversion table: KDDI variant -> wchar; positional initializer */
  64         mbfl_no_encoding_2022jp_kddi, /* source encoding id */
  65         mbfl_no_encoding_wchar, /* destination encoding id */
  66         mbfl_filt_conv_common_ctor, /* shared constructor (declared outside this file) */
  67         mbfl_filt_conv_common_dtor, /* shared destructor (declared outside this file) */
  68         mbfl_filt_conv_2022jp_mobile_wchar, /* per-character decoder defined in this file */
  69         mbfl_filt_conv_common_flush /* shared flush routine (declared outside this file) */
  70 };
  71 
  72 const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = { /* conversion table: wchar -> KDDI variant; positional initializer */
  73         mbfl_no_encoding_wchar, /* source encoding id */
  74         mbfl_no_encoding_2022jp_kddi, /* destination encoding id */
  75         mbfl_filt_conv_common_ctor, /* shared constructor (declared outside this file) */
  76         mbfl_filt_conv_common_dtor, /* shared destructor (declared outside this file) */
  77         mbfl_filt_conv_wchar_2022jp_mobile, /* per-character encoder defined in this file */
  78         mbfl_filt_conv_any_jis_flush /* flush routine declared extern above */
  79 };
  80 
  /* Evaluate a filter call; make the enclosing function return -1 when the result is negative. */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)

/*
 * Linear index of a two-byte pair (c1, c2): 188 cells per lead byte, lead
 * bytes counted from 0x81 (or from 0xc1 when c1 > 0x9f); the trail byte is
 * counted from 0x40 and values above 0x7e are shifted down by one.
 */
#define sjistoidx(c1, c2) \
	(((c1) > 0x9f) \
	? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \
	: (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)))

/* Split a linear index into 94-cell rows: first and second byte, both offset by 0x21. */
#define idxtojis1(c) (((c) / 94) + 0x21)
#define idxtojis2(c) (((c) % 94) + 0x21)

/*
 * Convert the byte pair (c1, c2) into the pair (s1, s2).
 * s1 and s2 must be modifiable lvalues; c1 and c2 are evaluated more than
 * once, so they must be free of side effects.
 */
#define SJIS_ENCODE(c1,c2,s1,s2) \
	do { \
		(s1) = (c1); \
		(s1)--; \
		(s1) >>= 1; \
		if ((c1) < 0x5f) { \
			(s1) += 0x71; \
		} else { \
			(s1) += 0xb1; \
		} \
		(s2) = (c2); \
		if ((c1) & 1) { \
			if ((c2) < 0x60) { \
				(s2)--; \
			} \
			(s2) += 0x20; \
		} else { \
			(s2) += 0x7e; \
		} \
	} while (0)

/*
 * Inverse of SJIS_ENCODE: convert the byte pair (c1, c2) back into (s1, s2).
 * s1 and s2 must be modifiable lvalues.
 */
#define SJIS_DECODE(c1,c2,s1,s2) \
	do { \
		(s1) = (c1); \
		if ((s1) < 0xa0) { \
			(s1) -= 0x81; \
		} else { \
			(s1) -= 0xc1; \
		} \
		(s1) <<= 1; \
		(s1) += 0x21; \
		(s2) = (c2); \
		if ((s2) < 0x9f) { \
			if ((s2) < 0x7f) { \
				(s2)++; \
			} \
			(s2) -= 0x20; \
		} else { \
			(s1)++; \
			(s2) -= 0x7e; \
		} \
	} while (0)

/*
 * Turn the linear index held in s1 into a packed two-byte code:
 *   c1 = row byte, c2 = cell byte, s1 = (c1 << 8) | c2, s2 = 1.
 * All four arguments must be modifiable lvalues. The body is wrapped in
 * do { } while (0) so the macro behaves as ONE statement; as a bare
 * statement list, an unbraced `if (x) CODE2JIS(...);` would guard only the
 * first assignment.
 */
#define CODE2JIS(c1,c2,s1,s2) \
	do { \
		(c1) = (s1)/94 + 0x21; \
		(c2) = (s1) - 94*((c1) - 0x21) + 0x21; \
		(s1) = ((c1) << 8) | (c2); \
		(s2) = 1; \
	} while (0)

 138 
 139 /*
 140  * ISO-2022-JP-Mobile => wchar
 141  */
 142 int
 143 mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter)
 144 {
 145         int c1, s, w, snd = 0;
 146 
 147 retry:
 148         switch (filter->status & 0xf) {
 149 /*      case 0x00:       ASCII */
 150 /*      case 0x10:       X 0201 latin */
 151 /*      case 0x20:       X 0201 kana */
 152 /*      case 0x80:       X 0208 */
 153         case 0:
 154                 if (c == 0x1b) {
 155                         filter->status += 2;
 156                 } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) {            /* kana */
 157                         CK((*filter->output_function)(0xff40 + c, filter->data));
 158                 } else if (filter->status == 0x80 && c > 0x20 && c < 0x80) {            /* kanji first char */
 159                         filter->cache = c;
 160                         filter->status += 1;
 161                 } else if (c >= 0 && c < 0x80) {                /* latin, CTLs */
 162                         CK((*filter->output_function)(c, filter->data));
 163                 } else if (c > 0xa0 && c < 0xe0) {      /* GR kana */
 164                         CK((*filter->output_function)(0xfec0 + c, filter->data));
 165                 } else {
 166                         w = c & MBFL_WCSGROUP_MASK;
 167                         w |= MBFL_WCSGROUP_THROUGH;
 168                         CK((*filter->output_function)(w, filter->data));
 169                 }
 170                 break;
 171 
 172 /*      case 0x81:       X 0208 second char */
 173         case 1:
 174                 w = 0;
 175                 filter->status &= ~0xf;
 176                 c1 = filter->cache;
 177                 if (c > 0x20 && c < 0x7f) {
 178                         s = (c1 - 0x21)*94 + c - 0x21;
 179 
 180                         if (s <= 137) {
 181                                 if (s == 31) {
 182                                         w = 0xff3c;                     /* FULLWIDTH REVERSE SOLIDUS */
 183                                 } else if (s == 32) {
 184                                         w = 0xff5e;                     /* FULLWIDTH TILDE */
 185                                 } else if (s == 33) {
 186                                         w = 0x2225;                     /* PARALLEL TO */
 187                                 } else if (s == 60) {
 188                                         w = 0xff0d;                     /* FULLWIDTH HYPHEN-MINUS */
 189                                 } else if (s == 80) {
 190                                         w = 0xffe0;                     /* FULLWIDTH CENT SIGN */
 191                                 } else if (s == 81) {
 192                                         w = 0xffe1;                     /* FULLWIDTH POUND SIGN */
 193                                 } else if (s == 137) {
 194                                         w = 0xffe2;                     /* FULLWIDTH NOT SIGN */
 195                                 }
 196                         }
 197 
 198                         if (w == 0) {
 199                                 if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {              /* vendor ext1 (13ku) */
 200                                         w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
 201                                 } else if (s >= 0 && s < jisx0208_ucs_table_size) {
 202                                         w = jisx0208_ucs_table[s];
 203                                 } else {
 204                                         w = 0;
 205                                 }
 206                         }
 207 
 208                         if (s >= (84*94) && s < 91*94) {
 209                                 s += 22*94;
 210                                 if (filter->from->no_encoding == mbfl_no_encoding_2022jp_kddi) {
 211                                         w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
 212                                 }
 213                                 if (w > 0  && snd > 0) {
 214                                         CK((*filter->output_function)(snd, filter->data));
 215                                 }
 216                         }
 217 
 218                         if (w <= 0) {
 219                                 w = (c1 << 8) | c;
 220                                 w &= MBFL_WCSPLANE_MASK;
 221                                 w |= MBFL_WCSPLANE_JIS0208;
 222                                 }
 223                         CK((*filter->output_function)(w, filter->data));
 224                 } else if (c == 0x1b) {
 225                         filter->status += 2;
 226                 } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
 227                         CK((*filter->output_function)(c, filter->data));
 228                 } else {
 229                         w = (c1 << 8) | c;
 230                         w &= MBFL_WCSGROUP_MASK;
 231                         w |= MBFL_WCSGROUP_THROUGH;
 232                         CK((*filter->output_function)(w, filter->data));
 233                 }
 234                 break;
 235 
 236         /* ESC */
 237 /*      case 0x02:      */
 238 /*      case 0x12:      */
 239 /*      case 0x22:      */
 240 /*      case 0x82:      */
 241         case 2:
 242                 if (c == 0x24) {                /* '$' */
 243                         filter->status++;
 244                 } else if (c == 0x28) {         /* '(' */
 245                         filter->status += 3;
 246                 } else {
 247                         filter->status &= ~0xf;
 248                         CK((*filter->output_function)(0x1b, filter->data));
 249                         goto retry;
 250                 }
 251                 break;
 252 
 253         /* ESC $ */
 254 /*      case 0x03:      */
 255 /*      case 0x13:      */
 256 /*      case 0x23:      */
 257 /*      case 0x83:      */
 258         case 3:
 259                 if (c == 0x40 || c == 0x42) {   /* '@' or 'B' */
 260                         filter->status = 0x80;
 261                 } else if (c == 0x28) {     /* '(' */
 262                         filter->status++;
 263                 } else {
 264                         filter->status &= ~0xf;
 265                         CK((*filter->output_function)(0x1b, filter->data));
 266                         CK((*filter->output_function)(0x24, filter->data));
 267                         goto retry;
 268                 }
 269                 break;
 270 
 271         /* ESC $ ( */
 272 /*      case 0x04:      */
 273 /*      case 0x14:      */
 274 /*      case 0x24:      */
 275 /*      case 0x84:      */
 276         case 4:
 277                 if (c == 0x40 || c == 0x42) {   /* '@' or 'B' */
 278                         filter->status = 0x80;
 279                 } else {
 280                         filter->status &= ~0xf;
 281                         CK((*filter->output_function)(0x1b, filter->data));
 282                         CK((*filter->output_function)(0x24, filter->data));
 283                         CK((*filter->output_function)(0x28, filter->data));
 284                         goto retry;
 285                 }
 286                 break;
 287 
 288         /* ESC ( */
 289 /*      case 0x05:      */
 290 /*      case 0x15:      */
 291 /*      case 0x25:      */
 292 /*      case 0x85:      */
 293         case 5:
 294                 if (c == 0x42) {                /* 'B' */
 295                         filter->status = 0;
 296                 } else if (c == 0x4a) {         /* 'J' */
 297                         filter->status = 0;
 298                 } else if (c == 0x49) {         /* 'I' */
 299                         filter->status = 0x20;
 300                 } else {
 301                         filter->status &= ~0xf;
 302                         CK((*filter->output_function)(0x1b, filter->data));
 303                         CK((*filter->output_function)(0x28, filter->data));
 304                         goto retry;
 305                 }
 306                 break;
 307 
 308         default:
 309                 filter->status = 0;
 310                 break;
 311         }
 312 
 313         return c;
 314 }
 315 
 316 /*
 317  * wchar => ISO-2022-JP-Mobile
 318  */
 319 int
 320 mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
 321 {
 322         int c1, c2, s1, s2;
 323 
 324         s1 = 0;
 325         s2 = 0;
 326         if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
 327                 s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
 328         } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
 329                 s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
 330         } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
 331                 s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
 332         } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
 333                 s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
 334         } else if (c >= 0xe000 && c < (0xe000 + 20*94)) {       /* user  (95ku - 114ku) */
 335                 s1 = c - 0xe000;
 336                 c1 = s1/94 + 0x7f;
 337                 c2 = s1%94 + 0x21;
 338                 s1 = (c1 << 8) | c2;
 339         }
 340         if (s1 <= 0) {
 341                 c1 = c & ~MBFL_WCSPLANE_MASK;
 342                 if (c1 == MBFL_WCSPLANE_WINCP932) {
 343                         s1 = c & MBFL_WCSPLANE_MASK;
 344                         s2 = 1;
 345                 } else if (c1 == MBFL_WCSPLANE_JIS0208) {
 346                         s1 = c & MBFL_WCSPLANE_MASK;
 347                 } else if (c1 == MBFL_WCSPLANE_JIS0212) {
 348                         s1 = c & MBFL_WCSPLANE_MASK;
 349                         s1 |= 0x8080;
 350                 } else if (c == 0xa5) {         /* YEN SIGN */
 351                         s1 = 0x216f;                /* FULLWIDTH YEN SIGN */
 352                 } else if (c == 0x203e) {       /* OVER LINE */
 353                         s1 = 0x2131;    /* FULLWIDTH MACRON */
 354                 } else if (c == 0xff3c) {       /* FULLWIDTH REVERSE SOLIDUS */
 355                         s1 = 0x2140;
 356                 } else if (c == 0xff5e) {       /* FULLWIDTH TILDE */
 357                         s1 = 0x2141;
 358                 } else if (c == 0x2225) {       /* PARALLEL TO */
 359                         s1 = 0x2142;
 360                 } else if (c == 0xff0d) {       /* FULLWIDTH HYPHEN-MINUS */
 361                         s1 = 0x215d;
 362                 } else if (c == 0xffe0) {       /* FULLWIDTH CENT SIGN */
 363                         s1 = 0x2171;
 364                 } else if (c == 0xffe1) {       /* FULLWIDTH POUND SIGN */
 365                         s1 = 0x2172;
 366                 } else if (c == 0xffe2) {       /* FULLWIDTH NOT SIGN */
 367                         s1 = 0x224c;
 368                 }
 369         }
 370 
 371         if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */
 372                 s1 = -1;
 373                 c1 = 0;
 374                 c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
 375                 while (c1 < c2) {               /* CP932 vendor ext1 (13ku) */
 376                         if (c == cp932ext1_ucs_table[c1]) {
 377                                 s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
 378                                 break;
 379                         }
 380                         c1++;
 381                 }
 382                 if (c == 0) {
 383                         s1 = 0;
 384                 } else if (s1 <= 0) {
 385                         s1 = -1;
 386                 }
 387         }
 388 
 389         if (filter->to->no_encoding == mbfl_no_encoding_2022jp_kddi &&
 390                 mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) {
 391                 CODE2JIS(c1,c2,s1,s2);
 392                 s1 -= 0x1600;
 393         }
 394 
 395         if (filter->status == 1 && filter->cache > 0) {
 396                 return c;
 397         }
 398 
 399         if (s1 >= 0) {
 400                 if (s1 < 0x80) { /* latin */
 401                         if ((filter->status & 0xff00) != 0) {
 402                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 403                                 CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 404                                 CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
 405                         }
 406                         CK((*filter->output_function)(s1, filter->data));
 407                         filter->status = 0;
 408                 } else if (s1 > 0xa0 && s1 < 0xe0) { /* kana */
 409                         if ((filter->status & 0xff00) != 0x100) {
 410                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 411                                 CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 412                                 CK((*filter->output_function)(0x49, filter->data));             /* 'I' */
 413                         }
 414                         filter->status = 0x100;
 415                         CK((*filter->output_function)(s1 & 0x7f, filter->data));
 416                 } else if (s1 < 0x7e7f) { /* X 0208 */
 417                         if ((filter->status & 0xff00) != 0x200) {
 418                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 419                                 CK((*filter->output_function)(0x24, filter->data));             /* '$' */
 420                                 CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
 421                         }
 422                         filter->status = 0x200;
 423                         CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data));
 424                         CK((*filter->output_function)(s1 & 0x7f, filter->data));
 425                 }
 426         } else {
 427                 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
 428                         CK(mbfl_filt_conv_illegal_output(c, filter));
 429                 }
 430         }
 431 
 432         return c;
 433 }

/* [<][>][^][v][top][bottom][index][help] */