root/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_filt_conv_jis2004_wchar
  2. mbfl_filt_conv_wchar_jis2004
  3. mbfl_filt_conv_jis2004_flush

   1 /*
   2  * "streamable kanji code filter and converter"
   3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
   4  *
   5  * LICENSE NOTICES
   6  *
   7  * This file is part of "streamable kanji code filter and converter",
   8  * which is distributed under the terms of GNU Lesser General Public
   9  * License (version 2) as published by the Free Software Foundation.
  10  *
  11  * This software is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with "streamable kanji code filter and converter";
  18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19  * Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  * The author of this file:
  22  *
  23  */
  24 /*
  25  * The source code included in this file was separated from mbfilter_sjis.c
  26  * by rui hirokawa <hirokawa@php.net> on 15 aug 2011.
  27  *
  28  */
  29 
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33 
  34 #include "mbfilter.h"
  35 #include "mbfilter_sjis_2004.h"
  36 
  37 #include "unicode_table_jis2004.h"
  38 #include "unicode_table_jis.h"
  39 
  40 extern const unsigned char mblen_table_sjis[];
     /* Table defined outside this file; it is installed as the fifth member of
      * mbfl_encoding_sjis2004. */
  41 
  42 static int mbfl_filt_ident_sjis2004(int c, mbfl_identify_filter *filter);
     /* NOTE(review): the definitions index at the top of this listing names only
      * three functions and this is not one of them, and vtbl_identify_sjis2004
      * uses mbfl_filt_ident_sjis instead -- confirm whether this static
      * prototype is still needed. */
  43 
     /* Helpers implemented elsewhere.  In this file mbfl_filt_ident_sjis is used
      * as the identify callback, and the two search helpers are called with a
      * value, a table and a length and are treated as returning a table index
      * (>= 0) on a hit. */
  44 extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter);
  45 extern int mbfl_bisec_srch(int w, const unsigned short *tbl, int n);
  46 extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
  47 
  48 static const char *mbfl_encoding_sjis2004_aliases[] = {"SJIS2004","Shift_JIS-2004", NULL};
     /* NULL-terminated list of alternative names, referenced by the encoding
      * descriptor below. */
  49 
  50 const mbfl_encoding mbfl_encoding_sjis2004 = {
             /* Encoding descriptor for Shift_JIS-2004.  The initializer is
              * positional; the struct declaration is not visible here, so the
              * field roles noted below are presumed -- TODO confirm against
              * mbfl_encoding's definition. */
  51         mbfl_no_encoding_sjis2004,      /* encoding id */
  52         "SJIS-2004",                    /* presumably the canonical name */
  53         "Shift_JIS",                    /* presumably the MIME name */
  54         (const char *(*)[])&mbfl_encoding_sjis2004_aliases,    /* alias list defined above */
  55         mblen_table_sjis,               /* shared Shift_JIS byte-length table */
  56         MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE     /* encoding-type flags */
  57 };
  58 
  59 const struct mbfl_identify_vtbl vtbl_identify_sjis2004 = {
             /* Identify-filter table for Shift_JIS-2004: encoding id, the common
              * constructor/destructor, and the plain Shift_JIS identify function
              * (no SJIS-2004-specific identifier is used here). */
  60         mbfl_no_encoding_sjis2004,
  61         mbfl_filt_ident_common_ctor,
  62         mbfl_filt_ident_common_dtor,
  63         mbfl_filt_ident_sjis
  64 };
  65 
  66 const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = {
             /* Conversion table for Shift_JIS-2004 -> wchar: source encoding id,
              * destination encoding id, common constructor/destructor, the
              * per-byte decoder mbfl_filt_conv_jis2004_wchar, and the common
              * flush routine. */
  67         mbfl_no_encoding_sjis2004,
  68         mbfl_no_encoding_wchar,
  69         mbfl_filt_conv_common_ctor,
  70         mbfl_filt_conv_common_dtor,
  71         mbfl_filt_conv_jis2004_wchar,
  72         mbfl_filt_conv_common_flush
  73 };
  74 
  75 const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = {
             /* Conversion table for wchar -> Shift_JIS-2004: source encoding id,
              * destination encoding id, common constructor/destructor, the
              * encoder mbfl_filt_conv_wchar_jis2004, and the JIS-2004-specific
              * flush routine mbfl_filt_conv_jis2004_flush. */
  76         mbfl_no_encoding_wchar,
  77         mbfl_no_encoding_sjis2004,
  78         mbfl_filt_conv_common_ctor,
  79         mbfl_filt_conv_common_dtor,
  80         mbfl_filt_conv_wchar_jis2004,
  81         mbfl_filt_conv_jis2004_flush
  82 };
  83 
  84 #define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)
     /* CK(statement): evaluates `statement` once and, if its value is negative,
      * makes the *enclosing function* return -1.  The macro therefore contains a
      * hidden return and may only be used inside functions returning int. */
  85 
  /*
   * SJIS_ENCODE(c1, c2, s1, s2)
   *
   * Converts a two-byte JIS row/cell code (c1 = first byte, c2 = second byte)
   * into the corresponding Shift_JIS byte pair, written to the lvalues
   * s1 (lead byte) and s2 (trail byte).
   *
   *   - Two consecutive JIS rows share one lead byte: ((c1 - 1) >> 1) plus
   *     0x71 for c1 < 0x5f (c1 0x21..0x5e -> 0x81..0x9f) or plus 0xb1
   *     otherwise (c1 0x5f.. -> 0xe0..).
   *   - Odd rows use the lower half of the trail-byte range (c2 + 0x1f,
   *     or c2 + 0x20 from 0x60 upward so that 0x7f is skipped); even rows
   *     use the upper half (c2 + 0x7e).
   *
   * Every parameter is parenthesized in the expansion so that expressions
   * such as *p can be passed for s1/s2 without `*p--` changing the pointer.
   * c1 and c2 are evaluated more than once and are read after s1 has been
   * written, so they must be side-effect free and must not alias s1 or s2.
   * Expands to a single statement (no trailing semicolon).
   */
  #define SJIS_ENCODE(c1,c2,s1,s2)        \
          do {                                    \
                  (s1) = (c1);                    \
                  (s1)--;                         \
                  (s1) >>= 1;                     \
                  if ((c1) < 0x5f) {              \
                          (s1) += 0x71;           \
                  } else {                        \
                          (s1) += 0xb1;           \
                  }                               \
                  (s2) = (c2);                    \
                  if ((c1) & 1) {                 \
                          if ((c2) < 0x60) {      \
                                  (s2)--;         \
                          }                       \
                          (s2) += 0x20;           \
                  } else {                        \
                          (s2) += 0x7e;           \
                  }                               \
          } while (0)
 106 
 /*
  * SJIS_DECODE(c1, c2, s1, s2)
  *
  * Converts a Shift_JIS byte pair (c1 = lead byte, c2 = trail byte) into the
  * two-byte JIS row/cell code, written to the lvalues s1 (first byte) and
  * s2 (second byte).
  *
  *   - Lead bytes below 0xa0 are rebased by 0x81, the others by 0xc1; the
  *     result is doubled and offset by 0x21 to give the odd JIS row.
  *   - Trail bytes below 0x9f belong to that odd row (c2 - 0x1f, or
  *     c2 - 0x20 from 0x7f upward); trail bytes from 0x9f upward select the
  *     following even row (s1 is incremented) with s2 = c2 - 0x7e.
  *
  * No range checking is done here: the caller must have validated c1/c2.
  * In particular a lead byte below 0x81 would make s1 negative before the
  * left shift.
  *
  * Every parameter is parenthesized in the expansion so that expressions
  * such as *p can be passed for s1/s2 without `*p++` changing the pointer.
  * Expands to a single statement (no trailing semicolon).
  */
 #define SJIS_DECODE(c1,c2,s1,s2)        \
         do {                                    \
                 (s1) = (c1);                    \
                 if ((s1) < 0xa0) {              \
                         (s1) -= 0x81;           \
                 } else {                        \
                         (s1) -= 0xc1;           \
                 }                               \
                 (s1) <<= 1;                     \
                 (s1) += 0x21;                   \
                 (s2) = (c2);                    \
                 if ((s2) < 0x9f) {              \
                         if ((s2) < 0x7f) {      \
                                 (s2)++;         \
                         }                       \
                         (s2) -= 0x20;           \
                 } else {                        \
                         (s1)++;                 \
                         (s2) -= 0x7e;           \
                 }                               \
         } while (0)
 128 
 129 
 130 /*
 131  * JIS-2004 => wchar
      *
      * Byte-at-a-time decoder shared by three source encodings, selected through
      * filter->from->no_encoding: EUC-JP-2004, Shift_JIS-2004 and, in the
      * remaining "else" branches, ISO-2022-JP-2004 (per the inline comments).
      *
      * Parameters:
      *   c      - the next input byte.
      *   filter - conversion filter.  The low nibble of filter->status is the
      *            state of the state machine below; for the escape-sequence
      *            based encoding the upper bits hold the current character-set
      *            mode (0x80, 0x90 or 0xa0, set in states 7 and 8).
      *            filter->cache keeps the pending first byte of a two-byte
      *            character.  Decoded values are handed to
      *            filter->output_function(value, filter->data).
      *
      * States (filter->status & 0xf):
      *   0  first byte of a character
      *   1  second byte of a two-byte character
      *   2  byte following 0x8e (EUC-JP-2004 kana)
      *   3  JIS X 0213 plane 2, first byte
      *   4  JIS X 0213 plane 2, second byte
      *   5  JIS X 0208 second byte (ISO-2022-JP-2004)
      *   6  after ESC          7  after ESC $
      *   8  after ESC $ (      9  after ESC (
      *
      * Returns c, or -1 (via CK) as soon as an output_function call returns a
      * negative value.
 132  */
 133 int
 134 mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter)
 135 {
 136         int k;
 137         int c1, c2, s, s1 = 0, s2 = 0, w = 0, w1;
             /* s1, s2 and w start at 0 on every call; several branches below treat
              * "still 0" as "nothing decoded yet". */
 138 
 139 retry:
             /* Re-entry point for states 6-9: after an unrecognised escape
              * sequence they clear the low nibble of status, emit the swallowed
              * bytes and jump here so the current byte is processed in state 0. */
 140         switch (filter->status & 0xf) {
 141         case 0:
 142                 if (c >= 0 && c < 0x80) {       /* latin */
 143                         if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
                                     /* EUC-JP-2004: 7-bit bytes pass through unchanged */
 144                                 CK((*filter->output_function)(c, filter->data));
 145                         } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) {
                                     /* Shift_JIS-2004: 0x5c and 0x7e are emitted as
                                      * 0x00a5 and 0x203e; other 7-bit bytes unchanged */
 146                                 if (c == 0x5c) {
 147                                         CK((*filter->output_function)(0x00a5, filter->data));
 148                                 } else if (c == 0x7e) {
 149                                         CK((*filter->output_function)(0x203e, filter->data));
 150                                 } else {
 151                                         CK((*filter->output_function)(c, filter->data));
 152                                 }
 153                         } else { /* ISO-2022-JP-2004 */
 154                                 if (c == 0x1b) {
                                             /* ESC: go to state 6, keeping the mode bits */
 155                                         filter->status += 6;
 156                                 } else if ((filter->status == 0x80 || filter->status == 0x90 || filter->status == 0xa0)
 157                                    && c > 0x20 && c < 0x7f) {           /* kanji first char */
                                             /* remember the first byte and move to the
                                              * second-byte state matching the mode:
                                              * 0x90 -> state 1, 0xa0 -> state 4, 0x80 -> state 5 */
 158                                         filter->cache = c;
 159                                         if (filter->status == 0x90) {
 160                                                 filter->status += 1; /* JIS X 0213 plane 1 */
 161                                         } else if (filter->status == 0xa0) {
 162                                                 filter->status += 4; /* JIS X 0213 plane 2 */
 163                                         } else {
 164                                                 filter->status += 5; /* JIS X 0208 */
 165                                         }
 166                                 } else {
 167                                         CK((*filter->output_function)(c, filter->data));
 168                                 }
 169                         }
 170                 } else {
                             /* c is outside 0x00..0x7f */
 171                         if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
 172                                 if (c > 0xa0 && c < 0xff) {     /* X 0213 plane 1 first char */
 173                                         filter->status = 1;
 174                                         filter->cache = c;
 175                                 } else if (c == 0x8e) { /* kana first char */
 176                                         filter->status = 2;
 177                                 } else if (c == 0x8f) { /* X 0213 plane 2 first char */
 178                                         filter->status = 3;
 179                                 } else {
                                             /* anything else is emitted masked with
                                              * MBFL_WCSGROUP_MASK and tagged with
                                              * MBFL_WCSGROUP_THROUGH */
 180                                         w = c & MBFL_WCSGROUP_MASK;
 181                                         w |= MBFL_WCSGROUP_THROUGH;
 182                                         CK((*filter->output_function)(w, filter->data));
 183                                 }
 184                         } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) {
 185                                 if (c > 0xa0 && c < 0xe0) {     /* kana */
                                             /* 0xa1..0xdf -> 0xff61..0xff9f */
 186                                         CK((*filter->output_function)(0xfec0 + c, filter->data));
 187                                 } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */
                                             /* 0xa1..0xdf were handled above, so this
                                              * accepts 0x81..0x9f and 0xe0..0xfc */
 188                                         filter->status = 1;
 189                                         filter->cache = c;
 190                                 } else {
 191                                         w = c & MBFL_WCSGROUP_MASK;
 192                                         w |= MBFL_WCSGROUP_THROUGH;
 193                                         CK((*filter->output_function)(w, filter->data));
 194                                 }
 195                         } else {
 196                                 w = c & MBFL_WCSGROUP_MASK;
 197                                 w |= MBFL_WCSGROUP_THROUGH;
 198                                 CK((*filter->output_function)(w, filter->data));
 199                         }
 200                 }
 201                 break;
 202 
 203         case 1:         /* kanji second char */
                     /* back to state 0 (mode bits kept) before anything is emitted */
 204                 filter->status &= ~0xf;
 205                 c1 = filter->cache;
 206 
                     /* Normalise both bytes to a 7-bit JIS pair (s1, s2).  When the
                      * second byte is out of range for EUC/SJIS, s1 and s2 stay 0,
                      * so w1 is 0 and the error branches at the end are taken. */
 207                 if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
 208                         if (c > 0xa0 && c < 0xff) {
 209                                 s1 = c1 - 0x80;
 210                                 s2 = c - 0x80;
 211                         }
 212                 } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) {
 213                         if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
 214                                 SJIS_DECODE(c1, c, s1, s2);
 215                         }
 216                 } else {
 217                         s1 = c1;
 218                         s2 = c;
 219                 }
 220                 w1 = (s1 << 8) | s2;
 221 
 222                 if (w1 >= 0x2121) {
 223                         /* conversion for combining characters */
                             /* Codes in these ranges may map to a pair of output values:
                              * on a table hit the first value is emitted here and the
                              * second is left in w for the common output call below.
                              * NOTE(review): the first two ranges overlap at 0x2479;
                              * together they cover 0x2477..0x247B. */
 224                         if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) ||
 225                                 (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 ||
 226                                 (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) {
 227                                 k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len);
 228                                 if (k >= 0) {
 229                                         w = jisx0213_u2_tbl[2*k];
 230                                         CK((*filter->output_function)(w, filter->data));
 231                                         w = jisx0213_u2_tbl[2*k+1];
 232                                 }
 233                         }
 234 
 235                         /* conversion for BMP  */
                             /* w1 is reused as a linear index: 94 cells per row */
 236                         if (w <= 0) {
 237                                 w1 = (s1 - 0x21)*94 + s2 - 0x21;
 238                                 if (w1 >= 0 && w1 < jisx0213_ucs_table_size) {
 239                                         w = jisx0213_ucs_table[w1];
 240                                 }
 241                         }
 242 
 243                         /* conversion for CJK Unified Ideographs ext.B (U+2XXXX) */
 244                         if (w <= 0) {
 245                                 w1 = (s1 << 8) | s2;
 246                                 k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len);
 247                                 if (k >= 0) {
 248                                         w = jisx0213_jis_u5_tbl[k] + 0x20000;
 249                                 }
 250                         }
 251 
                             /* No mapping found: emit the JIS pair tagged with
                              * MBFL_WCSPLANE_JIS0213 when both bytes are below 0x7f,
                              * otherwise the raw input bytes tagged with
                              * MBFL_WCSGROUP_THROUGH. */
 252                         if (w <= 0) {
 253                                 if (s1 < 0x7f && s2 < 0x7f) {
 254                                         w = (s1 << 8) | s2;
 255                                         w &= MBFL_WCSPLANE_MASK;
 256                                         w |= MBFL_WCSPLANE_JIS0213;
 257                                 } else {
 258                                         w = (c1 << 8) | c;
 259                                         w &= MBFL_WCSGROUP_MASK;
 260                                         w |= MBFL_WCSGROUP_THROUGH;
 261                                 }
 262                         }
 263                         CK((*filter->output_function)(w, filter->data));
 264                 } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
                             /* control byte in second position: the pending first byte
                              * is dropped and only the control byte is emitted */
 265                         CK((*filter->output_function)(c, filter->data));
 266                 } else {
 267                         w = (c1 << 8) | c;
 268                         w &= MBFL_WCSGROUP_MASK;
 269                         w |= MBFL_WCSGROUP_THROUGH;
 270                         CK((*filter->output_function)(w, filter->data));
 271                 }
 272                 break;
 273 
 274         case 2: /* got 0x8e : EUC-JP-2004 kana */
 275                 filter->status = 0;
 276                 if (c > 0xa0 && c < 0xe0) {
                             /* 0xa1..0xdf -> 0xff61..0xff9f */
 277                         w = 0xfec0 + c;
 278                         CK((*filter->output_function)(w, filter->data));
 279                 } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
 280                         CK((*filter->output_function)(c, filter->data));
 281                 } else {
 282                         w = 0x8e00 | c;
 283                         w &= MBFL_WCSGROUP_MASK;
 284                         w |= MBFL_WCSGROUP_THROUGH;
 285                         CK((*filter->output_function)(w, filter->data));
 286                 }
 287                 break;
 288 
 289         case 3: /* X 0213 plane 2 first char : EUC-JP-2004 (0x8f), ISO-2022-JP-2004 */
                     /* In this function status is set to 3 only by the EUC-JP-2004
                      * 0x8f branch of state 0; the ISO-2022-JP-2004 path enters
                      * state 4 directly from state 0. */
 290                 if ((c >= 0 && c < 0x21) || c == 0x7f) {                /* CTLs */
 291                         CK((*filter->output_function)(c, filter->data));
 292                         filter->status = 0;
 293                 } else {
 294                         if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
 295                                 s1 = c - 0x80;
 296                         } else {
 297                                 s1 = c;
 298                         }
 299                         if (s1 > 0x20 && s1 < 0x80) {
                                     /* valid first byte: cache its 7-bit form, go to state 4 */
 300                                 filter->cache = s1;
 301                                 filter->status++;
 302                         } else {
                                     /* NOTE(review): this tests filter->to, whereas every
                                      * other encoding test in this function uses
                                      * filter->from -- confirm which was intended.
                                      * NOTE(review): status is not changed on this path,
                                      * so the filter stays in state 3. */
 303                                 if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
 304                                         w = c | 0x8f00;
 305                                         w &= MBFL_WCSGROUP_MASK;
 306                                         w |= MBFL_WCSGROUP_THROUGH;
 307                                 } else {
 308                                         w = c & 0x7f;
 309                                         w &= MBFL_WCSPLANE_MASK;
 310                                         w |= MBFL_WCSPLANE_JIS0213;
 311                                 }
 312                                 CK((*filter->output_function)(w, filter->data));
 313                         }
 314                 }
 315                 break;
 316 
 317         case 4: /* X 0213 plane 2 second char : EUC-JP-2004, ISO-2022-JP-2004 */
 318 
 319                 filter->status &= ~0xf;
 320                 c1 = filter->cache;
 321                 if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) {
 322                         c2 = c - 0x80;
 323                 } else {
 324                         c2 = c;
 325                 }
                     /* zero-based row (s1) and cell (s2) indexes */
 326                 s1 = c1 - 0x21;
 327                 s2 = c2 - 0x21;
 328 
                     /* only rows 0, 2-4, 7, 11-14 and 77-93 are accepted, with a
                      * cell index of 0..93 */
 329                 if (((s1 >= 0 && s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) ||
 330                         (s1 >= 77 && s1 < 94)) && s2 >= 0 && s2 < 94) {
 331                         /* calc offset from ku */
                             /* NOTE(review): if no entry matches, k ends up equal to
                              * jisx0213_p2_ofst_len and the statement after the loop
                              * indexes jisx0213_p2_ofst[k]; presumably every row
                              * accepted above has an entry -- verify against the table. */
 332                         for (k = 0; k < jisx0213_p2_ofst_len; k++) {
 333                                 if (s1 == jisx0213_p2_ofst[k]-1) {
 334                                         break;
 335                                 }
 336                         }
                             /* after a match, s1 + k equals the index found by the loop */
 337                         k = k - (jisx0213_p2_ofst[k]-1);
 338 
 339                         /* check for japanese chars in BMP */
                             /* i.e. table row 94 + (matched index), 94 cells per row */
 340                         s = (s1 + 94 + k)*94 + s2;
 341                         if (s >= 0 && s < jisx0213_ucs_table_size) {
 342                                 w = jisx0213_ucs_table[s];
 343                         } else {
 344                                 w = 0;
 345                         }
 346 
 347                         /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */
 348                         if (w <= 0) {
 349                                 w1 = ((c1 + k + 94) << 8) | c2;
 350                                 k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len);
 351                                 if (k >= 0) {
 352                                         w = jisx0213_jis_u5_tbl[k] + 0x20000;
 353                                 }
 354                         }
 355 
                             /* no mapping: emit the 7-bit pair tagged MBFL_WCSPLANE_JIS0213 */
 356                         if (w <= 0) {
 357                                 w = ((c1 & 0x7f) << 8) | (c2 & 0x7f);
 358                                 w &= MBFL_WCSPLANE_MASK;
 359                                 w |= MBFL_WCSPLANE_JIS0213;
 360                         }
 361 
 362                         CK((*filter->output_function)(w, filter->data));
 363                 } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
 364                         CK((*filter->output_function)(c, filter->data));
 365                 } else {
                             /* NOTE(review): as in state 3, this tests filter->to rather
                              * than filter->from -- confirm which was intended. */
 366                         if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
 367                                 w = (c1 << 8) | c | 0x8f0000;
 368                                 w &= MBFL_WCSGROUP_MASK;
 369                                 w |= MBFL_WCSGROUP_THROUGH;
 370                         } else {
 371                                 w = ((c1 & 0x7f) << 8) | (c2 & 0x7f);
 372                                 w &= MBFL_WCSPLANE_MASK;
 373                                 w |= MBFL_WCSPLANE_JIS0213;
 374                         }
 375                         CK((*filter->output_function)(w, filter->data));
 376                 }
 377 
 378                 break;
 379 
 380         case 5: /* X 0208 : ISO-2022-JP-2004 */
 381                 filter->status &= ~0xf;
 382                 c1 = filter->cache;
                     /* w is still 0 here unless the table lookup below succeeds */
 383                 if (c > 0x20 && c < 0x7f) {
 384                         s = (c1 - 0x21)*94 + c - 0x21;
 385                         if (s >= 0 && s < jisx0208_ucs_table_size) {
 386                                 w = jisx0208_ucs_table[s];
 387                         }
 388                 }
                     /* no mapping: emit the raw pair tagged MBFL_WCSPLANE_JIS0208 */
 389                 if (w <= 0) {
 390                         w = (c1 << 8) | c;
 391                         w &= MBFL_WCSPLANE_MASK;
 392                         w |= MBFL_WCSPLANE_JIS0208;
 393                 }
 394                 CK((*filter->output_function)(w, filter->data));
 395                 break;
 396 
 397         /* ESC : ISO-2022-JP-2004 */
 398 /*      case 0x06:      */
 399 /*      case 0x16:      */
 400 /*      case 0x26:      */
 401 /*      case 0x86:      */
 402 /*      case 0x96:      */
 403 /*      case 0xa6:      */
 404         case 6:
 405                 if (c == 0x24) {                /* '$' */
 406                         filter->status++;
 407                 } else if (c == 0x28) {         /* '(' */
 408                         filter->status += 3;
 409                 } else {
                             /* not an escape sequence handled here: emit the swallowed
                              * ESC and reprocess c in state 0 */
 410                         filter->status &= ~0xf;
 411                         CK((*filter->output_function)(0x1b, filter->data));
 412                         goto retry;
 413                 }
 414                 break;
 415 
 416         /* ESC $ : ISO-2022-JP-2004 */
 417 /*      case 0x07:      */
 418 /*      case 0x17:      */
 419 /*      case 0x27:      */
 420 /*      case 0x87:      */
 421 /*      case 0x97:      */
 422 /*      case 0xa7:      */
 423         case 7:
 424                 if (c == 0x42) {        /* 'B' -> JIS X 0208-1983 */
 425                         filter->status = 0x80;
 426                 } else if (c == 0x28) {                 /* '(' */
 427                         filter->status++;
 428                 } else {
                             /* emit the swallowed "ESC $" and reprocess c in state 0 */
 429                         filter->status &= ~0xf;
 430                         CK((*filter->output_function)(0x1b, filter->data));
 431                         CK((*filter->output_function)(0x24, filter->data));
 432                         goto retry;
 433                 }
 434                 break;
 435 
                     /* NOTE(review): the break below is unreachable (duplicate). */
 436                 break;
 437 
 438         /* ESC $ ( : ISO-2022-JP-2004 */
 439 /*      case 0x08:      */
 440 /*      case 0x18:      */
 441 /*      case 0x28:      */
 442 /*      case 0x88:      */
 443 /*      case 0x98:      */
 444 /*      case 0xa8:      */
 445         case 8:
 446                 if (c == 0x51) {        /* JIS X 0213 plane 1 */
 447                         filter->status = 0x90;
 448                 } else if (c == 0x50) {                 /* JIS X 0213 plane 2 */
 449                         filter->status = 0xa0;
 450                 } else {
                             /* emit the swallowed "ESC $ (" and reprocess c in state 0 */
 451                         filter->status &= ~0xf;
 452                         CK((*filter->output_function)(0x1b, filter->data));
 453                         CK((*filter->output_function)(0x24, filter->data));
 454                         CK((*filter->output_function)(0x28, filter->data));
 455                         goto retry;
 456                 }
 457                 break;
 458 
 459         /* ESC ( : ISO-2022-JP-2004 */
 460 /*      case 0x09:      */
 461 /*      case 0x19:      */
 462 /*      case 0x29:      */
 463 /*      case 0x89:      */
 464 /*      case 0x99:      */
 465         case 9:
 466                 if (c == 0x42) {                /* 'B' : ASCII */
                             /* clears both the state and the character-set mode bits */
 467                         filter->status = 0;
 468                 } else {
                             /* emit the swallowed "ESC (" and reprocess c in state 0 */
 469                         filter->status &= ~0xf;
 470                         CK((*filter->output_function)(0x1b, filter->data));
 471                         CK((*filter->output_function)(0x28, filter->data));
 472                         goto retry;
 473                 }
 474                 break;
 475 
 476         default:
                     /* any other low-nibble value: reset to the initial state */
 477                 filter->status = 0;
 478                 break;
 479         }
 480 
 481         return c;
 482 }
 483 
 484 int
 485 mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) {
 486         int k;
 487         int c1, c2, s1 = 0, s2;
 488 
 489 retry:
 490 
 491         /* check for 1st char of combining characters */
 492         if ((filter->status & 0xf)== 0 && (
 493                         c == 0x00E6 ||
 494                         (c >= 0x0254 && c <= 0x02E9) ||
 495                         (c >= 0x304B && c <= 0x3053) ||
 496                         (c >= 0x30AB && c <= 0x30C8) ||
 497                         c == 0x31F7)) {
 498                 for (k=0;k<jisx0213_u2_tbl_len;k++) {
 499                         if (c == jisx0213_u2_tbl[2*k]) {
 500                                 filter->status++;
 501                                 filter->cache = k;
 502                                 return c;
 503                         }
 504                 }
 505         }
 506 
 507         /* check for 2nd char of combining characters */
 508         if ((filter->status & 0xf) == 1 &&
 509                 filter->cache >= 0 && filter->cache <= jisx0213_u2_tbl_len) {
 510                 k = filter->cache;
 511                 filter->status &= ~0xf;
 512                 filter->cache = 0;
 513 
 514                 c1 = jisx0213_u2_tbl[2*k];
 515                 if ((c1 == 0x0254 || c1 == 0x028C || c1 == 0x0259 || c1 == 0x025A)
 516                         && c == 0x0301) {
 517                         k++;
 518                 }
 519                 if (c == jisx0213_u2_tbl[2*k+1]) {
 520                         s1 = jisx0213_u2_key[k];
 521                 } else { /* fallback */
 522                         s1 = jisx0213_u2_fb_tbl[k];
 523 
 524                         if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) {
 525                                 c1 = (s1 >> 8) & 0xff;
 526                                 c2 = s1 & 0xff;
 527                                 SJIS_ENCODE(c1, c2, s1, s2);
 528                         } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
 529                                 s2 = (s1 & 0xff) + 0x80;
 530                                 s1 = ((s1 >> 8) & 0xff) + 0x80;
 531                         } else {
 532                                 if (filter->status != 0x200) {
 533                                         CK((*filter->output_function)(0x1b, filter->data));
 534                                         CK((*filter->output_function)(0x24, filter->data));
 535                                         CK((*filter->output_function)(0x28, filter->data));
 536                                         CK((*filter->output_function)(0x51, filter->data));
 537                                 }
 538                                 filter->status = 0x200;
 539 
 540                                 s2 = s1 & 0x7f;
 541                                 s1 = (s1 >> 8) & 0x7f;
 542                         }
 543 
 544                         CK((*filter->output_function)(s1, filter->data));
 545                         CK((*filter->output_function)(s2, filter->data));
 546                         goto retry;
 547                 }
 548         }
 549 
 550         /* check for major japanese chars: U+4E00 - U+9FFF */
 551         if (s1 <= 0) {
 552                 for (k=0; k < uni2jis_tbl_len ;k++) {
 553                         if (c >= uni2jis_tbl_range[k][0] && c <= uni2jis_tbl_range[k][1]) {
 554                                 s1 = uni2jis_tbl[k][c-uni2jis_tbl_range[k][0]];
 555                                 break;
 556                         }
 557                 }
 558         }
 559 
 560         /* check for japanese chars in compressed mapping area: U+1E00 - U+4DBF */
 561         if (s1 <= 0 && c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) {
 562                 k = mbfl_bisec_srch(c, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len);
 563                 if (k >= 0) {
 564                         s1 = ucs_c1_jisx0213_ofst[k] + c - ucs_c1_jisx0213_tbl[2*k];
 565                 }
 566         }
 567 
 568         /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */
 569         if (s1 <= 0 && c >= jisx0213_u5_tbl_min && c <= jisx0213_u5_tbl_max) {
 570                 k = mbfl_bisec_srch2(c - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len);
 571                 if (k >= 0) {
 572                         s1 = jisx0213_u5_jis_tbl[k];
 573                 }
 574         }
 575 
 576         if (s1 <= 0) {
 577                 /* CJK Compatibility Forms: U+FE30 - U+FE4F */
 578                 if (c == 0xfe45) {
 579                         s1 = 0x233e;
 580                 } else if (c == 0xfe46) {
 581                         s1 = 0x233d;
 582                 } else if (c >= 0xf91d && c <= 0xf9dc) {
 583                         /* CJK Compatibility Ideographs: U+F900 - U+F92A */
 584                         k = mbfl_bisec_srch2(c, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len);
 585                         if (k >= 0) {
 586                                 s1 = ucs_r2b_jisx0213_cmap_val[k];
 587                         }
 588                 }
 589         }
 590 
 591         if (s1 <= 0) {
 592                 c1 = c & ~MBFL_WCSPLANE_MASK;
 593                 if (c1 == MBFL_WCSPLANE_JIS0213) {
 594                         s1 = c & MBFL_WCSPLANE_MASK;
 595                 }
 596                 if (c == 0) {
 597                         s1 = 0;
 598                 } else if (s1 <= 0) {
 599                         s1 = -1;
 600                 }
 601         } else if (s1 >= 0x9980) {
 602                 s1 = -1;
 603         }
 604 
 605         if (s1 >= 0) {
 606                 if (s1 < 0x80) { /* ASCII */
 607                         if (filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 &&
 608                                 (filter->status & 0xff00) != 0) {
 609                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 610                                 CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 611                                 CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
 612                         }
 613                         filter->status = 0;
 614                         CK((*filter->output_function)(s1, filter->data));
 615                 } else if (s1 < 0x100) { /* latin or kana */
 616                         if  (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
 617                                 CK((*filter->output_function)(0x8e, filter->data));
 618                         }
 619                         CK((*filter->output_function)(s1, filter->data));
 620                 } else if (s1 < 0x7f00) { /* X 0213 plane 1 */
 621                         if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) {
 622                                 c1 = (s1 >> 8) & 0xff;
 623                                 c2 = s1 & 0xff;
 624                                 SJIS_ENCODE(c1, c2, s1, s2);
 625                         } else if  (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
 626                                 s2 = (s1 & 0xff) + 0x80;
 627                                 s1 = ((s1 >> 8) & 0xff) + 0x80;
 628                         } else {
 629                                 if ((filter->status & 0xff00) != 0x200) {
 630                                         CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 631                                         CK((*filter->output_function)(0x24, filter->data));             /* '$' */
 632                                         CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 633                                         CK((*filter->output_function)(0x51, filter->data));             /* 'Q' */
 634                                 }
 635                                 filter->status = 0x200;
 636                                 s2 = s1 & 0xff;
 637                                 s1 = (s1 >> 8) & 0xff;
 638                         }
 639                         CK((*filter->output_function)(s1, filter->data));
 640                         CK((*filter->output_function)(s2, filter->data));
 641                 } else { /* X 0213 plane 2 */
 642                         if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) {
 643                                 c1 = (s1 >> 8) & 0xff;
 644                                 c2 = s1 & 0xff;
 645                                 SJIS_ENCODE(c1, c2, s1, s2);
 646                         } else {
 647                                 s2 = s1 & 0xff;
 648                                 k = ((s1 >> 8) & 0xff) - 0x7f;
 649                                 if (k >= 0 && k < jisx0213_p2_ofst_len) {
 650                                         s1  = jisx0213_p2_ofst[k] - 1 + 0x21;
 651                                 }
 652                                 if  (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
 653                                         s2 |= 0x80;
 654                                         s1 |= 0x80;
 655                                         CK((*filter->output_function)(0x8f, filter->data));
 656                                 } else {
 657                                         if ((filter->status & 0xff00) != 0x200) {
 658                                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 659                                                 CK((*filter->output_function)(0x24, filter->data));             /* '$' */
 660                                                 CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 661                                                 CK((*filter->output_function)(0x50, filter->data));             /* 'P' */
 662                                         }
 663                                         filter->status = 0x200;
 664                                 }
 665                         }
 666 
 667                         CK((*filter->output_function)(s1, filter->data));
 668                         CK((*filter->output_function)(s2, filter->data));
 669                 }
 670         } else {
 671                 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
 672                         CK(mbfl_filt_conv_illegal_output(c, filter));
 673                 }
 674         }
 675 }
 676 
 677 int
 678 mbfl_filt_conv_jis2004_flush(mbfl_convert_filter *filter)
 679 {
 680         int k, c1, c2, s1, s2;
 681 
 682         k = filter->cache;
 683         filter->cache = 0;
 684 
 685         if (filter->status == 1 && k >= 0 && k <= jisx0213_u2_tbl_len) {
 686                 s1 = jisx0213_u2_fb_tbl[k];
 687 
 688                 if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) {
 689                         c1 = (s1 >> 8) & 0xff;
 690                         c2 = s1 & 0xff;
 691                         SJIS_ENCODE(c1, c2, s1, s2);
 692                 } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
 693                         s2 = (s1 & 0xff) | 0x80;
 694                         s1 = ((s1 >> 8) & 0xff) | 0x80;
 695                 } else {
 696                         s2 = s1 & 0x7f;
 697                         s1 = (s1 >> 8) & 0x7f;
 698                         if ((filter->status & 0xff00) != 0x200) {
 699                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 700                                 CK((*filter->output_function)(0x24, filter->data));             /* '$' */
 701                                 CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 702                                 CK((*filter->output_function)(0x51, filter->data));             /* 'Q' */
 703                         }
 704                         filter->status = 0x200;
 705                 }
 706 
 707                 CK((*filter->output_function)(s1, filter->data));
 708                 CK((*filter->output_function)(s2, filter->data));
 709         }
 710 
 711         /* back to latin */
 712         if ((filter->status & 0xff00) != 0) {
 713                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 714                 CK((*filter->output_function)(0x28, filter->data));             /* '(' */
 715                 CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
 716         }
 717 
 718         filter->status = 0;
 719 
 720         if (filter->flush_function != NULL) {
 721                 return (*filter->flush_function)(filter->data);
 722         }
 723 
 724         return 0;
 725 }

/* [<][>][^][v][top][bottom][index][help] */