root/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_filt_conv_2022kr_wchar
  2. mbfl_filt_conv_wchar_2022kr
  3. mbfl_filt_conv_any_2022kr_flush
  4. mbfl_filt_ident_2022kr

   1 /*
   2  * "streamable kanji code filter and converter"
   3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
   4  *
   5  * LICENSE NOTICES
   6  *
   7  * This file is part of "streamable kanji code filter and converter",
   8  * which is distributed under the terms of GNU Lesser General Public
   9  * License (version 2) as published by the Free Software Foundation.
  10  *
  11  * This software is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with "streamable kanji code filter and converter";
  18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19  * Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  * The author of this file:
  22  *
  23  */
  24 /*
  25  * The source code included in this file was separated from mbfilter_kr.c
  26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
  27  *
  28  */
  29 
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33 
  34 #include "mbfilter.h"
  35 #include "mbfilter_iso2022_kr.h"
  36 #include "unicode_table_uhc.h"
  37 
  38 static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter);
  39 
  40 const mbfl_encoding mbfl_encoding_2022kr = {
  41         mbfl_no_encoding_2022kr,
  42         "ISO-2022-KR",
  43         "ISO-2022-KR",
  44         NULL,
  45         NULL,
  46         MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE
  47 };
  48 
  49 const struct mbfl_identify_vtbl vtbl_identify_2022kr = {
  50         mbfl_no_encoding_2022kr,
  51         mbfl_filt_ident_common_ctor,
  52         mbfl_filt_ident_common_dtor,
  53         mbfl_filt_ident_2022kr
  54 };
  55 
  56 const struct mbfl_convert_vtbl vtbl_wchar_2022kr = {
  57         mbfl_no_encoding_wchar,
  58         mbfl_no_encoding_2022kr,
  59         mbfl_filt_conv_common_ctor,
  60         mbfl_filt_conv_common_dtor,
  61         mbfl_filt_conv_wchar_2022kr,
  62         mbfl_filt_conv_any_2022kr_flush
  63 };
  64 
  65 const struct mbfl_convert_vtbl vtbl_2022kr_wchar = {
  66         mbfl_no_encoding_2022kr,
  67         mbfl_no_encoding_wchar,
  68         mbfl_filt_conv_common_ctor,
  69         mbfl_filt_conv_common_dtor,
  70         mbfl_filt_conv_2022kr_wchar,
  71         mbfl_filt_conv_common_flush
  72 };
  73 
  74 #define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)
  75 
  76 /*
  77  * ISO-2022-KR => wchar
  78  */
  79 int
  80 mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
  81 {
  82         int c1, w, flag;
  83 
  84 retry:
  85         switch (filter->status & 0xf) {
  86                 /* case 0x00: ASCII */
  87                 /* case 0x10: KSC5601 */
  88         case 0:
  89                 if (c == 0x1b) { /* ESC */
  90                         filter->status += 2;
  91                 } else if (c == 0x0f) { /* SI (ASCII) */
  92                         filter->status &= ~0xff;
  93                 } else if (c == 0x0e) { /* SO (KSC5601) */
  94                         filter->status |= 0x10;
  95                 } else if ((filter->status & 0x10) != 0  && c > 0x20 && c < 0x7f) {
  96                         /* KSC5601 lead byte */
  97                         filter->cache = c;
  98                         filter->status += 1;
  99                 } else if ((filter->status & 0x10) == 0 &&  c >= 0 && c < 0x80) {
 100                         /* latin, CTLs */
 101                         CK((*filter->output_function)(c, filter->data));
 102                 } else {
 103                         w = c & MBFL_WCSGROUP_MASK;
 104                         w |= MBFL_WCSGROUP_THROUGH;
 105                         CK((*filter->output_function)(w, filter->data));
 106                 }
 107                 break;
 108 
 109         case 1:         /* dbcs second byte */
 110                 filter->status &= ~0xf;
 111                 c1 = filter->cache;
 112                 flag = 0;
 113                 if (c1 > 0x20 && c1 < 0x47) {
 114                         flag = 1;
 115                 } else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49) {
 116                         flag = 2;
 117                 }
 118                 if (flag > 0 && c > 0x20 && c < 0x7f) {
 119                         if (flag == 1){
 120                                 w = (c1 - 0x21)*190 + (c - 0x41) + 0x80;
 121                                 if (w >= 0 && w < uhc2_ucs_table_size) {
 122                                         w = uhc2_ucs_table[w];
 123                                 } else {
 124                                         w = 0;
 125                                 }
 126                         } else {
 127                                 w = (c1 - 0x47)*94 + (c - 0x21);
 128                                 if (w >= 0 && w < uhc3_ucs_table_size) {
 129                                         w = uhc3_ucs_table[w];
 130                                 } else {
 131                                         w = 0;
 132                                 }
 133                         }
 134 
 135                         if (w <= 0) {
 136                                 w = (c1 << 8) | c;
 137                                 w &= MBFL_WCSPLANE_MASK;
 138                                 w |= MBFL_WCSPLANE_KSC5601;
 139                         }
 140                         CK((*filter->output_function)(w, filter->data));
 141                 } else if (c == 0x1b) {  /* ESC */
 142                         filter->status++;
 143                 } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
 144                         CK((*filter->output_function)(c, filter->data));
 145                 } else {
 146                         w = (c1 << 8) | c;
 147                         w &= MBFL_WCSGROUP_MASK;
 148                         w |= MBFL_WCSGROUP_THROUGH;
 149                         CK((*filter->output_function)(w, filter->data));
 150                 }
 151                 break;
 152 
 153         case 2:                 /* ESC */
 154                 if (c == 0x24) { /* '$' */
 155                         filter->status++;
 156                 } else {
 157                         filter->status &= ~0xf;
 158                         CK((*filter->output_function)(0x1b, filter->data));
 159                         goto retry;
 160                 }
 161                 break;
 162         case 3:         /* ESC $ */
 163                 if (c == 0x29) { /* ')' */
 164                         filter->status++;
 165                 } else {
 166                         filter->status &= ~0xf;
 167                         CK((*filter->output_function)(0x1b, filter->data));
 168                         CK((*filter->output_function)(0x24, filter->data));
 169                         goto retry;
 170                 }
 171                 break;
 172         case 4:         /* ESC $ )  */
 173                 if (c == 0x43) { /* 'C' */
 174                         filter->status &= ~0xf;
 175                         filter->status |= 0x100;
 176                 } else {
 177                         filter->status &= ~0xf;
 178                         CK((*filter->output_function)(0x1b, filter->data));
 179                         CK((*filter->output_function)(0x24, filter->data));
 180                         CK((*filter->output_function)(0x29, filter->data));
 181                         goto retry;
 182                 }
 183                 break;
 184         default:
 185                 filter->status = 0;
 186                 break;
 187         }
 188 
 189         return c;
 190 }
 191 
 192 /*
 193  * wchar => ISO-2022-KR
 194  */
 195 int
 196 mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
 197 {
 198         int c1, c2, s;
 199 
 200         s = 0;
 201 
 202         if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
 203                 s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
 204         } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
 205                 s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
 206         } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
 207                 s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
 208         } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
 209                 s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
 210         } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
 211                 s = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
 212         } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
 213                 s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
 214         } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
 215                 s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
 216         }
 217 
 218         c1 = (s >> 8) & 0xff;
 219         c2 = s & 0xff;
 220         /* exclude UHC extension area */
 221         if (c1 < 0xa1 || c2 < 0xa1){
 222                 s = c;
 223         }
 224         if (s & 0x8000) {
 225                 s -= 0x8080;
 226         }
 227 
 228         if (s <= 0) {
 229                 c1 = c & ~MBFL_WCSPLANE_MASK;
 230                 if (c1 == MBFL_WCSPLANE_KSC5601) {
 231                         s = c & MBFL_WCSPLANE_MASK;
 232                 }
 233                 if (c == 0) {
 234                         s = 0;
 235                 } else if (s <= 0) {
 236                         s = -1;
 237                 }
 238         } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
 239                 s = -1;
 240         }
 241         if (s >= 0) {
 242                 if (s < 0x80 && s > 0) {        /* ASCII */
 243                         if ((filter->status & 0x10) != 0) {
 244                                 CK((*filter->output_function)(0x0f, filter->data));             /* SI */
 245                                 filter->status &= ~0x10;
 246                         }
 247                         CK((*filter->output_function)(s, filter->data));
 248                 } else {
 249                         if ( (filter->status & 0x100) == 0) {
 250                                 CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
 251                                 CK((*filter->output_function)(0x24, filter->data));             /* '$' */
 252                                 CK((*filter->output_function)(0x29, filter->data));             /* ')' */
 253                                 CK((*filter->output_function)(0x43, filter->data));             /* 'C' */
 254                                 filter->status |= 0x100;
 255                         }
 256                         if ((filter->status & 0x10) == 0) {
 257                                 CK((*filter->output_function)(0x0e, filter->data));             /* SO */
 258                                 filter->status |= 0x10;
 259                         }
 260                         CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
 261                         CK((*filter->output_function)(s & 0xff, filter->data));
 262                 }
 263         } else {
 264                 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
 265                         CK(mbfl_filt_conv_illegal_output(c, filter));
 266                 }
 267         }
 268 
 269         return c;
 270 }
 271 
 272 int
 273 mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter)
 274 {
 275         /* back to ascii */
 276         if ((filter->status & 0xff00) != 0) {
 277                 CK((*filter->output_function)(0x0f, filter->data));             /* SI */
 278         }
 279 
 280         filter->status &= 0xff;
 281 
 282         if (filter->flush_function != NULL) {
 283                 return (*filter->flush_function)(filter->data);
 284         }
 285 
 286         return 0;
 287 }
 288 
 289 static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter)
 290 {
 291 retry:
 292         switch (filter->status & 0xf) {
 293 /*      case 0x00:       ASCII */
 294 /*      case 0x10:       KSC5601 mode */
 295 /*      case 0x20:       KSC5601 DBCS */
 296 /*      case 0x40:       KSC5601 SBCS */
 297         case 0:
 298                 if (!(filter->status & 0x10)) {
 299                         if (c == 0x1b)
 300                                 filter->status += 2;
 301                 } else if (filter->status == 0x20 && c > 0x20 && c < 0x7f) {            /* kanji first char */
 302                         filter->status += 1;
 303                 } else if (c >= 0 && c < 0x80) {                /* latin, CTLs */
 304                         ;
 305                 } else {
 306                         filter->flag = 1;       /* bad */
 307                 }
 308                 break;
 309 
 310 /*      case 0x21:       KSC5601 second char */
 311         case 1:
 312                 filter->status &= ~0xf;
 313                 if (c < 0x21 || c > 0x7e) {             /* bad */
 314                         filter->flag = 1;
 315                 }
 316                 break;
 317 
 318         /* ESC */
 319         case 2:
 320                 if (c == 0x24) {                /* '$' */
 321                         filter->status++;
 322                 } else {
 323                         filter->flag = 1;       /* bad */
 324                         filter->status &= ~0xf;
 325                         goto retry;
 326                 }
 327                 break;
 328 
 329         /* ESC $ */
 330         case 3:
 331                 if (c == 0x29) {                /* ')' */
 332                         filter->status++;
 333                 } else {
 334                         filter->flag = 1;       /* bad */
 335                         filter->status &= ~0xf;
 336                         goto retry;
 337                 }
 338                 break;
 339 
 340         /* ESC $) */
 341         case 5:
 342                 if (c == 0x43) {                /* 'C' */
 343                         filter->status = 0x10;
 344                 } else {
 345                         filter->flag = 1;       /* bad */
 346                         filter->status &= ~0xf;
 347                         goto retry;
 348                 }
 349                 break;
 350 
 351         default:
 352                 filter->status = 0;
 353                 break;
 354         }
 355 
 356         return c;
 357 }
 358 
 359 

/* [<][>][^][v][top][bottom][index][help] */