root/ext/mbstring/libmbfl/filters/mbfilter_hz.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_filt_conv_hz_wchar
  2. mbfl_filt_conv_wchar_hz
  3. mbfl_filt_conv_any_hz_flush
  4. mbfl_filt_ident_hz

   1 /*
   2  * "streamable kanji code filter and converter"
   3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
   4  *
   5  * LICENSE NOTICES
   6  *
   7  * This file is part of "streamable kanji code filter and converter",
   8  * which is distributed under the terms of GNU Lesser General Public
   9  * License (version 2) as published by the Free Software Foundation.
  10  *
  11  * This software is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with "streamable kanji code filter and converter";
  18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19  * Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  * The author of this file:
  22  *
  23  */
  24 /*
  25  * The source code included in this file was separated from mbfilter_cn.c
  26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
  27  *
  28  */
  29 
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33 
  34 #include "mbfilter.h"
  35 #include "mbfilter_hz.h"
  36 
  37 #include "unicode_table_cp936.h"
  38 
  39 static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter);
  40 
  41 const mbfl_encoding mbfl_encoding_hz = {        /* encoding descriptor for HZ */
  42         mbfl_no_encoding_hz,                    /* encoding identifier */
  43         "HZ",                                   /* first name string */
  44         "HZ-GB-2312",                           /* second name string; presumably the MIME name -- confirm against the mbfl_encoding declaration */
  45         NULL,                                   /* not set for this encoding */
  46         NULL,                                   /* not set for this encoding */
  47         MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE  /* type flags OR-ed together */
  48 };
  49 
  50 const struct mbfl_identify_vtbl vtbl_identify_hz = {    /* identify-filter table for HZ */
  51         mbfl_no_encoding_hz,                            /* encoding this table belongs to */
  52         mbfl_filt_ident_common_ctor,                    /* shared (common) constructor */
  53         mbfl_filt_ident_common_dtor,                    /* shared (common) destructor */
  54         mbfl_filt_ident_hz                              /* per-byte check, defined at the end of this file */
  55 };
  56 
  57 const struct mbfl_convert_vtbl vtbl_hz_wchar = {        /* converter table: HZ => wchar */
  58         mbfl_no_encoding_hz,                            /* first encoding id (HZ) */
  59         mbfl_no_encoding_wchar,                         /* second encoding id (wchar) */
  60         mbfl_filt_conv_common_ctor,                     /* shared (common) constructor */
  61         mbfl_filt_conv_common_dtor,                     /* shared (common) destructor */
  62         mbfl_filt_conv_hz_wchar,                        /* per-byte decoder defined in this file */
  63         mbfl_filt_conv_common_flush                     /* shared (common) flush routine */
  64 };
  65 
  66 const struct mbfl_convert_vtbl vtbl_wchar_hz = {        /* converter table: wchar => HZ */
  67         mbfl_no_encoding_wchar,                         /* first encoding id (wchar) */
  68         mbfl_no_encoding_hz,                            /* second encoding id (HZ) */
  69         mbfl_filt_conv_common_ctor,                     /* shared (common) constructor */
  70         mbfl_filt_conv_common_dtor,                     /* shared (common) destructor */
  71         mbfl_filt_conv_wchar_hz,                        /* per-character encoder defined in this file */
  72         mbfl_filt_conv_any_hz_flush                     /* HZ-specific flush: emits "~}" if GB2312 mode is still open */
  73 };
  74 
  75 #define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)      /* evaluate statement once; if its value is negative, return -1 from the enclosing function */
  76 
  77 /*
  78  * HZ => wchar
  79  */
  80 int
  81 mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter)
  82 {
  83         int c1, s, w;
  84 
  85         switch (filter->status & 0xf) {
  86 /*      case 0x00:       ASCII */
  87 /*      case 0x10:       GB2312 */
  88         case 0:
  89                 if (c == 0x7e) {
  90                         filter->status += 2;
  91                 } else if (filter->status == 0x10 && c > 0x20 && c < 0x7f) {    /* DBCS first char */
  92                         filter->cache = c;
  93                         filter->status += 1;
  94                 } else if (c >= 0 && c < 0x80) {                /* latin, CTLs */
  95                         CK((*filter->output_function)(c, filter->data));
  96                 } else {
  97                         w = c & MBFL_WCSGROUP_MASK;
  98                         w |= MBFL_WCSGROUP_THROUGH;
  99                         CK((*filter->output_function)(w, filter->data));
 100                 }
 101                 break;
 102 
 103 /*      case 0x11:       GB2312 second char */
 104         case 1:
 105                 filter->status &= ~0xf;
 106                 c1 = filter->cache;
 107                 if (c1 > 0x20 && c1 < 0x7f && c > 0x20 && c < 0x7f) {
 108                         s = (c1 - 1)*192 + c + 0x40; /* GB2312 */
 109                         if (s >= 0 && s < cp936_ucs_table_size) {
 110                                 w = cp936_ucs_table[s];
 111                         } else {
 112                                 w = 0;
 113                         }
 114                         if (w <= 0) {
 115                                 w = (c1 << 8) | c;
 116                                 w &= MBFL_WCSPLANE_MASK;
 117                                 w |= MBFL_WCSPLANE_GB2312;
 118                         }
 119                         CK((*filter->output_function)(w, filter->data));
 120                 } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
 121                         CK((*filter->output_function)(c, filter->data));
 122                 } else {
 123                         w = (c1 << 8) | c;
 124                         w &= MBFL_WCSGROUP_MASK;
 125                         w |= MBFL_WCSGROUP_THROUGH;
 126                         CK((*filter->output_function)(w, filter->data));
 127                 }
 128                 break;
 129 
 130         /* '~' */
 131         case 2:
 132                 if (c == 0x7d) {                /* '}' */
 133                         filter->status = 0x0;
 134                 } else if (c == 0x7b) {         /* '{' */
 135                         filter->status = 0x10;
 136                 } else if (c == 0x7e) { /* '~' */
 137                         filter->status = 0x0;
 138                         CK((*filter->output_function)(0x007e, filter->data));
 139                 }
 140                 break;
 141 
 142         default:
 143                 filter->status = 0;
 144                 break;
 145         }
 146 
 147         return c;
 148 }
 149 
 150 /*
 151  * wchar => HZ
 152  */
 153 int
 154 mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter)
 155 {
 156         int s;
 157 
 158         s = 0;
 159         if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
 160                 s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
 161         } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
 162                 s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
 163         } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
 164                 s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
 165         } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
 166                 s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
 167         } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
 168                 if (c == 0xff04) {
 169                         s = 0xa1e7;
 170                 } else if (c == 0xff5e) {
 171                         s = 0xa1ab;
 172                 } else if (c >= 0xff01 && c <= 0xff5d) {
 173                         s = c - 0xff01 + 0xa3a1;
 174                 } else if (c >= 0xffe0 && c <= 0xffe5) {
 175                         s = ucs_hff_s_cp936_table[c-0xffe0];
 176                 }
 177         }
 178         if (s & 0x8000) {
 179                 s -= 0x8080;
 180         }
 181 
 182         if (s <= 0) {
 183                 if (c == 0) {
 184                         s = 0;
 185                 } else if (s <= 0) {
 186                         s = -1;
 187                 }
 188         } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
 189                 s = -1;
 190         }
 191         if (s >= 0) {
 192                 if (s < 0x80) { /* ASCII */
 193                         if ((filter->status & 0xff00) != 0) {
 194                                 CK((*filter->output_function)(0x7e, filter->data));             /* '~' */
 195                                 CK((*filter->output_function)(0x7d, filter->data));             /* '}' */
 196                         }
 197                         filter->status = 0;
 198                         if (s == 0x7e){
 199                                 CK((*filter->output_function)(0x7e, filter->data));
 200                         }
 201                         CK((*filter->output_function)(s, filter->data));
 202                 } else { /* GB 2312-80 */
 203                         if ((filter->status & 0xff00) != 0x200) {
 204                                 CK((*filter->output_function)(0x7e, filter->data));             /* '~' */
 205                                 CK((*filter->output_function)(0x7b, filter->data));             /* '{' */
 206                         }
 207                         filter->status = 0x200;
 208                         CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
 209                         CK((*filter->output_function)(s & 0x7f, filter->data));
 210                 }
 211         } else {
 212                 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
 213                         CK(mbfl_filt_conv_illegal_output(c, filter));
 214                 }
 215         }
 216 
 217         return c;
 218 }
 219 
 220 int
 221 mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter)
 222 {
 223         /* back to latin */
 224         if ((filter->status & 0xff00) != 0) {
 225                 CK((*filter->output_function)(0x7e, filter->data));             /* ~ */
 226                 CK((*filter->output_function)(0x7d, filter->data));             /* '{' */
 227         }
 228         filter->status &= 0xff;
 229         return 0;
 230 }
 231 
 232 static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter)
 233 {
 234         switch (filter->status & 0xf) {
 235 /*      case 0x00:       ASCII */
 236 /*      case 0x10:       GB2312 */
 237         case 0:
 238                 if (c == 0x7e) {
 239                         filter->status += 2;
 240                 } else if (filter->status == 0x10 && c > 0x20 && c < 0x7f) {            /* DBCS first char */
 241                         filter->status += 1;
 242                 } else if (c >= 0 && c < 0x80) {                /* latin, CTLs */
 243                         ;
 244                 } else {
 245                         filter->flag = 1;       /* bad */
 246                 }
 247                 break;
 248 
 249 /*      case 0x11:       GB2312 second char */
 250         case 1:
 251                 filter->status &= ~0xf;
 252                 if (c < 0x21 || c > 0x7e) {             /* bad */
 253                         filter->flag = 1;
 254                 }
 255                 break;
 256 
 257         case 2:
 258                 if (c == 0x7d) {                /* '}' */
 259                         filter->status = 0;
 260                 } else if (c == 0x7b) {         /* '{' */
 261                         filter->status = 0x10;
 262                 } else if (c == 0x7e) {         /* '~' */
 263                         filter->status = 0;
 264                 } else {
 265                         filter->flag = 1;       /* bad */
 266                         filter->status &= ~0xf;
 267                 }
 268                 break;
 269 
 270         default:
 271                 filter->status = 0;
 272                 break;
 273         }
 274 
 275         return c;
 276 }
 277 
 278 
 279 /*
 280  * Local variables:
 281  * tab-width: 4
 282  * c-basic-offset: 4
 283  * End:
 284  */

/* [<][>][^][v][top][bottom][index][help] */