root/ext/mbstring/libmbfl/filters/mbfilter_utf16.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_filt_conv_utf16_wchar
  2. mbfl_filt_conv_utf16be_wchar
  3. mbfl_filt_conv_wchar_utf16be
  4. mbfl_filt_conv_utf16le_wchar
  5. mbfl_filt_conv_wchar_utf16le

   1 /*
   2  * "streamable kanji code filter and converter"
   3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
   4  *
   5  * LICENSE NOTICES
   6  *
   7  * This file is part of "streamable kanji code filter and converter",
   8  * which is distributed under the terms of GNU Lesser General Public
   9  * License (version 2) as published by the Free Software Foundation.
  10  *
  11  * This software is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with "streamable kanji code filter and converter";
  18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19  * Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  * The author of this file:
  22  *
  23  */
  24 /*
   25  * The source code included in this file was separated from mbfilter.c
  26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
  27  *
  28  */
  29 
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33 
  34 #include "mbfilter.h"
  35 #include "mbfilter_utf16.h"
  36 
  37 static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};
  38 
  39 const mbfl_encoding mbfl_encoding_utf16 = {
  40         mbfl_no_encoding_utf16,
  41         "UTF-16",
  42         "UTF-16",
  43         (const char *(*)[])&mbfl_encoding_utf16_aliases,
  44         NULL,
  45         MBFL_ENCTYPE_MWC2BE
  46 };
  47 
  48 const mbfl_encoding mbfl_encoding_utf16be = {
  49         mbfl_no_encoding_utf16be,
  50         "UTF-16BE",
  51         "UTF-16BE",
  52         NULL,
  53         NULL,
  54         MBFL_ENCTYPE_MWC2BE
  55 };
  56 
  57 const mbfl_encoding mbfl_encoding_utf16le = {
  58         mbfl_no_encoding_utf16le,
  59         "UTF-16LE",
  60         "UTF-16LE",
  61         NULL,
  62         NULL,
  63         MBFL_ENCTYPE_MWC2LE
  64 };
  65 
  66 const struct mbfl_convert_vtbl vtbl_utf16_wchar = {
  67         mbfl_no_encoding_utf16,
  68         mbfl_no_encoding_wchar,
  69         mbfl_filt_conv_common_ctor,
  70         mbfl_filt_conv_common_dtor,
  71         mbfl_filt_conv_utf16_wchar,
  72         mbfl_filt_conv_common_flush
  73 };
  74 
  75 const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {
  76         mbfl_no_encoding_wchar,
  77         mbfl_no_encoding_utf16,
  78         mbfl_filt_conv_common_ctor,
  79         mbfl_filt_conv_common_dtor,
  80         mbfl_filt_conv_wchar_utf16be,
  81         mbfl_filt_conv_common_flush
  82 };
  83 
  84 const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {
  85         mbfl_no_encoding_utf16be,
  86         mbfl_no_encoding_wchar,
  87         mbfl_filt_conv_common_ctor,
  88         mbfl_filt_conv_common_dtor,
  89         mbfl_filt_conv_utf16be_wchar,
  90         mbfl_filt_conv_common_flush
  91 };
  92 
  93 const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {
  94         mbfl_no_encoding_wchar,
  95         mbfl_no_encoding_utf16be,
  96         mbfl_filt_conv_common_ctor,
  97         mbfl_filt_conv_common_dtor,
  98         mbfl_filt_conv_wchar_utf16be,
  99         mbfl_filt_conv_common_flush
 100 };
 101 
 102 const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {
 103         mbfl_no_encoding_utf16le,
 104         mbfl_no_encoding_wchar,
 105         mbfl_filt_conv_common_ctor,
 106         mbfl_filt_conv_common_dtor,
 107         mbfl_filt_conv_utf16le_wchar,
 108         mbfl_filt_conv_common_flush
 109 };
 110 
 111 const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {
 112         mbfl_no_encoding_wchar,
 113         mbfl_no_encoding_utf16le,
 114         mbfl_filt_conv_common_ctor,
 115         mbfl_filt_conv_common_dtor,
 116         mbfl_filt_conv_wchar_utf16le,
 117         mbfl_filt_conv_common_flush
 118 };
 119 
 120 #define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)
 121 
 122 /*
 123  * UTF-16 => wchar
 124  */
 125 int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
 126 {
 127         int n, endian;
 128 
 129         endian = filter->status & 0xff00;
 130         switch (filter->status & 0x0f) {
 131         case 0:
 132                 if (endian) {
 133                         n = c & 0xff;
 134                 } else {
 135                         n = (c & 0xff) << 8;
 136                 }
 137                 filter->cache |= n;
 138                 filter->status++;
 139                 break;
 140         default:
 141                 if (endian) {
 142                         n = (c & 0xff) << 8;
 143                 } else {
 144                         n = c & 0xff;
 145                 }
 146                 n |= filter->cache & 0xffff;
 147                 filter->status &= ~0x0f;
 148                 if (n >= 0xd800 && n < 0xdc00) {
 149                         filter->cache = ((n & 0x3ff) << 16) + 0x400000;
 150                 } else if (n >= 0xdc00 && n < 0xe000) {
 151                         n &= 0x3ff;
 152                         n |= (filter->cache & 0xfff0000) >> 6;
 153                         filter->cache = 0;
 154                         if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
 155                                 CK((*filter->output_function)(n, filter->data));
 156                         } else {                /* illegal character */
 157                                 n &= MBFL_WCSGROUP_MASK;
 158                                 n |= MBFL_WCSGROUP_THROUGH;
 159                                 CK((*filter->output_function)(n, filter->data));
 160                         }
 161                 } else {
 162                         int is_first = filter->status & 0x10;
 163                         filter->cache = 0;
 164                         filter->status |= 0x10;
 165                         if (!is_first) {
 166                                 if (n == 0xfffe) {
 167                                         if (endian) {
 168                                                 filter->status &= ~0x100;               /* big-endian */
 169                                         } else {
 170                                                 filter->status |= 0x100;                /* little-endian */
 171                                         }
 172                                         break;
 173                                 } else if (n == 0xfeff) {
 174                                         break;
 175                                 }
 176                         }
 177                         CK((*filter->output_function)(n, filter->data));
 178                 }
 179                 break;
 180         }
 181 
 182         return c;
 183 }
 184 
 185 /*
 186  * UTF-16BE => wchar
 187  */
 188 int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
 189 {
 190         int n;
 191 
 192         switch (filter->status) {
 193         case 0:
 194                 filter->status = 1;
 195                 n = (c & 0xff) << 8;
 196                 filter->cache |= n;
 197                 break;
 198         default:
 199                 filter->status = 0;
 200                 n = (filter->cache & 0xff00) | (c & 0xff);
 201                 if (n >= 0xd800 && n < 0xdc00) {
 202                         filter->cache = ((n & 0x3ff) << 16) + 0x400000;
 203                 } else if (n >= 0xdc00 && n < 0xe000) {
 204                         n &= 0x3ff;
 205                         n |= (filter->cache & 0xfff0000) >> 6;
 206                         filter->cache = 0;
 207                         if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
 208                                 CK((*filter->output_function)(n, filter->data));
 209                         } else {                /* illegal character */
 210                                 n &= MBFL_WCSGROUP_MASK;
 211                                 n |= MBFL_WCSGROUP_THROUGH;
 212                                 CK((*filter->output_function)(n, filter->data));
 213                         }
 214                 } else {
 215                         filter->cache = 0;
 216                         CK((*filter->output_function)(n, filter->data));
 217                 }
 218                 break;
 219         }
 220 
 221         return c;
 222 }
 223 
 224 /*
 225  * wchar => UTF-16BE
 226  */
 227 int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
 228 {
 229         int n;
 230 
 231         if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
 232                 CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
 233                 CK((*filter->output_function)(c & 0xff, filter->data));
 234         } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
 235                 n = ((c >> 10) - 0x40) | 0xd800;
 236                 CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
 237                 CK((*filter->output_function)(n & 0xff, filter->data));
 238                 n = (c & 0x3ff) | 0xdc00;
 239                 CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
 240                 CK((*filter->output_function)(n & 0xff, filter->data));
 241         } else {
 242                 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
 243                         CK(mbfl_filt_conv_illegal_output(c, filter));
 244                 }
 245         }
 246 
 247         return c;
 248 }
 249 
 250 /*
 251  * UTF-16LE => wchar
 252  */
 253 int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
 254 {
 255         int n;
 256 
 257         switch (filter->status) {
 258         case 0:
 259                 filter->status = 1;
 260                 n = c & 0xff;
 261                 filter->cache |= n;
 262                 break;
 263         default:
 264                 filter->status = 0;
 265                 n = (filter->cache & 0xff) | ((c & 0xff) << 8);
 266                 if (n >= 0xd800 && n < 0xdc00) {
 267                         filter->cache = ((n & 0x3ff) << 16) + 0x400000;
 268                 } else if (n >= 0xdc00 && n < 0xe000) {
 269                         n &= 0x3ff;
 270                         n |= (filter->cache & 0xfff0000) >> 6;
 271                         filter->cache = 0;
 272                         if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
 273                                 CK((*filter->output_function)(n, filter->data));
 274                         } else {                /* illegal character */
 275                                 n &= MBFL_WCSGROUP_MASK;
 276                                 n |= MBFL_WCSGROUP_THROUGH;
 277                                 CK((*filter->output_function)(n, filter->data));
 278                         }
 279                 } else {
 280                         filter->cache = 0;
 281                         CK((*filter->output_function)(n, filter->data));
 282                 }
 283                 break;
 284         }
 285 
 286         return c;
 287 }
 288 
 289 /*
 290  * wchar => UTF-16LE
 291  */
 292 int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
 293 {
 294         int n;
 295 
 296         if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
 297                 CK((*filter->output_function)(c & 0xff, filter->data));
 298                 CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
 299         } else if (c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX) {
 300                 n = ((c >> 10) - 0x40) | 0xd800;
 301                 CK((*filter->output_function)(n & 0xff, filter->data));
 302                 CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
 303                 n = (c & 0x3ff) | 0xdc00;
 304                 CK((*filter->output_function)(n & 0xff, filter->data));
 305                 CK((*filter->output_function)((n >> 8) & 0xff, filter->data));
 306         } else {
 307                 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
 308                         CK(mbfl_filt_conv_illegal_output(c, filter));
 309                 }
 310         }
 311 
 312         return c;
 313 }
 314 
 315 
 316 

/* [<][>][^][v][top][bottom][index][help] */