root/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_filt_conv_wchar_cp1254
  2. mbfl_filt_conv_cp1254_wchar
  3. mbfl_filt_ident_cp1254

   1 /*
   2  * "streamable kanji code filter and converter"
   3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
   4  *
   5  * LICENSE NOTICES
   6  *
   7  * This file is part of "streamable kanji code filter and converter",
   8  * which is distributed under the terms of GNU Lesser General Public
   9  * License (version 2) as published by the Free Software Foundation.
  10  *
  11  * This software is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with "streamable kanji code filter and converter";
  18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19  * Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  * The author of this part: Haluk AKIN <halukakin@gmail.com>
  22  *
  23  */
  24 /*
  25  * The source code included in this file was separated from mbfilter_ru.c
  26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
  27  *
  28  */
  29 
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33 
  34 #include "mbfilter.h"
  35 #include "mbfilter_cp1254.h"
  36 #include "unicode_table_cp1254.h"
  37 
  38 static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter);
  39 
  40 static const char *mbfl_encoding_cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL};
  41 
  42 const mbfl_encoding mbfl_encoding_cp1254 = {
  43         mbfl_no_encoding_cp1254,
  44         "Windows-1254",
  45         "Windows-1254",
  46         (const char *(*)[])&mbfl_encoding_cp1254_aliases,
  47         NULL,
  48         MBFL_ENCTYPE_SBCS
  49 };
  50 
  51 const struct mbfl_identify_vtbl vtbl_identify_cp1254 = {
  52         mbfl_no_encoding_cp1254,
  53         mbfl_filt_ident_common_ctor,
  54         mbfl_filt_ident_common_dtor,
  55         mbfl_filt_ident_cp1254
  56 };
  57 
  58 const struct mbfl_convert_vtbl vtbl_cp1254_wchar = {
  59         mbfl_no_encoding_cp1254,
  60         mbfl_no_encoding_wchar,
  61         mbfl_filt_conv_common_ctor,
  62         mbfl_filt_conv_common_dtor,
  63         mbfl_filt_conv_cp1254_wchar,
  64         mbfl_filt_conv_common_flush
  65 };
  66 
  67 const struct mbfl_convert_vtbl vtbl_wchar_cp1254 = {
  68         mbfl_no_encoding_wchar,
  69         mbfl_no_encoding_cp1254,
  70         mbfl_filt_conv_common_ctor,
  71         mbfl_filt_conv_common_dtor,
  72         mbfl_filt_conv_wchar_cp1254,
  73         mbfl_filt_conv_common_flush
  74 };
  75 
  76 #define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)
  77 
  78 /*
  79  * wchar => cp1254
  80  */
  81 int
  82 mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter)
  83 {
  84         int s, n;
  85 
  86         if (c < 0x80) {
  87                 s = c;
  88         } else {
  89                 s = -1;
  90                 n = cp1254_ucs_table_len-1;
  91                 while (n >= 0) {
  92                         if (c == cp1254_ucs_table[n] && c != 0xfffe) {
  93                                 s = cp1254_ucs_table_min + n;
  94                                 break;
  95                         }
  96                         n--;
  97                 }
  98                 if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1254) {
  99                         s = c & MBFL_WCSPLANE_MASK;
 100                 }
 101         }
 102 
 103         if (s >= 0) {
 104                 CK((*filter->output_function)(s, filter->data));
 105         } else {
 106                 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
 107                         CK(mbfl_filt_conv_illegal_output(c, filter));
 108                 }
 109         }
 110 
 111         return c;
 112 }
 113 
 114 /*
 115  * cp1254 => wchar
 116  */
 117 int
 118 mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter)
 119 {
 120         int s;
 121 
 122         if (c >= 0 && c < cp1254_ucs_table_min) {
 123                 s = c;
 124         } else if (c >= cp1254_ucs_table_min && c < 0x100) {
 125                 s = cp1254_ucs_table[c - cp1254_ucs_table_min];
 126                 if (s <= 0) {
 127                         s = c;
 128                         s &= MBFL_WCSPLANE_MASK;
 129                         s |= MBFL_WCSPLANE_CP1254;
 130                 }
 131         } else {
 132                 s = c;
 133                 s &= MBFL_WCSGROUP_MASK;
 134                 s |= MBFL_WCSGROUP_THROUGH;
 135         }
 136 
 137         CK((*filter->output_function)(s, filter->data));
 138 
 139         return c;
 140 }
 141 
 142 /* We only distinguish the MS extensions to ISO-8859-1.
 143  * Actually, this is pretty much a NO-OP, since the identification
 144  * system doesn't allow us to discriminate between a positive match,
 145  * a possible match and a definite non-match.
 146  * The problem here is that cp1254 looks like SJIS for certain chars.
 147  * */
 148 static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter)
 149 {
 150         if (c >= 0x80 && c < 0xff)
 151                 filter->flag = 0;
 152         else
 153                 filter->flag = 1; /* not it */
 154         return c;
 155 }
 156 
 157 

/* [<][>][^][v][top][bottom][index][help] */