root/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_filt_conv_html_enc
  2. mbfl_filt_conv_html_enc_flush
  3. mbfl_filt_conv_html_dec_ctor
  4. mbfl_filt_conv_html_dec_dtor
  5. mbfl_filt_conv_html_dec
  6. mbfl_filt_conv_html_dec_flush

   1 /*
   2  * "streamable kanji code filter and converter"
   3  * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
   4  *
   5  * LICENSE NOTICES
   6  *
   7  * This file is part of "streamable kanji code filter and converter",
   8  * which is distributed under the terms of GNU Lesser General Public
   9  * License (version 2) as published by the Free Software Foundation.
  10  *
  11  * This software is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with "streamable kanji code filter and converter";
  18  * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  19  * Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  * The author of this part: Marcus Boerger <helly@php.net>
  22  *
  23  */
  24 /*
  25  * The source code included in this file was separated from mbfilter.c
  26  * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
  27  *
  28  */
  29 
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33 
  34 #ifdef HAVE_STRING_H
  35 #include <string.h>
  36 #endif
  37 
  38 #ifdef HAVE_STRINGS_H
  39 #include <strings.h>
  40 #endif
  41 
  42 #include "mbfilter.h"
  43 #include "mbfilter_htmlent.h"
  44 #include "html_entities.h"
  45 
  46 static const int htmlentitifieds[256] = {
  47   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  48   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  49   0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  50   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,
  51   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  52   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  53   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  55   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  56   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  57   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  58   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  59   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  60   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  61   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  62   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
  63 };
  64 
/* NULL-terminated list of alias names, referenced by the encoding descriptor below. */
static const char *mbfl_encoding_html_ent_aliases[] = {"HTML", "html", NULL};
  66 
/*
 * Encoding descriptor for HTML-ENTITIES.
 * The initializer is positional; the meaning of each slot is defined by the
 * mbfl_encoding declaration, which is not part of this file.
 * NOTE(review): the two identical strings are presumably the canonical name
 * and the MIME name, and the NULL slot an optional table - confirm against
 * the mbfl_encoding definition.
 */
const mbfl_encoding mbfl_encoding_html_ent = {
        mbfl_no_encoding_html_ent,
        "HTML-ENTITIES",
        "HTML-ENTITIES",
        (const char *(*)[])&mbfl_encoding_html_ent_aliases,
        NULL,
        MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE
};
  75 
/*
 * Conversion table for wchar -> HTML-ENTITIES.
 * Uses the common constructor/destructor together with the entity encoder
 * and its flush routine defined in this file.
 */
const struct mbfl_convert_vtbl vtbl_wchar_html = {
        mbfl_no_encoding_wchar,
        mbfl_no_encoding_html_ent,
        mbfl_filt_conv_common_ctor,
        mbfl_filt_conv_common_dtor,
        mbfl_filt_conv_html_enc,
        mbfl_filt_conv_html_enc_flush
};
  84 
/*
 * Conversion table for HTML-ENTITIES -> wchar.
 * The decoder has its own constructor/destructor because it keeps a
 * heap-allocated entity buffer in filter->opaque.
 */
const struct mbfl_convert_vtbl vtbl_html_wchar = {
        mbfl_no_encoding_html_ent,
        mbfl_no_encoding_wchar,
        mbfl_filt_conv_html_dec_ctor,
        mbfl_filt_conv_html_dec_dtor,
        mbfl_filt_conv_html_dec,
        mbfl_filt_conv_html_dec_flush };
  92 
  93 
/*
 * Evaluate `statement`; if its result is negative, return -1 from the
 * enclosing function. Only usable inside functions that return int.
 */
#define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)
  95 
  96 /*
  97  * any => HTML
  98  */
  99 int mbfl_filt_conv_html_enc(int c, mbfl_convert_filter *filter)
 100 {
 101         int tmp[64];
 102         int i;
 103         unsigned int uc;
 104         const mbfl_html_entity_entry *e;
 105 
 106         if (c < sizeof(htmlentitifieds) / sizeof(htmlentitifieds[0]) &&
 107                                 htmlentitifieds[c] != 1) {
 108                 CK((*filter->output_function)(c, filter->data));
 109         } else {
 110                 CK((*filter->output_function)('&', filter->data));
 111                 for (i = 0; (e = &mbfl_html_entity_list[i])->name != NULL; i++) {
 112                         if (c == e->code) {
 113                                 char *p;
 114 
 115                                 for (p = e->name; *p != '\0'; p++) {
 116                                         CK((*filter->output_function)((int)*p, filter->data));
 117                                 }
 118                                 goto last;
 119                         }
 120                 }
 121 
 122                 {
 123                         int *p = tmp + sizeof(tmp) / sizeof(tmp[0]);
 124 
 125                         CK((*filter->output_function)('#', filter->data));
 126 
 127                         uc = (unsigned int)c;
 128 
 129                         *(--p) = '\0';
 130                         do {
 131                                 *(--p) = "0123456789"[uc % 10];
 132                                 uc /= 10;
 133                         } while (uc);
 134 
 135                         for (; *p != '\0'; p++) {
 136                                 CK((*filter->output_function)(*p, filter->data));
 137                         }
 138                 }
 139         last:
 140                 CK((*filter->output_function)(';', filter->data));
 141         }
 142         return c;
 143 }
 144 
 145 int mbfl_filt_conv_html_enc_flush(mbfl_convert_filter *filter)
 146 {
 147         filter->status = 0;
 148         filter->opaque = NULL;
 149 
 150         if (filter->flush_function != NULL) {
 151                 (*filter->flush_function)(filter->data);
 152         }
 153 
 154         return 0;
 155 }
 156 
/*
 * HTML => any
 */
/* Size basis of the decoder's entity buffer; the constructor allocates html_enc_buffer_size+1 bytes. */
#define html_enc_buffer_size    16
/* Characters the decoder accepts while collecting an entity after '&'; any other character aborts the entity. */
static const char html_entity_chars[] = "#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
 162 
 163 void mbfl_filt_conv_html_dec_ctor(mbfl_convert_filter *filter)
 164 {
 165         filter->status = 0;
 166         filter->opaque = mbfl_malloc(html_enc_buffer_size+1);
 167 }
 168 
 169 void mbfl_filt_conv_html_dec_dtor(mbfl_convert_filter *filter)
 170 {
 171         filter->status = 0;
 172         if (filter->opaque)
 173         {
 174                 mbfl_free((void*)filter->opaque);
 175         }
 176         filter->opaque = NULL;
 177 }
 178 
 179 int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
 180 {
 181         int  pos, ent = 0;
 182         mbfl_html_entity_entry *entity;
 183         char *buffer = (char*)filter->opaque;
 184 
 185         if (!filter->status) {
 186                 if (c == '&' ) {
 187                         filter->status = 1;
 188                         buffer[0] = '&';
 189                 } else {
 190                         CK((*filter->output_function)(c, filter->data));
 191                 }
 192         } else {
 193                 if (c == ';') {
 194                         if (buffer[1]=='#') {
 195                                 if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) {
 196                                         if (filter->status > 3) {
 197                                                 /* numeric entity */
 198                                                 for (pos=3; pos<filter->status; pos++) {
 199                                                         int v =  buffer[pos];
 200                                                         if (v >= '0' && v <= '9') {
 201                                                                 v = v - '0';
 202                                                         } else if (v >= 'A' && v <= 'F') {
 203                                                                 v = v - 'A' + 10;
 204                                                         } else if (v >= 'a' && v <= 'f') {
 205                                                                 v = v - 'a' + 10;
 206                                                         } else {
 207                                                                 ent = -1;
 208                                                                 break;
 209                                                         }
 210                                                         ent = ent * 16 + v;
 211                                                 }
 212                                         } else {
 213                                                 ent = -1;
 214                                         }
 215                                 } else {
 216                                         /* numeric entity */
 217                                         if (filter->status > 2) {
 218                                                 for (pos=2; pos<filter->status; pos++) {
 219                                                         int v = buffer[pos];
 220                                                         if (v >= '0' && v <= '9') {
 221                                                                 v = v - '0';
 222                                                         } else {
 223                                                                 ent = -1;
 224                                                                 break;
 225                                                         }
 226                                                         ent = ent*10 + v;
 227                                                 }
 228                                         } else {
 229                                                 ent = -1;
 230                                         }
 231                                 }
 232                                 if (ent >= 0 && ent < 0x110000) {
 233                                         CK((*filter->output_function)(ent, filter->data));
 234                                 } else {
 235                                         for (pos = 0; pos < filter->status; pos++) {
 236                                                 CK((*filter->output_function)(buffer[pos], filter->data));
 237                                         }
 238                                         CK((*filter->output_function)(c, filter->data));
 239                                 }
 240                                 filter->status = 0;
 241                                 /*php_error_docref("ref.mbstring", E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
 242                         } else {
 243                                 /* named entity */
 244                                 buffer[filter->status] = 0;
 245                                 entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
 246                                 while (entity->name) {
 247                                         if (!strcmp(buffer+1, entity->name))    {
 248                                                 ent = entity->code;
 249                                                 break;
 250                                         }
 251                                         entity++;
 252                                 }
 253                                 if (ent) {
 254                                         /* decoded */
 255                                         CK((*filter->output_function)(ent, filter->data));
 256                                         filter->status = 0;
 257                                         /*php_error_docref("ref.mbstring", E_NOTICE,"mbstring decoded '%s'=%d", buffer, ent);*/
 258                                 } else {
 259                                         /* failure */
 260                                         buffer[filter->status++] = ';';
 261                                         buffer[filter->status] = 0;
 262                                         /* php_error_docref("ref.mbstring", E_WARNING, "mbstring cannot decode '%s'", buffer); */
 263                                         mbfl_filt_conv_html_dec_flush(filter);
 264                                 }
 265                         }
 266                 } else {
 267                         /* add character */
 268                         buffer[filter->status++] = c;
 269                         /* add character and check */
 270                         if (!strchr(html_entity_chars, c) || filter->status+1==html_enc_buffer_size || (c=='#' && filter->status>2))
 271                         {
 272                                 /* illegal character or end of buffer */
 273                                 if (c=='&')
 274                                         filter->status--;
 275                                 buffer[filter->status] = 0;
 276                                 /* php_error_docref("ref.mbstring", E_WARNING, "mbstring cannot decode '%s'", buffer)l */
 277                                 mbfl_filt_conv_html_dec_flush(filter);
 278                                 if (c=='&')
 279                                 {
 280                                         buffer[filter->status++] = '&';
 281                                 }
 282                         }
 283                 }
 284         }
 285         return c;
 286 }
 287 
 288 int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter)
 289 {
 290         int status, pos = 0;
 291         unsigned char *buffer;
 292         int err = 0;
 293 
 294         buffer = (unsigned char*)filter->opaque;
 295         status = filter->status;
 296         filter->status = 0;
 297 
 298         /* flush fragments */
 299         while (status--) {
 300                 int e = (*filter->output_function)(buffer[pos++], filter->data);
 301                 if (e != 0)
 302                         err = e;
 303         }
 304 
 305         if (filter->flush_function != NULL) {
 306                 (*filter->flush_function)(filter->data);
 307         }
 308 
 309         return err;
 310 }
 311 
 312 

/* [<][>][^][v][top][bottom][index][help] */