This source file includes the following definitions.
- mbfl_filt_conv_html_enc
- mbfl_filt_conv_html_enc_flush
- mbfl_filt_conv_html_dec_ctor
- mbfl_filt_conv_html_dec_dtor
- mbfl_filt_conv_html_dec
- mbfl_filt_conv_html_dec_flush
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #ifdef HAVE_STRING_H
35 #include <string.h>
36 #endif
37
38 #ifdef HAVE_STRINGS_H
39 #include <strings.h>
40 #endif
41
42 #include "mbfilter.h"
43 #include "mbfilter_htmlent.h"
44 #include "html_entities.h"
45
46 static const int htmlentitifieds[256] = {
47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,
51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
63 };
64
65 static const char *mbfl_encoding_html_ent_aliases[] = {"HTML", "html", NULL};
66
67 const mbfl_encoding mbfl_encoding_html_ent = {
68 mbfl_no_encoding_html_ent,
69 "HTML-ENTITIES",
70 "HTML-ENTITIES",
71 (const char *(*)[])&mbfl_encoding_html_ent_aliases,
72 NULL,
73 MBFL_ENCTYPE_ENC_STRM | MBFL_ENCTYPE_GL_UNSAFE
74 };
75
76 const struct mbfl_convert_vtbl vtbl_wchar_html = {
77 mbfl_no_encoding_wchar,
78 mbfl_no_encoding_html_ent,
79 mbfl_filt_conv_common_ctor,
80 mbfl_filt_conv_common_dtor,
81 mbfl_filt_conv_html_enc,
82 mbfl_filt_conv_html_enc_flush
83 };
84
85 const struct mbfl_convert_vtbl vtbl_html_wchar = {
86 mbfl_no_encoding_html_ent,
87 mbfl_no_encoding_wchar,
88 mbfl_filt_conv_html_dec_ctor,
89 mbfl_filt_conv_html_dec_dtor,
90 mbfl_filt_conv_html_dec,
91 mbfl_filt_conv_html_dec_flush };
92
93
94 #define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
95
96
97
98
99 int mbfl_filt_conv_html_enc(int c, mbfl_convert_filter *filter)
100 {
101 int tmp[64];
102 int i;
103 unsigned int uc;
104 const mbfl_html_entity_entry *e;
105
106 if (c < sizeof(htmlentitifieds) / sizeof(htmlentitifieds[0]) &&
107 htmlentitifieds[c] != 1) {
108 CK((*filter->output_function)(c, filter->data));
109 } else {
110 CK((*filter->output_function)('&', filter->data));
111 for (i = 0; (e = &mbfl_html_entity_list[i])->name != NULL; i++) {
112 if (c == e->code) {
113 char *p;
114
115 for (p = e->name; *p != '\0'; p++) {
116 CK((*filter->output_function)((int)*p, filter->data));
117 }
118 goto last;
119 }
120 }
121
122 {
123 int *p = tmp + sizeof(tmp) / sizeof(tmp[0]);
124
125 CK((*filter->output_function)('#', filter->data));
126
127 uc = (unsigned int)c;
128
129 *(--p) = '\0';
130 do {
131 *(--p) = "0123456789"[uc % 10];
132 uc /= 10;
133 } while (uc);
134
135 for (; *p != '\0'; p++) {
136 CK((*filter->output_function)(*p, filter->data));
137 }
138 }
139 last:
140 CK((*filter->output_function)(';', filter->data));
141 }
142 return c;
143 }
144
145 int mbfl_filt_conv_html_enc_flush(mbfl_convert_filter *filter)
146 {
147 filter->status = 0;
148 filter->opaque = NULL;
149
150 if (filter->flush_function != NULL) {
151 (*filter->flush_function)(filter->data);
152 }
153
154 return 0;
155 }
156
157
158
159
160 #define html_enc_buffer_size 16
161 static const char html_entity_chars[] = "#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
162
163 void mbfl_filt_conv_html_dec_ctor(mbfl_convert_filter *filter)
164 {
165 filter->status = 0;
166 filter->opaque = mbfl_malloc(html_enc_buffer_size+1);
167 }
168
169 void mbfl_filt_conv_html_dec_dtor(mbfl_convert_filter *filter)
170 {
171 filter->status = 0;
172 if (filter->opaque)
173 {
174 mbfl_free((void*)filter->opaque);
175 }
176 filter->opaque = NULL;
177 }
178
179 int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
180 {
181 int pos, ent = 0;
182 mbfl_html_entity_entry *entity;
183 char *buffer = (char*)filter->opaque;
184
185 if (!filter->status) {
186 if (c == '&' ) {
187 filter->status = 1;
188 buffer[0] = '&';
189 } else {
190 CK((*filter->output_function)(c, filter->data));
191 }
192 } else {
193 if (c == ';') {
194 if (buffer[1]=='#') {
195 if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) {
196 if (filter->status > 3) {
197
198 for (pos=3; pos<filter->status; pos++) {
199 int v = buffer[pos];
200 if (v >= '0' && v <= '9') {
201 v = v - '0';
202 } else if (v >= 'A' && v <= 'F') {
203 v = v - 'A' + 10;
204 } else if (v >= 'a' && v <= 'f') {
205 v = v - 'a' + 10;
206 } else {
207 ent = -1;
208 break;
209 }
210 ent = ent * 16 + v;
211 }
212 } else {
213 ent = -1;
214 }
215 } else {
216
217 if (filter->status > 2) {
218 for (pos=2; pos<filter->status; pos++) {
219 int v = buffer[pos];
220 if (v >= '0' && v <= '9') {
221 v = v - '0';
222 } else {
223 ent = -1;
224 break;
225 }
226 ent = ent*10 + v;
227 }
228 } else {
229 ent = -1;
230 }
231 }
232 if (ent >= 0 && ent < 0x110000) {
233 CK((*filter->output_function)(ent, filter->data));
234 } else {
235 for (pos = 0; pos < filter->status; pos++) {
236 CK((*filter->output_function)(buffer[pos], filter->data));
237 }
238 CK((*filter->output_function)(c, filter->data));
239 }
240 filter->status = 0;
241
242 } else {
243
244 buffer[filter->status] = 0;
245 entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
246 while (entity->name) {
247 if (!strcmp(buffer+1, entity->name)) {
248 ent = entity->code;
249 break;
250 }
251 entity++;
252 }
253 if (ent) {
254
255 CK((*filter->output_function)(ent, filter->data));
256 filter->status = 0;
257
258 } else {
259
260 buffer[filter->status++] = ';';
261 buffer[filter->status] = 0;
262
263 mbfl_filt_conv_html_dec_flush(filter);
264 }
265 }
266 } else {
267
268 buffer[filter->status++] = c;
269
270 if (!strchr(html_entity_chars, c) || filter->status+1==html_enc_buffer_size || (c=='#' && filter->status>2))
271 {
272
273 if (c=='&')
274 filter->status--;
275 buffer[filter->status] = 0;
276
277 mbfl_filt_conv_html_dec_flush(filter);
278 if (c=='&')
279 {
280 buffer[filter->status++] = '&';
281 }
282 }
283 }
284 }
285 return c;
286 }
287
288 int mbfl_filt_conv_html_dec_flush(mbfl_convert_filter *filter)
289 {
290 int status, pos = 0;
291 unsigned char *buffer;
292 int err = 0;
293
294 buffer = (unsigned char*)filter->opaque;
295 status = filter->status;
296 filter->status = 0;
297
298
299 while (status--) {
300 int e = (*filter->output_function)(buffer[pos++], filter->data);
301 if (e != 0)
302 err = e;
303 }
304
305 if (filter->flush_function != NULL) {
306 (*filter->flush_function)(filter->data);
307 }
308
309 return err;
310 }
311
312