root/ext/mbstring/libmbfl/mbfl/mbfilter.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbfl_buffer_converter_new
  2. mbfl_buffer_converter_new2
  3. mbfl_buffer_converter_delete
  4. mbfl_buffer_converter_reset
  5. mbfl_buffer_converter_illegal_mode
  6. mbfl_buffer_converter_illegal_substchar
  7. mbfl_buffer_converter_strncat
  8. mbfl_buffer_converter_feed
  9. mbfl_buffer_converter_feed2
  10. mbfl_buffer_converter_flush
  11. mbfl_buffer_converter_getbuffer
  12. mbfl_buffer_converter_result
  13. mbfl_buffer_converter_feed_result
  14. mbfl_buffer_illegalchars
  15. mbfl_encoding_detector_new
  16. mbfl_encoding_detector_new2
  17. mbfl_encoding_detector_delete
  18. mbfl_encoding_detector_feed
  19. mbfl_encoding_detector_judge2
  20. mbfl_encoding_detector_judge
  21. mbfl_convert_encoding
  22. mbfl_identify_encoding
  23. mbfl_identify_encoding2
  24. filter_count_output
  25. mbfl_strlen
  26. collector_strpos
  27. mbfl_oddlen
  28. mbfl_strpos
  29. mbfl_substr_count
  30. collector_substr
  31. mbfl_substr
  32. mbfl_strcut
  33. is_fullwidth
  34. filter_count_width
  35. mbfl_strwidth
  36. collector_strimwidth
  37. mbfl_strimwidth
  38. mbfl_ja_jp_hantozen
  39. mime_header_encoder_block_collector
  40. mime_header_encoder_collector
  41. mime_header_encoder_result
  42. mime_header_encoder_new
  43. mime_header_encoder_delete
  44. mime_header_encoder_feed
  45. mbfl_mime_header_encode
  46. mime_header_decoder_collector
  47. mime_header_decoder_result
  48. mime_header_decoder_new
  49. mime_header_decoder_delete
  50. mime_header_decoder_feed
  51. mbfl_mime_header_decode
  52. collector_encode_htmlnumericentity
  53. collector_decode_htmlnumericentity
  54. collector_encode_hex_htmlnumericentity
  55. mbfl_filt_decode_htmlnumericentity_flush
  56. mbfl_html_numeric_entity

   1 /*
   2  * charset=UTF-8
   3  * vim600: encoding=utf-8
   4  */
   5 
   6 /*
   7  * "streamable kanji code filter and converter"
   8  *
   9  * Copyright (c) 1998,1999,2000,2001 HappySize, Inc. All rights reserved.
  10  *
  11  * This software is released under the GNU Lesser General Public License.
  12  * (Version 2.1, February 1999)
  13  * Please read the following detail of the licence (in japanese).
  14  *
  15  * ◆使用許諾条件◆
  16  *
  17  * このソフトウェアは株式会社ハッピーサイズによって開発されました。株式会社ハッ
  18  * ピーサイズは、著作権法および万国著作権条約の定めにより、このソフトウェアに関
  19  * するすべての権利を留保する権利を持ち、ここに行使します。株式会社ハッピーサイ
  20  * ズは以下に明記した条件に従って、このソフトウェアを使用する排他的ではない権利
  21  * をお客様に許諾します。何人たりとも、以下の条件に反してこのソフトウェアを使用
  22  * することはできません。
  23  *
  24  * このソフトウェアを「GNU Lesser General Public License (Version 2.1, February
  25  * 1999)」に示された条件で使用することを、全ての方に許諾します。「GNU Lesser
  26  * General Public License」を満たさない使用には、株式会社ハッピーサイズから書面
  27  * による許諾を得る必要があります。
  28  *
  29  * 「GNU Lesser General Public License」の全文は以下のウェブページから取得でき
  30  * ます。「GNU Lesser General Public License」とは、これまでLibrary General
  31  * Public Licenseと呼ばれていたものです。
  32  *     http://www.gnu.org/ --- GNUウェブサイト
  33  *     http://www.gnu.org/copyleft/lesser.html --- ライセンス文面
  34  * このライセンスの内容がわからない方、守れない方には使用を許諾しません。
  35  *
  36  * しかしながら、当社とGNUプロジェクトとの特定の関係を示唆または主張するもので
  37  * はありません。
  38  *
  39  * ◆保証内容◆
  40  *
  41  * このソフトウェアは、期待された動作・機能・性能を持つことを目標として設計され
  42  * 開発されていますが、これを保証するものではありません。このソフトウェアは「こ
  43  * のまま」の状態で提供されており、たとえばこのソフトウェアの有用性ないし特定の
  44  * 目的に合致することといった、何らかの保証内容が、明示されたり暗黙に示されてい
  45  * る場合であっても、その保証は無効です。このソフトウェアを使用した結果ないし使
  46  * 用しなかった結果によって、直接あるいは間接に受けた身体的な傷害、財産上の損害
  47  * 、データの損失あるいはその他の全ての損害については、その損害の可能性が使用者
  48  * 、当社あるいは第三者によって警告されていた場合であっても、当社はその損害の賠
  49  * 償および補填を行いません。この規定は他の全ての、書面上または書面に無い保証・
  50  * 契約・規定に優先します。
  51  *
  52  * ◆著作権者の連絡先および使用条件についての問い合わせ先◆
  53  *
  54  * 〒102-0073
  55  * 東京都千代田区九段北1-13-5日本地所第一ビル4F
  56  * 株式会社ハッピーサイズ
  57  * Phone: 03-3512-3655, Fax: 03-3512-3656
  58  * Email: sales@happysize.co.jp
  59  * Web: http://happysize.com/
  60  *
  61  * ◆著者◆
  62  *
  63  * 金本 茂 <sgk@happysize.co.jp>
  64  *
  65  * ◆履歴◆
  66  *
  67  * 1998/11/10 sgk implementation in C++
  68  * 1999/4/25  sgk Cで書きなおし。
  69  * 1999/4/26  sgk 入力フィルタを実装。漢字コードを推定しながらフィルタを追加。
  70  * 1999/6/??      Unicodeサポート。
  71  * 1999/6/22  sgk ライセンスをLGPLに変更。
  72  *
  73  */
  74 
  75 /*
  76  * Unicode support
  77  *
  78  * Portions copyright (c) 1999,2000,2001 by the PHP3 internationalization team.
  79  * All rights reserved.
  80  *
  81  */
  82 
  83 
  84 #ifdef HAVE_CONFIG_H
  85 #include "config.h"
  86 #endif
  87 
  88 #include <stddef.h>
  89 
  90 #ifdef HAVE_STRING_H
  91 #include <string.h>
  92 #endif
  93 
  94 #ifdef HAVE_STRINGS_H
  95 #include <strings.h>
  96 #endif
  97 
  98 #ifdef HAVE_STDDEF_H
  99 #include <stddef.h>
 100 #endif
 101 
 102 #include "mbfilter.h"
 103 #include "mbfl_filter_output.h"
 104 #include "mbfilter_pass.h"
 105 #include "filters/mbfilter_tl_jisx0201_jisx0208.h"
 106 
 107 #include "eaw_table.h"
 108 
 109 /* hex character table "0123456789ABCDEF"
      *
      * Sixteen byte values: 0x30-0x39 followed by 0x41-0x46, i.e. the ASCII
      * codes of the digits 0-9 and of the upper-case letters A-F.  The array
      * is sized by its initializer, so it holds exactly these 16 bytes and
      * has no terminating NUL.
      */
 110 static char mbfl_hexchar_table[] = {
 111         0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,0x46
 112 };
 113 
 114 
 115 
 116 /*
 117  * encoding filter
      *
      * CK(statement) evaluates `statement` once; if the result is negative,
      * the enclosing function returns -1 immediately.  It can therefore only
      * be used inside functions whose return type accepts -1.
 118  */
 119 #define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)
 120 
 121 
 122 /*
 123  *  buffering converter
 124  */
 125 mbfl_buffer_converter *
 126 mbfl_buffer_converter_new(
 127     enum mbfl_no_encoding from,
 128     enum mbfl_no_encoding to,
 129     int buf_initsz)
 130 {
 131         const mbfl_encoding *_from = mbfl_no2encoding(from);
 132         const mbfl_encoding *_to = mbfl_no2encoding(to);
 133 
 134         return mbfl_buffer_converter_new2(_from ? _from: &mbfl_encoding_pass, _to ? _to: &mbfl_encoding_pass, buf_initsz);
 135 }
 136 
 137 mbfl_buffer_converter *
 138 mbfl_buffer_converter_new2(
 139         const mbfl_encoding *from,
 140         const mbfl_encoding *to,
 141     int buf_initsz)
 142 {
 143         mbfl_buffer_converter *convd;
 144 
 145         /* allocate */
 146         convd = (mbfl_buffer_converter*)mbfl_malloc(sizeof (mbfl_buffer_converter));
 147         if (convd == NULL) {
 148                 return NULL;
 149         }
 150 
 151         /* initialize */
 152         convd->from = from;
 153         convd->to = to;
 154 
 155         /* create convert filter */
 156         convd->filter1 = NULL;
 157         convd->filter2 = NULL;
 158         if (mbfl_convert_filter_get_vtbl(convd->from->no_encoding, convd->to->no_encoding) != NULL) {
 159                 convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
 160         } else {
 161                 convd->filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, convd->to->no_encoding, mbfl_memory_device_output, NULL, &convd->device);
 162                 if (convd->filter2 != NULL) {
 163                         convd->filter1 = mbfl_convert_filter_new(convd->from->no_encoding,
 164                                         mbfl_no_encoding_wchar,
 165                                         (int (*)(int, void*))convd->filter2->filter_function,
 166                                         (int (*)(void*))convd->filter2->filter_flush,
 167                                         convd->filter2);
 168                         if (convd->filter1 == NULL) {
 169                                 mbfl_convert_filter_delete(convd->filter2);
 170                         }
 171                 }
 172         }
 173         if (convd->filter1 == NULL) {
 174                 return NULL;
 175         }
 176 
 177         mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
 178 
 179         return convd;
 180 }
 181 
 182 
 183 void
 184 mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
 185 {
 186         if (convd != NULL) {
 187                 if (convd->filter1) {
 188                         mbfl_convert_filter_delete(convd->filter1);
 189                 }
 190                 if (convd->filter2) {
 191                         mbfl_convert_filter_delete(convd->filter2);
 192                 }
 193                 mbfl_memory_device_clear(&convd->device);
 194                 mbfl_free((void*)convd);
 195         }
 196 }
 197 
 198 void
 199 mbfl_buffer_converter_reset(mbfl_buffer_converter *convd)
 200 {
 201         mbfl_memory_device_reset(&convd->device);
 202 }
 203 
 204 int
 205 mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
 206 {
 207         if (convd != NULL) {
 208                 if (convd->filter2 != NULL) {
 209                         convd->filter2->illegal_mode = mode;
 210                 } else if (convd->filter1 != NULL) {
 211                         convd->filter1->illegal_mode = mode;
 212                 } else {
 213                         return 0;
 214                 }
 215         }
 216 
 217         return 1;
 218 }
 219 
 220 int
 221 mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, int substchar)
 222 {
 223         if (convd != NULL) {
 224                 if (convd->filter2 != NULL) {
 225                         convd->filter2->illegal_substchar = substchar;
 226                 } else if (convd->filter1 != NULL) {
 227                         convd->filter1->illegal_substchar = substchar;
 228                 } else {
 229                         return 0;
 230                 }
 231         }
 232 
 233         return 1;
 234 }
 235 
 236 int
 237 mbfl_buffer_converter_strncat(mbfl_buffer_converter *convd, const unsigned char *p, int n)
 238 {
 239         mbfl_convert_filter *filter;
 240         int (*filter_function)(int c, mbfl_convert_filter *filter);
 241 
 242         if (convd != NULL && p != NULL) {
 243                 filter = convd->filter1;
 244                 if (filter != NULL) {
 245                         filter_function = filter->filter_function;
 246                         while (n > 0) {
 247                                 if ((*filter_function)(*p++, filter) < 0) {
 248                                         break;
 249                                 }
 250                                 n--;
 251                         }
 252                 }
 253         }
 254 
 255         return n;
 256 }
 257 
 258 int
 259 mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
 260 {
 261         return mbfl_buffer_converter_feed2(convd, string, NULL);
 262 }
 263 
 264 int
 265 mbfl_buffer_converter_feed2(mbfl_buffer_converter *convd, mbfl_string *string, int *loc)
 266 {
 267         int n;
 268         unsigned char *p;
 269         mbfl_convert_filter *filter;
 270         int (*filter_function)(int c, mbfl_convert_filter *filter);
 271 
 272         if (convd == NULL || string == NULL) {
 273                 return -1;
 274         }
 275         mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
 276         /* feed data */
 277         n = string->len;
 278         p = string->val;
 279 
 280         filter = convd->filter1;
 281         if (filter != NULL) {
 282                 filter_function = filter->filter_function;
 283                 while (n > 0) {
 284                         if ((*filter_function)(*p++, filter) < 0) {
 285                                 if (loc) {
 286                                         *loc = p - string->val;
 287                                 }
 288                                 return -1;
 289                         }
 290                         n--;
 291                 }
 292         }
 293         if (loc) {
 294                 *loc = p - string->val;
 295         }
 296         return 0;
 297 }
 298 
 299 
 300 int
 301 mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
 302 {
 303         if (convd == NULL) {
 304                 return -1;
 305         }
 306 
 307         if (convd->filter1 != NULL) {
 308                 mbfl_convert_filter_flush(convd->filter1);
 309         }
 310         if (convd->filter2 != NULL) {
 311                 mbfl_convert_filter_flush(convd->filter2);
 312         }
 313 
 314         return 0;
 315 }
 316 
 317 mbfl_string *
 318 mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result)
 319 {
 320         if (convd != NULL && result != NULL && convd->device.buffer != NULL) {
 321                 result->no_encoding = convd->to->no_encoding;
 322                 result->val = convd->device.buffer;
 323                 result->len = convd->device.pos;
 324         } else {
 325                 result = NULL;
 326         }
 327 
 328         return result;
 329 }
 330 
 331 mbfl_string *
 332 mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
 333 {
 334         if (convd == NULL || result == NULL) {
 335                 return NULL;
 336         }
 337         result->no_encoding = convd->to->no_encoding;
 338         return mbfl_memory_device_result(&convd->device, result);
 339 }
 340 
 341 mbfl_string *
 342 mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string,
 343                                   mbfl_string *result)
 344 {
 345         if (convd == NULL || string == NULL || result == NULL) {
 346                 return NULL;
 347         }
 348         mbfl_buffer_converter_feed(convd, string);
 349         if (convd->filter1 != NULL) {
 350                 mbfl_convert_filter_flush(convd->filter1);
 351         }
 352         if (convd->filter2 != NULL) {
 353                 mbfl_convert_filter_flush(convd->filter2);
 354         }
 355         result->no_encoding = convd->to->no_encoding;
 356         return mbfl_memory_device_result(&convd->device, result);
 357 }
 358 
 359 int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
 360 {
 361         int num_illegalchars = 0;
 362 
 363         if (convd == NULL) {
 364                 return 0;
 365         }
 366 
 367         if (convd->filter1 != NULL) {
 368                 num_illegalchars += convd->filter1->num_illegalchar;
 369         }
 370 
 371         if (convd->filter2 != NULL) {
 372                 num_illegalchars += convd->filter2->num_illegalchar;
 373         }
 374 
 375         return (num_illegalchars);
 376 }
 377 
 378 /*
 379  * encoding detector
 380  */
 381 mbfl_encoding_detector *
 382 mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict)
 383 {
 384         mbfl_encoding_detector *identd;
 385 
 386         int i, num;
 387         mbfl_identify_filter *filter;
 388 
 389         if (elist == NULL || elistsz <= 0) {
 390                 return NULL;
 391         }
 392 
 393         /* allocate */
 394         identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
 395         if (identd == NULL) {
 396                 return NULL;
 397         }
 398         identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
 399         if (identd->filter_list == NULL) {
 400                 mbfl_free(identd);
 401                 return NULL;
 402         }
 403 
 404         /* create filters */
 405         i = 0;
 406         num = 0;
 407         while (i < elistsz) {
 408                 filter = mbfl_identify_filter_new(elist[i]);
 409                 if (filter != NULL) {
 410                         identd->filter_list[num] = filter;
 411                         num++;
 412                 }
 413                 i++;
 414         }
 415         identd->filter_list_size = num;
 416 
 417         /* set strict flag */
 418         identd->strict = strict;
 419 
 420         return identd;
 421 }
 422 
 423 mbfl_encoding_detector *
 424 mbfl_encoding_detector_new2(const mbfl_encoding **elist, int elistsz, int strict)
 425 {
 426         mbfl_encoding_detector *identd;
 427 
 428         int i, num;
 429         mbfl_identify_filter *filter;
 430 
 431         if (elist == NULL || elistsz <= 0) {
 432                 return NULL;
 433         }
 434 
 435         /* allocate */
 436         identd = (mbfl_encoding_detector*)mbfl_malloc(sizeof(mbfl_encoding_detector));
 437         if (identd == NULL) {
 438                 return NULL;
 439         }
 440         identd->filter_list = (mbfl_identify_filter **)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter *));
 441         if (identd->filter_list == NULL) {
 442                 mbfl_free(identd);
 443                 return NULL;
 444         }
 445 
 446         /* create filters */
 447         i = 0;
 448         num = 0;
 449         while (i < elistsz) {
 450                 filter = mbfl_identify_filter_new2(elist[i]);
 451                 if (filter != NULL) {
 452                         identd->filter_list[num] = filter;
 453                         num++;
 454                 }
 455                 i++;
 456         }
 457         identd->filter_list_size = num;
 458 
 459         /* set strict flag */
 460         identd->strict = strict;
 461 
 462         return identd;
 463 }
 464 
 465 
 466 void
 467 mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
 468 {
 469         int i;
 470 
 471         if (identd != NULL) {
 472                 if (identd->filter_list != NULL) {
 473                         i = identd->filter_list_size;
 474                         while (i > 0) {
 475                                 i--;
 476                                 mbfl_identify_filter_delete(identd->filter_list[i]);
 477                         }
 478                         mbfl_free((void *)identd->filter_list);
 479                 }
 480                 mbfl_free((void *)identd);
 481         }
 482 }
 483 
 484 int
 485 mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
 486 {
 487         int i, n, num, bad, res;
 488         unsigned char *p;
 489         mbfl_identify_filter *filter;
 490 
 491         res = 0;
 492         /* feed data */
 493         if (identd != NULL && string != NULL && string->val != NULL) {
 494                 num = identd->filter_list_size;
 495                 n = string->len;
 496                 p = string->val;
 497                 bad = 0;
 498                 while (n > 0) {
 499                         for (i = 0; i < num; i++) {
 500                                 filter = identd->filter_list[i];
 501                                 if (!filter->flag) {
 502                                         (*filter->filter_function)(*p, filter);
 503                                         if (filter->flag) {
 504                                                 bad++;
 505                                         }
 506                                 }
 507                         }
 508                         if ((num - 1) <= bad) {
 509                                 res = 1;
 510                                 break;
 511                         }
 512                         p++;
 513                         n--;
 514                 }
 515         }
 516 
 517         return res;
 518 }
 519 
 520 const mbfl_encoding *mbfl_encoding_detector_judge2(mbfl_encoding_detector *identd)
 521 {
 522         mbfl_identify_filter *filter;
 523         const mbfl_encoding *encoding = NULL;
 524         int n;
 525 
 526         /* judge */
 527         if (identd != NULL) {
 528                 n = identd->filter_list_size - 1;
 529                 while (n >= 0) {
 530                         filter = identd->filter_list[n];
 531                         if (!filter->flag) {
 532                                 if (!identd->strict || !filter->status) {
 533                                         encoding = filter->encoding;
 534                                 }
 535                         }
 536                         n--;
 537                 }
 538 
 539                 /* fallback judge */
 540                 if (!encoding) {
 541                         n = identd->filter_list_size - 1;
 542                         while (n >= 0) {
 543                                 filter = identd->filter_list[n];
 544                                 if (!filter->flag) {
 545                                         encoding = filter->encoding;
 546                                 }
 547                                 n--;
 548                         }
 549                 }
 550         }
 551 
 552         return encoding;
 553 }
 554 
 555 enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
 556 {
 557         const mbfl_encoding *encoding = mbfl_encoding_detector_judge2(identd);
 558         return !encoding ? mbfl_no_encoding_invalid: encoding->no_encoding;
 559 }
 560 
 561 
 562 /*
 563  * encoding converter
 564  */
 565 mbfl_string *
 566 mbfl_convert_encoding(
 567     mbfl_string *string,
 568     mbfl_string *result,
 569     enum mbfl_no_encoding toenc)
 570 {
 571         int n;
 572         unsigned char *p;
 573         const mbfl_encoding *encoding;
 574         mbfl_memory_device device;
 575         mbfl_convert_filter *filter1;
 576         mbfl_convert_filter *filter2;
 577 
 578         /* initialize */
 579         encoding = mbfl_no2encoding(toenc);
 580         if (encoding == NULL || string == NULL || result == NULL) {
 581                 return NULL;
 582         }
 583 
 584         filter1 = NULL;
 585         filter2 = NULL;
 586         if (mbfl_convert_filter_get_vtbl(string->no_encoding, toenc) != NULL) {
 587                 filter1 = mbfl_convert_filter_new(string->no_encoding, toenc, mbfl_memory_device_output, 0, &device);
 588         } else {
 589                 filter2 = mbfl_convert_filter_new(mbfl_no_encoding_wchar, toenc, mbfl_memory_device_output, 0, &device);
 590                 if (filter2 != NULL) {
 591                         filter1 = mbfl_convert_filter_new(string->no_encoding, mbfl_no_encoding_wchar, (int (*)(int, void*))filter2->filter_function, NULL, filter2);
 592                         if (filter1 == NULL) {
 593                                 mbfl_convert_filter_delete(filter2);
 594                         }
 595                 }
 596         }
 597         if (filter1 == NULL) {
 598                 return NULL;
 599         }
 600 
 601         if (filter2 != NULL) {
 602                 filter2->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
 603                 filter2->illegal_substchar = 0x3f;              /* '?' */
 604         }
 605 
 606         mbfl_memory_device_init(&device, string->len, (string->len >> 2) + 8);
 607 
 608         /* feed data */
 609         n = string->len;
 610         p = string->val;
 611         if (p != NULL) {
 612                 while (n > 0) {
 613                         if ((*filter1->filter_function)(*p++, filter1) < 0) {
 614                                 break;
 615                         }
 616                         n--;
 617                 }
 618         }
 619 
 620         mbfl_convert_filter_flush(filter1);
 621         mbfl_convert_filter_delete(filter1);
 622         if (filter2 != NULL) {
 623                 mbfl_convert_filter_flush(filter2);
 624                 mbfl_convert_filter_delete(filter2);
 625         }
 626 
 627         return mbfl_memory_device_result(&device, result);
 628 }
 629 
 630 
 631 /*
 632  * identify encoding
 633  */
 634 const mbfl_encoding *
 635 mbfl_identify_encoding(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
 636 {
 637         int i, n, num, bad;
 638         unsigned char *p;
 639         mbfl_identify_filter *flist, *filter;
 640         const mbfl_encoding *encoding;
 641 
 642         /* flist is an array of mbfl_identify_filter instances */
 643         flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
 644         if (flist == NULL) {
 645                 return NULL;
 646         }
 647 
 648         num = 0;
 649         if (elist != NULL) {
 650                 for (i = 0; i < elistsz; i++) {
 651                         if (!mbfl_identify_filter_init(&flist[num], elist[i])) {
 652                                 num++;
 653                         }
 654                 }
 655         }
 656 
 657         /* feed data */
 658         n = string->len;
 659         p = string->val;
 660 
 661         if (p != NULL) {
 662                 bad = 0;
 663                 while (n > 0) {
 664                         for (i = 0; i < num; i++) {
 665                                 filter = &flist[i];
 666                                 if (!filter->flag) {
 667                                         (*filter->filter_function)(*p, filter);
 668                                         if (filter->flag) {
 669                                                 bad++;
 670                                         }
 671                                 }
 672                         }
 673                         if ((num - 1) <= bad && !strict) {
 674                                 break;
 675                         }
 676                         p++;
 677                         n--;
 678                 }
 679         }
 680 
 681         /* judge */
 682         encoding = NULL;
 683 
 684         for (i = 0; i < num; i++) {
 685                 filter = &flist[i];
 686                 if (!filter->flag) {
 687                         if (strict && filter->status) {
 688                                 continue;
 689                         }
 690                         encoding = filter->encoding;
 691                         break;
 692                 }
 693         }
 694 
 695         /* fall-back judge */
 696         if (!encoding) {
 697                 for (i = 0; i < num; i++) {
 698                         filter = &flist[i];
 699                         if (!filter->flag && (!strict || !filter->status)) {
 700                                 encoding = filter->encoding;
 701                                 break;
 702                         }
 703                 }
 704         }
 705 
 706         /* cleanup */
 707         /* dtors should be called in reverse order */
 708         i = num; while (--i >= 0) {
 709                 mbfl_identify_filter_cleanup(&flist[i]);
 710         }
 711 
 712         mbfl_free((void *)flist);
 713 
 714         return encoding;
 715 }
 716 
 717 const mbfl_encoding *
 718 mbfl_identify_encoding2(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict)
 719 {
 720         int i, n, num, bad;
 721         unsigned char *p;
 722         mbfl_identify_filter *flist, *filter;
 723         const mbfl_encoding *encoding;
 724 
 725         /* flist is an array of mbfl_identify_filter instances */
 726         flist = (mbfl_identify_filter *)mbfl_calloc(elistsz, sizeof(mbfl_identify_filter));
 727         if (flist == NULL) {
 728                 return NULL;
 729         }
 730 
 731         num = 0;
 732         if (elist != NULL) {
 733                 for (i = 0; i < elistsz; i++) {
 734                         if (!mbfl_identify_filter_init2(&flist[num], elist[i])) {
 735                                 num++;
 736                         }
 737                 }
 738         }
 739 
 740         /* feed data */
 741         n = string->len;
 742         p = string->val;
 743 
 744         if (p != NULL) {
 745                 bad = 0;
 746                 while (n > 0) {
 747                         for (i = 0; i < num; i++) {
 748                                 filter = &flist[i];
 749                                 if (!filter->flag) {
 750                                         (*filter->filter_function)(*p, filter);
 751                                         if (filter->flag) {
 752                                                 bad++;
 753                                         }
 754                                 }
 755                         }
 756                         if ((num - 1) <= bad && !strict) {
 757                                 break;
 758                         }
 759                         p++;
 760                         n--;
 761                 }
 762         }
 763 
 764         /* judge */
 765         encoding = NULL;
 766 
 767         for (i = 0; i < num; i++) {
 768                 filter = &flist[i];
 769                 if (!filter->flag) {
 770                         if (strict && filter->status) {
 771                                 continue;
 772                         }
 773                         encoding = filter->encoding;
 774                         break;
 775                 }
 776         }
 777 
 778         /* fall-back judge */
 779         if (!encoding) {
 780                 for (i = 0; i < num; i++) {
 781                         filter = &flist[i];
 782                         if (!filter->flag && (!strict || !filter->status)) {
 783                                 encoding = filter->encoding;
 784                                 break;
 785                         }
 786                 }
 787         }
 788 
 789         /* cleanup */
 790         /* dtors should be called in reverse order */
 791         i = num; while (--i >= 0) {
 792                 mbfl_identify_filter_cleanup(&flist[i]);
 793         }
 794 
 795         mbfl_free((void *)flist);
 796 
 797         return encoding;
 798 }
 799 
 800 /*
 801  *  strlen
 802  */
 /*
  * Output callback that only counts: `data` points to an int, which is
  * incremented by one on every call.  The character itself is returned
  * unchanged.
  */
 static int
 filter_count_output(int c, void *data)
 {
         int *counter = (int *)data;

         *counter += 1;

         return c;
 }
 809 
 810 int
 811 mbfl_strlen(mbfl_string *string)
 812 {
 813         int len, n, m, k;
 814         unsigned char *p;
 815         const unsigned char *mbtab;
 816         const mbfl_encoding *encoding;
 817 
 818         encoding = mbfl_no2encoding(string->no_encoding);
 819         if (encoding == NULL || string == NULL) {
 820                 return -1;
 821         }
 822 
 823         len = 0;
 824         if (encoding->flag & MBFL_ENCTYPE_SBCS) {
 825                 len = string->len;
 826         } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
 827                 len = string->len/2;
 828         } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
 829                 len = string->len/4;
 830         } else if (encoding->mblen_table != NULL) {
 831                 mbtab = encoding->mblen_table;
 832                 n = 0;
 833                 p = string->val;
 834                 k = string->len;
 835                 /* count */
 836                 if (p != NULL) {
 837                         while (n < k) {
 838                                 m = mbtab[*p];
 839                                 n += m;
 840                                 p += m;
 841                                 len++;
 842                         };
 843                 }
 844         } else {
 845                 /* wchar filter */
 846                 mbfl_convert_filter *filter = mbfl_convert_filter_new(
 847                   string->no_encoding,
 848                   mbfl_no_encoding_wchar,
 849                   filter_count_output, 0, &len);
 850                 if (filter == NULL) {
 851                         return -1;
 852                 }
 853                 /* count */
 854                 n = string->len;
 855                 p = string->val;
 856                 if (p != NULL) {
 857                         while (n > 0) {
 858                                 (*filter->filter_function)(*p++, filter);
 859                                 n--;
 860                         }
 861                 }
 862                 mbfl_convert_filter_delete(filter);
 863         }
 864 
 865         return len;
 866 }
 867 
 868 
 869 /*
 870  *  strpos
      *
      *  State shared with collector_strpos(), which receives a pointer to
      *  this structure as its `data` argument.  The field notes below
      *  describe how the visible part of collector_strpos() uses them.
 871  */
 872 struct collector_strpos_data {
 873         mbfl_convert_filter *next_filter;       /* NOTE(review): not referenced in the visible part of collector_strpos() - confirm its use */
 874         mbfl_wchar_device needle;               /* needle.buffer[] holds the values each incoming character is compared against */
 875         int needle_len;                         /* a match is complete once needle_pos reaches this value */
 876         int start;                              /* characters are only compared while output >= start */
 877         int output;                             /* position counter compared with start and recorded in found_pos; where it is advanced is not visible here */
 878         int found_pos;                          /* value of output when the current candidate match began (needle_pos was 0) */
 879         int needle_pos;                         /* index into needle.buffer of the next element to compare */
 880         int matched_pos;                        /* found_pos of the most recent complete match */
 881 };
 882 
 /*
  * collector_strpos: per-character callback that searches the incoming
  * character stream for the needle stored in the collector state.
  *
  *   c     the character just produced by the upstream filter
  *   data  pointer to the struct collector_strpos_data for this search
  *
  * Whenever the whole needle has been seen, pc->matched_pos receives the
  * index where that occurrence started. The function always returns c.
  */
 883 static int
 884 collector_strpos(int c, void* data)
 885 {
 886         int *p, *h, *m, n;
 887         struct collector_strpos_data *pc = (struct collector_strpos_data*)data;
 888 
             /* characters before pc->start are counted but never compared */
 889         if (pc->output >= pc->start) {
 890                 if (c == (int)pc->needle.buffer[pc->needle_pos]) {
 891                         if (pc->needle_pos == 0) {
 892                                 pc->found_pos = pc->output;                     /* found position */
 893                         }
 894                         pc->needle_pos++;                                               /* needle pointer */
 895                         if (pc->needle_pos >= pc->needle_len) {
                                     /* the whole needle matched: record it, then fall into
                                      * the shift logic below to prepare for the next match */
 896                                 pc->matched_pos = pc->found_pos;        /* matched position */
 897                                 pc->needle_pos--;
 898                                 goto retry;
 899                         }
 900                 } else if (pc->needle_pos != 0) {
                             /* mismatch after a partial match: slide the candidate start
                              * forward one character at a time */
 901 retry:
 902                         h = (int *)pc->needle.buffer;
 903                         h++;
 904                         for (;;) {
 905                                 pc->found_pos++;
                                     /* compare the needle shifted by (h - buffer) against
                                      * the beginning of the needle itself */
 906                                 p = h;
 907                                 m = (int *)pc->needle.buffer;
 908                                 n = pc->needle_pos - 1;
 909                                 while (n > 0 && *p == *m) {
 910                                         n--;
 911                                         p++;
 912                                         m++;
 913                                 }
 914                                 if (n <= 0) {
                                             /* the compared run agreed; restart from scratch
                                              * when c does not equal the needle character at m */
 915                                         if (*m != c) {
 916                                                 pc->needle_pos = 0;
 917                                         }
 918                                         break;
 919                                 } else {
                                             /* this shift failed: try the next one with a
                                              * shorter remembered prefix */
 920                                         h++;
 921                                         pc->needle_pos--;
 922                                 }
 923                         }
 924                 }
 925         }
 926 
 927         pc->output++;
 928         return c;
 929 }
 930 
 931 /*
 932  *      oddlen
 933  */
 934 int
 935 mbfl_oddlen(mbfl_string *string)
 936 {
 937         int len, n, m, k;
 938         unsigned char *p;
 939         const unsigned char *mbtab;
 940         const mbfl_encoding *encoding;
 941 
 942 
 943         if (string == NULL) {
 944                 return -1;
 945         }
 946         encoding = mbfl_no2encoding(string->no_encoding);
 947         if (encoding == NULL) {
 948                 return -1;
 949         }
 950 
 951         len = 0;
 952         if (encoding->flag & MBFL_ENCTYPE_SBCS) {
 953                 return 0;
 954         } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
 955                 return len % 2;
 956         } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
 957                 return len % 4;
 958         } else if (encoding->mblen_table != NULL) {
 959                 mbtab = encoding->mblen_table;
 960                 n = 0;
 961                 p = string->val;
 962                 k = string->len;
 963                 /* count */
 964                 if (p != NULL) {
 965                         while (n < k) {
 966                                 m = mbtab[*p];
 967                                 n += m;
 968                                 p += m;
 969                         };
 970                 }
 971                 return n-k;
 972         } else {
 973                 /* how can i do ? */
 974                 return 0;
 975         }
 976         /* NOT REACHED */
 977 }
 978 
 /*
  * mbfl_strpos: find `needle` inside `haystack` and report where it starts,
  * counted in characters rather than bytes.
  *
  * Both strings are brought to UTF-8 first (a converted copy is made only
  * when the input uses another encoding); the search itself then runs over
  * the UTF-8 bytes with a 256-entry skip table.
  *
  *   haystack, needle  strings to search in / for; both must be non-NULL
  *                     and have a non-NULL val
  *   offset            forward search: number of characters to skip from
  *                     the start of the haystack.
  *                     reverse search: a non-negative value raises the lower
  *                     bound of the scan by that many characters; a negative
  *                     value moves the scan's starting point back from the
  *                     end of the haystack
  *   reverse           zero scans from the front, non-zero from the back
  *
  * Returns the character index of the match (>= 0) or a negative code:
  *    -1  no match
  *    -4  conversion of haystack or needle to UTF-8 failed
  *    -8  NULL argument, empty needle, or UTF-8 length table unavailable
  *   -16  the offset runs past the haystack
  */
 979 int
 980 mbfl_strpos(
 981     mbfl_string *haystack,
 982     mbfl_string *needle,
 983     int offset,
 984     int reverse)
 985 {
 986         int result;
 987         mbfl_string _haystack_u8, _needle_u8;
 988         const mbfl_string *haystack_u8, *needle_u8 = NULL;
 989         const unsigned char *u8_tbl;
 990 
 991         if (haystack == NULL || haystack->val == NULL || needle == NULL || needle->val == NULL) {
 992                 return -8;
 993         }
 994 
             /* fetch the UTF-8 lead-byte length table used to step over characters */
 995         {
 996                 const mbfl_encoding *u8_enc;
 997                 u8_enc = mbfl_no2encoding(mbfl_no_encoding_utf8);
 998                 if (u8_enc == NULL || u8_enc->mblen_table == NULL) {
 999                         return -8;
1000                 }
1001                 u8_tbl = u8_enc->mblen_table;
1002         }
1003 
             /* work on UTF-8: convert into the local copies only when necessary */
1004         if (haystack->no_encoding != mbfl_no_encoding_utf8) {
1005                 mbfl_string_init(&_haystack_u8);
1006                 haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, mbfl_no_encoding_utf8);
1007                 if (haystack_u8 == NULL) {
1008                         result = -4;
1009                         goto out;
1010                 }
1011         } else {
1012                 haystack_u8 = haystack;
1013         }
1014 
1015         if (needle->no_encoding != mbfl_no_encoding_utf8) {
1016                 mbfl_string_init(&_needle_u8);
1017                 needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, mbfl_no_encoding_utf8);
1018                 if (needle_u8 == NULL) {
1019                         result = -4;
1020                         goto out;
1021                 }
1022         } else {
1023                 needle_u8 = needle;
1024         }
1025 
1026         if (needle_u8->len < 1) {
1027                 result = -8;
1028                 goto out;
1029         }
1030 
             /* from here on the default answer is "not found" */
1031         result = -1;
1032         if (haystack_u8->len < needle_u8->len) {
1033                 goto out;
1034         }
1035 
1036         if (!reverse) {
                     /* ---- forward search ---- */
1037                 unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
1038                 unsigned int needle_u8_len = needle_u8->len;
1039                 unsigned int i;
1040                 const unsigned char *p, *q, *e;
1041                 const unsigned char *haystack_u8_val = haystack_u8->val,
1042                                     *needle_u8_val = needle_u8->val;
                     /* skip table: default jump is needle length + 1; bytes occurring in the
                      * needle (all but its last byte) get the distance len - index instead */
1043                 for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
1044                         jtbl[i] = needle_u8_len + 1;
1045                 }
1046                 for (i = 0; i < needle_u8_len - 1; ++i) {
1047                         jtbl[needle_u8_val[i]] = needle_u8_len - i;
1048                 }
1049                 e = haystack_u8_val + haystack_u8->len;
1050                 p = haystack_u8_val;
                     /* step over `offset` characters using the lead-byte length table */
1051                 while (--offset >= 0) {
1052                         if (p >= e) {
1053                                 result = -16;
1054                                 goto out;
1055                         }
1056                         p += u8_tbl[*p];
1057                 }
                     /* p marks the END of the current comparison window */
1058                 p += needle_u8_len;
1059                 if (p > e) {
1060                         goto out;
1061                 }
1062                 while (p <= e) {
1063                         const unsigned char *pv = p;
1064                         q = needle_u8_val + needle_u8_len;
                             /* compare needle and window from their last byte backwards */
1065                         for (;;) {
1066                                 if (q == needle_u8_val) {
                                             /* full match, p now sits on its first byte: the
                                              * character index is the number of bytes before p
                                              * that are not UTF-8 continuation bytes (10xxxxxx) */
1067                                         result = 0;
1068                                         while (p > haystack_u8_val) {
1069                                                 unsigned char c = *--p;
1070                                                 if (c < 0x80) {
1071                                                         ++result;
1072                                                 } else if ((c & 0xc0) != 0x80) {
1073                                                         ++result;
1074                                                 }
1075                                         }
1076                                         goto out;
1077                                 }
1078                                 if (*--q != *--p) {
1079                                         break;
1080                                 }
1081                         }
                             /* jump according to the mismatching haystack byte, but always end
                              * up at least one byte beyond the previous window end */
1082                         p += jtbl[*p];
1083                         if (p <= pv) {
1084                                 p = pv + 1;
1085                         }
1086                 }
1087         } else {
                     /* ---- reverse search ---- */
1088                 unsigned int jtbl[1 << (sizeof(unsigned char) * 8)];
1089                 unsigned int needle_u8_len = needle_u8->len, needle_len = 0;
1090                 unsigned int i;
1091                 const unsigned char *p, *e, *q, *qe;
1092                 const unsigned char *haystack_u8_val = haystack_u8->val,
1093                                     *needle_u8_val = needle_u8->val;
                     /* skip table: default jump is the needle length; bytes at index > 0 of
                      * the needle jump by their index. The same pass counts the needle's
                      * characters (non-continuation bytes) into needle_len. */
1094                 for (i = 0; i < sizeof(jtbl) / sizeof(*jtbl); ++i) {
1095                         jtbl[i] = needle_u8_len;
1096                 }
1097                 for (i = needle_u8_len - 1; i > 0; --i) {
1098                         unsigned char c = needle_u8_val[i];
1099                         jtbl[c] = i;
1100                         if (c < 0x80) {
1101                                 ++needle_len;
1102                         } else if ((c & 0xc0) != 0x80) {
1103                                 ++needle_len;
1104                         }
1105                 }
                     /* byte 0 of the needle is counted as well but gets no table entry */
1106                 {
1107                         unsigned char c = needle_u8_val[0];
1108                         if (c < 0x80) {
1109                                 ++needle_len;
1110                         } else if ((c & 0xc0) != 0x80) {
1111                                 ++needle_len;
1112                         }
1113                 }
                     /* e is the lowest address a match may start at, p the upper end of the scan */
1114                 e = haystack_u8_val;
1115                 p = e + haystack_u8->len;
1116                 qe = needle_u8_val + needle_u8_len;
1117                 if (offset < 0) {
                             /* NOTE(review): -offset is int and needle_len unsigned, so this
                              * comparison is carried out in unsigned arithmetic */
1118                         if (-offset > needle_len) {
                                     /* pull the upper end back by (|offset| - needle_len) characters */
1119                                 offset += needle_len;
1120                                 while (offset < 0) {
1121                                         unsigned char c;
1122                                         if (p <= e) {
1123                                                 result = -16;
1124                                                 goto out;
1125                                         }
1126                                         c = *(--p);
1127                                         if (c < 0x80) {
1128                                                 ++offset;
1129                                         } else if ((c & 0xc0) != 0x80) {
1130                                                 ++offset;
1131                                         }
1132                                 }
1133                         }
1134                 } else {
                             /* non-negative offset: raise the lower bound by `offset` characters */
1135                         const unsigned char *ee = haystack_u8_val + haystack_u8->len;
1136                         while (--offset >= 0) {
1137                                 if (e >= ee) {
1138                                         result = -16;
1139                                         goto out;
1140                                 }
1141                                 e += u8_tbl[*e];
1142                         }
1143                 }
1144                 if (p < e + needle_u8_len) {
1145                         goto out;
1146                 }
                     /* p now marks the START of the current comparison window */
1147                 p -= needle_u8_len;
1148                 while (p >= e) {
1149                         const unsigned char *pv = p;
1150                         q = needle_u8_val;
                             /* compare needle and window from their first byte forwards */
1151                         for (;;) {
1152                                 if (q == qe) {
                                             /* full match: step back to its first byte, then count
                                              * the non-continuation bytes in front of it */
1153                                         result = 0;
1154                                         p -= needle_u8_len;
1155                                         while (p > haystack_u8_val) {
1156                                                 unsigned char c = *--p;
1157                                                 if (c < 0x80) {
1158                                                         ++result;
1159                                                 } else if ((c & 0xc0) != 0x80) {
1160                                                         ++result;
1161                                                 }
1162                                         }
1163                                         goto out;
1164                                 }
1165                                 if (*q != *p) {
1166                                         break;
1167                                 }
1168                                 ++p, ++q;
1169                         }
                             /* jump back according to the mismatching haystack byte, but always
                              * end up at least one byte before the previous window start */
1170                         p -= jtbl[*p];
1171                         if (p >= pv) {
1172                                 p = pv - 1;
1173                         }
1174                 }
1175         }
1176 out:
             /* release only the copies this function created itself */
1177         if (haystack_u8 == &_haystack_u8) {
1178                 mbfl_string_clear(&_haystack_u8);
1179         }
1180         if (needle_u8 == &_needle_u8) {
1181                 mbfl_string_clear(&_needle_u8);
1182         }
1183         return result;
1184 }
1185 
1186 /*
1187  *  substr_count
1188  */
1189 
1190 int
1191 mbfl_substr_count(
1192     mbfl_string *haystack,
1193     mbfl_string *needle
1194    )
1195 {
1196         int n, result = 0;
1197         unsigned char *p;
1198         mbfl_convert_filter *filter;
1199         struct collector_strpos_data pc;
1200 
1201         if (haystack == NULL || needle == NULL) {
1202                 return -8;
1203         }
1204         /* needle is converted into wchar */
1205         mbfl_wchar_device_init(&pc.needle);
1206         filter = mbfl_convert_filter_new(
1207           needle->no_encoding,
1208           mbfl_no_encoding_wchar,
1209           mbfl_wchar_device_output, 0, &pc.needle);
1210         if (filter == NULL) {
1211                 return -4;
1212         }
1213         p = needle->val;
1214         n = needle->len;
1215         if (p != NULL) {
1216                 while (n > 0) {
1217                         if ((*filter->filter_function)(*p++, filter) < 0) {
1218                                 break;
1219                         }
1220                         n--;
1221                 }
1222         }
1223         mbfl_convert_filter_flush(filter);
1224         mbfl_convert_filter_delete(filter);
1225         pc.needle_len = pc.needle.pos;
1226         if (pc.needle.buffer == NULL) {
1227                 return -4;
1228         }
1229         if (pc.needle_len <= 0) {
1230                 mbfl_wchar_device_clear(&pc.needle);
1231                 return -2;
1232         }
1233         /* initialize filter and collector data */
1234         filter = mbfl_convert_filter_new(
1235           haystack->no_encoding,
1236           mbfl_no_encoding_wchar,
1237           collector_strpos, 0, &pc);
1238         if (filter == NULL) {
1239                 mbfl_wchar_device_clear(&pc.needle);
1240                 return -4;
1241         }
1242         pc.start = 0;
1243         pc.output = 0;
1244         pc.needle_pos = 0;
1245         pc.found_pos = 0;
1246         pc.matched_pos = -1;
1247 
1248         /* feed data */
1249         p = haystack->val;
1250         n = haystack->len;
1251         if (p != NULL) {
1252                 while (n > 0) {
1253                         if ((*filter->filter_function)(*p++, filter) < 0) {
1254                                 pc.matched_pos = -4;
1255                                 break;
1256                         }
1257                         if (pc.matched_pos >= 0) {
1258                                 ++result;
1259                                 pc.matched_pos = -1;
1260                                 pc.needle_pos = 0;
1261                         }
1262                         n--;
1263                 }
1264         }
1265         mbfl_convert_filter_flush(filter);
1266         mbfl_convert_filter_delete(filter);
1267         mbfl_wchar_device_clear(&pc.needle);
1268 
1269         return result;
1270 }
1271 
1272 /*
1273  *  substr
      *
      *  State for collector_substr(): a character window [start, stop) that is
      *  cut out of a stream and passed on to next_filter.
1274  */
1275 struct collector_substr_data {
1276         mbfl_convert_filter *next_filter;  /* receives every character inside the window */
1277         int start;                         /* index of the first character to pass on */
1278         int stop;                          /* index at which the collector starts returning -1 */
1279         int output;                        /* number of characters accepted so far */
1280 };
1281 
1282 static int
1283 collector_substr(int c, void* data)
1284 {
1285         struct collector_substr_data *pc = (struct collector_substr_data*)data;
1286 
1287         if (pc->output >= pc->stop) {
1288                 return -1;
1289         }
1290 
1291         if (pc->output >= pc->start) {
1292                 (*pc->next_filter->filter_function)(c, pc->next_filter);
1293         }
1294 
1295         pc->output++;
1296 
1297         return c;
1298 }
1299 
1300 mbfl_string *
1301 mbfl_substr(
1302     mbfl_string *string,
1303     mbfl_string *result,
1304     int from,
1305     int length)
1306 {
1307         const mbfl_encoding *encoding;
1308         int n, m, k, len, start, end;
1309         unsigned char *p, *w;
1310         const unsigned char *mbtab;
1311 
1312         encoding = mbfl_no2encoding(string->no_encoding);
1313         if (encoding == NULL || string == NULL || result == NULL) {
1314                 return NULL;
1315         }
1316         mbfl_string_init(result);
1317         result->no_language = string->no_language;
1318         result->no_encoding = string->no_encoding;
1319 
1320         if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
1321            encoding->mblen_table != NULL) {
1322                 len = string->len;
1323                 start = from;
1324                 end = from + length;
1325                 if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1326                         start *= 2;
1327                         end = start + length*2;
1328                 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1329                         start *= 4;
1330                         end = start + length*4;
1331                 } else if (encoding->mblen_table != NULL) {
1332                         mbtab = encoding->mblen_table;
1333                         start = 0;
1334                         end = 0;
1335                         n = 0;
1336                         k = 0;
1337                         p = string->val;
1338                         if (p != NULL) {
1339                                 /* search start position */
1340                                 while (k <= from) {
1341                                         start = n;
1342                                         if (n >= len) {
1343                                                 break;
1344                                         }
1345                                         m = mbtab[*p];
1346                                         n += m;
1347                                         p += m;
1348                                         k++;
1349                                 }
1350                                 /* detect end position */
1351                                 k = 0;
1352                                 end = start;
1353                                 while (k < length) {
1354                                         end = n;
1355                                         if (n >= len) {
1356                                                 break;
1357                                         }
1358                                         m = mbtab[*p];
1359                                         n += m;
1360                                         p += m;
1361                                         k++;
1362                                 }
1363                         }
1364                 }
1365 
1366                 if (start > len) {
1367                         start = len;
1368                 }
1369                 if (start < 0) {
1370                         start = 0;
1371                 }
1372                 if (end > len) {
1373                         end = len;
1374                 }
1375                 if (end < 0) {
1376                         end = 0;
1377                 }
1378                 if (start > end) {
1379                         start = end;
1380                 }
1381 
1382                 /* allocate memory and copy */
1383                 n = end - start;
1384                 result->len = 0;
1385                 result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
1386                 if (w != NULL) {
1387                         p = string->val;
1388                         if (p != NULL) {
1389                                 p += start;
1390                                 result->len = n;
1391                                 while (n > 0) {
1392                                         *w++ = *p++;
1393                                         n--;
1394                                 }
1395                         }
1396                         *w++ = '\0';
1397                         *w++ = '\0';
1398                         *w++ = '\0';
1399                         *w = '\0';
1400                 } else {
1401                         result = NULL;
1402                 }
1403         } else {
1404                 mbfl_memory_device device;
1405                 struct collector_substr_data pc;
1406                 mbfl_convert_filter *decoder;
1407                 mbfl_convert_filter *encoder;
1408 
1409                 mbfl_memory_device_init(&device, length + 1, 0);
1410                 mbfl_string_init(result);
1411                 result->no_language = string->no_language;
1412                 result->no_encoding = string->no_encoding;
1413                 /* output code filter */
1414                 decoder = mbfl_convert_filter_new(
1415                     mbfl_no_encoding_wchar,
1416                     string->no_encoding,
1417                     mbfl_memory_device_output, 0, &device);
1418                 /* wchar filter */
1419                 encoder = mbfl_convert_filter_new(
1420                     string->no_encoding,
1421                     mbfl_no_encoding_wchar,
1422                     collector_substr, 0, &pc);
1423                 if (decoder == NULL || encoder == NULL) {
1424                         mbfl_convert_filter_delete(encoder);
1425                         mbfl_convert_filter_delete(decoder);
1426                         return NULL;
1427                 }
1428                 pc.next_filter = decoder;
1429                 pc.start = from;
1430                 pc.stop = from + length;
1431                 pc.output = 0;
1432 
1433                 /* feed data */
1434                 p = string->val;
1435                 n = string->len;
1436                 if (p != NULL) {
1437                         while (n > 0) {
1438                                 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1439                                         break;
1440                                 }
1441                                 n--;
1442                         }
1443                 }
1444 
1445                 mbfl_convert_filter_flush(encoder);
1446                 mbfl_convert_filter_flush(decoder);
1447                 result = mbfl_memory_device_result(&device, result);
1448                 mbfl_convert_filter_delete(encoder);
1449                 mbfl_convert_filter_delete(decoder);
1450         }
1451 
1452         return result;
1453 }
1454 
1455 /*
1456  *  strcut
1457  */
1458 mbfl_string *
1459 mbfl_strcut(
1460     mbfl_string *string,
1461     mbfl_string *result,
1462     int from,
1463     int length)
1464 {
1465         const mbfl_encoding *encoding;
1466         mbfl_memory_device device;
1467 
1468         /* validate the parameters */
1469         if (string == NULL || string->val == NULL || result == NULL) {
1470                 return NULL;
1471         }
1472 
1473         if (from < 0 || length < 0) {
1474                 return NULL;
1475         }
1476 
1477         if (from >= string->len) {
1478                 from = string->len;
1479         }
1480 
1481         encoding = mbfl_no2encoding(string->no_encoding);
1482         if (encoding == NULL) {
1483                 return NULL;
1484         }
1485 
1486         mbfl_string_init(result);
1487         result->no_language = string->no_language;
1488         result->no_encoding = string->no_encoding;
1489 
1490         if ((encoding->flag & (MBFL_ENCTYPE_SBCS
1491                                 | MBFL_ENCTYPE_WCS2BE
1492                                 | MBFL_ENCTYPE_WCS2LE
1493                                 | MBFL_ENCTYPE_WCS4BE
1494                                 | MBFL_ENCTYPE_WCS4LE))
1495                         || encoding->mblen_table != NULL) {
1496                 const unsigned char *start = NULL;
1497                 const unsigned char *end = NULL;
1498                 unsigned char *w;
1499                 unsigned int sz;
1500 
1501                 if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
1502                         from &= -2;
1503 
1504                         if (length >= string->len - from) {
1505                                 length = string->len - from;
1506                         }
1507 
1508                         start = string->val + from;
1509                         end   = start + (length & -2);
1510                 } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
1511                         from &= -4;
1512 
1513                         if (length >= string->len - from) {
1514                                 length = string->len - from;
1515                         }
1516 
1517                         start = string->val + from;
1518                         end   = start + (length & -4);
1519                 } else if ((encoding->flag & MBFL_ENCTYPE_SBCS)) {
1520                         if (length >= string->len - from) {
1521                                 length = string->len - from;
1522                         }
1523 
1524                         start = string->val + from;
1525                         end = start + length;
1526                 } else if (encoding->mblen_table != NULL) {
1527                         const unsigned char *mbtab = encoding->mblen_table;
1528                         const unsigned char *p, *q;
1529                         int m;
1530 
1531                         /* search start position */
1532                         for (m = 0, p = string->val, q = p + from;
1533                                         p < q; p += (m = mbtab[*p]));
1534 
1535                         if (p > q) {
1536                                 p -= m;
1537                         }
1538 
1539                         start = p;
1540 
1541                         /* search end position */
1542                         if (length >= (int)string->len - (start - string->val)) {
1543                                 end = string->val + string->len;
1544                         } else {
1545                                 for (q = p + length; p < q; p += (m = mbtab[*p]));
1546 
1547                                 if (p > q) {
1548                                         p -= m;
1549                                 }
1550                                 end = p;
1551                         }
1552                 } else {
1553                         /* never reached */
1554                         return NULL;
1555                 }
1556 
1557                 /* allocate memory and copy string */
1558                 sz = end - start;
1559                 if ((w = (unsigned char*)mbfl_calloc(sz + 8,
1560                                 sizeof(unsigned char))) == NULL) {
1561                         return NULL;
1562                 }
1563 
1564                 memcpy(w, start, sz);
1565                 w[sz] = '\0';
1566                 w[sz + 1] = '\0';
1567                 w[sz + 2] = '\0';
1568                 w[sz + 3] = '\0';
1569 
1570                 result->val = w;
1571                 result->len = sz;
1572         } else {
1573                 mbfl_convert_filter *encoder     = NULL;
1574                 mbfl_convert_filter *decoder     = NULL;
1575                 const unsigned char *p, *q, *r;
1576                 struct {
1577                         mbfl_convert_filter encoder;
1578                         mbfl_convert_filter decoder;
1579                         const unsigned char *p;
1580                         int pos;
1581                 } bk, _bk;
1582 
1583                 /* output code filter */
1584                 if (!(decoder = mbfl_convert_filter_new(
1585                                 mbfl_no_encoding_wchar,
1586                                 string->no_encoding,
1587                                 mbfl_memory_device_output, 0, &device))) {
1588                         return NULL;
1589                 }
1590 
1591                 /* wchar filter */
1592                 if (!(encoder = mbfl_convert_filter_new(
1593                                 string->no_encoding,
1594                                 mbfl_no_encoding_wchar,
1595                                 mbfl_filter_output_null,
1596                                 NULL, NULL))) {
1597                         mbfl_convert_filter_delete(decoder);
1598                         return NULL;
1599                 }
1600 
1601                 mbfl_memory_device_init(&device, length + 8, 0);
1602 
1603                 p = string->val;
1604 
1605                 /* search start position */
1606                 for (q = string->val + from; p < q; p++) {
1607                         (*encoder->filter_function)(*p, encoder);
1608                 }
1609 
1610                 /* switch the drain direction */
1611                 encoder->output_function = (int(*)(int,void *))decoder->filter_function;
1612                 encoder->flush_function = (int(*)(void *))decoder->filter_flush;
1613                 encoder->data = decoder;
1614 
1615                 q = string->val + string->len;
1616 
1617                 /* save the encoder, decoder state and the pointer */
1618                 mbfl_convert_filter_copy(decoder, &_bk.decoder);
1619                 mbfl_convert_filter_copy(encoder, &_bk.encoder);
1620                 _bk.p = p;
1621                 _bk.pos = device.pos;
1622 
1623                 if (length > q - p) {
1624                         length = q - p;
1625                 }
1626 
1627                 if (length >= 20) {
1628                         /* output a little shorter than "length" */
1629                         /* XXX: the constant "20" was determined purely on the heuristics. */
1630                         for (r = p + length - 20; p < r; p++) {
1631                                 (*encoder->filter_function)(*p, encoder);
1632                         }
1633 
1634                         /* if the offset of the resulting string exceeds the length,
1635                          * then restore the state */
1636                         if (device.pos > length) {
1637                                 p = _bk.p;
1638                                 device.pos = _bk.pos;
1639                                 decoder->filter_dtor(decoder);
1640                                 encoder->filter_dtor(encoder);
1641                                 mbfl_convert_filter_copy(&_bk.decoder, decoder);
1642                                 mbfl_convert_filter_copy(&_bk.encoder, encoder);
1643                                 bk = _bk;
1644                         } else {
1645                                 /* save the encoder, decoder state and the pointer */
1646                                 mbfl_convert_filter_copy(decoder, &bk.decoder);
1647                                 mbfl_convert_filter_copy(encoder, &bk.encoder);
1648                                 bk.p = p;
1649                                 bk.pos = device.pos;
1650 
1651                                 /* flush the stream */
1652                                 (*encoder->filter_flush)(encoder);
1653 
1654                                 /* if the offset of the resulting string exceeds the length,
1655                                  * then restore the state */
1656                                 if (device.pos > length) {
1657                                         bk.decoder.filter_dtor(&bk.decoder);
1658                                         bk.encoder.filter_dtor(&bk.encoder);
1659 
1660                                         p = _bk.p;
1661                                         device.pos = _bk.pos;
1662                                         decoder->filter_dtor(decoder);
1663                                         encoder->filter_dtor(encoder);
1664                                         mbfl_convert_filter_copy(&_bk.decoder, decoder);
1665                                         mbfl_convert_filter_copy(&_bk.encoder, encoder);
1666                                         bk = _bk;
1667                                 } else {
1668                                         _bk.decoder.filter_dtor(&_bk.decoder);
1669                                         _bk.encoder.filter_dtor(&_bk.encoder);
1670 
1671                                         p = bk.p;
1672                                         device.pos = bk.pos;
1673                                         decoder->filter_dtor(decoder);
1674                                         encoder->filter_dtor(encoder);
1675                                         mbfl_convert_filter_copy(&bk.decoder, decoder);
1676                                         mbfl_convert_filter_copy(&bk.encoder, encoder);
1677                                 }
1678                         }
1679                 } else {
1680                         bk = _bk;
1681                 }
1682 
1683                 /* detect end position */
1684                 while (p < q) {
1685                         (*encoder->filter_function)(*p, encoder);
1686 
1687                         if (device.pos > length) {
1688                                 /* restore filter */
1689                                 p = bk.p;
1690                                 device.pos = bk.pos;
1691                                 decoder->filter_dtor(decoder);
1692                                 encoder->filter_dtor(encoder);
1693                                 mbfl_convert_filter_copy(&bk.decoder, decoder);
1694                                 mbfl_convert_filter_copy(&bk.encoder, encoder);
1695                                 break;
1696                         }
1697 
1698                         p++;
1699 
1700                         /* backup current state */
1701                         mbfl_convert_filter_copy(decoder, &_bk.decoder);
1702                         mbfl_convert_filter_copy(encoder, &_bk.encoder);
1703                         _bk.pos = device.pos;
1704                         _bk.p = p;
1705 
1706                         (*encoder->filter_flush)(encoder);
1707 
1708                         if (device.pos > length) {
1709                                 _bk.decoder.filter_dtor(&_bk.decoder);
1710                                 _bk.encoder.filter_dtor(&_bk.encoder);
1711 
1712                                 /* restore filter */
1713                                 p = bk.p;
1714                                 device.pos = bk.pos;
1715                                 decoder->filter_dtor(decoder);
1716                                 encoder->filter_dtor(encoder);
1717                                 mbfl_convert_filter_copy(&bk.decoder, decoder);
1718                                 mbfl_convert_filter_copy(&bk.encoder, encoder);
1719                                 break;
1720                         }
1721 
1722                         bk.decoder.filter_dtor(&bk.decoder);
1723                         bk.encoder.filter_dtor(&bk.encoder);
1724 
1725                         p = _bk.p;
1726                         device.pos = _bk.pos;
1727                         decoder->filter_dtor(decoder);
1728                         encoder->filter_dtor(encoder);
1729                         mbfl_convert_filter_copy(&_bk.decoder, decoder);
1730                         mbfl_convert_filter_copy(&_bk.encoder, encoder);
1731 
1732                         bk = _bk;
1733                 }
1734 
1735                 (*encoder->filter_flush)(encoder);
1736 
1737                 bk.decoder.filter_dtor(&bk.decoder);
1738                 bk.encoder.filter_dtor(&bk.encoder);
1739 
1740                 result = mbfl_memory_device_result(&device, result);
1741 
1742                 mbfl_convert_filter_delete(encoder);
1743                 mbfl_convert_filter_delete(decoder);
1744         }
1745 
1746         return result;
1747 }
1748 
1749 
1750 /*
1751  *  strwidth
1752  */
1753 static int is_fullwidth(int c)
1754 {
1755         int i;
1756 
1757         if (c < mbfl_eaw_table[0].begin) {
1758                 return 0;
1759         }
1760 
1761         for (i = 0; i < sizeof(mbfl_eaw_table) / sizeof(mbfl_eaw_table[0]); i++) {
1762                 if (mbfl_eaw_table[i].begin <= c && c <= mbfl_eaw_table[i].end) {
1763                         return 1;
1764                 }
1765         }
1766 
1767         return 0;
1768 }
1769 
/*
 * Filter output callback: adds the width of c to the int counter that
 * data points to (2 when is_fullwidth(c) is true, 1 otherwise).
 *
 * Returns c unchanged.
 */
static int
filter_count_width(int c, void* data)
{
	int *total = (int *)data;

	if (is_fullwidth(c)) {
		*total += 2;
	} else {
		*total += 1;
	}

	return c;
}
1776 
1777 int
1778 mbfl_strwidth(mbfl_string *string)
1779 {
1780         int len, n;
1781         unsigned char *p;
1782         mbfl_convert_filter *filter;
1783 
1784         len = 0;
1785         if (string->len > 0 && string->val != NULL) {
1786                 /* wchar filter */
1787                 filter = mbfl_convert_filter_new(
1788                     string->no_encoding,
1789                     mbfl_no_encoding_wchar,
1790                     filter_count_width, 0, &len);
1791                 if (filter == NULL) {
1792                         mbfl_convert_filter_delete(filter);
1793                         return -1;
1794                 }
1795 
1796                 /* feed data */
1797                 p = string->val;
1798                 n = string->len;
1799                 while (n > 0) {
1800                         (*filter->filter_function)(*p++, filter);
1801                         n--;
1802                 }
1803 
1804                 mbfl_convert_filter_flush(filter);
1805                 mbfl_convert_filter_delete(filter);
1806         }
1807 
1808         return len;
1809 }
1810 
1811 
1812 /*
1813  *  strimwidth
1814  */
/*
 * Working state shared by mbfl_strimwidth() and its output callback
 * collector_strimwidth().
 *
 *   decoder         wchar -> string encoding filter writing into device
 *   decoder_backup  copy of decoder taken at the moment the width limit is
 *                   first exceeded; copied back when the output is rewound
 *   device          memory device collecting the output bytes
 *   from            number of leading characters to skip before counting
 *   width           width limit currently in force
 *   outwidth        width accumulated so far (2 per fullwidth char, else 1)
 *   outchar         number of characters seen so far
 *   status          0 until the limit is exceeded, then incremented on every
 *                   character that exceeds it; set to 10 by mbfl_strimwidth()
 *                   to switch the callback to plain pass-through
 *   endpos          device.pos recorded when the limit was first exceeded
 */
1815 struct collector_strimwidth_data {
1816         mbfl_convert_filter *decoder;
1817         mbfl_convert_filter *decoder_backup;
1818         mbfl_memory_device device;
1819         int from;
1820         int width;
1821         int outwidth;
1822         int outchar;
1823         int status;
1824         int endpos;
1825 };
1826 
1827 static int
1828 collector_strimwidth(int c, void* data)
1829 {
1830         struct collector_strimwidth_data *pc = (struct collector_strimwidth_data*)data;
1831 
1832         switch (pc->status) {
1833         case 10:
1834                 (*pc->decoder->filter_function)(c, pc->decoder);
1835                 break;
1836         default:
1837                 if (pc->outchar >= pc->from) {
1838                         pc->outwidth += (is_fullwidth(c) ? 2: 1);
1839 
1840                         if (pc->outwidth > pc->width) {
1841                                 if (pc->status == 0) {
1842                                         pc->endpos = pc->device.pos;
1843                                         mbfl_convert_filter_copy(pc->decoder, pc->decoder_backup);
1844                                 }
1845                                 pc->status++;
1846                                 (*pc->decoder->filter_function)(c, pc->decoder);
1847                                 c = -1;
1848                         } else {
1849                                 (*pc->decoder->filter_function)(c, pc->decoder);
1850                         }
1851                 }
1852                 pc->outchar++;
1853                 break;
1854         }
1855 
1856         return c;
1857 }
1858 
1859 mbfl_string *
1860 mbfl_strimwidth(
1861     mbfl_string *string,
1862     mbfl_string *marker,
1863     mbfl_string *result,
1864     int from,
1865     int width)
1866 {
1867         struct collector_strimwidth_data pc;
1868         mbfl_convert_filter *encoder;
1869         int n, mkwidth;
1870         unsigned char *p;
1871 
1872         if (string == NULL || result == NULL) {
1873                 return NULL;
1874         }
1875         mbfl_string_init(result);
1876         result->no_language = string->no_language;
1877         result->no_encoding = string->no_encoding;
1878         mbfl_memory_device_init(&pc.device, width, 0);
1879 
1880         /* output code filter */
1881         pc.decoder = mbfl_convert_filter_new(
1882             mbfl_no_encoding_wchar,
1883             string->no_encoding,
1884             mbfl_memory_device_output, 0, &pc.device);
1885         pc.decoder_backup = mbfl_convert_filter_new(
1886             mbfl_no_encoding_wchar,
1887             string->no_encoding,
1888             mbfl_memory_device_output, 0, &pc.device);
1889         /* wchar filter */
1890         encoder = mbfl_convert_filter_new(
1891             string->no_encoding,
1892             mbfl_no_encoding_wchar,
1893             collector_strimwidth, 0, &pc);
1894         if (pc.decoder == NULL || pc.decoder_backup == NULL || encoder == NULL) {
1895                 mbfl_convert_filter_delete(encoder);
1896                 mbfl_convert_filter_delete(pc.decoder);
1897                 mbfl_convert_filter_delete(pc.decoder_backup);
1898                 return NULL;
1899         }
1900         mkwidth = 0;
1901         if (marker) {
1902                 mkwidth = mbfl_strwidth(marker);
1903         }
1904         pc.from = from;
1905         pc.width = width - mkwidth;
1906         pc.outwidth = 0;
1907         pc.outchar = 0;
1908         pc.status = 0;
1909         pc.endpos = 0;
1910 
1911         /* feed data */
1912         p = string->val;
1913         n = string->len;
1914         if (p != NULL) {
1915                 while (n > 0) {
1916                         n--;
1917                         if ((*encoder->filter_function)(*p++, encoder) < 0) {
1918                                 break;
1919                         }
1920                 }
1921                 mbfl_convert_filter_flush(encoder);
1922                 if (pc.status != 0 && mkwidth > 0) {
1923                         pc.width += mkwidth;
1924                         while (n > 0) {
1925                                 if ((*encoder->filter_function)(*p++, encoder) < 0) {
1926                                         break;
1927                                 }
1928                                 n--;
1929                         }
1930                         mbfl_convert_filter_flush(encoder);
1931                         if (pc.status != 1) {
1932                                 pc.status = 10;
1933                                 pc.device.pos = pc.endpos;
1934                                 mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1935                                 mbfl_convert_filter_reset(encoder, marker->no_encoding, mbfl_no_encoding_wchar);
1936                                 p = marker->val;
1937                                 n = marker->len;
1938                                 while (n > 0) {
1939                                         if ((*encoder->filter_function)(*p++, encoder) < 0) {
1940                                                 break;
1941                                         }
1942                                         n--;
1943                                 }
1944                                 mbfl_convert_filter_flush(encoder);
1945                         }
1946                 } else if (pc.status != 0) {
1947                         pc.device.pos = pc.endpos;
1948                         mbfl_convert_filter_copy(pc.decoder_backup, pc.decoder);
1949                 }
1950                 mbfl_convert_filter_flush(pc.decoder);
1951         }
1952         result = mbfl_memory_device_result(&pc.device, result);
1953         mbfl_convert_filter_delete(encoder);
1954         mbfl_convert_filter_delete(pc.decoder);
1955         mbfl_convert_filter_delete(pc.decoder_backup);
1956 
1957         return result;
1958 }
1959 
1960 mbfl_string *
1961 mbfl_ja_jp_hantozen(
1962     mbfl_string *string,
1963     mbfl_string *result,
1964     int mode)
1965 {
1966         int n;
1967         unsigned char *p;
1968         const mbfl_encoding *encoding;
1969         mbfl_memory_device device;
1970         mbfl_convert_filter *decoder = NULL;
1971         mbfl_convert_filter *encoder = NULL;
1972         mbfl_convert_filter *tl_filter = NULL;
1973         mbfl_convert_filter *next_filter = NULL;
1974         mbfl_filt_tl_jisx0201_jisx0208_param *param = NULL;
1975 
1976         /* validate parameters */
1977         if (string == NULL || result == NULL) {
1978                 return NULL;
1979         }
1980 
1981         encoding = mbfl_no2encoding(string->no_encoding);
1982         if (encoding == NULL) {
1983                 return NULL;
1984         }
1985 
1986         mbfl_memory_device_init(&device, string->len, 0);
1987         mbfl_string_init(result);
1988 
1989         result->no_language = string->no_language;
1990         result->no_encoding = string->no_encoding;
1991 
1992         decoder = mbfl_convert_filter_new(
1993                 mbfl_no_encoding_wchar,
1994                 string->no_encoding,
1995                 mbfl_memory_device_output, 0, &device);
1996         if (decoder == NULL) {
1997                 goto out;
1998         }
1999         next_filter = decoder;
2000 
2001         param =
2002                 (mbfl_filt_tl_jisx0201_jisx0208_param *)mbfl_malloc(sizeof(mbfl_filt_tl_jisx0201_jisx0208_param));
2003         if (param == NULL) {
2004                 goto out;
2005         }
2006 
2007         param->mode = mode;
2008 
2009         tl_filter = mbfl_convert_filter_new2(
2010                 &vtbl_tl_jisx0201_jisx0208,
2011                 (int(*)(int, void*))next_filter->filter_function,
2012                 (int(*)(void*))next_filter->filter_flush,
2013                 next_filter);
2014         if (tl_filter == NULL) {
2015                 mbfl_free(param);
2016                 goto out;
2017         }
2018 
2019         tl_filter->opaque = param;
2020         next_filter = tl_filter;
2021 
2022         encoder = mbfl_convert_filter_new(
2023                 string->no_encoding,
2024                 mbfl_no_encoding_wchar,
2025                 (int(*)(int, void*))next_filter->filter_function,
2026                 (int(*)(void*))next_filter->filter_flush,
2027                 next_filter);
2028         if (encoder == NULL) {
2029                 goto out;
2030         }
2031 
2032         /* feed data */
2033         p = string->val;
2034         n = string->len;
2035         if (p != NULL) {
2036                 while (n > 0) {
2037                         if ((*encoder->filter_function)(*p++, encoder) < 0) {
2038                                 break;
2039                         }
2040                         n--;
2041                 }
2042         }
2043 
2044         mbfl_convert_filter_flush(encoder);
2045         result = mbfl_memory_device_result(&device, result);
2046 out:
2047         if (tl_filter != NULL) {
2048                 if (tl_filter->opaque != NULL) {
2049                         mbfl_free(tl_filter->opaque);
2050                 }
2051                 mbfl_convert_filter_delete(tl_filter);
2052         }
2053 
2054         if (decoder != NULL) {
2055                 mbfl_convert_filter_delete(decoder);
2056         }
2057 
2058         if (encoder != NULL) {
2059                 mbfl_convert_filter_delete(encoder);
2060         }
2061 
2062         return result;
2063 }
2064 
2065 
2066 /*
2067  *  MIME header encode
2068  */
/*
 * State of one MIME header encoder (see mime_header_encoder_new()).
 *
 *   conv1_filter         input encoding -> wchar; output goes to
 *                        mime_header_encoder_collector()
 *   block_filter         wchar -> wchar; output goes to
 *                        mime_header_encoder_block_collector()
 *   conv2_filter         wchar -> output charset, piped into encod_filter
 *   conv2_filter_backup  scratch copy of conv2_filter used to save/restore
 *                        its state around a trial feed
 *   encod_filter         output charset -> transfer encoding, writes outdev
 *   encod_filter_backup  scratch copy of encod_filter, same purpose
 *   outdev               finished header text
 *   tmpdev               ordinary characters buffered but not yet emitted
 *   status1              collector state: set to 1 once an ordinary character
 *                        is buffered, back to 0 when tmpdev is flushed at a
 *                        space, 11 once characters go to block_filter
 *   status2              block collector state: 1 after the encoded-word
 *                        prefix has been written
 *   prevpos              outdev.pos saved before a trial feed
 *   linehead             outdev.pos at the start of the current output line
 *   firstindent          extra columns counted on the first line; zeroed
 *                        whenever a line break is written
 *   encnamelen, encname  encoded-word prefix, e.g. "=?ISO-2022-JP?B?"
 *   lwsplen, lwsp        line-break sequence followed by a space
 */
2069 struct mime_header_encoder_data {
2070         mbfl_convert_filter *conv1_filter;
2071         mbfl_convert_filter *block_filter;
2072         mbfl_convert_filter *conv2_filter;
2073         mbfl_convert_filter *conv2_filter_backup;
2074         mbfl_convert_filter *encod_filter;
2075         mbfl_convert_filter *encod_filter_backup;
2076         mbfl_memory_device outdev;
2077         mbfl_memory_device tmpdev;
2078         int status1;
2079         int status2;
2080         int prevpos;
2081         int linehead;
2082         int firstindent;
2083         int encnamelen;
2084         int lwsplen;
2085         char encname[128];
2086         char lwsp[16];
2087 };
2088 
2089 static int
2090 mime_header_encoder_block_collector(int c, void *data)
2091 {
2092         int n;
2093         struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2094 
2095         switch (pe->status2) {
2096         case 1: /* encoded word */
2097                 pe->prevpos = pe->outdev.pos;
2098                 mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup);
2099                 mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup);
2100                 (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2101                 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2102                 (*pe->encod_filter->filter_flush)(pe->encod_filter);
2103                 n = pe->outdev.pos - pe->linehead + pe->firstindent;
2104                 pe->outdev.pos = pe->prevpos;
2105                 mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter);
2106                 mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter);
2107                 if (n >= 74) {
2108                         (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2109                         (*pe->encod_filter->filter_flush)(pe->encod_filter);
2110                         mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */
2111                         mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2112                         pe->linehead = pe->outdev.pos;
2113                         pe->firstindent = 0;
2114                         mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2115                         c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2116                 } else {
2117                         c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2118                 }
2119                 break;
2120 
2121         default:
2122                 mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen);
2123                 c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter);
2124                 pe->status2 = 1;
2125                 break;
2126         }
2127 
2128         return c;
2129 }
2130 
2131 static int
2132 mime_header_encoder_collector(int c, void *data)
2133 {
2134         static int qp_table[256] = {
2135                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2136                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */
2137                 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
2138                 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */
2139                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
2140                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */
2141                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
2142                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */
2143                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */
2144                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */
2145                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */
2146                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */
2147                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
2148                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
2149                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */
2150                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  /* 0xF0 */
2151         };
2152 
2153         int n;
2154         struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data;
2155 
2156         switch (pe->status1) {
2157         case 11:        /* encoded word */
2158                 (*pe->block_filter->filter_function)(c, pe->block_filter);
2159                 break;
2160 
2161         default:        /* ASCII */
2162                 if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */
2163                         mbfl_memory_device_output(c, &pe->tmpdev);
2164                         pe->status1 = 1;
2165                 } else if (pe->status1 == 0 && c == 0x20) {     /* repeat SPACE */
2166                         mbfl_memory_device_output(c, &pe->tmpdev);
2167                 } else {
2168                         if (pe->tmpdev.pos < 74 && c == 0x20) {
2169                                 n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent;
2170                                 if (n > 74) {
2171                                         mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);         /* LWSP */
2172                                         pe->linehead = pe->outdev.pos;
2173                                         pe->firstindent = 0;
2174                                 } else if (pe->outdev.pos > 0) {
2175                                         mbfl_memory_device_output(0x20, &pe->outdev);
2176                                 }
2177                                 mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2178                                 mbfl_memory_device_reset(&pe->tmpdev);
2179                                 pe->status1 = 0;
2180                         } else {
2181                                 n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent;
2182                                 if (n > 60)  {
2183                                         mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);         /* LWSP */
2184                                         pe->linehead = pe->outdev.pos;
2185                                         pe->firstindent = 0;
2186                                 } else if (pe->outdev.pos > 0)  {
2187                                         mbfl_memory_device_output(0x20, &pe->outdev);
2188                                 }
2189                                 mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev);
2190                                 mbfl_memory_device_reset(&pe->tmpdev);
2191                                 (*pe->block_filter->filter_function)(c, pe->block_filter);
2192                                 pe->status1 = 11;
2193                         }
2194                 }
2195                 break;
2196         }
2197 
2198         return c;
2199 }
2200 
2201 mbfl_string *
2202 mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result)
2203 {
2204         if (pe->status1 >= 10) {
2205                 (*pe->conv2_filter->filter_flush)(pe->conv2_filter);
2206                 (*pe->encod_filter->filter_flush)(pe->encod_filter);
2207                 mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2);         /* ?= */
2208         } else if (pe->tmpdev.pos > 0) {
2209                 if (pe->outdev.pos > 0) {
2210                         if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos) > 74) {
2211                                 mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen);
2212                         } else {
2213                                 mbfl_memory_device_output(0x20, &pe->outdev);
2214                         }
2215                 }
2216                 mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev);
2217         }
2218         mbfl_memory_device_reset(&pe->tmpdev);
2219         pe->prevpos = 0;
2220         pe->linehead = 0;
2221         pe->status1 = 0;
2222         pe->status2 = 0;
2223 
2224         return mbfl_memory_device_result(&pe->outdev, result);
2225 }
2226 
2227 struct mime_header_encoder_data*
2228 mime_header_encoder_new(
2229     enum mbfl_no_encoding incode,
2230     enum mbfl_no_encoding outcode,
2231     enum mbfl_no_encoding transenc)
2232 {
2233         int n;
2234         const char *s;
2235         const mbfl_encoding *outencoding;
2236         struct mime_header_encoder_data *pe;
2237 
2238         /* get output encoding and check MIME charset name */
2239         outencoding = mbfl_no2encoding(outcode);
2240         if (outencoding == NULL || outencoding->mime_name == NULL || outencoding->mime_name[0] == '\0') {
2241                 return NULL;
2242         }
2243 
2244         pe = (struct mime_header_encoder_data*)mbfl_malloc(sizeof(struct mime_header_encoder_data));
2245         if (pe == NULL) {
2246                 return NULL;
2247         }
2248 
2249         mbfl_memory_device_init(&pe->outdev, 0, 0);
2250         mbfl_memory_device_init(&pe->tmpdev, 0, 0);
2251         pe->prevpos = 0;
2252         pe->linehead = 0;
2253         pe->firstindent = 0;
2254         pe->status1 = 0;
2255         pe->status2 = 0;
2256 
2257         /* make the encoding description string  exp. "=?ISO-2022-JP?B?" */
2258         n = 0;
2259         pe->encname[n++] = 0x3d;
2260         pe->encname[n++] = 0x3f;
2261         s = outencoding->mime_name;
2262         while (*s) {
2263                 pe->encname[n++] = *s++;
2264         }
2265         pe->encname[n++] = 0x3f;
2266         if (transenc == mbfl_no_encoding_qprint) {
2267                 pe->encname[n++] = 0x51;
2268         } else {
2269                 pe->encname[n++] = 0x42;
2270                 transenc = mbfl_no_encoding_base64;
2271         }
2272         pe->encname[n++] = 0x3f;
2273         pe->encname[n] = '\0';
2274         pe->encnamelen = n;
2275 
2276         n = 0;
2277         pe->lwsp[n++] = 0x0d;
2278         pe->lwsp[n++] = 0x0a;
2279         pe->lwsp[n++] = 0x20;
2280         pe->lwsp[n] = '\0';
2281         pe->lwsplen = n;
2282 
2283         /* transfer encode filter */
2284         pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2285         pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev));
2286 
2287         /* Output code filter */
2288         pe->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2289         pe->conv2_filter_backup = mbfl_convert_filter_new(mbfl_no_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter);
2290 
2291         /* encoded block filter */
2292         pe->block_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, mbfl_no_encoding_wchar, mime_header_encoder_block_collector, 0, pe);
2293 
2294         /* Input code filter */
2295         pe->conv1_filter = mbfl_convert_filter_new(incode, mbfl_no_encoding_wchar, mime_header_encoder_collector, 0, pe);
2296 
2297         if (pe->encod_filter == NULL ||
2298             pe->encod_filter_backup == NULL ||
2299             pe->conv2_filter == NULL ||
2300             pe->conv2_filter_backup == NULL ||
2301             pe->conv1_filter == NULL) {
2302                 mime_header_encoder_delete(pe);
2303                 return NULL;
2304         }
2305 
2306         if (transenc == mbfl_no_encoding_qprint) {
2307                 pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER;
2308                 pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER;
2309         } else {
2310                 pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER;
2311                 pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER;
2312         }
2313 
2314         return pe;
2315 }
2316 
2317 void
2318 mime_header_encoder_delete(struct mime_header_encoder_data *pe)
2319 {
2320         if (pe) {
2321                 mbfl_convert_filter_delete(pe->conv1_filter);
2322                 mbfl_convert_filter_delete(pe->block_filter);
2323                 mbfl_convert_filter_delete(pe->conv2_filter);
2324                 mbfl_convert_filter_delete(pe->conv2_filter_backup);
2325                 mbfl_convert_filter_delete(pe->encod_filter);
2326                 mbfl_convert_filter_delete(pe->encod_filter_backup);
2327                 mbfl_memory_device_clear(&pe->outdev);
2328                 mbfl_memory_device_clear(&pe->tmpdev);
2329                 mbfl_free((void*)pe);
2330         }
2331 }
2332 
2333 int
2334 mime_header_encoder_feed(int c, struct mime_header_encoder_data *pe)
2335 {
2336         return (*pe->conv1_filter->filter_function)(c, pe->conv1_filter);
2337 }
2338 
2339 mbfl_string *
2340 mbfl_mime_header_encode(
2341     mbfl_string *string,
2342     mbfl_string *result,
2343     enum mbfl_no_encoding outcode,
2344     enum mbfl_no_encoding encoding,
2345     const char *linefeed,
2346     int indent)
2347 {
2348         int n;
2349         unsigned char *p;
2350         struct mime_header_encoder_data *pe;
2351 
2352         mbfl_string_init(result);
2353         result->no_language = string->no_language;
2354         result->no_encoding = mbfl_no_encoding_ascii;
2355 
2356         pe = mime_header_encoder_new(string->no_encoding, outcode, encoding);
2357         if (pe == NULL) {
2358                 return NULL;
2359         }
2360 
2361         if (linefeed != NULL) {
2362                 n = 0;
2363                 while (*linefeed && n < 8) {
2364                         pe->lwsp[n++] = *linefeed++;
2365                 }
2366                 pe->lwsp[n++] = 0x20;
2367                 pe->lwsp[n] = '\0';
2368                 pe->lwsplen = n;
2369         }
2370         if (indent > 0 && indent < 74) {
2371                 pe->firstindent = indent;
2372         }
2373 
2374         n = string->len;
2375         p = string->val;
2376         while (n > 0) {
2377                 (*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter);
2378                 n--;
2379         }
2380 
2381         result = mime_header_encoder_result(pe, result);
2382         mime_header_encoder_delete(pe);
2383 
2384         return result;
2385 }
2386 
2387 
2388 /*
2389  *  MIME header decode
2390  */
/*
 * State of one MIME header decoder, used by
 * mime_header_decoder_collector().
 *
 *   conv1_filter  receives the contents of tmpdev when the buffered
 *                 characters turn out not to start an encoded word
 *   tmpdev        raw characters of a possible encoded word
 *   cspos         tmpdev.pos recorded after the first '?', i.e. the offset
 *                 at which the charset name begins
 *   status        state of the collector's state machine
 *   incode        encoding number looked up from the charset name
 *
 * NOTE(review): deco_filter, conv2_filter, outdev, encoding and outcode are
 * set and used outside the part of the file reviewed here; presumably the
 * transfer-decoding filter, the output conversion filter, the output buffer,
 * the transfer encoding and the output encoding -- confirm against the
 * rest of the decoder.
 */
2391 struct mime_header_decoder_data {
2392         mbfl_convert_filter *deco_filter;
2393         mbfl_convert_filter *conv1_filter;
2394         mbfl_convert_filter *conv2_filter;
2395         mbfl_memory_device outdev;
2396         mbfl_memory_device tmpdev;
2397         int cspos;
2398         int status;
2399         enum mbfl_no_encoding encoding;
2400         enum mbfl_no_encoding incode;
2401         enum mbfl_no_encoding outcode;
2402 };
2403 
2404 static int
2405 mime_header_decoder_collector(int c, void* data)
2406 {
2407         const mbfl_encoding *encoding;
2408         struct mime_header_decoder_data *pd = (struct mime_header_decoder_data*)data;
2409 
2410         switch (pd->status) {
2411         case 1:
2412                 if (c == 0x3f) {                /* ? */
2413                         mbfl_memory_device_output(c, &pd->tmpdev);
2414                         pd->cspos = pd->tmpdev.pos;
2415                         pd->status = 2;
2416                 } else {
2417                         mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2418                         mbfl_memory_device_reset(&pd->tmpdev);
2419                         if (c == 0x3d) {                /* = */
2420                                 mbfl_memory_device_output(c, &pd->tmpdev);
2421                         } else if (c == 0x0d || c == 0x0a) {    /* CR or LF */
2422                                 pd->status = 9;
2423                         } else {
2424                                 (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2425                                 pd->status = 0;
2426                         }
2427                 }
2428                 break;
2429         case 2:         /* store charset string */
2430                 if (c == 0x3f) {                /* ? */
2431                         /* identify charset */
2432                         mbfl_memory_device_output('\0', &pd->tmpdev);
2433                         encoding = mbfl_name2encoding((const char *)&pd->tmpdev.buffer[pd->cspos]);
2434                         if (encoding != NULL) {
2435                                 pd->incode = encoding->no_encoding;
2436                                 pd->status = 3;
2437                         }
2438                         mbfl_memory_device_unput(&pd->tmpdev);
2439                         mbfl_memory_device_output(c, &pd->tmpdev);
2440                 } else {
2441                         mbfl_memory_device_output(c, &pd->tmpdev);
2442                         if (pd->tmpdev.pos > 100) {             /* too long charset string */
2443                                 pd->status = 0;
2444                         } else if (c == 0x0d || c == 0x0a) {    /* CR or LF */
2445                                 mbfl_memory_device_unput(&pd->tmpdev);
2446                                 pd->status = 9;
2447                         }
2448                         if (pd->status != 2) {
2449                                 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2450                                 mbfl_memory_device_reset(&pd->tmpdev);
2451                         }
2452                 }
2453                 break;
2454         case 3:         /* identify encoding */
2455                 mbfl_memory_device_output(c, &pd->tmpdev);
2456                 if (c == 0x42 || c == 0x62) {           /* 'B' or 'b' */
2457                         pd->encoding = mbfl_no_encoding_base64;
2458                         pd->status = 4;
2459                 } else if (c == 0x51 || c == 0x71) {    /* 'Q' or 'q' */
2460                         pd->encoding = mbfl_no_encoding_qprint;
2461                         pd->status = 4;
2462                 } else {
2463                         if (c == 0x0d || c == 0x0a) {   /* CR or LF */
2464                                 mbfl_memory_device_unput(&pd->tmpdev);
2465                                 pd->status = 9;
2466                         } else {
2467                                 pd->status = 0;
2468                         }
2469                         mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2470                         mbfl_memory_device_reset(&pd->tmpdev);
2471                 }
2472                 break;
2473         case 4:         /* reset filter */
2474                 mbfl_memory_device_output(c, &pd->tmpdev);
2475                 if (c == 0x3f) {                /* ? */
2476                         /* charset convert filter */
2477                         mbfl_convert_filter_reset(pd->conv1_filter, pd->incode, mbfl_no_encoding_wchar);
2478                         /* decode filter */
2479                         mbfl_convert_filter_reset(pd->deco_filter, pd->encoding, mbfl_no_encoding_8bit);
2480                         pd->status = 5;
2481                 } else {
2482                         if (c == 0x0d || c == 0x0a) {   /* CR or LF */
2483                                 mbfl_memory_device_unput(&pd->tmpdev);
2484                                 pd->status = 9;
2485                         } else {
2486                                 pd->status = 0;
2487                         }
2488                         mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2489                 }
2490                 mbfl_memory_device_reset(&pd->tmpdev);
2491                 break;
2492         case 5:         /* encoded block */
2493                 if (c == 0x3f) {                /* ? */
2494                         pd->status = 6;
2495                 } else {
2496                         (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2497                 }
2498                 break;
2499         case 6:         /* check end position */
2500                 if (c == 0x3d) {                /* = */
2501                         /* flush and reset filter */
2502                         (*pd->deco_filter->filter_flush)(pd->deco_filter);
2503                         (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2504                         mbfl_convert_filter_reset(pd->conv1_filter, mbfl_no_encoding_ascii, mbfl_no_encoding_wchar);
2505                         pd->status = 7;
2506                 } else {
2507                         (*pd->deco_filter->filter_function)(0x3f, pd->deco_filter);
2508                         if (c != 0x3f) {                /* ? */
2509                                 (*pd->deco_filter->filter_function)(c, pd->deco_filter);
2510                                 pd->status = 5;
2511                         }
2512                 }
2513                 break;
2514         case 7:         /* after encoded block */
2515                 if (c == 0x0d || c == 0x0a) {   /* CR LF */
2516                         pd->status = 8;
2517                 } else {
2518                         mbfl_memory_device_output(c, &pd->tmpdev);
2519                         if (c == 0x3d) {                /* = */
2520                                 pd->status = 1;
2521                         } else if (c != 0x20 && c != 0x09) {            /* not space */
2522                                 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2523                                 mbfl_memory_device_reset(&pd->tmpdev);
2524                                 pd->status = 0;
2525                         }
2526                 }
2527                 break;
2528         case 8:         /* folding */
2529         case 9:         /* folding */
2530                 if (c != 0x0d && c != 0x0a && c != 0x20 && c != 0x09) {
2531                         if (c == 0x3d) {                /* = */
2532                                 if (pd->status == 8) {
2533                                         mbfl_memory_device_output(0x20, &pd->tmpdev);   /* SPACE */
2534                                 } else {
2535                                         (*pd->conv1_filter->filter_function)(0x20, pd->conv1_filter);
2536                                 }
2537                                 mbfl_memory_device_output(c, &pd->tmpdev);
2538                                 pd->status = 1;
2539                         } else {
2540                                 mbfl_memory_device_output(0x20, &pd->tmpdev);
2541                                 mbfl_memory_device_output(c, &pd->tmpdev);
2542                                 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2543                                 mbfl_memory_device_reset(&pd->tmpdev);
2544                                 pd->status = 0;
2545                         }
2546                 }
2547                 break;
2548         default:                /* non encoded block */
2549                 if (c == 0x0d || c == 0x0a) {   /* CR LF */
2550                         pd->status = 9;
2551                 } else if (c == 0x3d) {         /* = */
2552                         mbfl_memory_device_output(c, &pd->tmpdev);
2553                         pd->status = 1;
2554                 } else {
2555                         (*pd->conv1_filter->filter_function)(c, pd->conv1_filter);
2556                 }
2557                 break;
2558         }
2559 
2560         return c;
2561 }
2562 
2563 mbfl_string *
2564 mime_header_decoder_result(struct mime_header_decoder_data *pd, mbfl_string *result)
2565 {
2566         switch (pd->status) {
2567         case 1:
2568         case 2:
2569         case 3:
2570         case 4:
2571         case 7:
2572         case 8:
2573         case 9:
2574                 mbfl_convert_filter_devcat(pd->conv1_filter, &pd->tmpdev);
2575                 break;
2576         case 5:
2577         case 6:
2578                 (*pd->deco_filter->filter_flush)(pd->deco_filter);
2579                 (*pd->conv1_filter->filter_flush)(pd->conv1_filter);
2580                 break;
2581         }
2582         (*pd->conv2_filter->filter_flush)(pd->conv2_filter);
2583         mbfl_memory_device_reset(&pd->tmpdev);
2584         pd->status = 0;
2585 
2586         return mbfl_memory_device_result(&pd->outdev, result);
2587 }
2588 
2589 struct mime_header_decoder_data*
2590 mime_header_decoder_new(enum mbfl_no_encoding outcode)
2591 {
2592         struct mime_header_decoder_data *pd;
2593 
2594         pd = (struct mime_header_decoder_data*)mbfl_malloc(sizeof(struct mime_header_decoder_data));
2595         if (pd == NULL) {
2596                 return NULL;
2597         }
2598 
2599         mbfl_memory_device_init(&pd->outdev, 0, 0);
2600         mbfl_memory_device_init(&pd->tmpdev, 0, 0);
2601         pd->cspos = 0;
2602         pd->status = 0;
2603         pd->encoding = mbfl_no_encoding_pass;
2604         pd->incode = mbfl_no_encoding_ascii;
2605         pd->outcode = outcode;
2606         /* charset convert filter */
2607         pd->conv2_filter = mbfl_convert_filter_new(mbfl_no_encoding_wchar, pd->outcode, mbfl_memory_device_output, 0, &pd->outdev);
2608         pd->conv1_filter = mbfl_convert_filter_new(pd->incode, mbfl_no_encoding_wchar, mbfl_filter_output_pipe, 0, pd->conv2_filter);
2609         /* decode filter */
2610         pd->deco_filter = mbfl_convert_filter_new(pd->encoding, mbfl_no_encoding_8bit, mbfl_filter_output_pipe, 0, pd->conv1_filter);
2611 
2612         if (pd->conv1_filter == NULL || pd->conv2_filter == NULL || pd->deco_filter == NULL) {
2613                 mime_header_decoder_delete(pd);
2614                 return NULL;
2615         }
2616 
2617         return pd;
2618 }
2619 
2620 void
2621 mime_header_decoder_delete(struct mime_header_decoder_data *pd)
2622 {
2623         if (pd) {
2624                 mbfl_convert_filter_delete(pd->conv2_filter);
2625                 mbfl_convert_filter_delete(pd->conv1_filter);
2626                 mbfl_convert_filter_delete(pd->deco_filter);
2627                 mbfl_memory_device_clear(&pd->outdev);
2628                 mbfl_memory_device_clear(&pd->tmpdev);
2629                 mbfl_free((void*)pd);
2630         }
2631 }
2632 
/*
 * Feed one byte of header text to the decoder pd.
 * Returns what mime_header_decoder_collector() returns, i.e. c.
 */
int
mime_header_decoder_feed(int c, struct mime_header_decoder_data *pd)
{
	int fed;

	fed = mime_header_decoder_collector(c, pd);
	return fed;
}
2638 
2639 mbfl_string *
2640 mbfl_mime_header_decode(
2641     mbfl_string *string,
2642     mbfl_string *result,
2643     enum mbfl_no_encoding outcode)
2644 {
2645         int n;
2646         unsigned char *p;
2647         struct mime_header_decoder_data *pd;
2648 
2649         mbfl_string_init(result);
2650         result->no_language = string->no_language;
2651         result->no_encoding = outcode;
2652 
2653         pd = mime_header_decoder_new(outcode);
2654         if (pd == NULL) {
2655                 return NULL;
2656         }
2657 
2658         /* feed data */
2659         n = string->len;
2660         p = string->val;
2661         while (n > 0) {
2662                 mime_header_decoder_collector(*p++, pd);
2663                 n--;
2664         }
2665 
2666         result = mime_header_decoder_result(pd, result);
2667         mime_header_decoder_delete(pd);
2668 
2669         return result;
2670 }
2671 
2672 
2673 
2674 /*
2675  *  convert HTML numeric entity
2676  */
2677 struct collector_htmlnumericentity_data {
2678         mbfl_convert_filter *decoder;
2679         int status;
2680         int cache;
2681         int digit;
2682         int *convmap;
2683         int mapsize;
2684 };
2685 
2686 static int
2687 collector_encode_htmlnumericentity(int c, void *data)
2688 {
2689         struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2690         int f, n, s, r, d, size, *mapelm;
2691 
2692         size = pc->mapsize;
2693         f = 0;
2694         n = 0;
2695         while (n < size) {
2696                 mapelm = &(pc->convmap[n*4]);
2697                 if (c >= mapelm[0] && c <= mapelm[1]) {
2698                         s = (c + mapelm[2]) & mapelm[3];
2699                         if (s >= 0) {
2700                                 (*pc->decoder->filter_function)(0x26, pc->decoder);     /* '&' */
2701                                 (*pc->decoder->filter_function)(0x23, pc->decoder);     /* '#' */
2702                                 r = 100000000;
2703                                 s %= r;
2704                                 while (r > 0) {
2705                                         d = s/r;
2706                                         if (d || f) {
2707                                                 f = 1;
2708                                                 s %= r;
2709                                                 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2710                                         }
2711                                         r /= 10;
2712                                 }
2713                                 if (!f) {
2714                                         f = 1;
2715                                         (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2716                                 }
2717                                 (*pc->decoder->filter_function)(0x3b, pc->decoder);             /* ';' */
2718                         }
2719                 }
2720                 if (f) {
2721                         break;
2722                 }
2723                 n++;
2724         }
2725         if (!f) {
2726                 (*pc->decoder->filter_function)(c, pc->decoder);
2727         }
2728 
2729         return c;
2730 }
2731 
2732 static int
2733 collector_decode_htmlnumericentity(int c, void *data)
2734 {
2735         struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2736         int f, n, s, r, d, size, *mapelm;
2737 
2738         switch (pc->status) {
2739         case 1:
2740                 if (c == 0x23) {        /* '#' */
2741                         pc->status = 2;
2742                 } else {
2743                         pc->status = 0;
2744                         (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
2745                         (*pc->decoder->filter_function)(c, pc->decoder);
2746                 }
2747                 break;
2748         case 2:
2749                 if (c == 0x78) {        /* 'x' */
2750                         pc->status = 4;
2751                 } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2752                         pc->cache = c - 0x30;
2753                         pc->status = 3;
2754                         pc->digit = 1;
2755                 } else {
2756                         pc->status = 0;
2757                         (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
2758                         (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
2759                         (*pc->decoder->filter_function)(c, pc->decoder);
2760                 }
2761                 break;
2762         case 3:
2763                 s = 0;
2764                 f = 0;
2765                 if (c >= 0x30 && c <= 0x39) {   /* '0' - '9' */
2766                         if (pc->digit > 9) {
2767                                 pc->status = 0;
2768                                 s = pc->cache;
2769                                 f = 1;
2770                         } else {
2771                                 s = pc->cache*10 + c - 0x30;
2772                                 pc->cache = s;
2773                                 pc->digit++;
2774                         }
2775                 } else {
2776                         pc->status = 0;
2777                         s = pc->cache;
2778                         f = 1;
2779                         n = 0;
2780                         size = pc->mapsize;
2781                         while (n < size) {
2782                                 mapelm = &(pc->convmap[n*4]);
2783                                 d = s - mapelm[2];
2784                                 if (d >= mapelm[0] && d <= mapelm[1]) {
2785                                         f = 0;
2786                                         (*pc->decoder->filter_function)(d, pc->decoder);
2787                                         if (c != 0x3b) {        /* ';' */
2788                                                 (*pc->decoder->filter_function)(c, pc->decoder);
2789                                         }
2790                                         break;
2791                                 }
2792                                 n++;
2793                         }
2794                 }
2795                 if (f) {
2796                         (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
2797                         (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
2798                         r = 1;
2799                         n = pc->digit;
2800                         while (n > 0) {
2801                                 r *= 10;
2802                                 n--;
2803                         }
2804                         s %= r;
2805                         r /= 10;
2806                         while (r > 0) {
2807                                 d = s/r;
2808                                 s %= r;
2809                                 r /= 10;
2810                                 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2811                         }
2812                         (*pc->decoder->filter_function)(c, pc->decoder);
2813                 }
2814                 break;
2815         case 4:
2816                 if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
2817                         pc->cache = c - 0x30;
2818                         pc->status = 5;
2819                         pc->digit = 1;
2820                 } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F'  */
2821                         pc->cache = c - 0x41 + 10;
2822                         pc->status = 5;
2823                         pc->digit = 1;
2824                 } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f'  */
2825                         pc->cache = c - 0x61 + 10;
2826                         pc->status = 5;
2827                         pc->digit = 1;
2828                 } else {
2829                         pc->status = 0;
2830                         (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
2831                         (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
2832                         (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
2833                         (*pc->decoder->filter_function)(c, pc->decoder);
2834                 }
2835                 break;
2836         case 5:
2837                 s = 0;
2838                 f = 0;
2839                 if ((c >= 0x30 && c <= 0x39) ||
2840                         (c >= 0x41 && c <= 0x46) ||
2841                         (c >= 0x61 && c <= 0x66)) {     /* '0' - '9' or 'a' - 'f'  */
2842                         if (pc->digit > 9) {
2843                                 pc->status = 0;
2844                                 s = pc->cache;
2845                                 f = 1;
2846                         } else {
2847                                 if (c >= 0x30 && c <= 0x39) {
2848                                         s = pc->cache*16 + (c - 0x30);
2849                                 } else if (c >= 0x41 && c <= 0x46)  {
2850                                         s = pc->cache*16 + (c - 0x41 + 10);
2851                                 } else {
2852                                         s = pc->cache*16 + (c - 0x61 + 10);
2853                                 }
2854                                 pc->cache = s;
2855                                 pc->digit++;
2856                         }
2857                 } else {
2858                         pc->status = 0;
2859                         s = pc->cache;
2860                         f = 1;
2861                         n = 0;
2862                         size = pc->mapsize;
2863                         while (n < size) {
2864                                 mapelm = &(pc->convmap[n*4]);
2865                                 d = s - mapelm[2];
2866                                 if (d >= mapelm[0] && d <= mapelm[1]) {
2867                                         f = 0;
2868                                         (*pc->decoder->filter_function)(d, pc->decoder);
2869                                         if (c != 0x3b) {        /* ';' */
2870                                                 (*pc->decoder->filter_function)(c, pc->decoder);
2871                                         }
2872                                         break;
2873                                 }
2874                                 n++;
2875                         }
2876                 }
2877                 if (f) {
2878                         (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
2879                         (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
2880                         (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
2881                         r = 1;
2882                         n = pc->digit;
2883                         while (n > 0) {
2884                                 r *= 16;
2885                                 n--;
2886                         }
2887                         s %= r;
2888                         r /= 16;
2889                         while (r > 0) {
2890                                 d = s/r;
2891                                 s %= r;
2892                                 r /= 16;
2893                                 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2894                         }
2895                         (*pc->decoder->filter_function)(c, pc->decoder);
2896                 }
2897                 break;
2898         default:
2899                 if (c == 0x26) {        /* '&' */
2900                         pc->status = 1;
2901                 } else {
2902                         (*pc->decoder->filter_function)(c, pc->decoder);
2903                 }
2904                 break;
2905         }
2906 
2907         return c;
2908 }
2909 
2910 static int
2911 collector_encode_hex_htmlnumericentity(int c, void *data)
2912 {
2913         struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
2914         int f, n, s, r, d, size, *mapelm;
2915 
2916         size = pc->mapsize;
2917         f = 0;
2918         n = 0;
2919         while (n < size) {
2920                 mapelm = &(pc->convmap[n*4]);
2921                 if (c >= mapelm[0] && c <= mapelm[1]) {
2922                         s = (c + mapelm[2]) & mapelm[3];
2923                         if (s >= 0) {
2924                                 (*pc->decoder->filter_function)(0x26, pc->decoder);     /* '&' */
2925                                 (*pc->decoder->filter_function)(0x23, pc->decoder);     /* '#' */
2926                                 (*pc->decoder->filter_function)(0x78, pc->decoder);     /* 'x' */
2927                                 r = 0x1000000;
2928                                 s %= r;
2929                                 while (r > 0) {
2930                                         d = s/r;
2931                                         if (d || f) {
2932                                                 f = 1;
2933                                                 s %= r;
2934                                                 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2935                                         }
2936                                         r /= 16;
2937                                 }
2938                                 if (!f) {
2939                                         f = 1;
2940                                         (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
2941                                 }
2942                                 (*pc->decoder->filter_function)(0x3b, pc->decoder);             /* ';' */
2943                         }
2944                 }
2945                 if (f) {
2946                         break;
2947                 }
2948                 n++;
2949         }
2950         if (!f) {
2951                 (*pc->decoder->filter_function)(c, pc->decoder);
2952         }
2953 
2954         return c;
2955 }
2956 
2957 int mbfl_filt_decode_htmlnumericentity_flush(mbfl_convert_filter *filter)
2958 {
2959         struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)filter;
2960         int n, s, r, d;
2961 
2962         if (pc->status) {
2963                 switch (pc->status) {
2964                 case 1: /* '&' */
2965                         (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
2966                         break;
2967                 case 2: /* '#' */
2968                         (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
2969                         (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
2970                         break;
2971                 case 3: /* '0'-'9' */
2972                         (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
2973                         (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
2974 
2975                         s = pc->cache;
2976                         r = 1;
2977                         n = pc->digit;
2978                         while (n > 0) {
2979                                 r *= 10;
2980                                 n--;
2981                         }
2982                         s %= r;
2983                         r /= 10;
2984                         while (r > 0) {
2985                                 d = s/r;
2986                                 s %= r;
2987                                 r /= 10;
2988                                 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
2989                         }
2990 
2991                         break;
2992                 case 4: /* 'x' */
2993                         (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
2994                         (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
2995                         (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
2996                         break;
2997                 case 5: /* '0'-'9','a'-'f' */
2998                         (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
2999                         (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
3000                         (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
3001 
3002                         s = pc->cache;
3003                         r = 1;
3004                         n = pc->digit;
3005                         while (n > 0) {
3006                                 r *= 16;
3007                                 n--;
3008                         }
3009                         s %= r;
3010                         r /= 16;
3011                         while (r > 0) {
3012                                 d = s/r;
3013                                 s %= r;
3014                                 r /= 16;
3015                                 (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
3016                         }
3017                         break;
3018                 default:
3019                         break;
3020                 }
3021         }
3022 
3023         pc->status = 0;
3024         pc->cache = 0;
3025         pc->digit = 0;
3026 
3027         return 0;
3028 }
3029 
3030 
3031 mbfl_string *
3032 mbfl_html_numeric_entity(
3033     mbfl_string *string,
3034     mbfl_string *result,
3035     int *convmap,
3036     int mapsize,
3037     int type)
3038 {
3039         struct collector_htmlnumericentity_data pc;
3040         mbfl_memory_device device;
3041         mbfl_convert_filter *encoder;
3042         int n;
3043         unsigned char *p;
3044 
3045         if (string == NULL || result == NULL) {
3046                 return NULL;
3047         }
3048         mbfl_string_init(result);
3049         result->no_language = string->no_language;
3050         result->no_encoding = string->no_encoding;
3051         mbfl_memory_device_init(&device, string->len, 0);
3052 
3053         /* output code filter */
3054         pc.decoder = mbfl_convert_filter_new(
3055             mbfl_no_encoding_wchar,
3056             string->no_encoding,
3057             mbfl_memory_device_output, 0, &device);
3058         /* wchar filter */
3059         if (type == 0) { /* decimal output */
3060                 encoder = mbfl_convert_filter_new(
3061                     string->no_encoding,
3062                     mbfl_no_encoding_wchar,
3063                     collector_encode_htmlnumericentity, 0, &pc);
3064         } else if (type == 2) { /* hex output */
3065                 encoder = mbfl_convert_filter_new(
3066                     string->no_encoding,
3067                     mbfl_no_encoding_wchar,
3068                     collector_encode_hex_htmlnumericentity, 0, &pc);
3069         } else { /* type == 1: decimal/hex input */
3070                 encoder = mbfl_convert_filter_new(
3071                     string->no_encoding,
3072                     mbfl_no_encoding_wchar,
3073                     collector_decode_htmlnumericentity,
3074                         (int (*)(void*))mbfl_filt_decode_htmlnumericentity_flush, &pc);
3075         }
3076         if (pc.decoder == NULL || encoder == NULL) {
3077                 mbfl_convert_filter_delete(encoder);
3078                 mbfl_convert_filter_delete(pc.decoder);
3079                 return NULL;
3080         }
3081         pc.status = 0;
3082         pc.cache = 0;
3083         pc.digit = 0;
3084         pc.convmap = convmap;
3085         pc.mapsize = mapsize;
3086 
3087         /* feed data */
3088         p = string->val;
3089         n = string->len;
3090         if (p != NULL) {
3091                 while (n > 0) {
3092                         if ((*encoder->filter_function)(*p++, encoder) < 0) {
3093                                 break;
3094                         }
3095                         n--;
3096                 }
3097         }
3098         mbfl_convert_filter_flush(encoder);
3099         mbfl_convert_filter_flush(pc.decoder);
3100         result = mbfl_memory_device_result(&device, result);
3101         mbfl_convert_filter_delete(encoder);
3102         mbfl_convert_filter_delete(pc.decoder);
3103 
3104         return result;
3105 }
3106 
3107 /*
3108  * Local variables:
3109  * tab-width: 4
3110  * c-basic-offset: 4
3111  * End:
3112  */

/* [<][>][^][v][top][bottom][index][help] */