root/ext/mbstring/libmbfl/tests/conv_encoding.tests/gen_exp.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mappings_init
  2. mappings_destroy
  3. mappings_grow
  4. mappings_add
  5. utf32_utf8
  6. to_cp932_visitor
  7. from_cp932_visitor
  8. to_cp50220_visitor
  9. to_cp50222_visitor
  10. main

#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
   6 
/*
 * One Unicode code point together with the CP932 codes recorded for it.
 * mappings_add() keeps these entries sorted by cp_uni.
 */
struct mappings_entry {
        int cp_uni;     /* Unicode code point; the key the table is sorted on */
        int n;          /* number of elements of cp_932[] currently in use */
        int cp_932[16]; /* CP932 codes added for cp_uni, in insertion order */
};
  12 
/*
 * Growable array of mappings_entry records.
 */
struct mappings {
        size_t n;                       /* number of entries in use */
        size_t nalloc;                  /* number of entries allocated */
        struct mappings_entry *entries; /* heap array, or null while nothing is allocated */
};
  18 
  19 static void mappings_init(struct mappings *map)
  20 {
  21         map->n = 0;
  22         map->nalloc = 0;
  23         map->entries = 0;
  24 }
  25 
  26 static void mappings_destroy(struct mappings *map)
  27 {
  28         if (map->entries)
  29                 free(map->entries);
  30 }
  31 
  32 static int mappings_grow(struct mappings *map)
  33 {
  34         if (map->n >= map->nalloc) {
  35                 struct mappings_entry *new_entries;
  36                 size_t n = map->nalloc << 1, a;
  37                 if (n == 0)
  38                         n = 1;
  39                 else if (n <= map->n)
  40                         return 2;
  41                 a = sizeof(*map->entries) * n;
  42                 if (a / n != sizeof(*map->entries))
  43                         return 2;
  44                 new_entries = realloc(map->entries, a);
  45                 if (!new_entries)
  46                         return 2;
  47                 map->entries = new_entries;
  48                 map->nalloc = n;
  49         }
  50         return 0;
  51 }
  52 
  53 static int mappings_add(struct mappings *map, int cp_uni, int cp_932)
  54 {
  55         size_t i;
  56         size_t s = 0, e = map->n;
  57         struct mappings_entry *entry;
  58 
  59         for (;;) {
  60                 i = (s + e) / 2;
  61                 entry = &map->entries[i];
  62                 if (e == i || entry->cp_uni > cp_uni) {
  63                         if (e == i) {
  64                                 int r = mappings_grow(map);
  65                                 if (r)
  66                                         return r;
  67                                 if (map->n > i) {
  68                                         size_t n = map->n - i, a = sizeof(*map->entries) * n;
  69                                         if (a / n != sizeof(*map->entries))
  70                                                 return 2;
  71                                         memmove(&map->entries[i + 1], &map->entries[i], a);
  72                                 }
  73                                 ++map->n;
  74                                 entry = &map->entries[i];
  75                                 entry->cp_uni = cp_uni;
  76                                 entry->n = 0;
  77                                 break;
  78                         }
  79                         e = i;
  80                 } else if (entry->cp_uni < cp_uni) {
  81                         if (s == i) {
  82                                 int r = mappings_grow(map);
  83                                 if (r)
  84                                         return r;
  85                                 if (map->n > i + 1) {
  86                                         size_t n = (map->n - i - 1), a = sizeof(*map->entries) * n;
  87                                         if (a / n != sizeof(*map->entries))
  88                                                 return 2;
  89                                         memmove(&map->entries[i + 2], &map->entries[i + 1], a);
  90                                 }
  91                                 ++map->n;
  92                                 entry = &map->entries[i + 1];
  93                                 entry->cp_uni = cp_uni;
  94                                 entry->n = 0;
  95                                 break;
  96                         }
  97                         s = i;
  98                 } else {
  99                         break;
 100                 }
 101         }
 102         if (entry->n >= sizeof(entry->cp_932) / sizeof(*entry->cp_932))
 103                 return 1;
 104         entry->cp_932[entry->n++] = cp_932;
 105         return 0;
 106 }
 107 
/*
 * Description of one output generator selectable from the command line.
 */
struct generator_entry {
        const char *name;       /* name compared against argv[1] in main() */
        const char *prologue;   /* text printed before any entry is visited */
        const char *epilogue;   /* text printed after the last entry */
        void(*visitor)(const struct mappings_entry *); /* called once per mapping entry */
};
 114 
 115 static int utf32_utf8(char *buf, int k)
 116 {
 117         int retval = 0;
 118 
 119         if (k < 0x80) {
 120                 buf[0] = k;
 121                 retval = 1;
 122         } else if (k < 0x800) {
 123                 buf[0] = 0xc0 | (k >> 6);
 124                 buf[1] = 0x80 | (k & 0x3f);
 125                 retval = 2;
 126         } else if (k < 0x10000) {
 127                 buf[0] = 0xe0 | (k >> 12);
 128                 buf[1] = 0x80 | ((k >> 6) & 0x3f);
 129                 buf[2] = 0x80 | (k & 0x3f);
 130                 retval = 3;
 131         } else if (k < 0x200000) {
 132                 buf[0] = 0xf0 | (k >> 18);
 133                 buf[1] = 0x80 | ((k >> 12) & 0x3f);
 134                 buf[2] = 0x80 | ((k >> 6) & 0x3f);
 135                 buf[3] = 0x80 | (k & 0x3f);
 136                 retval = 4;
 137         } else if (k < 0x4000000) {
 138                 buf[0] = 0xf8 | (k >> 24);
 139                 buf[1] = 0x80 | ((k >> 18) & 0x3f);
 140                 buf[2] = 0x80 | ((k >> 12) & 0x3f);
 141                 buf[3] = 0x80 | ((k >> 6) & 0x3f);
 142                 buf[4] = 0x80 | (k & 0x3f);
 143                 retval = 5;
 144         } else {
 145                 buf[0] = 0xfc | (k >> 30);
 146                 buf[1] = 0x80 | ((k >> 24) & 0x3f);
 147                 buf[2] = 0x80 | ((k >> 18) & 0x3f);
 148                 buf[3] = 0x80 | ((k >> 12) & 0x3f);
 149                 buf[4] = 0x80 | ((k >> 6) & 0x3f);
 150                 buf[5] = 0x80 | (k & 0x3f);
 151                 retval = 6;
 152         }
 153         buf[retval] = '\0';
 154 
 155         return retval;
 156 }
 157 
/* Text every generator prints after its last test case. */
static const char epilogue[] =
"close\n";

/*
 * Script header for the "to_cp932" generator: an expect script that spawns
 * tests/conv_encoding with the arguments "Japanese CP932 UTF-8" and fails
 * the current test on any output line not matched by an explicit pattern.
 */
static const char prologue_to_cp932[] =
"#!/usr/bin/expect -f\n"
"spawn tests/conv_encoding Japanese CP932 UTF-8\n"
"set timeout 1\n"
"\n"
"expect_after {\n"
"    \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n"
"}\n";

/* Same header, spawning tests/conv_encoding with "Japanese CP50220 UTF-8". */
static const char prologue_to_cp50220[] =
"#!/usr/bin/expect -f\n"
"spawn tests/conv_encoding Japanese CP50220 UTF-8\n"
"set timeout 1\n"
"\n"
"expect_after {\n"
"    \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n"
"}\n";

/* Same header, spawning tests/conv_encoding with "Japanese CP50222 UTF-8". */
static const char prologue_to_cp50222[] =
"#!/usr/bin/expect -f\n"
"spawn tests/conv_encoding Japanese CP50222 UTF-8\n"
"set timeout 1\n"
"\n"
"expect_after {\n"
"    \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n"
"}\n";

/* Same header, spawning tests/conv_encoding with "Japanese UTF-8 CP932". */
static const char prologue_from_cp932[] =
"#!/usr/bin/expect -f\n"
"spawn tests/conv_encoding Japanese UTF-8 CP932\n"
"set timeout 1\n"
"\n"
"expect_after {\n"
"    \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n"
"}\n";
 196 
 197 static void to_cp932_visitor(const struct mappings_entry *entry)
 198 {
 199         char buf_uni[32], buf_cp932[8];
 200         int i;
 201 
 202         if (entry->cp_uni < 32 || entry->cp_uni == 127)
 203                 return;
 204 
 205         i = utf32_utf8(buf_uni, entry->cp_uni);
 206         buf_uni[i * 4] = '\0';
 207         while (--i >= 0) {
 208                 unsigned char c = ((unsigned char *)buf_uni)[i];
 209                 buf_uni[i * 4] = '\\';
 210                 buf_uni[i * 4 + 1] = 'x';
 211                 buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
 212                 buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
 213         }
 214 
 215         printf("set test \"U+%06X\"\n"
 216                "send -- \"%s\r\"\n"
 217                    "sleep 0.001\n"
 218                "expect {\n", entry->cp_uni, buf_uni);
 219 
 220         for (i = 0; i < entry->n; ++i) {
 221                 int len = 0;
 222                 const int c = entry->cp_932[i];
 223                 if (c >= 0x100) {
 224                         len = 2;
 225                         sprintf(buf_cp932, "%%%02x%%%02x", (c >> 8) & 0xff, c & 0xff);
 226                 } else {
 227                         len = 1;
 228                         sprintf(buf_cp932, "%%%02x", c);
 229                 }
 230                 printf("    \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp932, len);
 231         }
 232 
 233         printf("}\n");
 234 }
 235 
 236 static void from_cp932_visitor(const struct mappings_entry *entry)
 237 {
 238         char buf_uni[32], buf_cp932[8];
 239         int i, len;
 240 
 241         if (entry->cp_uni < 32 || entry->cp_uni == 127)
 242                 return;
 243 
 244         len = utf32_utf8(buf_uni, entry->cp_uni);
 245         buf_uni[len * 3] = '\0';
 246         i = len;
 247         while (--i >= 0) {
 248                 unsigned char c = ((unsigned char *)buf_uni)[i];
 249                 buf_uni[i * 3] = '%';
 250                 buf_uni[i * 3 + 1] = "0123456789abcdef"[c >> 4];
 251                 buf_uni[i * 3 + 2] = "0123456789abcdef"[c & 15];
 252         }
 253 
 254         for (i = 0; i < entry->n; ++i) {
 255                 const int c = entry->cp_932[i];
 256                 if (c >= 0x100)
 257                         sprintf(buf_cp932, "\\x%02x\\x%02x", (c >> 8) & 0xff, c & 0xff);
 258                 else
 259                         sprintf(buf_cp932, "\\x%02x", c);
 260                 printf("set test \"U+%06X\"\n"
 261                            "send -- \"%s\r\"\n"
 262                            "sleep 0.001\n"
 263                            "expect {\n"
 264                        "    \"%s (%d)\\r\\n\" { pass $test }\n"
 265                        "}\n",
 266                            entry->cp_uni, buf_cp932, buf_uni, len);
 267         }
 268 }
 269 
 270 static void to_cp50220_visitor(const struct mappings_entry *entry)
 271 {
 272         char buf_uni[32], buf_cp50220[32];
 273         int i;
 274 
 275         if (entry->cp_uni < 32 || entry->cp_uni == 127)
 276                 return;
 277 
 278         i = utf32_utf8(buf_uni, entry->cp_uni);
 279         buf_uni[i * 4] = '\0';
 280         while (--i >= 0) {
 281                 unsigned char c = ((unsigned char *)buf_uni)[i];
 282                 buf_uni[i * 4] = '\\';
 283                 buf_uni[i * 4 + 1] = 'x';
 284                 buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
 285                 buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
 286         }
 287 
 288         printf("set test \"U+%06X\"\n"
 289                "send -- \"%s\r\"\n"
 290                    "sleep 0.001\n"
 291                "expect {\n", entry->cp_uni, buf_uni);
 292 
 293         for (i = 0; i < entry->n; ++i) {
 294                 int len = 0;
 295                 const int c = entry->cp_932[i];
 296                 if (c >= 0xa1 && c < 0xe0) {
 297                         static const int jisx0208_tl_map[] = {
 298                                 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
 299                                 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
 300                                 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
 301                                 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
 302                                 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
 303                                 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
 304                                 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
 305                                 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
 306                         };
 307                         const int j = jisx0208_tl_map[c - 0xa0];
 308                         len = 8;
 309                         sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
 310                 } else if (c >= 0x100) {
 311                         const int j = ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100);
 312                         len = 8;
 313                         sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
 314                 } else {
 315                         len = 1;
 316                         sprintf(buf_cp50220, "%%%02x", c);
 317                 }
 318                 printf("    \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len);
 319         }
 320 
 321         printf("}\n");
 322 }
 323 
 324 static void to_cp50222_visitor(const struct mappings_entry *entry)
 325 {
 326         char buf_uni[32], buf_cp50220[32];
 327         int i;
 328 
 329         if (entry->cp_uni < 32 || entry->cp_uni == 127)
 330                 return;
 331 
 332         i = utf32_utf8(buf_uni, entry->cp_uni);
 333         buf_uni[i * 4] = '\0';
 334         while (--i >= 0) {
 335                 unsigned char c = ((unsigned char *)buf_uni)[i];
 336                 buf_uni[i * 4] = '\\';
 337                 buf_uni[i * 4 + 1] = 'x';
 338                 buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
 339                 buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
 340         }
 341 
 342         printf("set test \"U+%06X\"\n"
 343                "send -- \"%s\r\"\n"
 344                    "sleep 0.001\n"
 345                "expect {\n", entry->cp_uni, buf_uni);
 346 
 347         for (i = 0; i < entry->n; ++i) {
 348                 int len = 0;
 349                 const int c = entry->cp_932[i];
 350                 if (c >= 0xa1 && c < 0xe0) {
 351                         len = 3;
 352                         sprintf(buf_cp50220, "%%0e%%%02x%%0f", c - 0x80);
 353                 } else if (c >= 0x100) {
 354                         const int j = ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100);
 355                         len = 8;
 356                         sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
 357                 } else {
 358                         len = 1;
 359                         sprintf(buf_cp50220, "%%%02x", c);
 360                 }
 361                 printf("    \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len);
 362         }
 363 
 364         printf("}\n");
 365 }
 366 
 367 
/*
 * Generators selectable by name on the command line.  main() walks this
 * table in order and treats the entry whose name is NULL as the end.
 */
static struct generator_entry entries[] = {
        { "to_cp932", prologue_to_cp932, epilogue, to_cp932_visitor },
        { "to_cp50220", prologue_to_cp50220, epilogue, to_cp50220_visitor },
        { "to_cp50222", prologue_to_cp50222, epilogue, to_cp50222_visitor },
        { "from_cp932", prologue_from_cp932, epilogue, from_cp932_visitor },
        { NULL }
};

/* Name of the mapping table that main() opens (relative to the working directory). */
static const char cp932_txt[] = "CP932.TXT";
 377 
 378 int main(int argc, char **argv)
 379 {
 380         int retval = 0;
 381         FILE *fp;
 382         char buf[1024];
 383         struct generator_entry* gen;
 384         struct mappings map;
 385 
 386         if (argc <= 1) {
 387                 fprintf(stderr, "usage: %s generator\n", argv[0]);
 388                 return 255;
 389         }
 390 
 391         for (gen = entries;; ++gen) {
 392                 if (!gen->name) {
 393                         fprintf(stderr, "Unknown generator: %s\n", argv[1]);
 394                         return 1;
 395                 }
 396                 if (strcmp(gen->name, argv[1]) == 0)
 397                         break;
 398         }
 399 
 400     fp = fopen(cp932_txt, "r");
 401         if (!fp) {
 402                 fprintf(stderr, "Failed to open %s\n", cp932_txt);
 403                 return 2;
 404         }
 405 
 406         mappings_init(&map);
 407 
 408         while (fgets(buf, sizeof(buf), fp)) {
 409                 const char *fields[16];
 410                 char *p = buf;
 411                 int field = 0;
 412                 int cp_932, cp_uni;
 413                 for (;;) {
 414                         char *q = 0;
 415                         int eol = 0;
 416 
 417                         if (field >= sizeof(fields) / sizeof(*fields)) {
 418                                 fprintf(stderr, "Too many fields (incorrect file?)\n");
 419                                 retval = 3;
 420                                 goto out;
 421                         }
 422 
 423                         for (;;) {
 424                                 if (*p == '\0' || *p == '#' || *p == 0x0a) {
 425                                         eol = 1;
 426                                         break;
 427                                 } else if (*p != ' ' && *p != '\t') {
 428                                         break;
 429                                 }
 430                                 ++p;
 431                         }
 432 
 433                         if (eol)
 434                                 break;
 435 
 436                         q = p;
 437 
 438                         for (;;) {
 439                                 if (*p == '\0' || *p == '#' || *p == 0x0a) {
 440                                         eol = 1;
 441                                         break;
 442                                 } else if (*p == ' ' || *p == '\t') {
 443                                         break;
 444                                 }
 445                                 ++p;
 446                         }
 447 
 448                         *p = '\0';
 449                         fields[field++] = q;
 450 
 451                         if (eol)
 452                                 break;
 453                         ++p;
 454                 }
 455                 if (field == 0 || field == 1) {
 456                         continue;
 457                 } else if (field != 2) {
 458                         fprintf(stderr, "Unexpected field count (expected 2, got %d)\n", field);
 459                         retval = 3;
 460                         goto out;
 461                 }
 462                 cp_932 = strtol(fields[0], NULL, 0);
 463                 if (errno == ERANGE || errno == EINVAL) {
 464                         fprintf(stderr, "Malformed field value: %s\n", fields[0]);
 465                         retval = 4;
 466                         goto out;
 467                 }
 468                 cp_uni = strtol(fields[1], NULL, 0);
 469                 if (errno == ERANGE || errno == EINVAL) {
 470                         fprintf(stderr, "Malformed field value: %s\n", fields[1]);
 471                         retval = 4;
 472                         goto out;
 473                 }
 474 
 475                 if (mappings_add(&map, cp_uni, cp_932)) {
 476                         fprintf(stderr, "Too many mappings to the same Unicode codepoint (U+%06X)\n", cp_uni);
 477                         retval = 4;
 478                         goto out;
 479                 }
 480         }
 481 
 482         {
 483                 size_t i;
 484                 printf("%s", gen->prologue);
 485                 for (i = 0; i < map.n; ++i)
 486                         gen->visitor(&map.entries[i]);
 487                 printf("%s", gen->epilogue);
 488         }
 489 
 490 out:
 491         mappings_destroy(&map);
 492         return retval;
 493 }
 494 
 495 /*
 496  * vim: sts=4 sw=4 ts=4 noet
 497  */

/* [<][>][^][v][top][bottom][index][help] */