root/ext/mbstring/oniguruma/enc/mktable.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. IsAlpha
  2. IsBlank
  3. IsCntrl
  4. IsDigit
  5. IsGraph
  6. IsLower
  7. IsPrint
  8. IsPunct
  9. IsSpace
  10. IsUpper
  11. IsXDigit
  12. IsWord
  13. IsAscii
  14. IsNewline
  15. exec
  16. main

   1 /**********************************************************************
   2   mktable.c
   3 **********************************************************************/
   4 /*-
   5  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
   6  * All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  */
  29 
  30 #include <stdlib.h>
  31 #include <stdio.h>
  32 #include <locale.h>
  33 
  34 #define __USE_ISOC99
  35 #include <ctype.h>
  36 
  37 #include "regenc.h"
  38 
  39 #define ASCII                0
  40 #define UNICODE_ISO_8859_1   1
  41 #define ISO_8859_1           2
  42 #define ISO_8859_2           3
  43 #define ISO_8859_3           4
  44 #define ISO_8859_4           5
  45 #define ISO_8859_5           6
  46 #define ISO_8859_6           7
  47 #define ISO_8859_7           8
  48 #define ISO_8859_8           9
  49 #define ISO_8859_9          10
  50 #define ISO_8859_10         11
  51 #define ISO_8859_11         12
  52 #define ISO_8859_13         13
  53 #define ISO_8859_14         14
  54 #define ISO_8859_15         15
  55 #define ISO_8859_16         16
  56 #define KOI8                17
  57 #define KOI8_R              18
  58 
  59 typedef struct {
  60   int   num;
  61   char* name;
  62 } ENC_INFO;
  63 
  64 static ENC_INFO Info[] = {
  65   { ASCII,               "ASCII" },
  66   { UNICODE_ISO_8859_1,  "UNICODE_ISO_8859_1"  },
  67   { ISO_8859_1,  "ISO_8859_1"  },
  68   { ISO_8859_2,  "ISO_8859_2"  },
  69   { ISO_8859_3,  "ISO_8859_3"  },
  70   { ISO_8859_4,  "ISO_8859_4"  },
  71   { ISO_8859_5,  "ISO_8859_5"  },
  72   { ISO_8859_6,  "ISO_8859_6"  },
  73   { ISO_8859_7,  "ISO_8859_7"  },
  74   { ISO_8859_8,  "ISO_8859_8"  },
  75   { ISO_8859_9,  "ISO_8859_9"  },
  76   { ISO_8859_10, "ISO_8859_10" },
  77   { ISO_8859_11, "ISO_8859_11" },
  78   { ISO_8859_13, "ISO_8859_13" },
  79   { ISO_8859_14, "ISO_8859_14" },
  80   { ISO_8859_15, "ISO_8859_15" },
  81   { ISO_8859_16, "ISO_8859_16" },
  82   { KOI8,        "KOI8" },
  83   { KOI8_R,      "KOI8_R" }
  84 };
  85 
  86 
  87 static int IsAlpha(int enc, int c)
  88 {
  89   if (enc == ASCII)
  90     return isalpha(c);
  91 
  92   if (c >= 0x41 && c <= 0x5a) return 1;
  93   if (c >= 0x61 && c <= 0x7a) return 1;
  94 
  95   switch (enc) {
  96   case UNICODE_ISO_8859_1:
  97   case ISO_8859_1:
  98   case ISO_8859_9:
  99     if (c == 0xaa) return 1;
 100     if (c == 0xb5) return 1;
 101     if (c == 0xba) return 1;
 102     if (c >= 0xc0 && c <= 0xd6) return 1;
 103     if (c >= 0xd8 && c <= 0xf6) return 1;
 104     if (c >= 0xf8 && c <= 0xff) return 1;
 105     break;
 106 
 107   case ISO_8859_2:
 108     if (c == 0xa1 || c == 0xa3) return 1;
 109     if (c == 0xa5 || c == 0xa6) return 1;
 110     if (c >= 0xa9 && c <= 0xac) return 1;
 111     if (c >= 0xae && c <= 0xaf) return 1;
 112     if (c == 0xb1 || c == 0xb3) return 1;
 113     if (c == 0xb5 || c == 0xb6) return 1;
 114     if (c >= 0xb9 && c <= 0xbc) return 1;
 115     if (c >= 0xbe && c <= 0xbf) return 1;
 116     if (c >= 0xc0 && c <= 0xd6) return 1;
 117     if (c >= 0xd8 && c <= 0xf6) return 1;
 118     if (c >= 0xf8 && c <= 0xfe) return 1;
 119     break;
 120 
 121   case ISO_8859_3:
 122     if (c == 0xa1) return 1;
 123     if (c == 0xa6) return 1;
 124     if (c >= 0xa9 && c <= 0xac) return 1;
 125     if (c == 0xaf) return 1;
 126     if (c == 0xb1) return 1;
 127     if (c == 0xb5 || c == 0xb6) return 1;
 128     if (c >= 0xb9 && c <= 0xbc) return 1;
 129     if (c == 0xbf) return 1;
 130     if (c >= 0xc0 && c <= 0xc2) return 1;
 131     if (c >= 0xc4 && c <= 0xcf) return 1;
 132     if (c >= 0xd1 && c <= 0xd6) return 1;
 133     if (c >= 0xd8 && c <= 0xe2) return 1;
 134     if (c >= 0xe4 && c <= 0xef) return 1;
 135     if (c >= 0xf1 && c <= 0xf6) return 1;
 136     if (c >= 0xf8 && c <= 0xfe) return 1;
 137     break;
 138 
 139   case ISO_8859_4:
 140     if (c >= 0xa1 && c <= 0xa3) return 1;
 141     if (c == 0xa5 || c == 0xa6) return 1;
 142     if (c >= 0xa9 && c <= 0xac) return 1;
 143     if (c == 0xae) return 1;
 144     if (c == 0xb1 || c == 0xb3) return 1;
 145     if (c == 0xb5 || c == 0xb6) return 1;
 146     if (c >= 0xb9 && c <= 0xbf) return 1;
 147     if (c >= 0xc0 && c <= 0xd6) return 1;
 148     if (c >= 0xd8 && c <= 0xf6) return 1;
 149     if (c >= 0xf8 && c <= 0xfe) return 1;
 150     break;
 151 
 152   case ISO_8859_5:
 153     if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
 154     if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
 155     break;
 156 
 157   case ISO_8859_6:
 158     if (c >= 0xc1 && c <= 0xda) return 1;
 159     if (c >= 0xe0 && c <= 0xf2) return 1;
 160     break;
 161 
 162   case ISO_8859_7:
 163     if (c == 0xb6) return 1;
 164     if (c >= 0xb8 && c <= 0xba) return 1;
 165     if (c == 0xbc) return 1;
 166     if (c >= 0xbe && c <= 0xbf) return 1;
 167     if (c == 0xc0) return 1;
 168     if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
 169     if (c >= 0xdc && c <= 0xfe) return 1;
 170     break;
 171 
 172   case ISO_8859_8:
 173     if (c == 0xb5) return 1;
 174     if (c >= 0xe0 && c <= 0xfa) return 1;
 175     break;
 176 
 177   case ISO_8859_10:
 178     if (c >= 0xa1 && c <= 0xa6) return 1;
 179     if (c >= 0xa8 && c <= 0xac) return 1;
 180     if (c == 0xae || c == 0xaf) return 1;
 181     if (c >= 0xb1 && c <= 0xb6) return 1;
 182     if (c >= 0xb8 && c <= 0xbc) return 1;
 183     if (c >= 0xbe && c <= 0xff) return 1;
 184     break;
 185 
 186   case ISO_8859_11:
 187     if (c >= 0xa1 && c <= 0xda) return 1;
 188     if (c >= 0xdf && c <= 0xfb) return 1;
 189     break;
 190 
 191   case ISO_8859_13:
 192     if (c == 0xa8) return 1;
 193     if (c == 0xaa) return 1;
 194     if (c == 0xaf) return 1;
 195     if (c == 0xb5) return 1;
 196     if (c == 0xb8) return 1;
 197     if (c == 0xba) return 1;
 198     if (c >= 0xbf && c <= 0xd6) return 1;
 199     if (c >= 0xd8 && c <= 0xf6) return 1;
 200     if (c >= 0xf8 && c <= 0xfe) return 1;
 201     break;
 202 
 203   case ISO_8859_14:
 204     if (c == 0xa1 || c == 0xa2) return 1;
 205     if (c == 0xa4 || c == 0xa5) return 1;
 206     if (c == 0xa6 || c == 0xa8) return 1;
 207     if (c >= 0xaa && c <= 0xac) return 1;
 208     if (c >= 0xaf && c <= 0xb5) return 1;
 209     if (c >= 0xb7 && c <= 0xff) return 1;
 210     break;
 211 
 212   case ISO_8859_15:
 213     if (c == 0xaa) return 1;
 214     if (c == 0xb5) return 1;
 215     if (c == 0xba) return 1;
 216     if (c >= 0xc0 && c <= 0xd6) return 1;
 217     if (c >= 0xd8 && c <= 0xf6) return 1;
 218     if (c >= 0xf8 && c <= 0xff) return 1;
 219     if (c == 0xa6) return 1;
 220     if (c == 0xa8) return 1;
 221     if (c == 0xb4) return 1;
 222     if (c == 0xb8) return 1;
 223     if (c == 0xbc) return 1;
 224     if (c == 0xbd) return 1;
 225     if (c == 0xbe) return 1;
 226     break;
 227 
 228   case ISO_8859_16:
 229     if (c == 0xa1) return 1;
 230     if (c == 0xa2) return 1;
 231     if (c == 0xa3) return 1;
 232     if (c == 0xa6) return 1;
 233     if (c == 0xa8) return 1;
 234     if (c == 0xaa) return 1;
 235     if (c == 0xac) return 1;
 236     if (c == 0xae) return 1;
 237     if (c == 0xaf) return 1;
 238     if (c == 0xb2) return 1;
 239     if (c == 0xb3) return 1;
 240     if (c == 0xb4) return 1;
 241     if (c >= 0xb8 && c <= 0xba) return 1;
 242     if (c == 0xbc) return 1;
 243     if (c == 0xbd) return 1;
 244     if (c == 0xbe) return 1;
 245     if (c == 0xbf) return 1;
 246     if (c >= 0xc0 && c <= 0xde) return 1;
 247     if (c >= 0xdf && c <= 0xff) return 1;
 248     break;
 249 
 250   case KOI8_R:
 251     if (c == 0xa3 || c == 0xb3) return 1;
 252     /* fall */
 253   case KOI8:
 254     if (c >= 0xc0 && c <= 0xff) return 1;
 255     break;
 256 
 257   default:
 258     exit(-1);
 259   }
 260 
 261   return 0;
 262 }
 263 
 264 static int IsBlank(int enc, int c)
 265 {
 266   if (enc == ASCII)
 267     return isblank(c);
 268 
 269   if (c == 0x09 || c == 0x20) return 1;
 270 
 271   switch (enc) {
 272   case UNICODE_ISO_8859_1:
 273   case ISO_8859_1:
 274   case ISO_8859_2:
 275   case ISO_8859_3:
 276   case ISO_8859_4:
 277   case ISO_8859_5:
 278   case ISO_8859_6:
 279   case ISO_8859_7:
 280   case ISO_8859_8:
 281   case ISO_8859_9:
 282   case ISO_8859_10:
 283   case ISO_8859_11:
 284   case ISO_8859_13:
 285   case ISO_8859_14:
 286   case ISO_8859_15:
 287   case ISO_8859_16:
 288   case KOI8:
 289     if (c == 0xa0) return 1;
 290     break;
 291 
 292   case KOI8_R:
 293     if (c == 0x9a) return 1;
 294     break;
 295 
 296   default:
 297     exit(-1);
 298   }
 299 
 300   return 0;
 301 }
 302 
 303 static int IsCntrl(int enc, int c)
 304 {
 305   if (enc == ASCII)
 306     return iscntrl(c);
 307 
 308   if (c >= 0x00 && c <= 0x1F) return 1;
 309 
 310   switch (enc) {
 311   case UNICODE_ISO_8859_1:
 312     if (c == 0xad) return 1;
 313     /* fall */
 314   case ISO_8859_1:
 315   case ISO_8859_2:
 316   case ISO_8859_3:
 317   case ISO_8859_4:
 318   case ISO_8859_5:
 319   case ISO_8859_6:
 320   case ISO_8859_7:
 321   case ISO_8859_8:
 322   case ISO_8859_9:
 323   case ISO_8859_10:
 324   case ISO_8859_11:
 325   case ISO_8859_13:
 326   case ISO_8859_14:
 327   case ISO_8859_15:
 328   case ISO_8859_16:
 329   case KOI8:
 330     if (c >= 0x7f && c <= 0x9F) return 1;
 331     break;
 332 
 333 
 334   case KOI8_R:
 335     if (c == 0x7f) return 1;
 336     break;
 337 
 338   default:
 339     exit(-1);
 340   }
 341 
 342   return 0;
 343 }
 344 
 345 static int IsDigit(int enc ARG_UNUSED, int c)
 346 {
 347   if (c >= 0x30 && c <= 0x39) return 1;
 348   return 0;
 349 }
 350 
 351 static int IsGraph(int enc, int c)
 352 {
 353   if (enc == ASCII)
 354     return isgraph(c);
 355 
 356   if (c >= 0x21 && c <= 0x7e) return 1;
 357 
 358   switch (enc) {
 359   case UNICODE_ISO_8859_1:
 360   case ISO_8859_1:
 361   case ISO_8859_2:
 362   case ISO_8859_4:
 363   case ISO_8859_5:
 364   case ISO_8859_9:
 365   case ISO_8859_10:
 366   case ISO_8859_13:
 367   case ISO_8859_14:
 368   case ISO_8859_15:
 369   case ISO_8859_16:
 370     if (c >= 0xa1 && c <= 0xff) return 1;
 371     break;
 372 
 373   case ISO_8859_3:
 374     if (c >= 0xa1) {
 375       if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
 376           c == 0xe3 || c == 0xf0)
 377         return 0;
 378       else
 379         return 1;
 380     }
 381     break;
 382 
 383   case ISO_8859_6:
 384     if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
 385       return 1;
 386     if (c >= 0xc1 && c <= 0xda) return 1;
 387     if (c >= 0xe0 && c <= 0xf2) return 1;
 388     break;
 389 
 390   case ISO_8859_7:
 391     if (c >= 0xa1 && c <= 0xfe &&
 392         c != 0xa4 && c != 0xa5 && c != 0xaa &&
 393         c != 0xae && c != 0xd2) return 1;
 394     break;
 395 
 396   case ISO_8859_8:
 397     if (c >= 0xa2 && c <= 0xfa) {
 398       if (c >= 0xbf && c <= 0xde) return 0;
 399       return 1;
 400     }
 401     break;
 402 
 403   case ISO_8859_11:
 404     if (c >= 0xa1 && c <= 0xda) return 1;
 405     if (c >= 0xdf && c <= 0xfb) return 1;
 406     break;
 407 
 408   case KOI8:
 409     if (c >= 0xc0 && c <= 0xff) return 1;
 410     break;
 411 
 412   case KOI8_R:
 413     if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1;
 414     break;
 415 
 416   default:
 417     exit(-1);
 418   }
 419 
 420   return 0;
 421 }
 422 
 423 static int IsLower(int enc, int c)
 424 {
 425   if (enc == ASCII)
 426     return islower(c);
 427 
 428   if (c >= 0x61 && c <= 0x7a) return 1;
 429 
 430   switch (enc) {
 431   case UNICODE_ISO_8859_1:
 432   case ISO_8859_1:
 433   case ISO_8859_9:
 434     if (c == 0xaa) return 1;
 435     if (c == 0xb5) return 1;
 436     if (c == 0xba) return 1;
 437     if (c >= 0xdf && c <= 0xf6) return 1;
 438     if (c >= 0xf8 && c <= 0xff) return 1;
 439     break;
 440 
 441   case ISO_8859_2:
 442     if (c == 0xb1 || c == 0xb3) return 1;
 443     if (c == 0xb5 || c == 0xb6) return 1;
 444     if (c >= 0xb9 && c <= 0xbc) return 1;
 445     if (c >= 0xbe && c <= 0xbf) return 1;
 446     if (c >= 0xdf && c <= 0xf6) return 1;
 447     if (c >= 0xf8 && c <= 0xfe) return 1;
 448     break;
 449 
 450   case ISO_8859_3:
 451     if (c == 0xb1) return 1;
 452     if (c == 0xb5 || c == 0xb6) return 1;
 453     if (c >= 0xb9 && c <= 0xbc) return 1;
 454     if (c == 0xbf) return 1;
 455     if (c == 0xdf) return 1;
 456     if (c >= 0xe0 && c <= 0xe2) return 1;
 457     if (c >= 0xe4 && c <= 0xef) return 1;
 458     if (c >= 0xf1 && c <= 0xf6) return 1;
 459     if (c >= 0xf8 && c <= 0xfe) return 1;
 460     break;
 461 
 462   case ISO_8859_4:
 463     if (c == 0xa2) return 1;
 464     if (c == 0xb1 || c == 0xb3) return 1;
 465     if (c == 0xb5 || c == 0xb6) return 1;
 466     if (c >= 0xb9 && c <= 0xbc) return 1;
 467     if (c >= 0xbe && c <= 0xbf) return 1;
 468     if (c == 0xdf) return 1;
 469     if (c >= 0xe0 && c <= 0xf6) return 1;
 470     if (c >= 0xf8 && c <= 0xfe) return 1;
 471     break;
 472 
 473   case ISO_8859_5:
 474     if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
 475     break;
 476 
 477   case ISO_8859_6:
 478     break;
 479 
 480   case ISO_8859_7:
 481     if (c == 0xc0) return 1;
 482     if (c >= 0xdc && c <= 0xfe) return 1;
 483     break;
 484 
 485   case ISO_8859_8:
 486     if (c == 0xb5) return 1;
 487     break;
 488 
 489   case ISO_8859_10:
 490     if (c >= 0xb1 && c <= 0xb6) return 1;
 491     if (c >= 0xb8 && c <= 0xbc) return 1;
 492     if (c == 0xbe || c == 0xbf) return 1;
 493     if (c >= 0xdf && c <= 0xff) return 1;
 494     break;
 495 
 496   case ISO_8859_11:
 497     break;
 498 
 499   case ISO_8859_13:
 500     if (c == 0xb5) return 1;
 501     if (c == 0xb8) return 1;
 502     if (c == 0xba) return 1;
 503     if (c == 0xbf) return 1;
 504     if (c >= 0xdf && c <= 0xf6) return 1;
 505     if (c >= 0xf8 && c <= 0xfe) return 1;
 506     break;
 507 
 508   case ISO_8859_14:
 509     if (c == 0xa2) return 1;
 510     if (c == 0xa5) return 1;
 511     if (c == 0xab) return 1;
 512     if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1;
 513     if (c >= 0xb8 && c <= 0xba) return 1;
 514     if (c == 0xbc) return 1;
 515     if (c == 0xbe || c == 0xbf) return 1;
 516     if (c >= 0xdf && c <= 0xff) return 1;
 517     break;
 518 
 519   case ISO_8859_15:
 520     if (c == 0xaa) return 1;
 521     if (c == 0xb5) return 1;
 522     if (c == 0xba) return 1;
 523     if (c >= 0xdf && c <= 0xf6) return 1;
 524     if (c >= 0xf8 && c <= 0xff) return 1;
 525     if (c == 0xa8) return 1;
 526     if (c == 0xb8) return 1;
 527     if (c == 0xbd) return 1;
 528     break;
 529 
 530   case ISO_8859_16:
 531     if (c == 0xa2) return 1;
 532     if (c == 0xa8) return 1;
 533     if (c == 0xae) return 1;
 534     if (c == 0xb3) return 1;
 535     if (c >= 0xb8 && c <= 0xba) return 1;
 536     if (c == 0xbd) return 1;
 537     if (c == 0xbf) return 1;
 538     if (c >= 0xdf && c <= 0xff) return 1;
 539     break;
 540 
 541   case KOI8_R:
 542     if (c == 0xa3) return 1;
 543     /* fall */
 544   case KOI8:
 545     if (c >= 0xc0 && c <= 0xdf) return 1;
 546     break;
 547 
 548   default:
 549     exit(-1);
 550   }
 551 
 552   return 0;
 553 }
 554 
 555 static int IsPrint(int enc, int c)
 556 {
 557   if (enc == ASCII)
 558     return isprint(c);
 559 
 560   if (c >= 0x20 && c <= 0x7e) return 1;
 561 
 562   switch (enc) {
 563   case UNICODE_ISO_8859_1:
 564     if (c >= 0x09 && c <= 0x0d) return 1;
 565     if (c == 0x85) return 1;
 566     /* fall */
 567   case ISO_8859_1:
 568   case ISO_8859_2:
 569   case ISO_8859_4:
 570   case ISO_8859_5:
 571   case ISO_8859_9:
 572   case ISO_8859_10:
 573   case ISO_8859_13:
 574   case ISO_8859_14:
 575   case ISO_8859_15:
 576   case ISO_8859_16:
 577     if (c >= 0xa0 && c <= 0xff) return 1;
 578     break;
 579 
 580   case ISO_8859_3:
 581     if (c >= 0xa0) {
 582       if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
 583           c == 0xe3 || c == 0xf0)
 584         return 0;
 585       else
 586         return 1;
 587     }
 588     break;
 589 
 590   case ISO_8859_6:
 591     if (c == 0xa0) return 1;
 592     if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
 593       return 1;
 594     if (c >= 0xc1 && c <= 0xda) return 1;
 595     if (c >= 0xe0 && c <= 0xf2) return 1;
 596     break;
 597 
 598   case ISO_8859_7:
 599     if (c >= 0xa0 && c <= 0xfe &&
 600         c != 0xa4 && c != 0xa5 && c != 0xaa &&
 601         c != 0xae && c != 0xd2) return 1;
 602     break;
 603 
 604   case ISO_8859_8:
 605     if (c >= 0xa0 && c <= 0xfa) {
 606       if (c >= 0xbf && c <= 0xde) return 0;
 607       if (c == 0xa1) return 0;
 608       return 1;
 609     }
 610     break;
 611 
 612   case ISO_8859_11:
 613     if (c >= 0xa0 && c <= 0xda) return 1;
 614     if (c >= 0xdf && c <= 0xfb) return 1;
 615     break;
 616 
 617   case KOI8:
 618     if (c == 0xa0) return 1;
 619     if (c >= 0xc0 && c <= 0xff) return 1;
 620     break;
 621 
 622   case KOI8_R:
 623     if (c >= 0x80 && c <= 0xff) return 1;
 624     break;
 625 
 626   default:
 627     exit(-1);
 628   }
 629 
 630   return 0;
 631 }
 632 
 633 static int IsPunct(int enc, int c)
 634 {
 635   if (enc == ASCII)
 636     return ispunct(c);
 637 
 638   if (enc == UNICODE_ISO_8859_1) {
 639     if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
 640         c == 0x7c || c == 0x7e) return 1;
 641     if (c >= 0x3c && c <= 0x3e) return 1;
 642   }
 643 
 644   if (c >= 0x21 && c <= 0x2f) return 1;
 645   if (c >= 0x3a && c <= 0x40) return 1;
 646   if (c >= 0x5b && c <= 0x60) return 1;
 647   if (c >= 0x7b && c <= 0x7e) return 1;
 648 
 649   switch (enc) {
 650   case ISO_8859_1:
 651   case ISO_8859_9:
 652   case ISO_8859_15:
 653     if (c == 0xad) return 1;
 654     /* fall */
 655   case UNICODE_ISO_8859_1:
 656     if (c == 0xa1) return 1;
 657     if (c == 0xab) return 1;
 658     if (c == 0xb7) return 1;
 659     if (c == 0xbb) return 1;
 660     if (c == 0xbf) return 1;
 661     break;
 662 
 663   case ISO_8859_2:
 664   case ISO_8859_4:
 665   case ISO_8859_5:
 666   case ISO_8859_14:
 667     if (c == 0xad) return 1;
 668     break;
 669 
 670   case ISO_8859_3:
 671   case ISO_8859_10:
 672     if (c == 0xad) return 1;
 673     if (c == 0xb7) return 1;
 674     if (c == 0xbd) return 1;
 675     break;
 676 
 677   case ISO_8859_6:
 678     if (c == 0xac) return 1;
 679     if (c == 0xad) return 1;
 680     if (c == 0xbb) return 1;
 681     if (c == 0xbf) return 1;
 682     break;
 683 
 684   case ISO_8859_7:
 685     if (c == 0xa1 || c == 0xa2) return 1;
 686     if (c == 0xab) return 1;
 687     if (c == 0xaf) return 1;
 688     if (c == 0xad) return 1;
 689     if (c == 0xb7 || c == 0xbb) return 1;
 690     break;
 691 
 692   case ISO_8859_8:
 693     if (c == 0xab) return 1;
 694     if (c == 0xad) return 1;
 695     if (c == 0xb7) return 1;
 696     if (c == 0xbb) return 1;
 697     if (c == 0xdf) return 1;
 698     break;
 699 
 700   case ISO_8859_13:
 701     if (c == 0xa1 || c == 0xa5) return 1;
 702     if (c == 0xab || c == 0xad) return 1;
 703     if (c == 0xb4 || c == 0xb7) return 1;
 704     if (c == 0xbb) return 1;
 705     if (c == 0xff) return 1;
 706     break;
 707 
 708   case ISO_8859_16:
 709     if (c == 0xa5) return 1;
 710     if (c == 0xab) return 1;
 711     if (c == 0xad) return 1;
 712     if (c == 0xb5) return 1;
 713     if (c == 0xb7) return 1;
 714     if (c == 0xbb) return 1;
 715     break;
 716 
 717   case KOI8_R:
 718     if (c == 0x9e) return 1;
 719     break;
 720 
 721   case ISO_8859_11:
 722   case KOI8:
 723     break;
 724 
 725   default:
 726     exit(-1);
 727   }
 728 
 729   return 0;
 730 }
 731 
 732 static int IsSpace(int enc, int c)
 733 {
 734   if (enc == ASCII)
 735     return isspace(c);
 736 
 737   if (c >= 0x09 && c <= 0x0d) return 1;
 738   if (c == 0x20) return 1;
 739 
 740   switch (enc) {
 741   case UNICODE_ISO_8859_1:
 742     if (c == 0x85) return 1;
 743     /* fall */
 744   case ISO_8859_1:
 745   case ISO_8859_2:
 746   case ISO_8859_3:
 747   case ISO_8859_4:
 748   case ISO_8859_5:
 749   case ISO_8859_6:
 750   case ISO_8859_7:
 751   case ISO_8859_8:
 752   case ISO_8859_9:
 753   case ISO_8859_10:
 754   case ISO_8859_11:
 755   case ISO_8859_13:
 756   case ISO_8859_14:
 757   case ISO_8859_15:
 758   case ISO_8859_16:
 759   case KOI8:
 760     if (c == 0xa0) return 1;
 761     break;
 762 
 763   case KOI8_R:
 764     if (c == 0x9a) return 1;
 765     break;
 766 
 767   default:
 768     exit(-1);
 769   }
 770 
 771   return 0;
 772 }
 773 
 774 static int IsUpper(int enc, int c)
 775 {
 776   if (enc == ASCII)
 777     return isupper(c);
 778 
 779   if (c >= 0x41 && c <= 0x5a) return 1;
 780 
 781   switch (enc) {
 782   case UNICODE_ISO_8859_1:
 783   case ISO_8859_1:
 784   case ISO_8859_9:
 785     if (c >= 0xc0 && c <= 0xd6) return 1;
 786     if (c >= 0xd8 && c <= 0xde) return 1;
 787     break;
 788 
 789   case ISO_8859_2:
 790     if (c == 0xa1 || c == 0xa3) return 1;
 791     if (c == 0xa5 || c == 0xa6) return 1;
 792     if (c >= 0xa9 && c <= 0xac) return 1;
 793     if (c >= 0xae && c <= 0xaf) return 1;
 794     if (c >= 0xc0 && c <= 0xd6) return 1;
 795     if (c >= 0xd8 && c <= 0xde) return 1;
 796     break;
 797 
 798   case ISO_8859_3:
 799     if (c == 0xa1) return 1;
 800     if (c == 0xa6) return 1;
 801     if (c >= 0xa9 && c <= 0xac) return 1;
 802     if (c == 0xaf) return 1;
 803     if (c >= 0xc0 && c <= 0xc2) return 1;
 804     if (c >= 0xc4 && c <= 0xcf) return 1;
 805     if (c >= 0xd1 && c <= 0xd6) return 1;
 806     if (c >= 0xd8 && c <= 0xde) return 1;
 807     break;
 808 
 809   case ISO_8859_4:
 810     if (c == 0xa1 || c == 0xa3) return 1;
 811     if (c == 0xa5 || c == 0xa6) return 1;
 812     if (c >= 0xa9 && c <= 0xac) return 1;
 813     if (c == 0xae) return 1;
 814     if (c == 0xbd) return 1;
 815     if (c >= 0xc0 && c <= 0xd6) return 1;
 816     if (c >= 0xd8 && c <= 0xde) return 1;
 817     break;
 818 
 819   case ISO_8859_5:
 820     if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
 821     break;
 822 
 823   case ISO_8859_6:
 824     break;
 825 
 826   case ISO_8859_7:
 827     if (c == 0xb6) return 1;
 828     if (c >= 0xb8 && c <= 0xba) return 1;
 829     if (c == 0xbc) return 1;
 830     if (c >= 0xbe && c <= 0xbf) return 1;
 831     if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
 832     break;
 833 
 834   case ISO_8859_8:
 835   case ISO_8859_11:
 836     break;
 837 
 838   case ISO_8859_10:
 839     if (c >= 0xa1 && c <= 0xa6) return 1;
 840     if (c >= 0xa8 && c <= 0xac) return 1;
 841     if (c == 0xae || c == 0xaf) return 1;
 842     if (c >= 0xc0 && c <= 0xde) return 1;
 843     break;
 844 
 845   case ISO_8859_13:
 846     if (c == 0xa8) return 1;
 847     if (c == 0xaa) return 1;
 848     if (c == 0xaf) return 1;
 849     if (c >= 0xc0 && c <= 0xd6) return 1;
 850     if (c >= 0xd8 && c <= 0xde) return 1;
 851     break;
 852 
 853   case ISO_8859_14:
 854     if (c == 0xa1) return 1;
 855     if (c == 0xa4 || c == 0xa6) return 1;
 856     if (c == 0xa8) return 1;
 857     if (c == 0xaa || c == 0xac) return 1;
 858     if (c == 0xaf || c == 0xb0) return 1;
 859     if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1;
 860     if (c == 0xbb || c == 0xbd) return 1;
 861     if (c >= 0xc0 && c <= 0xde) return 1;
 862     break;
 863 
 864   case ISO_8859_15:
 865     if (c >= 0xc0 && c <= 0xd6) return 1;
 866     if (c >= 0xd8 && c <= 0xde) return 1;
 867     if (c == 0xa6) return 1;
 868     if (c == 0xb4) return 1;
 869     if (c == 0xbc) return 1;
 870     if (c == 0xbe) return 1;
 871     break;
 872 
 873   case ISO_8859_16:
 874     if (c == 0xa1) return 1;
 875     if (c == 0xa3) return 1;
 876     if (c == 0xa6) return 1;
 877     if (c == 0xaa) return 1;
 878     if (c == 0xac) return 1;
 879     if (c == 0xaf) return 1;
 880     if (c == 0xb2) return 1;
 881     if (c == 0xb4) return 1;
 882     if (c == 0xbc) return 1;
 883     if (c == 0xbe) return 1;
 884     if (c >= 0xc0 && c <= 0xde) return 1;
 885     break;
 886 
 887   case KOI8_R:
 888     if (c == 0xb3) return 1;
 889     /* fall */
 890   case KOI8:
 891     if (c >= 0xe0 && c <= 0xff) return 1;
 892     break;
 893 
 894   default:
 895     exit(-1);
 896   }
 897 
 898   return 0;
 899 }
 900 
 901 static int IsXDigit(int enc, int c)
 902 {
 903   if (enc == ASCII)
 904     return isxdigit(c);
 905 
 906   if (c >= 0x30 && c <= 0x39) return 1;
 907   if (c >= 0x41 && c <= 0x46) return 1;
 908   if (c >= 0x61 && c <= 0x66) return 1;
 909   return 0;
 910 }
 911 
 912 static int IsWord(int enc, int c)
 913 {
 914   if (enc == ASCII) {
 915     return (isalpha(c) || isdigit(c) || c == 0x5f);
 916   }
 917 
 918   if (c >= 0x30 && c <= 0x39) return 1;
 919   if (c >= 0x41 && c <= 0x5a) return 1;
 920   if (c == 0x5f) return 1;
 921   if (c >= 0x61 && c <= 0x7a) return 1;
 922 
 923   switch (enc) {
 924   case UNICODE_ISO_8859_1:
 925   case ISO_8859_1:
 926   case ISO_8859_9:
 927     if (c == 0xaa) return 1;
 928     if (c >= 0xb2 && c <= 0xb3) return 1;
 929     if (c == 0xb5) return 1;
 930     if (c >= 0xb9 && c <= 0xba) return 1;
 931     if (c >= 0xbc && c <= 0xbe) return 1;
 932     if (c >= 0xc0 && c <= 0xd6) return 1;
 933     if (c >= 0xd8 && c <= 0xf6) return 1;
 934     if (c >= 0xf8 && c <= 0xff) return 1;
 935     break;
 936 
 937   case ISO_8859_2:
 938     if (c == 0xa1 || c == 0xa3) return 1;
 939     if (c == 0xa5 || c == 0xa6) return 1;
 940     if (c >= 0xa9 && c <= 0xac) return 1;
 941     if (c >= 0xae && c <= 0xaf) return 1;
 942     if (c == 0xb1 || c == 0xb3) return 1;
 943     if (c == 0xb5 || c == 0xb6) return 1;
 944     if (c >= 0xb9 && c <= 0xbc) return 1;
 945     if (c >= 0xbe && c <= 0xbf) return 1;
 946     if (c >= 0xc0 && c <= 0xd6) return 1;
 947     if (c >= 0xd8 && c <= 0xf6) return 1;
 948     if (c >= 0xf8 && c <= 0xfe) return 1;
 949     break;
 950 
 951   case ISO_8859_3:
 952     if (c == 0xa1) return 1;
 953     if (c == 0xa6) return 1;
 954     if (c >= 0xa9 && c <= 0xac) return 1;
 955     if (c == 0xaf) return 1;
 956     if (c >= 0xb1 && c <= 0xb3) return 1;
 957     if (c == 0xb5 || c == 0xb6) return 1;
 958     if (c >= 0xb9 && c <= 0xbd) return 1;
 959     if (c == 0xbf) return 1;
 960     if (c >= 0xc0 && c <= 0xc2) return 1;
 961     if (c >= 0xc4 && c <= 0xcf) return 1;
 962     if (c >= 0xd1 && c <= 0xd6) return 1;
 963     if (c >= 0xd8 && c <= 0xe2) return 1;
 964     if (c >= 0xe4 && c <= 0xef) return 1;
 965     if (c >= 0xf1 && c <= 0xf6) return 1;
 966     if (c >= 0xf8 && c <= 0xfe) return 1;
 967     break;
 968 
 969   case ISO_8859_4:
 970     if (c >= 0xa1 && c <= 0xa3) return 1;
 971     if (c == 0xa5 || c == 0xa6) return 1;
 972     if (c >= 0xa9 && c <= 0xac) return 1;
 973     if (c == 0xae) return 1;
 974     if (c == 0xb1 || c == 0xb3) return 1;
 975     if (c == 0xb5 || c == 0xb6) return 1;
 976     if (c >= 0xb9 && c <= 0xbf) return 1;
 977     if (c >= 0xc0 && c <= 0xd6) return 1;
 978     if (c >= 0xd8 && c <= 0xf6) return 1;
 979     if (c >= 0xf8 && c <= 0xfe) return 1;
 980     break;
 981 
 982   case ISO_8859_5:
 983     if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
 984     if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
 985     break;
 986 
 987   case ISO_8859_6:
 988     if (c >= 0xc1 && c <= 0xda) return 1;
 989     if (c >= 0xe0 && c <= 0xea) return 1;
 990     if (c >= 0xeb && c <= 0xf2) return 1;
 991     break;
 992 
 993   case ISO_8859_7:
 994     if (c == 0xb2 || c == 0xb3) return 1;
 995     if (c == 0xb6) return 1;
 996     if (c >= 0xb8 && c <= 0xba) return 1;
 997     if (c >= 0xbc && c <= 0xbf) return 1;
 998     if (c == 0xc0) return 1;
 999     if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
1000     if (c >= 0xdc && c <= 0xfe) return 1;
1001     break;
1002 
1003   case ISO_8859_8:
1004     if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
1005     if (c >= 0xbc && c <= 0xbe) return 1;
1006     if (c >= 0xe0 && c <= 0xfa) return 1;
1007     break;
1008 
1009   case ISO_8859_10:
1010     if (c >= 0xa1 && c <= 0xff) {
1011       if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd)
1012         return 1;
1013     }
1014     break;
1015 
1016   case ISO_8859_11:
1017     if (c >= 0xa1 && c <= 0xda) return 1;
1018     if (c >= 0xdf && c <= 0xfb) return 1;
1019     break;
1020 
1021   case ISO_8859_13:
1022     if (c == 0xa8) return 1;
1023     if (c == 0xaa) return 1;
1024     if (c == 0xaf) return 1;
1025     if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
1026     if (c >= 0xbc && c <= 0xbe) return 1;
1027     if (c == 0xb8) return 1;
1028     if (c == 0xba) return 1;
1029     if (c >= 0xbf && c <= 0xd6) return 1;
1030     if (c >= 0xd8 && c <= 0xf6) return 1;
1031     if (c >= 0xf8 && c <= 0xfe) return 1;
1032     break;
1033 
1034   case ISO_8859_14:
1035     if (c >= 0xa1 && c <= 0xff) {
1036       if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae ||
1037           c == 0xb6) return 0;
1038       return 1;
1039     }
1040     break;
1041 
1042   case ISO_8859_15:
1043     if (c == 0xaa) return 1;
1044     if (c >= 0xb2 && c <= 0xb3) return 1;
1045     if (c == 0xb5) return 1;
1046     if (c >= 0xb9 && c <= 0xba) return 1;
1047     if (c >= 0xbc && c <= 0xbe) return 1;
1048     if (c >= 0xc0 && c <= 0xd6) return 1;
1049     if (c >= 0xd8 && c <= 0xf6) return 1;
1050     if (c >= 0xf8 && c <= 0xff) return 1;
1051     if (c == 0xa6) return 1;
1052     if (c == 0xa8) return 1;
1053     if (c == 0xb4) return 1;
1054     if (c == 0xb8) return 1;
1055     break;
1056 
1057   case ISO_8859_16:
1058     if (c == 0xa1) return 1;
1059     if (c == 0xa2) return 1;
1060     if (c == 0xa3) return 1;
1061     if (c == 0xa6) return 1;
1062     if (c == 0xa8) return 1;
1063     if (c == 0xaa) return 1;
1064     if (c == 0xac) return 1;
1065     if (c == 0xae) return 1;
1066     if (c == 0xaf) return 1;
1067     if (c == 0xb2) return 1;
1068     if (c == 0xb3) return 1;
1069     if (c == 0xb4) return 1;
1070     if (c >= 0xb8 && c <= 0xba) return 1;
1071     if (c == 0xbc) return 1;
1072     if (c == 0xbd) return 1;
1073     if (c == 0xbe) return 1;
1074     if (c == 0xbf) return 1;
1075     if (c >= 0xc0 && c <= 0xde) return 1;
1076     if (c >= 0xdf && c <= 0xff) return 1;
1077     break;
1078 
1079   case KOI8_R:
1080     if (c == 0x9d) return 1;
1081     if (c == 0xa3 || c == 0xb3) return 1;
1082     /* fall */
1083   case KOI8:
1084     if (c >= 0xc0 && c <= 0xff) return 1;
1085     break;
1086 
1087   default:
1088     exit(-1);
1089   }
1090 
1091   return 0;
1092 }
1093 
1094 static int IsAscii(int enc ARG_UNUSED, int c)
1095 {
1096   if (c >= 0x00 && c <= 0x7f) return 1;
1097   return 0;
1098 }
1099 
1100 static int IsNewline(int enc ARG_UNUSED, int c)
1101 {
1102   if (c == 0x0a) return 1;
1103   return 0;
1104 }
1105 
1106 static int exec(FILE* fp, ENC_INFO* einfo)
1107 {
1108 #define NCOL  8
1109 
1110   int c, val, enc;
1111 
1112   enc = einfo->num;
1113 
1114   fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
1115           einfo->name);
1116 
1117   for (c = 0; c < 256; c++) {
1118     val = 0;
1119     if (IsNewline(enc, c))  val |= BIT_CTYPE_NEWLINE;
1120     if (IsAlpha (enc, c))   val |= (BIT_CTYPE_ALPHA | BIT_CTYPE_ALNUM);
1121     if (IsBlank (enc, c))   val |= BIT_CTYPE_BLANK;
1122     if (IsCntrl (enc, c))   val |= BIT_CTYPE_CNTRL;
1123     if (IsDigit (enc, c))   val |= (BIT_CTYPE_DIGIT | BIT_CTYPE_ALNUM);
1124     if (IsGraph (enc, c))   val |= BIT_CTYPE_GRAPH;
1125     if (IsLower (enc, c))   val |= BIT_CTYPE_LOWER;
1126     if (IsPrint (enc, c))   val |= BIT_CTYPE_PRINT;
1127     if (IsPunct (enc, c))   val |= BIT_CTYPE_PUNCT;
1128     if (IsSpace (enc, c))   val |= BIT_CTYPE_SPACE;
1129     if (IsUpper (enc, c))   val |= BIT_CTYPE_UPPER;
1130     if (IsXDigit(enc, c))   val |= BIT_CTYPE_XDIGIT;
1131     if (IsWord  (enc, c))   val |= BIT_CTYPE_WORD;
1132     if (IsAscii (enc, c))   val |= BIT_CTYPE_ASCII;
1133 
1134     if (c % NCOL == 0) fputs("  ", fp);
1135     fprintf(fp, "0x%04x", val);
1136     if (c != 255) fputs(",", fp);
1137     if (c != 0 && c % NCOL == (NCOL-1))
1138       fputs("\n", fp);
1139     else
1140       fputs(" ", fp);
1141   }
1142   fprintf(fp, "};\n");
1143   return 0;
1144 }
1145 
1146 extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
1147 {
1148   int i;
1149   FILE* fp = stdout;
1150 
1151   setlocale(LC_ALL, "C");
1152   /* setlocale(LC_ALL, "POSIX"); */
1153   /* setlocale(LC_ALL, "en_GB.iso88591"); */
1154   /* setlocale(LC_ALL, "de_BE.iso88591"); */
1155   /* setlocale(LC_ALL, "fr_FR.iso88591"); */
1156 
1157   for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) {
1158     exec(fp, &Info[i]);
1159   }
1160 
1161   return 0;
1162 }

/* [<][>][^][v][top][bottom][index][help] */