root/ext/gd/libgd/gdkanji.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. debug
  2. error
  3. DetectKanjiCode
  4. SJIStoJIS
  5. han2zen
  6. do_convert
  7. do_check_and_conv
  8. any2eucjp
  9. strwidth
  10. main

   1 
   2 /* gdkanji.c (Kanji code converter)                            */
   3 /*                 written by Masahito Yamaga (ma@yama-ga.com) */
   4 
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <string.h>
   8 #include "gd.h"
   9 #include "gdhelpers.h"
  10 
  11 #include <stdarg.h>
  12 #if defined(HAVE_ICONV_H) || defined(HAVE_ICONV)
  13 #include <iconv.h>
  14 #ifdef HAVE_ERRNO_H
  15 #include <errno.h>
  16 #endif
  17 #endif
  18 
  19 #if defined(HAVE_ICONV_H) && !defined(HAVE_ICONV)
  20 #define HAVE_ICONV 1
  21 #endif
  22 
  23 #define LIBNAME "any2eucjp()"
  24 
/* On DOS/Windows-family compilers, define SJISPRE (unless the build already
   did).  DetectKanjiCode() checks SJISPRE to choose SJIS instead of EUC when
   the input is ambiguous. */
#if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
#ifndef SJISPRE
#define SJISPRE 1
#endif
#endif

/* Drop any TRUE/FALSE inherited from earlier headers so the definitions
   below are the ones used in this file. */
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif

#define TRUE  1
#define FALSE 0

/* Encoding identifiers produced by DetectKanjiCode() and switched on in
   do_check_and_conv(). */
#define NEW 1                   /* ESC $ B seen (reported as "New JIS") */
#define OLD 2                   /* ESC $ @ seen (reported as "Old JIS") */
#define ESCI 3                  /* ESC ( I seen (Hankaku-Kana escape sequence) */
#define NEC 4                   /* ESC K seen (NEC Kanji; not convertible here) */
#define EUC 5
#define SJIS 6
#define EUCORSJIS 7             /* bytes seen so far fit both EUC and SJIS */
#define ASCII 8                 /* nothing but plain bytes seen so far */

/* Source-encoding names handed to do_convert(); with iconv they are passed
   to iconv_open() as the "from" code, EUCSTR being the "to" code. */
#define NEWJISSTR "JIS7"
#define OLDJISSTR "jis"
#define EUCSTR    "eucJP"
#define SJISSTR   "SJIS"

#define ESC 27                  /* escape byte that starts a JIS shift sequence */
#define SS2 142                 /* byte written in front of each Hankaku Kana byte */
  58 static void
  59 debug (const char *format,...)
  60 {
  61 #ifdef DEBUG
  62   va_list args;
  63 
  64   va_start (args, format);
  65   fprintf (stdout, "%s: ", LIBNAME);
  66   vfprintf (stdout, format, args);
  67   fprintf (stdout, "\n");
  68   va_end (args);
  69 #endif
  70 }
  71 
  72 static void
  73 error (const char *format,...)
  74 {
  75         va_list args;
  76         char *tmp;
  77 
  78         va_start(args, format);
  79         vspprintf(&tmp, 0, format, args);
  80         va_end(args);
  81         php_error_docref(NULL, E_WARNING, "%s: %s", LIBNAME, tmp);
  82         efree(tmp);
  83 }
  84 
/* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */

/* Guess which Japanese encoding the NUL-terminated byte string `str` uses.
 *
 * The string is scanned byte by byte until a byte pattern settles the
 * question or the terminating NUL is reached.  If the scan stays ambiguous
 * (EUCORSJIS), the result falls back, in order, to: the definite result of
 * the previous call, the locale named by LC_ALL / LC_CTYPE / LANG, and
 * finally a compile-time default selected by SJISPRE.
 *
 * Returns one of NEW, OLD, ESCI, NEC, EUC, SJIS or ASCII; EUCORSJIS is
 * always resolved before returning.
 *
 * The result is kept in a function-level static, so the outcome for an
 * ambiguous string depends on earlier calls.
 */
static int
DetectKanjiCode (unsigned char *str)
{
  static int whatcode = ASCII;  /* detection state; persists between calls */
  int oldcode = ASCII;          /* definite result of the previous call, if any */
  int c, i;
  char *lang = NULL;

  c = '\1';                     /* any non-NUL value, so the scan loop is entered */
  i = 0;

  /* Remember a definite result from the previous call and start afresh. */
  if (whatcode != EUCORSJIS && whatcode != ASCII)
    {
      oldcode = whatcode;
      whatcode = ASCII;
    }

  /* Keep scanning while the encoding is undecided and input remains. */
  while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
    {
      if ((c = str[i++]) != '\0')
        {
          if (c == ESC)
            {
              /* Escape sequences: ESC $ B, ESC $ @, ESC ( I, ESC K. */
              c = str[i++];
              if (c == '$')
                {
                  c = str[i++];
                  if (c == 'B')
                    whatcode = NEW;
                  else if (c == '@')
                    whatcode = OLD;
                }
              else if (c == '(')
                {
                  c = str[i++];
                  if (c == 'I')
                    whatcode = ESCI;
                }
              else if (c == 'K')
                whatcode = NEC;
            }
          /* Bytes 129-141 and 143-159 are classified as SJIS outright. */
          else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
            whatcode = SJIS;
          else if (c == SS2)
            {
              /* Byte 142: the following byte decides between SJIS and
                 "still ambiguous". */
              c = str[i++];
              if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) || (c >= 224 && c <= 252))
                whatcode = SJIS;
              else if (c >= 161 && c <= 223)
                whatcode = EUCORSJIS;
            }
          else if (c >= 161 && c <= 223)
            {
              c = str[i++];
              if (c >= 240 && c <= 254)
                whatcode = EUC;
              else if (c >= 161 && c <= 223)
                whatcode = EUCORSJIS;
              else if (c >= 224 && c <= 239)
                {
                  /* Still ambiguous: keep consuming bytes >= 64 until one
                     of them is decisive or a smaller byte (including NUL)
                     ends the run. */
                  whatcode = EUCORSJIS;
                  while (c >= 64 && c != '\0' && whatcode == EUCORSJIS)
                    {
                      if (c >= 129)
                        {
                          if (c <= 141 || (c >= 143 && c <= 159))
                            whatcode = SJIS;
                          else if (c >= 253 && c <= 254)
                            whatcode = EUC;
                        }
                      c = str[i++];
                    }
                }
              else if (c <= 159)
                whatcode = SJIS;
            }
          else if (c >= 240 && c <= 254)
            whatcode = EUC;
          else if (c >= 224 && c <= 239)
            {
              c = str[i++];
              if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
                whatcode = SJIS;
              else if (c >= 253 && c <= 254)
                whatcode = EUC;
              else if (c >= 161 && c <= 252)
                whatcode = EUCORSJIS;
            }
        }
    }

#ifdef DEBUG
  if (whatcode == ASCII)
    debug ("Kanji code not included.");
  else if (whatcode == EUCORSJIS)
    debug ("Kanji code not detected.");
  else
    debug ("Kanji code detected at %d byte.", i);
#endif

  /* Fallback 1: reuse the definite result of the previous call. */
  if (whatcode == EUCORSJIS && oldcode != ASCII)
    whatcode = oldcode;

  /* Fallback 2: consult the locale environment variables. */
  if (whatcode == EUCORSJIS)
    {
      if (getenv ("LC_ALL"))
        lang = getenv ("LC_ALL");
      else if (getenv ("LC_CTYPE"))
        lang = getenv ("LC_CTYPE");
      else if (getenv ("LANG"))
        lang = getenv ("LANG");

      if (lang)
        {
          if (strcmp (lang, "ja_JP.SJIS") == 0 ||
#ifdef hpux
              strcmp (lang, "japanese") == 0 ||
#endif
              strcmp (lang, "ja_JP.mscode") == 0 ||
              strcmp (lang, "ja_JP.PCK") == 0)
            whatcode = SJIS;
          /* Any other locale starting with "ja" gets the build default. */
          else if (strncmp (lang, "ja", 2) == 0)
#ifdef SJISPRE
            whatcode = SJIS;
#else
            whatcode = EUC;
#endif
        }
    }

  /* Fallback 3: compile-time default. */
  if (whatcode == EUCORSJIS)
#ifdef SJISPRE
    whatcode = SJIS;
#else
    whatcode = EUC;
#endif

  return whatcode;
}
 226 
 227 /* SJIStoJIS() is sjis2jis() by Ken Lunde. */
 228 
 229 static void
 230 SJIStoJIS (int *p1, int *p2)
 231 {
 232   register unsigned char c1 = *p1;
 233   register unsigned char c2 = *p2;
 234   register int adjust = c2 < 159;
 235   register int rowOffset = c1 < 160 ? 112 : 176;
 236   register int cellOffset = adjust ? (31 + (c2 > 127)) : 126;
 237 
 238   *p1 = ((c1 - rowOffset) << 1) - adjust;
 239   *p2 -= cellOffset;
 240 }
 241 
 242 /* han2zen() was derived from han2zen() written by Ken Lunde. */
 243 
 244 #define IS_DAKU(c) ((c >= 182 && c <= 196) || (c >= 202 && c <= 206) || (c == 179))
 245 #define IS_HANDAKU(c) (c >= 202 && c <= 206)
 246 
 247 static void
 248 han2zen (int *p1, int *p2)
 249 {
 250   int c = *p1;
 251   int daku = FALSE;
 252   int handaku = FALSE;
 253   int mtable[][2] =
 254   {
 255     {129, 66},
 256     {129, 117},
 257     {129, 118},
 258     {129, 65},
 259     {129, 69},
 260     {131, 146},
 261     {131, 64},
 262     {131, 66},
 263     {131, 68},
 264     {131, 70},
 265     {131, 72},
 266     {131, 131},
 267     {131, 133},
 268     {131, 135},
 269     {131, 98},
 270     {129, 91},
 271     {131, 65},
 272     {131, 67},
 273     {131, 69},
 274     {131, 71},
 275     {131, 73},
 276     {131, 74},
 277     {131, 76},
 278     {131, 78},
 279     {131, 80},
 280     {131, 82},
 281     {131, 84},
 282     {131, 86},
 283     {131, 88},
 284     {131, 90},
 285     {131, 92},
 286     {131, 94},
 287     {131, 96},
 288     {131, 99},
 289     {131, 101},
 290     {131, 103},
 291     {131, 105},
 292     {131, 106},
 293     {131, 107},
 294     {131, 108},
 295     {131, 109},
 296     {131, 110},
 297     {131, 113},
 298     {131, 116},
 299     {131, 119},
 300     {131, 122},
 301     {131, 125},
 302     {131, 126},
 303     {131, 128},
 304     {131, 129},
 305     {131, 130},
 306     {131, 132},
 307     {131, 134},
 308     {131, 136},
 309     {131, 137},
 310     {131, 138},
 311     {131, 139},
 312     {131, 140},
 313     {131, 141},
 314     {131, 143},
 315     {131, 147},
 316     {129, 74},
 317     {129, 75}
 318   };
 319 
 320   if (*p2 == 222 && IS_DAKU (*p1))
 321     daku = TRUE;                /* Daku-ten */
 322   else if (*p2 == 223 && IS_HANDAKU (*p1))
 323     handaku = TRUE;             /* Han-daku-ten */
 324 
 325   *p1 = mtable[c - 161][0];
 326   *p2 = mtable[c - 161][1];
 327 
 328   if (daku)
 329     {
 330       if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
 331         (*p2)++;
 332       else if (*p2 == 131 && *p2 == 69)
 333         *p2 = 148;
 334     }
 335   else if (handaku && *p2 >= 110 && *p2 <= 122)
 336     (*p2) += 2;
 337 }
 338 
 339 /* Recast strcpy to handle unsigned chars used below. */
 340 #define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
 341 
 342 static void
 343 do_convert (unsigned char *to, unsigned char *from, const char *code)
 344 {
 345 #ifdef HAVE_ICONV
 346   iconv_t cd;
 347   size_t from_len, to_len;
 348 
 349   if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1)
 350     {
 351       error ("iconv_open() error");
 352 #ifdef HAVE_ERRNO_H
 353       if (errno == EINVAL)
 354         error ("invalid code specification: \"%s\" or \"%s\"",
 355                EUCSTR, code);
 356 #endif
 357       strcpy ((char *) to, (const char *) from);
 358       return;
 359     }
 360 
 361   from_len = strlen ((const char *) from) + 1;
 362   to_len = BUFSIZ;
 363 
 364   if ((int) iconv(cd, (char **) &from, &from_len, (char **) &to, &to_len) == -1)
 365     {
 366 #ifdef HAVE_ERRNO_H
 367       if (errno == EINVAL)
 368         error ("invalid end of input string");
 369       else if (errno == EILSEQ)
 370         error ("invalid code in input string");
 371       else if (errno == E2BIG)
 372         error ("output buffer overflow at do_convert()");
 373       else
 374 #endif
 375         error ("something happen");
 376       strcpy ((char *) to, (const char *) from);
 377       return;
 378     }
 379 
 380   if (iconv_close (cd) != 0)
 381     {
 382       error ("iconv_close() error");
 383     }
 384 #else
 385   int p1, p2, i, j;
 386   int jisx0208 = FALSE;
 387   int hankaku = FALSE;
 388 
 389   j = 0;
 390   if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
 391     {
 392       for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
 393         {
 394           if (from[i] == ESC)
 395             {
 396               i++;
 397               if (from[i] == '$')
 398                 {
 399                   jisx0208 = TRUE;
 400                   hankaku = FALSE;
 401                   i++;
 402                 }
 403               else if (from[i] == '(')
 404                 {
 405                   jisx0208 = FALSE;
 406                   i++;
 407                   if (from[i] == 'I')   /* Hankaku Kana */
 408                     hankaku = TRUE;
 409                   else
 410                     hankaku = FALSE;
 411                 }
 412             }
 413           else
 414             {
 415               if (jisx0208)
 416                 to[j++] = from[i] + 128;
 417               else if (hankaku)
 418                 {
 419                   to[j++] = SS2;
 420                   to[j++] = from[i] + 128;
 421                 }
 422               else
 423                 to[j++] = from[i];
 424             }
 425         }
 426     }
 427   else if (strcmp (code, SJISSTR) == 0)
 428     {
 429       for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
 430         {
 431           p1 = from[i];
 432           if (p1 < 127)
 433             to[j++] = p1;
 434           else if ((p1 >= 161) && (p1 <= 223))
 435             {                   /* Hankaku Kana */
 436               to[j++] = SS2;
 437               to[j++] = p1;
 438             }
 439           else
 440             {
 441               p2 = from[++i];
 442               SJIStoJIS (&p1, &p2);
 443               to[j++] = p1 + 128;
 444               to[j++] = p2 + 128;
 445             }
 446         }
 447     }
 448   else
 449     {
 450       error ("invalid code specification: \"%s\"", code);
 451       return;
 452     }
 453 
 454   if (j >= BUFSIZ)
 455     {
 456       error ("output buffer overflow at do_convert()");
 457       ustrcpy (to, from);
 458     }
 459   else
 460     to[j] = '\0';
 461 #endif /* HAVE_ICONV */
 462 }
 463 
 464 static int
 465 do_check_and_conv (unsigned char *to, unsigned char *from)
 466 {
 467   static unsigned char tmp[BUFSIZ];
 468   int p1, p2, i, j;
 469   int kanji = TRUE;
 470 
 471   switch (DetectKanjiCode (from))
 472     {
 473     case NEW:
 474       debug ("Kanji code is New JIS.");
 475       do_convert (tmp, from, NEWJISSTR);
 476       break;
 477     case OLD:
 478       debug ("Kanji code is Old JIS.");
 479       do_convert (tmp, from, OLDJISSTR);
 480       break;
 481     case ESCI:
 482       debug ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
 483       do_convert (tmp, from, NEWJISSTR);
 484       break;
 485     case NEC:
 486       debug ("Kanji code is NEC Kanji.");
 487       error ("cannot convert NEC Kanji.");
 488       ustrcpy (tmp, from);
 489       kanji = FALSE;
 490       break;
 491     case EUC:
 492       debug ("Kanji code is EUC.");
 493       ustrcpy (tmp, from);
 494       break;
 495     case SJIS:
 496       debug ("Kanji code is SJIS.");
 497       do_convert (tmp, from, SJISSTR);
 498       break;
 499     case EUCORSJIS:
 500       debug ("Kanji code is EUC or SJIS.");
 501       ustrcpy (tmp, from);
 502       kanji = FALSE;
 503       break;
 504     case ASCII:
 505       debug ("This is ASCII string.");
 506       ustrcpy (tmp, from);
 507       kanji = FALSE;
 508       break;
 509     default:
 510       debug ("This string includes unknown code.");
 511       ustrcpy (tmp, from);
 512       kanji = FALSE;
 513       break;
 514     }
 515 
 516   /* Hankaku Kana ---> Zenkaku Kana */
 517   if (kanji)
 518     {
 519       j = 0;
 520       for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++)
 521         {
 522           if (tmp[i] == SS2)
 523             {
 524               p1 = tmp[++i];
 525               if (tmp[i + 1] == SS2)
 526                 {
 527                   p2 = tmp[i + 2];
 528                   if (p2 == 222 || p2 == 223)
 529                     i += 2;
 530                   else
 531                     p2 = 0;
 532                 }
 533               else
 534                 p2 = 0;
 535               han2zen (&p1, &p2);
 536               SJIStoJIS (&p1, &p2);
 537               to[j++] = p1 + 128;
 538               to[j++] = p2 + 128;
 539             }
 540           else
 541             to[j++] = tmp[i];
 542         }
 543 
 544       if (j >= BUFSIZ)
 545         {
 546           error ("output buffer overflow at Hankaku --> Zenkaku");
 547           ustrcpy (to, tmp);
 548         }
 549       else
 550         to[j] = '\0';
 551     }
 552   else
 553     ustrcpy (to, tmp);
 554 
 555   return kanji;
 556 }
 557 
 558 int
 559 any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
 560 {
 561   static unsigned char tmp_dest[BUFSIZ];
 562   int ret;
 563 
 564   if (strlen ((const char *) src) >= BUFSIZ)
 565     {
 566       error ("input string too large");
 567       return -1;
 568     }
 569   if (dest_max > BUFSIZ)
 570     {
 571       error ("invalid maximum size of destination\nit should be less than %d.", BUFSIZ);
 572       return -1;
 573     }
 574   ret = do_check_and_conv (tmp_dest, src);
 575   if (strlen ((const char *) tmp_dest) >= dest_max)
 576     {
 577       error ("output buffer overflow");
 578       ustrcpy (dest, src);
 579       return -1;
 580     }
 581   ustrcpy (dest, tmp_dest);
 582   return ret;
 583 }
 584 
 585 #if 0
 586 unsigned int
 587 strwidth (unsigned char *s)
 588 {
 589   unsigned char *t;
 590   unsigned int i;
 591 
 592   t = (unsigned char *) gdMalloc (BUFSIZ);
 593   any2eucjp (t, s, BUFSIZ);
 594   i = strlen (t);
 595   gdFree (t);
 596   return i;
 597 }
 598 
 599 #ifdef DEBUG
 600 int
 601 main ()
 602 {
 603   unsigned char input[BUFSIZ];
 604   unsigned char *output;
 605   unsigned char *str;
 606   int c, i = 0;
 607 
 608   while ((c = fgetc (stdin)) != '\n' && i < BUFSIZ)
 609     input[i++] = c;
 610   input[i] = '\0';
 611 
 612   printf ("input : %d bytes\n", strlen ((const char *) input));
 613   printf ("output: %d bytes\n", strwidth (input));
 614 
 615   output = (unsigned char *) gdMalloc (BUFSIZ);
 616   any2eucjp (output, input, BUFSIZ);
 617   str = output;
 618   while (*str != '\0')
 619     putchar (*(str++));
 620   putchar ('\n');
 621   gdFree (output);
 622 
 623   return 0;
 624 }
 625 #endif
 626 #endif

/* [<][>][^][v][top][bottom][index][help] */