root/ext/mbstring/ucgendat/ucgendat.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. add_range
  2. ordered_range_insert
  3. add_decomp
  4. add_title
  5. add_upper
  6. add_lower
  7. ordered_ccl_insert
  8. make_number
  9. add_number
  10. read_cdata
  11. find_decomp
  12. decomp_it
  13. expand_decomp
  14. cmpcomps
  15. read_compexdata
  16. create_comps
  17. write_case
  18. write_cdata
  19. usage
  20. main

   1 /* Further modified for PHP */
   2 /* $Id$ */
   3 
   4 /* $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucgendat.c,v 1.36.2.4 2007/01/02 21:43:51 kurt Exp $ */
   5 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
   6  *
   7  * Copyright 1998-2007 The OpenLDAP Foundation.
   8  * All rights reserved.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted only as authorized by the OpenLDAP
  12  * Public License.
  13  *
  14  * A copy of this license is available at
  15  * <http://www.OpenLDAP.org/license.html>.
  16  */
  17 
  18 /* Copyright 2001 Computing Research Labs, New Mexico State University
  19  *
  20  * Permission is hereby granted, free of charge, to any person obtaining a
  21  * copy of this software and associated documentation files (the "Software"),
  22  * to deal in the Software without restriction, including without limitation
  23  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  24  * and/or sell copies of the Software, and to permit persons to whom the
  25  * Software is furnished to do so, subject to the following conditions:
  26  *
  27  * The above copyright notice and this permission notice shall be included in
  28  * all copies or substantial portions of the Software.
  29  *
  30  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  31  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  32  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  33  * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
  34  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
  35  * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
  36  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  37  */
  38 /* orig Id: ucgendat.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */
  39 
  40 #include <stdio.h>
  41 #include <ctype.h>
  42 #include <stdlib.h>
  43 #include <string.h>
  44 #include <unistd.h>
  45 
  46 #define ac_uint2 unsigned short
  47 #define ac_uint4 unsigned int
  48 #define LDAP_DIRSEP "/"
  49 #define AC_MEMCPY memcpy
  50 
  51 #ifndef HARDCODE_DATA
  52 #define HARDCODE_DATA   1
  53 #endif
  54 
  55 #undef ishdigit
  56 #define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\
  57                       ((cc) >= 'A' && (cc) <= 'F') ||\
  58                       ((cc) >= 'a' && (cc) <= 'f'))
  59 
  60 /*
  61  * A header written to the output file with the byte-order-mark and the number
  62  * of property nodes.
  63  */
  64 static ac_uint2 hdr[2] = {0xfeff, 0};
  65 
  66 #define NUMPROPS 50
  67 #define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))
  68 
  69 typedef struct {
  70     char *name;
  71     int len;
  72 } _prop_t;
  73 
  74 /*
  75  * List of properties expected to be found in the Unicode Character Database
  76  * including some implementation specific properties.
  77  *
  78  * The implementation specific properties are:
  79  * Cm = Composed (can be decomposed)
  80  * Nb = Non-breaking
  81  * Sy = Symmetric (has left and right forms)
  82  * Hd = Hex digit
  83  * Qm = Quote marks
  84  * Mr = Mirroring
  85  * Ss = Space, other
  86  * Cp = Defined character
  87  */
  88 static _prop_t props[NUMPROPS] = {
  89     {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2},
  90     {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2},
  91     {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2},
  92     {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
  93     {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L",  1}, {"R",  1},
  94     {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B",  1},
  95     {"S",  1}, {"WS", 2}, {"ON", 2},
  96     {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
  97     {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}, {"AL", 2}
  98 };
  99 
 100 typedef struct {
 101     ac_uint4 *ranges;
 102     ac_uint2 used;
 103     ac_uint2 size;
 104 } _ranges_t;
 105 
 106 static _ranges_t proptbl[NUMPROPS];
 107 
 108 /*
 109  * Make sure this array is sized to be on a 4-byte boundary at compile time.
 110  */
 111 static ac_uint2 propcnt[NEEDPROPS];
 112 
 113 /*
 114  * Array used to collect a decomposition before adding it to the decomposition
 115  * table.
 116  */
 117 static ac_uint4 dectmp[64];
 118 static ac_uint4 dectmp_size;
 119 
 120 typedef struct {
 121     ac_uint4 code;
 122     ac_uint2 size;
 123     ac_uint2 used;
 124     ac_uint4 *decomp;
 125 } _decomp_t;
 126 
 127 /*
 128  * List of decomposition.  Created and expanded in order as the characters are
 129  * encountered. First list contains canonical mappings, second also includes
 130  * compatibility mappings.
 131  */
 132 static _decomp_t *decomps;
 133 static ac_uint4 decomps_used;
 134 static ac_uint4 decomps_size;
 135 
 136 static _decomp_t *kdecomps;
 137 static ac_uint4 kdecomps_used;
 138 static ac_uint4 kdecomps_size;
 139 
 140 /*
 141  * Composition exclusion table stuff.
 142  */
 143 #define COMPEX_SET(c) (compexs[(c) >> 5] |= (1 << ((c) & 31)))
 144 #define COMPEX_TEST(c) (compexs[(c) >> 5] & (1 << ((c) & 31)))
 145 static ac_uint4 compexs[8192];
 146 
 147 /*
 148  * Struct for holding a composition pair, and array of composition pairs
 149  */
 150 typedef struct {
 151     ac_uint4 comp;
 152     ac_uint4 count;
 153     ac_uint4 code1;
 154     ac_uint4 code2;
 155 } _comp_t;
 156 
 157 #if 0
 158 static _comp_t *comps;
 159 #endif
 160 static ac_uint4 comps_used;
 161 
 162 /*
 163  * Types and lists for handling lists of case mappings.
 164  */
 165 typedef struct {
 166     ac_uint4 key;
 167     ac_uint4 other1;
 168     ac_uint4 other2;
 169 } _case_t;
 170 
 171 static _case_t *upper;
 172 static _case_t *lower;
 173 static _case_t *title;
 174 static ac_uint4 upper_used;
 175 static ac_uint4 upper_size;
 176 static ac_uint4 lower_used;
 177 static ac_uint4 lower_size;
 178 static ac_uint4 title_used;
 179 static ac_uint4 title_size;
 180 
 181 /*
 182  * Array used to collect case mappings before adding them to a list.
 183  */
 184 static ac_uint4 cases[3];
 185 
 186 /*
 187  * An array to hold ranges for combining classes.
 188  */
 189 static ac_uint4 *ccl;
 190 static ac_uint4 ccl_used;
 191 static ac_uint4 ccl_size;
 192 
 193 /*
 194  * Structures for handling numbers.
 195  */
 196 typedef struct {
 197     ac_uint4 code;
 198     ac_uint4 idx;
 199 } _codeidx_t;
 200 
 201 typedef struct {
 202     short numerator;
 203     short denominator;
 204 } _num_t;
 205 
 206 /*
 207  * Arrays to hold the mapping of codes to numbers.
 208  */
 209 static _codeidx_t *ncodes;
 210 static ac_uint4 ncodes_used;
 211 static ac_uint4 ncodes_size;
 212 
 213 static _num_t *nums;
 214 static ac_uint4 nums_used;
 215 static ac_uint4 nums_size;
 216 
 217 /*
 218  * Array for holding numbers.
 219  */
 220 static _num_t *nums;
 221 static ac_uint4 nums_used;
 222 static ac_uint4 nums_size;
 223 
 224 static void
 225 add_range(ac_uint4 start, ac_uint4 end, char *p1, char *p2)
 226 {
 227     int i, j, k, len;
 228     _ranges_t *rlp;
 229     char *name;
 230 
 231     for (k = 0; k < 2; k++) {
 232         if (k == 0) {
 233             name = p1;
 234             len = 2;
 235         } else {
 236             if (p2 == 0)
 237               break;
 238 
 239             name = p2;
 240             len = 1;
 241         }
 242 
 243         for (i = 0; i < NUMPROPS; i++) {
 244             if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
 245               break;
 246         }
 247 
 248         if (i == NUMPROPS)
 249           continue;
 250 
 251         rlp = &proptbl[i];
 252 
 253         /*
 254          * Resize the range list if necessary.
 255          */
 256         if (rlp->used == rlp->size) {
 257             if (rlp->size == 0)
 258               rlp->ranges = (ac_uint4 *)
 259                   malloc(sizeof(ac_uint4) << 3);
 260             else
 261               rlp->ranges = (ac_uint4 *)
 262                   realloc((char *) rlp->ranges,
 263                           sizeof(ac_uint4) * (rlp->size + 8));
 264             rlp->size += 8;
 265         }
 266 
 267         /*
 268          * If this is the first code for this property list, just add it
 269          * and return.
 270          */
 271         if (rlp->used == 0) {
 272             rlp->ranges[0] = start;
 273             rlp->ranges[1] = end;
 274             rlp->used += 2;
 275             continue;
 276         }
 277 
 278         /*
 279          * Optimize the case of adding the range to the end.
 280          */
 281         j = rlp->used - 1;
 282         if (start > rlp->ranges[j]) {
 283             j = rlp->used;
 284             rlp->ranges[j++] = start;
 285             rlp->ranges[j++] = end;
 286             rlp->used = j;
 287             continue;
 288         }
 289 
 290         /*
 291          * Need to locate the insertion point.
 292          */
 293         for (i = 0;
 294              i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;
 295 
 296         /*
 297          * If the start value lies in the current range, then simply set the
 298          * new end point of the range to the end value passed as a parameter.
 299          */
 300         if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) {
 301             rlp->ranges[i + 1] = end;
 302             return;
 303         }
 304 
 305         /*
 306          * Shift following values up by two.
 307          */
 308         for (j = rlp->used; j > i; j -= 2) {
 309             rlp->ranges[j] = rlp->ranges[j - 2];
 310             rlp->ranges[j + 1] = rlp->ranges[j - 1];
 311         }
 312 
 313         /*
 314          * Add the new range at the insertion point.
 315          */
 316         rlp->ranges[i] = start;
 317         rlp->ranges[i + 1] = end;
 318         rlp->used += 2;
 319     }
 320 }
 321 
 322 static void
 323 ordered_range_insert(ac_uint4 c, char *name, int len)
 324 {
 325     int i, j;
 326     ac_uint4 s, e;
 327     _ranges_t *rlp;
 328 
 329     if (len == 0)
 330       return;
 331 
 332     /*
 333      * Deal with directionality codes introduced in Unicode 3.0.
 334      */
 335     if ((len == 2 && memcmp(name, "BN", 2) == 0) ||
 336         (len == 3 &&
 337          (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 ||
 338           memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 ||
 339           memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0))) {
 340         /*
 341          * Mark all of these as Other Neutral to preserve compatibility with
 342          * older versions.
 343          */
 344         len = 2;
 345         name = "ON";
 346     }
 347 
 348     for (i = 0; i < NUMPROPS; i++) {
 349         if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
 350           break;
 351     }
 352 
 353     if (i == NUMPROPS)
 354       return;
 355 
 356     /*
 357      * Have a match, so insert the code in order.
 358      */
 359     rlp = &proptbl[i];
 360 
 361     /*
 362      * Resize the range list if necessary.
 363      */
 364     if (rlp->used == rlp->size) {
 365         if (rlp->size == 0)
 366           rlp->ranges = (ac_uint4 *)
 367               malloc(sizeof(ac_uint4) << 3);
 368         else
 369           rlp->ranges = (ac_uint4 *)
 370               realloc((char *) rlp->ranges,
 371                       sizeof(ac_uint4) * (rlp->size + 8));
 372         rlp->size += 8;
 373     }
 374 
 375     /*
 376      * If this is the first code for this property list, just add it
 377      * and return.
 378      */
 379     if (rlp->used == 0) {
 380         rlp->ranges[0] = rlp->ranges[1] = c;
 381         rlp->used += 2;
 382         return;
 383     }
 384 
 385     /*
 386      * Optimize the cases of extending the last range and adding new ranges to
 387      * the end.
 388      */
 389     j = rlp->used - 1;
 390     e = rlp->ranges[j];
 391     s = rlp->ranges[j - 1];
 392 
 393     if (c == e + 1) {
 394         /*
 395          * Extend the last range.
 396          */
 397         rlp->ranges[j] = c;
 398         return;
 399     }
 400 
 401     if (c > e + 1) {
 402         /*
 403          * Start another range on the end.
 404          */
 405         j = rlp->used;
 406         rlp->ranges[j] = rlp->ranges[j + 1] = c;
 407         rlp->used += 2;
 408         return;
 409     }
 410 
 411     if (c >= s)
 412       /*
 413        * The code is a duplicate of a code in the last range, so just return.
 414        */
 415       return;
 416 
 417     /*
 418      * The code should be inserted somewhere before the last range in the
 419      * list.  Locate the insertion point.
 420      */
 421     for (i = 0;
 422          i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;
 423 
 424     s = rlp->ranges[i];
 425     e = rlp->ranges[i + 1];
 426 
 427     if (c == e + 1)
 428       /*
 429        * Simply extend the current range.
 430        */
 431       rlp->ranges[i + 1] = c;
 432     else if (c < s) {
 433         /*
 434          * Add a new entry before the current location.  Shift all entries
 435          * before the current one up by one to make room.
 436          */
 437         for (j = rlp->used; j > i; j -= 2) {
 438             rlp->ranges[j] = rlp->ranges[j - 2];
 439             rlp->ranges[j + 1] = rlp->ranges[j - 1];
 440         }
 441         rlp->ranges[i] = rlp->ranges[i + 1] = c;
 442 
 443         rlp->used += 2;
 444     }
 445 }
 446 
 447 static void
 448 add_decomp(ac_uint4 code, short compat)
 449 {
 450     ac_uint4 i, j, size;
 451     _decomp_t **pdecomps;
 452     ac_uint4 *pdecomps_used;
 453     ac_uint4 *pdecomps_size;
 454 
 455     if (compat) {
 456         pdecomps = &kdecomps;
 457         pdecomps_used = &kdecomps_used;
 458         pdecomps_size = &kdecomps_size;
 459     } else {
 460         pdecomps = &decomps;
 461         pdecomps_used = &decomps_used;
 462         pdecomps_size = &decomps_size;
 463     }
 464 
 465     /*
 466      * Add the code to the composite property.
 467      */
 468     if (!compat) {
 469         ordered_range_insert(code, "Cm", 2);
 470     }
 471 
 472     /*
 473      * Locate the insertion point for the code.
 474      */
 475     for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ;
 476 
 477     /*
 478      * Allocate space for a new decomposition.
 479      */
 480     if (*pdecomps_used == *pdecomps_size) {
 481         if (*pdecomps_size == 0)
 482           *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
 483         else
 484           *pdecomps = (_decomp_t *)
 485               realloc((char *) *pdecomps,
 486                       sizeof(_decomp_t) * (*pdecomps_size + 8));
 487         (void) memset((char *) (*pdecomps + *pdecomps_size), '\0',
 488                       sizeof(_decomp_t) << 3);
 489         *pdecomps_size += 8;
 490     }
 491 
 492     if (i < *pdecomps_used && code != (*pdecomps)[i].code) {
 493         /*
 494          * Shift the decomps up by one if the codes don't match.
 495          */
 496         for (j = *pdecomps_used; j > i; j--)
 497           (void) AC_MEMCPY((char *) &(*pdecomps)[j], (char *) &(*pdecomps)[j - 1],
 498                         sizeof(_decomp_t));
 499     }
 500 
 501     /*
 502      * Insert or replace a decomposition.
 503      */
 504     size = dectmp_size + (4 - (dectmp_size & 3));
 505     if ((*pdecomps)[i].size < size) {
 506         if ((*pdecomps)[i].size == 0)
 507           (*pdecomps)[i].decomp = (ac_uint4 *)
 508               malloc(sizeof(ac_uint4) * size);
 509         else
 510           (*pdecomps)[i].decomp = (ac_uint4 *)
 511               realloc((char *) (*pdecomps)[i].decomp,
 512                       sizeof(ac_uint4) * size);
 513         (*pdecomps)[i].size = size;
 514     }
 515 
 516     if ((*pdecomps)[i].code != code)
 517       (*pdecomps_used)++;
 518 
 519     (*pdecomps)[i].code = code;
 520     (*pdecomps)[i].used = dectmp_size;
 521     (void) AC_MEMCPY((char *) (*pdecomps)[i].decomp, (char *) dectmp,
 522                   sizeof(ac_uint4) * dectmp_size);
 523 
 524     /*
 525      * NOTICE: This needs changing later so it is more general than simply
 526      * pairs.  This calculation is done here to simplify allocation elsewhere.
 527      */
 528     if (!compat && dectmp_size == 2)
 529       comps_used++;
 530 }
 531 
 532 static void
 533 add_title(ac_uint4 code)
 534 {
 535     ac_uint4 i, j;
 536 
 537     /*
 538      * Always map the code to itself.
 539      */
 540     cases[2] = code;
 541 
 542     if (title_used == title_size) {
 543         if (title_size == 0)
 544           title = (_case_t *) malloc(sizeof(_case_t) << 3);
 545         else
 546           title = (_case_t *) realloc((char *) title,
 547                                       sizeof(_case_t) * (title_size + 8));
 548         title_size += 8;
 549     }
 550 
 551     /*
 552      * Locate the insertion point.
 553      */
 554     for (i = 0; i < title_used && code > title[i].key; i++) ;
 555 
 556     if (i < title_used) {
 557         /*
 558          * Shift the array up by one.
 559          */
 560         for (j = title_used; j > i; j--)
 561           (void) AC_MEMCPY((char *) &title[j], (char *) &title[j - 1],
 562                         sizeof(_case_t));
 563     }
 564 
 565     title[i].key = cases[2];    /* Title */
 566     title[i].other1 = cases[0]; /* Upper */
 567     title[i].other2 = cases[1]; /* Lower */
 568 
 569     title_used++;
 570 }
 571 
 572 static void
 573 add_upper(ac_uint4 code)
 574 {
 575     ac_uint4 i, j;
 576 
 577     /*
 578      * Always map the code to itself.
 579      */
 580     cases[0] = code;
 581 
 582     /*
 583      * If the title case character is not present, then make it the same as
 584      * the upper case.
 585      */
 586     if (cases[2] == 0)
 587       cases[2] = code;
 588 
 589     if (upper_used == upper_size) {
 590         if (upper_size == 0)
 591           upper = (_case_t *) malloc(sizeof(_case_t) << 3);
 592         else
 593           upper = (_case_t *) realloc((char *) upper,
 594                                       sizeof(_case_t) * (upper_size + 8));
 595         upper_size += 8;
 596     }
 597 
 598     /*
 599      * Locate the insertion point.
 600      */
 601     for (i = 0; i < upper_used && code > upper[i].key; i++) ;
 602 
 603     if (i < upper_used) {
 604         /*
 605          * Shift the array up by one.
 606          */
 607         for (j = upper_used; j > i; j--)
 608           (void) AC_MEMCPY((char *) &upper[j], (char *) &upper[j - 1],
 609                         sizeof(_case_t));
 610     }
 611 
 612     upper[i].key = cases[0];    /* Upper */
 613     upper[i].other1 = cases[1]; /* Lower */
 614     upper[i].other2 = cases[2]; /* Title */
 615 
 616     upper_used++;
 617 }
 618 
 619 static void
 620 add_lower(ac_uint4 code)
 621 {
 622     ac_uint4 i, j;
 623 
 624     /*
 625      * Always map the code to itself.
 626      */
 627     cases[1] = code;
 628 
 629     /*
 630      * If the title case character is empty, then make it the same as the
 631      * upper case.
 632      */
 633     if (cases[2] == 0)
 634       cases[2] = cases[0];
 635 
 636     if (lower_used == lower_size) {
 637         if (lower_size == 0)
 638           lower = (_case_t *) malloc(sizeof(_case_t) << 3);
 639         else
 640           lower = (_case_t *) realloc((char *) lower,
 641                                       sizeof(_case_t) * (lower_size + 8));
 642         lower_size += 8;
 643     }
 644 
 645     /*
 646      * Locate the insertion point.
 647      */
 648     for (i = 0; i < lower_used && code > lower[i].key; i++) ;
 649 
 650     if (i < lower_used) {
 651         /*
 652          * Shift the array up by one.
 653          */
 654         for (j = lower_used; j > i; j--)
 655           (void) AC_MEMCPY((char *) &lower[j], (char *) &lower[j - 1],
 656                         sizeof(_case_t));
 657     }
 658 
 659     lower[i].key = cases[1];    /* Lower */
 660     lower[i].other1 = cases[0]; /* Upper */
 661     lower[i].other2 = cases[2]; /* Title */
 662 
 663     lower_used++;
 664 }
 665 
 666 static void
 667 ordered_ccl_insert(ac_uint4 c, ac_uint4 ccl_code)
 668 {
 669     ac_uint4 i, j;
 670 
 671     if (ccl_used == ccl_size) {
 672         if (ccl_size == 0)
 673           ccl = (ac_uint4 *) malloc(sizeof(ac_uint4) * 24);
 674         else
 675           ccl = (ac_uint4 *)
 676               realloc((char *) ccl, sizeof(ac_uint4) * (ccl_size + 24));
 677         ccl_size += 24;
 678     }
 679 
 680     /*
 681      * Optimize adding the first item.
 682      */
 683     if (ccl_used == 0) {
 684         ccl[0] = ccl[1] = c;
 685         ccl[2] = ccl_code;
 686         ccl_used += 3;
 687         return;
 688     }
 689 
 690     /*
 691      * Handle the special case of extending the range on the end.  This
 692      * requires that the combining class codes are the same.
 693      */
 694     if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
 695         ccl[ccl_used - 2] = c;
 696         return;
 697     }
 698 
 699     /*
 700      * Handle the special case of adding another range on the end.
 701      */
 702     if (c > ccl[ccl_used - 2] + 1 ||
 703         (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) {
 704         ccl[ccl_used++] = c;
 705         ccl[ccl_used++] = c;
 706         ccl[ccl_used++] = ccl_code;
 707         return;
 708     }
 709 
 710     /*
 711      * Locate either the insertion point or range for the code.
 712      */
 713     for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;
 714 
 715     if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) {
 716         /*
 717          * Extend an existing range.
 718          */
 719         ccl[i + 1] = c;
 720         return;
 721     } else if (c < ccl[i]) {
 722         /*
 723          * Start a new range before the current location.
 724          */
 725         for (j = ccl_used; j > i; j -= 3) {
 726             ccl[j] = ccl[j - 3];
 727             ccl[j - 1] = ccl[j - 4];
 728             ccl[j - 2] = ccl[j - 5];
 729         }
 730         ccl[i] = ccl[i + 1] = c;
 731         ccl[i + 2] = ccl_code;
 732     }
 733 }
 734 
 735 /*
 736  * Adds a number if it does not already exist and returns an index value
 737  * multiplied by 2.
 738  */
 739 static ac_uint4
 740 make_number(short num, short denom)
 741 {
 742     ac_uint4 n;
 743 
 744     /*
 745      * Determine if the number already exists.
 746      */
 747     for (n = 0; n < nums_used; n++) {
 748         if (nums[n].numerator == num && nums[n].denominator == denom)
 749           return n << 1;
 750     }
 751 
 752     if (nums_used == nums_size) {
 753         if (nums_size == 0)
 754           nums = (_num_t *) malloc(sizeof(_num_t) << 3);
 755         else
 756           nums = (_num_t *) realloc((char *) nums,
 757                                     sizeof(_num_t) * (nums_size + 8));
 758         nums_size += 8;
 759     }
 760 
 761     n = nums_used++;
 762     nums[n].numerator = num;
 763     nums[n].denominator = denom;
 764 
 765     return n << 1;
 766 }
 767 
 768 static void
 769 add_number(ac_uint4 code, short num, short denom)
 770 {
 771     ac_uint4 i, j;
 772 
 773     /*
 774      * Insert the code in order.
 775      */
 776     for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;
 777 
 778     /*
 779      * Handle the case of the codes matching and simply replace the number
 780      * that was there before.
 781      */
 782     if (i < ncodes_used && code == ncodes[i].code) {
 783         ncodes[i].idx = make_number(num, denom);
 784         return;
 785     }
 786 
 787     /*
 788      * Resize the array if necessary.
 789      */
 790     if (ncodes_used == ncodes_size) {
 791         if (ncodes_size == 0)
 792           ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3);
 793         else
 794           ncodes = (_codeidx_t *)
 795               realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8));
 796 
 797         ncodes_size += 8;
 798     }
 799 
 800     /*
 801      * Shift things around to insert the code if necessary.
 802      */
 803     if (i < ncodes_used) {
 804         for (j = ncodes_used; j > i; j--) {
 805             ncodes[j].code = ncodes[j - 1].code;
 806             ncodes[j].idx = ncodes[j - 1].idx;
 807         }
 808     }
 809     ncodes[i].code = code;
 810     ncodes[i].idx = make_number(num, denom);
 811 
 812     ncodes_used++;
 813 }
 814 
 815 /*
 816  * This routine assumes that the line is a valid Unicode Character Database
 817  * entry.
 818  */
 819 static void
 820 read_cdata(FILE *in)
 821 {
 822     ac_uint4 i, lineno, skip, code, ccl_code;
 823     short wnum, neg, number[2], compat;
 824     char line[512], *s, *e;
 825 
 826     lineno = skip = 0;
 827     while (fgets(line, sizeof(line), in)) {
 828         if( (s=strchr(line, '\n')) ) *s = '\0';
 829         lineno++;
 830 
 831         /*
 832          * Skip blank lines and lines that start with a '#'.
 833          */
 834         if (line[0] == 0 || line[0] == '#')
 835           continue;
 836 
 837         /*
 838          * If lines need to be skipped, do it here.
 839          */
 840         if (skip) {
 841             skip--;
 842             continue;
 843         }
 844 
 845         /*
 846          * Collect the code.  The code can be up to 6 hex digits in length to
 847          * allow surrogates to be specified.
 848          */
 849         for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) {
 850             code <<= 4;
 851             if (*s >= '0' && *s <= '9')
 852               code += *s - '0';
 853             else if (*s >= 'A' && *s <= 'F')
 854               code += (*s - 'A') + 10;
 855             else if (*s >= 'a' && *s <= 'f')
 856               code += (*s - 'a') + 10;
 857         }
 858 
 859         /*
 860          * Handle the following special cases:
 861          * 1. 4E00-9FA5 CJK Ideographs.
 862          * 2. AC00-D7A3 Hangul Syllables.
 863          * 3. D800-DFFF Surrogates.
 864          * 4. E000-F8FF Private Use Area.
 865          * 5. F900-FA2D Han compatibility.
 866          * ...Plus additional ranges in newer Unicode versions...
 867          */
 868         switch (code) {
 869           case 0x3400:
 870             /* CJK Ideograph Extension A */
 871             add_range(0x3400, 0x4db5, "Lo", "L");
 872 
 873             add_range(0x3400, 0x4db5, "Cp", 0);
 874 
 875             skip = 1;
 876             break;
 877           case 0x4e00:
 878             /*
 879              * The Han ideographs.
 880              */
 881             add_range(0x4e00, 0x9fff, "Lo", "L");
 882 
 883             /*
 884              * Add the characters to the defined category.
 885              */
 886             add_range(0x4e00, 0x9fa5, "Cp", 0);
 887 
 888             skip = 1;
 889             break;
 890           case 0xac00:
 891             /*
 892              * The Hangul syllables.
 893              */
 894             add_range(0xac00, 0xd7a3, "Lo", "L");
 895 
 896             /*
 897              * Add the characters to the defined category.
 898              */
 899             add_range(0xac00, 0xd7a3, "Cp", 0);
 900 
 901             skip = 1;
 902             break;
 903           case 0xd800:
 904             /*
 905              * Make a range of all surrogates and assume some default
 906              * properties.
 907              */
 908             add_range(0x010000, 0x10ffff, "Cs", "L");
 909             skip = 5;
 910             break;
 911           case 0xe000:
 912             /*
 913              * The Private Use area.  Add with a default set of properties.
 914              */
 915             add_range(0xe000, 0xf8ff, "Co", "L");
 916             skip = 1;
 917             break;
 918           case 0xf900:
 919             /*
 920              * The CJK compatibility area.
 921              */
 922             add_range(0xf900, 0xfaff, "Lo", "L");
 923 
 924             /*
 925              * Add the characters to the defined category.
 926              */
 927             add_range(0xf900, 0xfaff, "Cp", 0);
 928 
 929             skip = 1;
 930             break;
 931           case 0x20000:
 932             /* CJK Ideograph Extension B */
 933             add_range(0x20000, 0x2a6d6, "Lo", "L");
 934 
 935             add_range(0x20000, 0x2a6d6, "Cp", 0);
 936 
 937             skip = 1;
 938             break;
 939           case 0xf0000:
 940             /* Plane 15 private use */
 941             add_range(0xf0000, 0xffffd, "Co", "L");
 942             skip = 1;
 943             break;
 944 
 945           case 0x100000:
 946             /* Plane 16 private use */
 947             add_range(0x100000, 0x10fffd, "Co", "L");
 948             skip = 1;
 949             break;
 950         }
 951 
 952         if (skip)
 953           continue;
 954 
 955         /*
 956          * Add the code to the defined category.
 957          */
 958         ordered_range_insert(code, "Cp", 2);
 959 
 960         /*
 961          * Locate the first character property field.
 962          */
 963         for (i = 0; *s != 0 && i < 2; s++) {
 964             if (*s == ';')
 965               i++;
 966         }
 967         for (e = s; *e && *e != ';'; e++) ;
 968 
 969         ordered_range_insert(code, s, e - s);
 970 
 971         /*
 972          * Locate the combining class code.
 973          */
 974         for (s = e; *s != 0 && i < 3; s++) {
 975             if (*s == ';')
 976               i++;
 977         }
 978 
 979         /*
 980          * Convert the combining class code from decimal.
 981          */
 982         for (ccl_code = 0, e = s; *e && *e != ';'; e++)
 983           ccl_code = (ccl_code * 10) + (*e - '0');
 984 
 985         /*
 986          * Add the code if it not 0.
 987          */
 988         if (ccl_code != 0)
 989           ordered_ccl_insert(code, ccl_code);
 990 
 991         /*
 992          * Locate the second character property field.
 993          */
 994         for (s = e; *s != 0 && i < 4; s++) {
 995             if (*s == ';')
 996               i++;
 997         }
 998         for (e = s; *e && *e != ';'; e++) ;
 999 
1000         ordered_range_insert(code, s, e - s);
1001 
1002         /*
1003          * Check for a decomposition.
1004          */
1005         s = ++e;
1006         if (*s != ';') {
1007             compat = *s == '<';
1008             if (compat) {
1009                 /*
1010                  * Skip compatibility formatting tag.
1011                  */
1012                 while (*s++ != '>');
1013             }
1014             /*
1015              * Collect the codes of the decomposition.
1016              */
1017             for (dectmp_size = 0; *s != ';'; ) {
1018                 /*
1019                  * Skip all leading non-hex digits.
1020                  */
1021                 while (!ishdigit(*s))
1022                   s++;
1023 
1024                 for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
1025                     dectmp[dectmp_size] <<= 4;
1026                     if (*s >= '0' && *s <= '9')
1027                       dectmp[dectmp_size] += *s - '0';
1028                     else if (*s >= 'A' && *s <= 'F')
1029                       dectmp[dectmp_size] += (*s - 'A') + 10;
1030                     else if (*s >= 'a' && *s <= 'f')
1031                       dectmp[dectmp_size] += (*s - 'a') + 10;
1032                 }
1033                 dectmp_size++;
1034             }
1035 
1036             /*
1037              * If there are any codes in the temporary decomposition array,
1038              * then add the character with its decomposition.
1039              */
1040             if (dectmp_size > 0) {
1041                 if (!compat) {
1042                     add_decomp(code, 0);
1043                 }
1044                 add_decomp(code, 1);
1045             }
1046         }
1047 
1048         /*
1049          * Skip to the number field.
1050          */
1051         for (i = 0; i < 3 && *s; s++) {
1052             if (*s == ';')
1053               i++;
1054         }
1055 
1056         /*
1057          * Scan the number in.
1058          */
1059         number[0] = number[1] = 0;
1060         for (e = s, neg = wnum = 0; *e && *e != ';'; e++) {
1061             if (*e == '-') {
1062                 neg = 1;
1063                 continue;
1064             }
1065 
1066             if (*e == '/') {
1067                 /*
1068                  * Move the denominator of the fraction.
1069                  */
1070                 if (neg)
1071                   number[wnum] *= -1;
1072                 neg = 0;
1073                 e++;
1074                 wnum++;
1075             }
1076             number[wnum] = (number[wnum] * 10) + (*e - '0');
1077         }
1078 
1079         if (e > s) {
1080             /*
1081              * Adjust the denominator in case of integers and add the number.
1082              */
1083             if (wnum == 0)
1084               number[1] = 1;
1085 
1086             add_number(code, number[0], number[1]);
1087         }
1088 
1089         /*
1090          * Skip to the start of the possible case mappings.
1091          */
1092         for (s = e, i = 0; i < 4 && *s; s++) {
1093             if (*s == ';')
1094               i++;
1095         }
1096 
1097         /*
1098          * Collect the case mappings.
1099          */
1100         cases[0] = cases[1] = cases[2] = 0;
1101         for (i = 0; i < 3; i++) {
1102             while (ishdigit(*s)) {
1103                 cases[i] <<= 4;
1104                 if (*s >= '0' && *s <= '9')
1105                   cases[i] += *s - '0';
1106                 else if (*s >= 'A' && *s <= 'F')
1107                   cases[i] += (*s - 'A') + 10;
1108                 else if (*s >= 'a' && *s <= 'f')
1109                   cases[i] += (*s - 'a') + 10;
1110                 s++;
1111             }
1112             if (*s == ';')
1113               s++;
1114         }
1115         if (cases[0] && cases[1])
1116           /*
1117            * Add the upper and lower mappings for a title case character.
1118            */
1119           add_title(code);
1120         else if (cases[1])
1121           /*
1122            * Add the lower and title case mappings for the upper case
1123            * character.
1124            */
1125           add_upper(code);
1126         else if (cases[0])
1127           /*
1128            * Add the upper and title case mappings for the lower case
1129            * character.
1130            */
1131           add_lower(code);
1132     }
1133 }
1134 
1135 #if 0
1136 
1137 static _decomp_t *
1138 find_decomp(ac_uint4 code, short compat)
1139 {
1140     long l, r, m;
1141     _decomp_t *decs;
1142 
1143     l = 0;
1144     r = (compat ? kdecomps_used : decomps_used) - 1;
1145     decs = compat ? kdecomps : decomps;
1146     while (l <= r) {
1147         m = (l + r) >> 1;
1148         if (code > decs[m].code)
1149           l = m + 1;
1150         else if (code < decs[m].code)
1151           r = m - 1;
1152         else
1153           return &decs[m];
1154     }
1155     return 0;
1156 }
1157 
1158 static void
1159 decomp_it(_decomp_t *d, short compat)
1160 {
1161     ac_uint4 i;
1162     _decomp_t *dp;
1163 
1164     for (i = 0; i < d->used; i++) {
1165         if ((dp = find_decomp(d->decomp[i], compat)) != 0)
1166           decomp_it(dp, compat);
1167         else
1168           dectmp[dectmp_size++] = d->decomp[i];
1169     }
1170 }
1171 
1172 
1173 /*
1174  * Expand all decompositions by recursively decomposing each character
1175  * in the decomposition.
1176  */
1177 static void
1178 expand_decomp(void)
1179 {
1180     ac_uint4 i;
1181 
1182     for (i = 0; i < decomps_used; i++) {
1183         dectmp_size = 0;
1184         decomp_it(&decomps[i], 0);
1185         if (dectmp_size > 0)
1186           add_decomp(decomps[i].code, 0);
1187     }
1188 
1189     for (i = 0; i < kdecomps_used; i++) {
1190         dectmp_size = 0;
1191         decomp_it(&kdecomps[i], 1);
1192         if (dectmp_size > 0)
1193           add_decomp(kdecomps[i].code, 1);
1194     }
1195 }
1196 
1197 static int
1198 cmpcomps(const void *v_comp1, const void *v_comp2)
1199 {
1200         const _comp_t *comp1 = v_comp1, *comp2 = v_comp2;
1201     long diff = comp1->code1 - comp2->code1;
1202 
1203     if (!diff)
1204         diff = comp1->code2 - comp2->code2;
1205     return (int) diff;
1206 }
1207 
1208 #endif
1209 
1210 /*
1211  * Load composition exclusion data
1212  */
1213 static void
1214 read_compexdata(FILE *in)
1215 {
1216     ac_uint2 i;
1217     ac_uint4 code;
1218     char line[512], *s;
1219 
1220     (void) memset((char *) compexs, 0, sizeof(compexs));
1221 
1222     while (fgets(line, sizeof(line), in)) {
1223         if( (s=strchr(line, '\n')) ) *s = '\0';
1224         /*
1225          * Skip blank lines and lines that start with a '#'.
1226          */
1227         if (line[0] == 0 || line[0] == '#')
1228             continue;
1229 
1230         /*
1231          * Collect the code.  Assume max 6 digits
1232          */
1233 
1234         for (s = line, i = code = 0; *s != '#' && i < 6; i++, s++) {
1235             if (isspace((unsigned char)*s)) break;
1236             code <<= 4;
1237             if (*s >= '0' && *s <= '9')
1238                 code += *s - '0';
1239             else if (*s >= 'A' && *s <= 'F')
1240                 code += (*s - 'A') + 10;
1241             else if (*s >= 'a' && *s <= 'f')
1242                 code += (*s - 'a') + 10;
1243         }
1244         COMPEX_SET(code);
1245     }
1246 }
1247 
1248 #if 0
1249 
1250 /*
1251  * Creates array of compositions from decomposition array
1252  */
1253 static void
1254 create_comps(void)
1255 {
1256     ac_uint4 i, cu;
1257 
1258     comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t));
1259 
1260     for (i = cu = 0; i < decomps_used; i++) {
1261         if (decomps[i].used != 2 || COMPEX_TEST(decomps[i].code))
1262             continue;
1263         comps[cu].comp = decomps[i].code;
1264         comps[cu].count = 2;
1265         comps[cu].code1 = decomps[i].decomp[0];
1266         comps[cu].code2 = decomps[i].decomp[1];
1267         cu++;
1268     }
1269     comps_used = cu;
1270     qsort(comps, comps_used, sizeof(_comp_t), cmpcomps);
1271 }
1272 
1273 #endif
1274 
1275 #if HARDCODE_DATA
1276 static void
1277 write_case(FILE *out, _case_t *tab, int num, int first)
1278 {
1279     int i;
1280 
1281     for (i=0; i<num; i++) {
1282         if (first) first = 0;
1283         else fprintf(out, ",");
1284         fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx",
1285                 (unsigned long) tab[i].key, (unsigned long) tab[i].other1,
1286                 (unsigned long) tab[i].other2);
1287     }
1288 }
1289 
1290 #define PREF "static const "
1291 
1292 #endif
1293 
1294 static void
1295 write_cdata(char *opath)
1296 {
1297     FILE *out;
1298         ac_uint4 bytes;
1299     ac_uint4 i, idx, nprops;
1300 #if !(HARDCODE_DATA)
1301     ac_uint2 casecnt[2];
1302 #endif
1303     char path[BUFSIZ];
1304 #if HARDCODE_DATA
1305     int j, k;
1306 
1307     /*****************************************************************
1308      *
1309      * Generate the ctype data.
1310      *
1311      *****************************************************************/
1312 
1313     /*
1314      * Open the output file.
1315      */
1316     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "uctable.h", opath);
1317     if ((out = fopen(path, "w")) == 0)
1318       return;
1319 #else
1320     /*
1321      * Open the ctype.dat file.
1322      */
1323     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "ctype.dat", opath);
1324     if ((out = fopen(path, "wb")) == 0)
1325       return;
1326 #endif
1327 
1328     /*
1329      * Collect the offsets for the properties.  The offsets array is
1330      * on a 4-byte boundary to keep things efficient for architectures
1331      * that need such a thing.
1332      */
1333     for (i = idx = 0; i < NUMPROPS; i++) {
1334         propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff;
1335         idx += proptbl[i].used;
1336     }
1337 
1338     /*
1339      * Add the sentinel index which is used by the binary search as the upper
1340      * bound for a search.
1341      */
1342     propcnt[i] = idx;
1343 
1344     /*
1345      * Record the actual number of property lists.  This may be different than
1346      * the number of offsets actually written because of aligning on a 4-byte
1347      * boundary.
1348      */
1349     hdr[1] = NUMPROPS;
1350 
1351     /*
1352      * Calculate the byte count needed and pad the property counts array to a
1353      * 4-byte boundary.
1354      */
1355     if ((bytes = sizeof(ac_uint2) * (NUMPROPS + 1)) & 3)
1356       bytes += 4 - (bytes & 3);
1357     nprops = bytes / sizeof(ac_uint2);
1358     bytes += sizeof(ac_uint4) * idx;
1359 
1360 #if HARDCODE_DATA
1361     fprintf(out,
1362         "/* This file was generated from a modified version UCData's ucgendat.\n"
1363         " *\n"
1364         " *                     DO NOT EDIT THIS FILE!\n"
1365         " * \n"
1366         " * Instead, compile ucgendat.c (bundled with PHP in ext/mbstring), download\n"
1367         " * the appropriate UnicodeData-x.x.x.txt and CompositionExclusions-x.x.x.txt\n"
1368         " * files from  http://www.unicode.org/Public/ and run this program.\n"
1369         " *\n"
1370         " * More information can be found in the UCData package. Unfortunately,\n"
1371         " * the project's page doesn't seem to be live anymore, so you can use\n"
1372         " * OpenLDAPs modified copy (look in libraries/liblunicode/ucdata) */\n\n");
1373 
1374     fprintf(out, PREF "unsigned short _ucprop_size = %d;\n\n", NUMPROPS);
1375 
1376     fprintf(out, PREF "unsigned short  _ucprop_offsets[] = {");
1377 
1378     for (i = 0; i<nprops; i++) {
1379        if (i) fprintf(out, ",");
1380        if (!(i&7)) fprintf(out, "\n\t");
1381        else fprintf(out, " ");
1382        fprintf(out, "0x%04x", propcnt[i]);
1383     }
1384     fprintf(out, "\n};\n\n");
1385 
1386     fprintf(out, PREF "unsigned int _ucprop_ranges[] = {");
1387 
1388     k = 0;
1389     for (i = 0; i < NUMPROPS; i++) {
1390         if (proptbl[i].used > 0) {
1391           for (j=0; j<proptbl[i].used; j++) {
1392             if (k) fprintf(out, ",");
1393             if (!(k&3)) fprintf(out,"\n\t");
1394             else fprintf(out, " ");
1395             k++;
1396             fprintf(out, "0x%08lx", (unsigned long) proptbl[i].ranges[j]);
1397           }
1398         }
1399     }
1400     fprintf(out, "\n};\n\n");
1401 #else
1402     /*
1403      * Write the header.
1404      */
1405     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
1406 
1407     /*
1408      * Write the byte count.
1409      */
1410     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
1411 
1412     /*
1413      * Write the property list counts.
1414      */
1415     fwrite((char *) propcnt, sizeof(ac_uint2), nprops, out);
1416 
1417     /*
1418      * Write the property lists.
1419      */
1420     for (i = 0; i < NUMPROPS; i++) {
1421         if (proptbl[i].used > 0)
1422           fwrite((char *) proptbl[i].ranges, sizeof(ac_uint4),
1423                  proptbl[i].used, out);
1424     }
1425 
1426     fclose(out);
1427 #endif
1428 
1429     /*****************************************************************
1430      *
1431      * Generate the case mapping data.
1432      *
1433      *****************************************************************/
1434 
1435 #if HARDCODE_DATA
1436     fprintf(out, PREF "unsigned int _uccase_size = %ld;\n\n",
1437         (long) (upper_used + lower_used + title_used));
1438 
1439     fprintf(out,
1440         "/* Starting indexes of the case tables\n"
1441         " * UpperIndex = 0\n"
1442         " * LowerIndex = _uccase_len[0]\n"
1443         " * TitleIndex = LowerIndex + _uccase_len[1] */\n\n");
1444     fprintf(out, PREF "unsigned short _uccase_len[2] = {%ld, %ld};\n\n",
1445         (long) upper_used * 3, (long) lower_used * 3);
1446     fprintf(out, PREF "unsigned int _uccase_map[] = {");
1447 
1448     if (upper_used > 0)
1449       /*
1450        * Write the upper case table.
1451        */
1452       write_case(out, upper, upper_used, 1);
1453 
1454     if (lower_used > 0)
1455       /*
1456        * Write the lower case table.
1457        */
1458       write_case(out, lower, lower_used, !upper_used);
1459 
1460     if (title_used > 0)
1461       /*
1462        * Write the title case table.
1463        */
1464       write_case(out, title, title_used, !(upper_used||lower_used));
1465 
1466     if (!(upper_used || lower_used || title_used))
1467         fprintf(out, "\t0");
1468 
1469     fprintf(out, "\n};\n\n");
1470 #else
1471     /*
1472      * Open the case.dat file.
1473      */
1474     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath);
1475     if ((out = fopen(path, "wb")) == 0)
1476       return;
1477 
1478     /*
1479      * Write the case mapping tables.
1480      */
1481     hdr[1] = upper_used + lower_used + title_used;
1482     casecnt[0] = upper_used;
1483     casecnt[1] = lower_used;
1484 
1485     /*
1486      * Write the header.
1487      */
1488     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
1489 
1490     /*
1491      * Write the upper and lower case table sizes.
1492      */
1493     fwrite((char *) casecnt, sizeof(ac_uint2), 2, out);
1494 
1495     if (upper_used > 0)
1496       /*
1497        * Write the upper case table.
1498        */
1499       fwrite((char *) upper, sizeof(_case_t), upper_used, out);
1500 
1501     if (lower_used > 0)
1502       /*
1503        * Write the lower case table.
1504        */
1505       fwrite((char *) lower, sizeof(_case_t), lower_used, out);
1506 
1507     if (title_used > 0)
1508       /*
1509        * Write the title case table.
1510        */
1511       fwrite((char *) title, sizeof(_case_t), title_used, out);
1512 
1513     fclose(out);
1514 #endif
1515 
1516 #if 0
1517 
1518     /*****************************************************************
1519      *
1520      * Generate the composition data.
1521      *
1522      *****************************************************************/
1523 
1524     /*
1525      * Create compositions from decomposition data
1526      */
1527     create_comps();
1528 
1529 #if HARDCODE_DATA
1530     fprintf(out, PREF "ac_uint4 _uccomp_size = %ld;\n\n",
1531         comps_used * 4L);
1532 
1533     fprintf(out, PREF "ac_uint4 _uccomp_data[] = {");
1534 
1535      /*
1536       * Now, if comps exist, write them out.
1537       */
1538     if (comps_used > 0) {
1539         for (i=0; i<comps_used; i++) {
1540             if (i) fprintf(out, ",");
1541             fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx",
1542                 (unsigned long) comps[i].comp, (unsigned long) comps[i].count,
1543                 (unsigned long) comps[i].code1, (unsigned long) comps[i].code2);
1544         }
1545     } else {
1546         fprintf(out, "\t0");
1547     }
1548     fprintf(out, "\n};\n\n");
1549 #else
1550     /*
1551      * Open the comp.dat file.
1552      */
1553     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "comp.dat", opath);
1554     if ((out = fopen(path, "wb")) == 0)
1555         return;
1556 
1557     /*
1558      * Write the header.
1559      */
1560     hdr[1] = (ac_uint2) comps_used * 4;
1561     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
1562 
1563     /*
1564      * Write out the byte count to maintain header size.
1565      */
1566     bytes = comps_used * sizeof(_comp_t);
1567     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
1568 
1569     /*
1570      * Now, if comps exist, write them out.
1571      */
1572     if (comps_used > 0)
1573         fwrite((char *) comps, sizeof(_comp_t), comps_used, out);
1574 
1575     fclose(out);
1576 #endif
1577 
1578     /*****************************************************************
1579      *
1580      * Generate the decomposition data.
1581      *
1582      *****************************************************************/
1583 
1584     /*
1585      * Fully expand all decompositions before generating the output file.
1586      */
1587     expand_decomp();
1588 
1589 #if HARDCODE_DATA
1590     fprintf(out, PREF "ac_uint4 _ucdcmp_size = %ld;\n\n",
1591         decomps_used * 2L);
1592 
1593     fprintf(out, PREF "ac_uint4 _ucdcmp_nodes[] = {");
1594 
1595     if (decomps_used) {
1596         /*
1597          * Write the list of decomp nodes.
1598          */
1599         for (i = idx = 0; i < decomps_used; i++) {
1600             fprintf(out, "\n\t0x%08lx, 0x%08lx,",
1601                 (unsigned long) decomps[i].code, (unsigned long) idx);
1602             idx += decomps[i].used;
1603         }
1604 
1605         /*
1606          * Write the sentinel index as the last decomp node.
1607          */
1608         fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx);
1609 
1610         fprintf(out, PREF "ac_uint4 _ucdcmp_decomp[] = {");
1611         /*
1612          * Write the decompositions themselves.
1613          */
1614         k = 0;
1615         for (i = 0; i < decomps_used; i++)
1616           for (j=0; j<decomps[i].used; j++) {
1617             if (k) fprintf(out, ",");
1618             if (!(k&3)) fprintf(out,"\n\t");
1619             else fprintf(out, " ");
1620             k++;
1621             fprintf(out, "0x%08lx", (unsigned long) decomps[i].decomp[j]);
1622           }
1623         fprintf(out, "\n};\n\n");
1624     }
1625 #else
1626     /*
1627      * Open the decomp.dat file.
1628      */
1629     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "decomp.dat", opath);
1630     if ((out = fopen(path, "wb")) == 0)
1631       return;
1632 
1633     hdr[1] = decomps_used;
1634 
1635     /*
1636      * Write the header.
1637      */
1638     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
1639 
1640     /*
1641      * Write a temporary byte count which will be calculated as the
1642      * decompositions are written out.
1643      */
1644     bytes = 0;
1645     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
1646 
1647     if (decomps_used) {
1648         /*
1649          * Write the list of decomp nodes.
1650          */
1651         for (i = idx = 0; i < decomps_used; i++) {
1652             fwrite((char *) &decomps[i].code, sizeof(ac_uint4), 1, out);
1653             fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
1654             idx += decomps[i].used;
1655         }
1656 
1657         /*
1658          * Write the sentinel index as the last decomp node.
1659          */
1660         fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
1661 
1662         /*
1663          * Write the decompositions themselves.
1664          */
1665         for (i = 0; i < decomps_used; i++)
1666           fwrite((char *) decomps[i].decomp, sizeof(ac_uint4),
1667                  decomps[i].used, out);
1668 
1669         /*
1670          * Seek back to the beginning and write the byte count.
1671          */
1672         bytes = (sizeof(ac_uint4) * idx) +
1673             (sizeof(ac_uint4) * ((hdr[1] << 1) + 1));
1674         fseek(out, sizeof(ac_uint2) << 1, 0L);
1675         fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
1676 
1677         fclose(out);
1678     }
1679 #endif
1680 
1681 #ifdef HARDCODE_DATA
1682     fprintf(out, PREF "ac_uint4 _uckdcmp_size = %ld;\n\n",
1683         kdecomps_used * 2L);
1684 
1685     fprintf(out, PREF "ac_uint4 _uckdcmp_nodes[] = {");
1686 
1687     if (kdecomps_used) {
1688         /*
1689          * Write the list of kdecomp nodes.
1690          */
1691         for (i = idx = 0; i < kdecomps_used; i++) {
1692             fprintf(out, "\n\t0x%08lx, 0x%08lx,",
1693                 (unsigned long) kdecomps[i].code, (unsigned long) idx);
1694             idx += kdecomps[i].used;
1695         }
1696 
1697         /*
1698          * Write the sentinel index as the last decomp node.
1699          */
1700         fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx);
1701 
1702         fprintf(out, PREF "ac_uint4 _uckdcmp_decomp[] = {");
1703 
1704         /*
1705          * Write the decompositions themselves.
1706          */
1707         k = 0;
1708         for (i = 0; i < kdecomps_used; i++)
1709           for (j=0; j<kdecomps[i].used; j++) {
1710             if (k) fprintf(out, ",");
1711             if (!(k&3)) fprintf(out,"\n\t");
1712             else fprintf(out, " ");
1713             k++;
1714             fprintf(out, "0x%08lx", (unsigned long) kdecomps[i].decomp[j]);
1715           }
1716         fprintf(out, "\n};\n\n");
1717     }
1718 #else
1719     /*
1720      * Open the kdecomp.dat file.
1721      */
1722     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "kdecomp.dat", opath);
1723     if ((out = fopen(path, "wb")) == 0)
1724       return;
1725 
1726     hdr[1] = kdecomps_used;
1727 
1728     /*
1729      * Write the header.
1730      */
1731     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
1732 
1733     /*
1734      * Write a temporary byte count which will be calculated as the
1735      * decompositions are written out.
1736      */
1737     bytes = 0;
1738     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
1739 
1740     if (kdecomps_used) {
1741         /*
1742          * Write the list of kdecomp nodes.
1743          */
1744         for (i = idx = 0; i < kdecomps_used; i++) {
1745             fwrite((char *) &kdecomps[i].code, sizeof(ac_uint4), 1, out);
1746             fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
1747             idx += kdecomps[i].used;
1748         }
1749 
1750         /*
1751          * Write the sentinel index as the last decomp node.
1752          */
1753         fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
1754 
1755         /*
1756          * Write the decompositions themselves.
1757          */
1758         for (i = 0; i < kdecomps_used; i++)
1759           fwrite((char *) kdecomps[i].decomp, sizeof(ac_uint4),
1760                  kdecomps[i].used, out);
1761 
1762         /*
1763          * Seek back to the beginning and write the byte count.
1764          */
1765         bytes = (sizeof(ac_uint4) * idx) +
1766             (sizeof(ac_uint4) * ((hdr[1] << 1) + 1));
1767         fseek(out, sizeof(ac_uint2) << 1, 0L);
1768         fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
1769 
1770         fclose(out);
1771     }
1772 #endif
1773 
1774     /*****************************************************************
1775      *
1776      * Generate the combining class data.
1777      *
1778      *****************************************************************/
1779 #ifdef HARDCODE_DATA
1780     fprintf(out, PREF "ac_uint4 _uccmcl_size = %ld;\n\n", (long) ccl_used);
1781 
1782     fprintf(out, PREF "ac_uint4 _uccmcl_nodes[] = {");
1783 
1784     if (ccl_used > 0) {
1785         /*
1786          * Write the combining class ranges out.
1787          */
1788         for (i = 0; i<ccl_used; i++) {
1789             if (i) fprintf(out, ",");
1790             if (!(i&3)) fprintf(out, "\n\t");
1791             else fprintf(out, " ");
1792             fprintf(out, "0x%08lx", (unsigned long) ccl[i]);
1793         }
1794     } else {
1795         fprintf(out, "\t0");
1796     }
1797     fprintf(out, "\n};\n\n");
1798 #else
1799     /*
1800      * Open the cmbcl.dat file.
1801      */
1802     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "cmbcl.dat", opath);
1803     if ((out = fopen(path, "wb")) == 0)
1804       return;
1805 
1806     /*
1807      * Set the number of ranges used.  Each range has a combining class which
1808      * means each entry is a 3-tuple.
1809      */
1810     hdr[1] = ccl_used / 3;
1811 
1812     /*
1813      * Write the header.
1814      */
1815     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
1816 
1817     /*
1818      * Write out the byte count to maintain header size.
1819      */
1820     bytes = ccl_used * sizeof(ac_uint4);
1821     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
1822 
1823     if (ccl_used > 0)
1824       /*
1825        * Write the combining class ranges out.
1826        */
1827       fwrite((char *) ccl, sizeof(ac_uint4), ccl_used, out);
1828 
1829     fclose(out);
1830 #endif
1831 
1832     /*****************************************************************
1833      *
1834      * Generate the number data.
1835      *
1836      *****************************************************************/
1837 
1838 #if HARDCODE_DATA
1839     fprintf(out, PREF "ac_uint4 _ucnum_size = %lu;\n\n",
1840         (unsigned long)ncodes_used<<1);
1841 
1842     fprintf(out, PREF "ac_uint4 _ucnum_nodes[] = {");
1843 
1844     /*
1845      * Now, if number mappings exist, write them out.
1846      */
1847     if (ncodes_used > 0) {
1848         for (i = 0; i<ncodes_used; i++) {
1849             if (i) fprintf(out, ",");
1850             if (!(i&1)) fprintf(out, "\n\t");
1851             else fprintf(out, " ");
1852             fprintf(out, "0x%08lx, 0x%08lx",
1853                 (unsigned long) ncodes[i].code, (unsigned long) ncodes[i].idx);
1854         }
1855         fprintf(out, "\n};\n\n");
1856 
1857         fprintf(out, PREF "short _ucnum_vals[] = {");
1858         for (i = 0; i<nums_used; i++) {
1859             if (i) fprintf(out, ",");
1860             if (!(i&3)) fprintf(out, "\n\t");
1861             else fprintf(out, " ");
1862             if (nums[i].numerator < 0) {
1863                 fprintf(out, "%6d, 0x%04x",
1864                   nums[i].numerator, nums[i].denominator);
1865             } else {
1866                 fprintf(out, "0x%04x, 0x%04x",
1867                   nums[i].numerator, nums[i].denominator);
1868             }
1869         }
1870         fprintf(out, "\n};\n\n");
1871     }
1872 #else
1873     /*
1874      * Open the num.dat file.
1875      */
1876     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "num.dat", opath);
1877     if ((out = fopen(path, "wb")) == 0)
1878       return;
1879 
1880     /*
1881      * The count part of the header will be the total number of codes that
1882      * have numbers.
1883      */
1884     hdr[1] = (ac_uint2) (ncodes_used << 1);
1885     bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t));
1886 
1887     /*
1888      * Write the header.
1889      */
1890     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
1891 
1892     /*
1893      * Write out the byte count to maintain header size.
1894      */
1895     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
1896 
1897     /*
1898      * Now, if number mappings exist, write them out.
1899      */
1900     if (ncodes_used > 0) {
1901         fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out);
1902         fwrite((char *) nums, sizeof(_num_t), nums_used, out);
1903     }
1904 #endif
1905 
1906 #endif
1907 
1908     fclose(out);
1909 }
1910 
/*
 * Print the command line synopsis and a description of the -o and -x
 * options to stderr, then terminate the program with exit status 1.
 *
 * prog - program name shown in the synopsis line.
 */
static void
usage(char *prog)
{
    FILE *err = stderr;

    /* Synopsis. */
    fprintf(err,
            "Usage: %s [-o output-directory|-x composition-exclusions]", prog);
    fputs(" datafile1 datafile2 ...\n\n", err);

    /* Option descriptions. */
    fputs("-o output-directory\n\t\tWrite the output files to a different",
          err);
    fputs(" directory (default: .).\n", err);
    fputs("-x composition-exclusion\n\t\tFile of composition codes", err);
    fputs(" that should be excluded.\n", err);

    exit(1);
}
1925 
/*
 * Program entry point.
 *
 * Command line:
 *   -o DIR   write the output into DIR (default ".")
 *   -x FILE  load the composition exclusions from FILE
 *   other    each remaining argument names a character data file, read
 *            with read_cdata()
 *
 * Arguments are processed strictly in order.  A file that cannot be
 * opened is reported on stderr and skipped.  An unknown option, or a
 * -o / -x that is not followed by its argument, prints the usage text and
 * exits with status 1.  After all arguments have been handled the
 * collected tables are written with write_cdata().
 *
 * Returns 0.
 */
int
main(int argc, char *argv[])
{
    FILE *in;
    char *prog, *opath;

    /*
     * The program name for diagnostics is the first argv entry; fall back
     * to a fixed name if the environment did not supply one.
     */
    prog = (argc > 0 && argv[0] != 0) ? argv[0] : "ucgendat";

    opath = 0;
    in = stdin;

    argc--;
    argv++;

    while (argc > 0) {
        if (argv[0][0] == '-') {
            switch (argv[0][1]) {
              case 'o':
                /* The option needs a directory argument after it. */
                if (argc < 2)
                  usage(prog);
                argc--;
                argv++;
                opath = argv[0];
                break;
              case 'x':
                /* The option needs a file name argument after it. */
                if (argc < 2)
                  usage(prog);
                argc--;
                argv++;
                if ((in = fopen(argv[0], "r")) == 0)
                  fprintf(stderr,
                          "%s: unable to open composition exclusion file %s\n",
                          prog, argv[0]);
                else {
                    read_compexdata(in);
                    fclose(in);
                    in = 0;
                }
                break;
              default:
                usage(prog);
            }
        } else {
            if (in != stdin && in != NULL)
              fclose(in);
            if ((in = fopen(argv[0], "r")) == 0)
              fprintf(stderr, "%s: unable to open ctype file %s\n",
                      prog, argv[0]);
            else {
                read_cdata(in);
                fclose(in);
                in = 0;
            }
        }
        argc--;
        argv++;
    }

    if (opath == 0)
      opath = ".";
    write_cdata(opath);

    return 0;
}

/* [<][>][^][v][top][bottom][index][help] */