root/ext/fileinfo/libmagic/apprentice.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. get_type
  2. get_standard_integer_type
  3. init_file_tables
  4. add_mlist
  5. apprentice_1
  6. file_ms_free
  7. file_ms_alloc
  8. apprentice_unmap
  9. mlist_alloc
  10. mlist_free
  11. file_apprentice
  12. nonmagic
  13. apprentice_magic_strength
  14. apprentice_sort
  15. apprentice_list
  16. set_test_type
  17. addentry
  18. load_1
  19. cmpstrp
  20. set_text_binary
  21. set_last_default
  22. coalesce_entries
  23. magic_entry_free
  24. apprentice_load
  25. file_signextend
  26. string_modifier_check
  27. get_op
  28. get_cond
  29. check_cond
  30. parse_indirect_modifier
  31. parse_op_modifier
  32. parse_string_modifier
  33. parse
  34. parse_strength
  35. goodchar
  36. parse_extra
  37. parse_apple
  38. parse_mime
  39. check_format_type
  40. check_format
  41. getvalue
  42. getstr
  43. hextoint
  44. file_showstr
  45. eatsize
  46. apprentice_buf
  47. apprentice_map
  48. check_buffer
  49. apprentice_compile
  50. mkdbname
  51. byteswap
  52. swap2
  53. swap4
  54. swap8
  55. bs1
  56. file_pstring_length_size
  57. file_pstring_get_length
  58. file_magicfind

   1 /*
   2  * Copyright (c) Ian F. Darwin 1986-1995.
   3  * Software written by Ian F. Darwin and others;
   4  * maintained 1995-present by Christos Zoulas and others.
   5  * 
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice immediately at the beginning of the file, without modification,
  11  *    this list of conditions, and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *  
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28 /*
  29  * apprentice - make one pass through /etc/magic, learning its secrets.
  30  */
  31 
  32 #include "php.h"
  33 
  34 #include "file.h"
  35 
  36 #ifndef lint
  37 FILE_RCSID("@(#)$File: apprentice.c,v 1.230 2015/01/02 21:29:39 christos Exp $")
  38 #endif  /* lint */
  39 
  40 #include "magic.h"
  41 #include "patchlevel.h"
  42 #include <stdlib.h>
  43 
  44 #if defined(__hpux) && !defined(HAVE_STRTOULL)
  45 #if SIZEOF_LONG == 8
  46 # define strtoull strtoul
  47 #else
  48 # define strtoull __strtoull
  49 #endif
  50 #endif
  51 
  52 #ifdef PHP_WIN32
  53 #include "win32/unistd.h"
  54 #define strtoull _strtoui64
  55 #else
  56 #include <unistd.h>
  57 #endif
  58 #include <string.h>
  59 #include <assert.h>
  60 #include <ctype.h>
  61 #include <fcntl.h>
  62 
  63 #ifndef SSIZE_MAX
  64 #define MAXMAGIC_SIZE        ((ssize_t)0x7fffffff)
  65 #else
  66 #define MAXMAGIC_SIZE        SSIZE_MAX
  67 #endif
  68 
  69 #define EATAB {while (isascii((unsigned char) *l) && \
  70                       isspace((unsigned char) *l))  ++l;}
  71 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
  72                         tolower((unsigned char) (l)) : (l))
  73 /*
  74  * Work around a bug in headers on Digital Unix.
  75  * At least confirmed for: OSF1 V4.0 878
  76  */
  77 #if defined(__osf__) && defined(__DECC)
  78 #ifdef MAP_FAILED
  79 #undef MAP_FAILED
  80 #endif
  81 #endif
  82 
  83 #ifndef MAP_FAILED
  84 #define MAP_FAILED (void *) -1
  85 #endif
  86 
  87 #ifndef MAP_FILE
  88 #define MAP_FILE 0
  89 #endif
  90 
  91 #define ALLOC_CHUNK     (size_t)10
  92 #define ALLOC_INCR      (size_t)200
  93 
  94 #define MAP_TYPE_MMAP   0
  95 #define MAP_TYPE_MALLOC 1
  96 #define MAP_TYPE_USER   2
  97 
   98 struct magic_entry {
              /*
               * Element type compared by apprentice_sort(), which ranks two
               * entries by apprentice_magic_strength() of their mp member.
               */
   99         struct magic *mp;       /* magic record(s) of this entry; only *mp is used for sorting */
              /* NOTE(review): presumably the number of records in use in mp - confirm against the code that fills it */
  100         uint32_t cont_count;
              /* NOTE(review): presumably the number of records allocated for mp - confirm against the code that fills it */
  101         uint32_t max_count;
  102 };
 103 
  104 struct magic_entry_set {
              /*
               * A group of magic_entry records.
               * NOTE(review): count and max look like the used and allocated
               * lengths of the me array - confirm against the loader code,
               * which is not part of this view.
               */
  105         struct magic_entry *me;
  106         uint32_t count;
  107         uint32_t max;
  108 };
 109 
  110 struct magic_map {
              /*
               * Descriptor of one loaded magic database; released by
               * apprentice_unmap().
               */
              /*
               * Backing buffer.  apprentice_unmap() leaves it alone when it is
               * php_magic_database, efree()s it when it is any other non-NULL
               * pointer, and when it is NULL efree()s each non-NULL magic[]
               * array instead.
               */
  111         void *p;
              /* NOTE(review): presumably the size of p in bytes - confirm */
  112         size_t len;
              /* NOTE(review): presumably one of the MAP_TYPE_* constants - confirm */
  113         int type;
              /* Per-set magic arrays; add_mlist() copies magic[idx] into a list node. */
  114         struct magic *magic[MAGIC_SETS];
              /* Per-set value copied alongside magic[idx] by add_mlist(); presumably the entry count. */
  115         uint32_t nmagic[MAGIC_SETS];
  116 };
 117 
 118 int file_formats[FILE_NAMES_SIZE];
 119 const size_t file_nformats = FILE_NAMES_SIZE;
 120 const char *file_names[FILE_NAMES_SIZE];
 121 const size_t file_nnames = FILE_NAMES_SIZE;
 122 
 123 private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
 124 private int hextoint(int);
 125 private const char *getstr(struct magic_set *, struct magic *, const char *,
 126     int);
 127 private int parse(struct magic_set *, struct magic_entry *, const char *,
 128     size_t, int);
 129 private void eatsize(const char **);
 130 private int apprentice_1(struct magic_set *, const char *, int);
 131 private size_t apprentice_magic_strength(const struct magic *);
 132 private int apprentice_sort(const void *, const void *);
 133 private void apprentice_list(struct mlist *, int );
 134 private struct magic_map *apprentice_load(struct magic_set *, 
 135     const char *, int);
 136 private struct mlist *mlist_alloc(void);
 137 private void mlist_free(struct mlist *);
 138 private void byteswap(struct magic *, uint32_t);
 139 private void bs1(struct magic *);
 140 private uint16_t swap2(uint16_t);
 141 private uint32_t swap4(uint32_t);
 142 private uint64_t swap8(uint64_t);
 143 private char *mkdbname(struct magic_set *, const char *, int);
 144 private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
 145     size_t);
 146 private struct magic_map *apprentice_map(struct magic_set *, const char *);
 147 private int check_buffer(struct magic_set *, struct magic_map *, const char *);
 148 private void apprentice_unmap(struct magic_map *);
 149 private int apprentice_compile(struct magic_set *, struct magic_map *,
 150     const char *);
 151 private int check_format_type(const char *, int);
 152 private int check_format(struct magic_set *, struct magic *);
 153 private int get_op(char);
 154 private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
 155 private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
 156 private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
 157 
 158 
 159 private size_t magicsize = sizeof(struct magic);
 160 
 161 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
 162 
 163 private struct {
 164         const char *name;
 165         size_t len;
 166         int (*fun)(struct magic_set *, struct magic_entry *, const char *);
 167 } bang[] = {
 168 #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
 169         DECLARE_FIELD(mime),
 170         DECLARE_FIELD(apple),
 171         DECLARE_FIELD(strength),
 172 #undef  DECLARE_FIELD
 173         { NULL, 0, NULL }
 174 };
 175 
 176 #include "../data_file.c"
 177 
  178 struct type_tbl_s {
              /* One row of a keyword table searched by get_type(). */
  179         const char name[16];    /* keyword text; get_type() compares its first len bytes */
  180         const size_t len;       /* keyword length; a row with len 0 terminates the table */
  181         const int type;         /* value returned by get_type() for this keyword */
  182         const int format;       /* stored in file_formats[type] by init_file_tables() */
  183 };
 184 
 185 /*
 186  * XXX - the actual Single UNIX Specification says that "long" means "long",
 187  * as in the C data type, but we treat it as meaning "4-byte integer".
 188  * Given that the OS X version of file 5.04 did the same, I guess that passes
 189  * the actual test; having "long" be dependent on how big a "long" is on
 190  * the machine running "file" is silly.
 191  */
  192 static const struct type_tbl_s type_tbl[] = {
              /*
               * Keyword -> type/format table.  get_type() returns the first
               * row whose name is a prefix of the input, so row order matters
               * when one keyword is a prefix of another.  init_file_tables()
               * asserts that every type is below FILE_NAMES_SIZE and that the
               * number of rows before the terminator equals FILE_NAMES_SIZE.
               */
              /* XX(s) expands to the name and its length (without the NUL). */
  193 # define XX(s)          s, (sizeof(s) - 1)
              /* XX_NULL is the empty name with length 0 used by the terminator row. */
  194 # define XX_NULL        "", 0
  195         { XX("invalid"),        FILE_INVALID,           FILE_FMT_NONE },
  196         { XX("byte"),           FILE_BYTE,              FILE_FMT_NUM },
  197         { XX("short"),          FILE_SHORT,             FILE_FMT_NUM },
  198         { XX("default"),        FILE_DEFAULT,           FILE_FMT_NONE },
  199         { XX("long"),           FILE_LONG,              FILE_FMT_NUM },
  200         { XX("string"),         FILE_STRING,            FILE_FMT_STR },
  201         { XX("date"),           FILE_DATE,              FILE_FMT_STR },
  202         { XX("beshort"),        FILE_BESHORT,           FILE_FMT_NUM },
  203         { XX("belong"),         FILE_BELONG,            FILE_FMT_NUM },
  204         { XX("bedate"),         FILE_BEDATE,            FILE_FMT_STR },
  205         { XX("leshort"),        FILE_LESHORT,           FILE_FMT_NUM },
  206         { XX("lelong"),         FILE_LELONG,            FILE_FMT_NUM },
  207         { XX("ledate"),         FILE_LEDATE,            FILE_FMT_STR },
  208         { XX("pstring"),        FILE_PSTRING,           FILE_FMT_STR },
  209         { XX("ldate"),          FILE_LDATE,             FILE_FMT_STR },
  210         { XX("beldate"),        FILE_BELDATE,           FILE_FMT_STR },
  211         { XX("leldate"),        FILE_LELDATE,           FILE_FMT_STR },
  212         { XX("regex"),          FILE_REGEX,             FILE_FMT_STR },
  213         { XX("bestring16"),     FILE_BESTRING16,        FILE_FMT_STR },
  214         { XX("lestring16"),     FILE_LESTRING16,        FILE_FMT_STR },
  215         { XX("search"),         FILE_SEARCH,            FILE_FMT_STR },
  216         { XX("medate"),         FILE_MEDATE,            FILE_FMT_STR },
  217         { XX("meldate"),        FILE_MELDATE,           FILE_FMT_STR },
  218         { XX("melong"),         FILE_MELONG,            FILE_FMT_NUM },
  219         { XX("quad"),           FILE_QUAD,              FILE_FMT_QUAD },
  220         { XX("lequad"),         FILE_LEQUAD,            FILE_FMT_QUAD },
  221         { XX("bequad"),         FILE_BEQUAD,            FILE_FMT_QUAD },
  222         { XX("qdate"),          FILE_QDATE,             FILE_FMT_STR },
  223         { XX("leqdate"),        FILE_LEQDATE,           FILE_FMT_STR },
  224         { XX("beqdate"),        FILE_BEQDATE,           FILE_FMT_STR },
  225         { XX("qldate"),         FILE_QLDATE,            FILE_FMT_STR },
  226         { XX("leqldate"),       FILE_LEQLDATE,          FILE_FMT_STR },
  227         { XX("beqldate"),       FILE_BEQLDATE,          FILE_FMT_STR },
  228         { XX("float"),          FILE_FLOAT,             FILE_FMT_FLOAT },
  229         { XX("befloat"),        FILE_BEFLOAT,           FILE_FMT_FLOAT },
  230         { XX("lefloat"),        FILE_LEFLOAT,           FILE_FMT_FLOAT },
  231         { XX("double"),         FILE_DOUBLE,            FILE_FMT_DOUBLE },
  232         { XX("bedouble"),       FILE_BEDOUBLE,          FILE_FMT_DOUBLE },
  233         { XX("ledouble"),       FILE_LEDOUBLE,          FILE_FMT_DOUBLE },
  234         { XX("leid3"),          FILE_LEID3,             FILE_FMT_NUM },
  235         { XX("beid3"),          FILE_BEID3,             FILE_FMT_NUM },
  236         { XX("indirect"),       FILE_INDIRECT,          FILE_FMT_NUM },
  237         { XX("qwdate"),         FILE_QWDATE,            FILE_FMT_STR },
  238         { XX("leqwdate"),       FILE_LEQWDATE,          FILE_FMT_STR },
  239         { XX("beqwdate"),       FILE_BEQWDATE,          FILE_FMT_STR },
  240         { XX("name"),           FILE_NAME,              FILE_FMT_NONE },
  241         { XX("use"),            FILE_USE,               FILE_FMT_NONE },
  242         { XX("clear"),          FILE_CLEAR,             FILE_FMT_NONE },
              /* Terminator: len 0 stops get_type() and init_file_tables(). */
  243         { XX_NULL,              FILE_INVALID,           FILE_FMT_NONE },
  244 };
 245 
 246 /*
 247  * These are not types, and cannot be preceded by "u" to make them
 248  * unsigned.
 249  */
  250 static const struct type_tbl_s special_tbl[] = {
              /*
               * Same row layout as type_tbl, so it can be searched with
               * get_type().
               * NOTE(review): "name" and "use" carry FILE_FMT_STR here but
               * FILE_FMT_NONE in type_tbl; init_file_tables() only reads
               * type_tbl, so this format value may be unused - confirm.
               */
  251         { XX("name"),           FILE_NAME,              FILE_FMT_STR },
  252         { XX("use"),            FILE_USE,               FILE_FMT_STR },
              /* Terminator row (len 0). */
  253         { XX_NULL,              FILE_INVALID,           FILE_FMT_NONE },
  254 };
      /* The row-building helper macros are local to the two tables above. */
  255 # undef XX
  256 # undef XX_NULL
 257 
 258 #ifndef S_ISDIR
 259 #define S_ISDIR(mode) ((mode) & _S_IFDIR)
 260 #endif
 261 
 262 private int
 263 get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
 264 {
 265         const struct type_tbl_s *p;
 266 
 267         for (p = tbl; p->len; p++) {
 268                 if (strncmp(l, p->name, p->len) == 0) {
 269                         if (t)
 270                                 *t = l + p->len;
 271                         break;
 272                 }
 273         }
 274         return p->type;
 275 }
 276 
 277 private int
 278 get_standard_integer_type(const char *l, const char **t)
 279 {
 280         int type;
 281 
 282         if (isalpha((unsigned char)l[1])) {
 283                 switch (l[1]) {
 284                 case 'C':
 285                         /* "dC" and "uC" */
 286                         type = FILE_BYTE;
 287                         break;
 288                 case 'S':
 289                         /* "dS" and "uS" */
 290                         type = FILE_SHORT;
 291                         break;
 292                 case 'I':
 293                 case 'L':
 294                         /*
 295                          * "dI", "dL", "uI", and "uL".
 296                          *
 297                          * XXX - the actual Single UNIX Specification says
 298                          * that "L" means "long", as in the C data type,
 299                          * but we treat it as meaning "4-byte integer".
 300                          * Given that the OS X version of file 5.04 did
 301                          * the same, I guess that passes the actual SUS
 302                          * validation suite; having "dL" be dependent on
 303                          * how big a "long" is on the machine running
 304                          * "file" is silly.
 305                          */
 306                         type = FILE_LONG;
 307                         break;
 308                 case 'Q':
 309                         /* "dQ" and "uQ" */
 310                         type = FILE_QUAD;
 311                         break;
 312                 default:
 313                         /* "d{anything else}", "u{anything else}" */
 314                         return FILE_INVALID;
 315                 }
 316                 l += 2;
 317         } else if (isdigit((unsigned char)l[1])) {
 318                 /*
 319                  * "d{num}" and "u{num}"; we only support {num} values
 320                  * of 1, 2, 4, and 8 - the Single UNIX Specification
 321                  * doesn't say anything about whether arbitrary
 322                  * values should be supported, but both the Solaris 10
 323                  * and OS X Mountain Lion versions of file passed the
 324                  * Single UNIX Specification validation suite, and
 325                  * neither of them support values bigger than 8 or
 326                  * non-power-of-2 values.
 327                  */
 328                 if (isdigit((unsigned char)l[2])) {
 329                         /* Multi-digit, so > 9 */
 330                         return FILE_INVALID;
 331                 }
 332                 switch (l[1]) {
 333                 case '1':
 334                         type = FILE_BYTE;
 335                         break;
 336                 case '2':
 337                         type = FILE_SHORT;
 338                         break;
 339                 case '4':
 340                         type = FILE_LONG;
 341                         break;
 342                 case '8':
 343                         type = FILE_QUAD;
 344                         break;
 345                 default:
 346                         /* XXX - what about 3, 5, 6, or 7? */
 347                         return FILE_INVALID;
 348                 }
 349                 l += 2;
 350         } else {
 351                 /*
 352                  * "d" or "u" by itself.
 353                  */
 354                 type = FILE_LONG;
 355                 ++l;
 356         }
 357         if (t)
 358                 *t = l;
 359         return type;
 360 }
 361 
 362 private void
 363 init_file_tables(void)
 364 {
 365         static int done = 0;
 366         const struct type_tbl_s *p;
 367 
 368         if (done)
 369                 return;
 370         done++;
 371 
 372         for (p = type_tbl; p->len; p++) {
 373                 assert(p->type < FILE_NAMES_SIZE);
 374                 file_names[p->type] = p->name;
 375                 file_formats[p->type] = p->format;
 376         }
 377         assert(p - type_tbl == FILE_NAMES_SIZE);
 378 }
 379 
 380 private int
 381 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
 382 {
 383         struct mlist *ml;
 384 
 385         mlp->map = idx == 0 ? map : NULL;
 386         if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
 387                 return -1;
 388 
 389         ml->map = NULL;
 390         ml->magic = map->magic[idx];
 391         ml->nmagic = map->nmagic[idx];
 392 
 393         mlp->prev->next = ml;
 394         ml->prev = mlp->prev;
 395         ml->next = mlp;
 396         mlp->prev = ml;
 397         return 0;
 398 }
 399 
 400 /*
 401  * Handle one file or directory.
 402  */
 403 private int
 404 apprentice_1(struct magic_set *ms, const char *fn, int action)
 405 {
 406         struct magic_map *map;
 407         struct mlist *ml;
 408         size_t i;
 409 
 410         if (magicsize != FILE_MAGICSIZE) {
 411                 file_error(ms, 0, "magic element size %lu != %lu",
 412                     (unsigned long)sizeof(*map->magic[0]),
 413                     (unsigned long)FILE_MAGICSIZE);
 414                 return -1;
 415         }
 416 
 417         if (action == FILE_COMPILE) {
 418                 map = apprentice_load(ms, fn, action);
 419                 if (map == NULL)
 420                         return -1;
 421                 return apprentice_compile(ms, map, fn);
 422         }
 423 
 424         map = apprentice_map(ms, fn);
 425         if (map == NULL) {
 426                 if (fn) {
 427                         if (ms->flags & MAGIC_CHECK)
 428                                 file_magwarn(ms, "using regular magic file `%s'", fn);
 429                         map = apprentice_load(ms, fn, action);
 430                 }
 431                 if (map == NULL)
 432                         return -1;
 433         }
 434 
 435         for (i = 0; i < MAGIC_SETS; i++) {
 436                 if (add_mlist(ms->mlist[i], map, i) == -1) {
 437                         file_oomem(ms, sizeof(*ml));
 438                         apprentice_unmap(map);
 439                         return -1;
 440                 }
 441         }
 442 
 443         if (action == FILE_LIST) {
 444                 for (i = 0; i < MAGIC_SETS; i++) {
 445                         printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
 446                             i);
 447                         apprentice_list(ms->mlist[i], BINTEST);
 448                         printf("Text patterns:\n");
 449                         apprentice_list(ms->mlist[i], TEXTTEST);
 450                 }
 451         }
 452         return 0;
 453 }
 454 
 455 protected void
 456 file_ms_free(struct magic_set *ms)
 457 {
 458         size_t i;
 459         if (ms == NULL)
 460                 return;
 461         for (i = 0; i < MAGIC_SETS; i++)
 462                 mlist_free(ms->mlist[i]);
 463         if (ms->o.pbuf) {
 464                 efree(ms->o.pbuf);
 465         }
 466         if (ms->o.buf) {
 467                 efree(ms->o.buf);
 468         }
 469         if (ms->c.li) {
 470                 efree(ms->c.li);
 471         }
 472         efree(ms);
 473 }
 474 
 475 protected struct magic_set *
 476 file_ms_alloc(int flags)
 477 {
 478         struct magic_set *ms;
 479         size_t i, len;
 480 
 481         if ((ms = CAST(struct magic_set *, ecalloc((size_t)1,
 482             sizeof(struct magic_set)))) == NULL)
 483                 return NULL;
 484 
 485         if (magic_setflags(ms, flags) == -1) {
 486                 errno = EINVAL;
 487                 goto free;
 488         }
 489 
 490         ms->o.buf = ms->o.pbuf = NULL;
 491         len = (ms->c.len = 10) * sizeof(*ms->c.li);
 492 
 493         if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
 494                 goto free;
 495 
 496         ms->event_flags = 0;
 497         ms->error = -1;
 498         for (i = 0; i < MAGIC_SETS; i++)
 499                 ms->mlist[i] = NULL;
 500         ms->file = "unknown";
 501         ms->line = 0;
 502         ms->indir_max = FILE_INDIR_MAX;
 503         ms->name_max = FILE_NAME_MAX;
 504         ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
 505         ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
 506         ms->elf_notes_max = FILE_ELF_NOTES_MAX;
 507         return ms;
 508 free:
 509         efree(ms);
 510         return NULL;
 511 }
 512 
 513 private void
 514 apprentice_unmap(struct magic_map *map)
 515 {
 516         if (map == NULL)
 517                 return;
 518         if (map->p != php_magic_database) {
 519                 if (map->p == NULL) {
 520                         int j;
 521                         for (j = 0; j < MAGIC_SETS; j++) {
 522                                 if (map->magic[j]) {
 523                                         efree(map->magic[j]);
 524                                 }
 525                         }
 526                 } else {
 527                         efree(map->p);
 528                 }
 529         }
 530         efree(map);
 531 }
 532 
 533 private struct mlist *
 534 mlist_alloc(void)
 535 {
 536         struct mlist *mlist;
 537         if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
 538                 return NULL;
 539         }
 540         mlist->next = mlist->prev = mlist;
 541         return mlist;
 542 }
 543 
 544 private void
 545 mlist_free(struct mlist *mlist)
 546 {
 547         struct mlist *ml, *next;
 548 
 549         if (mlist == NULL)
 550                 return;
 551 
 552         ml = mlist->next;
 553         for (ml = mlist->next; (next = ml->next) != NULL; ml = next) {
 554                 if (ml->map)
 555                         apprentice_unmap(ml->map);
 556                 efree(ml);
 557                 if (ml == mlist)
 558                         break;
 559         }
 560 }
 561 
 562 /* const char *fn: list of magic files and directories */
 563 protected int
 564 file_apprentice(struct magic_set *ms, const char *fn, int action)
 565 {
 566         char *p, *mfn;
 567         int file_err, errs = -1;
 568         size_t i;
 569 
 570         if (ms->mlist[0] != NULL)
 571                 file_reset(ms);
 572 
 573 /* XXX disabling default magic loading so the compiled in data is used */
 574 #if 0
 575         if ((fn = magic_getpath(fn, action)) == NULL)
 576                 return -1;
 577 #endif
 578 
 579         init_file_tables();
 580 
 581         if (fn == NULL)
 582                 fn = getenv("MAGIC");
 583         if (fn == NULL) {
 584                 for (i = 0; i < MAGIC_SETS; i++) {
 585                         mlist_free(ms->mlist[i]);
 586                         if ((ms->mlist[i] = mlist_alloc()) == NULL) {
 587                                 file_oomem(ms, sizeof(*ms->mlist[i]));
 588                                 return -1;
 589                         }
 590                 }
 591                 return apprentice_1(ms, fn, action);
 592         }
 593 
 594         if ((mfn = estrdup(fn)) == NULL) {
 595                 file_oomem(ms, strlen(fn));
 596                 return -1;
 597         }
 598 
 599         for (i = 0; i < MAGIC_SETS; i++) {
 600                 mlist_free(ms->mlist[i]);
 601                 if ((ms->mlist[i] = mlist_alloc()) == NULL) {
 602                         file_oomem(ms, sizeof(*ms->mlist[i]));
 603                         while (i-- > 0) {
 604                                 mlist_free(ms->mlist[i]);
 605                                 ms->mlist[i] = NULL;
 606                         }
 607                         efree(mfn);
 608                         return -1;
 609                 }
 610         }
 611         fn = mfn;
 612 
 613         while (fn) {
 614                 p = strchr(fn, PATHSEP);
 615                 if (p)
 616                         *p++ = '\0';
 617                 if (*fn == '\0')
 618                         break;
 619                 file_err = apprentice_1(ms, fn, action);
 620                 errs = MAX(errs, file_err);
 621                 fn = p;
 622         }
 623 
 624         efree(mfn);
 625 
 626         if (errs == -1) {
 627                 for (i = 0; i < MAGIC_SETS; i++) {
 628                         mlist_free(ms->mlist[i]);
 629                         ms->mlist[i] = NULL;
 630                 }
 631                 file_error(ms, 0, "could not find any valid magic files!");
 632                 return -1;
 633         }
 634 
 635 #if 0
 636         /*
 637          * Always leave the database loaded
 638          */
 639         if (action == FILE_LOAD)
 640                 return 0;
 641 
 642         for (i = 0; i < MAGIC_SETS; i++) {
 643                 mlist_free(ms->mlist[i]);
 644                 ms->mlist[i] = NULL;
 645         }
 646 #endif
 647 
 648         switch (action) {
 649         case FILE_LOAD:
 650         case FILE_COMPILE:
 651         case FILE_CHECK:
 652         case FILE_LIST:
 653                 return 0;
 654         default:
 655                 file_error(ms, 0, "Invalid action %d", action);
 656                 return -1;
 657         }
 658 }
 659 
 660 /*
 661  * Compute the real length of a magic expression, for the purposes
 662  * of determining how "strong" a magic expression is (approximating
 663  * how specific its matches are):
 664  *      - magic characters count 0 unless escaped.
 665  *      - [] expressions count 1
 666  *      - {} expressions count 0
 667  *      - regular characters or escaped magic characters count 1
 668  *      - 0 length expressions count as one
 669  */
 670 private size_t
 671 nonmagic(const char *str)
 672 {
 673         const char *p;
 674         size_t rv = 0;
 675 
 676         for (p = str; *p; p++)
 677                 switch (*p) {
 678                 case '\\':      /* Escaped anything counts 1 */
 679                         if (!*++p)
 680                                 p--;
 681                         rv++;
 682                         continue;
 683                 case '?':       /* Magic characters count 0 */
 684                 case '*':
 685                 case '.':
 686                 case '+':
 687                 case '^':
 688                 case '$':
 689                         continue;
 690                 case '[':       /* Bracketed expressions count 1 the ']' */
 691                         while (*p && *p != ']')
 692                                 p++;
 693                         p--;
 694                         continue;
 695                 case '{':       /* Braced expressions count 0 */
 696                         while (*p && *p != '}')
 697                                 p++;
 698                         if (!*p)
 699                                 p--;
 700                         continue;
 701                 default:        /* Anything else counts 1 */
 702                         rv++;
 703                         continue;
 704                 }
 705 
 706         return rv == 0 ? 1 : rv;        /* Return at least 1 */
 707 }
 708 
 709 /*
 710  * Get weight of this magic entry, for sorting purposes.
 711  */
 712 private size_t
 713 apprentice_magic_strength(const struct magic *m)
 714 {
 715 #define MULT 10
 716         size_t v, val = 2 * MULT;       /* baseline strength */
 717 
 718         switch (m->type) {
 719         case FILE_DEFAULT:      /* make sure this sorts last */
 720                 if (m->factor_op != FILE_FACTOR_OP_NONE)
 721                         abort();
 722                 return 0;
 723 
 724         case FILE_BYTE:
 725                 val += 1 * MULT;
 726                 break;
 727 
 728         case FILE_SHORT:
 729         case FILE_LESHORT:
 730         case FILE_BESHORT:
 731                 val += 2 * MULT;
 732                 break;
 733 
 734         case FILE_LONG:
 735         case FILE_LELONG:
 736         case FILE_BELONG:
 737         case FILE_MELONG:
 738                 val += 4 * MULT;
 739                 break;
 740 
 741         case FILE_PSTRING:
 742         case FILE_STRING:
 743                 val += m->vallen * MULT;
 744                 break;
 745 
 746         case FILE_BESTRING16:
 747         case FILE_LESTRING16:
 748                 val += m->vallen * MULT / 2;
 749                 break;
 750 
 751         case FILE_SEARCH:
 752                 val += m->vallen * MAX(MULT / m->vallen, 1);
 753                 break;
 754 
 755         case FILE_REGEX:
 756                 v = nonmagic(m->value.s);
 757                 val += v * MAX(MULT / v, 1);
 758                 break;
 759 
 760         case FILE_DATE:
 761         case FILE_LEDATE:
 762         case FILE_BEDATE:
 763         case FILE_MEDATE:
 764         case FILE_LDATE:
 765         case FILE_LELDATE:
 766         case FILE_BELDATE:
 767         case FILE_MELDATE:
 768         case FILE_FLOAT:
 769         case FILE_BEFLOAT:
 770         case FILE_LEFLOAT:
 771                 val += 4 * MULT;
 772                 break;
 773 
 774         case FILE_QUAD:
 775         case FILE_BEQUAD:
 776         case FILE_LEQUAD:
 777         case FILE_QDATE:
 778         case FILE_LEQDATE:
 779         case FILE_BEQDATE:
 780         case FILE_QLDATE:
 781         case FILE_LEQLDATE:
 782         case FILE_BEQLDATE:
 783         case FILE_QWDATE:
 784         case FILE_LEQWDATE:
 785         case FILE_BEQWDATE:
 786         case FILE_DOUBLE:
 787         case FILE_BEDOUBLE:
 788         case FILE_LEDOUBLE:
 789                 val += 8 * MULT;
 790                 break;
 791 
 792         case FILE_INDIRECT:
 793         case FILE_NAME:
 794         case FILE_USE:
 795                 break;
 796 
 797         default:
 798                 (void)fprintf(stderr, "Bad type %d\n", m->type);
 799                 abort();
 800         }
 801 
 802         switch (m->reln) {
 803         case 'x':       /* matches anything penalize */
 804         case '!':       /* matches almost anything penalize */
 805                 val = 0;
 806                 break;
 807 
 808         case '=':       /* Exact match, prefer */
 809                 val += MULT;
 810                 break;
 811 
 812         case '>':
 813         case '<':       /* comparison match reduce strength */
 814                 val -= 2 * MULT;
 815                 break;
 816 
 817         case '^':
 818         case '&':       /* masking bits, we could count them too */
 819                 val -= MULT;
 820                 break;
 821 
 822         default:
 823                 (void)fprintf(stderr, "Bad relation %c\n", m->reln);
 824                 abort();
 825         }
 826 
 827         if (val == 0)   /* ensure we only return 0 for FILE_DEFAULT */
 828                 val = 1;
 829 
 830         switch (m->factor_op) {
 831         case FILE_FACTOR_OP_NONE:
 832                 break;
 833         case FILE_FACTOR_OP_PLUS:
 834                 val += m->factor;
 835                 break;
 836         case FILE_FACTOR_OP_MINUS:
 837                 val -= m->factor;
 838                 break;
 839         case FILE_FACTOR_OP_TIMES:
 840                 val *= m->factor;
 841                 break;
 842         case FILE_FACTOR_OP_DIV:
 843                 val /= m->factor;
 844                 break;
 845         default:
 846                 abort();
 847         }
 848 
 849         /*
 850          * Magic entries with no description get a bonus because they depend
 851          * on subsequent magic entries to print something.
 852          */
 853         if (m->desc[0] == '\0')
 854                 val++;
 855         return val;
 856 }
 857 
 858 /*  
 859  * Sort callback for sorting entries by "strength" (basically length)
 860  */
 861 private int
 862 apprentice_sort(const void *a, const void *b)
 863 {
 864         const struct magic_entry *ma = CAST(const struct magic_entry *, a);
 865         const struct magic_entry *mb = CAST(const struct magic_entry *, b);
 866         size_t sa = apprentice_magic_strength(ma->mp);
 867         size_t sb = apprentice_magic_strength(mb->mp);
 868         if (sa == sb)
 869                 return 0;
 870         else if (sa > sb)
 871                 return -1;
 872         else
 873                 return 1;
 874 }
 875 
 876 /*  
 877  * Shows sorted patterns list in the order which is used for the matching
 878  */
 879 private void
 880 apprentice_list(struct mlist *mlist, int mode)
 881 {
 882         uint32_t magindex = 0;
 883         struct mlist *ml;
 884         for (ml = mlist->next; ml != mlist; ml = ml->next) {
 885                 for (magindex = 0; magindex < ml->nmagic; magindex++) {
 886                         struct magic *m = &ml->magic[magindex];
 887                         if ((m->flag & mode) != mode) {
 888                                 /* Skip sub-tests */
 889                                 while (magindex + 1 < ml->nmagic &&
 890                                        ml->magic[magindex + 1].cont_level != 0)
 891                                         ++magindex;
 892                                 continue; /* Skip to next top-level test*/
 893                         }
 894 
 895                         /*
 896                          * Try to iterate over the tree until we find item with
 897                          * description/mimetype.
 898                          */
 899                         while (magindex + 1 < ml->nmagic &&
 900                                ml->magic[magindex + 1].cont_level != 0 &&
 901                                *ml->magic[magindex].desc == '\0' &&
 902                                *ml->magic[magindex].mimetype == '\0')
 903                                 magindex++;
 904 
 905                         printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
 906                             apprentice_magic_strength(m),
 907                             ml->magic[magindex].lineno,
 908                             ml->magic[magindex].desc,
 909                             ml->magic[magindex].mimetype);
 910                 }
 911         }
 912 }
 913 
 914 private void
 915 set_test_type(struct magic *mstart, struct magic *m)
 916 {
 917         switch (m->type) {
 918         case FILE_BYTE:
 919         case FILE_SHORT:
 920         case FILE_LONG:
 921         case FILE_DATE:
 922         case FILE_BESHORT:
 923         case FILE_BELONG:
 924         case FILE_BEDATE:
 925         case FILE_LESHORT:
 926         case FILE_LELONG:
 927         case FILE_LEDATE:
 928         case FILE_LDATE:
 929         case FILE_BELDATE:
 930         case FILE_LELDATE:
 931         case FILE_MEDATE:
 932         case FILE_MELDATE:
 933         case FILE_MELONG:
 934         case FILE_QUAD:
 935         case FILE_LEQUAD:
 936         case FILE_BEQUAD:
 937         case FILE_QDATE:
 938         case FILE_LEQDATE:
 939         case FILE_BEQDATE:
 940         case FILE_QLDATE:
 941         case FILE_LEQLDATE:
 942         case FILE_BEQLDATE:
 943         case FILE_QWDATE:
 944         case FILE_LEQWDATE:
 945         case FILE_BEQWDATE:
 946         case FILE_FLOAT:
 947         case FILE_BEFLOAT:
 948         case FILE_LEFLOAT:
 949         case FILE_DOUBLE:
 950         case FILE_BEDOUBLE:
 951         case FILE_LEDOUBLE:
 952                 mstart->flag |= BINTEST;
 953                 break;
 954         case FILE_STRING:
 955         case FILE_PSTRING:
 956         case FILE_BESTRING16:
 957         case FILE_LESTRING16:
 958                 /* Allow text overrides */
 959                 if (mstart->str_flags & STRING_TEXTTEST)
 960                         mstart->flag |= TEXTTEST;
 961                 else
 962                         mstart->flag |= BINTEST;
 963                 break;
 964         case FILE_REGEX:
 965         case FILE_SEARCH:
 966                 /* Check for override */
 967                 if (mstart->str_flags & STRING_BINTEST)
 968                         mstart->flag |= BINTEST;
 969                 if (mstart->str_flags & STRING_TEXTTEST)
 970                         mstart->flag |= TEXTTEST;
 971                     
 972                 if (mstart->flag & (TEXTTEST|BINTEST))
 973                         break;
 974 
 975                 /* binary test if pattern is not text */
 976                 if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
 977                     NULL) <= 0)
 978                         mstart->flag |= BINTEST;
 979                 else
 980                         mstart->flag |= TEXTTEST;
 981                 break;
 982         case FILE_DEFAULT:
 983                 /* can't deduce anything; we shouldn't see this at the
 984                    top level anyway */
 985                 break;
 986         case FILE_INVALID:
 987         default:
 988                 /* invalid search type, but no need to complain here */
 989                 break;
 990         }
 991 }
 992 
 993 private int
 994 addentry(struct magic_set *ms, struct magic_entry *me,
 995    struct magic_entry_set *mset)
 996 {
 997         size_t i = me->mp->type == FILE_NAME ? 1 : 0;
 998         if (mset[i].count == mset[i].max) {
 999                 struct magic_entry *mp;
1000 
1001                 mset[i].max += ALLOC_INCR;
1002                 if ((mp = CAST(struct magic_entry *,
1003                     erealloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
1004                     NULL) {
1005                         file_oomem(ms, sizeof(*mp) * mset[i].max);
1006                         return -1;
1007                 }
1008                 (void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1009                     ALLOC_INCR);
1010                 mset[i].me = mp;
1011         }
1012         mset[i].me[mset[i].count++] = *me;
1013         memset(me, 0, sizeof(*me));
1014         return 0;
1015 }
1016 
1017 /*
1018  * Load and parse one file.
1019  */
1020 private void
1021 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1022    struct magic_entry_set *mset)
1023 {
1024         char buffer[BUFSIZ + 1];
1025         char *line = NULL;
1026         size_t len;
1027         size_t lineno = 0;
1028         struct magic_entry me;
1029 
1030         php_stream *stream;
1031 
1032 
1033         ms->file = fn;
1034         stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
1035 
1036         if (stream == NULL) {
1037                 if (errno != ENOENT)
1038                         file_error(ms, errno, "cannot read magic file `%s'",
1039                                    fn);
1040                 (*errs)++;
1041                 return;
1042         }
1043 
1044         memset(&me, 0, sizeof(me));
1045         /* read and parse this file */
1046         for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
1047                 if (len == 0) /* null line, garbage, etc */
1048                         continue;
1049                 if (line[len - 1] == '\n') {
1050                         lineno++;
1051                         line[len - 1] = '\0'; /* delete newline */
1052                 }
1053                 switch (line[0]) {
1054                 case '\0':      /* empty, do not parse */
1055                 case '#':       /* comment, do not parse */
1056                         continue;
1057                 case '!':
1058                         if (line[1] == ':') {
1059                                 size_t i;
1060 
1061                                 for (i = 0; bang[i].name != NULL; i++) {
1062                                         if ((size_t)(len - 2) > bang[i].len &&
1063                                             memcmp(bang[i].name, line + 2,
1064                                             bang[i].len) == 0)
1065                                                 break;
1066                                 }
1067                                 if (bang[i].name == NULL) {
1068                                         file_error(ms, 0,
1069                                             "Unknown !: entry `%s'", line);
1070                                         (*errs)++;
1071                                         continue;
1072                                 }
1073                                 if (me.mp == NULL) {
1074                                         file_error(ms, 0,
1075                                             "No current entry for :!%s type",
1076                                                 bang[i].name);
1077                                         (*errs)++;
1078                                         continue;
1079                                 }
1080                                 if ((*bang[i].fun)(ms, &me,
1081                                     line + bang[i].len + 2) != 0) {
1082                                         (*errs)++;
1083                                         continue;
1084                                 }
1085                                 continue;
1086                         }
1087                         /*FALLTHROUGH*/
1088                 default:
1089                 again:
1090                         switch (parse(ms, &me, line, lineno, action)) {
1091                         case 0:
1092                                 continue;
1093                         case 1:
1094                                 (void)addentry(ms, &me, mset);
1095                                 goto again;
1096                         default:
1097                                 (*errs)++;
1098                                 break;
1099                         }
1100                 }
1101         }
1102         if (me.mp)
1103                 (void)addentry(ms, &me, mset);
1104     efree(line);
1105         php_stream_close(stream);
1106 }
1107 
/*
 * qsort(3) comparator for an array of C strings: each argument points
 * at a `char *' element, and the pointed-to strings are ordered with
 * strcmp(3).
 */
private int
cmpstrp(const void *p1, const void *p2)
{
	const char *const *lhs = (const char *const *)p1;
	const char *const *rhs = (const char *const *)p2;

	return strcmp(*lhs, *rhs);
}
1117 
1118 
1119 private uint32_t
1120 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1121     uint32_t starttest)
1122 {
1123         static const char text[] = "text";
1124         static const char binary[] = "binary";
1125         static const size_t len = sizeof(text);
1126 
1127         uint32_t i = starttest;
1128 
1129         do {
1130                 set_test_type(me[starttest].mp, me[i].mp);
1131                 if ((ms->flags & MAGIC_DEBUG) == 0)
1132                         continue;
1133                 (void)fprintf(stderr, "%s%s%s: %s\n",
1134                     me[i].mp->mimetype,
1135                     me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1136                     me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1137                     me[i].mp->flag & BINTEST ? binary : text);
1138                 if (me[i].mp->flag & BINTEST) {
1139                         char *p = strstr(me[i].mp->desc, text);
1140                         if (p && (p == me[i].mp->desc ||
1141                             isspace((unsigned char)p[-1])) &&
1142                             (p + len - me[i].mp->desc == MAXstring
1143                             || (p[len] == '\0' ||
1144                             isspace((unsigned char)p[len]))))
1145                                 (void)fprintf(stderr, "*** Possible "
1146                                     "binary test for text type\n");
1147                 }
1148         } while (++i < nme && me[i].mp->cont_level != 0);
1149         return i;
1150 }
1151 
1152 private void
1153 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1154 {
1155         uint32_t i;
1156         for (i = 0; i < nme; i++) {
1157                 if (me[i].mp->cont_level == 0 &&
1158                     me[i].mp->type == FILE_DEFAULT) {
1159                         while (++i < nme)
1160                                 if (me[i].mp->cont_level == 0)
1161                                         break;
1162                         if (i != nme) {
1163                                 /* XXX - Ugh! */
1164                                 ms->line = me[i].mp->lineno;
1165                                 file_magwarn(ms,
1166                                     "level 0 \"default\" did not sort last");
1167                         }
1168                         return;                                     
1169                 }
1170         }
1171 }
1172 
1173 private int
1174 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1175     struct magic **ma, uint32_t *nma)
1176 {
1177         uint32_t i, mentrycount = 0;
1178         size_t slen;
1179 
1180         for (i = 0; i < nme; i++)
1181                 mentrycount += me[i].cont_count;
1182 
1183         slen = sizeof(**ma) * mentrycount;
1184         if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1185                 file_oomem(ms, slen);
1186                 return -1;
1187         }
1188 
1189         mentrycount = 0;
1190         for (i = 0; i < nme; i++) {
1191                 (void)memcpy(*ma + mentrycount, me[i].mp,
1192                     me[i].cont_count * sizeof(**ma));
1193                 mentrycount += me[i].cont_count;
1194         }
1195         *nma = mentrycount;
1196         return 0;
1197 }
1198 
1199 private void
1200 magic_entry_free(struct magic_entry *me, uint32_t nme)
1201 {
1202         uint32_t i;
1203         if (me == NULL)
1204                 return;
1205         for (i = 0; i < nme; i++)
1206                 efree(me[i].mp);
1207         efree(me);
1208 }
1209 
1210 private struct magic_map *
1211 apprentice_load(struct magic_set *ms, const char *fn, int action)
1212 {
1213         int errs = 0;
1214         uint32_t i, j;
1215         size_t files = 0, maxfiles = 0;
1216         char **filearr = NULL;
1217         zend_stat_t st;
1218         struct magic_map *map;
1219         struct magic_entry_set mset[MAGIC_SETS];
1220         php_stream *dir;
1221         php_stream_dirent d;
1222  
1223 
1224         memset(mset, 0, sizeof(mset));
1225         ms->flags |= MAGIC_CHECK;       /* Enable checks for parsed files */
1226 
1227 
1228         if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL)
1229         {
1230                 file_oomem(ms, sizeof(*map));
1231                 return NULL;
1232         }
1233 
1234         /* print silly verbose header for USG compat. */
1235         if (action == FILE_CHECK)
1236                 (void)fprintf(stderr, "%s\n", usg_hdr);
1237 
1238         /* load directory or file */
1239         /* FIXME: Read file names and sort them to prevent
1240            non-determinism. See Debian bug #488562. */
1241         if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1242                 int mflen;
1243                 char mfn[MAXPATHLEN];
1244 
1245                 dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1246                 if (!dir) {
1247                         errs++;
1248                         goto out;
1249                 }
1250                 while (php_stream_readdir(dir, &d)) {
1251                         if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1252                                 file_oomem(ms,
1253                                 strlen(fn) + strlen(d.d_name) + 2);
1254                                 errs++;
1255                                 php_stream_closedir(dir);
1256                                 goto out;
1257                         }
1258                         if (zend_stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1259                                 continue;
1260                         }
1261                         if (files >= maxfiles) {
1262                                 size_t mlen;
1263                                 maxfiles = (maxfiles + 1) * 2;
1264                                 mlen = maxfiles * sizeof(*filearr);
1265                                 if ((filearr = CAST(char **,
1266                                     erealloc(filearr, mlen))) == NULL) {
1267                                         file_oomem(ms, mlen);
1268                                         php_stream_closedir(dir);
1269                                         errs++;
1270                                         goto out;
1271                                 }
1272                         }
1273                         filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1274                 }
1275                 php_stream_closedir(dir);
1276                 qsort(filearr, files, sizeof(*filearr), cmpstrp);
1277                 for (i = 0; i < files; i++) {
1278                         load_1(ms, action, filearr[i], &errs, mset);
1279                         efree(filearr[i]);
1280                 }
1281                 efree(filearr);
1282         } else
1283                 load_1(ms, action, fn, &errs, mset);
1284         if (errs)
1285                 goto out;
1286 
1287         for (j = 0; j < MAGIC_SETS; j++) {
1288                 /* Set types of tests */
1289                 for (i = 0; i < mset[j].count; ) {
1290                         if (mset[j].me[i].mp->cont_level != 0) {
1291                                 i++;
1292                                 continue;
1293                         }
1294                         i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1295                 }
1296                 qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
1297                     apprentice_sort);
1298 
1299                 /*
1300                  * Make sure that any level 0 "default" line is last
1301                  * (if one exists).
1302                  */
1303                 set_last_default(ms, mset[j].me, mset[j].count);
1304 
1305                 /* coalesce per file arrays into a single one */
1306                 if (coalesce_entries(ms, mset[j].me, mset[j].count,
1307                     &map->magic[j], &map->nmagic[j]) == -1) {
1308                         errs++;
1309                         goto out;
1310                 }
1311         }
1312 
1313 out:
1314         for (j = 0; j < MAGIC_SETS; j++)
1315                 magic_entry_free(mset[j].me, mset[j].count);
1316 
1317         if (errs) {
1318                 apprentice_unmap(map);
1319                 return NULL;
1320         }
1321         return map;
1322 }
1323 
1324 /*
1325  * extend the sign bit if the comparison is to be signed
1326  */
1327 protected uint64_t
1328 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1329 {
1330         if (!(m->flag & UNSIGNED)) {
1331                 switch(m->type) {
1332                 /*
1333                  * Do not remove the casts below.  They are
1334                  * vital.  When later compared with the data,
1335                  * the sign extension must have happened.
1336                  */
1337                 case FILE_BYTE:
1338                         v = (signed char) v;
1339                         break;
1340                 case FILE_SHORT:
1341                 case FILE_BESHORT:
1342                 case FILE_LESHORT:
1343                         v = (short) v;
1344                         break;
1345                 case FILE_DATE:
1346                 case FILE_BEDATE:
1347                 case FILE_LEDATE:
1348                 case FILE_MEDATE:
1349                 case FILE_LDATE:
1350                 case FILE_BELDATE:
1351                 case FILE_LELDATE:
1352                 case FILE_MELDATE:
1353                 case FILE_LONG:
1354                 case FILE_BELONG:
1355                 case FILE_LELONG:
1356                 case FILE_MELONG:
1357                 case FILE_FLOAT:
1358                 case FILE_BEFLOAT:
1359                 case FILE_LEFLOAT:
1360                         v = (int32_t) v;
1361                         break;
1362                 case FILE_QUAD:
1363                 case FILE_BEQUAD:
1364                 case FILE_LEQUAD:
1365                 case FILE_QDATE:
1366                 case FILE_QLDATE:
1367                 case FILE_QWDATE:
1368                 case FILE_BEQDATE:
1369                 case FILE_BEQLDATE:
1370                 case FILE_BEQWDATE:
1371                 case FILE_LEQDATE:
1372                 case FILE_LEQLDATE:
1373                 case FILE_LEQWDATE:
1374                 case FILE_DOUBLE:
1375                 case FILE_BEDOUBLE:
1376                 case FILE_LEDOUBLE:
1377                         v = (int64_t) v;
1378                         break;
1379                 case FILE_STRING:
1380                 case FILE_PSTRING:
1381                 case FILE_BESTRING16:
1382                 case FILE_LESTRING16:
1383                 case FILE_REGEX:
1384                 case FILE_SEARCH:
1385                 case FILE_DEFAULT:
1386                 case FILE_INDIRECT:
1387                 case FILE_NAME:
1388                 case FILE_USE:
1389                 case FILE_CLEAR:
1390                         break;
1391                 default:
1392                         if (ms->flags & MAGIC_CHECK)
1393                             file_magwarn(ms, "cannot happen: m->type=%d\n",
1394                                     m->type);
1395                         return ~0U;
1396                 }
1397         }
1398         return v;
1399 }
1400 
1401 private int
1402 string_modifier_check(struct magic_set *ms, struct magic *m)
1403 {
1404         if ((ms->flags & MAGIC_CHECK) == 0)
1405                 return 0;
1406 
1407         if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1408             (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1409                 file_magwarn(ms,
1410                     "'/BHhLl' modifiers are only allowed for pascal strings\n");
1411                 return -1;
1412         }
1413         switch (m->type) {
1414         case FILE_BESTRING16:
1415         case FILE_LESTRING16:
1416                 if (m->str_flags != 0) {
1417                         file_magwarn(ms,
1418                             "no modifiers allowed for 16-bit strings\n");
1419                         return -1;
1420                 }
1421                 break;
1422         case FILE_STRING:
1423         case FILE_PSTRING:
1424                 if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1425                         file_magwarn(ms,
1426                             "'/%c' only allowed on regex and search\n",
1427                             CHAR_REGEX_OFFSET_START);
1428                         return -1;
1429                 }
1430                 break;
1431         case FILE_SEARCH:
1432                 if (m->str_range == 0) {
1433                         file_magwarn(ms,
1434                             "missing range; defaulting to %d\n",
1435                             STRING_DEFAULT_RANGE);
1436                         m->str_range = STRING_DEFAULT_RANGE;
1437                         return -1;
1438                 }
1439                 break;
1440         case FILE_REGEX:
1441                 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1442                         file_magwarn(ms, "'/%c' not allowed on regex\n",
1443                             CHAR_COMPACT_WHITESPACE);
1444                         return -1;
1445                 }
1446                 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1447                         file_magwarn(ms, "'/%c' not allowed on regex\n",
1448                             CHAR_COMPACT_OPTIONAL_WHITESPACE);
1449                         return -1;
1450                 }
1451                 break;
1452         default:
1453                 file_magwarn(ms, "coding error: m->type=%d\n",
1454                     m->type);
1455                 return -1;
1456         }
1457         return 0;
1458 }
1459 
1460 private int
1461 get_op(char c)
1462 {
1463         switch (c) {
1464         case '&':
1465                 return FILE_OPAND;
1466         case '|':
1467                 return FILE_OPOR;
1468         case '^':
1469                 return FILE_OPXOR;
1470         case '+':
1471                 return FILE_OPADD;
1472         case '-':
1473                 return FILE_OPMINUS;
1474         case '*':
1475                 return FILE_OPMULTIPLY;
1476         case '/':
1477                 return FILE_OPDIVIDE;
1478         case '%':
1479                 return FILE_OPMODULO;
1480         default:
1481                 return -1;
1482         }
1483 }
1484 
1485 #ifdef ENABLE_CONDITIONALS
1486 private int
1487 get_cond(const char *l, const char **t)
1488 {
1489         static const struct cond_tbl_s {
1490                 char name[8];
1491                 size_t len;
1492                 int cond;
1493         } cond_tbl[] = {
1494                 { "if",         2,      COND_IF },
1495                 { "elif",       4,      COND_ELIF },
1496                 { "else",       4,      COND_ELSE },
1497                 { "",           0,      COND_NONE },
1498         };
1499         const struct cond_tbl_s *p;
1500 
1501         for (p = cond_tbl; p->len; p++) {
1502                 if (strncmp(l, p->name, p->len) == 0 &&
1503                     isspace((unsigned char)l[p->len])) {
1504                         if (t)
1505                                 *t = l + p->len;
1506                         break;
1507                 }
1508         }
1509         return p->cond;
1510 }
1511 
1512 private int
1513 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1514 {
1515         int last_cond;
1516         last_cond = ms->c.li[cont_level].last_cond;
1517 
1518         switch (cond) {
1519         case COND_IF:
1520                 if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1521                         if (ms->flags & MAGIC_CHECK)
1522                                 file_magwarn(ms, "syntax error: `if'");
1523                         return -1;
1524                 }
1525                 last_cond = COND_IF;
1526                 break;
1527 
1528         case COND_ELIF:
1529                 if (last_cond != COND_IF && last_cond != COND_ELIF) {
1530                         if (ms->flags & MAGIC_CHECK)
1531                                 file_magwarn(ms, "syntax error: `elif'");
1532                         return -1;
1533                 }
1534                 last_cond = COND_ELIF;
1535                 break;
1536 
1537         case COND_ELSE:
1538                 if (last_cond != COND_IF && last_cond != COND_ELIF) {
1539                         if (ms->flags & MAGIC_CHECK)
1540                                 file_magwarn(ms, "syntax error: `else'");
1541                         return -1;
1542                 }
1543                 last_cond = COND_NONE;
1544                 break;
1545 
1546         case COND_NONE:
1547                 last_cond = COND_NONE;
1548                 break;
1549         }
1550 
1551         ms->c.li[cont_level].last_cond = last_cond;
1552         return 0;
1553 }
1554 #endif /* ENABLE_CONDITIONALS */
1555 
1556 private int
1557 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1558 {
1559         const char *l = *lp;
1560 
1561         while (!isspace((unsigned char)*++l))
1562                 switch (*l) {
1563                 case CHAR_INDIRECT_RELATIVE:
1564                         m->str_flags |= INDIRECT_RELATIVE;
1565                         break;
1566                 default:
1567                         if (ms->flags & MAGIC_CHECK)
1568                                 file_magwarn(ms, "indirect modifier `%c' "
1569                                         "invalid", *l);
1570                         *lp = l;
1571                         return -1;
1572                 }
1573         *lp = l;
1574         return 0;
1575 }
1576 
1577 private void
1578 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1579     int op)
1580 {
1581         const char *l = *lp;
1582         char *t;
1583         uint64_t val;
1584 
1585         ++l;
1586         m->mask_op |= op;
1587         val = (uint64_t)strtoull(l, &t, 0);
1588         l = t;
1589         m->num_mask = file_signextend(ms, m, val);
1590         eatsize(&l);
1591         *lp = l;
1592 }
1593 
1594 private int
1595 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1596 {
1597         const char *l = *lp;
1598         char *t;
1599         int have_range = 0;
1600 
1601         while (!isspace((unsigned char)*++l)) {
1602                 switch (*l) {
1603                 case '0':  case '1':  case '2':
1604                 case '3':  case '4':  case '5':
1605                 case '6':  case '7':  case '8':
1606                 case '9':
1607                         if (have_range && (ms->flags & MAGIC_CHECK))
1608                                 file_magwarn(ms, "multiple ranges");
1609                         have_range = 1;
1610                         m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
1611                         if (m->str_range == 0)
1612                                 file_magwarn(ms, "zero range");
1613                         l = t - 1;
1614                         break;
1615                 case CHAR_COMPACT_WHITESPACE:
1616                         m->str_flags |= STRING_COMPACT_WHITESPACE;
1617                         break;
1618                 case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1619                         m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
1620                         break;
1621                 case CHAR_IGNORE_LOWERCASE:
1622                         m->str_flags |= STRING_IGNORE_LOWERCASE;
1623                         break;
1624                 case CHAR_IGNORE_UPPERCASE:
1625                         m->str_flags |= STRING_IGNORE_UPPERCASE;
1626                         break;
1627                 case CHAR_REGEX_OFFSET_START:
1628                         m->str_flags |= REGEX_OFFSET_START;
1629                         break;
1630                 case CHAR_BINTEST:
1631                         m->str_flags |= STRING_BINTEST;
1632                         break;
1633                 case CHAR_TEXTTEST:
1634                         m->str_flags |= STRING_TEXTTEST;
1635                         break;
1636                 case CHAR_TRIM:
1637                         m->str_flags |= STRING_TRIM;
1638                         break;
1639                 case CHAR_PSTRING_1_LE:
1640 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
1641                         if (m->type != FILE_PSTRING)
1642                                 goto bad;
1643                         SET_LENGTH(PSTRING_1_LE);
1644                         break;
1645                 case CHAR_PSTRING_2_BE:
1646                         if (m->type != FILE_PSTRING)
1647                                 goto bad;
1648                         SET_LENGTH(PSTRING_2_BE);
1649                         break;
1650                 case CHAR_PSTRING_2_LE:
1651                         if (m->type != FILE_PSTRING)
1652                                 goto bad;
1653                         SET_LENGTH(PSTRING_2_LE);
1654                         break;
1655                 case CHAR_PSTRING_4_BE:
1656                         if (m->type != FILE_PSTRING)
1657                                 goto bad;
1658                         SET_LENGTH(PSTRING_4_BE);
1659                         break;
1660                 case CHAR_PSTRING_4_LE:
1661                         switch (m->type) {
1662                         case FILE_PSTRING:
1663                         case FILE_REGEX:
1664                                 break;
1665                         default:
1666                                 goto bad;
1667                         }
1668                         SET_LENGTH(PSTRING_4_LE);
1669                         break;
1670                 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1671                         if (m->type != FILE_PSTRING)
1672                                 goto bad;
1673                         m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1674                         break;
1675                 default:
1676                 bad:
1677                         if (ms->flags & MAGIC_CHECK)
1678                                 file_magwarn(ms, "string modifier `%c' "
1679                                         "invalid", *l);
1680                         goto out;
1681                 }
1682                 /* allow multiple '/' for readability */
1683                 if (l[1] == '/' && !isspace((unsigned char)l[2]))
1684                         l++;
1685         }
1686         if (string_modifier_check(ms, m) == -1)
1687                 goto out;
1688         *lp = l;
1689         return 0;
1690 out:
1691         *lp = l;
1692         return -1;
1693 }
1694 
1695 /*
1696  * parse one line from magic file, put into magic[index++] if valid
1697  */
1698 private int
1699 parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1700     size_t lineno, int action)
1701 {
1702 #ifdef ENABLE_CONDITIONALS
1703         static uint32_t last_cont_level = 0;
1704 #endif
1705         size_t i;
1706         struct magic *m;
1707         const char *l = line;
1708         char *t;
1709         int op;
1710         uint32_t cont_level;
1711         int32_t diff;
1712 
1713         cont_level = 0;
1714 
1715         /*
1716          * Parse the offset.
1717          */
1718         while (*l == '>') {
1719                 ++l;            /* step over */
1720                 cont_level++; 
1721         }
1722 #ifdef ENABLE_CONDITIONALS
1723         if (cont_level == 0 || cont_level > last_cont_level)
1724                 if (file_check_mem(ms, cont_level) == -1)
1725                         return -1;
1726         last_cont_level = cont_level;
1727 #endif
1728         if (cont_level != 0) {
1729                 if (me->mp == NULL) {
1730                         file_magerror(ms, "No current entry for continuation");
1731                         return -1;
1732                 }
1733                 if (me->cont_count == 0) {
1734                         file_magerror(ms, "Continuations present with 0 count");
1735                         return -1;
1736                 }
1737                 m = &me->mp[me->cont_count - 1];
1738                 diff = (int32_t)cont_level - (int32_t)m->cont_level;
1739                 if (diff > 1)
1740                         file_magwarn(ms, "New continuation level %u is more "
1741                             "than one larger than current level %u", cont_level,
1742                             m->cont_level);
1743                 if (me->cont_count == me->max_count) {
1744                         struct magic *nm;
1745                         size_t cnt = me->max_count + ALLOC_CHUNK;
1746                         if ((nm = CAST(struct magic *, erealloc(me->mp,
1747                             sizeof(*nm) * cnt))) == NULL) {
1748                                 file_oomem(ms, sizeof(*nm) * cnt);
1749                                 return -1;
1750                         }
1751                         me->mp = m = nm;
1752                         me->max_count = CAST(uint32_t, cnt);
1753                 }
1754                 m = &me->mp[me->cont_count++];
1755                 (void)memset(m, 0, sizeof(*m));
1756                 m->cont_level = cont_level;
1757         } else {
1758                 static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1759                 if (me->mp != NULL)
1760                         return 1;
1761                 if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
1762                         file_oomem(ms, len);
1763                         return -1;
1764                 }
1765                 me->mp = m;
1766                 me->max_count = ALLOC_CHUNK;
1767                 (void)memset(m, 0, sizeof(*m));
1768                 m->factor_op = FILE_FACTOR_OP_NONE;
1769                 m->cont_level = 0;
1770                 me->cont_count = 1;
1771         }
1772         m->lineno = CAST(uint32_t, lineno);
1773 
1774         if (*l == '&') {  /* m->cont_level == 0 checked below. */
1775                 ++l;            /* step over */
1776                 m->flag |= OFFADD;
1777         }
1778         if (*l == '(') {
1779                 ++l;            /* step over */
1780                 m->flag |= INDIR;
1781                 if (m->flag & OFFADD)
1782                         m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1783 
1784                 if (*l == '&') {  /* m->cont_level == 0 checked below */
1785                         ++l;            /* step over */
1786                         m->flag |= OFFADD;
1787                 }
1788         }
1789         /* Indirect offsets are not valid at level 0. */
1790         if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1791                 if (ms->flags & MAGIC_CHECK)
1792                         file_magwarn(ms, "relative offset at level 0");
1793 
1794         /* get offset, then skip over it */
1795         m->offset = (uint32_t)strtoul(l, &t, 0);
1796         if (l == t)
1797                 if (ms->flags & MAGIC_CHECK)
1798                         file_magwarn(ms, "offset `%s' invalid", l);
1799         l = t;
1800 
1801         if (m->flag & INDIR) {
1802                 m->in_type = FILE_LONG;
1803                 m->in_offset = 0;
1804                 /*
1805                  * read [.lbs][+-]nnnnn)
1806                  */
1807                 if (*l == '.') {
1808                         l++;
1809                         switch (*l) {
1810                         case 'l':
1811                                 m->in_type = FILE_LELONG;
1812                                 break;
1813                         case 'L':
1814                                 m->in_type = FILE_BELONG;
1815                                 break;
1816                         case 'm':
1817                                 m->in_type = FILE_MELONG;
1818                                 break;
1819                         case 'h':
1820                         case 's':
1821                                 m->in_type = FILE_LESHORT;
1822                                 break;
1823                         case 'H':
1824                         case 'S':
1825                                 m->in_type = FILE_BESHORT;
1826                                 break;
1827                         case 'c':
1828                         case 'b':
1829                         case 'C':
1830                         case 'B':
1831                                 m->in_type = FILE_BYTE;
1832                                 break;
1833                         case 'e':
1834                         case 'f':
1835                         case 'g':
1836                                 m->in_type = FILE_LEDOUBLE;
1837                                 break;
1838                         case 'E':
1839                         case 'F':
1840                         case 'G':
1841                                 m->in_type = FILE_BEDOUBLE;
1842                                 break;
1843                         case 'i':
1844                                 m->in_type = FILE_LEID3;
1845                                 break;
1846                         case 'I':
1847                                 m->in_type = FILE_BEID3;
1848                                 break;
1849                         default:
1850                                 if (ms->flags & MAGIC_CHECK)
1851                                         file_magwarn(ms,
1852                                             "indirect offset type `%c' invalid",
1853                                             *l);
1854                                 break;
1855                         }
1856                         l++;
1857                 }
1858 
1859                 m->in_op = 0;
1860                 if (*l == '~') {
1861                         m->in_op |= FILE_OPINVERSE;
1862                         l++;
1863                 }
1864                 if ((op = get_op(*l)) != -1) {
1865                         m->in_op |= op;
1866                         l++;
1867                 }
1868                 if (*l == '(') {
1869                         m->in_op |= FILE_OPINDIRECT;
1870                         l++;
1871                 }
1872                 if (isdigit((unsigned char)*l) || *l == '-') {
1873                         m->in_offset = (int32_t)strtol(l, &t, 0);
1874                         if (l == t)
1875                                 if (ms->flags & MAGIC_CHECK)
1876                                         file_magwarn(ms,
1877                                             "in_offset `%s' invalid", l);
1878                         l = t;
1879                 }
1880                 if (*l++ != ')' || 
1881                     ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1882                         if (ms->flags & MAGIC_CHECK)
1883                                 file_magwarn(ms,
1884                                     "missing ')' in indirect offset");
1885         }
1886         EATAB;
1887 
1888 #ifdef ENABLE_CONDITIONALS
1889         m->cond = get_cond(l, &l);
1890         if (check_cond(ms, m->cond, cont_level) == -1)
1891                 return -1;
1892 
1893         EATAB;
1894 #endif
1895 
1896         /*
1897          * Parse the type.
1898          */
1899         if (*l == 'u') {
1900                 /*
1901                  * Try it as a keyword type prefixed by "u"; match what
1902                  * follows the "u".  If that fails, try it as an SUS
1903                  * integer type. 
1904                  */
1905                 m->type = get_type(type_tbl, l + 1, &l);
1906                 if (m->type == FILE_INVALID) {
1907                         /*
1908                          * Not a keyword type; parse it as an SUS type,
1909                          * 'u' possibly followed by a number or C/S/L.
1910                          */
1911                         m->type = get_standard_integer_type(l, &l);
1912                 }
1913                 /* It's unsigned. */
1914                 if (m->type != FILE_INVALID)
1915                         m->flag |= UNSIGNED;
1916         } else {
1917                 /*
1918                  * Try it as a keyword type.  If that fails, try it as
1919                  * an SUS integer type if it begins with "d" or as an
1920                  * SUS string type if it begins with "s".  In any case,
1921                  * it's not unsigned.
1922                  */
1923                 m->type = get_type(type_tbl, l, &l);
1924                 if (m->type == FILE_INVALID) {
1925                         /*
1926                          * Not a keyword type; parse it as an SUS type,
1927                          * either 'd' possibly followed by a number or
1928                          * C/S/L, or just 's'.
1929                          */
1930                         if (*l == 'd')
1931                                 m->type = get_standard_integer_type(l, &l);
1932                         else if (*l == 's' && !isalpha((unsigned char)l[1])) {
1933                                 m->type = FILE_STRING;
1934                 ++l;
1935                         }
1936                 }
1937         }
1938 
1939         if (m->type == FILE_INVALID) {
1940                 /* Not found - try it as a special keyword. */
1941                 m->type = get_type(special_tbl, l, &l);
1942         }
1943                         
1944         if (m->type == FILE_INVALID) {
1945                 if (ms->flags & MAGIC_CHECK)
1946                         file_magwarn(ms, "type `%s' invalid", l);
1947                 return -1;
1948         }
1949 
1950         /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1951         /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1952 
1953         m->mask_op = 0;
1954         if (*l == '~') {
1955                 if (!IS_LIBMAGIC_STRING(m->type))
1956                         m->mask_op |= FILE_OPINVERSE;
1957                 else if (ms->flags & MAGIC_CHECK)
1958                         file_magwarn(ms, "'~' invalid for string types");
1959                 ++l;
1960         }
1961         m->str_range = 0;
1962         m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1963         if ((op = get_op(*l)) != -1) {
1964                 if (IS_LIBMAGIC_STRING(m->type)) {
1965                         int r;
1966 
1967                         if (op != FILE_OPDIVIDE) {
1968                                 if (ms->flags & MAGIC_CHECK)
1969                                         file_magwarn(ms,
1970                                             "invalid string/indirect op: "
1971                                             "`%c'", *t);
1972                                 return -1;
1973                         }
1974 
1975                         if (m->type == FILE_INDIRECT)
1976                                 r = parse_indirect_modifier(ms, m, &l);
1977                         else
1978                                 r = parse_string_modifier(ms, m, &l);
1979                         if (r == -1)
1980                                 return -1;
1981                 } else
1982                         parse_op_modifier(ms, m, &l, op);
1983         }
1984 
1985         /*
1986          * We used to set mask to all 1's here, instead let's just not do
1987          * anything if mask = 0 (unless you have a better idea)
1988          */
1989         EATAB;
1990   
1991         switch (*l) {
1992         case '>':
1993         case '<':
1994                 m->reln = *l;
1995                 ++l;
1996                 if (*l == '=') {
1997                         if (ms->flags & MAGIC_CHECK) {
1998                                 file_magwarn(ms, "%c= not supported",
1999                                     m->reln);
2000                                 return -1;
2001                         }
2002                    ++l;
2003                 }
2004                 break;
2005         /* Old-style anding: "0 byte &0x80 dynamically linked" */
2006         case '&':
2007         case '^':
2008         case '=':
2009                 m->reln = *l;
2010                 ++l;
2011                 if (*l == '=') {
2012                    /* HP compat: ignore &= etc. */
2013                    ++l;
2014                 }
2015                 break;
2016         case '!':
2017                 m->reln = *l;
2018                 ++l;
2019                 break;
2020         default:
2021                 m->reln = '=';  /* the default relation */
2022                 if (*l == 'x' && ((isascii((unsigned char)l[1]) && 
2023                     isspace((unsigned char)l[1])) || !l[1])) {
2024                         m->reln = *l;
2025                         ++l;
2026                 }
2027                 break;
2028         }
2029         /*
2030          * Grab the value part, except for an 'x' reln.
2031          */
2032         if (m->reln != 'x' && getvalue(ms, m, &l, action))
2033                 return -1;
2034 
2035         /*
2036          * TODO finish this macro and start using it!
2037          * #define offsetcheck {if (offset > HOWMANY-1) 
2038          *      magwarn("offset too big"); }
2039          */
2040 
2041         /*
2042          * Now get last part - the description
2043          */
2044         EATAB;
2045         if (l[0] == '\b') {
2046                 ++l;
2047                 m->flag |= NOSPACE;
2048         } else if ((l[0] == '\\') && (l[1] == 'b')) {
2049                 ++l;
2050                 ++l;
2051                 m->flag |= NOSPACE;
2052         }
2053         for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2054                 continue;
2055         if (i == sizeof(m->desc)) {
2056                 m->desc[sizeof(m->desc) - 1] = '\0';
2057                 if (ms->flags & MAGIC_CHECK)
2058                         file_magwarn(ms, "description `%s' truncated", m->desc);
2059         }
2060 
2061         /*
2062          * We only do this check while compiling, or if any of the magic
2063          * files were not compiled.
2064          */
2065         if (ms->flags & MAGIC_CHECK) {
2066                 if (check_format(ms, m) == -1)
2067                         return -1;
2068         }
2069         m->mimetype[0] = '\0';          /* initialise MIME type to none */
2070         return 0;
2071 }
2072 
2073 /*
2074  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2075  * if valid
2076  */
2077 private int
2078 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
2079 {
2080         const char *l = line;
2081         char *el;
2082         unsigned long factor;
2083         struct magic *m = &me->mp[0];
2084 
2085         if (m->factor_op != FILE_FACTOR_OP_NONE) {
2086                 file_magwarn(ms,
2087                     "Current entry already has a strength type: %c %d",
2088                     m->factor_op, m->factor);
2089                 return -1;
2090         }
2091         if (m->type == FILE_NAME) {
2092                 file_magwarn(ms, "%s: Strength setting is not supported in "
2093                     "\"name\" magic entries", m->value.s);
2094                 return -1;
2095         }
2096         EATAB;
2097         switch (*l) {
2098         case FILE_FACTOR_OP_NONE:
2099         case FILE_FACTOR_OP_PLUS:
2100         case FILE_FACTOR_OP_MINUS:
2101         case FILE_FACTOR_OP_TIMES:
2102         case FILE_FACTOR_OP_DIV:
2103                 m->factor_op = *l++;
2104                 break;
2105         default:
2106                 file_magwarn(ms, "Unknown factor op `%c'", *l);
2107                 return -1;
2108         }
2109         EATAB;
2110         factor = strtoul(l, &el, 0);
2111         if (factor > 255) {
2112                 file_magwarn(ms, "Too large factor `%lu'", factor);
2113                 goto out;
2114         }
2115         if (*el && !isspace((unsigned char)*el)) {
2116                 file_magwarn(ms, "Bad factor `%s'", l);
2117                 goto out;
2118         }
2119         m->factor = (uint8_t)factor;
2120         if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2121                 file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2122                     m->factor_op, m->factor);
2123                 goto out;
2124         }
2125         return 0;
2126 out:
2127         m->factor_op = FILE_FACTOR_OP_NONE;
2128         m->factor = 0;
2129         return -1;
2130 }
2131 
/*
 * Tell whether x is acceptable in an annotation value: an ASCII letter or
 * digit, or any character that strchr() finds in `extra'.  Because
 * strchr() also matches the terminating NUL of `extra', x == '\0' is
 * reported as acceptable.  Returns 1 if acceptable, 0 otherwise.
 */
private int
goodchar(unsigned char x, const char *extra)
{
        if (isascii(x) && isalnum(x))
                return 1;
        return strchr(extra, x) != NULL;
}
2137 
2138 private int
2139 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2140     zend_off_t off, size_t len, const char *name, const char *extra, int nt)
2141 {
2142         size_t i;
2143         const char *l = line;
2144         struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2145         char *buf = CAST(char *, CAST(void *, m)) + off;
2146 
2147         if (buf[0] != '\0') {
2148                 len = nt ? strlen(buf) : len;
2149                 file_magwarn(ms, "Current entry already has a %s type "
2150                     "`%.*s', new type `%s'", name, (int)len, buf, l);
2151                 return -1;
2152         }       
2153 
2154         if (*m->desc == '\0') {
2155                 file_magwarn(ms, "Current entry does not yet have a "
2156                     "description for adding a %s type", name);
2157                 return -1;
2158         }
2159 
2160         EATAB;
2161         for (i = 0; *l && i < len && goodchar(*l, extra); buf[i++] = *l++)
2162                 continue;
2163 
2164         if (i == len && *l) {
2165                 if (nt)
2166                         buf[len - 1] = '\0';
2167                 if (ms->flags & MAGIC_CHECK)
2168                         file_magwarn(ms, "%s type `%s' truncated %"
2169                             SIZE_T_FORMAT "u", name, line, i);
2170         } else {
2171                 if (!isspace((unsigned char)*l) && !goodchar(*l, extra))
2172                         file_magwarn(ms, "%s type `%s' has bad char '%c'",
2173                             name, line, *l);
2174                 if (nt)
2175                         buf[i] = '\0';
2176         }
2177 
2178         if (i > 0)
2179                 return 0;
2180 
2181         file_magerror(ms, "Bad magic entry '%s'", line);
2182         return -1;
2183 }
2184 
2185 /*
2186  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2187  * magic[index - 1]
2188  */
2189 private int
2190 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
2191 {
2192         struct magic *m = &me->mp[0];
2193 
2194         return parse_extra(ms, me, line,
2195             CAST(off_t, offsetof(struct magic, apple)),
2196             sizeof(m->apple), "APPLE", "!+-./", 0);
2197 }
2198 
2199 /*
2200  * parse a MIME annotation line from magic file, put into magic[index - 1]
2201  * if valid
2202  */
2203 private int
2204 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
2205 {
2206         struct magic *m = &me->mp[0];
2207 
2208         return parse_extra(ms, me, line,
2209             CAST(zend_off_t, offsetof(struct magic, mimetype)),
2210             sizeof(m->mimetype), "MIME", "+-/.", 1);
2211 }
2212 
2213 private int
2214 check_format_type(const char *ptr, int type)
2215 {
2216         int quad = 0, h;
2217         if (*ptr == '\0') {
2218                 /* Missing format string; bad */
2219                 return -1;
2220         }
2221 
2222         switch (file_formats[type]) {
2223         case FILE_FMT_QUAD:
2224                 quad = 1;
2225                 /*FALLTHROUGH*/
2226         case FILE_FMT_NUM:
2227                 if (quad == 0) {
2228                         switch (type) {
2229                         case FILE_BYTE:
2230                                 h = 2;
2231                                 break;
2232                         case FILE_SHORT:
2233                         case FILE_BESHORT:
2234                         case FILE_LESHORT:
2235                                 h = 1;
2236                                 break;
2237                         case FILE_LONG:
2238                         case FILE_BELONG:
2239                         case FILE_LELONG:
2240                         case FILE_MELONG:
2241                         case FILE_LEID3:
2242                         case FILE_BEID3:
2243                         case FILE_INDIRECT:
2244                                 h = 0;
2245                                 break;
2246                         default:
2247                                 abort();
2248                         }
2249                 } else
2250                         h = 0;
2251                 if (*ptr == '-')
2252                         ptr++;
2253                 if (*ptr == '.')
2254                         ptr++;
2255                 while (isdigit((unsigned char)*ptr)) ptr++;
2256                 if (*ptr == '.')
2257                         ptr++;
2258                 while (isdigit((unsigned char)*ptr)) ptr++;
2259                 if (quad) {
2260                         if (*ptr++ != 'l')
2261                                 return -1;
2262                         if (*ptr++ != 'l')
2263                                 return -1;
2264                 }
2265         
2266                 switch (*ptr++) {
2267 #ifdef STRICT_FORMAT    /* "long" formats are int formats for us */
2268                 /* so don't accept the 'l' modifier */
2269                 case 'l':
2270                         switch (*ptr++) {
2271                         case 'i':
2272                         case 'd':
2273                         case 'u':
2274                         case 'o':
2275                         case 'x':
2276                         case 'X':
2277                                 return h != 0 ? -1 : 0;
2278                         default:
2279                                 return -1;
2280                         }
2281                 
2282                 /*
2283                  * Don't accept h and hh modifiers. They make writing
2284                  * magic entries more complicated, for very little benefit
2285                  */
2286                 case 'h':
2287                         if (h-- <= 0)
2288                                 return -1;
2289                         switch (*ptr++) {
2290                         case 'h':
2291                                 if (h-- <= 0)
2292                                         return -1;
2293                                 switch (*ptr++) {
2294                                 case 'i':
2295                                 case 'd':
2296                                 case 'u':
2297                                 case 'o':
2298                                 case 'x':
2299                                 case 'X':
2300                                         return 0;
2301                                 default:
2302                                         return -1;
2303                                 }
2304                         case 'i':
2305                         case 'd':
2306                         case 'u':
2307                         case 'o':
2308                         case 'x':
2309                         case 'X':
2310                                 return h != 0 ? -1 : 0;
2311                         default:
2312                                 return -1;
2313                         }
2314 #endif
2315                 case 'c':
2316                         return h != 2 ? -1 : 0;
2317                 case 'i':
2318                 case 'd':
2319                 case 'u':
2320                 case 'o':
2321                 case 'x':
2322                 case 'X':
2323 #ifdef STRICT_FORMAT
2324                         return h != 0 ? -1 : 0;
2325 #else
2326                         return 0;
2327 #endif
2328                 default:
2329                         return -1;
2330                 }
2331                 
2332         case FILE_FMT_FLOAT:
2333         case FILE_FMT_DOUBLE:
2334                 if (*ptr == '-')
2335                         ptr++;
2336                 if (*ptr == '.')
2337                         ptr++;
2338                 while (isdigit((unsigned char)*ptr)) ptr++;
2339                 if (*ptr == '.')
2340                         ptr++;
2341                 while (isdigit((unsigned char)*ptr)) ptr++;
2342         
2343                 switch (*ptr++) {
2344                 case 'e':
2345                 case 'E':
2346                 case 'f':
2347                 case 'F':
2348                 case 'g':
2349                 case 'G':
2350                         return 0;
2351                         
2352                 default:
2353                         return -1;
2354                 }
2355                 
2356 
2357         case FILE_FMT_STR:
2358                 if (*ptr == '-')
2359                         ptr++;
2360                 while (isdigit((unsigned char )*ptr))
2361                         ptr++;
2362                 if (*ptr == '.') {
2363                         ptr++;
2364                         while (isdigit((unsigned char )*ptr))
2365                                 ptr++;
2366                 }
2367                 
2368                 switch (*ptr++) {
2369                 case 's':
2370                         return 0;
2371                 default:
2372                         return -1;
2373                 }
2374                 
2375         default:
2376                 /* internal error */
2377                 abort();
2378         }
2379         /*NOTREACHED*/
2380         return -1;
2381 }
2382         
2383 /*
2384  * Check that the optional printf format in description matches
2385  * the type of the magic.
2386  */
2387 private int
2388 check_format(struct magic_set *ms, struct magic *m)
2389 {
2390         char *ptr;
2391 
2392         for (ptr = m->desc; *ptr; ptr++)
2393                 if (*ptr == '%')
2394                         break;
2395         if (*ptr == '\0') {
2396                 /* No format string; ok */
2397                 return 1;
2398         }
2399 
2400         assert(file_nformats == file_nnames);
2401 
2402         if (m->type >= file_nformats) {
2403                 file_magwarn(ms, "Internal error inconsistency between "
2404                     "m->type and format strings");              
2405                 return -1;
2406         }
2407         if (file_formats[m->type] == FILE_FMT_NONE) {
2408                 file_magwarn(ms, "No format string for `%s' with description "
2409                     "`%s'", m->desc, file_names[m->type]);
2410                 return -1;
2411         }
2412 
2413         ptr++;
2414         if (check_format_type(ptr, m->type) == -1) {
2415                 /*
2416                  * TODO: this error message is unhelpful if the format
2417                  * string is not one character long
2418                  */
2419                 file_magwarn(ms, "Printf format `%c' is not valid for type "
2420                     "`%s' in description `%s'", *ptr ? *ptr : '?',
2421                     file_names[m->type], m->desc);
2422                 return -1;
2423         }
2424         
2425         for (; *ptr; ptr++) {
2426                 if (*ptr == '%') {
2427                         file_magwarn(ms,
2428                             "Too many format strings (should have at most one) "
2429                             "for `%s' with description `%s'",
2430                             file_names[m->type], m->desc);
2431                         return -1;
2432                 }
2433         }
2434         return 0;
2435 }
2436 
2437 /* 
2438  * Read a numeric value from a pointer, into the value union of a magic 
2439  * pointer, according to the magic type.  Update the string pointer to point 
2440  * just after the number read.  Return 0 for success, non-zero for failure.
2441  */
2442 private int
2443 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2444 {
2445         switch (m->type) {
2446         case FILE_BESTRING16:
2447         case FILE_LESTRING16:
2448         case FILE_STRING:
2449         case FILE_PSTRING:
2450         case FILE_REGEX:
2451         case FILE_SEARCH:
2452         case FILE_NAME:
2453         case FILE_USE:
2454                 *p = getstr(ms, m, *p, action == FILE_COMPILE);
2455                 if (*p == NULL) {
2456                         if (ms->flags & MAGIC_CHECK)
2457                                 file_magwarn(ms, "cannot get string from `%s'",
2458                                     m->value.s);
2459                         return -1;
2460                 }
2461                 if (m->type == FILE_REGEX) {
2462                         /*  XXX do we need this? */
2463                         /*zval pattern;
2464                         int options = 0;
2465                         pcre_cache_entry *pce;
2466 
2467                         convert_libmagic_pattern(&pattern, m->value.s, strlen(m->value.s), options);
2468 
2469                         if ((pce = pcre_get_compiled_regex_cache(Z_STR(pattern))) == NULL) {
2470                                 return -1;      
2471                         }
2472 
2473                         return 0;*/
2474                 }
2475                 return 0;
2476         case FILE_FLOAT:
2477         case FILE_BEFLOAT:
2478         case FILE_LEFLOAT:
2479                 if (m->reln != 'x') {
2480                         char *ep;
2481 #ifdef HAVE_STRTOF
2482                         m->value.f = strtof(*p, &ep);
2483 #else
2484                         m->value.f = (float)strtod(*p, &ep);
2485 #endif
2486                         *p = ep;
2487                 }
2488                 return 0;
2489         case FILE_DOUBLE:
2490         case FILE_BEDOUBLE:
2491         case FILE_LEDOUBLE:
2492                 if (m->reln != 'x') {
2493                         char *ep;
2494                         m->value.d = strtod(*p, &ep);
2495                         *p = ep;
2496                 }
2497                 return 0;
2498         default:
2499                 if (m->reln != 'x') {
2500                         char *ep;
2501                         m->value.q = file_signextend(ms, m,
2502                             (uint64_t)strtoull(*p, &ep, 0));
2503                         *p = ep;
2504                         eatsize(p);
2505                 }
2506                 return 0;
2507         }
2508 }
2509 
2510 /*
2511  * Convert a string containing C character escapes.  Stop at an unescaped
2512  * space or tab.
2513  * Copy the converted version to "m->value.s", and the length in m->vallen.
2514  * Return updated scan pointer as function result. Warn if set.
2515  */
2516 private const char *
2517 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2518 {
2519         const char *origs = s;
2520         char    *p = m->value.s;
2521         size_t  plen = sizeof(m->value.s);
2522         char    *origp = p;
2523         char    *pmax = p + plen - 1;
2524         int     c;
2525         int     val;
2526 
2527         while ((c = *s++) != '\0') {
2528                 if (isspace((unsigned char) c))
2529                         break;
2530                 if (p >= pmax) {
2531                         file_error(ms, 0, "string too long: `%s'", origs);
2532                         return NULL;
2533                 }
2534                 if (c == '\\') {
2535                         switch(c = *s++) {
2536 
2537                         case '\0':
2538                                 if (warn)
2539                                         file_magwarn(ms, "incomplete escape");
2540                                 goto out;
2541 
2542                         case '\t':
2543                                 if (warn) {
2544                                         file_magwarn(ms,
2545                                             "escaped tab found, use \\t instead");
2546                                         warn = 0;       /* already did */
2547                                 }
2548                                 /*FALLTHROUGH*/
2549                         default:
2550                                 if (warn) {
2551                                         if (isprint((unsigned char)c)) {
2552                                                 /* Allow escaping of 
2553                                                  * ``relations'' */
2554                                                 if (strchr("<>&^=!", c) == NULL
2555                                                     && (m->type != FILE_REGEX ||
2556                                                     strchr("[]().*?^$|{}", c)
2557                                                     == NULL)) {
2558                                                         file_magwarn(ms, "no "
2559                                                             "need to escape "
2560                                                             "`%c'", c);
2561                                                 }
2562                                         } else {
2563                                                 file_magwarn(ms,
2564                                                     "unknown escape sequence: "
2565                                                     "\\%03o", c);
2566                                         }
2567                                 }
2568                                 /*FALLTHROUGH*/
2569                         /* space, perhaps force people to use \040? */
2570                         case ' ':
2571 #if 0
2572                         /*
2573                          * Other things people escape, but shouldn't need to,
2574                          * so we disallow them
2575                          */
2576                         case '\'':
2577                         case '"':
2578                         case '?':
2579 #endif
2580                         /* Relations */
2581                         case '>':
2582                         case '<':
2583                         case '&':
2584                         case '^':
2585                         case '=':
2586                         case '!':
2587                         /* and baskslash itself */
2588                         case '\\':
2589                                 *p++ = (char) c;
2590                                 break;
2591 
2592                         case 'a':
2593                                 *p++ = '\a';
2594                                 break;
2595 
2596                         case 'b':
2597                                 *p++ = '\b';
2598                                 break;
2599 
2600                         case 'f':
2601                                 *p++ = '\f';
2602                                 break;
2603 
2604                         case 'n':
2605                                 *p++ = '\n';
2606                                 break;
2607 
2608                         case 'r':
2609                                 *p++ = '\r';
2610                                 break;
2611 
2612                         case 't':
2613                                 *p++ = '\t';
2614                                 break;
2615 
2616                         case 'v':
2617                                 *p++ = '\v';
2618                                 break;
2619 
2620                         /* \ and up to 3 octal digits */
2621                         case '0':
2622                         case '1':
2623                         case '2':
2624                         case '3':
2625                         case '4':
2626                         case '5':
2627                         case '6':
2628                         case '7':
2629                                 val = c - '0';
2630                                 c = *s++;  /* try for 2 */
2631                                 if (c >= '0' && c <= '7') {
2632                                         val = (val << 3) | (c - '0');
2633                                         c = *s++;  /* try for 3 */
2634                                         if (c >= '0' && c <= '7')
2635                                                 val = (val << 3) | (c-'0');
2636                                         else
2637                                                 --s;
2638                                 }
2639                                 else
2640                                         --s;
2641                                 *p++ = (char)val;
2642                                 break;
2643 
2644                         /* \x and up to 2 hex digits */
2645                         case 'x':
2646                                 val = 'x';      /* Default if no digits */
2647                                 c = hextoint(*s++);     /* Get next char */
2648                                 if (c >= 0) {
2649                                         val = c;
2650                                         c = hextoint(*s++);
2651                                         if (c >= 0)
2652                                                 val = (val << 4) + c;
2653                                         else
2654                                                 --s;
2655                                 } else
2656                                         --s;
2657                                 *p++ = (char)val;
2658                                 break;
2659                         }
2660                 } else
2661                         *p++ = (char)c;
2662         }
2663 out:
2664         *p = '\0';
2665         m->vallen = CAST(unsigned char, (p - origp));
2666         if (m->type == FILE_PSTRING)
2667                 m->vallen += (unsigned char)file_pstring_length_size(m);
2668         return s;
2669 }
2670 
2671 
/*
 * Map one hexadecimal digit character to its numeric value (0..15).
 * Returns -1 for anything that is not an ASCII hex digit.
 */
private int
hextoint(int c)
{
        const unsigned char uc = (unsigned char) c;

        /* Reject bytes outside the 7-bit ASCII range up front. */
        if (uc > 0x7f)
                return -1;

        if (isdigit(uc))
                return c - '0';
        if (c >= 'a' && c <= 'f')
                return c - 'a' + 10;
        if (c >= 'A' && c <= 'F')
                return c - 'A' + 10;

        return -1;
}
2686 
2687 
/*
 * Write a string to fp, rendering non-printable bytes as C escapes.
 *
 * When len equals ~0U the string is treated as NUL-terminated; otherwise
 * exactly len bytes are written.  Bytes in the range 040..0176 are emitted
 * as-is; the control characters \a \b \f \n \r \t \v use their letter
 * escape and everything else is printed as a three-digit octal escape.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
        /* Parallel tables: control character -> escape letter. */
        static const char ctrl_chars[] = "\a\b\f\n\r\t\v";
        static const char ctrl_names[] = "abfnrtv";
        const char *hit;
        char    ch;

        for (;;) {
                if (len == ~0U) {
                        ch = *s++;
                        if (ch == '\0')
                                break;
                } else {
                        if (len-- == 0)
                                break;
                        ch = *s++;
                }

                if (ch >= 040 && ch <= 0176) {
                        (void) fputc(ch, fp);
                        continue;
                }

                (void) fputc('\\', fp);
                /*
                 * strchr() would also match the table's terminator, so a NUL
                 * byte is excluded explicitly and falls to the octal form.
                 */
                hit = (ch != '\0') ? strchr(ctrl_chars, ch) : NULL;
                if (hit != NULL)
                        (void) fputc(ctrl_names[hit - ctrl_chars], fp);
                else
                        (void) fprintf(fp, "%.3o", ch & 0377);
        }
}
2747 
/*
 * eatsize(): skip the size specifier that may follow a number [eg. 10UL].
 *
 * An optional 'u' is consumed first, then at most one of the letters
 * l, s, h, b or c (all case-insensitive).  *p is advanced past whatever
 * was consumed.
 */
private void
eatsize(const char **p)
{
        const char *cur = *p;
        int suffix;

        if (LOWCASE(*cur) == 'u')
                cur++;

        suffix = LOWCASE(*cur);
        if (suffix == 'l' ||    /* long */
            suffix == 's' ||    /* short */
            suffix == 'h' ||    /* short */
            suffix == 'b' ||    /* char/byte */
            suffix == 'c')      /* char/byte */
                cur++;

        *p = cur;
}
2773 
2774 /*
2775  * handle a buffer containing a compiled file.
2776  */
2777 private struct magic_map *
2778 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
2779 {
2780         struct magic_map *map;
2781 
2782         if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
2783                 file_oomem(ms, sizeof(*map));
2784                 return NULL;
2785         }
2786         map->len = len;
2787         map->p = buf;
2788         map->type = MAP_TYPE_USER;
2789         if (check_buffer(ms, map, "buffer") != 0) {
2790                 apprentice_unmap(map);
2791                 return NULL;
2792         }
2793         return map;
2794 }
2795 
2796 /*
2797  * handle a compiled file.
2798  */
2799 
2800 private struct magic_map *
2801 apprentice_map(struct magic_set *ms, const char *fn)
2802 {
2803         uint32_t *ptr;
2804         uint32_t version, entries, nentries;
2805         int needsbyteswap;
2806         char *dbname = NULL;
2807         struct magic_map *map;
2808         size_t i;
2809         php_stream *stream = NULL;
2810         php_stream_statbuf st;
2811 
2812 
2813 
2814         if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
2815                 file_oomem(ms, sizeof(*map));
2816                 return NULL;
2817         }
2818 
2819         if (fn == NULL) {
2820                 map->p = (void *)&php_magic_database;
2821                 goto internal_loaded;
2822         }
2823 
2824 #ifdef PHP_WIN32
2825         /* Don't bother on windows with php_stream_open_wrapper,
2826         return to give apprentice_load() a chance. */
2827         if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
2828                if (st.sb.st_mode & S_IFDIR) {
2829                        return NULL;
2830                }
2831        }
2832 #endif
2833 
2834         dbname = mkdbname(ms, fn, 0);
2835         if (dbname == NULL)
2836                 goto error;
2837 
2838                 stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
2839 
2840         if (!stream) {
2841                 goto error;
2842         }
2843 
2844         if (php_stream_stat(stream, &st) < 0) {
2845                 file_error(ms, errno, "cannot stat `%s'", dbname);
2846                 goto error;
2847         }
2848         if (st.sb.st_size < 8 || st.sb.st_size > MAXMAGIC_SIZE) {
2849                 file_error(ms, 0, "file `%s' is too %s", dbname,
2850                     st.sb.st_size < 8 ? "small" : "large");
2851                 goto error;
2852         }
2853 
2854         map->len = (size_t)st.sb.st_size;
2855         if ((map->p = CAST(void *, emalloc(map->len))) == NULL) {
2856                 file_oomem(ms, map->len);
2857                 goto error;
2858         }
2859         if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
2860                 file_badread(ms);
2861                 goto error;
2862         }
2863         map->len = 0;
2864 #define RET     1
2865 
2866         php_stream_close(stream);
2867         stream = NULL;
2868 
2869 internal_loaded:
2870         ptr = (uint32_t *)(void *)map->p;
2871         if (*ptr != MAGICNO) {
2872                 if (swap4(*ptr) != MAGICNO) {
2873                         file_error(ms, 0, "bad magic in `%s'", dbname);
2874                         goto error;
2875                 }
2876                 needsbyteswap = 1;
2877         } else
2878                 needsbyteswap = 0;
2879         if (needsbyteswap)
2880                 version = swap4(ptr[1]);
2881         else
2882                 version = ptr[1];
2883         if (version != VERSIONNO) {
2884                 file_error(ms, 0, "File %d.%d supports only version %d magic "
2885                     "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
2886                     VERSIONNO, dbname, version);
2887                 goto error;
2888         }
2889 
2890         /* php_magic_database is a const, performing writes will segfault. This is for big-endian
2891         machines only, PPC and Sparc specifically. Consider static variable or MINIT in
2892         future. */
2893         if (needsbyteswap && fn == NULL) {
2894                 map->p = emalloc(sizeof(php_magic_database));
2895                 map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
2896         }
2897 
2898         if (NULL != fn) {
2899                 nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2900                 entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2901                 if ((zend_off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
2902                         file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
2903                                 dbname, (unsigned long long)st.sb.st_size,
2904                                 sizeof(struct magic));
2905                         goto error;
2906                 }
2907         }
2908         map->magic[0] = CAST(struct magic *, map->p) + 1;
2909         nentries = 0;
2910         for (i = 0; i < MAGIC_SETS; i++) {
2911                 if (needsbyteswap)
2912                         map->nmagic[i] = swap4(ptr[i + 2]);
2913                 else
2914                         map->nmagic[i] = ptr[i + 2];
2915                 if (i != MAGIC_SETS - 1)
2916                         map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2917                 nentries += map->nmagic[i];
2918         }
2919         if (NULL != fn && entries != nentries + 1) {
2920                 file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2921                     dbname, entries, nentries + 1);
2922                 goto error;
2923         }
2924         if (needsbyteswap)
2925                 for (i = 0; i < MAGIC_SETS; i++)
2926                         byteswap(map->magic[i], map->nmagic[i]);
2927 
2928         if (dbname) {
2929                 efree(dbname);
2930         }
2931         return map;
2932 
2933 error:
2934         if (stream) {
2935                 php_stream_close(stream);
2936         }
2937         apprentice_unmap(map);
2938         if (dbname) {
2939                 efree(dbname);
2940         }
2941         return NULL;
2942 }
2943 
2944 private int
2945 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
2946 {
2947         uint32_t *ptr;
2948         uint32_t entries, nentries;
2949         uint32_t version;
2950         int i, needsbyteswap;
2951 
2952         ptr = CAST(uint32_t *, map->p);
2953         if (*ptr != MAGICNO) {
2954                 if (swap4(*ptr) != MAGICNO) {
2955                         file_error(ms, 0, "bad magic in `%s'", dbname);
2956                         return -1;
2957                 }
2958                 needsbyteswap = 1;
2959         } else
2960                 needsbyteswap = 0;
2961         if (needsbyteswap)
2962                 version = swap4(ptr[1]);
2963         else
2964                 version = ptr[1];
2965         if (version != VERSIONNO) {
2966                 file_error(ms, 0, "File %s supports only version %d magic "
2967                     "files. `%s' is version %d", FILE_VERSION_MAJOR,
2968                     VERSIONNO, dbname, version);
2969                 return -1;
2970         }
2971         entries = (uint32_t)(map->len / sizeof(struct magic));
2972         if ((entries * sizeof(struct magic)) != map->len) {
2973                 file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
2974                     "a multiple of %" SIZE_T_FORMAT "u",
2975                     dbname, map->len, sizeof(struct magic));
2976                 return -1;
2977         }
2978         map->magic[0] = CAST(struct magic *, map->p) + 1;
2979         nentries = 0;
2980         for (i = 0; i < MAGIC_SETS; i++) {
2981                 if (needsbyteswap)
2982                         map->nmagic[i] = swap4(ptr[i + 2]);
2983                 else
2984                         map->nmagic[i] = ptr[i + 2];
2985                 if (i != MAGIC_SETS - 1)
2986                         map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2987                 nentries += map->nmagic[i];
2988         }
2989         if (entries != nentries + 1) {
2990                 file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2991                     dbname, entries, nentries + 1);
2992                 return -1;
2993         }
2994         if (needsbyteswap)
2995                 for (i = 0; i < MAGIC_SETS; i++)
2996                         byteswap(map->magic[i], map->nmagic[i]);
2997         return 0;
2998 }
2999 
3000 /*
3001  * handle an mmaped file.
3002  */
3003 private int
3004 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3005 {
3006         static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3007         static const size_t m = sizeof(**map->magic);
3008         size_t len;
3009         char *dbname;
3010         int rv = -1;
3011         uint32_t i;
3012         union {
3013                 struct magic m;
3014                 uint32_t h[2 + MAGIC_SETS];
3015         } hdr;
3016         php_stream *stream;
3017 
3018 
3019         dbname = mkdbname(ms, fn, 0);
3020 
3021         if (dbname == NULL)
3022                 goto out;
3023 
3024         /* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
3025         stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
3026 
3027         if (!stream) {
3028                 file_error(ms, errno, "cannot open `%s'", dbname);
3029                 goto out;
3030         }
3031         memset(&hdr, 0, sizeof(hdr));
3032         hdr.h[0] = MAGICNO;
3033         hdr.h[1] = VERSIONNO;
3034         memcpy(hdr.h + 2, map->nmagic, nm);
3035 
3036         if (php_stream_write(stream,(const char *)&hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) {
3037                 file_error(ms, errno, "error writing `%s'", dbname);
3038                 goto out;
3039         }
3040 
3041         for (i = 0; i < MAGIC_SETS; i++) {
3042                 len = m * map->nmagic[i];
3043                 if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
3044                         file_error(ms, errno, "error writing `%s'", dbname);
3045                         goto out;
3046                 }
3047         }
3048 
3049         if (stream) {
3050                 php_stream_close(stream);
3051         }
3052         rv = 0;
3053 out:
3054         efree(dbname);
3055         return rv;
3056 }
3057 
3058 private const char ext[] = ".mgc";
3059 /*
3060  * make a dbname
3061  */
3062 private char *
3063 mkdbname(struct magic_set *ms, const char *fn, int strip)
3064 {
3065         const char *p, *q;
3066         char *buf;
3067 
3068         if (strip) {
3069                 if ((p = strrchr(fn, '/')) != NULL)
3070                         fn = ++p;
3071         }
3072 
3073         for (q = fn; *q; q++)
3074                 continue;
3075         /* Look for .mgc */
3076         for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3077                 if (*p != *q)
3078                         break;
3079 
3080         /* Did not find .mgc, restore q */
3081         if (p >= ext)
3082                 while (*q)
3083                         q++;
3084 
3085         q++;
3086         /* Compatibility with old code that looked in .mime */
3087         if (ms->flags & MAGIC_MIME) {
3088                 spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
3089 #ifdef PHP_WIN32
3090                 if (VCWD_ACCESS(buf, R_OK) == 0) {
3091 #else
3092                 if (VCWD_ACCESS(buf, R_OK) != -1) {
3093 #endif
3094                         ms->flags &= MAGIC_MIME_TYPE;
3095                         return buf;
3096                 }
3097                 efree(buf);
3098         }
3099         spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
3100 
3101         /* Compatibility with old code that looked in .mime */
3102         if (strstr(p, ".mime") != NULL)
3103                 ms->flags &= MAGIC_MIME_TYPE;
3104         return buf;
3105 }
3106 
3107 /*
3108  * Byteswap an mmap'ed file if needed
3109  */
3110 private void
3111 byteswap(struct magic *magic, uint32_t nmagic)
3112 {
3113         uint32_t i;
3114         for (i = 0; i < nmagic; i++)
3115                 bs1(&magic[i]);
3116 }
3117 
/*
 * Return sv with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
        const unsigned int lo = sv & 0xffU;
        const unsigned int hi = (sv >> 8) & 0xffU;

        return (uint16_t)((lo << 8) | hi);
}
3131 
/*
 * Return sv with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
        return ((sv & UINT32_C(0x000000ff)) << 24) |
               ((sv & UINT32_C(0x0000ff00)) <<  8) |
               ((sv & UINT32_C(0x00ff0000)) >>  8) |
               ((sv & UINT32_C(0xff000000)) >> 24);
}
3147 
/*
 * Return sv with the order of its eight bytes fully reversed.
 */
private uint64_t
swap8(uint64_t sv)
{
        uint64_t rv = 0;
        int n;

        /* Peel bytes off the low end of sv and push them onto rv. */
        for (n = 0; n < 8; n++) {
                rv = (rv << 8) | (sv & 0xff);
                sv >>= 8;
        }
        return rv;
}
3178 
3179 /*
3180  * byteswap a single magic entry
3181  */
3182 private void
3183 bs1(struct magic *m)
3184 {
3185         m->cont_level = swap2(m->cont_level);
3186         m->offset = swap4((uint32_t)m->offset);
3187         m->in_offset = swap4((uint32_t)m->in_offset);
3188         m->lineno = swap4((uint32_t)m->lineno);
3189         if (IS_LIBMAGIC_STRING(m->type)) {
3190                 m->str_range = swap4(m->str_range);
3191                 m->str_flags = swap4(m->str_flags);
3192         }
3193         else {
3194                 m->value.q = swap8(m->value.q);
3195                 m->num_mask = swap8(m->num_mask);
3196         }
3197 }
3198 
3199 protected size_t 
3200 file_pstring_length_size(const struct magic *m)
3201 {
3202         switch (m->str_flags & PSTRING_LEN) {
3203         case PSTRING_1_LE:
3204                 return 1;
3205         case PSTRING_2_LE:
3206         case PSTRING_2_BE:
3207                 return 2;
3208         case PSTRING_4_LE:
3209         case PSTRING_4_BE:
3210                 return 4;
3211         default:
3212                 abort();        /* Impossible */
3213                 return 1;
3214         }
3215 }
3216 protected size_t
3217 file_pstring_get_length(const struct magic *m, const char *s)
3218 {
3219         size_t len = 0;
3220 
3221         switch (m->str_flags & PSTRING_LEN) {
3222         case PSTRING_1_LE:
3223                 len = *s;
3224                 break;
3225         case PSTRING_2_LE:
3226                 len = (s[1] << 8) | s[0];
3227                 break;
3228         case PSTRING_2_BE:
3229                 len = (s[0] << 8) | s[1];
3230                 break;
3231         case PSTRING_4_LE:
3232                 len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
3233                 break;
3234         case PSTRING_4_BE:
3235                 len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
3236                 break;
3237         default:
3238                 abort();        /* Impossible */
3239         }
3240 
3241         if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
3242                 len -= file_pstring_length_size(m);
3243 
3244         return len;
3245 }
3246 
3247 protected int
3248 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3249 {
3250         uint32_t i, j;
3251         struct mlist *mlist, *ml;
3252 
3253         mlist = ms->mlist[1];
3254 
3255         for (ml = mlist->next; ml != mlist; ml = ml->next) {
3256                 struct magic *ma = ml->magic;
3257                 uint32_t nma = ml->nmagic;
3258                 for (i = 0; i < nma; i++) {
3259                         if (ma[i].type != FILE_NAME)
3260                                 continue;
3261                         if (strcmp(ma[i].value.s, name) == 0) {
3262                                 v->magic = &ma[i];
3263                                 for (j = i + 1; j < nma; j++)
3264                                     if (ma[j].cont_level == 0)
3265                                             break;
3266                                 v->nmagic = j - i;
3267                                 return 0;
3268                         }
3269                 }
3270         }
3271         return -1;
3272 }

/* [<][>][^][v][top][bottom][index][help] */