1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * apprentice - make one pass through /etc/magic, learning its secrets.
30 */
31
32#include "php.h"
33
34#include "file.h"
35
36#ifndef lint
37FILE_RCSID("@(#)$File: apprentice.c,v 1.230 2015/01/02 21:29:39 christos Exp $")
38#endif  /* lint */
39
40#include "magic.h"
41#include "patchlevel.h"
42#include <stdlib.h>
43
44#if defined(__hpux) && !defined(HAVE_STRTOULL)
45#if SIZEOF_LONG == 8
46# define strtoull strtoul
47#else
48# define strtoull __strtoull
49#endif
50#endif
51
52#ifdef PHP_WIN32
53#include "win32/unistd.h"
54#define strtoull _strtoui64
55#else
56#include <unistd.h>
57#endif
58#include <string.h>
59#include <assert.h>
60#include <ctype.h>
61#include <fcntl.h>
62
63#ifndef SSIZE_MAX
64#define MAXMAGIC_SIZE        ((ssize_t)0x7fffffff)
65#else
66#define MAXMAGIC_SIZE        SSIZE_MAX
67#endif
68
69#define EATAB {while (isascii((unsigned char) *l) && \
70              isspace((unsigned char) *l))  ++l;}
71#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
72            tolower((unsigned char) (l)) : (l))
73/*
74 * Work around a bug in headers on Digital Unix.
75 * At least confirmed for: OSF1 V4.0 878
76 */
77#if defined(__osf__) && defined(__DECC)
78#ifdef MAP_FAILED
79#undef MAP_FAILED
80#endif
81#endif
82
83#ifndef MAP_FAILED
84#define MAP_FAILED (void *) -1
85#endif
86
87#ifndef MAP_FILE
88#define MAP_FILE 0
89#endif
90
91#define ALLOC_CHUNK (size_t)10
92#define ALLOC_INCR  (size_t)200
93
94#define MAP_TYPE_MMAP   0
95#define MAP_TYPE_MALLOC 1
96#define MAP_TYPE_USER   2
97
/*
 * One magic entry while it is being collected: `mp' points at its
 * struct magic record(s); addentry() reads mp->type to pick the target
 * set and apprentice_sort() ranks entries by the strength of *mp.
 * NOTE(review): cont_count and max_count look like the used and
 * allocated lengths of `mp'; the code maintaining them is not in this
 * part of the file - confirm.
 */
struct magic_entry {
    struct magic *mp;
    uint32_t cont_count;
    uint32_t max_count;
};
103
/*
 * Growable array of magic entries.  addentry() stores the next entry at
 * me[count] and enlarges the array by ALLOC_INCR slots whenever count
 * has reached max.
 */
struct magic_entry_set {
    struct magic_entry *me;     /* entry storage, reallocated on growth */
    uint32_t count;             /* slots in use */
    uint32_t max;               /* slots allocated */
};
109
/*
 * A loaded magic database.
 *
 * apprentice_unmap() decides how to release it from `p': nothing but the
 * map itself is freed when p is the built-in php_magic_database, the
 * per-set arrays in magic[] are freed individually when p is NULL, and
 * otherwise p is freed as a single allocation.
 *
 * magic[i] / nmagic[i] are the entry array and entry count of set i;
 * add_mlist() copies them into the list nodes.
 *
 * NOTE(review): `len' and `type' are not referenced in this part of the
 * file; `type' presumably holds one of the MAP_TYPE_* values - confirm.
 */
struct magic_map {
    void *p;
    size_t len;
    int type;
    struct magic *magic[MAGIC_SETS];
    uint32_t nmagic[MAGIC_SETS];
};
117
/*
 * Per-type lookup tables, indexed by FILE_* type code.  They start out
 * zeroed and are filled from type_tbl[] by init_file_tables();
 * file_nformats / file_nnames publish their fixed length.
 */
int file_formats[FILE_NAMES_SIZE];
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];
const size_t file_nnames = FILE_NAMES_SIZE;
122
123private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
124private int hextoint(int);
125private const char *getstr(struct magic_set *, struct magic *, const char *,
126    int);
127private int parse(struct magic_set *, struct magic_entry *, const char *,
128    size_t, int);
129private void eatsize(const char **);
130private int apprentice_1(struct magic_set *, const char *, int);
131private size_t apprentice_magic_strength(const struct magic *);
132private int apprentice_sort(const void *, const void *);
133private void apprentice_list(struct mlist *, int );
134private struct magic_map *apprentice_load(struct magic_set *,
135    const char *, int);
136private struct mlist *mlist_alloc(void);
137private void mlist_free(struct mlist *);
138private void byteswap(struct magic *, uint32_t);
139private void bs1(struct magic *);
140private uint16_t swap2(uint16_t);
141private uint32_t swap4(uint32_t);
142private uint64_t swap8(uint64_t);
143private char *mkdbname(struct magic_set *, const char *, int);
144private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
145    size_t);
146private struct magic_map *apprentice_map(struct magic_set *, const char *);
147private int check_buffer(struct magic_set *, struct magic_map *, const char *);
148private void apprentice_unmap(struct magic_map *);
149private int apprentice_compile(struct magic_set *, struct magic_map *,
150    const char *);
151private int check_format_type(const char *, int);
152private int check_format(struct magic_set *, struct magic *);
153private int get_op(char);
154private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
155private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
156private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
157
158
159private size_t magicsize = sizeof(struct magic);
160
161private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
162
/*
 * Handlers for "!:" annotation lines.  load_1() compares the text that
 * follows "!:" with the first `len' bytes of each `name' and, on a match,
 * calls `fun' with the current entry and the rest of the line.  The
 * { NULL, 0, NULL } row terminates the table.
 */
private struct {
    const char *name;
    size_t len;
    int (*fun)(struct magic_set *, struct magic_entry *, const char *);
} bang[] = {
#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
    DECLARE_FIELD(mime),
    DECLARE_FIELD(apple),
    DECLARE_FIELD(strength),
#undef  DECLARE_FIELD
    { NULL, 0, NULL }
};
175
176#include "../data_file.c"
177
/*
 * One row of a type-keyword table.  Tables of these rows end with a row
 * whose len is 0; get_type() and init_file_tables() stop there.
 */
struct type_tbl_s {
    const char name[16];    /* keyword text */
    const size_t len;       /* length of name without the NUL; 0 ends a table */
    const int type;         /* FILE_* type code */
    const int format;       /* FILE_FMT_* code stored in file_formats[] */
};
184
/*
 * Table of type keywords.  init_file_tables() walks it up to the
 * zero-length sentinel row, records every row's name and format under
 * its type code, and asserts that the number of rows before the sentinel
 * equals FILE_NAMES_SIZE - keep the two in step when adding a type.
 *
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual test; having "long" be dependent on how big a "long" is on
 * the machine running "file" is silly.
 */
static const struct type_tbl_s type_tbl[] = {
/* XX(s) expands to the name and len initializers; XX_NULL is the sentinel. */
# define XX(s)      s, (sizeof(s) - 1)
# define XX_NULL    "", 0
    { XX("invalid"),    FILE_INVALID,       FILE_FMT_NONE },
    { XX("byte"),       FILE_BYTE,      FILE_FMT_NUM },
    { XX("short"),      FILE_SHORT,     FILE_FMT_NUM },
    { XX("default"),    FILE_DEFAULT,       FILE_FMT_NONE },
    { XX("long"),       FILE_LONG,      FILE_FMT_NUM },
    { XX("string"),     FILE_STRING,        FILE_FMT_STR },
    { XX("date"),       FILE_DATE,      FILE_FMT_STR },
    { XX("beshort"),    FILE_BESHORT,       FILE_FMT_NUM },
    { XX("belong"),     FILE_BELONG,        FILE_FMT_NUM },
    { XX("bedate"),     FILE_BEDATE,        FILE_FMT_STR },
    { XX("leshort"),    FILE_LESHORT,       FILE_FMT_NUM },
    { XX("lelong"),     FILE_LELONG,        FILE_FMT_NUM },
    { XX("ledate"),     FILE_LEDATE,        FILE_FMT_STR },
    { XX("pstring"),    FILE_PSTRING,       FILE_FMT_STR },
    { XX("ldate"),      FILE_LDATE,     FILE_FMT_STR },
    { XX("beldate"),    FILE_BELDATE,       FILE_FMT_STR },
    { XX("leldate"),    FILE_LELDATE,       FILE_FMT_STR },
    { XX("regex"),      FILE_REGEX,     FILE_FMT_STR },
    { XX("bestring16"), FILE_BESTRING16,    FILE_FMT_STR },
    { XX("lestring16"), FILE_LESTRING16,    FILE_FMT_STR },
    { XX("search"),     FILE_SEARCH,        FILE_FMT_STR },
    { XX("medate"),     FILE_MEDATE,        FILE_FMT_STR },
    { XX("meldate"),    FILE_MELDATE,       FILE_FMT_STR },
    { XX("melong"),     FILE_MELONG,        FILE_FMT_NUM },
    { XX("quad"),       FILE_QUAD,      FILE_FMT_QUAD },
    { XX("lequad"),     FILE_LEQUAD,        FILE_FMT_QUAD },
    { XX("bequad"),     FILE_BEQUAD,        FILE_FMT_QUAD },
    { XX("qdate"),      FILE_QDATE,     FILE_FMT_STR },
    { XX("leqdate"),    FILE_LEQDATE,       FILE_FMT_STR },
    { XX("beqdate"),    FILE_BEQDATE,       FILE_FMT_STR },
    { XX("qldate"),     FILE_QLDATE,        FILE_FMT_STR },
    { XX("leqldate"),   FILE_LEQLDATE,      FILE_FMT_STR },
    { XX("beqldate"),   FILE_BEQLDATE,      FILE_FMT_STR },
    { XX("float"),      FILE_FLOAT,     FILE_FMT_FLOAT },
    { XX("befloat"),    FILE_BEFLOAT,       FILE_FMT_FLOAT },
    { XX("lefloat"),    FILE_LEFLOAT,       FILE_FMT_FLOAT },
    { XX("double"),     FILE_DOUBLE,        FILE_FMT_DOUBLE },
    { XX("bedouble"),   FILE_BEDOUBLE,      FILE_FMT_DOUBLE },
    { XX("ledouble"),   FILE_LEDOUBLE,      FILE_FMT_DOUBLE },
    { XX("leid3"),      FILE_LEID3,     FILE_FMT_NUM },
    { XX("beid3"),      FILE_BEID3,     FILE_FMT_NUM },
    { XX("indirect"),   FILE_INDIRECT,      FILE_FMT_NUM },
    { XX("qwdate"),     FILE_QWDATE,        FILE_FMT_STR },
    { XX("leqwdate"),   FILE_LEQWDATE,      FILE_FMT_STR },
    { XX("beqwdate"),   FILE_BEQWDATE,      FILE_FMT_STR },
    { XX("name"),       FILE_NAME,      FILE_FMT_NONE },
    { XX("use"),        FILE_USE,       FILE_FMT_NONE },
    { XX("clear"),      FILE_CLEAR,     FILE_FMT_NONE },
    { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },
};
245
/*
 * These are not types, and cannot be preceded by "u" to make them
 * unsigned.  The table has the same row layout and zero-length sentinel
 * as type_tbl[]; the XX / XX_NULL initializer macros are discarded once
 * it has been defined.
 */
static const struct type_tbl_s special_tbl[] = {
    { XX("name"),       FILE_NAME,      FILE_FMT_STR },
    { XX("use"),        FILE_USE,       FILE_FMT_STR },
    { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },
};
# undef XX
# undef XX_NULL
257
258#ifndef S_ISDIR
259#define S_ISDIR(mode) ((mode) & _S_IFDIR)
260#endif
261
262private int
263get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
264{
265    const struct type_tbl_s *p;
266
267    for (p = tbl; p->len; p++) {
268        if (strncmp(l, p->name, p->len) == 0) {
269            if (t)
270                *t = l + p->len;
271            break;
272        }
273    }
274    return p->type;
275}
276
277private int
278get_standard_integer_type(const char *l, const char **t)
279{
280    int type;
281
282    if (isalpha((unsigned char)l[1])) {
283        switch (l[1]) {
284        case 'C':
285            /* "dC" and "uC" */
286            type = FILE_BYTE;
287            break;
288        case 'S':
289            /* "dS" and "uS" */
290            type = FILE_SHORT;
291            break;
292        case 'I':
293        case 'L':
294            /*
295             * "dI", "dL", "uI", and "uL".
296             *
297             * XXX - the actual Single UNIX Specification says
298             * that "L" means "long", as in the C data type,
299             * but we treat it as meaning "4-byte integer".
300             * Given that the OS X version of file 5.04 did
301             * the same, I guess that passes the actual SUS
302             * validation suite; having "dL" be dependent on
303             * how big a "long" is on the machine running
304             * "file" is silly.
305             */
306            type = FILE_LONG;
307            break;
308        case 'Q':
309            /* "dQ" and "uQ" */
310            type = FILE_QUAD;
311            break;
312        default:
313            /* "d{anything else}", "u{anything else}" */
314            return FILE_INVALID;
315        }
316        l += 2;
317    } else if (isdigit((unsigned char)l[1])) {
318        /*
319         * "d{num}" and "u{num}"; we only support {num} values
320         * of 1, 2, 4, and 8 - the Single UNIX Specification
321         * doesn't say anything about whether arbitrary
322         * values should be supported, but both the Solaris 10
323         * and OS X Mountain Lion versions of file passed the
324         * Single UNIX Specification validation suite, and
325         * neither of them support values bigger than 8 or
326         * non-power-of-2 values.
327         */
328        if (isdigit((unsigned char)l[2])) {
329            /* Multi-digit, so > 9 */
330            return FILE_INVALID;
331        }
332        switch (l[1]) {
333        case '1':
334            type = FILE_BYTE;
335            break;
336        case '2':
337            type = FILE_SHORT;
338            break;
339        case '4':
340            type = FILE_LONG;
341            break;
342        case '8':
343            type = FILE_QUAD;
344            break;
345        default:
346            /* XXX - what about 3, 5, 6, or 7? */
347            return FILE_INVALID;
348        }
349        l += 2;
350    } else {
351        /*
352         * "d" or "u" by itself.
353         */
354        type = FILE_LONG;
355        ++l;
356    }
357    if (t)
358        *t = l;
359    return type;
360}
361
362private void
363init_file_tables(void)
364{
365    static int done = 0;
366    const struct type_tbl_s *p;
367
368    if (done)
369        return;
370    done++;
371
372    for (p = type_tbl; p->len; p++) {
373        assert(p->type < FILE_NAMES_SIZE);
374        file_names[p->type] = p->name;
375        file_formats[p->type] = p->format;
376    }
377    assert(p - type_tbl == FILE_NAMES_SIZE);
378}
379
380private int
381add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
382{
383    struct mlist *ml;
384
385    mlp->map = idx == 0 ? map : NULL;
386    if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
387        return -1;
388
389    ml->map = NULL;
390    ml->magic = map->magic[idx];
391    ml->nmagic = map->nmagic[idx];
392
393    mlp->prev->next = ml;
394    ml->prev = mlp->prev;
395    ml->next = mlp;
396    mlp->prev = ml;
397    return 0;
398}
399
400/*
401 * Handle one file or directory.
402 */
403private int
404apprentice_1(struct magic_set *ms, const char *fn, int action)
405{
406    struct magic_map *map;
407    struct mlist *ml;
408    size_t i;
409
410    if (magicsize != FILE_MAGICSIZE) {
411        file_error(ms, 0, "magic element size %lu != %lu",
412            (unsigned long)sizeof(*map->magic[0]),
413            (unsigned long)FILE_MAGICSIZE);
414        return -1;
415    }
416
417    if (action == FILE_COMPILE) {
418        map = apprentice_load(ms, fn, action);
419        if (map == NULL)
420            return -1;
421        return apprentice_compile(ms, map, fn);
422    }
423
424    map = apprentice_map(ms, fn);
425    if (map == NULL) {
426        if (fn) {
427            if (ms->flags & MAGIC_CHECK)
428                file_magwarn(ms, "using regular magic file `%s'", fn);
429            map = apprentice_load(ms, fn, action);
430        }
431        if (map == NULL)
432            return -1;
433    }
434
435    for (i = 0; i < MAGIC_SETS; i++) {
436        if (add_mlist(ms->mlist[i], map, i) == -1) {
437            file_oomem(ms, sizeof(*ml));
438            apprentice_unmap(map);
439            return -1;
440        }
441    }
442
443    if (action == FILE_LIST) {
444        for (i = 0; i < MAGIC_SETS; i++) {
445            printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
446                i);
447            apprentice_list(ms->mlist[i], BINTEST);
448            printf("Text patterns:\n");
449            apprentice_list(ms->mlist[i], TEXTTEST);
450        }
451    }
452    return 0;
453}
454
455protected void
456file_ms_free(struct magic_set *ms)
457{
458    size_t i;
459    if (ms == NULL)
460        return;
461    for (i = 0; i < MAGIC_SETS; i++)
462        mlist_free(ms->mlist[i]);
463    if (ms->o.pbuf) {
464        efree(ms->o.pbuf);
465    }
466    if (ms->o.buf) {
467        efree(ms->o.buf);
468    }
469    if (ms->c.li) {
470        efree(ms->c.li);
471    }
472    efree(ms);
473}
474
475protected struct magic_set *
476file_ms_alloc(int flags)
477{
478    struct magic_set *ms;
479    size_t i, len;
480
481    if ((ms = CAST(struct magic_set *, ecalloc((size_t)1,
482        sizeof(struct magic_set)))) == NULL)
483        return NULL;
484
485    if (magic_setflags(ms, flags) == -1) {
486        errno = EINVAL;
487        goto free;
488    }
489
490    ms->o.buf = ms->o.pbuf = NULL;
491    len = (ms->c.len = 10) * sizeof(*ms->c.li);
492
493    if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
494        goto free;
495
496    ms->event_flags = 0;
497    ms->error = -1;
498    for (i = 0; i < MAGIC_SETS; i++)
499        ms->mlist[i] = NULL;
500    ms->file = "unknown";
501    ms->line = 0;
502    ms->indir_max = FILE_INDIR_MAX;
503    ms->name_max = FILE_NAME_MAX;
504    ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
505    ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
506    ms->elf_notes_max = FILE_ELF_NOTES_MAX;
507    return ms;
508free:
509    efree(ms);
510    return NULL;
511}
512
513private void
514apprentice_unmap(struct magic_map *map)
515{
516    if (map == NULL)
517        return;
518    if (map->p != php_magic_database) {
519        if (map->p == NULL) {
520            int j;
521            for (j = 0; j < MAGIC_SETS; j++) {
522                if (map->magic[j]) {
523                    efree(map->magic[j]);
524                }
525            }
526        } else {
527            efree(map->p);
528        }
529    }
530    efree(map);
531}
532
533private struct mlist *
534mlist_alloc(void)
535{
536    struct mlist *mlist;
537    if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
538        return NULL;
539    }
540    mlist->next = mlist->prev = mlist;
541    return mlist;
542}
543
544private void
545mlist_free(struct mlist *mlist)
546{
547    struct mlist *ml, *next;
548
549    if (mlist == NULL)
550        return;
551
552    ml = mlist->next;
553    for (ml = mlist->next; (next = ml->next) != NULL; ml = next) {
554        if (ml->map)
555            apprentice_unmap(ml->map);
556        efree(ml);
557        if (ml == mlist)
558            break;
559    }
560}
561
562/* const char *fn: list of magic files and directories */
563protected int
564file_apprentice(struct magic_set *ms, const char *fn, int action)
565{
566    char *p, *mfn;
567    int file_err, errs = -1;
568    size_t i;
569
570    if (ms->mlist[0] != NULL)
571        file_reset(ms);
572
573/* XXX disabling default magic loading so the compiled in data is used */
574#if 0
575    if ((fn = magic_getpath(fn, action)) == NULL)
576        return -1;
577#endif
578
579    init_file_tables();
580
581    if (fn == NULL)
582        fn = getenv("MAGIC");
583    if (fn == NULL) {
584        for (i = 0; i < MAGIC_SETS; i++) {
585            mlist_free(ms->mlist[i]);
586            if ((ms->mlist[i] = mlist_alloc()) == NULL) {
587                file_oomem(ms, sizeof(*ms->mlist[i]));
588                return -1;
589            }
590        }
591        return apprentice_1(ms, fn, action);
592    }
593
594    if ((mfn = estrdup(fn)) == NULL) {
595        file_oomem(ms, strlen(fn));
596        return -1;
597    }
598
599    for (i = 0; i < MAGIC_SETS; i++) {
600        mlist_free(ms->mlist[i]);
601        if ((ms->mlist[i] = mlist_alloc()) == NULL) {
602            file_oomem(ms, sizeof(*ms->mlist[i]));
603            while (i-- > 0) {
604                mlist_free(ms->mlist[i]);
605                ms->mlist[i] = NULL;
606            }
607            efree(mfn);
608            return -1;
609        }
610    }
611    fn = mfn;
612
613    while (fn) {
614        p = strchr(fn, PATHSEP);
615        if (p)
616            *p++ = '\0';
617        if (*fn == '\0')
618            break;
619        file_err = apprentice_1(ms, fn, action);
620        errs = MAX(errs, file_err);
621        fn = p;
622    }
623
624    efree(mfn);
625
626    if (errs == -1) {
627        for (i = 0; i < MAGIC_SETS; i++) {
628            mlist_free(ms->mlist[i]);
629            ms->mlist[i] = NULL;
630        }
631        file_error(ms, 0, "could not find any valid magic files!");
632        return -1;
633    }
634
635#if 0
636    /*
637     * Always leave the database loaded
638     */
639    if (action == FILE_LOAD)
640        return 0;
641
642    for (i = 0; i < MAGIC_SETS; i++) {
643        mlist_free(ms->mlist[i]);
644        ms->mlist[i] = NULL;
645    }
646#endif
647
648    switch (action) {
649    case FILE_LOAD:
650    case FILE_COMPILE:
651    case FILE_CHECK:
652    case FILE_LIST:
653        return 0;
654    default:
655        file_error(ms, 0, "Invalid action %d", action);
656        return -1;
657    }
658}
659
660/*
661 * Compute the real length of a magic expression, for the purposes
662 * of determining how "strong" a magic expression is (approximating
663 * how specific its matches are):
664 *  - magic characters count 0 unless escaped.
665 *  - [] expressions count 1
666 *  - {} expressions count 0
667 *  - regular characters or escaped magic characters count 1
668 *  - 0 length expressions count as one
669 */
670private size_t
671nonmagic(const char *str)
672{
673    const char *p;
674    size_t rv = 0;
675
676    for (p = str; *p; p++)
677        switch (*p) {
678        case '\\':  /* Escaped anything counts 1 */
679            if (!*++p)
680                p--;
681            rv++;
682            continue;
683        case '?':   /* Magic characters count 0 */
684        case '*':
685        case '.':
686        case '+':
687        case '^':
688        case '$':
689            continue;
690        case '[':   /* Bracketed expressions count 1 the ']' */
691            while (*p && *p != ']')
692                p++;
693            p--;
694            continue;
695        case '{':   /* Braced expressions count 0 */
696            while (*p && *p != '}')
697                p++;
698            if (!*p)
699                p--;
700            continue;
701        default:    /* Anything else counts 1 */
702            rv++;
703            continue;
704        }
705
706    return rv == 0 ? 1 : rv;    /* Return at least 1 */
707}
708
709/*
710 * Get weight of this magic entry, for sorting purposes.
711 */
712private size_t
713apprentice_magic_strength(const struct magic *m)
714{
715#define MULT 10
716    size_t v, val = 2 * MULT;   /* baseline strength */
717
718    switch (m->type) {
719    case FILE_DEFAULT:  /* make sure this sorts last */
720        if (m->factor_op != FILE_FACTOR_OP_NONE)
721            abort();
722        return 0;
723
724    case FILE_BYTE:
725        val += 1 * MULT;
726        break;
727
728    case FILE_SHORT:
729    case FILE_LESHORT:
730    case FILE_BESHORT:
731        val += 2 * MULT;
732        break;
733
734    case FILE_LONG:
735    case FILE_LELONG:
736    case FILE_BELONG:
737    case FILE_MELONG:
738        val += 4 * MULT;
739        break;
740
741    case FILE_PSTRING:
742    case FILE_STRING:
743        val += m->vallen * MULT;
744        break;
745
746    case FILE_BESTRING16:
747    case FILE_LESTRING16:
748        val += m->vallen * MULT / 2;
749        break;
750
751    case FILE_SEARCH:
752        val += m->vallen * MAX(MULT / m->vallen, 1);
753        break;
754
755    case FILE_REGEX:
756        v = nonmagic(m->value.s);
757        val += v * MAX(MULT / v, 1);
758        break;
759
760    case FILE_DATE:
761    case FILE_LEDATE:
762    case FILE_BEDATE:
763    case FILE_MEDATE:
764    case FILE_LDATE:
765    case FILE_LELDATE:
766    case FILE_BELDATE:
767    case FILE_MELDATE:
768    case FILE_FLOAT:
769    case FILE_BEFLOAT:
770    case FILE_LEFLOAT:
771        val += 4 * MULT;
772        break;
773
774    case FILE_QUAD:
775    case FILE_BEQUAD:
776    case FILE_LEQUAD:
777    case FILE_QDATE:
778    case FILE_LEQDATE:
779    case FILE_BEQDATE:
780    case FILE_QLDATE:
781    case FILE_LEQLDATE:
782    case FILE_BEQLDATE:
783    case FILE_QWDATE:
784    case FILE_LEQWDATE:
785    case FILE_BEQWDATE:
786    case FILE_DOUBLE:
787    case FILE_BEDOUBLE:
788    case FILE_LEDOUBLE:
789        val += 8 * MULT;
790        break;
791
792    case FILE_INDIRECT:
793    case FILE_NAME:
794    case FILE_USE:
795        break;
796
797    default:
798        (void)fprintf(stderr, "Bad type %d\n", m->type);
799        abort();
800    }
801
802    switch (m->reln) {
803    case 'x':   /* matches anything penalize */
804    case '!':       /* matches almost anything penalize */
805        val = 0;
806        break;
807
808    case '=':   /* Exact match, prefer */
809        val += MULT;
810        break;
811
812    case '>':
813    case '<':   /* comparison match reduce strength */
814        val -= 2 * MULT;
815        break;
816
817    case '^':
818    case '&':   /* masking bits, we could count them too */
819        val -= MULT;
820        break;
821
822    default:
823        (void)fprintf(stderr, "Bad relation %c\n", m->reln);
824        abort();
825    }
826
827    if (val == 0)   /* ensure we only return 0 for FILE_DEFAULT */
828        val = 1;
829
830    switch (m->factor_op) {
831    case FILE_FACTOR_OP_NONE:
832        break;
833    case FILE_FACTOR_OP_PLUS:
834        val += m->factor;
835        break;
836    case FILE_FACTOR_OP_MINUS:
837        val -= m->factor;
838        break;
839    case FILE_FACTOR_OP_TIMES:
840        val *= m->factor;
841        break;
842    case FILE_FACTOR_OP_DIV:
843        val /= m->factor;
844        break;
845    default:
846        abort();
847    }
848
849    /*
850     * Magic entries with no description get a bonus because they depend
851     * on subsequent magic entries to print something.
852     */
853    if (m->desc[0] == '\0')
854        val++;
855    return val;
856}
857
858/*
859 * Sort callback for sorting entries by "strength" (basically length)
860 */
861private int
862apprentice_sort(const void *a, const void *b)
863{
864    const struct magic_entry *ma = CAST(const struct magic_entry *, a);
865    const struct magic_entry *mb = CAST(const struct magic_entry *, b);
866    size_t sa = apprentice_magic_strength(ma->mp);
867    size_t sb = apprentice_magic_strength(mb->mp);
868    if (sa == sb)
869        return 0;
870    else if (sa > sb)
871        return -1;
872    else
873        return 1;
874}
875
876/*
877 * Shows sorted patterns list in the order which is used for the matching
878 */
879private void
880apprentice_list(struct mlist *mlist, int mode)
881{
882    uint32_t magindex = 0;
883    struct mlist *ml;
884    for (ml = mlist->next; ml != mlist; ml = ml->next) {
885        for (magindex = 0; magindex < ml->nmagic; magindex++) {
886            struct magic *m = &ml->magic[magindex];
887            if ((m->flag & mode) != mode) {
888                /* Skip sub-tests */
889                while (magindex + 1 < ml->nmagic &&
890                       ml->magic[magindex + 1].cont_level != 0)
891                    ++magindex;
892                continue; /* Skip to next top-level test*/
893            }
894
895            /*
896             * Try to iterate over the tree until we find item with
897             * description/mimetype.
898             */
899            while (magindex + 1 < ml->nmagic &&
900                   ml->magic[magindex + 1].cont_level != 0 &&
901                   *ml->magic[magindex].desc == '\0' &&
902                   *ml->magic[magindex].mimetype == '\0')
903                magindex++;
904
905            printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
906                apprentice_magic_strength(m),
907                ml->magic[magindex].lineno,
908                ml->magic[magindex].desc,
909                ml->magic[magindex].mimetype);
910        }
911    }
912}
913
914private void
915set_test_type(struct magic *mstart, struct magic *m)
916{
917    switch (m->type) {
918    case FILE_BYTE:
919    case FILE_SHORT:
920    case FILE_LONG:
921    case FILE_DATE:
922    case FILE_BESHORT:
923    case FILE_BELONG:
924    case FILE_BEDATE:
925    case FILE_LESHORT:
926    case FILE_LELONG:
927    case FILE_LEDATE:
928    case FILE_LDATE:
929    case FILE_BELDATE:
930    case FILE_LELDATE:
931    case FILE_MEDATE:
932    case FILE_MELDATE:
933    case FILE_MELONG:
934    case FILE_QUAD:
935    case FILE_LEQUAD:
936    case FILE_BEQUAD:
937    case FILE_QDATE:
938    case FILE_LEQDATE:
939    case FILE_BEQDATE:
940    case FILE_QLDATE:
941    case FILE_LEQLDATE:
942    case FILE_BEQLDATE:
943    case FILE_QWDATE:
944    case FILE_LEQWDATE:
945    case FILE_BEQWDATE:
946    case FILE_FLOAT:
947    case FILE_BEFLOAT:
948    case FILE_LEFLOAT:
949    case FILE_DOUBLE:
950    case FILE_BEDOUBLE:
951    case FILE_LEDOUBLE:
952        mstart->flag |= BINTEST;
953        break;
954    case FILE_STRING:
955    case FILE_PSTRING:
956    case FILE_BESTRING16:
957    case FILE_LESTRING16:
958        /* Allow text overrides */
959        if (mstart->str_flags & STRING_TEXTTEST)
960            mstart->flag |= TEXTTEST;
961        else
962            mstart->flag |= BINTEST;
963        break;
964    case FILE_REGEX:
965    case FILE_SEARCH:
966        /* Check for override */
967        if (mstart->str_flags & STRING_BINTEST)
968            mstart->flag |= BINTEST;
969        if (mstart->str_flags & STRING_TEXTTEST)
970            mstart->flag |= TEXTTEST;
971
972        if (mstart->flag & (TEXTTEST|BINTEST))
973            break;
974
975        /* binary test if pattern is not text */
976        if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
977            NULL) <= 0)
978            mstart->flag |= BINTEST;
979        else
980            mstart->flag |= TEXTTEST;
981        break;
982    case FILE_DEFAULT:
983        /* can't deduce anything; we shouldn't see this at the
984           top level anyway */
985        break;
986    case FILE_INVALID:
987    default:
988        /* invalid search type, but no need to complain here */
989        break;
990    }
991}
992
993private int
994addentry(struct magic_set *ms, struct magic_entry *me,
995   struct magic_entry_set *mset)
996{
997    size_t i = me->mp->type == FILE_NAME ? 1 : 0;
998    if (mset[i].count == mset[i].max) {
999        struct magic_entry *mp;
1000
1001        mset[i].max += ALLOC_INCR;
1002        if ((mp = CAST(struct magic_entry *,
1003            erealloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
1004            NULL) {
1005            file_oomem(ms, sizeof(*mp) * mset[i].max);
1006            return -1;
1007        }
1008        (void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1009            ALLOC_INCR);
1010        mset[i].me = mp;
1011    }
1012    mset[i].me[mset[i].count++] = *me;
1013    memset(me, 0, sizeof(*me));
1014    return 0;
1015}
1016
1017/*
1018 * Load and parse one file.
1019 */
1020private void
1021load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1022   struct magic_entry_set *mset)
1023{
1024    char buffer[BUFSIZ + 1];
1025    char *line = NULL;
1026    size_t len;
1027    size_t lineno = 0;
1028    struct magic_entry me;
1029
1030    php_stream *stream;
1031
1032
1033    ms->file = fn;
1034    stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
1035
1036    if (stream == NULL) {
1037        if (errno != ENOENT)
1038            file_error(ms, errno, "cannot read magic file `%s'",
1039                   fn);
1040        (*errs)++;
1041        return;
1042    }
1043
1044    memset(&me, 0, sizeof(me));
1045    /* read and parse this file */
1046    for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
1047        if (len == 0) /* null line, garbage, etc */
1048            continue;
1049        if (line[len - 1] == '\n') {
1050            lineno++;
1051            line[len - 1] = '\0'; /* delete newline */
1052        }
1053        switch (line[0]) {
1054        case '\0':  /* empty, do not parse */
1055        case '#':   /* comment, do not parse */
1056            continue;
1057        case '!':
1058            if (line[1] == ':') {
1059                size_t i;
1060
1061                for (i = 0; bang[i].name != NULL; i++) {
1062                    if ((size_t)(len - 2) > bang[i].len &&
1063                        memcmp(bang[i].name, line + 2,
1064                        bang[i].len) == 0)
1065                        break;
1066                }
1067                if (bang[i].name == NULL) {
1068                    file_error(ms, 0,
1069                        "Unknown !: entry `%s'", line);
1070                    (*errs)++;
1071                    continue;
1072                }
1073                if (me.mp == NULL) {
1074                    file_error(ms, 0,
1075                        "No current entry for :!%s type",
1076                        bang[i].name);
1077                    (*errs)++;
1078                    continue;
1079                }
1080                if ((*bang[i].fun)(ms, &me,
1081                    line + bang[i].len + 2) != 0) {
1082                    (*errs)++;
1083                    continue;
1084                }
1085                continue;
1086            }
1087            /*FALLTHROUGH*/
1088        default:
1089        again:
1090            switch (parse(ms, &me, line, lineno, action)) {
1091            case 0:
1092                continue;
1093            case 1:
1094                (void)addentry(ms, &me, mset);
1095                goto again;
1096            default:
1097                (*errs)++;
1098                break;
1099            }
1100        }
1101    }
1102    if (me.mp)
1103        (void)addentry(ms, &me, mset);
1104    efree(line);
1105    php_stream_close(stream);
1106}
1107
1108/*
1109 * parse a file or directory of files
1110 * const char *fn: name of magic file or directory
1111 */
1112private int
1113cmpstrp(const void *p1, const void *p2)
1114{
1115        return strcmp(*(char *const *)p1, *(char *const *)p2);
1116}
1117
1118
1119private uint32_t
1120set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1121    uint32_t starttest)
1122{
1123    static const char text[] = "text";
1124    static const char binary[] = "binary";
1125    static const size_t len = sizeof(text);
1126
1127    uint32_t i = starttest;
1128
1129    do {
1130        set_test_type(me[starttest].mp, me[i].mp);
1131        if ((ms->flags & MAGIC_DEBUG) == 0)
1132            continue;
1133        (void)fprintf(stderr, "%s%s%s: %s\n",
1134            me[i].mp->mimetype,
1135            me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1136            me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1137            me[i].mp->flag & BINTEST ? binary : text);
1138        if (me[i].mp->flag & BINTEST) {
1139            char *p = strstr(me[i].mp->desc, text);
1140            if (p && (p == me[i].mp->desc ||
1141                isspace((unsigned char)p[-1])) &&
1142                (p + len - me[i].mp->desc == MAXstring
1143                || (p[len] == '\0' ||
1144                isspace((unsigned char)p[len]))))
1145                (void)fprintf(stderr, "*** Possible "
1146                    "binary test for text type\n");
1147        }
1148    } while (++i < nme && me[i].mp->cont_level != 0);
1149    return i;
1150}
1151
1152private void
1153set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1154{
1155    uint32_t i;
1156    for (i = 0; i < nme; i++) {
1157        if (me[i].mp->cont_level == 0 &&
1158            me[i].mp->type == FILE_DEFAULT) {
1159            while (++i < nme)
1160                if (me[i].mp->cont_level == 0)
1161                    break;
1162            if (i != nme) {
1163                /* XXX - Ugh! */
1164                ms->line = me[i].mp->lineno;
1165                file_magwarn(ms,
1166                    "level 0 \"default\" did not sort last");
1167            }
1168            return;
1169        }
1170    }
1171}
1172
1173private int
1174coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1175    struct magic **ma, uint32_t *nma)
1176{
1177    uint32_t i, mentrycount = 0;
1178    size_t slen;
1179
1180    for (i = 0; i < nme; i++)
1181        mentrycount += me[i].cont_count;
1182
1183    slen = sizeof(**ma) * mentrycount;
1184    if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1185        file_oomem(ms, slen);
1186        return -1;
1187    }
1188
1189    mentrycount = 0;
1190    for (i = 0; i < nme; i++) {
1191        (void)memcpy(*ma + mentrycount, me[i].mp,
1192            me[i].cont_count * sizeof(**ma));
1193        mentrycount += me[i].cont_count;
1194    }
1195    *nma = mentrycount;
1196    return 0;
1197}
1198
1199private void
1200magic_entry_free(struct magic_entry *me, uint32_t nme)
1201{
1202    uint32_t i;
1203    if (me == NULL)
1204        return;
1205    for (i = 0; i < nme; i++)
1206        efree(me[i].mp);
1207    efree(me);
1208}
1209
1210private struct magic_map *
1211apprentice_load(struct magic_set *ms, const char *fn, int action)
1212{
1213    int errs = 0;
1214    uint32_t i, j;
1215    size_t files = 0, maxfiles = 0;
1216    char **filearr = NULL;
1217    zend_stat_t st;
1218    struct magic_map *map;
1219    struct magic_entry_set mset[MAGIC_SETS];
1220    php_stream *dir;
1221    php_stream_dirent d;
1222
1223
1224    memset(mset, 0, sizeof(mset));
1225    ms->flags |= MAGIC_CHECK;   /* Enable checks for parsed files */
1226
1227
1228    if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL)
1229    {
1230        file_oomem(ms, sizeof(*map));
1231        return NULL;
1232    }
1233
1234    /* print silly verbose header for USG compat. */
1235    if (action == FILE_CHECK)
1236        (void)fprintf(stderr, "%s\n", usg_hdr);
1237
1238    /* load directory or file */
1239    /* FIXME: Read file names and sort them to prevent
1240       non-determinism. See Debian bug #488562. */
1241    if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1242        int mflen;
1243        char mfn[MAXPATHLEN];
1244
1245        dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1246        if (!dir) {
1247            errs++;
1248            goto out;
1249        }
1250        while (php_stream_readdir(dir, &d)) {
1251            if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1252                file_oomem(ms,
1253                strlen(fn) + strlen(d.d_name) + 2);
1254                errs++;
1255                php_stream_closedir(dir);
1256                goto out;
1257            }
1258            if (zend_stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1259                continue;
1260            }
1261            if (files >= maxfiles) {
1262                size_t mlen;
1263                maxfiles = (maxfiles + 1) * 2;
1264                mlen = maxfiles * sizeof(*filearr);
1265                if ((filearr = CAST(char **,
1266                    erealloc(filearr, mlen))) == NULL) {
1267                    file_oomem(ms, mlen);
1268                    php_stream_closedir(dir);
1269                    errs++;
1270                    goto out;
1271                }
1272            }
1273            filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1274        }
1275        php_stream_closedir(dir);
1276        qsort(filearr, files, sizeof(*filearr), cmpstrp);
1277        for (i = 0; i < files; i++) {
1278            load_1(ms, action, filearr[i], &errs, mset);
1279            efree(filearr[i]);
1280        }
1281        efree(filearr);
1282    } else
1283        load_1(ms, action, fn, &errs, mset);
1284    if (errs)
1285        goto out;
1286
1287    for (j = 0; j < MAGIC_SETS; j++) {
1288        /* Set types of tests */
1289        for (i = 0; i < mset[j].count; ) {
1290            if (mset[j].me[i].mp->cont_level != 0) {
1291                i++;
1292                continue;
1293            }
1294            i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1295        }
1296        qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
1297            apprentice_sort);
1298
1299        /*
1300         * Make sure that any level 0 "default" line is last
1301         * (if one exists).
1302         */
1303        set_last_default(ms, mset[j].me, mset[j].count);
1304
1305        /* coalesce per file arrays into a single one */
1306        if (coalesce_entries(ms, mset[j].me, mset[j].count,
1307            &map->magic[j], &map->nmagic[j]) == -1) {
1308            errs++;
1309            goto out;
1310        }
1311    }
1312
1313out:
1314    for (j = 0; j < MAGIC_SETS; j++)
1315        magic_entry_free(mset[j].me, mset[j].count);
1316
1317    if (errs) {
1318        apprentice_unmap(map);
1319        return NULL;
1320    }
1321    return map;
1322}
1323
1324/*
1325 * extend the sign bit if the comparison is to be signed
1326 */
1327protected uint64_t
1328file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1329{
1330    if (!(m->flag & UNSIGNED)) {
1331        switch(m->type) {
1332        /*
1333         * Do not remove the casts below.  They are
1334         * vital.  When later compared with the data,
1335         * the sign extension must have happened.
1336         */
1337        case FILE_BYTE:
1338            v = (signed char) v;
1339            break;
1340        case FILE_SHORT:
1341        case FILE_BESHORT:
1342        case FILE_LESHORT:
1343            v = (short) v;
1344            break;
1345        case FILE_DATE:
1346        case FILE_BEDATE:
1347        case FILE_LEDATE:
1348        case FILE_MEDATE:
1349        case FILE_LDATE:
1350        case FILE_BELDATE:
1351        case FILE_LELDATE:
1352        case FILE_MELDATE:
1353        case FILE_LONG:
1354        case FILE_BELONG:
1355        case FILE_LELONG:
1356        case FILE_MELONG:
1357        case FILE_FLOAT:
1358        case FILE_BEFLOAT:
1359        case FILE_LEFLOAT:
1360            v = (int32_t) v;
1361            break;
1362        case FILE_QUAD:
1363        case FILE_BEQUAD:
1364        case FILE_LEQUAD:
1365        case FILE_QDATE:
1366        case FILE_QLDATE:
1367        case FILE_QWDATE:
1368        case FILE_BEQDATE:
1369        case FILE_BEQLDATE:
1370        case FILE_BEQWDATE:
1371        case FILE_LEQDATE:
1372        case FILE_LEQLDATE:
1373        case FILE_LEQWDATE:
1374        case FILE_DOUBLE:
1375        case FILE_BEDOUBLE:
1376        case FILE_LEDOUBLE:
1377            v = (int64_t) v;
1378            break;
1379        case FILE_STRING:
1380        case FILE_PSTRING:
1381        case FILE_BESTRING16:
1382        case FILE_LESTRING16:
1383        case FILE_REGEX:
1384        case FILE_SEARCH:
1385        case FILE_DEFAULT:
1386        case FILE_INDIRECT:
1387        case FILE_NAME:
1388        case FILE_USE:
1389        case FILE_CLEAR:
1390            break;
1391        default:
1392            if (ms->flags & MAGIC_CHECK)
1393                file_magwarn(ms, "cannot happen: m->type=%d\n",
1394                    m->type);
1395            return ~0U;
1396        }
1397    }
1398    return v;
1399}
1400
1401private int
1402string_modifier_check(struct magic_set *ms, struct magic *m)
1403{
1404    if ((ms->flags & MAGIC_CHECK) == 0)
1405        return 0;
1406
1407    if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1408        (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1409        file_magwarn(ms,
1410            "'/BHhLl' modifiers are only allowed for pascal strings\n");
1411        return -1;
1412    }
1413    switch (m->type) {
1414    case FILE_BESTRING16:
1415    case FILE_LESTRING16:
1416        if (m->str_flags != 0) {
1417            file_magwarn(ms,
1418                "no modifiers allowed for 16-bit strings\n");
1419            return -1;
1420        }
1421        break;
1422    case FILE_STRING:
1423    case FILE_PSTRING:
1424        if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1425            file_magwarn(ms,
1426                "'/%c' only allowed on regex and search\n",
1427                CHAR_REGEX_OFFSET_START);
1428            return -1;
1429        }
1430        break;
1431    case FILE_SEARCH:
1432        if (m->str_range == 0) {
1433            file_magwarn(ms,
1434                "missing range; defaulting to %d\n",
1435                            STRING_DEFAULT_RANGE);
1436            m->str_range = STRING_DEFAULT_RANGE;
1437            return -1;
1438        }
1439        break;
1440    case FILE_REGEX:
1441        if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1442            file_magwarn(ms, "'/%c' not allowed on regex\n",
1443                CHAR_COMPACT_WHITESPACE);
1444            return -1;
1445        }
1446        if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1447            file_magwarn(ms, "'/%c' not allowed on regex\n",
1448                CHAR_COMPACT_OPTIONAL_WHITESPACE);
1449            return -1;
1450        }
1451        break;
1452    default:
1453        file_magwarn(ms, "coding error: m->type=%d\n",
1454            m->type);
1455        return -1;
1456    }
1457    return 0;
1458}
1459
1460private int
1461get_op(char c)
1462{
1463    switch (c) {
1464    case '&':
1465        return FILE_OPAND;
1466    case '|':
1467        return FILE_OPOR;
1468    case '^':
1469        return FILE_OPXOR;
1470    case '+':
1471        return FILE_OPADD;
1472    case '-':
1473        return FILE_OPMINUS;
1474    case '*':
1475        return FILE_OPMULTIPLY;
1476    case '/':
1477        return FILE_OPDIVIDE;
1478    case '%':
1479        return FILE_OPMODULO;
1480    default:
1481        return -1;
1482    }
1483}
1484
1485#ifdef ENABLE_CONDITIONALS
1486private int
1487get_cond(const char *l, const char **t)
1488{
1489    static const struct cond_tbl_s {
1490        char name[8];
1491        size_t len;
1492        int cond;
1493    } cond_tbl[] = {
1494        { "if",     2,  COND_IF },
1495        { "elif",   4,  COND_ELIF },
1496        { "else",   4,  COND_ELSE },
1497        { "",       0,  COND_NONE },
1498    };
1499    const struct cond_tbl_s *p;
1500
1501    for (p = cond_tbl; p->len; p++) {
1502        if (strncmp(l, p->name, p->len) == 0 &&
1503            isspace((unsigned char)l[p->len])) {
1504            if (t)
1505                *t = l + p->len;
1506            break;
1507        }
1508    }
1509    return p->cond;
1510}
1511
1512private int
1513check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1514{
1515    int last_cond;
1516    last_cond = ms->c.li[cont_level].last_cond;
1517
1518    switch (cond) {
1519    case COND_IF:
1520        if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1521            if (ms->flags & MAGIC_CHECK)
1522                file_magwarn(ms, "syntax error: `if'");
1523            return -1;
1524        }
1525        last_cond = COND_IF;
1526        break;
1527
1528    case COND_ELIF:
1529        if (last_cond != COND_IF && last_cond != COND_ELIF) {
1530            if (ms->flags & MAGIC_CHECK)
1531                file_magwarn(ms, "syntax error: `elif'");
1532            return -1;
1533        }
1534        last_cond = COND_ELIF;
1535        break;
1536
1537    case COND_ELSE:
1538        if (last_cond != COND_IF && last_cond != COND_ELIF) {
1539            if (ms->flags & MAGIC_CHECK)
1540                file_magwarn(ms, "syntax error: `else'");
1541            return -1;
1542        }
1543        last_cond = COND_NONE;
1544        break;
1545
1546    case COND_NONE:
1547        last_cond = COND_NONE;
1548        break;
1549    }
1550
1551    ms->c.li[cont_level].last_cond = last_cond;
1552    return 0;
1553}
1554#endif /* ENABLE_CONDITIONALS */
1555
1556private int
1557parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1558{
1559    const char *l = *lp;
1560
1561    while (!isspace((unsigned char)*++l))
1562        switch (*l) {
1563        case CHAR_INDIRECT_RELATIVE:
1564            m->str_flags |= INDIRECT_RELATIVE;
1565            break;
1566        default:
1567            if (ms->flags & MAGIC_CHECK)
1568                file_magwarn(ms, "indirect modifier `%c' "
1569                    "invalid", *l);
1570            *lp = l;
1571            return -1;
1572        }
1573    *lp = l;
1574    return 0;
1575}
1576
1577private void
1578parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1579    int op)
1580{
1581    const char *l = *lp;
1582    char *t;
1583    uint64_t val;
1584
1585    ++l;
1586    m->mask_op |= op;
1587    val = (uint64_t)strtoull(l, &t, 0);
1588    l = t;
1589    m->num_mask = file_signextend(ms, m, val);
1590    eatsize(&l);
1591    *lp = l;
1592}
1593
1594private int
1595parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1596{
1597    const char *l = *lp;
1598    char *t;
1599    int have_range = 0;
1600
1601    while (!isspace((unsigned char)*++l)) {
1602        switch (*l) {
1603        case '0':  case '1':  case '2':
1604        case '3':  case '4':  case '5':
1605        case '6':  case '7':  case '8':
1606        case '9':
1607            if (have_range && (ms->flags & MAGIC_CHECK))
1608                file_magwarn(ms, "multiple ranges");
1609            have_range = 1;
1610            m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
1611            if (m->str_range == 0)
1612                file_magwarn(ms, "zero range");
1613            l = t - 1;
1614            break;
1615        case CHAR_COMPACT_WHITESPACE:
1616            m->str_flags |= STRING_COMPACT_WHITESPACE;
1617            break;
1618        case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1619            m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
1620            break;
1621        case CHAR_IGNORE_LOWERCASE:
1622            m->str_flags |= STRING_IGNORE_LOWERCASE;
1623            break;
1624        case CHAR_IGNORE_UPPERCASE:
1625            m->str_flags |= STRING_IGNORE_UPPERCASE;
1626            break;
1627        case CHAR_REGEX_OFFSET_START:
1628            m->str_flags |= REGEX_OFFSET_START;
1629            break;
1630        case CHAR_BINTEST:
1631            m->str_flags |= STRING_BINTEST;
1632            break;
1633        case CHAR_TEXTTEST:
1634            m->str_flags |= STRING_TEXTTEST;
1635            break;
1636        case CHAR_TRIM:
1637            m->str_flags |= STRING_TRIM;
1638            break;
1639        case CHAR_PSTRING_1_LE:
1640#define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
1641            if (m->type != FILE_PSTRING)
1642                goto bad;
1643            SET_LENGTH(PSTRING_1_LE);
1644            break;
1645        case CHAR_PSTRING_2_BE:
1646            if (m->type != FILE_PSTRING)
1647                goto bad;
1648            SET_LENGTH(PSTRING_2_BE);
1649            break;
1650        case CHAR_PSTRING_2_LE:
1651            if (m->type != FILE_PSTRING)
1652                goto bad;
1653            SET_LENGTH(PSTRING_2_LE);
1654            break;
1655        case CHAR_PSTRING_4_BE:
1656            if (m->type != FILE_PSTRING)
1657                goto bad;
1658            SET_LENGTH(PSTRING_4_BE);
1659            break;
1660        case CHAR_PSTRING_4_LE:
1661            switch (m->type) {
1662            case FILE_PSTRING:
1663            case FILE_REGEX:
1664                break;
1665            default:
1666                goto bad;
1667            }
1668            SET_LENGTH(PSTRING_4_LE);
1669            break;
1670        case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1671            if (m->type != FILE_PSTRING)
1672                goto bad;
1673            m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1674            break;
1675        default:
1676        bad:
1677            if (ms->flags & MAGIC_CHECK)
1678                file_magwarn(ms, "string modifier `%c' "
1679                    "invalid", *l);
1680            goto out;
1681        }
1682        /* allow multiple '/' for readability */
1683        if (l[1] == '/' && !isspace((unsigned char)l[2]))
1684            l++;
1685    }
1686    if (string_modifier_check(ms, m) == -1)
1687        goto out;
1688    *lp = l;
1689    return 0;
1690out:
1691    *lp = l;
1692    return -1;
1693}
1694
1695/*
1696 * parse one line from magic file, put into magic[index++] if valid
1697 */
1698private int
1699parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1700    size_t lineno, int action)
1701{
1702#ifdef ENABLE_CONDITIONALS
1703    static uint32_t last_cont_level = 0;
1704#endif
1705    size_t i;
1706    struct magic *m;
1707    const char *l = line;
1708    char *t;
1709    int op;
1710    uint32_t cont_level;
1711    int32_t diff;
1712
1713    cont_level = 0;
1714
1715    /*
1716     * Parse the offset.
1717     */
1718    while (*l == '>') {
1719        ++l;        /* step over */
1720        cont_level++;
1721    }
1722#ifdef ENABLE_CONDITIONALS
1723    if (cont_level == 0 || cont_level > last_cont_level)
1724        if (file_check_mem(ms, cont_level) == -1)
1725            return -1;
1726    last_cont_level = cont_level;
1727#endif
1728    if (cont_level != 0) {
1729        if (me->mp == NULL) {
1730            file_magerror(ms, "No current entry for continuation");
1731            return -1;
1732        }
1733        if (me->cont_count == 0) {
1734            file_magerror(ms, "Continuations present with 0 count");
1735            return -1;
1736        }
1737        m = &me->mp[me->cont_count - 1];
1738        diff = (int32_t)cont_level - (int32_t)m->cont_level;
1739        if (diff > 1)
1740            file_magwarn(ms, "New continuation level %u is more "
1741                "than one larger than current level %u", cont_level,
1742                m->cont_level);
1743        if (me->cont_count == me->max_count) {
1744            struct magic *nm;
1745            size_t cnt = me->max_count + ALLOC_CHUNK;
1746            if ((nm = CAST(struct magic *, erealloc(me->mp,
1747                sizeof(*nm) * cnt))) == NULL) {
1748                file_oomem(ms, sizeof(*nm) * cnt);
1749                return -1;
1750            }
1751            me->mp = m = nm;
1752            me->max_count = CAST(uint32_t, cnt);
1753        }
1754        m = &me->mp[me->cont_count++];
1755        (void)memset(m, 0, sizeof(*m));
1756        m->cont_level = cont_level;
1757    } else {
1758        static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1759        if (me->mp != NULL)
1760            return 1;
1761        if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
1762            file_oomem(ms, len);
1763            return -1;
1764        }
1765        me->mp = m;
1766        me->max_count = ALLOC_CHUNK;
1767        (void)memset(m, 0, sizeof(*m));
1768        m->factor_op = FILE_FACTOR_OP_NONE;
1769        m->cont_level = 0;
1770        me->cont_count = 1;
1771    }
1772    m->lineno = CAST(uint32_t, lineno);
1773
1774    if (*l == '&') {  /* m->cont_level == 0 checked below. */
1775                ++l;            /* step over */
1776                m->flag |= OFFADD;
1777        }
1778    if (*l == '(') {
1779        ++l;        /* step over */
1780        m->flag |= INDIR;
1781        if (m->flag & OFFADD)
1782            m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1783
1784        if (*l == '&') {  /* m->cont_level == 0 checked below */
1785            ++l;            /* step over */
1786            m->flag |= OFFADD;
1787        }
1788    }
1789    /* Indirect offsets are not valid at level 0. */
1790    if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1791        if (ms->flags & MAGIC_CHECK)
1792            file_magwarn(ms, "relative offset at level 0");
1793
1794    /* get offset, then skip over it */
1795    m->offset = (uint32_t)strtoul(l, &t, 0);
1796        if (l == t)
1797        if (ms->flags & MAGIC_CHECK)
1798            file_magwarn(ms, "offset `%s' invalid", l);
1799        l = t;
1800
1801    if (m->flag & INDIR) {
1802        m->in_type = FILE_LONG;
1803        m->in_offset = 0;
1804        /*
1805         * read [.lbs][+-]nnnnn)
1806         */
1807        if (*l == '.') {
1808            l++;
1809            switch (*l) {
1810            case 'l':
1811                m->in_type = FILE_LELONG;
1812                break;
1813            case 'L':
1814                m->in_type = FILE_BELONG;
1815                break;
1816            case 'm':
1817                m->in_type = FILE_MELONG;
1818                break;
1819            case 'h':
1820            case 's':
1821                m->in_type = FILE_LESHORT;
1822                break;
1823            case 'H':
1824            case 'S':
1825                m->in_type = FILE_BESHORT;
1826                break;
1827            case 'c':
1828            case 'b':
1829            case 'C':
1830            case 'B':
1831                m->in_type = FILE_BYTE;
1832                break;
1833            case 'e':
1834            case 'f':
1835            case 'g':
1836                m->in_type = FILE_LEDOUBLE;
1837                break;
1838            case 'E':
1839            case 'F':
1840            case 'G':
1841                m->in_type = FILE_BEDOUBLE;
1842                break;
1843            case 'i':
1844                m->in_type = FILE_LEID3;
1845                break;
1846            case 'I':
1847                m->in_type = FILE_BEID3;
1848                break;
1849            default:
1850                if (ms->flags & MAGIC_CHECK)
1851                    file_magwarn(ms,
1852                        "indirect offset type `%c' invalid",
1853                        *l);
1854                break;
1855            }
1856            l++;
1857        }
1858
1859        m->in_op = 0;
1860        if (*l == '~') {
1861            m->in_op |= FILE_OPINVERSE;
1862            l++;
1863        }
1864        if ((op = get_op(*l)) != -1) {
1865            m->in_op |= op;
1866            l++;
1867        }
1868        if (*l == '(') {
1869            m->in_op |= FILE_OPINDIRECT;
1870            l++;
1871        }
1872        if (isdigit((unsigned char)*l) || *l == '-') {
1873            m->in_offset = (int32_t)strtol(l, &t, 0);
1874            if (l == t)
1875                if (ms->flags & MAGIC_CHECK)
1876                    file_magwarn(ms,
1877                        "in_offset `%s' invalid", l);
1878            l = t;
1879        }
1880        if (*l++ != ')' ||
1881            ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1882            if (ms->flags & MAGIC_CHECK)
1883                file_magwarn(ms,
1884                    "missing ')' in indirect offset");
1885    }
1886    EATAB;
1887
1888#ifdef ENABLE_CONDITIONALS
1889    m->cond = get_cond(l, &l);
1890    if (check_cond(ms, m->cond, cont_level) == -1)
1891        return -1;
1892
1893    EATAB;
1894#endif
1895
1896    /*
1897     * Parse the type.
1898     */
1899    if (*l == 'u') {
1900        /*
1901         * Try it as a keyword type prefixed by "u"; match what
1902         * follows the "u".  If that fails, try it as an SUS
1903         * integer type.
1904         */
1905        m->type = get_type(type_tbl, l + 1, &l);
1906        if (m->type == FILE_INVALID) {
1907            /*
1908             * Not a keyword type; parse it as an SUS type,
1909             * 'u' possibly followed by a number or C/S/L.
1910             */
1911            m->type = get_standard_integer_type(l, &l);
1912        }
1913        /* It's unsigned. */
1914        if (m->type != FILE_INVALID)
1915            m->flag |= UNSIGNED;
1916    } else {
1917        /*
1918         * Try it as a keyword type.  If that fails, try it as
1919         * an SUS integer type if it begins with "d" or as an
1920         * SUS string type if it begins with "s".  In any case,
1921         * it's not unsigned.
1922         */
1923        m->type = get_type(type_tbl, l, &l);
1924        if (m->type == FILE_INVALID) {
1925            /*
1926             * Not a keyword type; parse it as an SUS type,
1927             * either 'd' possibly followed by a number or
1928             * C/S/L, or just 's'.
1929             */
1930            if (*l == 'd')
1931                m->type = get_standard_integer_type(l, &l);
1932            else if (*l == 's' && !isalpha((unsigned char)l[1])) {
1933                m->type = FILE_STRING;
1934        ++l;
1935            }
1936        }
1937    }
1938
1939    if (m->type == FILE_INVALID) {
1940        /* Not found - try it as a special keyword. */
1941        m->type = get_type(special_tbl, l, &l);
1942    }
1943
1944    if (m->type == FILE_INVALID) {
1945        if (ms->flags & MAGIC_CHECK)
1946            file_magwarn(ms, "type `%s' invalid", l);
1947        return -1;
1948    }
1949
1950    /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1951    /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1952
1953    m->mask_op = 0;
1954    if (*l == '~') {
1955        if (!IS_LIBMAGIC_STRING(m->type))
1956            m->mask_op |= FILE_OPINVERSE;
1957        else if (ms->flags & MAGIC_CHECK)
1958            file_magwarn(ms, "'~' invalid for string types");
1959        ++l;
1960    }
1961    m->str_range = 0;
1962    m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1963    if ((op = get_op(*l)) != -1) {
1964        if (IS_LIBMAGIC_STRING(m->type)) {
1965            int r;
1966
1967            if (op != FILE_OPDIVIDE) {
1968                if (ms->flags & MAGIC_CHECK)
1969                    file_magwarn(ms,
1970                        "invalid string/indirect op: "
1971                        "`%c'", *t);
1972                return -1;
1973            }
1974
1975            if (m->type == FILE_INDIRECT)
1976                r = parse_indirect_modifier(ms, m, &l);
1977            else
1978                r = parse_string_modifier(ms, m, &l);
1979            if (r == -1)
1980                return -1;
1981        } else
1982            parse_op_modifier(ms, m, &l, op);
1983    }
1984
1985    /*
1986     * We used to set mask to all 1's here, instead let's just not do
1987     * anything if mask = 0 (unless you have a better idea)
1988     */
1989    EATAB;
1990
1991    switch (*l) {
1992    case '>':
1993    case '<':
1994        m->reln = *l;
1995        ++l;
1996        if (*l == '=') {
1997            if (ms->flags & MAGIC_CHECK) {
1998                file_magwarn(ms, "%c= not supported",
1999                    m->reln);
2000                return -1;
2001            }
2002           ++l;
2003        }
2004        break;
2005    /* Old-style anding: "0 byte &0x80 dynamically linked" */
2006    case '&':
2007    case '^':
2008    case '=':
2009        m->reln = *l;
2010        ++l;
2011        if (*l == '=') {
2012           /* HP compat: ignore &= etc. */
2013           ++l;
2014        }
2015        break;
2016    case '!':
2017        m->reln = *l;
2018        ++l;
2019        break;
2020    default:
2021        m->reln = '=';  /* the default relation */
2022        if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
2023            isspace((unsigned char)l[1])) || !l[1])) {
2024            m->reln = *l;
2025            ++l;
2026        }
2027        break;
2028    }
2029    /*
2030     * Grab the value part, except for an 'x' reln.
2031     */
2032    if (m->reln != 'x' && getvalue(ms, m, &l, action))
2033        return -1;
2034
2035    /*
2036     * TODO finish this macro and start using it!
2037     * #define offsetcheck {if (offset > HOWMANY-1)
2038     *  magwarn("offset too big"); }
2039     */
2040
2041    /*
2042     * Now get last part - the description
2043     */
2044    EATAB;
2045    if (l[0] == '\b') {
2046        ++l;
2047        m->flag |= NOSPACE;
2048    } else if ((l[0] == '\\') && (l[1] == 'b')) {
2049        ++l;
2050        ++l;
2051        m->flag |= NOSPACE;
2052    }
2053    for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2054        continue;
2055    if (i == sizeof(m->desc)) {
2056        m->desc[sizeof(m->desc) - 1] = '\0';
2057        if (ms->flags & MAGIC_CHECK)
2058            file_magwarn(ms, "description `%s' truncated", m->desc);
2059    }
2060
2061        /*
2062     * We only do this check while compiling, or if any of the magic
2063     * files were not compiled.
2064         */
2065        if (ms->flags & MAGIC_CHECK) {
2066        if (check_format(ms, m) == -1)
2067            return -1;
2068    }
2069    m->mimetype[0] = '\0';      /* initialise MIME type to none */
2070    return 0;
2071}
2072
2073/*
2074 * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2075 * if valid
2076 */
2077private int
2078parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
2079{
2080    const char *l = line;
2081    char *el;
2082    unsigned long factor;
2083    struct magic *m = &me->mp[0];
2084
2085    if (m->factor_op != FILE_FACTOR_OP_NONE) {
2086        file_magwarn(ms,
2087            "Current entry already has a strength type: %c %d",
2088            m->factor_op, m->factor);
2089        return -1;
2090    }
2091    if (m->type == FILE_NAME) {
2092        file_magwarn(ms, "%s: Strength setting is not supported in "
2093            "\"name\" magic entries", m->value.s);
2094        return -1;
2095    }
2096    EATAB;
2097    switch (*l) {
2098    case FILE_FACTOR_OP_NONE:
2099    case FILE_FACTOR_OP_PLUS:
2100    case FILE_FACTOR_OP_MINUS:
2101    case FILE_FACTOR_OP_TIMES:
2102    case FILE_FACTOR_OP_DIV:
2103        m->factor_op = *l++;
2104        break;
2105    default:
2106        file_magwarn(ms, "Unknown factor op `%c'", *l);
2107        return -1;
2108    }
2109    EATAB;
2110    factor = strtoul(l, &el, 0);
2111    if (factor > 255) {
2112        file_magwarn(ms, "Too large factor `%lu'", factor);
2113        goto out;
2114    }
2115    if (*el && !isspace((unsigned char)*el)) {
2116        file_magwarn(ms, "Bad factor `%s'", l);
2117        goto out;
2118    }
2119    m->factor = (uint8_t)factor;
2120    if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2121        file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2122            m->factor_op, m->factor);
2123        goto out;
2124    }
2125    return 0;
2126out:
2127    m->factor_op = FILE_FACTOR_OP_NONE;
2128    m->factor = 0;
2129    return -1;
2130}
2131
/*
 * Return non-zero if x is an ASCII letter or digit, or is found in the
 * `extra' set by strchr() (which also matches the terminating NUL).
 */
private int
goodchar(unsigned char x, const char *extra)
{
    if (isascii(x) && isalnum(x))
        return 1;
    return strchr(extra, x) != NULL;
}
2137
2138private int
2139parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2140    zend_off_t off, size_t len, const char *name, const char *extra, int nt)
2141{
2142    size_t i;
2143    const char *l = line;
2144    struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2145    char *buf = CAST(char *, CAST(void *, m)) + off;
2146
2147    if (buf[0] != '\0') {
2148        len = nt ? strlen(buf) : len;
2149        file_magwarn(ms, "Current entry already has a %s type "
2150            "`%.*s', new type `%s'", name, (int)len, buf, l);
2151        return -1;
2152    }
2153
2154    if (*m->desc == '\0') {
2155        file_magwarn(ms, "Current entry does not yet have a "
2156            "description for adding a %s type", name);
2157        return -1;
2158    }
2159
2160    EATAB;
2161    for (i = 0; *l && i < len && goodchar(*l, extra); buf[i++] = *l++)
2162        continue;
2163
2164    if (i == len && *l) {
2165        if (nt)
2166            buf[len - 1] = '\0';
2167        if (ms->flags & MAGIC_CHECK)
2168            file_magwarn(ms, "%s type `%s' truncated %"
2169                SIZE_T_FORMAT "u", name, line, i);
2170    } else {
2171        if (!isspace((unsigned char)*l) && !goodchar(*l, extra))
2172            file_magwarn(ms, "%s type `%s' has bad char '%c'",
2173                name, line, *l);
2174        if (nt)
2175            buf[i] = '\0';
2176    }
2177
2178    if (i > 0)
2179        return 0;
2180
2181    file_magerror(ms, "Bad magic entry '%s'", line);
2182    return -1;
2183}
2184
2185/*
2186 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2187 * magic[index - 1]
2188 */
2189private int
2190parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
2191{
2192    struct magic *m = &me->mp[0];
2193
2194    return parse_extra(ms, me, line,
2195        CAST(off_t, offsetof(struct magic, apple)),
2196        sizeof(m->apple), "APPLE", "!+-./", 0);
2197}
2198
2199/*
2200 * parse a MIME annotation line from magic file, put into magic[index - 1]
2201 * if valid
2202 */
2203private int
2204parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
2205{
2206    struct magic *m = &me->mp[0];
2207
2208    return parse_extra(ms, me, line,
2209        CAST(zend_off_t, offsetof(struct magic, mimetype)),
2210        sizeof(m->mimetype), "MIME", "+-/.", 1);
2211}
2212
/*
 * Check that the printf conversion starting at `ptr' (the character right
 * after a '%') is acceptable for magic type `type'.
 *
 * The format class is looked up in file_formats[type]; for each class an
 * optional '-', optional '.', digits, optional '.', digits prefix is
 * skipped before the conversion character is examined.
 *
 * Returns 0 if the conversion is accepted, -1 otherwise.  Calls abort()
 * for a numeric type not listed below or an unknown format class.
 */
private int
check_format_type(const char *ptr, int type)
{
    /* h: number of 'h' length modifiers the type allows (2/1/0) */
    int quad = 0, h;
    if (*ptr == '\0') {
        /* Missing format string; bad */
        return -1;
    }

    switch (file_formats[type]) {
    case FILE_FMT_QUAD:
        quad = 1;
        /*FALLTHROUGH*/
    case FILE_FMT_NUM:
        if (quad == 0) {
            switch (type) {
            case FILE_BYTE:
                h = 2;
                break;
            case FILE_SHORT:
            case FILE_BESHORT:
            case FILE_LESHORT:
                h = 1;
                break;
            case FILE_LONG:
            case FILE_BELONG:
            case FILE_LELONG:
            case FILE_MELONG:
            case FILE_LEID3:
            case FILE_BEID3:
            case FILE_INDIRECT:
                h = 0;
                break;
            default:
                abort();
            }
        } else
            h = 0;
        /* skip flag, width and precision */
        if (*ptr == '-')
            ptr++;
        if (*ptr == '.')
            ptr++;
        while (isdigit((unsigned char)*ptr)) ptr++;
        if (*ptr == '.')
            ptr++;
        while (isdigit((unsigned char)*ptr)) ptr++;
        /* quad types must carry the "ll" length modifier */
        if (quad) {
            if (*ptr++ != 'l')
                return -1;
            if (*ptr++ != 'l')
                return -1;
        }

        switch (*ptr++) {
#ifdef STRICT_FORMAT    /* "long" formats are int formats for us */
        /* so don't accept the 'l' modifier */
        case 'l':
            switch (*ptr++) {
            case 'i':
            case 'd':
            case 'u':
            case 'o':
            case 'x':
            case 'X':
                return h != 0 ? -1 : 0;
            default:
                return -1;
            }

        /*
         * Don't accept h and hh modifiers. They make writing
         * magic entries more complicated, for very little benefit
         */
        case 'h':
            if (h-- <= 0)
                return -1;
            switch (*ptr++) {
            case 'h':
                if (h-- <= 0)
                    return -1;
                switch (*ptr++) {
                case 'i':
                case 'd':
                case 'u':
                case 'o':
                case 'x':
                case 'X':
                    return 0;
                default:
                    return -1;
                }
            case 'i':
            case 'd':
            case 'u':
            case 'o':
            case 'x':
            case 'X':
                return h != 0 ? -1 : 0;
            default:
                return -1;
            }
#endif
        case 'c':
            /* %c is only accepted when h == 2, i.e. for FILE_BYTE */
            return h != 2 ? -1 : 0;
        case 'i':
        case 'd':
        case 'u':
        case 'o':
        case 'x':
        case 'X':
#ifdef STRICT_FORMAT
            return h != 0 ? -1 : 0;
#else
            return 0;
#endif
        default:
            return -1;
        }

    case FILE_FMT_FLOAT:
    case FILE_FMT_DOUBLE:
        /* skip flag, width and precision */
        if (*ptr == '-')
            ptr++;
        if (*ptr == '.')
            ptr++;
        while (isdigit((unsigned char)*ptr)) ptr++;
        if (*ptr == '.')
            ptr++;
        while (isdigit((unsigned char)*ptr)) ptr++;

        switch (*ptr++) {
        case 'e':
        case 'E':
        case 'f':
        case 'F':
        case 'g':
        case 'G':
            return 0;

        default:
            return -1;
        }


    case FILE_FMT_STR:
        /* skip flag, width and optional ".precision" */
        if (*ptr == '-')
            ptr++;
        while (isdigit((unsigned char )*ptr))
            ptr++;
        if (*ptr == '.') {
            ptr++;
            while (isdigit((unsigned char )*ptr))
                ptr++;
        }

        switch (*ptr++) {
        case 's':
            return 0;
        default:
            return -1;
        }

    default:
        /* internal error */
        abort();
    }
    /*NOTREACHED*/
    return -1;
}
2382
2383/*
2384 * Check that the optional printf format in description matches
2385 * the type of the magic.
2386 */
2387private int
2388check_format(struct magic_set *ms, struct magic *m)
2389{
2390    char *ptr;
2391
2392    for (ptr = m->desc; *ptr; ptr++)
2393        if (*ptr == '%')
2394            break;
2395    if (*ptr == '\0') {
2396        /* No format string; ok */
2397        return 1;
2398    }
2399
2400    assert(file_nformats == file_nnames);
2401
2402    if (m->type >= file_nformats) {
2403        file_magwarn(ms, "Internal error inconsistency between "
2404            "m->type and format strings");
2405        return -1;
2406    }
2407    if (file_formats[m->type] == FILE_FMT_NONE) {
2408        file_magwarn(ms, "No format string for `%s' with description "
2409            "`%s'", m->desc, file_names[m->type]);
2410        return -1;
2411    }
2412
2413    ptr++;
2414    if (check_format_type(ptr, m->type) == -1) {
2415        /*
2416         * TODO: this error message is unhelpful if the format
2417         * string is not one character long
2418         */
2419        file_magwarn(ms, "Printf format `%c' is not valid for type "
2420            "`%s' in description `%s'", *ptr ? *ptr : '?',
2421            file_names[m->type], m->desc);
2422        return -1;
2423    }
2424
2425    for (; *ptr; ptr++) {
2426        if (*ptr == '%') {
2427            file_magwarn(ms,
2428                "Too many format strings (should have at most one) "
2429                "for `%s' with description `%s'",
2430                file_names[m->type], m->desc);
2431            return -1;
2432        }
2433    }
2434    return 0;
2435}
2436
2437/*
2438 * Read a numeric value from a pointer, into the value union of a magic
2439 * pointer, according to the magic type.  Update the string pointer to point
2440 * just after the number read.  Return 0 for success, non-zero for failure.
2441 */
2442private int
2443getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2444{
2445    switch (m->type) {
2446    case FILE_BESTRING16:
2447    case FILE_LESTRING16:
2448    case FILE_STRING:
2449    case FILE_PSTRING:
2450    case FILE_REGEX:
2451    case FILE_SEARCH:
2452    case FILE_NAME:
2453    case FILE_USE:
2454        *p = getstr(ms, m, *p, action == FILE_COMPILE);
2455        if (*p == NULL) {
2456            if (ms->flags & MAGIC_CHECK)
2457                file_magwarn(ms, "cannot get string from `%s'",
2458                    m->value.s);
2459            return -1;
2460        }
2461        if (m->type == FILE_REGEX) {
2462            /*  XXX do we need this? */
2463            /*zval pattern;
2464            int options = 0;
2465            pcre_cache_entry *pce;
2466
2467            convert_libmagic_pattern(&pattern, m->value.s, strlen(m->value.s), options);
2468
2469            if ((pce = pcre_get_compiled_regex_cache(Z_STR(pattern))) == NULL) {
2470                return -1;
2471            }
2472
2473            return 0;*/
2474        }
2475        return 0;
2476    case FILE_FLOAT:
2477    case FILE_BEFLOAT:
2478    case FILE_LEFLOAT:
2479        if (m->reln != 'x') {
2480            char *ep;
2481#ifdef HAVE_STRTOF
2482            m->value.f = strtof(*p, &ep);
2483#else
2484            m->value.f = (float)strtod(*p, &ep);
2485#endif
2486            *p = ep;
2487        }
2488        return 0;
2489    case FILE_DOUBLE:
2490    case FILE_BEDOUBLE:
2491    case FILE_LEDOUBLE:
2492        if (m->reln != 'x') {
2493            char *ep;
2494            m->value.d = strtod(*p, &ep);
2495            *p = ep;
2496        }
2497        return 0;
2498    default:
2499        if (m->reln != 'x') {
2500            char *ep;
2501            m->value.q = file_signextend(ms, m,
2502                (uint64_t)strtoull(*p, &ep, 0));
2503            *p = ep;
2504            eatsize(p);
2505        }
2506        return 0;
2507    }
2508}
2509
2510/*
2511 * Convert a string containing C character escapes.  Stop at an unescaped
2512 * space or tab.
2513 * Copy the converted version to "m->value.s", and the length in m->vallen.
2514 * Return updated scan pointer as function result. Warn if set.
2515 */
2516private const char *
2517getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2518{
2519    const char *origs = s;
2520    char    *p = m->value.s;
2521    size_t  plen = sizeof(m->value.s);
2522    char    *origp = p;
2523    char    *pmax = p + plen - 1;
2524    int c;
2525    int val;
2526
2527    while ((c = *s++) != '\0') {
2528        if (isspace((unsigned char) c))
2529            break;
2530        if (p >= pmax) {
2531            file_error(ms, 0, "string too long: `%s'", origs);
2532            return NULL;
2533        }
2534        if (c == '\\') {
2535            switch(c = *s++) {
2536
2537            case '\0':
2538                if (warn)
2539                    file_magwarn(ms, "incomplete escape");
2540                goto out;
2541
2542            case '\t':
2543                if (warn) {
2544                    file_magwarn(ms,
2545                        "escaped tab found, use \\t instead");
2546                    warn = 0;   /* already did */
2547                }
2548                /*FALLTHROUGH*/
2549            default:
2550                if (warn) {
2551                    if (isprint((unsigned char)c)) {
2552                        /* Allow escaping of
2553                         * ``relations'' */
2554                        if (strchr("<>&^=!", c) == NULL
2555                            && (m->type != FILE_REGEX ||
2556                            strchr("[]().*?^$|{}", c)
2557                            == NULL)) {
2558                            file_magwarn(ms, "no "
2559                                "need to escape "
2560                                "`%c'", c);
2561                        }
2562                    } else {
2563                        file_magwarn(ms,
2564                            "unknown escape sequence: "
2565                            "\\%03o", c);
2566                    }
2567                }
2568                /*FALLTHROUGH*/
2569            /* space, perhaps force people to use \040? */
2570            case ' ':
2571#if 0
2572            /*
2573             * Other things people escape, but shouldn't need to,
2574             * so we disallow them
2575             */
2576            case '\'':
2577            case '"':
2578            case '?':
2579#endif
2580            /* Relations */
2581            case '>':
2582            case '<':
2583            case '&':
2584            case '^':
2585            case '=':
2586            case '!':
2587            /* and baskslash itself */
2588            case '\\':
2589                *p++ = (char) c;
2590                break;
2591
2592            case 'a':
2593                *p++ = '\a';
2594                break;
2595
2596            case 'b':
2597                *p++ = '\b';
2598                break;
2599
2600            case 'f':
2601                *p++ = '\f';
2602                break;
2603
2604            case 'n':
2605                *p++ = '\n';
2606                break;
2607
2608            case 'r':
2609                *p++ = '\r';
2610                break;
2611
2612            case 't':
2613                *p++ = '\t';
2614                break;
2615
2616            case 'v':
2617                *p++ = '\v';
2618                break;
2619
2620            /* \ and up to 3 octal digits */
2621            case '0':
2622            case '1':
2623            case '2':
2624            case '3':
2625            case '4':
2626            case '5':
2627            case '6':
2628            case '7':
2629                val = c - '0';
2630                c = *s++;  /* try for 2 */
2631                if (c >= '0' && c <= '7') {
2632                    val = (val << 3) | (c - '0');
2633                    c = *s++;  /* try for 3 */
2634                    if (c >= '0' && c <= '7')
2635                        val = (val << 3) | (c-'0');
2636                    else
2637                        --s;
2638                }
2639                else
2640                    --s;
2641                *p++ = (char)val;
2642                break;
2643
2644            /* \x and up to 2 hex digits */
2645            case 'x':
2646                val = 'x';  /* Default if no digits */
2647                c = hextoint(*s++); /* Get next char */
2648                if (c >= 0) {
2649                    val = c;
2650                    c = hextoint(*s++);
2651                    if (c >= 0)
2652                        val = (val << 4) + c;
2653                    else
2654                        --s;
2655                } else
2656                    --s;
2657                *p++ = (char)val;
2658                break;
2659            }
2660        } else
2661            *p++ = (char)c;
2662    }
2663out:
2664    *p = '\0';
2665    m->vallen = CAST(unsigned char, (p - origp));
2666    if (m->type == FILE_PSTRING)
2667        m->vallen += (unsigned char)file_pstring_length_size(m);
2668    return s;
2669}
2670
2671
/*
 * Value of a single hexadecimal digit character, or -1 if `c' is not one.
 * Classification is done on `c' narrowed to unsigned char.
 */
private int
hextoint(int c)
{
    const unsigned char uc = (unsigned char) c;

    if (isascii(uc)) {
        if (isdigit(uc))
            return c - '0';
        if (c >= 'a' && c <= 'f')
            return c + 10 - 'a';
        if (c >= 'A' && c <= 'F')
            return c + 10 - 'A';
    }
    return -1;
}
2686
2687
/*
 * Write string `s' to `fp', rendering bytes outside 040..0176 as C
 * escapes (\a \b \f \n \r \t \v, otherwise a three-digit octal escape).
 * If len equals ~0U the string is taken to be NUL-terminated; otherwise
 * exactly `len' bytes are written.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
    static const char ctl[] = "\a\b\f\n\r\t\v";
    static const char sym[] = "abfnrtv";

    for (;;) {
        const char *hit;
        char ch;

        if (len == ~0U) {
            if ((ch = *s++) == '\0')
                break;
        } else {
            if (len-- == 0)
                break;
            ch = *s++;
        }

        if (ch >= 040 && ch <= 0176) {  /* TODO isprint && !iscntrl */
            (void) fputc(ch, fp);
            continue;
        }

        (void) fputc('\\', fp);
        /* strchr() would match the terminator for NUL; exclude it */
        hit = (ch != '\0') ? strchr(ctl, ch) : NULL;
        if (hit != NULL)
            (void) fputc(sym[hit - ctl], fp);
        else
            (void) fprintf(fp, "%.3o", ch & 0377);
    }
}
2747
2748/*
2749 * eatsize(): Eat the size spec from a number [eg. 10UL]
2750 */
2751private void
2752eatsize(const char **p)
2753{
2754    const char *l = *p;
2755
2756    if (LOWCASE(*l) == 'u')
2757        l++;
2758
2759    switch (LOWCASE(*l)) {
2760    case 'l':    /* long */
2761    case 's':    /* short */
2762    case 'h':    /* short */
2763    case 'b':    /* char/byte */
2764    case 'c':    /* char/byte */
2765        l++;
2766        /*FALLTHROUGH*/
2767    default:
2768        break;
2769    }
2770
2771    *p = l;
2772}
2773
2774/*
2775 * handle a buffer containing a compiled file.
2776 */
2777private struct magic_map *
2778apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
2779{
2780    struct magic_map *map;
2781
2782    if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
2783        file_oomem(ms, sizeof(*map));
2784        return NULL;
2785    }
2786    map->len = len;
2787    map->p = buf;
2788    map->type = MAP_TYPE_USER;
2789    if (check_buffer(ms, map, "buffer") != 0) {
2790        apprentice_unmap(map);
2791        return NULL;
2792    }
2793    return map;
2794}
2795
2796/*
2797 * handle a compiled file.
2798 */
2799
2800private struct magic_map *
2801apprentice_map(struct magic_set *ms, const char *fn)
2802{
2803    uint32_t *ptr;
2804    uint32_t version, entries, nentries;
2805    int needsbyteswap;
2806    char *dbname = NULL;
2807    struct magic_map *map;
2808    size_t i;
2809    php_stream *stream = NULL;
2810    php_stream_statbuf st;
2811
2812
2813
2814    if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
2815        file_oomem(ms, sizeof(*map));
2816        return NULL;
2817    }
2818
2819    if (fn == NULL) {
2820        map->p = (void *)&php_magic_database;
2821        goto internal_loaded;
2822    }
2823
2824#ifdef PHP_WIN32
2825    /* Don't bother on windows with php_stream_open_wrapper,
2826    return to give apprentice_load() a chance. */
2827    if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
2828               if (st.sb.st_mode & S_IFDIR) {
2829                       return NULL;
2830               }
2831       }
2832#endif
2833
2834    dbname = mkdbname(ms, fn, 0);
2835    if (dbname == NULL)
2836        goto error;
2837
2838        stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
2839
2840    if (!stream) {
2841        goto error;
2842    }
2843
2844    if (php_stream_stat(stream, &st) < 0) {
2845        file_error(ms, errno, "cannot stat `%s'", dbname);
2846        goto error;
2847    }
2848    if (st.sb.st_size < 8 || st.sb.st_size > MAXMAGIC_SIZE) {
2849        file_error(ms, 0, "file `%s' is too %s", dbname,
2850            st.sb.st_size < 8 ? "small" : "large");
2851        goto error;
2852    }
2853
2854    map->len = (size_t)st.sb.st_size;
2855    if ((map->p = CAST(void *, emalloc(map->len))) == NULL) {
2856        file_oomem(ms, map->len);
2857        goto error;
2858    }
2859    if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
2860        file_badread(ms);
2861        goto error;
2862    }
2863    map->len = 0;
2864#define RET 1
2865
2866    php_stream_close(stream);
2867    stream = NULL;
2868
2869internal_loaded:
2870    ptr = (uint32_t *)(void *)map->p;
2871    if (*ptr != MAGICNO) {
2872        if (swap4(*ptr) != MAGICNO) {
2873            file_error(ms, 0, "bad magic in `%s'", dbname);
2874            goto error;
2875        }
2876        needsbyteswap = 1;
2877    } else
2878        needsbyteswap = 0;
2879    if (needsbyteswap)
2880        version = swap4(ptr[1]);
2881    else
2882        version = ptr[1];
2883    if (version != VERSIONNO) {
2884        file_error(ms, 0, "File %d.%d supports only version %d magic "
2885            "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
2886            VERSIONNO, dbname, version);
2887        goto error;
2888    }
2889
2890    /* php_magic_database is a const, performing writes will segfault. This is for big-endian
2891    machines only, PPC and Sparc specifically. Consider static variable or MINIT in
2892    future. */
2893    if (needsbyteswap && fn == NULL) {
2894        map->p = emalloc(sizeof(php_magic_database));
2895        map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
2896    }
2897
2898    if (NULL != fn) {
2899        nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2900        entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2901        if ((zend_off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
2902            file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
2903                dbname, (unsigned long long)st.sb.st_size,
2904                sizeof(struct magic));
2905            goto error;
2906        }
2907    }
2908    map->magic[0] = CAST(struct magic *, map->p) + 1;
2909    nentries = 0;
2910    for (i = 0; i < MAGIC_SETS; i++) {
2911        if (needsbyteswap)
2912            map->nmagic[i] = swap4(ptr[i + 2]);
2913        else
2914            map->nmagic[i] = ptr[i + 2];
2915        if (i != MAGIC_SETS - 1)
2916            map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2917        nentries += map->nmagic[i];
2918    }
2919    if (NULL != fn && entries != nentries + 1) {
2920        file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2921            dbname, entries, nentries + 1);
2922        goto error;
2923    }
2924    if (needsbyteswap)
2925        for (i = 0; i < MAGIC_SETS; i++)
2926            byteswap(map->magic[i], map->nmagic[i]);
2927
2928    if (dbname) {
2929        efree(dbname);
2930    }
2931    return map;
2932
2933error:
2934    if (stream) {
2935        php_stream_close(stream);
2936    }
2937    apprentice_unmap(map);
2938    if (dbname) {
2939        efree(dbname);
2940    }
2941    return NULL;
2942}
2943
2944private int
2945check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
2946{
2947    uint32_t *ptr;
2948    uint32_t entries, nentries;
2949    uint32_t version;
2950    int i, needsbyteswap;
2951
2952    ptr = CAST(uint32_t *, map->p);
2953    if (*ptr != MAGICNO) {
2954        if (swap4(*ptr) != MAGICNO) {
2955            file_error(ms, 0, "bad magic in `%s'", dbname);
2956            return -1;
2957        }
2958        needsbyteswap = 1;
2959    } else
2960        needsbyteswap = 0;
2961    if (needsbyteswap)
2962        version = swap4(ptr[1]);
2963    else
2964        version = ptr[1];
2965    if (version != VERSIONNO) {
2966        file_error(ms, 0, "File %s supports only version %d magic "
2967            "files. `%s' is version %d", FILE_VERSION_MAJOR,
2968            VERSIONNO, dbname, version);
2969        return -1;
2970    }
2971    entries = (uint32_t)(map->len / sizeof(struct magic));
2972    if ((entries * sizeof(struct magic)) != map->len) {
2973        file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
2974            "a multiple of %" SIZE_T_FORMAT "u",
2975            dbname, map->len, sizeof(struct magic));
2976        return -1;
2977    }
2978    map->magic[0] = CAST(struct magic *, map->p) + 1;
2979    nentries = 0;
2980    for (i = 0; i < MAGIC_SETS; i++) {
2981        if (needsbyteswap)
2982            map->nmagic[i] = swap4(ptr[i + 2]);
2983        else
2984            map->nmagic[i] = ptr[i + 2];
2985        if (i != MAGIC_SETS - 1)
2986            map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2987        nentries += map->nmagic[i];
2988    }
2989    if (entries != nentries + 1) {
2990        file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2991            dbname, entries, nentries + 1);
2992        return -1;
2993    }
2994    if (needsbyteswap)
2995        for (i = 0; i < MAGIC_SETS; i++)
2996            byteswap(map->magic[i], map->nmagic[i]);
2997    return 0;
2998}
2999
3000/*
3001 * handle an mmaped file.
3002 */
3003private int
3004apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3005{
3006    static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3007    static const size_t m = sizeof(**map->magic);
3008    int fd = -1;
3009    size_t len;
3010    char *dbname;
3011    int rv = -1;
3012    uint32_t i;
3013    union {
3014        struct magic m;
3015        uint32_t h[2 + MAGIC_SETS];
3016    } hdr;
3017    php_stream *stream;
3018
3019
3020    dbname = mkdbname(ms, fn, 0);
3021
3022    if (dbname == NULL)
3023        goto out;
3024
3025    /* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
3026    stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
3027
3028    if (!stream) {
3029        file_error(ms, errno, "cannot open `%s'", dbname);
3030        goto out;
3031    }
3032    memset(&hdr, 0, sizeof(hdr));
3033    hdr.h[0] = MAGICNO;
3034    hdr.h[1] = VERSIONNO;
3035    memcpy(hdr.h + 2, map->nmagic, nm);
3036
3037    if (php_stream_write(stream,(const char *)&hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) {
3038        file_error(ms, errno, "error writing `%s'", dbname);
3039        goto out;
3040    }
3041
3042    for (i = 0; i < MAGIC_SETS; i++) {
3043        len = m * map->nmagic[i];
3044        if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
3045            file_error(ms, errno, "error writing `%s'", dbname);
3046            goto out;
3047        }
3048    }
3049
3050    if (stream) {
3051        php_stream_close(stream);
3052    }
3053    rv = 0;
3054out:
3055    efree(dbname);
3056    return rv;
3057}
3058
3059private const char ext[] = ".mgc";
3060/*
3061 * make a dbname
3062 */
3063private char *
3064mkdbname(struct magic_set *ms, const char *fn, int strip)
3065{
3066    const char *p, *q;
3067    char *buf;
3068
3069    if (strip) {
3070        if ((p = strrchr(fn, '/')) != NULL)
3071            fn = ++p;
3072    }
3073
3074    for (q = fn; *q; q++)
3075        continue;
3076    /* Look for .mgc */
3077    for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3078        if (*p != *q)
3079            break;
3080
3081    /* Did not find .mgc, restore q */
3082    if (p >= ext)
3083        while (*q)
3084            q++;
3085
3086    q++;
3087    /* Compatibility with old code that looked in .mime */
3088    if (ms->flags & MAGIC_MIME) {
3089        spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
3090#ifdef PHP_WIN32
3091        if (VCWD_ACCESS(buf, R_OK) == 0) {
3092#else
3093        if (VCWD_ACCESS(buf, R_OK) != -1) {
3094#endif
3095            ms->flags &= MAGIC_MIME_TYPE;
3096            return buf;
3097        }
3098        efree(buf);
3099    }
3100    spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
3101
3102    /* Compatibility with old code that looked in .mime */
3103    if (strstr(p, ".mime") != NULL)
3104        ms->flags &= MAGIC_MIME_TYPE;
3105    return buf;
3106}
3107
3108/*
3109 * Byteswap an mmap'ed file if needed
3110 */
3111private void
3112byteswap(struct magic *magic, uint32_t nmagic)
3113{
3114    uint32_t i;
3115    for (i = 0; i < nmagic; i++)
3116        bs1(&magic[i]);
3117}
3118
/*
 * Return `sv' with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
    const unsigned int wide = sv;

    return (uint16_t)((wide << 8) | (wide >> 8));
}
3132
/*
 * Return `sv' with its four bytes in reverse order.
 */
private uint32_t
swap4(uint32_t sv)
{
    return ((sv & 0x000000ffU) << 24) |
        ((sv & 0x0000ff00U) << 8) |
        ((sv >> 8) & 0x0000ff00U) |
        (sv >> 24);
}
3148
/*
 * Return `sv' with the order of its eight bytes fully reversed.
 */
private uint64_t
swap8(uint64_t sv)
{
    uint64_t rv = 0;
    unsigned int i;

    /* Peel bytes off the low end of sv and push them onto rv. */
    for (i = 0; i < sizeof(sv); i++) {
        rv = (rv << 8) | (sv & 0xffu);
        sv >>= 8;
    }
    return rv;
}
3179
3180/*
3181 * byteswap a single magic entry
3182 */
3183private void
3184bs1(struct magic *m)
3185{
3186    m->cont_level = swap2(m->cont_level);
3187    m->offset = swap4((uint32_t)m->offset);
3188    m->in_offset = swap4((uint32_t)m->in_offset);
3189    m->lineno = swap4((uint32_t)m->lineno);
3190    if (IS_LIBMAGIC_STRING(m->type)) {
3191        m->str_range = swap4(m->str_range);
3192        m->str_flags = swap4(m->str_flags);
3193    }
3194    else {
3195        m->value.q = swap8(m->value.q);
3196        m->num_mask = swap8(m->num_mask);
3197    }
3198}
3199
3200protected size_t
3201file_pstring_length_size(const struct magic *m)
3202{
3203    switch (m->str_flags & PSTRING_LEN) {
3204    case PSTRING_1_LE:
3205        return 1;
3206    case PSTRING_2_LE:
3207    case PSTRING_2_BE:
3208        return 2;
3209    case PSTRING_4_LE:
3210    case PSTRING_4_BE:
3211        return 4;
3212    default:
3213        abort();    /* Impossible */
3214        return 1;
3215    }
3216}
3217protected size_t
3218file_pstring_get_length(const struct magic *m, const char *s)
3219{
3220    size_t len = 0;
3221
3222    switch (m->str_flags & PSTRING_LEN) {
3223    case PSTRING_1_LE:
3224        len = *s;
3225        break;
3226    case PSTRING_2_LE:
3227        len = (s[1] << 8) | s[0];
3228        break;
3229    case PSTRING_2_BE:
3230        len = (s[0] << 8) | s[1];
3231        break;
3232    case PSTRING_4_LE:
3233        len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
3234        break;
3235    case PSTRING_4_BE:
3236        len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
3237        break;
3238    default:
3239        abort();    /* Impossible */
3240    }
3241
3242    if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
3243        len -= file_pstring_length_size(m);
3244
3245    return len;
3246}
3247
3248protected int
3249file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3250{
3251    uint32_t i, j;
3252    struct mlist *mlist, *ml;
3253
3254    mlist = ms->mlist[1];
3255
3256    for (ml = mlist->next; ml != mlist; ml = ml->next) {
3257        struct magic *ma = ml->magic;
3258        uint32_t nma = ml->nmagic;
3259        for (i = 0; i < nma; i++) {
3260            if (ma[i].type != FILE_NAME)
3261                continue;
3262            if (strcmp(ma[i].value.s, name) == 0) {
3263                v->magic = &ma[i];
3264                for (j = i + 1; j < nma; j++)
3265                    if (ma[j].cont_level == 0)
3266                        break;
3267                v->nmagic = j - i;
3268                return 0;
3269            }
3270        }
3271    }
3272    return -1;
3273}
3274