1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * apprentice - make one pass through /etc/magic, learning its secrets.
30 */
31
32#include "php.h"
33
34#include "file.h"
35
36#ifndef lint
37FILE_RCSID("@(#)$File: apprentice.c,v 1.230 2015/01/02 21:29:39 christos Exp $")
38#endif  /* lint */
39
40#include "magic.h"
41#include "patchlevel.h"
42#include <stdlib.h>
43
44#if defined(__hpux) && !defined(HAVE_STRTOULL)
45#if SIZEOF_LONG == 8
46# define strtoull strtoul
47#else
48# define strtoull __strtoull
49#endif
50#endif
51
52#ifdef PHP_WIN32
53#include "win32/unistd.h"
54#if _MSC_VER <= 1300
55# include "win32/php_strtoi64.h"
56#endif
57#define strtoull _strtoui64
58#else
59#include <unistd.h>
60#endif
61#include <string.h>
62#include <assert.h>
63#include <ctype.h>
64#include <fcntl.h>
65
/*
 * MAXMAGIC_SIZE: SSIZE_MAX where the platform headers define it,
 * otherwise a 31-bit fallback.
 * NOTE(review): presumably an upper bound on magic data sizes; no use of
 * it is visible in this part of the file -- confirm against callers.
 */
#ifndef SSIZE_MAX
#define MAXMAGIC_SIZE        ((ssize_t)0x7fffffff)
#else
#define MAXMAGIC_SIZE        SSIZE_MAX
#endif
71
/*
 * EATAB: advance the pointer named `l' past a run of ASCII whitespace.
 * The macro takes no argument; a character pointer called `l' must be in
 * scope wherever it is expanded.
 */
#define EATAB {while (isascii((unsigned char) *l) && \
              isspace((unsigned char) *l))  ++l;}
/*
 * LOWCASE: yield the lower-case form of `l' when it is an upper-case
 * letter, otherwise `l' itself.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
            tolower((unsigned char) (l)) : (l))
76/*
77 * Work around a bug in headers on Digital Unix.
78 * At least confirmed for: OSF1 V4.0 878
79 */
80#if defined(__osf__) && defined(__DECC)
81#ifdef MAP_FAILED
82#undef MAP_FAILED
83#endif
84#endif
85
86#ifndef MAP_FAILED
87#define MAP_FAILED (void *) -1
88#endif
89
90#ifndef MAP_FILE
91#define MAP_FILE 0
92#endif
93
/*
 * Allocation granularities.  ALLOC_INCR is the number of slots by which
 * addentry() grows a magic_entry_set.
 * NOTE(review): ALLOC_CHUNK is not used in the visible part of the file;
 * presumably it sizes per-entry magic arrays -- confirm in parse().
 */
#define ALLOC_CHUNK (size_t)10
#define ALLOC_INCR  (size_t)200

/*
 * NOTE(review): presumably the values stored in magic_map.type to record
 * how the map's storage was obtained -- confirm against apprentice_map().
 */
#define MAP_TYPE_MMAP   0
#define MAP_TYPE_MALLOC 1
#define MAP_TYPE_USER   2
100
/*
 * One magic entry while it is being built by the parser.
 * mp points at its magic records; mp[0] is the record whose type and
 * strength addentry() and apprentice_sort() look at.
 */
struct magic_entry {
    struct magic *mp;
    uint32_t cont_count;    /* presumably records in use in mp -- TODO confirm */
    uint32_t max_count;     /* presumably records allocated in mp -- TODO confirm */
};
106
/*
 * Growable array of magic entries, maintained by addentry().
 */
struct magic_entry_set {
    struct magic_entry *me; /* entry storage, reallocated as it grows */
    uint32_t count;         /* entries currently stored in me */
    uint32_t max;           /* entries me has room for */
};
112
/*
 * A loaded magic database.
 * apprentice_unmap() treats p specially: when it equals the built-in
 * php_magic_database nothing but the map itself is freed, when it is NULL
 * each magic[] array is freed individually, otherwise p is freed as one
 * allocation.
 */
struct magic_map {
    void *p;
    size_t len;
    int type;
    struct magic *magic[MAGIC_SETS];    /* per-set record arrays */
    uint32_t nmagic[MAGIC_SETS];        /* per-set record counts */
};
120
/*
 * Lookup tables indexed by magic type code; filled in from type_tbl by
 * init_file_tables().  file_nformats / file_nnames expose their sizes.
 */
int file_formats[FILE_NAMES_SIZE];
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];
const size_t file_nnames = FILE_NAMES_SIZE;
125
/*
 * Forward declarations for the file-local helpers defined in this file.
 */
private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
private int hextoint(int);
private const char *getstr(struct magic_set *, struct magic *, const char *,
    int);
private int parse(struct magic_set *, struct magic_entry *, const char *,
    size_t, int);
private void eatsize(const char **);
private int apprentice_1(struct magic_set *, const char *, int);
private size_t apprentice_magic_strength(const struct magic *);
private int apprentice_sort(const void *, const void *);
private void apprentice_list(struct mlist *, int );
private struct magic_map *apprentice_load(struct magic_set *,
    const char *, int);
private struct mlist *mlist_alloc(void);
private void mlist_free(struct mlist *);
private void byteswap(struct magic *, uint32_t);
private void bs1(struct magic *);
private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private uint64_t swap8(uint64_t);
private char *mkdbname(struct magic_set *, const char *, int);
private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
    size_t);
private struct magic_map *apprentice_map(struct magic_set *, const char *);
private int check_buffer(struct magic_set *, struct magic_map *, const char *);
private void apprentice_unmap(struct magic_map *);
private int apprentice_compile(struct magic_set *, struct magic_map *,
    const char *);
private int check_format_type(const char *, int);
private int check_format(struct magic_set *, struct magic *);
private int get_op(char);
private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
160
161
/* Size of struct magic in this build; apprentice_1() checks it against
 * FILE_MAGICSIZE before loading anything. */
private size_t magicsize = sizeof(struct magic);

/* Tab-separated column header (NOTE(review): no use visible in this part
 * of the file). */
private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";

/*
 * Dispatch table for "!:<name>" directive lines, searched by load_1().
 * Each row holds the directive name, its length and the parse_<name>
 * handler; a row with a NULL name terminates the table.
 */
private struct {
    const char *name;
    size_t len;
    int (*fun)(struct magic_set *, struct magic_entry *, const char *);
} bang[] = {
#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
    DECLARE_FIELD(mime),
    DECLARE_FIELD(apple),
    DECLARE_FIELD(strength),
#undef  DECLARE_FIELD
    { NULL, 0, NULL }
};
178
179#include "../data_file.c"
180
/*
 * One row of a type-keyword table (see type_tbl and special_tbl).
 */
struct type_tbl_s {
    const char name[16];    /* keyword as written in a magic file */
    const size_t len;       /* strlen(name); 0 marks the terminating row */
    const int type;         /* FILE_* type code returned for the keyword */
    const int format;       /* FILE_FMT_* class stored in file_formats[] */
};
187
/*
 * Table of magic type keywords.
 *
 * Row order matters: get_type() returns the first row whose name is a
 * prefix of the input, and init_file_tables() asserts that the number of
 * rows before the terminating XX_NULL row equals FILE_NAMES_SIZE while
 * using each row's type code as an array index.
 *
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual test; having "long" be dependent on how big a "long" is on
 * the machine running "file" is silly.
 */
static const struct type_tbl_s type_tbl[] = {
# define XX(s)      s, (sizeof(s) - 1)
# define XX_NULL    "", 0
    { XX("invalid"),    FILE_INVALID,       FILE_FMT_NONE },
    { XX("byte"),       FILE_BYTE,      FILE_FMT_NUM },
    { XX("short"),      FILE_SHORT,     FILE_FMT_NUM },
    { XX("default"),    FILE_DEFAULT,       FILE_FMT_NONE },
    { XX("long"),       FILE_LONG,      FILE_FMT_NUM },
    { XX("string"),     FILE_STRING,        FILE_FMT_STR },
    { XX("date"),       FILE_DATE,      FILE_FMT_STR },
    { XX("beshort"),    FILE_BESHORT,       FILE_FMT_NUM },
    { XX("belong"),     FILE_BELONG,        FILE_FMT_NUM },
    { XX("bedate"),     FILE_BEDATE,        FILE_FMT_STR },
    { XX("leshort"),    FILE_LESHORT,       FILE_FMT_NUM },
    { XX("lelong"),     FILE_LELONG,        FILE_FMT_NUM },
    { XX("ledate"),     FILE_LEDATE,        FILE_FMT_STR },
    { XX("pstring"),    FILE_PSTRING,       FILE_FMT_STR },
    { XX("ldate"),      FILE_LDATE,     FILE_FMT_STR },
    { XX("beldate"),    FILE_BELDATE,       FILE_FMT_STR },
    { XX("leldate"),    FILE_LELDATE,       FILE_FMT_STR },
    { XX("regex"),      FILE_REGEX,     FILE_FMT_STR },
    { XX("bestring16"), FILE_BESTRING16,    FILE_FMT_STR },
    { XX("lestring16"), FILE_LESTRING16,    FILE_FMT_STR },
    { XX("search"),     FILE_SEARCH,        FILE_FMT_STR },
    { XX("medate"),     FILE_MEDATE,        FILE_FMT_STR },
    { XX("meldate"),    FILE_MELDATE,       FILE_FMT_STR },
    { XX("melong"),     FILE_MELONG,        FILE_FMT_NUM },
    { XX("quad"),       FILE_QUAD,      FILE_FMT_QUAD },
    { XX("lequad"),     FILE_LEQUAD,        FILE_FMT_QUAD },
    { XX("bequad"),     FILE_BEQUAD,        FILE_FMT_QUAD },
    { XX("qdate"),      FILE_QDATE,     FILE_FMT_STR },
    { XX("leqdate"),    FILE_LEQDATE,       FILE_FMT_STR },
    { XX("beqdate"),    FILE_BEQDATE,       FILE_FMT_STR },
    { XX("qldate"),     FILE_QLDATE,        FILE_FMT_STR },
    { XX("leqldate"),   FILE_LEQLDATE,      FILE_FMT_STR },
    { XX("beqldate"),   FILE_BEQLDATE,      FILE_FMT_STR },
    { XX("float"),      FILE_FLOAT,     FILE_FMT_FLOAT },
    { XX("befloat"),    FILE_BEFLOAT,       FILE_FMT_FLOAT },
    { XX("lefloat"),    FILE_LEFLOAT,       FILE_FMT_FLOAT },
    { XX("double"),     FILE_DOUBLE,        FILE_FMT_DOUBLE },
    { XX("bedouble"),   FILE_BEDOUBLE,      FILE_FMT_DOUBLE },
    { XX("ledouble"),   FILE_LEDOUBLE,      FILE_FMT_DOUBLE },
    { XX("leid3"),      FILE_LEID3,     FILE_FMT_NUM },
    { XX("beid3"),      FILE_BEID3,     FILE_FMT_NUM },
    { XX("indirect"),   FILE_INDIRECT,      FILE_FMT_NUM },
    { XX("qwdate"),     FILE_QWDATE,        FILE_FMT_STR },
    { XX("leqwdate"),   FILE_LEQWDATE,      FILE_FMT_STR },
    { XX("beqwdate"),   FILE_BEQWDATE,      FILE_FMT_STR },
    { XX("name"),       FILE_NAME,      FILE_FMT_NONE },
    { XX("use"),        FILE_USE,       FILE_FMT_NONE },
    { XX("clear"),      FILE_CLEAR,     FILE_FMT_NONE },
    { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },
};
248
/*
 * These are not types, and cannot be preceded by "u" to make them
 * unsigned.
 *
 * The table has the same row layout and XX_NULL terminator as type_tbl,
 * so it can be searched with get_type().  The XX / XX_NULL helper macros
 * are retired right after it.
 */
static const struct type_tbl_s special_tbl[] = {
    { XX("name"),       FILE_NAME,      FILE_FMT_STR },
    { XX("use"),        FILE_USE,       FILE_FMT_STR },
    { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },
};
# undef XX
# undef XX_NULL
260
/* Fallback for headers that do not provide S_ISDIR: test the _S_IFDIR bit. */
#ifndef S_ISDIR
#define S_ISDIR(mode) ((mode) & _S_IFDIR)
#endif
264
265private int
266get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
267{
268    const struct type_tbl_s *p;
269
270    for (p = tbl; p->len; p++) {
271        if (strncmp(l, p->name, p->len) == 0) {
272            if (t)
273                *t = l + p->len;
274            break;
275        }
276    }
277    return p->type;
278}
279
280private int
281get_standard_integer_type(const char *l, const char **t)
282{
283    int type;
284
285    if (isalpha((unsigned char)l[1])) {
286        switch (l[1]) {
287        case 'C':
288            /* "dC" and "uC" */
289            type = FILE_BYTE;
290            break;
291        case 'S':
292            /* "dS" and "uS" */
293            type = FILE_SHORT;
294            break;
295        case 'I':
296        case 'L':
297            /*
298             * "dI", "dL", "uI", and "uL".
299             *
300             * XXX - the actual Single UNIX Specification says
301             * that "L" means "long", as in the C data type,
302             * but we treat it as meaning "4-byte integer".
303             * Given that the OS X version of file 5.04 did
304             * the same, I guess that passes the actual SUS
305             * validation suite; having "dL" be dependent on
306             * how big a "long" is on the machine running
307             * "file" is silly.
308             */
309            type = FILE_LONG;
310            break;
311        case 'Q':
312            /* "dQ" and "uQ" */
313            type = FILE_QUAD;
314            break;
315        default:
316            /* "d{anything else}", "u{anything else}" */
317            return FILE_INVALID;
318        }
319        l += 2;
320    } else if (isdigit((unsigned char)l[1])) {
321        /*
322         * "d{num}" and "u{num}"; we only support {num} values
323         * of 1, 2, 4, and 8 - the Single UNIX Specification
324         * doesn't say anything about whether arbitrary
325         * values should be supported, but both the Solaris 10
326         * and OS X Mountain Lion versions of file passed the
327         * Single UNIX Specification validation suite, and
328         * neither of them support values bigger than 8 or
329         * non-power-of-2 values.
330         */
331        if (isdigit((unsigned char)l[2])) {
332            /* Multi-digit, so > 9 */
333            return FILE_INVALID;
334        }
335        switch (l[1]) {
336        case '1':
337            type = FILE_BYTE;
338            break;
339        case '2':
340            type = FILE_SHORT;
341            break;
342        case '4':
343            type = FILE_LONG;
344            break;
345        case '8':
346            type = FILE_QUAD;
347            break;
348        default:
349            /* XXX - what about 3, 5, 6, or 7? */
350            return FILE_INVALID;
351        }
352        l += 2;
353    } else {
354        /*
355         * "d" or "u" by itself.
356         */
357        type = FILE_LONG;
358        ++l;
359    }
360    if (t)
361        *t = l;
362    return type;
363}
364
365private void
366init_file_tables(void)
367{
368    static int done = 0;
369    const struct type_tbl_s *p;
370
371    if (done)
372        return;
373    done++;
374
375    for (p = type_tbl; p->len; p++) {
376        assert(p->type < FILE_NAMES_SIZE);
377        file_names[p->type] = p->name;
378        file_formats[p->type] = p->format;
379    }
380    assert(p - type_tbl == FILE_NAMES_SIZE);
381}
382
383private int
384add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
385{
386    struct mlist *ml;
387
388    mlp->map = idx == 0 ? map : NULL;
389    if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
390        return -1;
391
392    ml->map = NULL;
393    ml->magic = map->magic[idx];
394    ml->nmagic = map->nmagic[idx];
395
396    mlp->prev->next = ml;
397    ml->prev = mlp->prev;
398    ml->next = mlp;
399    mlp->prev = ml;
400    return 0;
401}
402
403/*
404 * Handle one file or directory.
405 */
406private int
407apprentice_1(struct magic_set *ms, const char *fn, int action)
408{
409    struct magic_map *map;
410    struct mlist *ml;
411    size_t i;
412
413    if (magicsize != FILE_MAGICSIZE) {
414        file_error(ms, 0, "magic element size %lu != %lu",
415            (unsigned long)sizeof(*map->magic[0]),
416            (unsigned long)FILE_MAGICSIZE);
417        return -1;
418    }
419
420    if (action == FILE_COMPILE) {
421        map = apprentice_load(ms, fn, action);
422        if (map == NULL)
423            return -1;
424        return apprentice_compile(ms, map, fn);
425    }
426
427    map = apprentice_map(ms, fn);
428    if (map == NULL) {
429        if (fn) {
430            if (ms->flags & MAGIC_CHECK)
431                file_magwarn(ms, "using regular magic file `%s'", fn);
432            map = apprentice_load(ms, fn, action);
433        }
434        if (map == NULL)
435            return -1;
436    }
437
438    for (i = 0; i < MAGIC_SETS; i++) {
439        if (add_mlist(ms->mlist[i], map, i) == -1) {
440            file_oomem(ms, sizeof(*ml));
441            apprentice_unmap(map);
442            return -1;
443        }
444    }
445
446    if (action == FILE_LIST) {
447        for (i = 0; i < MAGIC_SETS; i++) {
448            printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
449                i);
450            apprentice_list(ms->mlist[i], BINTEST);
451            printf("Text patterns:\n");
452            apprentice_list(ms->mlist[i], TEXTTEST);
453        }
454    }
455    return 0;
456}
457
458protected void
459file_ms_free(struct magic_set *ms)
460{
461    size_t i;
462    if (ms == NULL)
463        return;
464    for (i = 0; i < MAGIC_SETS; i++)
465        mlist_free(ms->mlist[i]);
466    if (ms->o.pbuf) {
467        efree(ms->o.pbuf);
468    }
469    if (ms->o.buf) {
470        efree(ms->o.buf);
471    }
472    if (ms->c.li) {
473        efree(ms->c.li);
474    }
475    efree(ms);
476}
477
478protected struct magic_set *
479file_ms_alloc(int flags)
480{
481    struct magic_set *ms;
482    size_t i, len;
483
484    if ((ms = CAST(struct magic_set *, ecalloc((size_t)1,
485        sizeof(struct magic_set)))) == NULL)
486        return NULL;
487
488    if (magic_setflags(ms, flags) == -1) {
489        errno = EINVAL;
490        goto free;
491    }
492
493    ms->o.buf = ms->o.pbuf = NULL;
494    len = (ms->c.len = 10) * sizeof(*ms->c.li);
495
496    if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
497        goto free;
498
499    ms->event_flags = 0;
500    ms->error = -1;
501    for (i = 0; i < MAGIC_SETS; i++)
502        ms->mlist[i] = NULL;
503    ms->file = "unknown";
504    ms->line = 0;
505    ms->indir_max = FILE_INDIR_MAX;
506    ms->name_max = FILE_NAME_MAX;
507    ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
508    ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
509    ms->elf_notes_max = FILE_ELF_NOTES_MAX;
510    return ms;
511free:
512    efree(ms);
513    return NULL;
514}
515
516private void
517apprentice_unmap(struct magic_map *map)
518{
519    if (map == NULL)
520        return;
521    if (map->p != php_magic_database) {
522        if (map->p == NULL) {
523            int j;
524            for (j = 0; j < MAGIC_SETS; j++) {
525                if (map->magic[j]) {
526                    efree(map->magic[j]);
527                }
528            }
529        } else {
530            efree(map->p);
531        }
532    }
533    efree(map);
534}
535
536private struct mlist *
537mlist_alloc(void)
538{
539    struct mlist *mlist;
540    if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
541        return NULL;
542    }
543    mlist->next = mlist->prev = mlist;
544    return mlist;
545}
546
547private void
548mlist_free(struct mlist *mlist)
549{
550    struct mlist *ml, *next;
551
552    if (mlist == NULL)
553        return;
554
555    ml = mlist->next;
556    for (ml = mlist->next; (next = ml->next) != NULL; ml = next) {
557        if (ml->map)
558            apprentice_unmap(ml->map);
559        efree(ml);
560        if (ml == mlist)
561            break;
562    }
563}
564
565/* const char *fn: list of magic files and directories */
566protected int
567file_apprentice(struct magic_set *ms, const char *fn, int action)
568{
569    char *p, *mfn;
570    int file_err, errs = -1;
571    size_t i;
572
573    if (ms->mlist[0] != NULL)
574        file_reset(ms);
575
576/* XXX disabling default magic loading so the compiled in data is used */
577#if 0
578    if ((fn = magic_getpath(fn, action)) == NULL)
579        return -1;
580#endif
581
582    init_file_tables();
583
584    if (fn == NULL)
585        fn = getenv("MAGIC");
586    if (fn == NULL) {
587        for (i = 0; i < MAGIC_SETS; i++) {
588            mlist_free(ms->mlist[i]);
589            if ((ms->mlist[i] = mlist_alloc()) == NULL) {
590                file_oomem(ms, sizeof(*ms->mlist[i]));
591                return -1;
592            }
593        }
594        return apprentice_1(ms, fn, action);
595    }
596
597    if ((mfn = estrdup(fn)) == NULL) {
598        file_oomem(ms, strlen(fn));
599        return -1;
600    }
601
602    for (i = 0; i < MAGIC_SETS; i++) {
603        mlist_free(ms->mlist[i]);
604        if ((ms->mlist[i] = mlist_alloc()) == NULL) {
605            file_oomem(ms, sizeof(*ms->mlist[i]));
606            while (i-- > 0) {
607                mlist_free(ms->mlist[i]);
608                ms->mlist[i] = NULL;
609            }
610            efree(mfn);
611            return -1;
612        }
613    }
614    fn = mfn;
615
616    while (fn) {
617        p = strchr(fn, PATHSEP);
618        if (p)
619            *p++ = '\0';
620        if (*fn == '\0')
621            break;
622        file_err = apprentice_1(ms, fn, action);
623        errs = MAX(errs, file_err);
624        fn = p;
625    }
626
627    efree(mfn);
628
629    if (errs == -1) {
630        for (i = 0; i < MAGIC_SETS; i++) {
631            mlist_free(ms->mlist[i]);
632            ms->mlist[i] = NULL;
633        }
634        file_error(ms, 0, "could not find any valid magic files!");
635        return -1;
636    }
637
638#if 0
639    /*
640     * Always leave the database loaded
641     */
642    if (action == FILE_LOAD)
643        return 0;
644
645    for (i = 0; i < MAGIC_SETS; i++) {
646        mlist_free(ms->mlist[i]);
647        ms->mlist[i] = NULL;
648    }
649#endif
650
651    switch (action) {
652    case FILE_LOAD:
653    case FILE_COMPILE:
654    case FILE_CHECK:
655    case FILE_LIST:
656        return 0;
657    default:
658        file_error(ms, 0, "Invalid action %d", action);
659        return -1;
660    }
661}
662
663/*
664 * Compute the real length of a magic expression, for the purposes
665 * of determining how "strong" a magic expression is (approximating
666 * how specific its matches are):
667 *  - magic characters count 0 unless escaped.
668 *  - [] expressions count 1
669 *  - {} expressions count 0
670 *  - regular characters or escaped magic characters count 1
671 *  - 0 length expressions count as one
672 */
673private size_t
674nonmagic(const char *str)
675{
676    const char *p;
677    size_t rv = 0;
678
679    for (p = str; *p; p++)
680        switch (*p) {
681        case '\\':  /* Escaped anything counts 1 */
682            if (!*++p)
683                p--;
684            rv++;
685            continue;
686        case '?':   /* Magic characters count 0 */
687        case '*':
688        case '.':
689        case '+':
690        case '^':
691        case '$':
692            continue;
693        case '[':   /* Bracketed expressions count 1 the ']' */
694            while (*p && *p != ']')
695                p++;
696            p--;
697            continue;
698        case '{':   /* Braced expressions count 0 */
699            while (*p && *p != '}')
700                p++;
701            if (!*p)
702                p--;
703            continue;
704        default:    /* Anything else counts 1 */
705            rv++;
706            continue;
707        }
708
709    return rv == 0 ? 1 : rv;    /* Return at least 1 */
710}
711
712/*
713 * Get weight of this magic entry, for sorting purposes.
714 */
715private size_t
716apprentice_magic_strength(const struct magic *m)
717{
718#define MULT 10
719    size_t v, val = 2 * MULT;   /* baseline strength */
720
721    switch (m->type) {
722    case FILE_DEFAULT:  /* make sure this sorts last */
723        if (m->factor_op != FILE_FACTOR_OP_NONE)
724            abort();
725        return 0;
726
727    case FILE_BYTE:
728        val += 1 * MULT;
729        break;
730
731    case FILE_SHORT:
732    case FILE_LESHORT:
733    case FILE_BESHORT:
734        val += 2 * MULT;
735        break;
736
737    case FILE_LONG:
738    case FILE_LELONG:
739    case FILE_BELONG:
740    case FILE_MELONG:
741        val += 4 * MULT;
742        break;
743
744    case FILE_PSTRING:
745    case FILE_STRING:
746        val += m->vallen * MULT;
747        break;
748
749    case FILE_BESTRING16:
750    case FILE_LESTRING16:
751        val += m->vallen * MULT / 2;
752        break;
753
754    case FILE_SEARCH:
755        val += m->vallen * MAX(MULT / m->vallen, 1);
756        break;
757
758    case FILE_REGEX:
759        v = nonmagic(m->value.s);
760        val += v * MAX(MULT / v, 1);
761        break;
762
763    case FILE_DATE:
764    case FILE_LEDATE:
765    case FILE_BEDATE:
766    case FILE_MEDATE:
767    case FILE_LDATE:
768    case FILE_LELDATE:
769    case FILE_BELDATE:
770    case FILE_MELDATE:
771    case FILE_FLOAT:
772    case FILE_BEFLOAT:
773    case FILE_LEFLOAT:
774        val += 4 * MULT;
775        break;
776
777    case FILE_QUAD:
778    case FILE_BEQUAD:
779    case FILE_LEQUAD:
780    case FILE_QDATE:
781    case FILE_LEQDATE:
782    case FILE_BEQDATE:
783    case FILE_QLDATE:
784    case FILE_LEQLDATE:
785    case FILE_BEQLDATE:
786    case FILE_QWDATE:
787    case FILE_LEQWDATE:
788    case FILE_BEQWDATE:
789    case FILE_DOUBLE:
790    case FILE_BEDOUBLE:
791    case FILE_LEDOUBLE:
792        val += 8 * MULT;
793        break;
794
795    case FILE_INDIRECT:
796    case FILE_NAME:
797    case FILE_USE:
798        break;
799
800    default:
801        (void)fprintf(stderr, "Bad type %d\n", m->type);
802        abort();
803    }
804
805    switch (m->reln) {
806    case 'x':   /* matches anything penalize */
807    case '!':       /* matches almost anything penalize */
808        val = 0;
809        break;
810
811    case '=':   /* Exact match, prefer */
812        val += MULT;
813        break;
814
815    case '>':
816    case '<':   /* comparison match reduce strength */
817        val -= 2 * MULT;
818        break;
819
820    case '^':
821    case '&':   /* masking bits, we could count them too */
822        val -= MULT;
823        break;
824
825    default:
826        (void)fprintf(stderr, "Bad relation %c\n", m->reln);
827        abort();
828    }
829
830    if (val == 0)   /* ensure we only return 0 for FILE_DEFAULT */
831        val = 1;
832
833    switch (m->factor_op) {
834    case FILE_FACTOR_OP_NONE:
835        break;
836    case FILE_FACTOR_OP_PLUS:
837        val += m->factor;
838        break;
839    case FILE_FACTOR_OP_MINUS:
840        val -= m->factor;
841        break;
842    case FILE_FACTOR_OP_TIMES:
843        val *= m->factor;
844        break;
845    case FILE_FACTOR_OP_DIV:
846        val /= m->factor;
847        break;
848    default:
849        abort();
850    }
851
852    /*
853     * Magic entries with no description get a bonus because they depend
854     * on subsequent magic entries to print something.
855     */
856    if (m->desc[0] == '\0')
857        val++;
858    return val;
859}
860
861/*
862 * Sort callback for sorting entries by "strength" (basically length)
863 */
864private int
865apprentice_sort(const void *a, const void *b)
866{
867    const struct magic_entry *ma = CAST(const struct magic_entry *, a);
868    const struct magic_entry *mb = CAST(const struct magic_entry *, b);
869    size_t sa = apprentice_magic_strength(ma->mp);
870    size_t sb = apprentice_magic_strength(mb->mp);
871    if (sa == sb)
872        return 0;
873    else if (sa > sb)
874        return -1;
875    else
876        return 1;
877}
878
879/*
880 * Shows sorted patterns list in the order which is used for the matching
881 */
882private void
883apprentice_list(struct mlist *mlist, int mode)
884{
885    uint32_t magindex = 0;
886    struct mlist *ml;
887    for (ml = mlist->next; ml != mlist; ml = ml->next) {
888        for (magindex = 0; magindex < ml->nmagic; magindex++) {
889            struct magic *m = &ml->magic[magindex];
890            if ((m->flag & mode) != mode) {
891                /* Skip sub-tests */
892                while (magindex + 1 < ml->nmagic &&
893                       ml->magic[magindex + 1].cont_level != 0)
894                    ++magindex;
895                continue; /* Skip to next top-level test*/
896            }
897
898            /*
899             * Try to iterate over the tree until we find item with
900             * description/mimetype.
901             */
902            while (magindex + 1 < ml->nmagic &&
903                   ml->magic[magindex + 1].cont_level != 0 &&
904                   *ml->magic[magindex].desc == '\0' &&
905                   *ml->magic[magindex].mimetype == '\0')
906                magindex++;
907
908            printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
909                apprentice_magic_strength(m),
910                ml->magic[magindex].lineno,
911                ml->magic[magindex].desc,
912                ml->magic[magindex].mimetype);
913        }
914    }
915}
916
917private void
918set_test_type(struct magic *mstart, struct magic *m)
919{
920    switch (m->type) {
921    case FILE_BYTE:
922    case FILE_SHORT:
923    case FILE_LONG:
924    case FILE_DATE:
925    case FILE_BESHORT:
926    case FILE_BELONG:
927    case FILE_BEDATE:
928    case FILE_LESHORT:
929    case FILE_LELONG:
930    case FILE_LEDATE:
931    case FILE_LDATE:
932    case FILE_BELDATE:
933    case FILE_LELDATE:
934    case FILE_MEDATE:
935    case FILE_MELDATE:
936    case FILE_MELONG:
937    case FILE_QUAD:
938    case FILE_LEQUAD:
939    case FILE_BEQUAD:
940    case FILE_QDATE:
941    case FILE_LEQDATE:
942    case FILE_BEQDATE:
943    case FILE_QLDATE:
944    case FILE_LEQLDATE:
945    case FILE_BEQLDATE:
946    case FILE_QWDATE:
947    case FILE_LEQWDATE:
948    case FILE_BEQWDATE:
949    case FILE_FLOAT:
950    case FILE_BEFLOAT:
951    case FILE_LEFLOAT:
952    case FILE_DOUBLE:
953    case FILE_BEDOUBLE:
954    case FILE_LEDOUBLE:
955        mstart->flag |= BINTEST;
956        break;
957    case FILE_STRING:
958    case FILE_PSTRING:
959    case FILE_BESTRING16:
960    case FILE_LESTRING16:
961        /* Allow text overrides */
962        if (mstart->str_flags & STRING_TEXTTEST)
963            mstart->flag |= TEXTTEST;
964        else
965            mstart->flag |= BINTEST;
966        break;
967    case FILE_REGEX:
968    case FILE_SEARCH:
969        /* Check for override */
970        if (mstart->str_flags & STRING_BINTEST)
971            mstart->flag |= BINTEST;
972        if (mstart->str_flags & STRING_TEXTTEST)
973            mstart->flag |= TEXTTEST;
974
975        if (mstart->flag & (TEXTTEST|BINTEST))
976            break;
977
978        /* binary test if pattern is not text */
979        if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
980            NULL) <= 0)
981            mstart->flag |= BINTEST;
982        else
983            mstart->flag |= TEXTTEST;
984        break;
985    case FILE_DEFAULT:
986        /* can't deduce anything; we shouldn't see this at the
987           top level anyway */
988        break;
989    case FILE_INVALID:
990    default:
991        /* invalid search type, but no need to complain here */
992        break;
993    }
994}
995
996private int
997addentry(struct magic_set *ms, struct magic_entry *me,
998   struct magic_entry_set *mset)
999{
1000    size_t i = me->mp->type == FILE_NAME ? 1 : 0;
1001    if (mset[i].count == mset[i].max) {
1002        struct magic_entry *mp;
1003
1004        mset[i].max += ALLOC_INCR;
1005        if ((mp = CAST(struct magic_entry *,
1006            erealloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
1007            NULL) {
1008            file_oomem(ms, sizeof(*mp) * mset[i].max);
1009            return -1;
1010        }
1011        (void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1012            ALLOC_INCR);
1013        mset[i].me = mp;
1014    }
1015    mset[i].me[mset[i].count++] = *me;
1016    memset(me, 0, sizeof(*me));
1017    return 0;
1018}
1019
1020/*
1021 * Load and parse one file.
1022 */
1023private void
1024load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1025   struct magic_entry_set *mset)
1026{
1027    char buffer[BUFSIZ + 1];
1028    char *line = NULL;
1029    size_t len;
1030    size_t lineno = 0;
1031    struct magic_entry me;
1032
1033    php_stream *stream;
1034
1035
1036    ms->file = fn;
1037    stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
1038
1039    if (stream == NULL) {
1040        if (errno != ENOENT)
1041            file_error(ms, errno, "cannot read magic file `%s'",
1042                   fn);
1043        (*errs)++;
1044        return;
1045    }
1046
1047    memset(&me, 0, sizeof(me));
1048    /* read and parse this file */
1049    for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
1050        if (len == 0) /* null line, garbage, etc */
1051            continue;
1052        if (line[len - 1] == '\n') {
1053            lineno++;
1054            line[len - 1] = '\0'; /* delete newline */
1055        }
1056        switch (line[0]) {
1057        case '\0':  /* empty, do not parse */
1058        case '#':   /* comment, do not parse */
1059            continue;
1060        case '!':
1061            if (line[1] == ':') {
1062                size_t i;
1063
1064                for (i = 0; bang[i].name != NULL; i++) {
1065                    if ((size_t)(len - 2) > bang[i].len &&
1066                        memcmp(bang[i].name, line + 2,
1067                        bang[i].len) == 0)
1068                        break;
1069                }
1070                if (bang[i].name == NULL) {
1071                    file_error(ms, 0,
1072                        "Unknown !: entry `%s'", line);
1073                    (*errs)++;
1074                    continue;
1075                }
1076                if (me.mp == NULL) {
1077                    file_error(ms, 0,
1078                        "No current entry for :!%s type",
1079                        bang[i].name);
1080                    (*errs)++;
1081                    continue;
1082                }
1083                if ((*bang[i].fun)(ms, &me,
1084                    line + bang[i].len + 2) != 0) {
1085                    (*errs)++;
1086                    continue;
1087                }
1088                continue;
1089            }
1090            /*FALLTHROUGH*/
1091        default:
1092        again:
1093            switch (parse(ms, &me, line, lineno, action)) {
1094            case 0:
1095                continue;
1096            case 1:
1097                (void)addentry(ms, &me, mset);
1098                goto again;
1099            default:
1100                (*errs)++;
1101                break;
1102            }
1103        }
1104    }
1105    if (me.mp)
1106        (void)addentry(ms, &me, mset);
1107    efree(line);
1108    php_stream_close(stream);
1109}
1110
1111/*
1112 * parse a file or directory of files
1113 * const char *fn: name of magic file or directory
1114 */
1115private int
1116cmpstrp(const void *p1, const void *p2)
1117{
1118        return strcmp(*(char *const *)p1, *(char *const *)p2);
1119}
1120
1121
1122private uint32_t
1123set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1124    uint32_t starttest)
1125{
1126    static const char text[] = "text";
1127    static const char binary[] = "binary";
1128    static const size_t len = sizeof(text);
1129
1130    uint32_t i = starttest;
1131
1132    do {
1133        set_test_type(me[starttest].mp, me[i].mp);
1134        if ((ms->flags & MAGIC_DEBUG) == 0)
1135            continue;
1136        (void)fprintf(stderr, "%s%s%s: %s\n",
1137            me[i].mp->mimetype,
1138            me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1139            me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1140            me[i].mp->flag & BINTEST ? binary : text);
1141        if (me[i].mp->flag & BINTEST) {
1142            char *p = strstr(me[i].mp->desc, text);
1143            if (p && (p == me[i].mp->desc ||
1144                isspace((unsigned char)p[-1])) &&
1145                (p + len - me[i].mp->desc == MAXstring
1146                || (p[len] == '\0' ||
1147                isspace((unsigned char)p[len]))))
1148                (void)fprintf(stderr, "*** Possible "
1149                    "binary test for text type\n");
1150        }
1151    } while (++i < nme && me[i].mp->cont_level != 0);
1152    return i;
1153}
1154
1155private void
1156set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1157{
1158    uint32_t i;
1159    for (i = 0; i < nme; i++) {
1160        if (me[i].mp->cont_level == 0 &&
1161            me[i].mp->type == FILE_DEFAULT) {
1162            while (++i < nme)
1163                if (me[i].mp->cont_level == 0)
1164                    break;
1165            if (i != nme) {
1166                /* XXX - Ugh! */
1167                ms->line = me[i].mp->lineno;
1168                file_magwarn(ms,
1169                    "level 0 \"default\" did not sort last");
1170            }
1171            return;
1172        }
1173    }
1174}
1175
1176private int
1177coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1178    struct magic **ma, uint32_t *nma)
1179{
1180    uint32_t i, mentrycount = 0;
1181    size_t slen;
1182
1183    for (i = 0; i < nme; i++)
1184        mentrycount += me[i].cont_count;
1185
1186    slen = sizeof(**ma) * mentrycount;
1187    if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1188        file_oomem(ms, slen);
1189        return -1;
1190    }
1191
1192    mentrycount = 0;
1193    for (i = 0; i < nme; i++) {
1194        (void)memcpy(*ma + mentrycount, me[i].mp,
1195            me[i].cont_count * sizeof(**ma));
1196        mentrycount += me[i].cont_count;
1197    }
1198    *nma = mentrycount;
1199    return 0;
1200}
1201
1202private void
1203magic_entry_free(struct magic_entry *me, uint32_t nme)
1204{
1205    uint32_t i;
1206    if (me == NULL)
1207        return;
1208    for (i = 0; i < nme; i++)
1209        efree(me[i].mp);
1210    efree(me);
1211}
1212
1213private struct magic_map *
1214apprentice_load(struct magic_set *ms, const char *fn, int action)
1215{
1216    int errs = 0;
1217    uint32_t i, j;
1218    size_t files = 0, maxfiles = 0;
1219    char **filearr = NULL;
1220    zend_stat_t st;
1221    struct magic_map *map;
1222    struct magic_entry_set mset[MAGIC_SETS];
1223    php_stream *dir;
1224    php_stream_dirent d;
1225
1226
1227    memset(mset, 0, sizeof(mset));
1228    ms->flags |= MAGIC_CHECK;   /* Enable checks for parsed files */
1229
1230
1231    if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL)
1232    {
1233        file_oomem(ms, sizeof(*map));
1234        return NULL;
1235    }
1236
1237    /* print silly verbose header for USG compat. */
1238    if (action == FILE_CHECK)
1239        (void)fprintf(stderr, "%s\n", usg_hdr);
1240
1241    /* load directory or file */
1242    /* FIXME: Read file names and sort them to prevent
1243       non-determinism. See Debian bug #488562. */
1244    if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1245        int mflen;
1246        char mfn[MAXPATHLEN];
1247
1248        dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1249        if (!dir) {
1250            errs++;
1251            goto out;
1252        }
1253        while (php_stream_readdir(dir, &d)) {
1254            if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1255                file_oomem(ms,
1256                strlen(fn) + strlen(d.d_name) + 2);
1257                errs++;
1258                php_stream_closedir(dir);
1259                goto out;
1260            }
1261            if (zend_stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1262                continue;
1263            }
1264            if (files >= maxfiles) {
1265                size_t mlen;
1266                maxfiles = (maxfiles + 1) * 2;
1267                mlen = maxfiles * sizeof(*filearr);
1268                if ((filearr = CAST(char **,
1269                    erealloc(filearr, mlen))) == NULL) {
1270                    file_oomem(ms, mlen);
1271                    php_stream_closedir(dir);
1272                    errs++;
1273                    goto out;
1274                }
1275            }
1276            filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1277        }
1278        php_stream_closedir(dir);
1279        qsort(filearr, files, sizeof(*filearr), cmpstrp);
1280        for (i = 0; i < files; i++) {
1281            load_1(ms, action, filearr[i], &errs, mset);
1282            efree(filearr[i]);
1283        }
1284        efree(filearr);
1285    } else
1286        load_1(ms, action, fn, &errs, mset);
1287    if (errs)
1288        goto out;
1289
1290    for (j = 0; j < MAGIC_SETS; j++) {
1291        /* Set types of tests */
1292        for (i = 0; i < mset[j].count; ) {
1293            if (mset[j].me[i].mp->cont_level != 0) {
1294                i++;
1295                continue;
1296            }
1297            i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1298        }
1299        qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
1300            apprentice_sort);
1301
1302        /*
1303         * Make sure that any level 0 "default" line is last
1304         * (if one exists).
1305         */
1306        set_last_default(ms, mset[j].me, mset[j].count);
1307
1308        /* coalesce per file arrays into a single one */
1309        if (coalesce_entries(ms, mset[j].me, mset[j].count,
1310            &map->magic[j], &map->nmagic[j]) == -1) {
1311            errs++;
1312            goto out;
1313        }
1314    }
1315
1316out:
1317    for (j = 0; j < MAGIC_SETS; j++)
1318        magic_entry_free(mset[j].me, mset[j].count);
1319
1320    if (errs) {
1321        apprentice_unmap(map);
1322        return NULL;
1323    }
1324    return map;
1325}
1326
1327/*
1328 * extend the sign bit if the comparison is to be signed
1329 */
1330protected uint64_t
1331file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1332{
1333    if (!(m->flag & UNSIGNED)) {
1334        switch(m->type) {
1335        /*
1336         * Do not remove the casts below.  They are
1337         * vital.  When later compared with the data,
1338         * the sign extension must have happened.
1339         */
1340        case FILE_BYTE:
1341            v = (signed char) v;
1342            break;
1343        case FILE_SHORT:
1344        case FILE_BESHORT:
1345        case FILE_LESHORT:
1346            v = (short) v;
1347            break;
1348        case FILE_DATE:
1349        case FILE_BEDATE:
1350        case FILE_LEDATE:
1351        case FILE_MEDATE:
1352        case FILE_LDATE:
1353        case FILE_BELDATE:
1354        case FILE_LELDATE:
1355        case FILE_MELDATE:
1356        case FILE_LONG:
1357        case FILE_BELONG:
1358        case FILE_LELONG:
1359        case FILE_MELONG:
1360        case FILE_FLOAT:
1361        case FILE_BEFLOAT:
1362        case FILE_LEFLOAT:
1363            v = (int32_t) v;
1364            break;
1365        case FILE_QUAD:
1366        case FILE_BEQUAD:
1367        case FILE_LEQUAD:
1368        case FILE_QDATE:
1369        case FILE_QLDATE:
1370        case FILE_QWDATE:
1371        case FILE_BEQDATE:
1372        case FILE_BEQLDATE:
1373        case FILE_BEQWDATE:
1374        case FILE_LEQDATE:
1375        case FILE_LEQLDATE:
1376        case FILE_LEQWDATE:
1377        case FILE_DOUBLE:
1378        case FILE_BEDOUBLE:
1379        case FILE_LEDOUBLE:
1380            v = (int64_t) v;
1381            break;
1382        case FILE_STRING:
1383        case FILE_PSTRING:
1384        case FILE_BESTRING16:
1385        case FILE_LESTRING16:
1386        case FILE_REGEX:
1387        case FILE_SEARCH:
1388        case FILE_DEFAULT:
1389        case FILE_INDIRECT:
1390        case FILE_NAME:
1391        case FILE_USE:
1392        case FILE_CLEAR:
1393            break;
1394        default:
1395            if (ms->flags & MAGIC_CHECK)
1396                file_magwarn(ms, "cannot happen: m->type=%d\n",
1397                    m->type);
1398            return ~0U;
1399        }
1400    }
1401    return v;
1402}
1403
1404private int
1405string_modifier_check(struct magic_set *ms, struct magic *m)
1406{
1407    if ((ms->flags & MAGIC_CHECK) == 0)
1408        return 0;
1409
1410    if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1411        (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1412        file_magwarn(ms,
1413            "'/BHhLl' modifiers are only allowed for pascal strings\n");
1414        return -1;
1415    }
1416    switch (m->type) {
1417    case FILE_BESTRING16:
1418    case FILE_LESTRING16:
1419        if (m->str_flags != 0) {
1420            file_magwarn(ms,
1421                "no modifiers allowed for 16-bit strings\n");
1422            return -1;
1423        }
1424        break;
1425    case FILE_STRING:
1426    case FILE_PSTRING:
1427        if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1428            file_magwarn(ms,
1429                "'/%c' only allowed on regex and search\n",
1430                CHAR_REGEX_OFFSET_START);
1431            return -1;
1432        }
1433        break;
1434    case FILE_SEARCH:
1435        if (m->str_range == 0) {
1436            file_magwarn(ms,
1437                "missing range; defaulting to %d\n",
1438                            STRING_DEFAULT_RANGE);
1439            m->str_range = STRING_DEFAULT_RANGE;
1440            return -1;
1441        }
1442        break;
1443    case FILE_REGEX:
1444        if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1445            file_magwarn(ms, "'/%c' not allowed on regex\n",
1446                CHAR_COMPACT_WHITESPACE);
1447            return -1;
1448        }
1449        if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1450            file_magwarn(ms, "'/%c' not allowed on regex\n",
1451                CHAR_COMPACT_OPTIONAL_WHITESPACE);
1452            return -1;
1453        }
1454        break;
1455    default:
1456        file_magwarn(ms, "coding error: m->type=%d\n",
1457            m->type);
1458        return -1;
1459    }
1460    return 0;
1461}
1462
1463private int
1464get_op(char c)
1465{
1466    switch (c) {
1467    case '&':
1468        return FILE_OPAND;
1469    case '|':
1470        return FILE_OPOR;
1471    case '^':
1472        return FILE_OPXOR;
1473    case '+':
1474        return FILE_OPADD;
1475    case '-':
1476        return FILE_OPMINUS;
1477    case '*':
1478        return FILE_OPMULTIPLY;
1479    case '/':
1480        return FILE_OPDIVIDE;
1481    case '%':
1482        return FILE_OPMODULO;
1483    default:
1484        return -1;
1485    }
1486}
1487
1488#ifdef ENABLE_CONDITIONALS
1489private int
1490get_cond(const char *l, const char **t)
1491{
1492    static const struct cond_tbl_s {
1493        char name[8];
1494        size_t len;
1495        int cond;
1496    } cond_tbl[] = {
1497        { "if",     2,  COND_IF },
1498        { "elif",   4,  COND_ELIF },
1499        { "else",   4,  COND_ELSE },
1500        { "",       0,  COND_NONE },
1501    };
1502    const struct cond_tbl_s *p;
1503
1504    for (p = cond_tbl; p->len; p++) {
1505        if (strncmp(l, p->name, p->len) == 0 &&
1506            isspace((unsigned char)l[p->len])) {
1507            if (t)
1508                *t = l + p->len;
1509            break;
1510        }
1511    }
1512    return p->cond;
1513}
1514
1515private int
1516check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1517{
1518    int last_cond;
1519    last_cond = ms->c.li[cont_level].last_cond;
1520
1521    switch (cond) {
1522    case COND_IF:
1523        if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1524            if (ms->flags & MAGIC_CHECK)
1525                file_magwarn(ms, "syntax error: `if'");
1526            return -1;
1527        }
1528        last_cond = COND_IF;
1529        break;
1530
1531    case COND_ELIF:
1532        if (last_cond != COND_IF && last_cond != COND_ELIF) {
1533            if (ms->flags & MAGIC_CHECK)
1534                file_magwarn(ms, "syntax error: `elif'");
1535            return -1;
1536        }
1537        last_cond = COND_ELIF;
1538        break;
1539
1540    case COND_ELSE:
1541        if (last_cond != COND_IF && last_cond != COND_ELIF) {
1542            if (ms->flags & MAGIC_CHECK)
1543                file_magwarn(ms, "syntax error: `else'");
1544            return -1;
1545        }
1546        last_cond = COND_NONE;
1547        break;
1548
1549    case COND_NONE:
1550        last_cond = COND_NONE;
1551        break;
1552    }
1553
1554    ms->c.li[cont_level].last_cond = last_cond;
1555    return 0;
1556}
1557#endif /* ENABLE_CONDITIONALS */
1558
1559private int
1560parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1561{
1562    const char *l = *lp;
1563
1564    while (!isspace((unsigned char)*++l))
1565        switch (*l) {
1566        case CHAR_INDIRECT_RELATIVE:
1567            m->str_flags |= INDIRECT_RELATIVE;
1568            break;
1569        default:
1570            if (ms->flags & MAGIC_CHECK)
1571                file_magwarn(ms, "indirect modifier `%c' "
1572                    "invalid", *l);
1573            *lp = l;
1574            return -1;
1575        }
1576    *lp = l;
1577    return 0;
1578}
1579
1580private void
1581parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1582    int op)
1583{
1584    const char *l = *lp;
1585    char *t;
1586    uint64_t val;
1587
1588    ++l;
1589    m->mask_op |= op;
1590    val = (uint64_t)strtoull(l, &t, 0);
1591    l = t;
1592    m->num_mask = file_signextend(ms, m, val);
1593    eatsize(&l);
1594    *lp = l;
1595}
1596
1597private int
1598parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1599{
1600    const char *l = *lp;
1601    char *t;
1602    int have_range = 0;
1603
1604    while (!isspace((unsigned char)*++l)) {
1605        switch (*l) {
1606        case '0':  case '1':  case '2':
1607        case '3':  case '4':  case '5':
1608        case '6':  case '7':  case '8':
1609        case '9':
1610            if (have_range && (ms->flags & MAGIC_CHECK))
1611                file_magwarn(ms, "multiple ranges");
1612            have_range = 1;
1613            m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
1614            if (m->str_range == 0)
1615                file_magwarn(ms, "zero range");
1616            l = t - 1;
1617            break;
1618        case CHAR_COMPACT_WHITESPACE:
1619            m->str_flags |= STRING_COMPACT_WHITESPACE;
1620            break;
1621        case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1622            m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
1623            break;
1624        case CHAR_IGNORE_LOWERCASE:
1625            m->str_flags |= STRING_IGNORE_LOWERCASE;
1626            break;
1627        case CHAR_IGNORE_UPPERCASE:
1628            m->str_flags |= STRING_IGNORE_UPPERCASE;
1629            break;
1630        case CHAR_REGEX_OFFSET_START:
1631            m->str_flags |= REGEX_OFFSET_START;
1632            break;
1633        case CHAR_BINTEST:
1634            m->str_flags |= STRING_BINTEST;
1635            break;
1636        case CHAR_TEXTTEST:
1637            m->str_flags |= STRING_TEXTTEST;
1638            break;
1639        case CHAR_TRIM:
1640            m->str_flags |= STRING_TRIM;
1641            break;
1642        case CHAR_PSTRING_1_LE:
1643#define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
1644            if (m->type != FILE_PSTRING)
1645                goto bad;
1646            SET_LENGTH(PSTRING_1_LE);
1647            break;
1648        case CHAR_PSTRING_2_BE:
1649            if (m->type != FILE_PSTRING)
1650                goto bad;
1651            SET_LENGTH(PSTRING_2_BE);
1652            break;
1653        case CHAR_PSTRING_2_LE:
1654            if (m->type != FILE_PSTRING)
1655                goto bad;
1656            SET_LENGTH(PSTRING_2_LE);
1657            break;
1658        case CHAR_PSTRING_4_BE:
1659            if (m->type != FILE_PSTRING)
1660                goto bad;
1661            SET_LENGTH(PSTRING_4_BE);
1662            break;
1663        case CHAR_PSTRING_4_LE:
1664            switch (m->type) {
1665            case FILE_PSTRING:
1666            case FILE_REGEX:
1667                break;
1668            default:
1669                goto bad;
1670            }
1671            SET_LENGTH(PSTRING_4_LE);
1672            break;
1673        case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1674            if (m->type != FILE_PSTRING)
1675                goto bad;
1676            m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1677            break;
1678        default:
1679        bad:
1680            if (ms->flags & MAGIC_CHECK)
1681                file_magwarn(ms, "string modifier `%c' "
1682                    "invalid", *l);
1683            goto out;
1684        }
1685        /* allow multiple '/' for readability */
1686        if (l[1] == '/' && !isspace((unsigned char)l[2]))
1687            l++;
1688    }
1689    if (string_modifier_check(ms, m) == -1)
1690        goto out;
1691    *lp = l;
1692    return 0;
1693out:
1694    *lp = l;
1695    return -1;
1696}
1697
1698/*
1699 * parse one line from magic file, put into magic[index++] if valid
1700 */
1701private int
1702parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1703    size_t lineno, int action)
1704{
1705#ifdef ENABLE_CONDITIONALS
1706    static uint32_t last_cont_level = 0;
1707#endif
1708    size_t i;
1709    struct magic *m;
1710    const char *l = line;
1711    char *t;
1712    int op;
1713    uint32_t cont_level;
1714    int32_t diff;
1715
1716    cont_level = 0;
1717
1718    /*
1719     * Parse the offset.
1720     */
1721    while (*l == '>') {
1722        ++l;        /* step over */
1723        cont_level++;
1724    }
1725#ifdef ENABLE_CONDITIONALS
1726    if (cont_level == 0 || cont_level > last_cont_level)
1727        if (file_check_mem(ms, cont_level) == -1)
1728            return -1;
1729    last_cont_level = cont_level;
1730#endif
1731    if (cont_level != 0) {
1732        if (me->mp == NULL) {
1733            file_magerror(ms, "No current entry for continuation");
1734            return -1;
1735        }
1736        if (me->cont_count == 0) {
1737            file_magerror(ms, "Continuations present with 0 count");
1738            return -1;
1739        }
1740        m = &me->mp[me->cont_count - 1];
1741        diff = (int32_t)cont_level - (int32_t)m->cont_level;
1742        if (diff > 1)
1743            file_magwarn(ms, "New continuation level %u is more "
1744                "than one larger than current level %u", cont_level,
1745                m->cont_level);
1746        if (me->cont_count == me->max_count) {
1747            struct magic *nm;
1748            size_t cnt = me->max_count + ALLOC_CHUNK;
1749            if ((nm = CAST(struct magic *, erealloc(me->mp,
1750                sizeof(*nm) * cnt))) == NULL) {
1751                file_oomem(ms, sizeof(*nm) * cnt);
1752                return -1;
1753            }
1754            me->mp = m = nm;
1755            me->max_count = CAST(uint32_t, cnt);
1756        }
1757        m = &me->mp[me->cont_count++];
1758        (void)memset(m, 0, sizeof(*m));
1759        m->cont_level = cont_level;
1760    } else {
1761        static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1762        if (me->mp != NULL)
1763            return 1;
1764        if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
1765            file_oomem(ms, len);
1766            return -1;
1767        }
1768        me->mp = m;
1769        me->max_count = ALLOC_CHUNK;
1770        (void)memset(m, 0, sizeof(*m));
1771        m->factor_op = FILE_FACTOR_OP_NONE;
1772        m->cont_level = 0;
1773        me->cont_count = 1;
1774    }
1775    m->lineno = CAST(uint32_t, lineno);
1776
1777    if (*l == '&') {  /* m->cont_level == 0 checked below. */
1778                ++l;            /* step over */
1779                m->flag |= OFFADD;
1780        }
1781    if (*l == '(') {
1782        ++l;        /* step over */
1783        m->flag |= INDIR;
1784        if (m->flag & OFFADD)
1785            m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1786
1787        if (*l == '&') {  /* m->cont_level == 0 checked below */
1788            ++l;            /* step over */
1789            m->flag |= OFFADD;
1790        }
1791    }
1792    /* Indirect offsets are not valid at level 0. */
1793    if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1794        if (ms->flags & MAGIC_CHECK)
1795            file_magwarn(ms, "relative offset at level 0");
1796
1797    /* get offset, then skip over it */
1798    m->offset = (uint32_t)strtoul(l, &t, 0);
1799        if (l == t)
1800        if (ms->flags & MAGIC_CHECK)
1801            file_magwarn(ms, "offset `%s' invalid", l);
1802        l = t;
1803
1804    if (m->flag & INDIR) {
1805        m->in_type = FILE_LONG;
1806        m->in_offset = 0;
1807        /*
1808         * read [.lbs][+-]nnnnn)
1809         */
1810        if (*l == '.') {
1811            l++;
1812            switch (*l) {
1813            case 'l':
1814                m->in_type = FILE_LELONG;
1815                break;
1816            case 'L':
1817                m->in_type = FILE_BELONG;
1818                break;
1819            case 'm':
1820                m->in_type = FILE_MELONG;
1821                break;
1822            case 'h':
1823            case 's':
1824                m->in_type = FILE_LESHORT;
1825                break;
1826            case 'H':
1827            case 'S':
1828                m->in_type = FILE_BESHORT;
1829                break;
1830            case 'c':
1831            case 'b':
1832            case 'C':
1833            case 'B':
1834                m->in_type = FILE_BYTE;
1835                break;
1836            case 'e':
1837            case 'f':
1838            case 'g':
1839                m->in_type = FILE_LEDOUBLE;
1840                break;
1841            case 'E':
1842            case 'F':
1843            case 'G':
1844                m->in_type = FILE_BEDOUBLE;
1845                break;
1846            case 'i':
1847                m->in_type = FILE_LEID3;
1848                break;
1849            case 'I':
1850                m->in_type = FILE_BEID3;
1851                break;
1852            default:
1853                if (ms->flags & MAGIC_CHECK)
1854                    file_magwarn(ms,
1855                        "indirect offset type `%c' invalid",
1856                        *l);
1857                break;
1858            }
1859            l++;
1860        }
1861
1862        m->in_op = 0;
1863        if (*l == '~') {
1864            m->in_op |= FILE_OPINVERSE;
1865            l++;
1866        }
1867        if ((op = get_op(*l)) != -1) {
1868            m->in_op |= op;
1869            l++;
1870        }
1871        if (*l == '(') {
1872            m->in_op |= FILE_OPINDIRECT;
1873            l++;
1874        }
1875        if (isdigit((unsigned char)*l) || *l == '-') {
1876            m->in_offset = (int32_t)strtol(l, &t, 0);
1877            if (l == t)
1878                if (ms->flags & MAGIC_CHECK)
1879                    file_magwarn(ms,
1880                        "in_offset `%s' invalid", l);
1881            l = t;
1882        }
1883        if (*l++ != ')' ||
1884            ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1885            if (ms->flags & MAGIC_CHECK)
1886                file_magwarn(ms,
1887                    "missing ')' in indirect offset");
1888    }
1889    EATAB;
1890
1891#ifdef ENABLE_CONDITIONALS
1892    m->cond = get_cond(l, &l);
1893    if (check_cond(ms, m->cond, cont_level) == -1)
1894        return -1;
1895
1896    EATAB;
1897#endif
1898
1899    /*
1900     * Parse the type.
1901     */
1902    if (*l == 'u') {
1903        /*
1904         * Try it as a keyword type prefixed by "u"; match what
1905         * follows the "u".  If that fails, try it as an SUS
1906         * integer type.
1907         */
1908        m->type = get_type(type_tbl, l + 1, &l);
1909        if (m->type == FILE_INVALID) {
1910            /*
1911             * Not a keyword type; parse it as an SUS type,
1912             * 'u' possibly followed by a number or C/S/L.
1913             */
1914            m->type = get_standard_integer_type(l, &l);
1915        }
1916        /* It's unsigned. */
1917        if (m->type != FILE_INVALID)
1918            m->flag |= UNSIGNED;
1919    } else {
1920        /*
1921         * Try it as a keyword type.  If that fails, try it as
1922         * an SUS integer type if it begins with "d" or as an
1923         * SUS string type if it begins with "s".  In any case,
1924         * it's not unsigned.
1925         */
1926        m->type = get_type(type_tbl, l, &l);
1927        if (m->type == FILE_INVALID) {
1928            /*
1929             * Not a keyword type; parse it as an SUS type,
1930             * either 'd' possibly followed by a number or
1931             * C/S/L, or just 's'.
1932             */
1933            if (*l == 'd')
1934                m->type = get_standard_integer_type(l, &l);
1935            else if (*l == 's' && !isalpha((unsigned char)l[1])) {
1936                m->type = FILE_STRING;
1937        ++l;
1938            }
1939        }
1940    }
1941
1942    if (m->type == FILE_INVALID) {
1943        /* Not found - try it as a special keyword. */
1944        m->type = get_type(special_tbl, l, &l);
1945    }
1946
1947    if (m->type == FILE_INVALID) {
1948        if (ms->flags & MAGIC_CHECK)
1949            file_magwarn(ms, "type `%s' invalid", l);
1950        return -1;
1951    }
1952
1953    /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1954    /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1955
1956    m->mask_op = 0;
1957    if (*l == '~') {
1958        if (!IS_LIBMAGIC_STRING(m->type))
1959            m->mask_op |= FILE_OPINVERSE;
1960        else if (ms->flags & MAGIC_CHECK)
1961            file_magwarn(ms, "'~' invalid for string types");
1962        ++l;
1963    }
1964    m->str_range = 0;
1965    m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1966    if ((op = get_op(*l)) != -1) {
1967        if (IS_LIBMAGIC_STRING(m->type)) {
1968            int r;
1969
1970            if (op != FILE_OPDIVIDE) {
1971                if (ms->flags & MAGIC_CHECK)
1972                    file_magwarn(ms,
1973                        "invalid string/indirect op: "
1974                        "`%c'", *t);
1975                return -1;
1976            }
1977
1978            if (m->type == FILE_INDIRECT)
1979                r = parse_indirect_modifier(ms, m, &l);
1980            else
1981                r = parse_string_modifier(ms, m, &l);
1982            if (r == -1)
1983                return -1;
1984        } else
1985            parse_op_modifier(ms, m, &l, op);
1986    }
1987
1988    /*
1989     * We used to set mask to all 1's here, instead let's just not do
1990     * anything if mask = 0 (unless you have a better idea)
1991     */
1992    EATAB;
1993
1994    switch (*l) {
1995    case '>':
1996    case '<':
1997        m->reln = *l;
1998        ++l;
1999        if (*l == '=') {
2000            if (ms->flags & MAGIC_CHECK) {
2001                file_magwarn(ms, "%c= not supported",
2002                    m->reln);
2003                return -1;
2004            }
2005           ++l;
2006        }
2007        break;
2008    /* Old-style anding: "0 byte &0x80 dynamically linked" */
2009    case '&':
2010    case '^':
2011    case '=':
2012        m->reln = *l;
2013        ++l;
2014        if (*l == '=') {
2015           /* HP compat: ignore &= etc. */
2016           ++l;
2017        }
2018        break;
2019    case '!':
2020        m->reln = *l;
2021        ++l;
2022        break;
2023    default:
2024        m->reln = '=';  /* the default relation */
2025        if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
2026            isspace((unsigned char)l[1])) || !l[1])) {
2027            m->reln = *l;
2028            ++l;
2029        }
2030        break;
2031    }
2032    /*
2033     * Grab the value part, except for an 'x' reln.
2034     */
2035    if (m->reln != 'x' && getvalue(ms, m, &l, action))
2036        return -1;
2037
2038    /*
2039     * TODO finish this macro and start using it!
2040     * #define offsetcheck {if (offset > HOWMANY-1)
2041     *  magwarn("offset too big"); }
2042     */
2043
2044    /*
2045     * Now get last part - the description
2046     */
2047    EATAB;
2048    if (l[0] == '\b') {
2049        ++l;
2050        m->flag |= NOSPACE;
2051    } else if ((l[0] == '\\') && (l[1] == 'b')) {
2052        ++l;
2053        ++l;
2054        m->flag |= NOSPACE;
2055    }
2056    for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2057        continue;
2058    if (i == sizeof(m->desc)) {
2059        m->desc[sizeof(m->desc) - 1] = '\0';
2060        if (ms->flags & MAGIC_CHECK)
2061            file_magwarn(ms, "description `%s' truncated", m->desc);
2062    }
2063
2064        /*
2065     * We only do this check while compiling, or if any of the magic
2066     * files were not compiled.
2067         */
2068        if (ms->flags & MAGIC_CHECK) {
2069        if (check_format(ms, m) == -1)
2070            return -1;
2071    }
2072    m->mimetype[0] = '\0';      /* initialise MIME type to none */
2073    return 0;
2074}
2075
2076/*
2077 * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2078 * if valid
2079 */
2080private int
2081parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
2082{
2083    const char *l = line;
2084    char *el;
2085    unsigned long factor;
2086    struct magic *m = &me->mp[0];
2087
2088    if (m->factor_op != FILE_FACTOR_OP_NONE) {
2089        file_magwarn(ms,
2090            "Current entry already has a strength type: %c %d",
2091            m->factor_op, m->factor);
2092        return -1;
2093    }
2094    if (m->type == FILE_NAME) {
2095        file_magwarn(ms, "%s: Strength setting is not supported in "
2096            "\"name\" magic entries", m->value.s);
2097        return -1;
2098    }
2099    EATAB;
2100    switch (*l) {
2101    case FILE_FACTOR_OP_NONE:
2102    case FILE_FACTOR_OP_PLUS:
2103    case FILE_FACTOR_OP_MINUS:
2104    case FILE_FACTOR_OP_TIMES:
2105    case FILE_FACTOR_OP_DIV:
2106        m->factor_op = *l++;
2107        break;
2108    default:
2109        file_magwarn(ms, "Unknown factor op `%c'", *l);
2110        return -1;
2111    }
2112    EATAB;
2113    factor = strtoul(l, &el, 0);
2114    if (factor > 255) {
2115        file_magwarn(ms, "Too large factor `%lu'", factor);
2116        goto out;
2117    }
2118    if (*el && !isspace((unsigned char)*el)) {
2119        file_magwarn(ms, "Bad factor `%s'", l);
2120        goto out;
2121    }
2122    m->factor = (uint8_t)factor;
2123    if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2124        file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2125            m->factor_op, m->factor);
2126        goto out;
2127    }
2128    return 0;
2129out:
2130    m->factor_op = FILE_FACTOR_OP_NONE;
2131    m->factor = 0;
2132    return -1;
2133}
2134
/*
 * Return non-zero when x is an ASCII letter or digit, or occurs in the
 * string `extra'.  Because the lookup is done with strchr(), x == '\0'
 * matches the terminator of `extra' and is therefore reported as good.
 */
private int
goodchar(unsigned char x, const char *extra)
{
    if (isascii(x) && isalnum(x))
        return 1;
    return strchr(extra, x) != NULL;
}
2140
2141private int
2142parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2143    zend_off_t off, size_t len, const char *name, const char *extra, int nt)
2144{
2145    size_t i;
2146    const char *l = line;
2147    struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2148    char *buf = CAST(char *, CAST(void *, m)) + off;
2149
2150    if (buf[0] != '\0') {
2151        len = nt ? strlen(buf) : len;
2152        file_magwarn(ms, "Current entry already has a %s type "
2153            "`%.*s', new type `%s'", name, (int)len, buf, l);
2154        return -1;
2155    }
2156
2157    if (*m->desc == '\0') {
2158        file_magwarn(ms, "Current entry does not yet have a "
2159            "description for adding a %s type", name);
2160        return -1;
2161    }
2162
2163    EATAB;
2164    for (i = 0; *l && i < len && goodchar(*l, extra); buf[i++] = *l++)
2165        continue;
2166
2167    if (i == len && *l) {
2168        if (nt)
2169            buf[len - 1] = '\0';
2170        if (ms->flags & MAGIC_CHECK)
2171            file_magwarn(ms, "%s type `%s' truncated %"
2172                SIZE_T_FORMAT "u", name, line, i);
2173    } else {
2174        if (!isspace((unsigned char)*l) && !goodchar(*l, extra))
2175            file_magwarn(ms, "%s type `%s' has bad char '%c'",
2176                name, line, *l);
2177        if (nt)
2178            buf[i] = '\0';
2179    }
2180
2181    if (i > 0)
2182        return 0;
2183
2184    file_magerror(ms, "Bad magic entry '%s'", line);
2185    return -1;
2186}
2187
2188/*
2189 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2190 * magic[index - 1]
2191 */
2192private int
2193parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
2194{
2195    struct magic *m = &me->mp[0];
2196
2197    return parse_extra(ms, me, line,
2198        CAST(off_t, offsetof(struct magic, apple)),
2199        sizeof(m->apple), "APPLE", "!+-./", 0);
2200}
2201
2202/*
2203 * parse a MIME annotation line from magic file, put into magic[index - 1]
2204 * if valid
2205 */
2206private int
2207parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
2208{
2209    struct magic *m = &me->mp[0];
2210
2211    return parse_extra(ms, me, line,
2212        CAST(zend_off_t, offsetof(struct magic, mimetype)),
2213        sizeof(m->mimetype), "MIME", "+-/.", 1);
2214}
2215
/*
 * Validate a single printf conversion against a magic type.
 *
 * `ptr' points at the text following a '%' (check_format() advances past
 * the '%' before calling).  `type' indexes file_formats[] to select the
 * format class that decides which conversions are acceptable.
 *
 * Returns 0 when the conversion is acceptable and -1 when it is missing or
 * not acceptable.  Calls abort() for a type/format class combination the
 * code does not expect.
 */
private int
check_format_type(const char *ptr, int type)
{
    /* h: how many 'h' length modifiers the type allows (only set for non-quad) */
    int quad = 0, h;
    if (*ptr == '\0') {
        /* Missing format string; bad */
        return -1;
    }

    switch (file_formats[type]) {
    case FILE_FMT_QUAD:
        quad = 1;
        /*FALLTHROUGH*/
    case FILE_FMT_NUM:
        if (quad == 0) {
            switch (type) {
            case FILE_BYTE:
                h = 2;
                break;
            case FILE_SHORT:
            case FILE_BESHORT:
            case FILE_LESHORT:
                h = 1;
                break;
            case FILE_LONG:
            case FILE_BELONG:
            case FILE_LELONG:
            case FILE_MELONG:
            case FILE_LEID3:
            case FILE_BEID3:
            case FILE_INDIRECT:
                h = 0;
                break;
            default:
                abort();
            }
        } else
            h = 0;
        /* skip an optional '-' flag, then digits with optional '.' separators */
        if (*ptr == '-')
            ptr++;
        if (*ptr == '.')
            ptr++;
        while (isdigit((unsigned char)*ptr)) ptr++;
        if (*ptr == '.')
            ptr++;
        while (isdigit((unsigned char)*ptr)) ptr++;
        /* quad types require the "ll" length modifier */
        if (quad) {
            if (*ptr++ != 'l')
                return -1;
            if (*ptr++ != 'l')
                return -1;
        }

        switch (*ptr++) {
#ifdef STRICT_FORMAT    /* "long" formats are int formats for us */
        /* so don't accept the 'l' modifier */
        case 'l':
            switch (*ptr++) {
            case 'i':
            case 'd':
            case 'u':
            case 'o':
            case 'x':
            case 'X':
                return h != 0 ? -1 : 0;
            default:
                return -1;
            }

        /*
         * Don't accept h and hh modifiers. They make writing
         * magic entries more complicated, for very little benefit
         */
        case 'h':
            /* each 'h' consumed must be covered by the allowance in h */
            if (h-- <= 0)
                return -1;
            switch (*ptr++) {
            case 'h':
                if (h-- <= 0)
                    return -1;
                switch (*ptr++) {
                case 'i':
                case 'd':
                case 'u':
                case 'o':
                case 'x':
                case 'X':
                    return 0;
                default:
                    return -1;
                }
            case 'i':
            case 'd':
            case 'u':
            case 'o':
            case 'x':
            case 'X':
                return h != 0 ? -1 : 0;
            default:
                return -1;
            }
#endif
        case 'c':
            /* %c is accepted only when h == 2, i.e. for FILE_BYTE */
            return h != 2 ? -1 : 0;
        case 'i':
        case 'd':
        case 'u':
        case 'o':
        case 'x':
        case 'X':
#ifdef STRICT_FORMAT
            return h != 0 ? -1 : 0;
#else
            return 0;
#endif
        default:
            return -1;
        }

    case FILE_FMT_FLOAT:
    case FILE_FMT_DOUBLE:
        /* same flag/width/precision skipping as the integer case */
        if (*ptr == '-')
            ptr++;
        if (*ptr == '.')
            ptr++;
        while (isdigit((unsigned char)*ptr)) ptr++;
        if (*ptr == '.')
            ptr++;
        while (isdigit((unsigned char)*ptr)) ptr++;

        switch (*ptr++) {
        case 'e':
        case 'E':
        case 'f':
        case 'F':
        case 'g':
        case 'G':
            return 0;

        default:
            return -1;
        }


    case FILE_FMT_STR:
        /* optional '-', width digits, then optional ".precision"; only %s is valid */
        if (*ptr == '-')
            ptr++;
        while (isdigit((unsigned char )*ptr))
            ptr++;
        if (*ptr == '.') {
            ptr++;
            while (isdigit((unsigned char )*ptr))
                ptr++;
        }

        switch (*ptr++) {
        case 's':
            return 0;
        default:
            return -1;
        }

    default:
        /* internal error */
        abort();
    }
    /*NOTREACHED*/
    return -1;
}
2385
2386/*
2387 * Check that the optional printf format in description matches
2388 * the type of the magic.
2389 */
2390private int
2391check_format(struct magic_set *ms, struct magic *m)
2392{
2393    char *ptr;
2394
2395    for (ptr = m->desc; *ptr; ptr++)
2396        if (*ptr == '%')
2397            break;
2398    if (*ptr == '\0') {
2399        /* No format string; ok */
2400        return 1;
2401    }
2402
2403    assert(file_nformats == file_nnames);
2404
2405    if (m->type >= file_nformats) {
2406        file_magwarn(ms, "Internal error inconsistency between "
2407            "m->type and format strings");
2408        return -1;
2409    }
2410    if (file_formats[m->type] == FILE_FMT_NONE) {
2411        file_magwarn(ms, "No format string for `%s' with description "
2412            "`%s'", m->desc, file_names[m->type]);
2413        return -1;
2414    }
2415
2416    ptr++;
2417    if (check_format_type(ptr, m->type) == -1) {
2418        /*
2419         * TODO: this error message is unhelpful if the format
2420         * string is not one character long
2421         */
2422        file_magwarn(ms, "Printf format `%c' is not valid for type "
2423            "`%s' in description `%s'", *ptr ? *ptr : '?',
2424            file_names[m->type], m->desc);
2425        return -1;
2426    }
2427
2428    for (; *ptr; ptr++) {
2429        if (*ptr == '%') {
2430            file_magwarn(ms,
2431                "Too many format strings (should have at most one) "
2432                "for `%s' with description `%s'",
2433                file_names[m->type], m->desc);
2434            return -1;
2435        }
2436    }
2437    return 0;
2438}
2439
2440/*
2441 * Read a numeric value from a pointer, into the value union of a magic
2442 * pointer, according to the magic type.  Update the string pointer to point
2443 * just after the number read.  Return 0 for success, non-zero for failure.
2444 */
2445private int
2446getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2447{
2448    switch (m->type) {
2449    case FILE_BESTRING16:
2450    case FILE_LESTRING16:
2451    case FILE_STRING:
2452    case FILE_PSTRING:
2453    case FILE_REGEX:
2454    case FILE_SEARCH:
2455    case FILE_NAME:
2456    case FILE_USE:
2457        *p = getstr(ms, m, *p, action == FILE_COMPILE);
2458        if (*p == NULL) {
2459            if (ms->flags & MAGIC_CHECK)
2460                file_magwarn(ms, "cannot get string from `%s'",
2461                    m->value.s);
2462            return -1;
2463        }
2464        if (m->type == FILE_REGEX) {
2465            /*  XXX do we need this? */
2466            /*zval pattern;
2467            int options = 0;
2468            pcre_cache_entry *pce;
2469
2470            convert_libmagic_pattern(&pattern, m->value.s, strlen(m->value.s), options);
2471
2472            if ((pce = pcre_get_compiled_regex_cache(Z_STR(pattern))) == NULL) {
2473                return -1;
2474            }
2475
2476            return 0;*/
2477        }
2478        return 0;
2479    case FILE_FLOAT:
2480    case FILE_BEFLOAT:
2481    case FILE_LEFLOAT:
2482        if (m->reln != 'x') {
2483            char *ep;
2484#ifdef HAVE_STRTOF
2485            m->value.f = strtof(*p, &ep);
2486#else
2487            m->value.f = (float)strtod(*p, &ep);
2488#endif
2489            *p = ep;
2490        }
2491        return 0;
2492    case FILE_DOUBLE:
2493    case FILE_BEDOUBLE:
2494    case FILE_LEDOUBLE:
2495        if (m->reln != 'x') {
2496            char *ep;
2497            m->value.d = strtod(*p, &ep);
2498            *p = ep;
2499        }
2500        return 0;
2501    default:
2502        if (m->reln != 'x') {
2503            char *ep;
2504            m->value.q = file_signextend(ms, m,
2505                (uint64_t)strtoull(*p, &ep, 0));
2506            *p = ep;
2507            eatsize(p);
2508        }
2509        return 0;
2510    }
2511}
2512
2513/*
2514 * Convert a string containing C character escapes.  Stop at an unescaped
2515 * space or tab.
2516 * Copy the converted version to "m->value.s", and the length in m->vallen.
2517 * Return updated scan pointer as function result. Warn if set.
2518 */
2519private const char *
2520getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2521{
2522    const char *origs = s;
2523    char    *p = m->value.s;
2524    size_t  plen = sizeof(m->value.s);
2525    char    *origp = p;
2526    char    *pmax = p + plen - 1;
2527    int c;
2528    int val;
2529
2530    while ((c = *s++) != '\0') {
2531        if (isspace((unsigned char) c))
2532            break;
2533        if (p >= pmax) {
2534            file_error(ms, 0, "string too long: `%s'", origs);
2535            return NULL;
2536        }
2537        if (c == '\\') {
2538            switch(c = *s++) {
2539
2540            case '\0':
2541                if (warn)
2542                    file_magwarn(ms, "incomplete escape");
2543                goto out;
2544
2545            case '\t':
2546                if (warn) {
2547                    file_magwarn(ms,
2548                        "escaped tab found, use \\t instead");
2549                    warn = 0;   /* already did */
2550                }
2551                /*FALLTHROUGH*/
2552            default:
2553                if (warn) {
2554                    if (isprint((unsigned char)c)) {
2555                        /* Allow escaping of
2556                         * ``relations'' */
2557                        if (strchr("<>&^=!", c) == NULL
2558                            && (m->type != FILE_REGEX ||
2559                            strchr("[]().*?^$|{}", c)
2560                            == NULL)) {
2561                            file_magwarn(ms, "no "
2562                                "need to escape "
2563                                "`%c'", c);
2564                        }
2565                    } else {
2566                        file_magwarn(ms,
2567                            "unknown escape sequence: "
2568                            "\\%03o", c);
2569                    }
2570                }
2571                /*FALLTHROUGH*/
2572            /* space, perhaps force people to use \040? */
2573            case ' ':
2574#if 0
2575            /*
2576             * Other things people escape, but shouldn't need to,
2577             * so we disallow them
2578             */
2579            case '\'':
2580            case '"':
2581            case '?':
2582#endif
2583            /* Relations */
2584            case '>':
2585            case '<':
2586            case '&':
2587            case '^':
2588            case '=':
2589            case '!':
2590            /* and baskslash itself */
2591            case '\\':
2592                *p++ = (char) c;
2593                break;
2594
2595            case 'a':
2596                *p++ = '\a';
2597                break;
2598
2599            case 'b':
2600                *p++ = '\b';
2601                break;
2602
2603            case 'f':
2604                *p++ = '\f';
2605                break;
2606
2607            case 'n':
2608                *p++ = '\n';
2609                break;
2610
2611            case 'r':
2612                *p++ = '\r';
2613                break;
2614
2615            case 't':
2616                *p++ = '\t';
2617                break;
2618
2619            case 'v':
2620                *p++ = '\v';
2621                break;
2622
2623            /* \ and up to 3 octal digits */
2624            case '0':
2625            case '1':
2626            case '2':
2627            case '3':
2628            case '4':
2629            case '5':
2630            case '6':
2631            case '7':
2632                val = c - '0';
2633                c = *s++;  /* try for 2 */
2634                if (c >= '0' && c <= '7') {
2635                    val = (val << 3) | (c - '0');
2636                    c = *s++;  /* try for 3 */
2637                    if (c >= '0' && c <= '7')
2638                        val = (val << 3) | (c-'0');
2639                    else
2640                        --s;
2641                }
2642                else
2643                    --s;
2644                *p++ = (char)val;
2645                break;
2646
2647            /* \x and up to 2 hex digits */
2648            case 'x':
2649                val = 'x';  /* Default if no digits */
2650                c = hextoint(*s++); /* Get next char */
2651                if (c >= 0) {
2652                    val = c;
2653                    c = hextoint(*s++);
2654                    if (c >= 0)
2655                        val = (val << 4) + c;
2656                    else
2657                        --s;
2658                } else
2659                    --s;
2660                *p++ = (char)val;
2661                break;
2662            }
2663        } else
2664            *p++ = (char)c;
2665    }
2666out:
2667    *p = '\0';
2668    m->vallen = CAST(unsigned char, (p - origp));
2669    if (m->type == FILE_PSTRING)
2670        m->vallen += (unsigned char)file_pstring_length_size(m);
2671    return s;
2672}
2673
2674
/*
 * Map one hexadecimal digit character to its numeric value.
 * Returns -1 when the character is not ASCII or not a hex digit.
 */
private int
hextoint(int c)
{
    const unsigned char uc = (unsigned char) c;

    if (!isascii(uc))
        return -1;
    if (isdigit(uc))
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return 10 + (c - 'a');
    if (c >= 'A' && c <= 'F')
        return 10 + (c - 'A');
    return -1;
}
2689
2690
2691/*
2692 * Print a string containing C character escapes.
2693 */
2694protected void
2695file_showstr(FILE *fp, const char *s, size_t len)
2696{
2697    char    c;
2698
2699    for (;;) {
2700        if (len == ~0U) {
2701            c = *s++;
2702            if (c == '\0')
2703                break;
2704        }
2705        else  {
2706            if (len-- == 0)
2707                break;
2708            c = *s++;
2709        }
2710        if (c >= 040 && c <= 0176)  /* TODO isprint && !iscntrl */
2711            (void) fputc(c, fp);
2712        else {
2713            (void) fputc('\\', fp);
2714            switch (c) {
2715            case '\a':
2716                (void) fputc('a', fp);
2717                break;
2718
2719            case '\b':
2720                (void) fputc('b', fp);
2721                break;
2722
2723            case '\f':
2724                (void) fputc('f', fp);
2725                break;
2726
2727            case '\n':
2728                (void) fputc('n', fp);
2729                break;
2730
2731            case '\r':
2732                (void) fputc('r', fp);
2733                break;
2734
2735            case '\t':
2736                (void) fputc('t', fp);
2737                break;
2738
2739            case '\v':
2740                (void) fputc('v', fp);
2741                break;
2742
2743            default:
2744                (void) fprintf(fp, "%.3o", c & 0377);
2745                break;
2746            }
2747        }
2748    }
2749}
2750
2751/*
2752 * eatsize(): Eat the size spec from a number [eg. 10UL]
2753 */
2754private void
2755eatsize(const char **p)
2756{
2757    const char *l = *p;
2758
2759    if (LOWCASE(*l) == 'u')
2760        l++;
2761
2762    switch (LOWCASE(*l)) {
2763    case 'l':    /* long */
2764    case 's':    /* short */
2765    case 'h':    /* short */
2766    case 'b':    /* char/byte */
2767    case 'c':    /* char/byte */
2768        l++;
2769        /*FALLTHROUGH*/
2770    default:
2771        break;
2772    }
2773
2774    *p = l;
2775}
2776
2777/*
2778 * handle a buffer containing a compiled file.
2779 */
2780private struct magic_map *
2781apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
2782{
2783    struct magic_map *map;
2784
2785    if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
2786        file_oomem(ms, sizeof(*map));
2787        return NULL;
2788    }
2789    map->len = len;
2790    map->p = buf;
2791    map->type = MAP_TYPE_USER;
2792    if (check_buffer(ms, map, "buffer") != 0) {
2793        apprentice_unmap(map);
2794        return NULL;
2795    }
2796    return map;
2797}
2798
2799/*
2800 * handle a compiled file.
2801 */
2802
2803private struct magic_map *
2804apprentice_map(struct magic_set *ms, const char *fn)
2805{
2806    uint32_t *ptr;
2807    uint32_t version, entries, nentries;
2808    int needsbyteswap;
2809    char *dbname = NULL;
2810    struct magic_map *map;
2811    size_t i;
2812    php_stream *stream = NULL;
2813    php_stream_statbuf st;
2814
2815
2816
2817    if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
2818        file_oomem(ms, sizeof(*map));
2819        return NULL;
2820    }
2821
2822    if (fn == NULL) {
2823        map->p = (void *)&php_magic_database;
2824        goto internal_loaded;
2825    }
2826
2827#ifdef PHP_WIN32
2828    /* Don't bother on windows with php_stream_open_wrapper,
2829    return to give apprentice_load() a chance. */
2830    if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
2831               if (st.sb.st_mode & S_IFDIR) {
2832                       return NULL;
2833               }
2834       }
2835#endif
2836
2837    dbname = mkdbname(ms, fn, 0);
2838    if (dbname == NULL)
2839        goto error;
2840
2841        stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
2842
2843    if (!stream) {
2844        goto error;
2845    }
2846
2847    if (php_stream_stat(stream, &st) < 0) {
2848        file_error(ms, errno, "cannot stat `%s'", dbname);
2849        goto error;
2850    }
2851    if (st.sb.st_size < 8 || st.sb.st_size > MAXMAGIC_SIZE) {
2852        file_error(ms, 0, "file `%s' is too %s", dbname,
2853            st.sb.st_size < 8 ? "small" : "large");
2854        goto error;
2855    }
2856
2857    map->len = (size_t)st.sb.st_size;
2858    if ((map->p = CAST(void *, emalloc(map->len))) == NULL) {
2859        file_oomem(ms, map->len);
2860        goto error;
2861    }
2862    if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
2863        file_badread(ms);
2864        goto error;
2865    }
2866    map->len = 0;
2867#define RET 1
2868
2869    php_stream_close(stream);
2870    stream = NULL;
2871
2872internal_loaded:
2873    ptr = (uint32_t *)(void *)map->p;
2874    if (*ptr != MAGICNO) {
2875        if (swap4(*ptr) != MAGICNO) {
2876            file_error(ms, 0, "bad magic in `%s'", dbname);
2877            goto error;
2878        }
2879        needsbyteswap = 1;
2880    } else
2881        needsbyteswap = 0;
2882    if (needsbyteswap)
2883        version = swap4(ptr[1]);
2884    else
2885        version = ptr[1];
2886    if (version != VERSIONNO) {
2887        file_error(ms, 0, "File %d.%d supports only version %d magic "
2888            "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
2889            VERSIONNO, dbname, version);
2890        goto error;
2891    }
2892
2893    /* php_magic_database is a const, performing writes will segfault. This is for big-endian
2894    machines only, PPC and Sparc specifically. Consider static variable or MINIT in
2895    future. */
2896    if (needsbyteswap && fn == NULL) {
2897        map->p = emalloc(sizeof(php_magic_database));
2898        map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
2899    }
2900
2901    if (NULL != fn) {
2902        nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2903        entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2904        if ((zend_off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
2905            file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
2906                dbname, (unsigned long long)st.sb.st_size,
2907                sizeof(struct magic));
2908            goto error;
2909        }
2910    }
2911    map->magic[0] = CAST(struct magic *, map->p) + 1;
2912    nentries = 0;
2913    for (i = 0; i < MAGIC_SETS; i++) {
2914        if (needsbyteswap)
2915            map->nmagic[i] = swap4(ptr[i + 2]);
2916        else
2917            map->nmagic[i] = ptr[i + 2];
2918        if (i != MAGIC_SETS - 1)
2919            map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2920        nentries += map->nmagic[i];
2921    }
2922    if (NULL != fn && entries != nentries + 1) {
2923        file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2924            dbname, entries, nentries + 1);
2925        goto error;
2926    }
2927    if (needsbyteswap)
2928        for (i = 0; i < MAGIC_SETS; i++)
2929            byteswap(map->magic[i], map->nmagic[i]);
2930
2931    if (dbname) {
2932        efree(dbname);
2933    }
2934    return map;
2935
2936error:
2937    if (stream) {
2938        php_stream_close(stream);
2939    }
2940    apprentice_unmap(map);
2941    if (dbname) {
2942        efree(dbname);
2943    }
2944    return NULL;
2945}
2946
2947private int
2948check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
2949{
2950    uint32_t *ptr;
2951    uint32_t entries, nentries;
2952    uint32_t version;
2953    int i, needsbyteswap;
2954
2955    ptr = CAST(uint32_t *, map->p);
2956    if (*ptr != MAGICNO) {
2957        if (swap4(*ptr) != MAGICNO) {
2958            file_error(ms, 0, "bad magic in `%s'", dbname);
2959            return -1;
2960        }
2961        needsbyteswap = 1;
2962    } else
2963        needsbyteswap = 0;
2964    if (needsbyteswap)
2965        version = swap4(ptr[1]);
2966    else
2967        version = ptr[1];
2968    if (version != VERSIONNO) {
2969        file_error(ms, 0, "File %s supports only version %d magic "
2970            "files. `%s' is version %d", FILE_VERSION_MAJOR,
2971            VERSIONNO, dbname, version);
2972        return -1;
2973    }
2974    entries = (uint32_t)(map->len / sizeof(struct magic));
2975    if ((entries * sizeof(struct magic)) != map->len) {
2976        file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
2977            "a multiple of %" SIZE_T_FORMAT "u",
2978            dbname, map->len, sizeof(struct magic));
2979        return -1;
2980    }
2981    map->magic[0] = CAST(struct magic *, map->p) + 1;
2982    nentries = 0;
2983    for (i = 0; i < MAGIC_SETS; i++) {
2984        if (needsbyteswap)
2985            map->nmagic[i] = swap4(ptr[i + 2]);
2986        else
2987            map->nmagic[i] = ptr[i + 2];
2988        if (i != MAGIC_SETS - 1)
2989            map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2990        nentries += map->nmagic[i];
2991    }
2992    if (entries != nentries + 1) {
2993        file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2994            dbname, entries, nentries + 1);
2995        return -1;
2996    }
2997    if (needsbyteswap)
2998        for (i = 0; i < MAGIC_SETS; i++)
2999            byteswap(map->magic[i], map->nmagic[i]);
3000    return 0;
3001}
3002
3003/*
3004 * handle an mmaped file.
3005 */
3006private int
3007apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3008{
3009    static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3010    static const size_t m = sizeof(**map->magic);
3011    int fd = -1;
3012    size_t len;
3013    char *dbname;
3014    int rv = -1;
3015    uint32_t i;
3016    union {
3017        struct magic m;
3018        uint32_t h[2 + MAGIC_SETS];
3019    } hdr;
3020    php_stream *stream;
3021
3022
3023    dbname = mkdbname(ms, fn, 0);
3024
3025    if (dbname == NULL)
3026        goto out;
3027
3028    /* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
3029    stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
3030
3031    if (!stream) {
3032        file_error(ms, errno, "cannot open `%s'", dbname);
3033        goto out;
3034    }
3035    memset(&hdr, 0, sizeof(hdr));
3036    hdr.h[0] = MAGICNO;
3037    hdr.h[1] = VERSIONNO;
3038    memcpy(hdr.h + 2, map->nmagic, nm);
3039
3040    if (php_stream_write(stream,(const char *)&hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) {
3041        file_error(ms, errno, "error writing `%s'", dbname);
3042        goto out;
3043    }
3044
3045    for (i = 0; i < MAGIC_SETS; i++) {
3046        len = m * map->nmagic[i];
3047        if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
3048            file_error(ms, errno, "error writing `%s'", dbname);
3049            goto out;
3050        }
3051    }
3052
3053    if (stream) {
3054        php_stream_close(stream);
3055    }
3056    rv = 0;
3057out:
3058    efree(dbname);
3059    return rv;
3060}
3061
3062private const char ext[] = ".mgc";
3063/*
3064 * make a dbname
3065 */
3066private char *
3067mkdbname(struct magic_set *ms, const char *fn, int strip)
3068{
3069    const char *p, *q;
3070    char *buf;
3071
3072    if (strip) {
3073        if ((p = strrchr(fn, '/')) != NULL)
3074            fn = ++p;
3075    }
3076
3077    for (q = fn; *q; q++)
3078        continue;
3079    /* Look for .mgc */
3080    for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3081        if (*p != *q)
3082            break;
3083
3084    /* Did not find .mgc, restore q */
3085    if (p >= ext)
3086        while (*q)
3087            q++;
3088
3089    q++;
3090    /* Compatibility with old code that looked in .mime */
3091    if (ms->flags & MAGIC_MIME) {
3092        spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
3093#ifdef PHP_WIN32
3094        if (VCWD_ACCESS(buf, R_OK) == 0) {
3095#else
3096        if (VCWD_ACCESS(buf, R_OK) != -1) {
3097#endif
3098            ms->flags &= MAGIC_MIME_TYPE;
3099            return buf;
3100        }
3101        efree(buf);
3102    }
3103    spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
3104
3105    /* Compatibility with old code that looked in .mime */
3106    if (strstr(p, ".mime") != NULL)
3107        ms->flags &= MAGIC_MIME_TYPE;
3108    return buf;
3109}
3110
3111/*
3112 * Byteswap an mmap'ed file if needed
3113 */
3114private void
3115byteswap(struct magic *magic, uint32_t nmagic)
3116{
3117    uint32_t i;
3118    for (i = 0; i < nmagic; i++)
3119        bs1(&magic[i]);
3120}
3121
3122/*
3123 * swap a short
3124 */
3125private uint16_t
3126swap2(uint16_t sv)
3127{
3128    uint16_t rv;
3129    uint8_t *s = (uint8_t *)(void *)&sv;
3130    uint8_t *d = (uint8_t *)(void *)&rv;
3131    d[0] = s[1];
3132    d[1] = s[0];
3133    return rv;
3134}
3135
3136/*
3137 * swap an int
3138 */
3139private uint32_t
3140swap4(uint32_t sv)
3141{
3142    uint32_t rv;
3143    uint8_t *s = (uint8_t *)(void *)&sv;
3144    uint8_t *d = (uint8_t *)(void *)&rv;
3145    d[0] = s[3];
3146    d[1] = s[2];
3147    d[2] = s[1];
3148    d[3] = s[0];
3149    return rv;
3150}
3151
/*
 * Reverse the byte order of a 64-bit value.  All eight bytes are
 * mirrored; the two 32-bit halves are not swapped independently.
 */
private uint64_t
swap8(uint64_t sv)
{
    uint64_t rv;
    const uint8_t *src = (const uint8_t *)(const void *)&sv;
    uint8_t *dst = (uint8_t *)(void *)&rv;
    size_t i;

    /* Copy the bytes of sv into rv back to front */
    for (i = 0; i < sizeof(rv); i++)
        dst[i] = src[sizeof(sv) - 1 - i];
    return rv;
}
3182
3183/*
3184 * byteswap a single magic entry
3185 */
3186private void
3187bs1(struct magic *m)
3188{
3189    m->cont_level = swap2(m->cont_level);
3190    m->offset = swap4((uint32_t)m->offset);
3191    m->in_offset = swap4((uint32_t)m->in_offset);
3192    m->lineno = swap4((uint32_t)m->lineno);
3193    if (IS_LIBMAGIC_STRING(m->type)) {
3194        m->str_range = swap4(m->str_range);
3195        m->str_flags = swap4(m->str_flags);
3196    }
3197    else {
3198        m->value.q = swap8(m->value.q);
3199        m->num_mask = swap8(m->num_mask);
3200    }
3201}
3202
3203protected size_t
3204file_pstring_length_size(const struct magic *m)
3205{
3206    switch (m->str_flags & PSTRING_LEN) {
3207    case PSTRING_1_LE:
3208        return 1;
3209    case PSTRING_2_LE:
3210    case PSTRING_2_BE:
3211        return 2;
3212    case PSTRING_4_LE:
3213    case PSTRING_4_BE:
3214        return 4;
3215    default:
3216        abort();    /* Impossible */
3217        return 1;
3218    }
3219}
3220protected size_t
3221file_pstring_get_length(const struct magic *m, const char *s)
3222{
3223    size_t len = 0;
3224
3225    switch (m->str_flags & PSTRING_LEN) {
3226    case PSTRING_1_LE:
3227        len = *s;
3228        break;
3229    case PSTRING_2_LE:
3230        len = (s[1] << 8) | s[0];
3231        break;
3232    case PSTRING_2_BE:
3233        len = (s[0] << 8) | s[1];
3234        break;
3235    case PSTRING_4_LE:
3236        len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
3237        break;
3238    case PSTRING_4_BE:
3239        len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
3240        break;
3241    default:
3242        abort();    /* Impossible */
3243    }
3244
3245    if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
3246        len -= file_pstring_length_size(m);
3247
3248    return len;
3249}
3250
3251protected int
3252file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3253{
3254    uint32_t i, j;
3255    struct mlist *mlist, *ml;
3256
3257    mlist = ms->mlist[1];
3258
3259    for (ml = mlist->next; ml != mlist; ml = ml->next) {
3260        struct magic *ma = ml->magic;
3261        uint32_t nma = ml->nmagic;
3262        for (i = 0; i < nma; i++) {
3263            if (ma[i].type != FILE_NAME)
3264                continue;
3265            if (strcmp(ma[i].value.s, name) == 0) {
3266                v->magic = &ma[i];
3267                for (j = i + 1; j < nma; j++)
3268                    if (ma[j].cont_level == 0)
3269                        break;
3270                v->nmagic = j - i;
3271                return 0;
3272            }
3273        }
3274    }
3275    return -1;
3276}
3277