1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * apprentice - make one pass through /etc/magic, learning its secrets.
30 */
31
32#include "php.h"
33
34#include "file.h"
35
36#ifndef lint
37FILE_RCSID("@(#)$File: apprentice.c,v 1.196 2013/11/19 21:01:12 christos Exp $")
38#endif  /* lint */
39
40#include "magic.h"
41#include "patchlevel.h"
42#include <stdlib.h>
43
44#if defined(__hpux) && !defined(HAVE_STRTOULL)
45#if SIZEOF_LONG == 8
46# define strtoull strtoul
47#else
48# define strtoull __strtoull
49#endif
50#endif
51
52#ifdef PHP_WIN32
53#include "win32/unistd.h"
54#if _MSC_VER <= 1300
55# include "win32/php_strtoi64.h"
56#endif
57#define strtoull _strtoui64
58#else
59#include <unistd.h>
60#endif
61#include <string.h>
62#include <assert.h>
63#include <ctype.h>
64#include <fcntl.h>
65
/*
 * EATAB: advance the character pointer named `l' (which must be in scope
 * where the macro is expanded) past any run of ASCII whitespace.
 */
#define EATAB {while (isascii((unsigned char) *l) && \
              isspace((unsigned char) *l))  ++l;}
/*
 * LOWCASE: yield the lower-case form of an upper-case character and any
 * other value unchanged.  The argument is evaluated more than once, so it
 * must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
            tolower((unsigned char) (l)) : (l))
70/*
71 * Work around a bug in headers on Digital Unix.
72 * At least confirmed for: OSF1 V4.0 878
73 */
74#if defined(__osf__) && defined(__DECC)
75#ifdef MAP_FAILED
76#undef MAP_FAILED
77#endif
78#endif
79
80#ifndef MAP_FAILED
81#define MAP_FAILED (void *) -1
82#endif
83
84#ifndef MAP_FILE
85#define MAP_FILE 0
86#endif
87
/*
 * Allocation granularities.  ALLOC_INCR is the number of slots by which
 * addentry() grows a magic_entry_set; ALLOC_CHUNK is not used in the part
 * of the file reviewed here (presumably the growth step for the records of
 * a single entry -- confirm against parse()).
 */
#define ALLOC_CHUNK (size_t)10
#define ALLOC_INCR  (size_t)200
90
/*
 * One magic entry while the magic source is being parsed.
 */
struct magic_entry {
    struct magic *mp;       /* record(s) of this entry; the first one is
                               read as mp->type, mp->cont_level, ... */
    uint32_t cont_count;    /* summed over all entries by
                               coalesce_entries(); presumably the number
                               of records in mp -- TODO confirm */
    uint32_t max_count;     /* presumably the allocated capacity of mp;
                               not touched in the code reviewed here */
};
96
/*
 * Growable array of magic entries; addentry() appends to it and enlarges
 * it by ALLOC_INCR slots whenever count reaches max.
 */
struct magic_entry_set {
    struct magic_entry *me; /* the entries */
    uint32_t count;         /* slots in use */
    uint32_t max;           /* slots allocated */
};
102
/*
 * A loaded magic database.
 */
struct magic_map {
    void *p;        /* backing storage: apprentice_unmap() leaves it alone
                       when it is php_magic_database, frees the magic[]
                       arrays one by one when it is NULL, and efree()s it
                       otherwise */
    size_t len;     /* presumably the size of *p -- not used in the code
                       reviewed here */
    struct magic *magic[MAGIC_SETS];    /* records of each set */
    uint32_t nmagic[MAGIC_SETS];        /* number of records in each set */
};
109
/*
 * Lookup tables indexed by type code and filled in from type_tbl by
 * init_file_tables(): the format class and the keyword of every type.
 * file_nformats and file_nnames publish the table sizes.
 */
int file_formats[FILE_NAMES_SIZE];
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];
const size_t file_nnames = FILE_NAMES_SIZE;
114
/*
 * Forward declarations of the file-local helpers, so that they can be
 * defined in any order below.
 */
private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
private int hextoint(int);
private const char *getstr(struct magic_set *, struct magic *, const char *,
    int);
private int parse(struct magic_set *, struct magic_entry *, const char *,
    size_t, int);
private void eatsize(const char **);
private int apprentice_1(struct magic_set *, const char *, int);
private size_t apprentice_magic_strength(const struct magic *);
private int apprentice_sort(const void *, const void *);
private void apprentice_list(struct mlist *, int );
private struct magic_map *apprentice_load(struct magic_set *,
    const char *, int);
private struct mlist *mlist_alloc(void);
private void mlist_free(struct mlist *);
private void byteswap(struct magic *, uint32_t);
private void bs1(struct magic *);
private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private uint64_t swap8(uint64_t);
private char *mkdbname(struct magic_set *, const char *, int);
private struct magic_map *apprentice_map(struct magic_set *, const char *);
private void apprentice_unmap(struct magic_map *);
private int apprentice_compile(struct magic_set *, struct magic_map *,
    const char *);
private int check_format_type(const char *, int);
private int check_format(struct magic_set *, struct magic *);
private int get_op(char);
/* Handlers for the "!:" directives; they are dispatched through bang[]. */
private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
146
147
/* Size of a magic record in this build; apprentice_1() compares it with
   FILE_MAGICSIZE before doing anything else. */
private size_t magicsize = sizeof(struct magic);

/* Column header text; not referenced in the part of the file reviewed
   here. */
private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";

/*
 * Dispatch table for "!:<name>" lines.  load_1() matches the text after
 * "!:" against `name' (comparing `len' bytes) and calls `fun' with the
 * remainder of the line.  The table ends with an all-NULL sentinel.
 */
private struct {
    const char *name;
    size_t len;
    int (*fun)(struct magic_set *, struct magic_entry *, const char *);
} bang[] = {
/* Expands to { "name", strlen("name"), parse_name }. */
#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
    DECLARE_FIELD(mime),
    DECLARE_FIELD(apple),
    DECLARE_FIELD(strength),
#undef  DECLARE_FIELD
    { NULL, 0, NULL }
};
164
165#include "../data_file.c"
166
/*
 * One row of a type-keyword table (see type_tbl and special_tbl).
 */
struct type_tbl_s {
    const char name[16];    /* keyword as written in a magic file */
    const size_t len;       /* length of name; 0 marks the sentinel row */
    const int type;         /* FILE_* type code */
    const int format;       /* FILE_FMT_* format class */
};
173
/*
 * Table of type keywords.  get_type() walks it in order and returns the
 * first row whose name is a prefix of the input; init_file_tables() copies
 * it into file_names[] and file_formats[].  The row with an empty name is
 * the end-of-table sentinel.
 *
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual SUS validation suite; having "long" be dependent on how big a
 * "long" is on the machine running "file" is silly.
 */
static const struct type_tbl_s type_tbl[] = {
/* XX(s) supplies the `name' and `len' initializers of one row. */
# define XX(s)      s, (sizeof(s) - 1)
# define XX_NULL    "", 0
    { XX("invalid"),    FILE_INVALID,       FILE_FMT_NONE },
    { XX("byte"),       FILE_BYTE,      FILE_FMT_NUM },
    { XX("short"),      FILE_SHORT,     FILE_FMT_NUM },
    { XX("default"),    FILE_DEFAULT,       FILE_FMT_NONE },
    { XX("long"),       FILE_LONG,      FILE_FMT_NUM },
    { XX("string"),     FILE_STRING,        FILE_FMT_STR },
    { XX("date"),       FILE_DATE,      FILE_FMT_STR },
    { XX("beshort"),    FILE_BESHORT,       FILE_FMT_NUM },
    { XX("belong"),     FILE_BELONG,        FILE_FMT_NUM },
    { XX("bedate"),     FILE_BEDATE,        FILE_FMT_STR },
    { XX("leshort"),    FILE_LESHORT,       FILE_FMT_NUM },
    { XX("lelong"),     FILE_LELONG,        FILE_FMT_NUM },
    { XX("ledate"),     FILE_LEDATE,        FILE_FMT_STR },
    { XX("pstring"),    FILE_PSTRING,       FILE_FMT_STR },
    { XX("ldate"),      FILE_LDATE,     FILE_FMT_STR },
    { XX("beldate"),    FILE_BELDATE,       FILE_FMT_STR },
    { XX("leldate"),    FILE_LELDATE,       FILE_FMT_STR },
    { XX("regex"),      FILE_REGEX,     FILE_FMT_STR },
    { XX("bestring16"), FILE_BESTRING16,    FILE_FMT_STR },
    { XX("lestring16"), FILE_LESTRING16,    FILE_FMT_STR },
    { XX("search"),     FILE_SEARCH,        FILE_FMT_STR },
    { XX("medate"),     FILE_MEDATE,        FILE_FMT_STR },
    { XX("meldate"),    FILE_MELDATE,       FILE_FMT_STR },
    { XX("melong"),     FILE_MELONG,        FILE_FMT_NUM },
    { XX("quad"),       FILE_QUAD,      FILE_FMT_QUAD },
    { XX("lequad"),     FILE_LEQUAD,        FILE_FMT_QUAD },
    { XX("bequad"),     FILE_BEQUAD,        FILE_FMT_QUAD },
    { XX("qdate"),      FILE_QDATE,     FILE_FMT_STR },
    { XX("leqdate"),    FILE_LEQDATE,       FILE_FMT_STR },
    { XX("beqdate"),    FILE_BEQDATE,       FILE_FMT_STR },
    { XX("qldate"),     FILE_QLDATE,        FILE_FMT_STR },
    { XX("leqldate"),   FILE_LEQLDATE,      FILE_FMT_STR },
    { XX("beqldate"),   FILE_BEQLDATE,      FILE_FMT_STR },
    { XX("float"),      FILE_FLOAT,     FILE_FMT_FLOAT },
    { XX("befloat"),    FILE_BEFLOAT,       FILE_FMT_FLOAT },
    { XX("lefloat"),    FILE_LEFLOAT,       FILE_FMT_FLOAT },
    { XX("double"),     FILE_DOUBLE,        FILE_FMT_DOUBLE },
    { XX("bedouble"),   FILE_BEDOUBLE,      FILE_FMT_DOUBLE },
    { XX("ledouble"),   FILE_LEDOUBLE,      FILE_FMT_DOUBLE },
    { XX("leid3"),      FILE_LEID3,     FILE_FMT_NUM },
    { XX("beid3"),      FILE_BEID3,     FILE_FMT_NUM },
    { XX("indirect"),   FILE_INDIRECT,      FILE_FMT_NUM },
    { XX("qwdate"),     FILE_QWDATE,        FILE_FMT_STR },
    { XX("leqwdate"),   FILE_LEQWDATE,      FILE_FMT_STR },
    { XX("beqwdate"),   FILE_BEQWDATE,      FILE_FMT_STR },
    { XX("name"),       FILE_NAME,      FILE_FMT_NONE },
    { XX("use"),        FILE_USE,       FILE_FMT_NONE },
    { XX("clear"),      FILE_CLEAR,     FILE_FMT_NONE },
    { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },
};

/*
 * These are not types, and cannot be preceded by "u" to make them
 * unsigned.
 *
 * NOTE(review): "name" and "use" carry FILE_FMT_STR here but FILE_FMT_NONE
 * in type_tbl above; confirm that the difference is intended.
 */
static const struct type_tbl_s special_tbl[] = {
    { XX("name"),       FILE_NAME,      FILE_FMT_STR },
    { XX("use"),        FILE_USE,       FILE_FMT_STR },
    { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },
};
# undef XX
# undef XX_NULL
246
247#ifndef S_ISDIR
248#define S_ISDIR(mode) ((mode) & _S_IFDIR)
249#endif
250
251private int
252get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
253{
254    const struct type_tbl_s *p;
255
256    for (p = tbl; p->len; p++) {
257        if (strncmp(l, p->name, p->len) == 0) {
258            if (t)
259                *t = l + p->len;
260            break;
261        }
262    }
263    return p->type;
264}
265
266private int
267get_standard_integer_type(const char *l, const char **t)
268{
269    int type;
270
271    if (isalpha((unsigned char)l[1])) {
272        switch (l[1]) {
273        case 'C':
274            /* "dC" and "uC" */
275            type = FILE_BYTE;
276            break;
277        case 'S':
278            /* "dS" and "uS" */
279            type = FILE_SHORT;
280            break;
281        case 'I':
282        case 'L':
283            /*
284             * "dI", "dL", "uI", and "uL".
285             *
286             * XXX - the actual Single UNIX Specification says
287             * that "L" means "long", as in the C data type,
288             * but we treat it as meaning "4-byte integer".
289             * Given that the OS X version of file 5.04 did
290             * the same, I guess that passes the actual SUS
291             * validation suite; having "dL" be dependent on
292             * how big a "long" is on the machine running
293             * "file" is silly.
294             */
295            type = FILE_LONG;
296            break;
297        case 'Q':
298            /* "dQ" and "uQ" */
299            type = FILE_QUAD;
300            break;
301        default:
302            /* "d{anything else}", "u{anything else}" */
303            return FILE_INVALID;
304        }
305        l += 2;
306    } else if (isdigit((unsigned char)l[1])) {
307        /*
308         * "d{num}" and "u{num}"; we only support {num} values
309         * of 1, 2, 4, and 8 - the Single UNIX Specification
310         * doesn't say anything about whether arbitrary
311         * values should be supported, but both the Solaris 10
312         * and OS X Mountain Lion versions of file passed the
313         * Single UNIX Specification validation suite, and
314         * neither of them support values bigger than 8 or
315         * non-power-of-2 values.
316         */
317        if (isdigit((unsigned char)l[2])) {
318            /* Multi-digit, so > 9 */
319            return FILE_INVALID;
320        }
321        switch (l[1]) {
322        case '1':
323            type = FILE_BYTE;
324            break;
325        case '2':
326            type = FILE_SHORT;
327            break;
328        case '4':
329            type = FILE_LONG;
330            break;
331        case '8':
332            type = FILE_QUAD;
333            break;
334        default:
335            /* XXX - what about 3, 5, 6, or 7? */
336            return FILE_INVALID;
337        }
338        l += 2;
339    } else {
340        /*
341         * "d" or "u" by itself.
342         */
343        type = FILE_LONG;
344        ++l;
345    }
346    if (t)
347        *t = l;
348    return type;
349}
350
351private void
352init_file_tables(void)
353{
354    static int done = 0;
355    const struct type_tbl_s *p;
356
357    if (done)
358        return;
359    done++;
360
361    for (p = type_tbl; p->len; p++) {
362        assert(p->type < FILE_NAMES_SIZE);
363        file_names[p->type] = p->name;
364        file_formats[p->type] = p->format;
365    }
366    assert(p - type_tbl == FILE_NAMES_SIZE);
367}
368
369private int
370add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
371{
372    struct mlist *ml;
373
374    if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
375        return -1;
376
377    ml->map = idx == 0 ? map : NULL;
378    ml->magic = map->magic[idx];
379    ml->nmagic = map->nmagic[idx];
380
381    mlp->prev->next = ml;
382    ml->prev = mlp->prev;
383    ml->next = mlp;
384    mlp->prev = ml;
385    return 0;
386}
387
388/*
389 * Handle one file or directory.
390 */
391private int
392apprentice_1(struct magic_set *ms, const char *fn, int action)
393{
394    struct mlist *ml;
395    struct magic_map *map;
396    size_t i;
397
398    if (magicsize != FILE_MAGICSIZE) {
399        file_error(ms, 0, "magic element size %lu != %lu",
400            (unsigned long)sizeof(*map->magic[0]),
401            (unsigned long)FILE_MAGICSIZE);
402        return -1;
403    }
404
405    if (action == FILE_COMPILE) {
406        map = apprentice_load(ms, fn, action);
407        if (map == NULL)
408            return -1;
409        return apprentice_compile(ms, map, fn);
410    }
411
412    map = apprentice_map(ms, fn);
413    if (map == NULL) {
414        if (fn) {
415            if (ms->flags & MAGIC_CHECK)
416                file_magwarn(ms, "using regular magic file `%s'", fn);
417            map = apprentice_load(ms, fn, action);
418        }
419        if (map == NULL)
420            return -1;
421    }
422
423    for (i = 0; i < MAGIC_SETS; i++) {
424        if (add_mlist(ms->mlist[i], map, i) == -1) {
425            file_oomem(ms, sizeof(*ml));
426            apprentice_unmap(map);
427            return -1;
428        }
429    }
430
431    if (action == FILE_LIST) {
432        for (i = 0; i < MAGIC_SETS; i++) {
433            printf("Set %zu:\nBinary patterns:\n", i);
434            apprentice_list(ms->mlist[i], BINTEST);
435            printf("Text patterns:\n");
436            apprentice_list(ms->mlist[i], TEXTTEST);
437        }
438    }
439
440    return 0;
441}
442
443protected void
444file_ms_free(struct magic_set *ms)
445{
446    size_t i;
447    if (ms == NULL)
448        return;
449    for (i = 0; i < MAGIC_SETS; i++)
450        mlist_free(ms->mlist[i]);
451    if (ms->o.pbuf) {
452        efree(ms->o.pbuf);
453    }
454    if (ms->o.buf) {
455        efree(ms->o.buf);
456    }
457    if (ms->c.li) {
458        efree(ms->c.li);
459    }
460    efree(ms);
461}
462
463protected struct magic_set *
464file_ms_alloc(int flags)
465{
466    struct magic_set *ms;
467    size_t i, len;
468
469    if ((ms = CAST(struct magic_set *, ecalloc((size_t)1,
470        sizeof(struct magic_set)))) == NULL)
471        return NULL;
472
473    if (magic_setflags(ms, flags) == -1) {
474        errno = EINVAL;
475        goto free;
476    }
477
478    ms->o.buf = ms->o.pbuf = NULL;
479    len = (ms->c.len = 10) * sizeof(*ms->c.li);
480
481    if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
482        goto free;
483
484    ms->event_flags = 0;
485    ms->error = -1;
486    for (i = 0; i < MAGIC_SETS; i++)
487        ms->mlist[i] = NULL;
488    ms->file = "unknown";
489    ms->line = 0;
490    return ms;
491free:
492    efree(ms);
493    return NULL;
494}
495
496private void
497apprentice_unmap(struct magic_map *map)
498{
499    if (map == NULL)
500        return;
501    if (map->p != php_magic_database) {
502        if (map->p == NULL) {
503            int j;
504            for (j = 0; j < MAGIC_SETS; j++) {
505                if (map->magic[j]) {
506                    efree(map->magic[j]);
507                }
508            }
509        } else {
510            efree(map->p);
511        }
512    }
513    efree(map);
514}
515
516private struct mlist *
517mlist_alloc(void)
518{
519    struct mlist *mlist;
520    if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
521        return NULL;
522    }
523    mlist->next = mlist->prev = mlist;
524    return mlist;
525}
526
527private void
528mlist_free(struct mlist *mlist)
529{
530    struct mlist *ml;
531
532    if (mlist == NULL)
533        return;
534
535    for (ml = mlist->next; ml != mlist;) {
536        struct mlist *next = ml->next;
537        if (ml->map)
538            apprentice_unmap(ml->map);
539        efree(ml);
540        ml = next;
541    }
542    efree(ml);
543}
544
545/* const char *fn: list of magic files and directories */
546protected int
547file_apprentice(struct magic_set *ms, const char *fn, int action)
548{
549    char *p, *mfn;
550    int file_err, errs = -1;
551    size_t i;
552
553    file_reset(ms);
554
555/* XXX disabling default magic loading so the compiled in data is used */
556#if 0
557    if ((fn = magic_getpath(fn, action)) == NULL)
558        return -1;
559#endif
560
561    init_file_tables();
562
563    if (fn == NULL)
564        fn = getenv("MAGIC");
565    if (fn == NULL) {
566        for (i = 0; i < MAGIC_SETS; i++) {
567            mlist_free(ms->mlist[i]);
568            if ((ms->mlist[i] = mlist_alloc()) == NULL) {
569                file_oomem(ms, sizeof(*ms->mlist[i]));
570                return -1;
571            }
572        }
573        return apprentice_1(ms, fn, action);
574    }
575
576    if ((mfn = estrdup(fn)) == NULL) {
577        file_oomem(ms, strlen(fn));
578        return -1;
579    }
580
581    for (i = 0; i < MAGIC_SETS; i++) {
582        mlist_free(ms->mlist[i]);
583        if ((ms->mlist[i] = mlist_alloc()) == NULL) {
584            file_oomem(ms, sizeof(*ms->mlist[i]));
585            if (i != 0) {
586                --i;
587                do
588                    mlist_free(ms->mlist[i]);
589                while (i != 0);
590            }
591            efree(mfn);
592            return -1;
593        }
594    }
595    fn = mfn;
596
597    while (fn) {
598        p = strchr(fn, PATHSEP);
599        if (p)
600            *p++ = '\0';
601        if (*fn == '\0')
602            break;
603        file_err = apprentice_1(ms, fn, action);
604        errs = MAX(errs, file_err);
605        fn = p;
606    }
607
608    efree(mfn);
609
610    if (errs == -1) {
611        for (i = 0; i < MAGIC_SETS; i++) {
612            mlist_free(ms->mlist[i]);
613            ms->mlist[i] = NULL;
614        }
615        file_error(ms, 0, "could not find any valid magic files!");
616        return -1;
617    }
618
619#if 0
620    /*
621     * Always leave the database loaded
622     */
623    if (action == FILE_LOAD)
624        return 0;
625
626    for (i = 0; i < MAGIC_SETS; i++) {
627        mlist_free(ms->mlist[i]);
628        ms->mlist[i] = NULL;
629    }
630#endif
631
632    switch (action) {
633    case FILE_LOAD:
634    case FILE_COMPILE:
635    case FILE_CHECK:
636    case FILE_LIST:
637        return 0;
638    default:
639        file_error(ms, 0, "Invalid action %d", action);
640        return -1;
641    }
642}
643
644/*
645 * Get weight of this magic entry, for sorting purposes.
646 */
647private size_t
648apprentice_magic_strength(const struct magic *m)
649{
650#define MULT 10
651    size_t val = 2 * MULT;  /* baseline strength */
652
653    switch (m->type) {
654    case FILE_DEFAULT:  /* make sure this sorts last */
655        if (m->factor_op != FILE_FACTOR_OP_NONE)
656            abort();
657        return 0;
658
659    case FILE_BYTE:
660        val += 1 * MULT;
661        break;
662
663    case FILE_SHORT:
664    case FILE_LESHORT:
665    case FILE_BESHORT:
666        val += 2 * MULT;
667        break;
668
669    case FILE_LONG:
670    case FILE_LELONG:
671    case FILE_BELONG:
672    case FILE_MELONG:
673        val += 4 * MULT;
674        break;
675
676    case FILE_PSTRING:
677    case FILE_STRING:
678        val += m->vallen * MULT;
679        break;
680
681    case FILE_BESTRING16:
682    case FILE_LESTRING16:
683        val += m->vallen * MULT / 2;
684        break;
685
686    case FILE_SEARCH:
687    case FILE_REGEX:
688        val += m->vallen * MAX(MULT / m->vallen, 1);
689        break;
690
691    case FILE_DATE:
692    case FILE_LEDATE:
693    case FILE_BEDATE:
694    case FILE_MEDATE:
695    case FILE_LDATE:
696    case FILE_LELDATE:
697    case FILE_BELDATE:
698    case FILE_MELDATE:
699    case FILE_FLOAT:
700    case FILE_BEFLOAT:
701    case FILE_LEFLOAT:
702        val += 4 * MULT;
703        break;
704
705    case FILE_QUAD:
706    case FILE_BEQUAD:
707    case FILE_LEQUAD:
708    case FILE_QDATE:
709    case FILE_LEQDATE:
710    case FILE_BEQDATE:
711    case FILE_QLDATE:
712    case FILE_LEQLDATE:
713    case FILE_BEQLDATE:
714    case FILE_QWDATE:
715    case FILE_LEQWDATE:
716    case FILE_BEQWDATE:
717    case FILE_DOUBLE:
718    case FILE_BEDOUBLE:
719    case FILE_LEDOUBLE:
720        val += 8 * MULT;
721        break;
722
723    case FILE_INDIRECT:
724    case FILE_NAME:
725    case FILE_USE:
726        break;
727
728    default:
729        val = 0;
730        (void)fprintf(stderr, "Bad type %d\n", m->type);
731        abort();
732    }
733
734    switch (m->reln) {
735    case 'x':   /* matches anything penalize */
736    case '!':       /* matches almost anything penalize */
737        val = 0;
738        break;
739
740    case '=':   /* Exact match, prefer */
741        val += MULT;
742        break;
743
744    case '>':
745    case '<':   /* comparison match reduce strength */
746        val -= 2 * MULT;
747        break;
748
749    case '^':
750    case '&':   /* masking bits, we could count them too */
751        val -= MULT;
752        break;
753
754    default:
755        (void)fprintf(stderr, "Bad relation %c\n", m->reln);
756        abort();
757    }
758
759    if (val == 0)   /* ensure we only return 0 for FILE_DEFAULT */
760        val = 1;
761
762    switch (m->factor_op) {
763    case FILE_FACTOR_OP_NONE:
764        break;
765    case FILE_FACTOR_OP_PLUS:
766        val += m->factor;
767        break;
768    case FILE_FACTOR_OP_MINUS:
769        val -= m->factor;
770        break;
771    case FILE_FACTOR_OP_TIMES:
772        val *= m->factor;
773        break;
774    case FILE_FACTOR_OP_DIV:
775        val /= m->factor;
776        break;
777    default:
778        abort();
779    }
780
781    /*
782     * Magic entries with no description get a bonus because they depend
783     * on subsequent magic entries to print something.
784     */
785    if (m->desc[0] == '\0')
786        val++;
787    return val;
788}
789
790/*
791 * Sort callback for sorting entries by "strength" (basically length)
792 */
793private int
794apprentice_sort(const void *a, const void *b)
795{
796    const struct magic_entry *ma = CAST(const struct magic_entry *, a);
797    const struct magic_entry *mb = CAST(const struct magic_entry *, b);
798    size_t sa = apprentice_magic_strength(ma->mp);
799    size_t sb = apprentice_magic_strength(mb->mp);
800    if (sa == sb)
801        return 0;
802    else if (sa > sb)
803        return -1;
804    else
805        return 1;
806}
807
808/*
809 * Shows sorted patterns list in the order which is used for the matching
810 */
811private void
812apprentice_list(struct mlist *mlist, int mode)
813{
814    uint32_t magindex = 0;
815    struct mlist *ml;
816    for (ml = mlist->next; ml != mlist; ml = ml->next) {
817        for (magindex = 0; magindex < ml->nmagic; magindex++) {
818            struct magic *m = &ml->magic[magindex];
819            if ((m->flag & mode) != mode) {
820                /* Skip sub-tests */
821                while (magindex + 1 < ml->nmagic &&
822                       ml->magic[magindex + 1].cont_level != 0)
823                    ++magindex;
824                continue; /* Skip to next top-level test*/
825            }
826
827            /*
828             * Try to iterate over the tree until we find item with
829             * description/mimetype.
830             */
831            while (magindex + 1 < ml->nmagic &&
832                   ml->magic[magindex + 1].cont_level != 0 &&
833                   *ml->magic[magindex].desc == '\0' &&
834                   *ml->magic[magindex].mimetype == '\0')
835                magindex++;
836
837            printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n",
838                apprentice_magic_strength(m),
839                ml->magic[magindex].desc,
840                ml->magic[magindex].mimetype);
841        }
842    }
843}
844
845private void
846set_test_type(struct magic *mstart, struct magic *m)
847{
848    switch (m->type) {
849    case FILE_BYTE:
850    case FILE_SHORT:
851    case FILE_LONG:
852    case FILE_DATE:
853    case FILE_BESHORT:
854    case FILE_BELONG:
855    case FILE_BEDATE:
856    case FILE_LESHORT:
857    case FILE_LELONG:
858    case FILE_LEDATE:
859    case FILE_LDATE:
860    case FILE_BELDATE:
861    case FILE_LELDATE:
862    case FILE_MEDATE:
863    case FILE_MELDATE:
864    case FILE_MELONG:
865    case FILE_QUAD:
866    case FILE_LEQUAD:
867    case FILE_BEQUAD:
868    case FILE_QDATE:
869    case FILE_LEQDATE:
870    case FILE_BEQDATE:
871    case FILE_QLDATE:
872    case FILE_LEQLDATE:
873    case FILE_BEQLDATE:
874    case FILE_QWDATE:
875    case FILE_LEQWDATE:
876    case FILE_BEQWDATE:
877    case FILE_FLOAT:
878    case FILE_BEFLOAT:
879    case FILE_LEFLOAT:
880    case FILE_DOUBLE:
881    case FILE_BEDOUBLE:
882    case FILE_LEDOUBLE:
883        mstart->flag |= BINTEST;
884        break;
885    case FILE_STRING:
886    case FILE_PSTRING:
887    case FILE_BESTRING16:
888    case FILE_LESTRING16:
889        /* Allow text overrides */
890        if (mstart->str_flags & STRING_TEXTTEST)
891            mstart->flag |= TEXTTEST;
892        else
893            mstart->flag |= BINTEST;
894        break;
895    case FILE_REGEX:
896    case FILE_SEARCH:
897        /* Check for override */
898        if (mstart->str_flags & STRING_BINTEST)
899            mstart->flag |= BINTEST;
900        if (mstart->str_flags & STRING_TEXTTEST)
901            mstart->flag |= TEXTTEST;
902
903        if (mstart->flag & (TEXTTEST|BINTEST))
904            break;
905
906        /* binary test if pattern is not text */
907        if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
908            NULL) <= 0)
909            mstart->flag |= BINTEST;
910        else
911            mstart->flag |= TEXTTEST;
912        break;
913    case FILE_DEFAULT:
914        /* can't deduce anything; we shouldn't see this at the
915           top level anyway */
916        break;
917    case FILE_INVALID:
918    default:
919        /* invalid search type, but no need to complain here */
920        break;
921    }
922}
923
924private int
925addentry(struct magic_set *ms, struct magic_entry *me,
926   struct magic_entry_set *mset)
927{
928    size_t i = me->mp->type == FILE_NAME ? 1 : 0;
929    if (mset[i].count == mset[i].max) {
930        struct magic_entry *mp;
931
932        mset[i].max += ALLOC_INCR;
933        if ((mp = CAST(struct magic_entry *,
934            erealloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
935            NULL) {
936            file_oomem(ms, sizeof(*mp) * mset[i].max);
937            return -1;
938        }
939        (void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
940            ALLOC_INCR);
941        mset[i].me = mp;
942    }
943    mset[i].me[mset[i].count++] = *me;
944    memset(me, 0, sizeof(*me));
945    return 0;
946}
947
948/*
949 * Load and parse one file.
950 */
951private void
952load_1(struct magic_set *ms, int action, const char *fn, int *errs,
953   struct magic_entry_set *mset)
954{
955    char buffer[BUFSIZ + 1];
956    char *line = NULL;
957    size_t len;
958    size_t lineno = 0;
959    struct magic_entry me;
960
961    php_stream *stream;
962
963    TSRMLS_FETCH();
964
965    ms->file = fn;
966    stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
967
968    if (stream == NULL) {
969        if (errno != ENOENT)
970            file_error(ms, errno, "cannot read magic file `%s'",
971                   fn);
972        (*errs)++;
973        return;
974    }
975
976    memset(&me, 0, sizeof(me));
977    /* read and parse this file */
978    for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
979        if (len == 0) /* null line, garbage, etc */
980            continue;
981        if (line[len - 1] == '\n') {
982            lineno++;
983            line[len - 1] = '\0'; /* delete newline */
984        }
985        switch (line[0]) {
986        case '\0':  /* empty, do not parse */
987        case '#':   /* comment, do not parse */
988            continue;
989        case '!':
990            if (line[1] == ':') {
991                size_t i;
992
993                for (i = 0; bang[i].name != NULL; i++) {
994                    if ((size_t)(len - 2) > bang[i].len &&
995                        memcmp(bang[i].name, line + 2,
996                        bang[i].len) == 0)
997                        break;
998                }
999                if (bang[i].name == NULL) {
1000                    file_error(ms, 0,
1001                        "Unknown !: entry `%s'", line);
1002                    (*errs)++;
1003                    continue;
1004                }
1005                if (me.mp == NULL) {
1006                    file_error(ms, 0,
1007                        "No current entry for :!%s type",
1008                        bang[i].name);
1009                    (*errs)++;
1010                    continue;
1011                }
1012                if ((*bang[i].fun)(ms, &me,
1013                    line + bang[i].len + 2) != 0) {
1014                    (*errs)++;
1015                    continue;
1016                }
1017                continue;
1018            }
1019            /*FALLTHROUGH*/
1020        default:
1021        again:
1022            switch (parse(ms, &me, line, lineno, action)) {
1023            case 0:
1024                continue;
1025            case 1:
1026                (void)addentry(ms, &me, mset);
1027                goto again;
1028            default:
1029                (*errs)++;
1030                break;
1031            }
1032        }
1033    }
1034    if (me.mp)
1035        (void)addentry(ms, &me, mset);
1036    php_stream_close(stream);
1037}
1038
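/*
 * parse a file or directory of files
 * const char *fn: name of magic file or directory
 *
 * NOTE(review): this header does not describe the string comparator that
 * directly follows it; it presumably belongs to the loader defined
 * further down in the file.
 */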
1043private int
1044cmpstrp(const void *p1, const void *p2)
1045{
1046        return strcmp(*(char *const *)p1, *(char *const *)p2);
1047}
1048
1049
1050private uint32_t
1051set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1052    uint32_t starttest)
1053{
1054    static const char text[] = "text";
1055    static const char binary[] = "binary";
1056    static const size_t len = sizeof(text);
1057
1058    uint32_t i = starttest;
1059
1060    do {
1061        set_test_type(me[starttest].mp, me[i].mp);
1062        if ((ms->flags & MAGIC_DEBUG) == 0)
1063            continue;
1064        (void)fprintf(stderr, "%s%s%s: %s\n",
1065            me[i].mp->mimetype,
1066            me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1067            me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1068            me[i].mp->flag & BINTEST ? binary : text);
1069        if (me[i].mp->flag & BINTEST) {
1070            char *p = strstr(me[i].mp->desc, text);
1071            if (p && (p == me[i].mp->desc ||
1072                isspace((unsigned char)p[-1])) &&
1073                (p + len - me[i].mp->desc == MAXstring
1074                || (p[len] == '\0' ||
1075                isspace((unsigned char)p[len]))))
1076                (void)fprintf(stderr, "*** Possible "
1077                    "binary test for text type\n");
1078        }
1079    } while (++i < nme && me[i].mp->cont_level != 0);
1080    return i;
1081}
1082
1083private void
1084set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1085{
1086    uint32_t i;
1087    for (i = 0; i < nme; i++) {
1088        if (me[i].mp->cont_level == 0 &&
1089            me[i].mp->type == FILE_DEFAULT) {
1090            while (++i < nme)
1091                if (me[i].mp->cont_level == 0)
1092                    break;
1093            if (i != nme) {
1094                /* XXX - Ugh! */
1095                ms->line = me[i].mp->lineno;
1096                file_magwarn(ms,
1097                    "level 0 \"default\" did not sort last");
1098            }
1099            return;
1100        }
1101    }
1102}
1103
1104private int
1105coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1106    struct magic **ma, uint32_t *nma)
1107{
1108    uint32_t i, mentrycount = 0;
1109    size_t slen;
1110
1111    for (i = 0; i < nme; i++)
1112        mentrycount += me[i].cont_count;
1113
1114    slen = sizeof(**ma) * mentrycount;
1115    if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1116        file_oomem(ms, slen);
1117        return -1;
1118    }
1119
1120    mentrycount = 0;
1121    for (i = 0; i < nme; i++) {
1122        (void)memcpy(*ma + mentrycount, me[i].mp,
1123            me[i].cont_count * sizeof(**ma));
1124        mentrycount += me[i].cont_count;
1125    }
1126    *nma = mentrycount;
1127    return 0;
1128}
1129
1130private void
1131magic_entry_free(struct magic_entry *me, uint32_t nme)
1132{
1133    uint32_t i;
1134    if (me == NULL)
1135        return;
1136    for (i = 0; i < nme; i++)
1137        efree(me[i].mp);
1138    efree(me);
1139}
1140
1141private struct magic_map *
1142apprentice_load(struct magic_set *ms, const char *fn, int action)
1143{
1144    int errs = 0;
1145    uint32_t i, j;
1146    size_t files = 0, maxfiles = 0;
1147    char **filearr = NULL;
1148    struct stat st;
1149    struct magic_map *map;
1150    struct magic_entry_set mset[MAGIC_SETS];
1151    php_stream *dir;
1152    php_stream_dirent d;
1153
1154    TSRMLS_FETCH();
1155
1156    memset(mset, 0, sizeof(mset));
1157    ms->flags |= MAGIC_CHECK;   /* Enable checks for parsed files */
1158
1159
1160    if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL)
1161    {
1162        file_oomem(ms, sizeof(*map));
1163        return NULL;
1164    }
1165
1166    /* print silly verbose header for USG compat. */
1167    if (action == FILE_CHECK)
1168        (void)fprintf(stderr, "%s\n", usg_hdr);
1169
1170    /* load directory or file */
1171    /* FIXME: Read file names and sort them to prevent
1172       non-determinism. See Debian bug #488562. */
1173    if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1174        int mflen;
1175        char mfn[MAXPATHLEN];
1176
1177        dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1178        if (!dir) {
1179            errs++;
1180            goto out;
1181        }
1182        while (php_stream_readdir(dir, &d)) {
1183            if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1184                file_oomem(ms,
1185                strlen(fn) + strlen(d.d_name) + 2);
1186                errs++;
1187                php_stream_closedir(dir);
1188                goto out;
1189            }
1190            if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1191                continue;
1192            }
1193            if (files >= maxfiles) {
1194                size_t mlen;
1195                maxfiles = (maxfiles + 1) * 2;
1196                mlen = maxfiles * sizeof(*filearr);
1197                if ((filearr = CAST(char **,
1198                    erealloc(filearr, mlen))) == NULL) {
1199                    file_oomem(ms, mlen);
1200                    efree(mfn);
1201                    php_stream_closedir(dir);
1202                    errs++;
1203                    goto out;
1204                }
1205            }
1206            filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1207        }
1208        php_stream_closedir(dir);
1209        qsort(filearr, files, sizeof(*filearr), cmpstrp);
1210        for (i = 0; i < files; i++) {
1211            load_1(ms, action, filearr[i], &errs, mset);
1212            efree(filearr[i]);
1213        }
1214        efree(filearr);
1215    } else
1216        load_1(ms, action, fn, &errs, mset);
1217    if (errs)
1218        goto out;
1219
1220    for (j = 0; j < MAGIC_SETS; j++) {
1221        /* Set types of tests */
1222        for (i = 0; i < mset[j].count; ) {
1223            if (mset[j].me[i].mp->cont_level != 0) {
1224                i++;
1225                continue;
1226            }
1227            i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1228        }
1229        qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
1230            apprentice_sort);
1231
1232        /*
1233         * Make sure that any level 0 "default" line is last
1234         * (if one exists).
1235         */
1236        set_last_default(ms, mset[j].me, mset[j].count);
1237
1238        /* coalesce per file arrays into a single one */
1239        if (coalesce_entries(ms, mset[j].me, mset[j].count,
1240            &map->magic[j], &map->nmagic[j]) == -1) {
1241            errs++;
1242            goto out;
1243        }
1244    }
1245
1246out:
1247    for (j = 0; j < MAGIC_SETS; j++)
1248        magic_entry_free(mset[j].me, mset[j].count);
1249
1250    if (errs) {
1251        for (j = 0; j < MAGIC_SETS; j++) {
1252            if (map->magic[j])
1253                efree(map->magic[j]);
1254        }
1255        efree(map);
1256        return NULL;
1257    }
1258    return map;
1259}
1260
1261/*
1262 * extend the sign bit if the comparison is to be signed
1263 */
1264protected uint64_t
1265file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1266{
1267    if (!(m->flag & UNSIGNED)) {
1268        switch(m->type) {
1269        /*
1270         * Do not remove the casts below.  They are
1271         * vital.  When later compared with the data,
1272         * the sign extension must have happened.
1273         */
1274        case FILE_BYTE:
1275            v = (char) v;
1276            break;
1277        case FILE_SHORT:
1278        case FILE_BESHORT:
1279        case FILE_LESHORT:
1280            v = (short) v;
1281            break;
1282        case FILE_DATE:
1283        case FILE_BEDATE:
1284        case FILE_LEDATE:
1285        case FILE_MEDATE:
1286        case FILE_LDATE:
1287        case FILE_BELDATE:
1288        case FILE_LELDATE:
1289        case FILE_MELDATE:
1290        case FILE_LONG:
1291        case FILE_BELONG:
1292        case FILE_LELONG:
1293        case FILE_MELONG:
1294        case FILE_FLOAT:
1295        case FILE_BEFLOAT:
1296        case FILE_LEFLOAT:
1297            v = (int32_t) v;
1298            break;
1299        case FILE_QUAD:
1300        case FILE_BEQUAD:
1301        case FILE_LEQUAD:
1302        case FILE_QDATE:
1303        case FILE_QLDATE:
1304        case FILE_QWDATE:
1305        case FILE_BEQDATE:
1306        case FILE_BEQLDATE:
1307        case FILE_BEQWDATE:
1308        case FILE_LEQDATE:
1309        case FILE_LEQLDATE:
1310        case FILE_LEQWDATE:
1311        case FILE_DOUBLE:
1312        case FILE_BEDOUBLE:
1313        case FILE_LEDOUBLE:
1314            v = (int64_t) v;
1315            break;
1316        case FILE_STRING:
1317        case FILE_PSTRING:
1318        case FILE_BESTRING16:
1319        case FILE_LESTRING16:
1320        case FILE_REGEX:
1321        case FILE_SEARCH:
1322        case FILE_DEFAULT:
1323        case FILE_INDIRECT:
1324        case FILE_NAME:
1325        case FILE_USE:
1326        case FILE_CLEAR:
1327            break;
1328        default:
1329            if (ms->flags & MAGIC_CHECK)
1330                file_magwarn(ms, "cannot happen: m->type=%d\n",
1331                    m->type);
1332            return ~0U;
1333        }
1334    }
1335    return v;
1336}
1337
1338private int
1339string_modifier_check(struct magic_set *ms, struct magic *m)
1340{
1341    if ((ms->flags & MAGIC_CHECK) == 0)
1342        return 0;
1343
1344    if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
1345        file_magwarn(ms,
1346            "'/BHhLl' modifiers are only allowed for pascal strings\n");
1347        return -1;
1348    }
1349    switch (m->type) {
1350    case FILE_BESTRING16:
1351    case FILE_LESTRING16:
1352        if (m->str_flags != 0) {
1353            file_magwarn(ms,
1354                "no modifiers allowed for 16-bit strings\n");
1355            return -1;
1356        }
1357        break;
1358    case FILE_STRING:
1359    case FILE_PSTRING:
1360        if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1361            file_magwarn(ms,
1362                "'/%c' only allowed on regex and search\n",
1363                CHAR_REGEX_OFFSET_START);
1364            return -1;
1365        }
1366        break;
1367    case FILE_SEARCH:
1368        if (m->str_range == 0) {
1369            file_magwarn(ms,
1370                "missing range; defaulting to %d\n",
1371                            STRING_DEFAULT_RANGE);
1372            m->str_range = STRING_DEFAULT_RANGE;
1373            return -1;
1374        }
1375        break;
1376    case FILE_REGEX:
1377        if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1378            file_magwarn(ms, "'/%c' not allowed on regex\n",
1379                CHAR_COMPACT_WHITESPACE);
1380            return -1;
1381        }
1382        if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1383            file_magwarn(ms, "'/%c' not allowed on regex\n",
1384                CHAR_COMPACT_OPTIONAL_WHITESPACE);
1385            return -1;
1386        }
1387        break;
1388    default:
1389        file_magwarn(ms, "coding error: m->type=%d\n",
1390            m->type);
1391        return -1;
1392    }
1393    return 0;
1394}
1395
1396private int
1397get_op(char c)
1398{
1399    switch (c) {
1400    case '&':
1401        return FILE_OPAND;
1402    case '|':
1403        return FILE_OPOR;
1404    case '^':
1405        return FILE_OPXOR;
1406    case '+':
1407        return FILE_OPADD;
1408    case '-':
1409        return FILE_OPMINUS;
1410    case '*':
1411        return FILE_OPMULTIPLY;
1412    case '/':
1413        return FILE_OPDIVIDE;
1414    case '%':
1415        return FILE_OPMODULO;
1416    default:
1417        return -1;
1418    }
1419}
1420
1421#ifdef ENABLE_CONDITIONALS
1422private int
1423get_cond(const char *l, const char **t)
1424{
1425    static const struct cond_tbl_s {
1426        char name[8];
1427        size_t len;
1428        int cond;
1429    } cond_tbl[] = {
1430        { "if",     2,  COND_IF },
1431        { "elif",   4,  COND_ELIF },
1432        { "else",   4,  COND_ELSE },
1433        { "",       0,  COND_NONE },
1434    };
1435    const struct cond_tbl_s *p;
1436
1437    for (p = cond_tbl; p->len; p++) {
1438        if (strncmp(l, p->name, p->len) == 0 &&
1439            isspace((unsigned char)l[p->len])) {
1440            if (t)
1441                *t = l + p->len;
1442            break;
1443        }
1444    }
1445    return p->cond;
1446}
1447
1448private int
1449check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1450{
1451    int last_cond;
1452    last_cond = ms->c.li[cont_level].last_cond;
1453
1454    switch (cond) {
1455    case COND_IF:
1456        if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1457            if (ms->flags & MAGIC_CHECK)
1458                file_magwarn(ms, "syntax error: `if'");
1459            return -1;
1460        }
1461        last_cond = COND_IF;
1462        break;
1463
1464    case COND_ELIF:
1465        if (last_cond != COND_IF && last_cond != COND_ELIF) {
1466            if (ms->flags & MAGIC_CHECK)
1467                file_magwarn(ms, "syntax error: `elif'");
1468            return -1;
1469        }
1470        last_cond = COND_ELIF;
1471        break;
1472
1473    case COND_ELSE:
1474        if (last_cond != COND_IF && last_cond != COND_ELIF) {
1475            if (ms->flags & MAGIC_CHECK)
1476                file_magwarn(ms, "syntax error: `else'");
1477            return -1;
1478        }
1479        last_cond = COND_NONE;
1480        break;
1481
1482    case COND_NONE:
1483        last_cond = COND_NONE;
1484        break;
1485    }
1486
1487    ms->c.li[cont_level].last_cond = last_cond;
1488    return 0;
1489}
1490#endif /* ENABLE_CONDITIONALS */
1491
1492/*
1493 * parse one line from magic file, put into magic[index++] if valid
1494 */
1495private int
1496parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1497    size_t lineno, int action)
1498{
1499#ifdef ENABLE_CONDITIONALS
1500    static uint32_t last_cont_level = 0;
1501#endif
1502    size_t i;
1503    struct magic *m;
1504    const char *l = line;
1505    char *t;
1506    int op;
1507    uint32_t cont_level;
1508    int32_t diff;
1509
1510    cont_level = 0;
1511
1512    /*
1513     * Parse the offset.
1514     */
1515    while (*l == '>') {
1516        ++l;        /* step over */
1517        cont_level++;
1518    }
1519#ifdef ENABLE_CONDITIONALS
1520    if (cont_level == 0 || cont_level > last_cont_level)
1521        if (file_check_mem(ms, cont_level) == -1)
1522            return -1;
1523    last_cont_level = cont_level;
1524#endif
1525    if (cont_level != 0) {
1526        if (me->mp == NULL) {
1527            file_magerror(ms, "No current entry for continuation");
1528            return -1;
1529        }
1530        if (me->cont_count == 0) {
1531            file_magerror(ms, "Continuations present with 0 count");
1532            return -1;
1533        }
1534        m = &me->mp[me->cont_count - 1];
1535        diff = (int32_t)cont_level - (int32_t)m->cont_level;
1536        if (diff > 1)
1537            file_magwarn(ms, "New continuation level %u is more "
1538                "than one larger than current level %u", cont_level,
1539                m->cont_level);
1540        if (me->cont_count == me->max_count) {
1541            struct magic *nm;
1542            size_t cnt = me->max_count + ALLOC_CHUNK;
1543            if ((nm = CAST(struct magic *, erealloc(me->mp,
1544                sizeof(*nm) * cnt))) == NULL) {
1545                file_oomem(ms, sizeof(*nm) * cnt);
1546                return -1;
1547            }
1548            me->mp = m = nm;
1549            me->max_count = CAST(uint32_t, cnt);
1550        }
1551        m = &me->mp[me->cont_count++];
1552        (void)memset(m, 0, sizeof(*m));
1553        m->cont_level = cont_level;
1554    } else {
1555        static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1556        if (me->mp != NULL)
1557            return 1;
1558        if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
1559            file_oomem(ms, len);
1560            return -1;
1561        }
1562        me->mp = m;
1563        me->max_count = ALLOC_CHUNK;
1564        (void)memset(m, 0, sizeof(*m));
1565        m->factor_op = FILE_FACTOR_OP_NONE;
1566        m->cont_level = 0;
1567        me->cont_count = 1;
1568    }
1569    m->lineno = CAST(uint32_t, lineno);
1570
1571    if (*l == '&') {  /* m->cont_level == 0 checked below. */
1572                ++l;            /* step over */
1573                m->flag |= OFFADD;
1574        }
1575    if (*l == '(') {
1576        ++l;        /* step over */
1577        m->flag |= INDIR;
1578        if (m->flag & OFFADD)
1579            m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1580
1581        if (*l == '&') {  /* m->cont_level == 0 checked below */
1582            ++l;            /* step over */
1583            m->flag |= OFFADD;
1584        }
1585    }
1586    /* Indirect offsets are not valid at level 0. */
1587    if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1588        if (ms->flags & MAGIC_CHECK)
1589            file_magwarn(ms, "relative offset at level 0");
1590
1591    /* get offset, then skip over it */
1592    m->offset = (uint32_t)strtoul(l, &t, 0);
1593        if (l == t)
1594        if (ms->flags & MAGIC_CHECK)
1595            file_magwarn(ms, "offset `%s' invalid", l);
1596        l = t;
1597
1598    if (m->flag & INDIR) {
1599        m->in_type = FILE_LONG;
1600        m->in_offset = 0;
1601        /*
1602         * read [.lbs][+-]nnnnn)
1603         */
1604        if (*l == '.') {
1605            l++;
1606            switch (*l) {
1607            case 'l':
1608                m->in_type = FILE_LELONG;
1609                break;
1610            case 'L':
1611                m->in_type = FILE_BELONG;
1612                break;
1613            case 'm':
1614                m->in_type = FILE_MELONG;
1615                break;
1616            case 'h':
1617            case 's':
1618                m->in_type = FILE_LESHORT;
1619                break;
1620            case 'H':
1621            case 'S':
1622                m->in_type = FILE_BESHORT;
1623                break;
1624            case 'c':
1625            case 'b':
1626            case 'C':
1627            case 'B':
1628                m->in_type = FILE_BYTE;
1629                break;
1630            case 'e':
1631            case 'f':
1632            case 'g':
1633                m->in_type = FILE_LEDOUBLE;
1634                break;
1635            case 'E':
1636            case 'F':
1637            case 'G':
1638                m->in_type = FILE_BEDOUBLE;
1639                break;
1640            case 'i':
1641                m->in_type = FILE_LEID3;
1642                break;
1643            case 'I':
1644                m->in_type = FILE_BEID3;
1645                break;
1646            default:
1647                if (ms->flags & MAGIC_CHECK)
1648                    file_magwarn(ms,
1649                        "indirect offset type `%c' invalid",
1650                        *l);
1651                break;
1652            }
1653            l++;
1654        }
1655
1656        m->in_op = 0;
1657        if (*l == '~') {
1658            m->in_op |= FILE_OPINVERSE;
1659            l++;
1660        }
1661        if ((op = get_op(*l)) != -1) {
1662            m->in_op |= op;
1663            l++;
1664        }
1665        if (*l == '(') {
1666            m->in_op |= FILE_OPINDIRECT;
1667            l++;
1668        }
1669        if (isdigit((unsigned char)*l) || *l == '-') {
1670            m->in_offset = (int32_t)strtol(l, &t, 0);
1671            if (l == t)
1672                if (ms->flags & MAGIC_CHECK)
1673                    file_magwarn(ms,
1674                        "in_offset `%s' invalid", l);
1675            l = t;
1676        }
1677        if (*l++ != ')' ||
1678            ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1679            if (ms->flags & MAGIC_CHECK)
1680                file_magwarn(ms,
1681                    "missing ')' in indirect offset");
1682    }
1683    EATAB;
1684
1685#ifdef ENABLE_CONDITIONALS
1686    m->cond = get_cond(l, &l);
1687    if (check_cond(ms, m->cond, cont_level) == -1)
1688        return -1;
1689
1690    EATAB;
1691#endif
1692
1693    /*
1694     * Parse the type.
1695     */
1696    if (*l == 'u') {
1697        /*
1698         * Try it as a keyword type prefixed by "u"; match what
1699         * follows the "u".  If that fails, try it as an SUS
1700         * integer type.
1701         */
1702        m->type = get_type(type_tbl, l + 1, &l);
1703        if (m->type == FILE_INVALID) {
1704            /*
1705             * Not a keyword type; parse it as an SUS type,
1706             * 'u' possibly followed by a number or C/S/L.
1707             */
1708            m->type = get_standard_integer_type(l, &l);
1709        }
1710        // It's unsigned.
1711        if (m->type != FILE_INVALID)
1712            m->flag |= UNSIGNED;
1713    } else {
1714        /*
1715         * Try it as a keyword type.  If that fails, try it as
1716         * an SUS integer type if it begins with "d" or as an
1717         * SUS string type if it begins with "s".  In any case,
1718         * it's not unsigned.
1719         */
1720        m->type = get_type(type_tbl, l, &l);
1721        if (m->type == FILE_INVALID) {
1722            /*
1723             * Not a keyword type; parse it as an SUS type,
1724             * either 'd' possibly followed by a number or
1725             * C/S/L, or just 's'.
1726             */
1727            if (*l == 'd')
1728                m->type = get_standard_integer_type(l, &l);
1729            else if (*l == 's' && !isalpha((unsigned char)l[1])) {
1730                m->type = FILE_STRING;
1731        ++l;
1732            }
1733        }
1734    }
1735
1736    if (m->type == FILE_INVALID) {
1737        /* Not found - try it as a special keyword. */
1738        m->type = get_type(special_tbl, l, &l);
1739    }
1740
1741    if (m->type == FILE_INVALID) {
1742        if (ms->flags & MAGIC_CHECK)
1743            file_magwarn(ms, "type `%s' invalid", l);
1744        /*if (me->mp) {
1745            efree(me->mp);
1746            me->mp = NULL;
1747        }*/
1748        return -1;
1749    }
1750
1751    /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1752    /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1753
1754    m->mask_op = 0;
1755    if (*l == '~') {
1756        if (!IS_LIBMAGIC_STRING(m->type))
1757            m->mask_op |= FILE_OPINVERSE;
1758        else if (ms->flags & MAGIC_CHECK)
1759            file_magwarn(ms, "'~' invalid for string types");
1760        ++l;
1761    }
1762    m->str_range = 0;
1763    m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1764    if ((op = get_op(*l)) != -1) {
1765        if (!IS_LIBMAGIC_STRING(m->type)) {
1766            uint64_t val;
1767            ++l;
1768            m->mask_op |= op;
1769            val = (uint64_t)strtoull(l, &t, 0);
1770            l = t;
1771            m->num_mask = file_signextend(ms, m, val);
1772            eatsize(&l);
1773        }
1774        else if (op == FILE_OPDIVIDE) {
1775            int have_range = 0;
1776            while (!isspace((unsigned char)*++l)) {
1777                switch (*l) {
1778                case '0':  case '1':  case '2':
1779                case '3':  case '4':  case '5':
1780                case '6':  case '7':  case '8':
1781                case '9':
1782                    if (have_range &&
1783                        (ms->flags & MAGIC_CHECK))
1784                        file_magwarn(ms,
1785                            "multiple ranges");
1786                    have_range = 1;
1787                    m->str_range = CAST(uint32_t,
1788                        strtoul(l, &t, 0));
1789                    if (m->str_range == 0)
1790                        file_magwarn(ms,
1791                            "zero range");
1792                    l = t - 1;
1793                    break;
1794                case CHAR_COMPACT_WHITESPACE:
1795                    m->str_flags |=
1796                        STRING_COMPACT_WHITESPACE;
1797                    break;
1798                case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1799                    m->str_flags |=
1800                        STRING_COMPACT_OPTIONAL_WHITESPACE;
1801                    break;
1802                case CHAR_IGNORE_LOWERCASE:
1803                    m->str_flags |= STRING_IGNORE_LOWERCASE;
1804                    break;
1805                case CHAR_IGNORE_UPPERCASE:
1806                    m->str_flags |= STRING_IGNORE_UPPERCASE;
1807                    break;
1808                case CHAR_REGEX_OFFSET_START:
1809                    m->str_flags |= REGEX_OFFSET_START;
1810                    break;
1811                case CHAR_BINTEST:
1812                    m->str_flags |= STRING_BINTEST;
1813                    break;
1814                case CHAR_TEXTTEST:
1815                    m->str_flags |= STRING_TEXTTEST;
1816                    break;
1817                case CHAR_TRIM:
1818                    m->str_flags |= STRING_TRIM;
1819                    break;
1820                case CHAR_PSTRING_1_LE:
1821                    if (m->type != FILE_PSTRING)
1822                        goto bad;
1823                    m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE;
1824                    break;
1825                case CHAR_PSTRING_2_BE:
1826                    if (m->type != FILE_PSTRING)
1827                        goto bad;
1828                    m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE;
1829                    break;
1830                case CHAR_PSTRING_2_LE:
1831                    if (m->type != FILE_PSTRING)
1832                        goto bad;
1833                    m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE;
1834                    break;
1835                case CHAR_PSTRING_4_BE:
1836                    if (m->type != FILE_PSTRING)
1837                        goto bad;
1838                    m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
1839                    break;
1840                case CHAR_PSTRING_4_LE:
1841                    if (m->type != FILE_PSTRING)
1842                        goto bad;
1843                    m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
1844                    break;
1845                case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1846                    if (m->type != FILE_PSTRING)
1847                        goto bad;
1848                    m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1849                    break;
1850                default:
1851                bad:
1852                    if (ms->flags & MAGIC_CHECK)
1853                        file_magwarn(ms,
1854                            "string extension `%c' "
1855                            "invalid", *l);
1856                    return -1;
1857                }
1858                /* allow multiple '/' for readability */
1859                if (l[1] == '/' &&
1860                    !isspace((unsigned char)l[2]))
1861                    l++;
1862            }
1863            if (string_modifier_check(ms, m) == -1)
1864                return -1;
1865        }
1866        else {
1867            if (ms->flags & MAGIC_CHECK)
1868                file_magwarn(ms, "invalid string op: %c", *t);
1869            return -1;
1870        }
1871    }
1872    /*
1873     * We used to set mask to all 1's here, instead let's just not do
1874     * anything if mask = 0 (unless you have a better idea)
1875     */
1876    EATAB;
1877
1878    switch (*l) {
1879    case '>':
1880    case '<':
1881        m->reln = *l;
1882        ++l;
1883        if (*l == '=') {
1884            if (ms->flags & MAGIC_CHECK) {
1885                file_magwarn(ms, "%c= not supported",
1886                    m->reln);
1887                return -1;
1888            }
1889           ++l;
1890        }
1891        break;
1892    /* Old-style anding: "0 byte &0x80 dynamically linked" */
1893    case '&':
1894    case '^':
1895    case '=':
1896        m->reln = *l;
1897        ++l;
1898        if (*l == '=') {
1899           /* HP compat: ignore &= etc. */
1900           ++l;
1901        }
1902        break;
1903    case '!':
1904        m->reln = *l;
1905        ++l;
1906        break;
1907    default:
1908        m->reln = '=';  /* the default relation */
1909        if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
1910            isspace((unsigned char)l[1])) || !l[1])) {
1911            m->reln = *l;
1912            ++l;
1913        }
1914        break;
1915    }
1916    /*
1917     * Grab the value part, except for an 'x' reln.
1918     */
1919    if (m->reln != 'x' && getvalue(ms, m, &l, action))
1920        return -1;
1921
1922    /*
1923     * TODO finish this macro and start using it!
1924     * #define offsetcheck {if (offset > HOWMANY-1)
1925     *  magwarn("offset too big"); }
1926     */
1927
1928    /*
1929     * Now get last part - the description
1930     */
1931    EATAB;
1932    if (l[0] == '\b') {
1933        ++l;
1934        m->flag |= NOSPACE;
1935    } else if ((l[0] == '\\') && (l[1] == 'b')) {
1936        ++l;
1937        ++l;
1938        m->flag |= NOSPACE;
1939    }
1940    for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
1941        continue;
1942    if (i == sizeof(m->desc)) {
1943        m->desc[sizeof(m->desc) - 1] = '\0';
1944        if (ms->flags & MAGIC_CHECK)
1945            file_magwarn(ms, "description `%s' truncated", m->desc);
1946    }
1947
1948        /*
1949     * We only do this check while compiling, or if any of the magic
1950     * files were not compiled.
1951         */
1952        if (ms->flags & MAGIC_CHECK) {
1953        if (check_format(ms, m) == -1)
1954            return -1;
1955    }
1956    m->mimetype[0] = '\0';      /* initialise MIME type to none */
1957    return 0;
1958}
1959
1960/*
1961 * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
1962 * if valid
1963 */
1964private int
1965parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
1966{
1967    const char *l = line;
1968    char *el;
1969    unsigned long factor;
1970    struct magic *m = &me->mp[0];
1971
1972    if (m->factor_op != FILE_FACTOR_OP_NONE) {
1973        file_magwarn(ms,
1974            "Current entry already has a strength type: %c %d",
1975            m->factor_op, m->factor);
1976        return -1;
1977    }
1978    if (m->type == FILE_NAME) {
1979        file_magwarn(ms, "%s: Strength setting is not supported in "
1980            "\"name\" magic entries", m->value.s);
1981        return -1;
1982    }
1983    EATAB;
1984    switch (*l) {
1985    case FILE_FACTOR_OP_NONE:
1986    case FILE_FACTOR_OP_PLUS:
1987    case FILE_FACTOR_OP_MINUS:
1988    case FILE_FACTOR_OP_TIMES:
1989    case FILE_FACTOR_OP_DIV:
1990        m->factor_op = *l++;
1991        break;
1992    default:
1993        file_magwarn(ms, "Unknown factor op `%c'", *l);
1994        return -1;
1995    }
1996    EATAB;
1997    factor = strtoul(l, &el, 0);
1998    if (factor > 255) {
1999        file_magwarn(ms, "Too large factor `%lu'", factor);
2000        goto out;
2001    }
2002    if (*el && !isspace((unsigned char)*el)) {
2003        file_magwarn(ms, "Bad factor `%s'", l);
2004        goto out;
2005    }
2006    m->factor = (uint8_t)factor;
2007    if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2008        file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2009            m->factor_op, m->factor);
2010        goto out;
2011    }
2012    return 0;
2013out:
2014    m->factor_op = FILE_FACTOR_OP_NONE;
2015    m->factor = 0;
2016    return -1;
2017}
2018
2019/*
2020 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2021 * magic[index - 1]
2022 */
2023private int
2024parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
2025{
2026    size_t i;
2027    const char *l = line;
2028    struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2029
2030    if (m->apple[0] != '\0') {
2031        file_magwarn(ms, "Current entry already has a APPLE type "
2032            "`%.8s', new type `%s'", m->mimetype, l);
2033        return -1;
2034    }
2035
2036    EATAB;
2037    for (i = 0; *l && ((isascii((unsigned char)*l) &&
2038        isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
2039        i < sizeof(m->apple); m->apple[i++] = *l++)
2040        continue;
2041    if (i == sizeof(m->apple) && *l) {
2042        /* We don't need to NUL terminate here, printing handles it */
2043        if (ms->flags & MAGIC_CHECK)
2044            file_magwarn(ms, "APPLE type `%s' truncated %"
2045                SIZE_T_FORMAT "u", line, i);
2046    }
2047
2048    if (i > 0)
2049        return 0;
2050    else
2051        return -1;
2052}
2053
2054/*
2055 * parse a MIME annotation line from magic file, put into magic[index - 1]
2056 * if valid
2057 */
2058private int
2059parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
2060{
2061    size_t i;
2062    const char *l = line;
2063    struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2064
2065    if (m->mimetype[0] != '\0') {
2066        file_magwarn(ms, "Current entry already has a MIME type `%s',"
2067            " new type `%s'", m->mimetype, l);
2068        return -1;
2069    }
2070
2071    EATAB;
2072    for (i = 0; *l && ((isascii((unsigned char)*l) &&
2073        isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
2074        i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
2075        continue;
2076    if (i == sizeof(m->mimetype)) {
2077        m->mimetype[sizeof(m->mimetype) - 1] = '\0';
2078        if (ms->flags & MAGIC_CHECK)
2079            file_magwarn(ms, "MIME type `%s' truncated %"
2080                SIZE_T_FORMAT "u", m->mimetype, i);
2081    } else
2082        m->mimetype[i] = '\0';
2083
2084    if (i > 0)
2085        return 0;
2086    else
2087        return -1;
2088}
2089
2090private int
2091check_format_type(const char *ptr, int type)
2092{
2093    int quad = 0;
2094    if (*ptr == '\0') {
2095        /* Missing format string; bad */
2096        return -1;
2097    }
2098
2099    switch (type) {
2100    case FILE_FMT_QUAD:
2101        quad = 1;
2102        /*FALLTHROUGH*/
2103    case FILE_FMT_NUM:
2104        if (*ptr == '-')
2105            ptr++;
2106        if (*ptr == '.')
2107            ptr++;
2108        while (isdigit((unsigned char)*ptr)) ptr++;
2109        if (*ptr == '.')
2110            ptr++;
2111        while (isdigit((unsigned char)*ptr)) ptr++;
2112        if (quad) {
2113            if (*ptr++ != 'l')
2114                return -1;
2115            if (*ptr++ != 'l')
2116                return -1;
2117        }
2118
2119        switch (*ptr++) {
2120        case 'l':
2121            switch (*ptr++) {
2122            case 'i':
2123            case 'd':
2124            case 'u':
2125            case 'o':
2126            case 'x':
2127            case 'X':
2128                return 0;
2129            default:
2130                return -1;
2131            }
2132
2133        case 'h':
2134            switch (*ptr++) {
2135            case 'h':
2136                switch (*ptr++) {
2137                case 'i':
2138                case 'd':
2139                case 'u':
2140                case 'o':
2141                case 'x':
2142                case 'X':
2143                    return 0;
2144                default:
2145                    return -1;
2146                }
2147            case 'd':
2148                return 0;
2149            default:
2150                return -1;
2151            }
2152
2153        case 'i':
2154        case 'c':
2155        case 'd':
2156        case 'u':
2157        case 'o':
2158        case 'x':
2159        case 'X':
2160            return 0;
2161
2162        default:
2163            return -1;
2164        }
2165
2166    case FILE_FMT_FLOAT:
2167    case FILE_FMT_DOUBLE:
2168        if (*ptr == '-')
2169            ptr++;
2170        if (*ptr == '.')
2171            ptr++;
2172        while (isdigit((unsigned char)*ptr)) ptr++;
2173        if (*ptr == '.')
2174            ptr++;
2175        while (isdigit((unsigned char)*ptr)) ptr++;
2176
2177        switch (*ptr++) {
2178        case 'e':
2179        case 'E':
2180        case 'f':
2181        case 'F':
2182        case 'g':
2183        case 'G':
2184            return 0;
2185
2186        default:
2187            return -1;
2188        }
2189
2190
2191    case FILE_FMT_STR:
2192        if (*ptr == '-')
2193            ptr++;
2194        while (isdigit((unsigned char )*ptr))
2195            ptr++;
2196        if (*ptr == '.') {
2197            ptr++;
2198            while (isdigit((unsigned char )*ptr))
2199                ptr++;
2200        }
2201
2202        switch (*ptr++) {
2203        case 's':
2204            return 0;
2205        default:
2206            return -1;
2207        }
2208
2209    default:
2210        /* internal error */
2211        abort();
2212    }
2213    /*NOTREACHED*/
2214    return -1;
2215}
2216
2217/*
2218 * Check that the optional printf format in description matches
2219 * the type of the magic.
2220 */
2221private int
2222check_format(struct magic_set *ms, struct magic *m)
2223{
2224    char *ptr;
2225
2226    for (ptr = m->desc; *ptr; ptr++)
2227        if (*ptr == '%')
2228            break;
2229    if (*ptr == '\0') {
2230        /* No format string; ok */
2231        return 1;
2232    }
2233
2234    assert(file_nformats == file_nnames);
2235
2236    if (m->type >= file_nformats) {
2237        file_magwarn(ms, "Internal error inconsistency between "
2238            "m->type and format strings");
2239        return -1;
2240    }
2241    if (file_formats[m->type] == FILE_FMT_NONE) {
2242        file_magwarn(ms, "No format string for `%s' with description "
2243            "`%s'", m->desc, file_names[m->type]);
2244        return -1;
2245    }
2246
2247    ptr++;
2248    if (check_format_type(ptr, file_formats[m->type]) == -1) {
2249        /*
2250         * TODO: this error message is unhelpful if the format
2251         * string is not one character long
2252         */
2253        file_magwarn(ms, "Printf format `%c' is not valid for type "
2254            "`%s' in description `%s'", *ptr ? *ptr : '?',
2255            file_names[m->type], m->desc);
2256        return -1;
2257    }
2258
2259    for (; *ptr; ptr++) {
2260        if (*ptr == '%') {
2261            file_magwarn(ms,
2262                "Too many format strings (should have at most one) "
2263                "for `%s' with description `%s'",
2264                file_names[m->type], m->desc);
2265            return -1;
2266        }
2267    }
2268    return 0;
2269}
2270
2271/*
2272 * Read a numeric value from a pointer, into the value union of a magic
2273 * pointer, according to the magic type.  Update the string pointer to point
2274 * just after the number read.  Return 0 for success, non-zero for failure.
2275 */
2276private int
2277getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2278{
2279    switch (m->type) {
2280    case FILE_BESTRING16:
2281    case FILE_LESTRING16:
2282    case FILE_STRING:
2283    case FILE_PSTRING:
2284    case FILE_REGEX:
2285    case FILE_SEARCH:
2286    case FILE_NAME:
2287    case FILE_USE:
2288        *p = getstr(ms, m, *p, action == FILE_COMPILE);
2289        if (*p == NULL) {
2290            if (ms->flags & MAGIC_CHECK)
2291                file_magwarn(ms, "cannot get string from `%s'",
2292                    m->value.s);
2293            return -1;
2294        }
2295        return 0;
2296    case FILE_FLOAT:
2297    case FILE_BEFLOAT:
2298    case FILE_LEFLOAT:
2299        if (m->reln != 'x') {
2300            char *ep;
2301#ifdef HAVE_STRTOF
2302            m->value.f = strtof(*p, &ep);
2303#else
2304            m->value.f = (float)strtod(*p, &ep);
2305#endif
2306            *p = ep;
2307        }
2308        return 0;
2309    case FILE_DOUBLE:
2310    case FILE_BEDOUBLE:
2311    case FILE_LEDOUBLE:
2312        if (m->reln != 'x') {
2313            char *ep;
2314            m->value.d = strtod(*p, &ep);
2315            *p = ep;
2316        }
2317        return 0;
2318    default:
2319        if (m->reln != 'x') {
2320            char *ep;
2321            m->value.q = file_signextend(ms, m,
2322                (uint64_t)strtoull(*p, &ep, 0));
2323            *p = ep;
2324            eatsize(p);
2325        }
2326        return 0;
2327    }
2328}
2329
2330/*
2331 * Convert a string containing C character escapes.  Stop at an unescaped
2332 * space or tab.
2333 * Copy the converted version to "m->value.s", and the length in m->vallen.
2334 * Return updated scan pointer as function result. Warn if set.
2335 */
2336private const char *
2337getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2338{
2339    const char *origs = s;
2340    char    *p = m->value.s;
2341    size_t  plen = sizeof(m->value.s);
2342    char    *origp = p;
2343    char    *pmax = p + plen - 1;
2344    int c;
2345    int val;
2346
2347    while ((c = *s++) != '\0') {
2348        if (isspace((unsigned char) c))
2349            break;
2350        if (p >= pmax) {
2351            file_error(ms, 0, "string too long: `%s'", origs);
2352            return NULL;
2353        }
2354        if (c == '\\') {
2355            switch(c = *s++) {
2356
2357            case '\0':
2358                if (warn)
2359                    file_magwarn(ms, "incomplete escape");
2360                goto out;
2361
2362            case '\t':
2363                if (warn) {
2364                    file_magwarn(ms,
2365                        "escaped tab found, use \\t instead");
2366                    warn = 0;   /* already did */
2367                }
2368                /*FALLTHROUGH*/
2369            default:
2370                if (warn) {
2371                    if (isprint((unsigned char)c)) {
2372                        /* Allow escaping of
2373                         * ``relations'' */
2374                        if (strchr("<>&^=!", c) == NULL
2375                            && (m->type != FILE_REGEX ||
2376                            strchr("[]().*?^$|{}", c)
2377                            == NULL)) {
2378                            file_magwarn(ms, "no "
2379                                "need to escape "
2380                                "`%c'", c);
2381                        }
2382                    } else {
2383                        file_magwarn(ms,
2384                            "unknown escape sequence: "
2385                            "\\%03o", c);
2386                    }
2387                }
2388                /*FALLTHROUGH*/
2389            /* space, perhaps force people to use \040? */
2390            case ' ':
2391#if 0
2392            /*
2393             * Other things people escape, but shouldn't need to,
2394             * so we disallow them
2395             */
2396            case '\'':
2397            case '"':
2398            case '?':
2399#endif
2400            /* Relations */
2401            case '>':
2402            case '<':
2403            case '&':
2404            case '^':
2405            case '=':
2406            case '!':
2407            /* and baskslash itself */
2408            case '\\':
2409                *p++ = (char) c;
2410                break;
2411
2412            case 'a':
2413                *p++ = '\a';
2414                break;
2415
2416            case 'b':
2417                *p++ = '\b';
2418                break;
2419
2420            case 'f':
2421                *p++ = '\f';
2422                break;
2423
2424            case 'n':
2425                *p++ = '\n';
2426                break;
2427
2428            case 'r':
2429                *p++ = '\r';
2430                break;
2431
2432            case 't':
2433                *p++ = '\t';
2434                break;
2435
2436            case 'v':
2437                *p++ = '\v';
2438                break;
2439
2440            /* \ and up to 3 octal digits */
2441            case '0':
2442            case '1':
2443            case '2':
2444            case '3':
2445            case '4':
2446            case '5':
2447            case '6':
2448            case '7':
2449                val = c - '0';
2450                c = *s++;  /* try for 2 */
2451                if (c >= '0' && c <= '7') {
2452                    val = (val << 3) | (c - '0');
2453                    c = *s++;  /* try for 3 */
2454                    if (c >= '0' && c <= '7')
2455                        val = (val << 3) | (c-'0');
2456                    else
2457                        --s;
2458                }
2459                else
2460                    --s;
2461                *p++ = (char)val;
2462                break;
2463
2464            /* \x and up to 2 hex digits */
2465            case 'x':
2466                val = 'x';  /* Default if no digits */
2467                c = hextoint(*s++); /* Get next char */
2468                if (c >= 0) {
2469                    val = c;
2470                    c = hextoint(*s++);
2471                    if (c >= 0)
2472                        val = (val << 4) + c;
2473                    else
2474                        --s;
2475                } else
2476                    --s;
2477                *p++ = (char)val;
2478                break;
2479            }
2480        } else
2481            *p++ = (char)c;
2482    }
2483out:
2484    *p = '\0';
2485    m->vallen = CAST(unsigned char, (p - origp));
2486    if (m->type == FILE_PSTRING)
2487        m->vallen += (unsigned char)file_pstring_length_size(m);
2488    return s;
2489}
2490
2491
/*
 * Value of a single hexadecimal digit character.
 * Returns -1 when the character (looked at as an unsigned char) is not
 * ASCII or is not a hex digit.
 */
private int
hextoint(int c)
{
    const unsigned char uc = (unsigned char) c;

    if (isascii(uc)) {
        if (isdigit(uc))
            return c - '0';
        if ('a' <= c && c <= 'f')
            return 10 + (c - 'a');
        if ('A' <= c && c <= 'F')
            return 10 + (c - 'A');
    }
    return -1;
}
2506
2507
/*
 * Write a string to fp, rendering non-printable bytes as C character
 * escapes.
 *
 * When len equals ~0U the string is taken to be NUL terminated; otherwise
 * exactly len bytes are written.  Bytes in the range 040..0176 are emitted
 * as they are; the control characters \a \b \f \n \r \t \v use their
 * letter escapes and everything else becomes a backslash followed by three
 * octal digits.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
    static const char ctl[] = { '\a', '\b', '\f', '\n', '\r', '\t', '\v' };
    static const char letter[] = { 'a', 'b', 'f', 'n', 'r', 't', 'v' };
    const char *hit;
    char ch;

    for (;;) {
        if (len != ~0U) {
            /* counted mode */
            if (len-- == 0)
                break;
            ch = *s++;
        } else {
            /* NUL-terminated mode */
            ch = *s++;
            if (ch == '\0')
                break;
        }

        if (ch >= 040 && ch <= 0176) {  /* TODO isprint && !iscntrl */
            (void) fputc(ch, fp);
            continue;
        }

        (void) fputc('\\', fp);
        hit = memchr(ctl, ch, sizeof(ctl));
        if (hit != NULL)
            (void) fputc(letter[hit - ctl], fp);
        else
            (void) fprintf(fp, "%.3o", ch & 0377);
    }
}
2567
2568/*
2569 * eatsize(): Eat the size spec from a number [eg. 10UL]
2570 */
2571private void
2572eatsize(const char **p)
2573{
2574    const char *l = *p;
2575
2576    if (LOWCASE(*l) == 'u')
2577        l++;
2578
2579    switch (LOWCASE(*l)) {
2580    case 'l':    /* long */
2581    case 's':    /* short */
2582    case 'h':    /* short */
2583    case 'b':    /* char/byte */
2584    case 'c':    /* char/byte */
2585        l++;
2586        /*FALLTHROUGH*/
2587    default:
2588        break;
2589    }
2590
2591    *p = l;
2592}
2593
/*
 * Load a compiled magic database into a freshly allocated magic_map.
 *
 * With fn == NULL the built-in php_magic_database is used in place;
 * otherwise the whole file `fn' is read through a PHP stream into a heap
 * buffer.  The data is checked for the magic number (in either byte order)
 * and for the supported version, the per-set entry counts are read from the
 * header, and the entries are byte-swapped when the magic number was found
 * in the opposite byte order.
 *
 * Returns the map on success.  On failure everything acquired here is
 * released (stream closed, map handed to apprentice_unmap(), dbname freed)
 * and NULL is returned.
 */

private struct magic_map *
apprentice_map(struct magic_set *ms, const char *fn)
{
    uint32_t *ptr;
    uint32_t version, entries, nentries;
    int needsbyteswap;
    char *dbname = NULL;
    struct magic_map *map;
    size_t i;
    php_stream *stream = NULL;
    php_stream_statbuf st;


    TSRMLS_FETCH();

    if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
        file_oomem(ms, sizeof(*map));
        efree(map);
        goto error;
    }

    /* no file name: use the database compiled into the binary */
    if (fn == NULL) {
        map->p = (void *)&php_magic_database;
        goto internal_loaded;
    }

#ifdef PHP_WIN32
    /* Don't bother on windows with php_stream_open_wrapper,
    return to give apprentice_load() a chance. */
    if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
               if (st.sb.st_mode & S_IFDIR) {
                       goto error;
               }
       }
#endif

    /* dbname is only used in the messages below; the stream is opened on fn */
    dbname = mkdbname(ms, fn, 0);
    if (dbname == NULL)
        goto error;

        stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);

    if (!stream) {
        goto error;
    }

    if (php_stream_stat(stream, &st) < 0) {
        file_error(ms, errno, "cannot stat `%s'", dbname);
        goto error;
    }

    /* need at least the magic number and the version word */
    if (st.sb.st_size < 8) {
        file_error(ms, 0, "file `%s' is too small", dbname);
        goto error;
    }

    /* read the complete file into a heap buffer */
    map->len = (size_t)st.sb.st_size;
    if ((map->p = CAST(void *, emalloc(map->len))) == NULL) {
        file_oomem(ms, map->len);
        goto error;
    }
    if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
        file_badread(ms);
        goto error;
    }
    /*
     * NOTE(review): presumably a zero length tells apprentice_unmap() how
     * the buffer has to be released -- confirm against apprentice_unmap().
     */
    map->len = 0;
    /* NOTE(review): RET is not referenced anywhere in this function */
#define RET 1

    php_stream_close(stream);
    stream = NULL;

internal_loaded:
    /* word 0 is the magic number, word 1 the version */
    ptr = (uint32_t *)(void *)map->p;
    if (*ptr != MAGICNO) {
        if (swap4(*ptr) != MAGICNO) {
            file_error(ms, 0, "bad magic in `%s'", dbname);
            goto error;
        }
        needsbyteswap = 1;
    } else
        needsbyteswap = 0;
    if (needsbyteswap)
        version = swap4(ptr[1]);
    else
        version = ptr[1];
    if (version != VERSIONNO) {
        file_error(ms, 0, "File %d.%d supports only version %d magic "
            "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
            VERSIONNO, dbname, version);
        goto error;
    }

    /* php_magic_database is a const, performing writes will segfault. This is for big-endian
    machines only, PPC and Sparc specifically. Consider static variable or MINIT in
    future. */
    if (needsbyteswap && fn == NULL) {
        /* swap a private copy instead; ptr keeps pointing at the original */
        map->p = emalloc(sizeof(php_magic_database));
        map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
    }

    if (NULL != fn) {
        /* the value stored in nentries here is overwritten below before it is read */
        nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
        entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
        if ((off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
            file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
                dbname, (unsigned long long)st.sb.st_size,
                sizeof(struct magic));
            goto error;
        }
    }
    /* the first struct magic sized slot holds the header; entries follow it */
    map->magic[0] = CAST(struct magic *, map->p) + 1;
    nentries = 0;
    /* header words 2.. hold the entry count of each set; sets are stored back to back */
    for (i = 0; i < MAGIC_SETS; i++) {
        if (needsbyteswap)
            map->nmagic[i] = swap4(ptr[i + 2]);
        else
            map->nmagic[i] = ptr[i + 2];
        if (i != MAGIC_SETS - 1)
            map->magic[i + 1] = map->magic[i] + map->nmagic[i];
        nentries += map->nmagic[i];
    }
    /* for a file: the counts plus the header slot must account for the whole file */
    if (NULL != fn && entries != nentries + 1) {
        file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
            dbname, entries, nentries + 1);
        goto error;
    }

    if (needsbyteswap)
        for (i = 0; i < MAGIC_SETS; i++)
            byteswap(map->magic[i], map->nmagic[i]);

    if (dbname) {
        efree(dbname);
    }
    return map;

error:
    if (stream) {
        php_stream_close(stream);
    }
    apprentice_unmap(map);
    if (dbname) {
        efree(dbname);
    }
    return NULL;
}
2744
2745private const uint32_t ar[] = {
2746    MAGICNO, VERSIONNO
2747};
2748
2749/*
2750 * handle an mmaped file.
2751 */
2752private int
2753apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
2754{
2755    static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
2756    static const size_t m = sizeof(**map->magic);
2757    int fd = -1;
2758    size_t len;
2759    char *dbname;
2760    int rv = -1;
2761    uint32_t i;
2762    php_stream *stream;
2763
2764    TSRMLS_FETCH();
2765
2766    dbname = mkdbname(ms, fn, 0);
2767
2768    if (dbname == NULL)
2769        goto out;
2770
2771    /* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
2772    stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
2773
2774    if (!stream) {
2775        file_error(ms, errno, "cannot open `%s'", dbname);
2776        goto out;
2777    }
2778
2779    if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
2780        file_error(ms, errno, "error writing `%s'", dbname);
2781        goto out;
2782    }
2783
2784    if (php_stream_write(stream, (const char *)map->nmagic, nm) != (ssize_t)nm) {
2785        file_error(ms, errno, "error writing `%s'", dbname);
2786        goto out;
2787    }
2788
2789    assert(nm + sizeof(ar) < m);
2790
2791    if (php_stream_seek(stream,(off_t)sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
2792        file_error(ms, errno, "error seeking `%s'", dbname);
2793        goto out;
2794    }
2795
2796    for (i = 0; i < MAGIC_SETS; i++) {
2797        len = m * map->nmagic[i];
2798        if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
2799            file_error(ms, errno, "error writing `%s'", dbname);
2800            goto out;
2801        }
2802    }
2803
2804    if (stream) {
2805        php_stream_close(stream);
2806    }
2807
2808    rv = 0;
2809out:
2810    efree(dbname);
2811    return rv;
2812}
2813
2814private const char ext[] = ".mgc";
2815/*
2816 * make a dbname
2817 */
2818private char *
2819mkdbname(struct magic_set *ms, const char *fn, int strip)
2820{
2821    const char *p, *q;
2822    char *buf;
2823    TSRMLS_FETCH();
2824
2825    if (strip) {
2826        if ((p = strrchr(fn, '/')) != NULL)
2827            fn = ++p;
2828    }
2829
2830    for (q = fn; *q; q++)
2831        continue;
2832    /* Look for .mgc */
2833    for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
2834        if (*p != *q)
2835            break;
2836
2837    /* Did not find .mgc, restore q */
2838    if (p >= ext)
2839        while (*q)
2840            q++;
2841
2842    q++;
2843    /* Compatibility with old code that looked in .mime */
2844    if (ms->flags & MAGIC_MIME) {
2845        spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
2846#ifdef PHP_WIN32
2847        if (VCWD_ACCESS(buf, R_OK) == 0) {
2848#else
2849        if (VCWD_ACCESS(buf, R_OK) != -1) {
2850#endif
2851            ms->flags &= MAGIC_MIME_TYPE;
2852            return buf;
2853        }
2854        efree(buf);
2855    }
2856    spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
2857
2858    /* Compatibility with old code that looked in .mime */
2859    if (strstr(p, ".mime") != NULL)
2860        ms->flags &= MAGIC_MIME_TYPE;
2861    return buf;
2862}
2863
2864/*
2865 * Byteswap an mmap'ed file if needed
2866 */
2867private void
2868byteswap(struct magic *magic, uint32_t nmagic)
2869{
2870    uint32_t i;
2871    for (i = 0; i < nmagic; i++)
2872        bs1(&magic[i]);
2873}
2874
/*
 * Return sv with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
    const unsigned int lo = sv & 0xffu;
    const unsigned int hi = (sv >> 8) & 0xffu;

    return (uint16_t)((lo << 8) | hi);
}
2888
/*
 * Return sv with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
    uint32_t out = 0;
    int k;

    /* peel bytes off the low end and push them in from the low end */
    for (k = 0; k < 4; k++) {
        out = (out << 8) | (sv & 0xffu);
        sv >>= 8;
    }
    return out;
}
2904
/*
 * Return sv with the order of its eight bytes reversed.
 */
private uint64_t
swap8(uint64_t sv)
{
    uint64_t out = 0;
    int k;

    /* peel bytes off the low end and push them in from the low end */
    for (k = 0; k < 8; k++) {
        out = (out << 8) | (sv & 0xffu);
        sv >>= 8;
    }
    return out;
}
2935
2936/*
2937 * byteswap a single magic entry
2938 */
2939private void
2940bs1(struct magic *m)
2941{
2942    m->cont_level = swap2(m->cont_level);
2943    m->offset = swap4((uint32_t)m->offset);
2944    m->in_offset = swap4((uint32_t)m->in_offset);
2945    m->lineno = swap4((uint32_t)m->lineno);
2946    if (IS_LIBMAGIC_STRING(m->type)) {
2947        m->str_range = swap4(m->str_range);
2948        m->str_flags = swap4(m->str_flags);
2949    }
2950    else {
2951        m->value.q = swap8(m->value.q);
2952        m->num_mask = swap8(m->num_mask);
2953    }
2954}
2955
2956protected size_t
2957file_pstring_length_size(const struct magic *m)
2958{
2959    switch (m->str_flags & PSTRING_LEN) {
2960    case PSTRING_1_LE:
2961        return 1;
2962    case PSTRING_2_LE:
2963    case PSTRING_2_BE:
2964        return 2;
2965    case PSTRING_4_LE:
2966    case PSTRING_4_BE:
2967        return 4;
2968    default:
2969        abort();    /* Impossible */
2970        return 1;
2971    }
2972}
2973protected size_t
2974file_pstring_get_length(const struct magic *m, const char *s)
2975{
2976    size_t len = 0;
2977
2978    switch (m->str_flags & PSTRING_LEN) {
2979    case PSTRING_1_LE:
2980        len = *s;
2981        break;
2982    case PSTRING_2_LE:
2983        len = (s[1] << 8) | s[0];
2984        break;
2985    case PSTRING_2_BE:
2986        len = (s[0] << 8) | s[1];
2987        break;
2988    case PSTRING_4_LE:
2989        len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
2990        break;
2991    case PSTRING_4_BE:
2992        len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
2993        break;
2994    default:
2995        abort();    /* Impossible */
2996    }
2997
2998    if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
2999        len -= file_pstring_length_size(m);
3000
3001    return len;
3002}
3003
3004protected int
3005file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3006{
3007    uint32_t i, j;
3008    struct mlist *mlist, *ml;
3009
3010    mlist = ms->mlist[1];
3011
3012    for (ml = mlist->next; ml != mlist; ml = ml->next) {
3013        struct magic *ma = ml->magic;
3014        uint32_t nma = ml->nmagic;
3015        for (i = 0; i < nma; i++) {
3016            if (ma[i].type != FILE_NAME)
3017                continue;
3018            if (strcmp(ma[i].value.s, name) == 0) {
3019                v->magic = &ma[i];
3020                for (j = i + 1; j < nma; j++)
3021                    if (ma[j].cont_level == 0)
3022                        break;
3023                v->nmagic = j - i;
3024                return 0;
3025            }
3026        }
3027    }
3028    return -1;
3029}
3030