1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * apprentice - make one pass through /etc/magic, learning its secrets.
30 */
31
32#include "php.h"
33
34#include "file.h"
35
36#ifndef lint
37FILE_RCSID("@(#)$File: apprentice.c,v 1.196 2013/11/19 21:01:12 christos Exp $")
38#endif  /* lint */
39
40#include "magic.h"
41#include "patchlevel.h"
42#include <stdlib.h>
43
44#if defined(__hpux) && !defined(HAVE_STRTOULL)
45#if SIZEOF_LONG == 8
46# define strtoull strtoul
47#else
48# define strtoull __strtoull
49#endif
50#endif
51
52#ifdef PHP_WIN32
53#include "win32/unistd.h"
54#if _MSC_VER <= 1300
55# include "win32/php_strtoi64.h"
56#endif
57#define strtoull _strtoui64
58#else
59#include <unistd.h>
60#endif
61#include <string.h>
62#include <assert.h>
63#include <ctype.h>
64#include <fcntl.h>
65
/*
 * EATAB: advance the pointer variable `l' past any run of ASCII whitespace.
 * The macro takes no argument; a variable named `l' must be in scope where
 * it is used.
 */
#define EATAB {while (isascii((unsigned char) *l) && \
              isspace((unsigned char) *l))  ++l;}
/*
 * LOWCASE: yield the lower-case form of `l' when it is an upper-case letter,
 * otherwise `l' itself.  The argument is evaluated more than once, so it must
 * not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
            tolower((unsigned char) (l)) : (l))
70/*
71 * Work around a bug in headers on Digital Unix.
72 * At least confirmed for: OSF1 V4.0 878
73 */
74#if defined(__osf__) && defined(__DECC)
75#ifdef MAP_FAILED
76#undef MAP_FAILED
77#endif
78#endif
79
80#ifndef MAP_FAILED
81#define MAP_FAILED (void *) -1
82#endif
83
84#ifndef MAP_FILE
85#define MAP_FILE 0
86#endif
87
88#define ALLOC_CHUNK (size_t)10
89#define ALLOC_INCR  (size_t)200
90
/*
 * One magic entry as collected while loading a magic file.
 */
struct magic_entry {
    struct magic *mp;       /* records of this entry; mp[0] supplies the type and cont_level consulted in this file */
    uint32_t cont_count;    /* number of records in mp (summed by coalesce_entries()) */
    uint32_t max_count;     /* presumably the allocated capacity of mp -- TODO confirm against parse() */
};
96
/*
 * Growable array of magic entries; addentry() appends to it and enlarges it
 * by ALLOC_INCR slots whenever count reaches max.
 */
struct magic_entry_set {
    struct magic_entry *me; /* the entries themselves */
    uint32_t count;         /* slots currently in use */
    uint32_t max;           /* slots currently allocated */
};
102
/*
 * A loaded magic database.  apprentice_unmap() releases it: when p is NULL
 * the per-set magic[] arrays are freed one by one, when p is the
 * php_magic_database image nothing but the descriptor is freed, and any
 * other p is freed as a single buffer.
 */
struct magic_map {
    void *p;                            /* backing buffer, or NULL (see above) */
    size_t len;                         /* presumably the size of p in bytes -- TODO confirm */
    struct magic *magic[MAGIC_SETS];    /* record array for each magic set */
    uint32_t nmagic[MAGIC_SETS];        /* number of records in each set */
};
109
/*
 * Per-type lookup tables, indexed by FILE_* type code.  Both arrays are
 * filled in from type_tbl by init_file_tables().
 */
int file_formats[FILE_NAMES_SIZE];          /* FILE_FMT_* class of each type */
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];    /* textual name of each type */
const size_t file_nnames = FILE_NAMES_SIZE;
114
115private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
116private int hextoint(int);
117private const char *getstr(struct magic_set *, struct magic *, const char *,
118    int);
119private int parse(struct magic_set *, struct magic_entry *, const char *,
120    size_t, int);
121private void eatsize(const char **);
122private int apprentice_1(struct magic_set *, const char *, int);
123private size_t apprentice_magic_strength(const struct magic *);
124private int apprentice_sort(const void *, const void *);
125private void apprentice_list(struct mlist *, int );
126private struct magic_map *apprentice_load(struct magic_set *,
127    const char *, int);
128private struct mlist *mlist_alloc(void);
129private void mlist_free(struct mlist *);
130private void byteswap(struct magic *, uint32_t);
131private void bs1(struct magic *);
132private uint16_t swap2(uint16_t);
133private uint32_t swap4(uint32_t);
134private uint64_t swap8(uint64_t);
135private char *mkdbname(struct magic_set *, const char *, int);
136private struct magic_map *apprentice_map(struct magic_set *, const char *);
137private void apprentice_unmap(struct magic_map *);
138private int apprentice_compile(struct magic_set *, struct magic_map *,
139    const char *);
140private int check_format_type(const char *, int);
141private int check_format(struct magic_set *, struct magic *);
142private int get_op(char);
143private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
144private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
145private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
146
147
148private size_t magicsize = sizeof(struct magic);
149
150private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
151
/*
 * Table of the "!:" directives recognised by load_1().  Each row holds the
 * directive name, its length without the terminating NUL, and the handler
 * that is called with the text following the name.  The table ends with an
 * all-NULL row.
 */
private struct {
    const char *name;
    size_t len;
    int (*fun)(struct magic_set *, struct magic_entry *, const char *);
} bang[] = {
/* DECLARE_FIELD(x) expands to { "x", strlen("x"), parse_x } */
#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
    DECLARE_FIELD(mime),
    DECLARE_FIELD(apple),
    DECLARE_FIELD(strength),
#undef  DECLARE_FIELD
    { NULL, 0, NULL }
};
164
165#include "../data_file.c"
166
/*
 * Row of a type-name table (type_tbl, special_tbl).  A row whose len is 0
 * terminates the table.
 */
struct type_tbl_s {
    const char name[16];    /* type keyword as written in a magic file */
    const size_t len;       /* length of name, without the NUL */
    const int type;         /* FILE_* type code */
    const int format;       /* FILE_FMT_* class recorded in file_formats[] */
};
173
/*
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual SUS validation suite; having "long" be dependent on how big a
 * "long" is on the machine running "file" is silly.
 */
/*
 * Names of all magic types.  get_type() matches a keyword against this table
 * by prefix, in table order; init_file_tables() copies name and format into
 * file_names[] / file_formats[] and asserts that the number of rows before
 * the terminator equals FILE_NAMES_SIZE.
 */
static const struct type_tbl_s type_tbl[] = {
/* XX(s) supplies the name and its length; XX_NULL is the terminating row. */
# define XX(s)      s, (sizeof(s) - 1)
# define XX_NULL    "", 0
    { XX("invalid"),    FILE_INVALID,       FILE_FMT_NONE },
    { XX("byte"),       FILE_BYTE,      FILE_FMT_NUM },
    { XX("short"),      FILE_SHORT,     FILE_FMT_NUM },
    { XX("default"),    FILE_DEFAULT,       FILE_FMT_NONE },
    { XX("long"),       FILE_LONG,      FILE_FMT_NUM },
    { XX("string"),     FILE_STRING,        FILE_FMT_STR },
    { XX("date"),       FILE_DATE,      FILE_FMT_STR },
    { XX("beshort"),    FILE_BESHORT,       FILE_FMT_NUM },
    { XX("belong"),     FILE_BELONG,        FILE_FMT_NUM },
    { XX("bedate"),     FILE_BEDATE,        FILE_FMT_STR },
    { XX("leshort"),    FILE_LESHORT,       FILE_FMT_NUM },
    { XX("lelong"),     FILE_LELONG,        FILE_FMT_NUM },
    { XX("ledate"),     FILE_LEDATE,        FILE_FMT_STR },
    { XX("pstring"),    FILE_PSTRING,       FILE_FMT_STR },
    { XX("ldate"),      FILE_LDATE,     FILE_FMT_STR },
    { XX("beldate"),    FILE_BELDATE,       FILE_FMT_STR },
    { XX("leldate"),    FILE_LELDATE,       FILE_FMT_STR },
    { XX("regex"),      FILE_REGEX,     FILE_FMT_STR },
    { XX("bestring16"), FILE_BESTRING16,    FILE_FMT_STR },
    { XX("lestring16"), FILE_LESTRING16,    FILE_FMT_STR },
    { XX("search"),     FILE_SEARCH,        FILE_FMT_STR },
    { XX("medate"),     FILE_MEDATE,        FILE_FMT_STR },
    { XX("meldate"),    FILE_MELDATE,       FILE_FMT_STR },
    { XX("melong"),     FILE_MELONG,        FILE_FMT_NUM },
    { XX("quad"),       FILE_QUAD,      FILE_FMT_QUAD },
    { XX("lequad"),     FILE_LEQUAD,        FILE_FMT_QUAD },
    { XX("bequad"),     FILE_BEQUAD,        FILE_FMT_QUAD },
    { XX("qdate"),      FILE_QDATE,     FILE_FMT_STR },
    { XX("leqdate"),    FILE_LEQDATE,       FILE_FMT_STR },
    { XX("beqdate"),    FILE_BEQDATE,       FILE_FMT_STR },
    { XX("qldate"),     FILE_QLDATE,        FILE_FMT_STR },
    { XX("leqldate"),   FILE_LEQLDATE,      FILE_FMT_STR },
    { XX("beqldate"),   FILE_BEQLDATE,      FILE_FMT_STR },
    { XX("float"),      FILE_FLOAT,     FILE_FMT_FLOAT },
    { XX("befloat"),    FILE_BEFLOAT,       FILE_FMT_FLOAT },
    { XX("lefloat"),    FILE_LEFLOAT,       FILE_FMT_FLOAT },
    { XX("double"),     FILE_DOUBLE,        FILE_FMT_DOUBLE },
    { XX("bedouble"),   FILE_BEDOUBLE,      FILE_FMT_DOUBLE },
    { XX("ledouble"),   FILE_LEDOUBLE,      FILE_FMT_DOUBLE },
    { XX("leid3"),      FILE_LEID3,     FILE_FMT_NUM },
    { XX("beid3"),      FILE_BEID3,     FILE_FMT_NUM },
    { XX("indirect"),   FILE_INDIRECT,      FILE_FMT_NUM },
    { XX("qwdate"),     FILE_QWDATE,        FILE_FMT_STR },
    { XX("leqwdate"),   FILE_LEQWDATE,      FILE_FMT_STR },
    { XX("beqwdate"),   FILE_BEQWDATE,      FILE_FMT_STR },
    { XX("name"),       FILE_NAME,      FILE_FMT_NONE },
    { XX("use"),        FILE_USE,       FILE_FMT_NONE },
    { XX("clear"),      FILE_CLEAR,     FILE_FMT_NONE },
    { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },
};
234
/*
 * These are not types, and cannot be preceded by "u" to make them
 * unsigned.
 *
 * The table has the same row layout as type_tbl and ends with the same
 * zero-length terminator row.  The XX/XX_NULL helper macros are dropped once
 * this table has been defined.
 */
static const struct type_tbl_s special_tbl[] = {
    { XX("name"),       FILE_NAME,      FILE_FMT_STR },
    { XX("use"),        FILE_USE,       FILE_FMT_STR },
    { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },
};
# undef XX
# undef XX_NULL
246
247#ifndef S_ISDIR
248#define S_ISDIR(mode) ((mode) & _S_IFDIR)
249#endif
250
251private int
252get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
253{
254    const struct type_tbl_s *p;
255
256    for (p = tbl; p->len; p++) {
257        if (strncmp(l, p->name, p->len) == 0) {
258            if (t)
259                *t = l + p->len;
260            break;
261        }
262    }
263    return p->type;
264}
265
266private int
267get_standard_integer_type(const char *l, const char **t)
268{
269    int type;
270
271    if (isalpha((unsigned char)l[1])) {
272        switch (l[1]) {
273        case 'C':
274            /* "dC" and "uC" */
275            type = FILE_BYTE;
276            break;
277        case 'S':
278            /* "dS" and "uS" */
279            type = FILE_SHORT;
280            break;
281        case 'I':
282        case 'L':
283            /*
284             * "dI", "dL", "uI", and "uL".
285             *
286             * XXX - the actual Single UNIX Specification says
287             * that "L" means "long", as in the C data type,
288             * but we treat it as meaning "4-byte integer".
289             * Given that the OS X version of file 5.04 did
290             * the same, I guess that passes the actual SUS
291             * validation suite; having "dL" be dependent on
292             * how big a "long" is on the machine running
293             * "file" is silly.
294             */
295            type = FILE_LONG;
296            break;
297        case 'Q':
298            /* "dQ" and "uQ" */
299            type = FILE_QUAD;
300            break;
301        default:
302            /* "d{anything else}", "u{anything else}" */
303            return FILE_INVALID;
304        }
305        l += 2;
306    } else if (isdigit((unsigned char)l[1])) {
307        /*
308         * "d{num}" and "u{num}"; we only support {num} values
309         * of 1, 2, 4, and 8 - the Single UNIX Specification
310         * doesn't say anything about whether arbitrary
311         * values should be supported, but both the Solaris 10
312         * and OS X Mountain Lion versions of file passed the
313         * Single UNIX Specification validation suite, and
314         * neither of them support values bigger than 8 or
315         * non-power-of-2 values.
316         */
317        if (isdigit((unsigned char)l[2])) {
318            /* Multi-digit, so > 9 */
319            return FILE_INVALID;
320        }
321        switch (l[1]) {
322        case '1':
323            type = FILE_BYTE;
324            break;
325        case '2':
326            type = FILE_SHORT;
327            break;
328        case '4':
329            type = FILE_LONG;
330            break;
331        case '8':
332            type = FILE_QUAD;
333            break;
334        default:
335            /* XXX - what about 3, 5, 6, or 7? */
336            return FILE_INVALID;
337        }
338        l += 2;
339    } else {
340        /*
341         * "d" or "u" by itself.
342         */
343        type = FILE_LONG;
344        ++l;
345    }
346    if (t)
347        *t = l;
348    return type;
349}
350
351private void
352init_file_tables(void)
353{
354    static int done = 0;
355    const struct type_tbl_s *p;
356
357    if (done)
358        return;
359    done++;
360
361    for (p = type_tbl; p->len; p++) {
362        assert(p->type < FILE_NAMES_SIZE);
363        file_names[p->type] = p->name;
364        file_formats[p->type] = p->format;
365    }
366    assert(p - type_tbl == FILE_NAMES_SIZE);
367}
368
369private int
370add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
371{
372    struct mlist *ml;
373
374    if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
375        return -1;
376
377    ml->map = idx == 0 ? map : NULL;
378    ml->magic = map->magic[idx];
379    ml->nmagic = map->nmagic[idx];
380
381    mlp->prev->next = ml;
382    ml->prev = mlp->prev;
383    ml->next = mlp;
384    mlp->prev = ml;
385    return 0;
386}
387
388/*
389 * Handle one file or directory.
390 */
391private int
392apprentice_1(struct magic_set *ms, const char *fn, int action)
393{
394    struct mlist *ml;
395    struct magic_map *map;
396    size_t i;
397
398    if (magicsize != FILE_MAGICSIZE) {
399        file_error(ms, 0, "magic element size %lu != %lu",
400            (unsigned long)sizeof(*map->magic[0]),
401            (unsigned long)FILE_MAGICSIZE);
402        return -1;
403    }
404
405    if (action == FILE_COMPILE) {
406        map = apprentice_load(ms, fn, action);
407        if (map == NULL)
408            return -1;
409        return apprentice_compile(ms, map, fn);
410    }
411
412    map = apprentice_map(ms, fn);
413    if (map == NULL) {
414        if (fn) {
415            if (ms->flags & MAGIC_CHECK)
416                file_magwarn(ms, "using regular magic file `%s'", fn);
417            map = apprentice_load(ms, fn, action);
418        }
419        if (map == NULL)
420            return -1;
421    }
422
423    for (i = 0; i < MAGIC_SETS; i++) {
424        if (add_mlist(ms->mlist[i], map, i) == -1) {
425            file_oomem(ms, sizeof(*ml));
426            apprentice_unmap(map);
427            return -1;
428        }
429    }
430
431    if (action == FILE_LIST) {
432        for (i = 0; i < MAGIC_SETS; i++) {
433            printf("Set %zu:\nBinary patterns:\n", i);
434            apprentice_list(ms->mlist[i], BINTEST);
435            printf("Text patterns:\n");
436            apprentice_list(ms->mlist[i], TEXTTEST);
437        }
438    }
439
440    return 0;
441}
442
443protected void
444file_ms_free(struct magic_set *ms)
445{
446    size_t i;
447    if (ms == NULL)
448        return;
449    for (i = 0; i < MAGIC_SETS; i++)
450        mlist_free(ms->mlist[i]);
451    if (ms->o.pbuf) {
452        efree(ms->o.pbuf);
453    }
454    if (ms->o.buf) {
455        efree(ms->o.buf);
456    }
457    if (ms->c.li) {
458        efree(ms->c.li);
459    }
460    efree(ms);
461}
462
463protected struct magic_set *
464file_ms_alloc(int flags)
465{
466    struct magic_set *ms;
467    size_t i, len;
468
469    if ((ms = CAST(struct magic_set *, ecalloc((size_t)1,
470        sizeof(struct magic_set)))) == NULL)
471        return NULL;
472
473    if (magic_setflags(ms, flags) == -1) {
474        errno = EINVAL;
475        goto free;
476    }
477
478    ms->o.buf = ms->o.pbuf = NULL;
479    len = (ms->c.len = 10) * sizeof(*ms->c.li);
480
481    if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
482        goto free;
483
484    ms->event_flags = 0;
485    ms->error = -1;
486    for (i = 0; i < MAGIC_SETS; i++)
487        ms->mlist[i] = NULL;
488    ms->file = "unknown";
489    ms->line = 0;
490    return ms;
491free:
492    efree(ms);
493    return NULL;
494}
495
496private void
497apprentice_unmap(struct magic_map *map)
498{
499    if (map == NULL)
500        return;
501    if (map->p != php_magic_database) {
502        if (map->p == NULL) {
503            int j;
504            for (j = 0; j < MAGIC_SETS; j++) {
505                if (map->magic[j]) {
506                    efree(map->magic[j]);
507                }
508            }
509        } else {
510            efree(map->p);
511        }
512    }
513    efree(map);
514}
515
516private struct mlist *
517mlist_alloc(void)
518{
519    struct mlist *mlist;
520    if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
521        return NULL;
522    }
523    mlist->next = mlist->prev = mlist;
524    return mlist;
525}
526
527private void
528mlist_free(struct mlist *mlist)
529{
530    struct mlist *ml;
531
532    if (mlist == NULL)
533        return;
534
535    for (ml = mlist->next; ml != mlist;) {
536        struct mlist *next = ml->next;
537        if (ml->map)
538            apprentice_unmap(ml->map);
539        efree(ml);
540        ml = next;
541    }
542    efree(ml);
543}
544
545/* const char *fn: list of magic files and directories */
546protected int
547file_apprentice(struct magic_set *ms, const char *fn, int action)
548{
549    char *p, *mfn;
550    int file_err, errs = -1;
551    size_t i;
552
553    file_reset(ms);
554
555/* XXX disabling default magic loading so the compiled in data is used */
556#if 0
557    if ((fn = magic_getpath(fn, action)) == NULL)
558        return -1;
559#endif
560
561    init_file_tables();
562
563    if (fn == NULL)
564        fn = getenv("MAGIC");
565    if (fn == NULL) {
566        for (i = 0; i < MAGIC_SETS; i++) {
567            mlist_free(ms->mlist[i]);
568            if ((ms->mlist[i] = mlist_alloc()) == NULL) {
569                file_oomem(ms, sizeof(*ms->mlist[i]));
570                return -1;
571            }
572        }
573        return apprentice_1(ms, fn, action);
574    }
575
576    if ((mfn = estrdup(fn)) == NULL) {
577        file_oomem(ms, strlen(fn));
578        return -1;
579    }
580
581    for (i = 0; i < MAGIC_SETS; i++) {
582        mlist_free(ms->mlist[i]);
583        if ((ms->mlist[i] = mlist_alloc()) == NULL) {
584            file_oomem(ms, sizeof(*ms->mlist[i]));
585            if (i != 0) {
586                --i;
587                do
588                    mlist_free(ms->mlist[i]);
589                while (i != 0);
590            }
591            efree(mfn);
592            return -1;
593        }
594    }
595    fn = mfn;
596
597    while (fn) {
598        p = strchr(fn, PATHSEP);
599        if (p)
600            *p++ = '\0';
601        if (*fn == '\0')
602            break;
603        file_err = apprentice_1(ms, fn, action);
604        errs = MAX(errs, file_err);
605        fn = p;
606    }
607
608    efree(mfn);
609
610    if (errs == -1) {
611        for (i = 0; i < MAGIC_SETS; i++) {
612            mlist_free(ms->mlist[i]);
613            ms->mlist[i] = NULL;
614        }
615        file_error(ms, 0, "could not find any valid magic files!");
616        return -1;
617    }
618
619#if 0
620    /*
621     * Always leave the database loaded
622     */
623    if (action == FILE_LOAD)
624        return 0;
625
626    for (i = 0; i < MAGIC_SETS; i++) {
627        mlist_free(ms->mlist[i]);
628        ms->mlist[i] = NULL;
629    }
630#endif
631
632    switch (action) {
633    case FILE_LOAD:
634    case FILE_COMPILE:
635    case FILE_CHECK:
636    case FILE_LIST:
637        return 0;
638    default:
639        file_error(ms, 0, "Invalid action %d", action);
640        return -1;
641    }
642}
643
644/*
645 * Get weight of this magic entry, for sorting purposes.
646 */
647private size_t
648apprentice_magic_strength(const struct magic *m)
649{
650#define MULT 10
651    size_t val = 2 * MULT;  /* baseline strength */
652
653    switch (m->type) {
654    case FILE_DEFAULT:  /* make sure this sorts last */
655        if (m->factor_op != FILE_FACTOR_OP_NONE)
656            abort();
657        return 0;
658
659    case FILE_BYTE:
660        val += 1 * MULT;
661        break;
662
663    case FILE_SHORT:
664    case FILE_LESHORT:
665    case FILE_BESHORT:
666        val += 2 * MULT;
667        break;
668
669    case FILE_LONG:
670    case FILE_LELONG:
671    case FILE_BELONG:
672    case FILE_MELONG:
673        val += 4 * MULT;
674        break;
675
676    case FILE_PSTRING:
677    case FILE_STRING:
678        val += m->vallen * MULT;
679        break;
680
681    case FILE_BESTRING16:
682    case FILE_LESTRING16:
683        val += m->vallen * MULT / 2;
684        break;
685
686    case FILE_SEARCH:
687    case FILE_REGEX:
688        val += m->vallen * MAX(MULT / m->vallen, 1);
689        break;
690
691    case FILE_DATE:
692    case FILE_LEDATE:
693    case FILE_BEDATE:
694    case FILE_MEDATE:
695    case FILE_LDATE:
696    case FILE_LELDATE:
697    case FILE_BELDATE:
698    case FILE_MELDATE:
699    case FILE_FLOAT:
700    case FILE_BEFLOAT:
701    case FILE_LEFLOAT:
702        val += 4 * MULT;
703        break;
704
705    case FILE_QUAD:
706    case FILE_BEQUAD:
707    case FILE_LEQUAD:
708    case FILE_QDATE:
709    case FILE_LEQDATE:
710    case FILE_BEQDATE:
711    case FILE_QLDATE:
712    case FILE_LEQLDATE:
713    case FILE_BEQLDATE:
714    case FILE_QWDATE:
715    case FILE_LEQWDATE:
716    case FILE_BEQWDATE:
717    case FILE_DOUBLE:
718    case FILE_BEDOUBLE:
719    case FILE_LEDOUBLE:
720        val += 8 * MULT;
721        break;
722
723    case FILE_INDIRECT:
724    case FILE_NAME:
725    case FILE_USE:
726        break;
727
728    default:
729        val = 0;
730        (void)fprintf(stderr, "Bad type %d\n", m->type);
731        abort();
732    }
733
734    switch (m->reln) {
735    case 'x':   /* matches anything penalize */
736    case '!':       /* matches almost anything penalize */
737        val = 0;
738        break;
739
740    case '=':   /* Exact match, prefer */
741        val += MULT;
742        break;
743
744    case '>':
745    case '<':   /* comparison match reduce strength */
746        val -= 2 * MULT;
747        break;
748
749    case '^':
750    case '&':   /* masking bits, we could count them too */
751        val -= MULT;
752        break;
753
754    default:
755        (void)fprintf(stderr, "Bad relation %c\n", m->reln);
756        abort();
757    }
758
759    if (val == 0)   /* ensure we only return 0 for FILE_DEFAULT */
760        val = 1;
761
762    switch (m->factor_op) {
763    case FILE_FACTOR_OP_NONE:
764        break;
765    case FILE_FACTOR_OP_PLUS:
766        val += m->factor;
767        break;
768    case FILE_FACTOR_OP_MINUS:
769        val -= m->factor;
770        break;
771    case FILE_FACTOR_OP_TIMES:
772        val *= m->factor;
773        break;
774    case FILE_FACTOR_OP_DIV:
775        val /= m->factor;
776        break;
777    default:
778        abort();
779    }
780
781    /*
782     * Magic entries with no description get a bonus because they depend
783     * on subsequent magic entries to print something.
784     */
785    if (m->desc[0] == '\0')
786        val++;
787    return val;
788}
789
790/*
791 * Sort callback for sorting entries by "strength" (basically length)
792 */
793private int
794apprentice_sort(const void *a, const void *b)
795{
796    const struct magic_entry *ma = CAST(const struct magic_entry *, a);
797    const struct magic_entry *mb = CAST(const struct magic_entry *, b);
798    size_t sa = apprentice_magic_strength(ma->mp);
799    size_t sb = apprentice_magic_strength(mb->mp);
800    if (sa == sb)
801        return 0;
802    else if (sa > sb)
803        return -1;
804    else
805        return 1;
806}
807
808/*
809 * Shows sorted patterns list in the order which is used for the matching
810 */
811private void
812apprentice_list(struct mlist *mlist, int mode)
813{
814    uint32_t magindex = 0;
815    struct mlist *ml;
816    for (ml = mlist->next; ml != mlist; ml = ml->next) {
817        for (magindex = 0; magindex < ml->nmagic; magindex++) {
818            struct magic *m = &ml->magic[magindex];
819            if ((m->flag & mode) != mode) {
820                /* Skip sub-tests */
821                while (magindex + 1 < ml->nmagic &&
822                       ml->magic[magindex + 1].cont_level != 0)
823                    ++magindex;
824                continue; /* Skip to next top-level test*/
825            }
826
827            /*
828             * Try to iterate over the tree until we find item with
829             * description/mimetype.
830             */
831            while (magindex + 1 < ml->nmagic &&
832                   ml->magic[magindex + 1].cont_level != 0 &&
833                   *ml->magic[magindex].desc == '\0' &&
834                   *ml->magic[magindex].mimetype == '\0')
835                magindex++;
836
837            printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n",
838                apprentice_magic_strength(m),
839                ml->magic[magindex].desc,
840                ml->magic[magindex].mimetype);
841        }
842    }
843}
844
845private void
846set_test_type(struct magic *mstart, struct magic *m)
847{
848    switch (m->type) {
849    case FILE_BYTE:
850    case FILE_SHORT:
851    case FILE_LONG:
852    case FILE_DATE:
853    case FILE_BESHORT:
854    case FILE_BELONG:
855    case FILE_BEDATE:
856    case FILE_LESHORT:
857    case FILE_LELONG:
858    case FILE_LEDATE:
859    case FILE_LDATE:
860    case FILE_BELDATE:
861    case FILE_LELDATE:
862    case FILE_MEDATE:
863    case FILE_MELDATE:
864    case FILE_MELONG:
865    case FILE_QUAD:
866    case FILE_LEQUAD:
867    case FILE_BEQUAD:
868    case FILE_QDATE:
869    case FILE_LEQDATE:
870    case FILE_BEQDATE:
871    case FILE_QLDATE:
872    case FILE_LEQLDATE:
873    case FILE_BEQLDATE:
874    case FILE_QWDATE:
875    case FILE_LEQWDATE:
876    case FILE_BEQWDATE:
877    case FILE_FLOAT:
878    case FILE_BEFLOAT:
879    case FILE_LEFLOAT:
880    case FILE_DOUBLE:
881    case FILE_BEDOUBLE:
882    case FILE_LEDOUBLE:
883        mstart->flag |= BINTEST;
884        break;
885    case FILE_STRING:
886    case FILE_PSTRING:
887    case FILE_BESTRING16:
888    case FILE_LESTRING16:
889        /* Allow text overrides */
890        if (mstart->str_flags & STRING_TEXTTEST)
891            mstart->flag |= TEXTTEST;
892        else
893            mstart->flag |= BINTEST;
894        break;
895    case FILE_REGEX:
896    case FILE_SEARCH:
897        /* Check for override */
898        if (mstart->str_flags & STRING_BINTEST)
899            mstart->flag |= BINTEST;
900        if (mstart->str_flags & STRING_TEXTTEST)
901            mstart->flag |= TEXTTEST;
902
903        if (mstart->flag & (TEXTTEST|BINTEST))
904            break;
905
906        /* binary test if pattern is not text */
907        if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
908            NULL) <= 0)
909            mstart->flag |= BINTEST;
910        else
911            mstart->flag |= TEXTTEST;
912        break;
913    case FILE_DEFAULT:
914        /* can't deduce anything; we shouldn't see this at the
915           top level anyway */
916        break;
917    case FILE_INVALID:
918    default:
919        /* invalid search type, but no need to complain here */
920        break;
921    }
922}
923
924private int
925addentry(struct magic_set *ms, struct magic_entry *me,
926   struct magic_entry_set *mset)
927{
928    size_t i = me->mp->type == FILE_NAME ? 1 : 0;
929    if (mset[i].count == mset[i].max) {
930        struct magic_entry *mp;
931
932        mset[i].max += ALLOC_INCR;
933        if ((mp = CAST(struct magic_entry *,
934            erealloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
935            NULL) {
936            file_oomem(ms, sizeof(*mp) * mset[i].max);
937            return -1;
938        }
939        (void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
940            ALLOC_INCR);
941        mset[i].me = mp;
942    }
943    mset[i].me[mset[i].count++] = *me;
944    memset(me, 0, sizeof(*me));
945    return 0;
946}
947
948/*
949 * Load and parse one file.
950 */
951private void
952load_1(struct magic_set *ms, int action, const char *fn, int *errs,
953   struct magic_entry_set *mset)
954{
955    char buffer[BUFSIZ + 1];
956    char *line = NULL;
957    size_t len;
958    size_t lineno = 0;
959    struct magic_entry me;
960
961    php_stream *stream;
962
963
964    ms->file = fn;
965    stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
966
967    if (stream == NULL) {
968        if (errno != ENOENT)
969            file_error(ms, errno, "cannot read magic file `%s'",
970                   fn);
971        (*errs)++;
972        return;
973    }
974
975    memset(&me, 0, sizeof(me));
976    /* read and parse this file */
977    for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
978        if (len == 0) /* null line, garbage, etc */
979            continue;
980        if (line[len - 1] == '\n') {
981            lineno++;
982            line[len - 1] = '\0'; /* delete newline */
983        }
984        switch (line[0]) {
985        case '\0':  /* empty, do not parse */
986        case '#':   /* comment, do not parse */
987            continue;
988        case '!':
989            if (line[1] == ':') {
990                size_t i;
991
992                for (i = 0; bang[i].name != NULL; i++) {
993                    if ((size_t)(len - 2) > bang[i].len &&
994                        memcmp(bang[i].name, line + 2,
995                        bang[i].len) == 0)
996                        break;
997                }
998                if (bang[i].name == NULL) {
999                    file_error(ms, 0,
1000                        "Unknown !: entry `%s'", line);
1001                    (*errs)++;
1002                    continue;
1003                }
1004                if (me.mp == NULL) {
1005                    file_error(ms, 0,
1006                        "No current entry for :!%s type",
1007                        bang[i].name);
1008                    (*errs)++;
1009                    continue;
1010                }
1011                if ((*bang[i].fun)(ms, &me,
1012                    line + bang[i].len + 2) != 0) {
1013                    (*errs)++;
1014                    continue;
1015                }
1016                continue;
1017            }
1018            /*FALLTHROUGH*/
1019        default:
1020        again:
1021            switch (parse(ms, &me, line, lineno, action)) {
1022            case 0:
1023                continue;
1024            case 1:
1025                (void)addentry(ms, &me, mset);
1026                goto again;
1027            default:
1028                (*errs)++;
1029                break;
1030            }
1031        }
1032    }
1033    if (me.mp)
1034        (void)addentry(ms, &me, mset);
1035    php_stream_close(stream);
1036}
1037
1038/*
1039 * parse a file or directory of files
1040 * const char *fn: name of magic file or directory
1041 */
1042private int
1043cmpstrp(const void *p1, const void *p2)
1044{
1045        return strcmp(*(char *const *)p1, *(char *const *)p2);
1046}
1047
1048
1049private uint32_t
1050set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1051    uint32_t starttest)
1052{
1053    static const char text[] = "text";
1054    static const char binary[] = "binary";
1055    static const size_t len = sizeof(text);
1056
1057    uint32_t i = starttest;
1058
1059    do {
1060        set_test_type(me[starttest].mp, me[i].mp);
1061        if ((ms->flags & MAGIC_DEBUG) == 0)
1062            continue;
1063        (void)fprintf(stderr, "%s%s%s: %s\n",
1064            me[i].mp->mimetype,
1065            me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1066            me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1067            me[i].mp->flag & BINTEST ? binary : text);
1068        if (me[i].mp->flag & BINTEST) {
1069            char *p = strstr(me[i].mp->desc, text);
1070            if (p && (p == me[i].mp->desc ||
1071                isspace((unsigned char)p[-1])) &&
1072                (p + len - me[i].mp->desc == MAXstring
1073                || (p[len] == '\0' ||
1074                isspace((unsigned char)p[len]))))
1075                (void)fprintf(stderr, "*** Possible "
1076                    "binary test for text type\n");
1077        }
1078    } while (++i < nme && me[i].mp->cont_level != 0);
1079    return i;
1080}
1081
1082private void
1083set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1084{
1085    uint32_t i;
1086    for (i = 0; i < nme; i++) {
1087        if (me[i].mp->cont_level == 0 &&
1088            me[i].mp->type == FILE_DEFAULT) {
1089            while (++i < nme)
1090                if (me[i].mp->cont_level == 0)
1091                    break;
1092            if (i != nme) {
1093                /* XXX - Ugh! */
1094                ms->line = me[i].mp->lineno;
1095                file_magwarn(ms,
1096                    "level 0 \"default\" did not sort last");
1097            }
1098            return;
1099        }
1100    }
1101}
1102
1103private int
1104coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1105    struct magic **ma, uint32_t *nma)
1106{
1107    uint32_t i, mentrycount = 0;
1108    size_t slen;
1109
1110    for (i = 0; i < nme; i++)
1111        mentrycount += me[i].cont_count;
1112
1113    slen = sizeof(**ma) * mentrycount;
1114    if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1115        file_oomem(ms, slen);
1116        return -1;
1117    }
1118
1119    mentrycount = 0;
1120    for (i = 0; i < nme; i++) {
1121        (void)memcpy(*ma + mentrycount, me[i].mp,
1122            me[i].cont_count * sizeof(**ma));
1123        mentrycount += me[i].cont_count;
1124    }
1125    *nma = mentrycount;
1126    return 0;
1127}
1128
1129private void
1130magic_entry_free(struct magic_entry *me, uint32_t nme)
1131{
1132    uint32_t i;
1133    if (me == NULL)
1134        return;
1135    for (i = 0; i < nme; i++)
1136        efree(me[i].mp);
1137    efree(me);
1138}
1139
1140private struct magic_map *
1141apprentice_load(struct magic_set *ms, const char *fn, int action)
1142{
1143    int errs = 0;
1144    uint32_t i, j;
1145    size_t files = 0, maxfiles = 0;
1146    char **filearr = NULL;
1147    zend_stat_t st;
1148    struct magic_map *map;
1149    struct magic_entry_set mset[MAGIC_SETS];
1150    php_stream *dir;
1151    php_stream_dirent d;
1152
1153
1154    memset(mset, 0, sizeof(mset));
1155    ms->flags |= MAGIC_CHECK;   /* Enable checks for parsed files */
1156
1157
1158    if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL)
1159    {
1160        file_oomem(ms, sizeof(*map));
1161        return NULL;
1162    }
1163
1164    /* print silly verbose header for USG compat. */
1165    if (action == FILE_CHECK)
1166        (void)fprintf(stderr, "%s\n", usg_hdr);
1167
1168    /* load directory or file */
1169    /* FIXME: Read file names and sort them to prevent
1170       non-determinism. See Debian bug #488562. */
1171    if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1172        int mflen;
1173        char mfn[MAXPATHLEN];
1174
1175        dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1176        if (!dir) {
1177            errs++;
1178            goto out;
1179        }
1180        while (php_stream_readdir(dir, &d)) {
1181            if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1182                file_oomem(ms,
1183                strlen(fn) + strlen(d.d_name) + 2);
1184                errs++;
1185                php_stream_closedir(dir);
1186                goto out;
1187            }
1188            if (zend_stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1189                continue;
1190            }
1191            if (files >= maxfiles) {
1192                size_t mlen;
1193                maxfiles = (maxfiles + 1) * 2;
1194                mlen = maxfiles * sizeof(*filearr);
1195                if ((filearr = CAST(char **,
1196                    erealloc(filearr, mlen))) == NULL) {
1197                    file_oomem(ms, mlen);
1198                    php_stream_closedir(dir);
1199                    errs++;
1200                    goto out;
1201                }
1202            }
1203            filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1204        }
1205        php_stream_closedir(dir);
1206        qsort(filearr, files, sizeof(*filearr), cmpstrp);
1207        for (i = 0; i < files; i++) {
1208            load_1(ms, action, filearr[i], &errs, mset);
1209            efree(filearr[i]);
1210        }
1211        efree(filearr);
1212    } else
1213        load_1(ms, action, fn, &errs, mset);
1214    if (errs)
1215        goto out;
1216
1217    for (j = 0; j < MAGIC_SETS; j++) {
1218        /* Set types of tests */
1219        for (i = 0; i < mset[j].count; ) {
1220            if (mset[j].me[i].mp->cont_level != 0) {
1221                i++;
1222                continue;
1223            }
1224            i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1225        }
1226        qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
1227            apprentice_sort);
1228
1229        /*
1230         * Make sure that any level 0 "default" line is last
1231         * (if one exists).
1232         */
1233        set_last_default(ms, mset[j].me, mset[j].count);
1234
1235        /* coalesce per file arrays into a single one */
1236        if (coalesce_entries(ms, mset[j].me, mset[j].count,
1237            &map->magic[j], &map->nmagic[j]) == -1) {
1238            errs++;
1239            goto out;
1240        }
1241    }
1242
1243out:
1244    for (j = 0; j < MAGIC_SETS; j++)
1245        magic_entry_free(mset[j].me, mset[j].count);
1246
1247    if (errs) {
1248        for (j = 0; j < MAGIC_SETS; j++) {
1249            if (map->magic[j])
1250                efree(map->magic[j]);
1251        }
1252        efree(map);
1253        return NULL;
1254    }
1255    return map;
1256}
1257
1258/*
1259 * extend the sign bit if the comparison is to be signed
1260 */
1261protected uint64_t
1262file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1263{
1264    if (!(m->flag & UNSIGNED)) {
1265        switch(m->type) {
1266        /*
1267         * Do not remove the casts below.  They are
1268         * vital.  When later compared with the data,
1269         * the sign extension must have happened.
1270         */
1271        case FILE_BYTE:
1272            v = (signed char) v;
1273            break;
1274        case FILE_SHORT:
1275        case FILE_BESHORT:
1276        case FILE_LESHORT:
1277            v = (short) v;
1278            break;
1279        case FILE_DATE:
1280        case FILE_BEDATE:
1281        case FILE_LEDATE:
1282        case FILE_MEDATE:
1283        case FILE_LDATE:
1284        case FILE_BELDATE:
1285        case FILE_LELDATE:
1286        case FILE_MELDATE:
1287        case FILE_LONG:
1288        case FILE_BELONG:
1289        case FILE_LELONG:
1290        case FILE_MELONG:
1291        case FILE_FLOAT:
1292        case FILE_BEFLOAT:
1293        case FILE_LEFLOAT:
1294            v = (int32_t) v;
1295            break;
1296        case FILE_QUAD:
1297        case FILE_BEQUAD:
1298        case FILE_LEQUAD:
1299        case FILE_QDATE:
1300        case FILE_QLDATE:
1301        case FILE_QWDATE:
1302        case FILE_BEQDATE:
1303        case FILE_BEQLDATE:
1304        case FILE_BEQWDATE:
1305        case FILE_LEQDATE:
1306        case FILE_LEQLDATE:
1307        case FILE_LEQWDATE:
1308        case FILE_DOUBLE:
1309        case FILE_BEDOUBLE:
1310        case FILE_LEDOUBLE:
1311            v = (int64_t) v;
1312            break;
1313        case FILE_STRING:
1314        case FILE_PSTRING:
1315        case FILE_BESTRING16:
1316        case FILE_LESTRING16:
1317        case FILE_REGEX:
1318        case FILE_SEARCH:
1319        case FILE_DEFAULT:
1320        case FILE_INDIRECT:
1321        case FILE_NAME:
1322        case FILE_USE:
1323        case FILE_CLEAR:
1324            break;
1325        default:
1326            if (ms->flags & MAGIC_CHECK)
1327                file_magwarn(ms, "cannot happen: m->type=%d\n",
1328                    m->type);
1329            return ~0U;
1330        }
1331    }
1332    return v;
1333}
1334
1335private int
1336string_modifier_check(struct magic_set *ms, struct magic *m)
1337{
1338    if ((ms->flags & MAGIC_CHECK) == 0)
1339        return 0;
1340
1341    if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
1342        file_magwarn(ms,
1343            "'/BHhLl' modifiers are only allowed for pascal strings\n");
1344        return -1;
1345    }
1346    switch (m->type) {
1347    case FILE_BESTRING16:
1348    case FILE_LESTRING16:
1349        if (m->str_flags != 0) {
1350            file_magwarn(ms,
1351                "no modifiers allowed for 16-bit strings\n");
1352            return -1;
1353        }
1354        break;
1355    case FILE_STRING:
1356    case FILE_PSTRING:
1357        if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1358            file_magwarn(ms,
1359                "'/%c' only allowed on regex and search\n",
1360                CHAR_REGEX_OFFSET_START);
1361            return -1;
1362        }
1363        break;
1364    case FILE_SEARCH:
1365        if (m->str_range == 0) {
1366            file_magwarn(ms,
1367                "missing range; defaulting to %d\n",
1368                            STRING_DEFAULT_RANGE);
1369            m->str_range = STRING_DEFAULT_RANGE;
1370            return -1;
1371        }
1372        break;
1373    case FILE_REGEX:
1374        if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1375            file_magwarn(ms, "'/%c' not allowed on regex\n",
1376                CHAR_COMPACT_WHITESPACE);
1377            return -1;
1378        }
1379        if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1380            file_magwarn(ms, "'/%c' not allowed on regex\n",
1381                CHAR_COMPACT_OPTIONAL_WHITESPACE);
1382            return -1;
1383        }
1384        break;
1385    default:
1386        file_magwarn(ms, "coding error: m->type=%d\n",
1387            m->type);
1388        return -1;
1389    }
1390    return 0;
1391}
1392
1393private int
1394get_op(char c)
1395{
1396    switch (c) {
1397    case '&':
1398        return FILE_OPAND;
1399    case '|':
1400        return FILE_OPOR;
1401    case '^':
1402        return FILE_OPXOR;
1403    case '+':
1404        return FILE_OPADD;
1405    case '-':
1406        return FILE_OPMINUS;
1407    case '*':
1408        return FILE_OPMULTIPLY;
1409    case '/':
1410        return FILE_OPDIVIDE;
1411    case '%':
1412        return FILE_OPMODULO;
1413    default:
1414        return -1;
1415    }
1416}
1417
1418#ifdef ENABLE_CONDITIONALS
1419private int
1420get_cond(const char *l, const char **t)
1421{
1422    static const struct cond_tbl_s {
1423        char name[8];
1424        size_t len;
1425        int cond;
1426    } cond_tbl[] = {
1427        { "if",     2,  COND_IF },
1428        { "elif",   4,  COND_ELIF },
1429        { "else",   4,  COND_ELSE },
1430        { "",       0,  COND_NONE },
1431    };
1432    const struct cond_tbl_s *p;
1433
1434    for (p = cond_tbl; p->len; p++) {
1435        if (strncmp(l, p->name, p->len) == 0 &&
1436            isspace((unsigned char)l[p->len])) {
1437            if (t)
1438                *t = l + p->len;
1439            break;
1440        }
1441    }
1442    return p->cond;
1443}
1444
1445private int
1446check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1447{
1448    int last_cond;
1449    last_cond = ms->c.li[cont_level].last_cond;
1450
1451    switch (cond) {
1452    case COND_IF:
1453        if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1454            if (ms->flags & MAGIC_CHECK)
1455                file_magwarn(ms, "syntax error: `if'");
1456            return -1;
1457        }
1458        last_cond = COND_IF;
1459        break;
1460
1461    case COND_ELIF:
1462        if (last_cond != COND_IF && last_cond != COND_ELIF) {
1463            if (ms->flags & MAGIC_CHECK)
1464                file_magwarn(ms, "syntax error: `elif'");
1465            return -1;
1466        }
1467        last_cond = COND_ELIF;
1468        break;
1469
1470    case COND_ELSE:
1471        if (last_cond != COND_IF && last_cond != COND_ELIF) {
1472            if (ms->flags & MAGIC_CHECK)
1473                file_magwarn(ms, "syntax error: `else'");
1474            return -1;
1475        }
1476        last_cond = COND_NONE;
1477        break;
1478
1479    case COND_NONE:
1480        last_cond = COND_NONE;
1481        break;
1482    }
1483
1484    ms->c.li[cont_level].last_cond = last_cond;
1485    return 0;
1486}
1487#endif /* ENABLE_CONDITIONALS */
1488
1489/*
1490 * parse one line from magic file, put into magic[index++] if valid
1491 */
1492private int
1493parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1494    size_t lineno, int action)
1495{
1496#ifdef ENABLE_CONDITIONALS
1497    static uint32_t last_cont_level = 0;
1498#endif
1499    size_t i;
1500    struct magic *m;
1501    const char *l = line;
1502    char *t;
1503    int op;
1504    uint32_t cont_level;
1505    int32_t diff;
1506
1507    cont_level = 0;
1508
1509    /*
1510     * Parse the offset.
1511     */
1512    while (*l == '>') {
1513        ++l;        /* step over */
1514        cont_level++;
1515    }
1516#ifdef ENABLE_CONDITIONALS
1517    if (cont_level == 0 || cont_level > last_cont_level)
1518        if (file_check_mem(ms, cont_level) == -1)
1519            return -1;
1520    last_cont_level = cont_level;
1521#endif
1522    if (cont_level != 0) {
1523        if (me->mp == NULL) {
1524            file_magerror(ms, "No current entry for continuation");
1525            return -1;
1526        }
1527        if (me->cont_count == 0) {
1528            file_magerror(ms, "Continuations present with 0 count");
1529            return -1;
1530        }
1531        m = &me->mp[me->cont_count - 1];
1532        diff = (int32_t)cont_level - (int32_t)m->cont_level;
1533        if (diff > 1)
1534            file_magwarn(ms, "New continuation level %u is more "
1535                "than one larger than current level %u", cont_level,
1536                m->cont_level);
1537        if (me->cont_count == me->max_count) {
1538            struct magic *nm;
1539            size_t cnt = me->max_count + ALLOC_CHUNK;
1540            if ((nm = CAST(struct magic *, erealloc(me->mp,
1541                sizeof(*nm) * cnt))) == NULL) {
1542                file_oomem(ms, sizeof(*nm) * cnt);
1543                return -1;
1544            }
1545            me->mp = m = nm;
1546            me->max_count = CAST(uint32_t, cnt);
1547        }
1548        m = &me->mp[me->cont_count++];
1549        (void)memset(m, 0, sizeof(*m));
1550        m->cont_level = cont_level;
1551    } else {
1552        static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1553        if (me->mp != NULL)
1554            return 1;
1555        if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
1556            file_oomem(ms, len);
1557            return -1;
1558        }
1559        me->mp = m;
1560        me->max_count = ALLOC_CHUNK;
1561        (void)memset(m, 0, sizeof(*m));
1562        m->factor_op = FILE_FACTOR_OP_NONE;
1563        m->cont_level = 0;
1564        me->cont_count = 1;
1565    }
1566    m->lineno = CAST(uint32_t, lineno);
1567
1568    if (*l == '&') {  /* m->cont_level == 0 checked below. */
1569                ++l;            /* step over */
1570                m->flag |= OFFADD;
1571        }
1572    if (*l == '(') {
1573        ++l;        /* step over */
1574        m->flag |= INDIR;
1575        if (m->flag & OFFADD)
1576            m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1577
1578        if (*l == '&') {  /* m->cont_level == 0 checked below */
1579            ++l;            /* step over */
1580            m->flag |= OFFADD;
1581        }
1582    }
1583    /* Indirect offsets are not valid at level 0. */
1584    if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1585        if (ms->flags & MAGIC_CHECK)
1586            file_magwarn(ms, "relative offset at level 0");
1587
1588    /* get offset, then skip over it */
1589    m->offset = (uint32_t)strtoul(l, &t, 0);
1590        if (l == t)
1591        if (ms->flags & MAGIC_CHECK)
1592            file_magwarn(ms, "offset `%s' invalid", l);
1593        l = t;
1594
1595    if (m->flag & INDIR) {
1596        m->in_type = FILE_LONG;
1597        m->in_offset = 0;
1598        /*
1599         * read [.lbs][+-]nnnnn)
1600         */
1601        if (*l == '.') {
1602            l++;
1603            switch (*l) {
1604            case 'l':
1605                m->in_type = FILE_LELONG;
1606                break;
1607            case 'L':
1608                m->in_type = FILE_BELONG;
1609                break;
1610            case 'm':
1611                m->in_type = FILE_MELONG;
1612                break;
1613            case 'h':
1614            case 's':
1615                m->in_type = FILE_LESHORT;
1616                break;
1617            case 'H':
1618            case 'S':
1619                m->in_type = FILE_BESHORT;
1620                break;
1621            case 'c':
1622            case 'b':
1623            case 'C':
1624            case 'B':
1625                m->in_type = FILE_BYTE;
1626                break;
1627            case 'e':
1628            case 'f':
1629            case 'g':
1630                m->in_type = FILE_LEDOUBLE;
1631                break;
1632            case 'E':
1633            case 'F':
1634            case 'G':
1635                m->in_type = FILE_BEDOUBLE;
1636                break;
1637            case 'i':
1638                m->in_type = FILE_LEID3;
1639                break;
1640            case 'I':
1641                m->in_type = FILE_BEID3;
1642                break;
1643            default:
1644                if (ms->flags & MAGIC_CHECK)
1645                    file_magwarn(ms,
1646                        "indirect offset type `%c' invalid",
1647                        *l);
1648                break;
1649            }
1650            l++;
1651        }
1652
1653        m->in_op = 0;
1654        if (*l == '~') {
1655            m->in_op |= FILE_OPINVERSE;
1656            l++;
1657        }
1658        if ((op = get_op(*l)) != -1) {
1659            m->in_op |= op;
1660            l++;
1661        }
1662        if (*l == '(') {
1663            m->in_op |= FILE_OPINDIRECT;
1664            l++;
1665        }
1666        if (isdigit((unsigned char)*l) || *l == '-') {
1667            m->in_offset = (int32_t)strtol(l, &t, 0);
1668            if (l == t)
1669                if (ms->flags & MAGIC_CHECK)
1670                    file_magwarn(ms,
1671                        "in_offset `%s' invalid", l);
1672            l = t;
1673        }
1674        if (*l++ != ')' ||
1675            ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1676            if (ms->flags & MAGIC_CHECK)
1677                file_magwarn(ms,
1678                    "missing ')' in indirect offset");
1679    }
1680    EATAB;
1681
1682#ifdef ENABLE_CONDITIONALS
1683    m->cond = get_cond(l, &l);
1684    if (check_cond(ms, m->cond, cont_level) == -1)
1685        return -1;
1686
1687    EATAB;
1688#endif
1689
1690    /*
1691     * Parse the type.
1692     */
1693    if (*l == 'u') {
1694        /*
1695         * Try it as a keyword type prefixed by "u"; match what
1696         * follows the "u".  If that fails, try it as an SUS
1697         * integer type.
1698         */
1699        m->type = get_type(type_tbl, l + 1, &l);
1700        if (m->type == FILE_INVALID) {
1701            /*
1702             * Not a keyword type; parse it as an SUS type,
1703             * 'u' possibly followed by a number or C/S/L.
1704             */
1705            m->type = get_standard_integer_type(l, &l);
1706        }
1707        // It's unsigned.
1708        if (m->type != FILE_INVALID)
1709            m->flag |= UNSIGNED;
1710    } else {
1711        /*
1712         * Try it as a keyword type.  If that fails, try it as
1713         * an SUS integer type if it begins with "d" or as an
1714         * SUS string type if it begins with "s".  In any case,
1715         * it's not unsigned.
1716         */
1717        m->type = get_type(type_tbl, l, &l);
1718        if (m->type == FILE_INVALID) {
1719            /*
1720             * Not a keyword type; parse it as an SUS type,
1721             * either 'd' possibly followed by a number or
1722             * C/S/L, or just 's'.
1723             */
1724            if (*l == 'd')
1725                m->type = get_standard_integer_type(l, &l);
1726            else if (*l == 's' && !isalpha((unsigned char)l[1])) {
1727                m->type = FILE_STRING;
1728        ++l;
1729            }
1730        }
1731    }
1732
1733    if (m->type == FILE_INVALID) {
1734        /* Not found - try it as a special keyword. */
1735        m->type = get_type(special_tbl, l, &l);
1736    }
1737
1738    if (m->type == FILE_INVALID) {
1739        if (ms->flags & MAGIC_CHECK)
1740            file_magwarn(ms, "type `%s' invalid", l);
1741        /*if (me->mp) {
1742            efree(me->mp);
1743            me->mp = NULL;
1744        }*/
1745        return -1;
1746    }
1747
1748    /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1749    /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1750
1751    m->mask_op = 0;
1752    if (*l == '~') {
1753        if (!IS_LIBMAGIC_STRING(m->type))
1754            m->mask_op |= FILE_OPINVERSE;
1755        else if (ms->flags & MAGIC_CHECK)
1756            file_magwarn(ms, "'~' invalid for string types");
1757        ++l;
1758    }
1759    m->str_range = 0;
1760    m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1761    if ((op = get_op(*l)) != -1) {
1762        if (!IS_LIBMAGIC_STRING(m->type)) {
1763            uint64_t val;
1764            ++l;
1765            m->mask_op |= op;
1766            val = (uint64_t)strtoull(l, &t, 0);
1767            l = t;
1768            m->num_mask = file_signextend(ms, m, val);
1769            eatsize(&l);
1770        }
1771        else if (op == FILE_OPDIVIDE) {
1772            int have_range = 0;
1773            while (!isspace((unsigned char)*++l)) {
1774                switch (*l) {
1775                case '0':  case '1':  case '2':
1776                case '3':  case '4':  case '5':
1777                case '6':  case '7':  case '8':
1778                case '9':
1779                    if (have_range &&
1780                        (ms->flags & MAGIC_CHECK))
1781                        file_magwarn(ms,
1782                            "multiple ranges");
1783                    have_range = 1;
1784                    m->str_range = CAST(uint32_t,
1785                        strtoul(l, &t, 0));
1786                    if (m->str_range == 0)
1787                        file_magwarn(ms,
1788                            "zero range");
1789                    l = t - 1;
1790                    break;
1791                case CHAR_COMPACT_WHITESPACE:
1792                    m->str_flags |=
1793                        STRING_COMPACT_WHITESPACE;
1794                    break;
1795                case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1796                    m->str_flags |=
1797                        STRING_COMPACT_OPTIONAL_WHITESPACE;
1798                    break;
1799                case CHAR_IGNORE_LOWERCASE:
1800                    m->str_flags |= STRING_IGNORE_LOWERCASE;
1801                    break;
1802                case CHAR_IGNORE_UPPERCASE:
1803                    m->str_flags |= STRING_IGNORE_UPPERCASE;
1804                    break;
1805                case CHAR_REGEX_OFFSET_START:
1806                    m->str_flags |= REGEX_OFFSET_START;
1807                    break;
1808                case CHAR_BINTEST:
1809                    m->str_flags |= STRING_BINTEST;
1810                    break;
1811                case CHAR_TEXTTEST:
1812                    m->str_flags |= STRING_TEXTTEST;
1813                    break;
1814                case CHAR_TRIM:
1815                    m->str_flags |= STRING_TRIM;
1816                    break;
1817                case CHAR_PSTRING_1_LE:
1818                    if (m->type != FILE_PSTRING)
1819                        goto bad;
1820                    m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE;
1821                    break;
1822                case CHAR_PSTRING_2_BE:
1823                    if (m->type != FILE_PSTRING)
1824                        goto bad;
1825                    m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE;
1826                    break;
1827                case CHAR_PSTRING_2_LE:
1828                    if (m->type != FILE_PSTRING)
1829                        goto bad;
1830                    m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE;
1831                    break;
1832                case CHAR_PSTRING_4_BE:
1833                    if (m->type != FILE_PSTRING)
1834                        goto bad;
1835                    m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
1836                    break;
1837                case CHAR_PSTRING_4_LE:
1838                    if (m->type != FILE_PSTRING)
1839                        goto bad;
1840                    m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
1841                    break;
1842                case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1843                    if (m->type != FILE_PSTRING)
1844                        goto bad;
1845                    m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1846                    break;
1847                default:
1848                bad:
1849                    if (ms->flags & MAGIC_CHECK)
1850                        file_magwarn(ms,
1851                            "string extension `%c' "
1852                            "invalid", *l);
1853                    return -1;
1854                }
1855                /* allow multiple '/' for readability */
1856                if (l[1] == '/' &&
1857                    !isspace((unsigned char)l[2]))
1858                    l++;
1859            }
1860            if (string_modifier_check(ms, m) == -1)
1861                return -1;
1862        }
1863        else {
1864            if (ms->flags & MAGIC_CHECK)
1865                file_magwarn(ms, "invalid string op: %c", *t);
1866            return -1;
1867        }
1868    }
1869    /*
1870     * We used to set mask to all 1's here, instead let's just not do
1871     * anything if mask = 0 (unless you have a better idea)
1872     */
1873    EATAB;
1874
1875    switch (*l) {
1876    case '>':
1877    case '<':
1878        m->reln = *l;
1879        ++l;
1880        if (*l == '=') {
1881            if (ms->flags & MAGIC_CHECK) {
1882                file_magwarn(ms, "%c= not supported",
1883                    m->reln);
1884                return -1;
1885            }
1886           ++l;
1887        }
1888        break;
1889    /* Old-style anding: "0 byte &0x80 dynamically linked" */
1890    case '&':
1891    case '^':
1892    case '=':
1893        m->reln = *l;
1894        ++l;
1895        if (*l == '=') {
1896           /* HP compat: ignore &= etc. */
1897           ++l;
1898        }
1899        break;
1900    case '!':
1901        m->reln = *l;
1902        ++l;
1903        break;
1904    default:
1905        m->reln = '=';  /* the default relation */
1906        if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
1907            isspace((unsigned char)l[1])) || !l[1])) {
1908            m->reln = *l;
1909            ++l;
1910        }
1911        break;
1912    }
1913    /*
1914     * Grab the value part, except for an 'x' reln.
1915     */
1916    if (m->reln != 'x' && getvalue(ms, m, &l, action))
1917        return -1;
1918
1919    /*
1920     * TODO finish this macro and start using it!
1921     * #define offsetcheck {if (offset > HOWMANY-1)
1922     *  magwarn("offset too big"); }
1923     */
1924
1925    /*
1926     * Now get last part - the description
1927     */
1928    EATAB;
1929    if (l[0] == '\b') {
1930        ++l;
1931        m->flag |= NOSPACE;
1932    } else if ((l[0] == '\\') && (l[1] == 'b')) {
1933        ++l;
1934        ++l;
1935        m->flag |= NOSPACE;
1936    }
1937    for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
1938        continue;
1939    if (i == sizeof(m->desc)) {
1940        m->desc[sizeof(m->desc) - 1] = '\0';
1941        if (ms->flags & MAGIC_CHECK)
1942            file_magwarn(ms, "description `%s' truncated", m->desc);
1943    }
1944
1945        /*
1946     * We only do this check while compiling, or if any of the magic
1947     * files were not compiled.
1948         */
1949        if (ms->flags & MAGIC_CHECK) {
1950        if (check_format(ms, m) == -1)
1951            return -1;
1952    }
1953    m->mimetype[0] = '\0';      /* initialise MIME type to none */
1954    return 0;
1955}
1956
1957/*
1958 * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
1959 * if valid
1960 */
1961private int
1962parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
1963{
1964    const char *l = line;
1965    char *el;
1966    unsigned long factor;
1967    struct magic *m = &me->mp[0];
1968
1969    if (m->factor_op != FILE_FACTOR_OP_NONE) {
1970        file_magwarn(ms,
1971            "Current entry already has a strength type: %c %d",
1972            m->factor_op, m->factor);
1973        return -1;
1974    }
1975    if (m->type == FILE_NAME) {
1976        file_magwarn(ms, "%s: Strength setting is not supported in "
1977            "\"name\" magic entries", m->value.s);
1978        return -1;
1979    }
1980    EATAB;
1981    switch (*l) {
1982    case FILE_FACTOR_OP_NONE:
1983    case FILE_FACTOR_OP_PLUS:
1984    case FILE_FACTOR_OP_MINUS:
1985    case FILE_FACTOR_OP_TIMES:
1986    case FILE_FACTOR_OP_DIV:
1987        m->factor_op = *l++;
1988        break;
1989    default:
1990        file_magwarn(ms, "Unknown factor op `%c'", *l);
1991        return -1;
1992    }
1993    EATAB;
1994    factor = strtoul(l, &el, 0);
1995    if (factor > 255) {
1996        file_magwarn(ms, "Too large factor `%lu'", factor);
1997        goto out;
1998    }
1999    if (*el && !isspace((unsigned char)*el)) {
2000        file_magwarn(ms, "Bad factor `%s'", l);
2001        goto out;
2002    }
2003    m->factor = (uint8_t)factor;
2004    if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2005        file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2006            m->factor_op, m->factor);
2007        goto out;
2008    }
2009    return 0;
2010out:
2011    m->factor_op = FILE_FACTOR_OP_NONE;
2012    m->factor = 0;
2013    return -1;
2014}
2015
2016/*
2017 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2018 * magic[index - 1]
2019 */
2020private int
2021parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
2022{
2023    size_t i;
2024    const char *l = line;
2025    struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2026
2027    if (m->apple[0] != '\0') {
2028        file_magwarn(ms, "Current entry already has a APPLE type "
2029            "`%.8s', new type `%s'", m->mimetype, l);
2030        return -1;
2031    }
2032
2033    EATAB;
2034    for (i = 0; *l && ((isascii((unsigned char)*l) &&
2035        isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
2036        i < sizeof(m->apple); m->apple[i++] = *l++)
2037        continue;
2038    if (i == sizeof(m->apple) && *l) {
2039        /* We don't need to NUL terminate here, printing handles it */
2040        if (ms->flags & MAGIC_CHECK)
2041            file_magwarn(ms, "APPLE type `%s' truncated %"
2042                SIZE_T_FORMAT "u", line, i);
2043    }
2044
2045    if (i > 0)
2046        return 0;
2047    else
2048        return -1;
2049}
2050
2051/*
2052 * parse a MIME annotation line from magic file, put into magic[index - 1]
2053 * if valid
2054 */
2055private int
2056parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
2057{
2058    size_t i;
2059    const char *l = line;
2060    struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2061
2062    if (m->mimetype[0] != '\0') {
2063        file_magwarn(ms, "Current entry already has a MIME type `%s',"
2064            " new type `%s'", m->mimetype, l);
2065        return -1;
2066    }
2067
2068    EATAB;
2069    for (i = 0; *l && ((isascii((unsigned char)*l) &&
2070        isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
2071        i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
2072        continue;
2073    if (i == sizeof(m->mimetype)) {
2074        m->mimetype[sizeof(m->mimetype) - 1] = '\0';
2075        if (ms->flags & MAGIC_CHECK)
2076            file_magwarn(ms, "MIME type `%s' truncated %"
2077                SIZE_T_FORMAT "u", m->mimetype, i);
2078    } else
2079        m->mimetype[i] = '\0';
2080
2081    if (i > 0)
2082        return 0;
2083    else
2084        return -1;
2085}
2086
2087private int
2088check_format_type(const char *ptr, int type)
2089{
2090    int quad = 0;
2091    if (*ptr == '\0') {
2092        /* Missing format string; bad */
2093        return -1;
2094    }
2095
2096    switch (type) {
2097    case FILE_FMT_QUAD:
2098        quad = 1;
2099        /*FALLTHROUGH*/
2100    case FILE_FMT_NUM:
2101        if (*ptr == '-')
2102            ptr++;
2103        if (*ptr == '.')
2104            ptr++;
2105        while (isdigit((unsigned char)*ptr)) ptr++;
2106        if (*ptr == '.')
2107            ptr++;
2108        while (isdigit((unsigned char)*ptr)) ptr++;
2109        if (quad) {
2110            if (*ptr++ != 'l')
2111                return -1;
2112            if (*ptr++ != 'l')
2113                return -1;
2114        }
2115
2116        switch (*ptr++) {
2117        case 'l':
2118            switch (*ptr++) {
2119            case 'i':
2120            case 'd':
2121            case 'u':
2122            case 'o':
2123            case 'x':
2124            case 'X':
2125                return 0;
2126            default:
2127                return -1;
2128            }
2129
2130        case 'h':
2131            switch (*ptr++) {
2132            case 'h':
2133                switch (*ptr++) {
2134                case 'i':
2135                case 'd':
2136                case 'u':
2137                case 'o':
2138                case 'x':
2139                case 'X':
2140                    return 0;
2141                default:
2142                    return -1;
2143                }
2144            case 'd':
2145                return 0;
2146            default:
2147                return -1;
2148            }
2149
2150        case 'i':
2151        case 'c':
2152        case 'd':
2153        case 'u':
2154        case 'o':
2155        case 'x':
2156        case 'X':
2157            return 0;
2158
2159        default:
2160            return -1;
2161        }
2162
2163    case FILE_FMT_FLOAT:
2164    case FILE_FMT_DOUBLE:
2165        if (*ptr == '-')
2166            ptr++;
2167        if (*ptr == '.')
2168            ptr++;
2169        while (isdigit((unsigned char)*ptr)) ptr++;
2170        if (*ptr == '.')
2171            ptr++;
2172        while (isdigit((unsigned char)*ptr)) ptr++;
2173
2174        switch (*ptr++) {
2175        case 'e':
2176        case 'E':
2177        case 'f':
2178        case 'F':
2179        case 'g':
2180        case 'G':
2181            return 0;
2182
2183        default:
2184            return -1;
2185        }
2186
2187
2188    case FILE_FMT_STR:
2189        if (*ptr == '-')
2190            ptr++;
2191        while (isdigit((unsigned char )*ptr))
2192            ptr++;
2193        if (*ptr == '.') {
2194            ptr++;
2195            while (isdigit((unsigned char )*ptr))
2196                ptr++;
2197        }
2198
2199        switch (*ptr++) {
2200        case 's':
2201            return 0;
2202        default:
2203            return -1;
2204        }
2205
2206    default:
2207        /* internal error */
2208        abort();
2209    }
2210    /*NOTREACHED*/
2211    return -1;
2212}
2213
2214/*
2215 * Check that the optional printf format in description matches
2216 * the type of the magic.
2217 */
2218private int
2219check_format(struct magic_set *ms, struct magic *m)
2220{
2221    char *ptr;
2222
2223    for (ptr = m->desc; *ptr; ptr++)
2224        if (*ptr == '%')
2225            break;
2226    if (*ptr == '\0') {
2227        /* No format string; ok */
2228        return 1;
2229    }
2230
2231    assert(file_nformats == file_nnames);
2232
2233    if (m->type >= file_nformats) {
2234        file_magwarn(ms, "Internal error inconsistency between "
2235            "m->type and format strings");
2236        return -1;
2237    }
2238    if (file_formats[m->type] == FILE_FMT_NONE) {
2239        file_magwarn(ms, "No format string for `%s' with description "
2240            "`%s'", m->desc, file_names[m->type]);
2241        return -1;
2242    }
2243
2244    ptr++;
2245    if (check_format_type(ptr, file_formats[m->type]) == -1) {
2246        /*
2247         * TODO: this error message is unhelpful if the format
2248         * string is not one character long
2249         */
2250        file_magwarn(ms, "Printf format `%c' is not valid for type "
2251            "`%s' in description `%s'", *ptr ? *ptr : '?',
2252            file_names[m->type], m->desc);
2253        return -1;
2254    }
2255
2256    for (; *ptr; ptr++) {
2257        if (*ptr == '%') {
2258            file_magwarn(ms,
2259                "Too many format strings (should have at most one) "
2260                "for `%s' with description `%s'",
2261                file_names[m->type], m->desc);
2262            return -1;
2263        }
2264    }
2265    return 0;
2266}
2267
2268/*
2269 * Read a numeric value from a pointer, into the value union of a magic
2270 * pointer, according to the magic type.  Update the string pointer to point
2271 * just after the number read.  Return 0 for success, non-zero for failure.
2272 */
2273private int
2274getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2275{
2276    switch (m->type) {
2277    case FILE_BESTRING16:
2278    case FILE_LESTRING16:
2279    case FILE_STRING:
2280    case FILE_PSTRING:
2281    case FILE_REGEX:
2282    case FILE_SEARCH:
2283    case FILE_NAME:
2284    case FILE_USE:
2285        *p = getstr(ms, m, *p, action == FILE_COMPILE);
2286        if (*p == NULL) {
2287            if (ms->flags & MAGIC_CHECK)
2288                file_magwarn(ms, "cannot get string from `%s'",
2289                    m->value.s);
2290            return -1;
2291        }
2292        return 0;
2293    case FILE_FLOAT:
2294    case FILE_BEFLOAT:
2295    case FILE_LEFLOAT:
2296        if (m->reln != 'x') {
2297            char *ep;
2298#ifdef HAVE_STRTOF
2299            m->value.f = strtof(*p, &ep);
2300#else
2301            m->value.f = (float)strtod(*p, &ep);
2302#endif
2303            *p = ep;
2304        }
2305        return 0;
2306    case FILE_DOUBLE:
2307    case FILE_BEDOUBLE:
2308    case FILE_LEDOUBLE:
2309        if (m->reln != 'x') {
2310            char *ep;
2311            m->value.d = strtod(*p, &ep);
2312            *p = ep;
2313        }
2314        return 0;
2315    default:
2316        if (m->reln != 'x') {
2317            char *ep;
2318            m->value.q = file_signextend(ms, m,
2319                (uint64_t)strtoull(*p, &ep, 0));
2320            *p = ep;
2321            eatsize(p);
2322        }
2323        return 0;
2324    }
2325}
2326
2327/*
2328 * Convert a string containing C character escapes.  Stop at an unescaped
2329 * space or tab.
2330 * Copy the converted version to "m->value.s", and the length in m->vallen.
2331 * Return updated scan pointer as function result. Warn if set.
2332 */
2333private const char *
2334getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2335{
2336    const char *origs = s;
2337    char    *p = m->value.s;
2338    size_t  plen = sizeof(m->value.s);
2339    char    *origp = p;
2340    char    *pmax = p + plen - 1;
2341    int c;
2342    int val;
2343
2344    while ((c = *s++) != '\0') {
2345        if (isspace((unsigned char) c))
2346            break;
2347        if (p >= pmax) {
2348            file_error(ms, 0, "string too long: `%s'", origs);
2349            return NULL;
2350        }
2351        if (c == '\\') {
2352            switch(c = *s++) {
2353
2354            case '\0':
2355                if (warn)
2356                    file_magwarn(ms, "incomplete escape");
2357                goto out;
2358
2359            case '\t':
2360                if (warn) {
2361                    file_magwarn(ms,
2362                        "escaped tab found, use \\t instead");
2363                    warn = 0;   /* already did */
2364                }
2365                /*FALLTHROUGH*/
2366            default:
2367                if (warn) {
2368                    if (isprint((unsigned char)c)) {
2369                        /* Allow escaping of
2370                         * ``relations'' */
2371                        if (strchr("<>&^=!", c) == NULL
2372                            && (m->type != FILE_REGEX ||
2373                            strchr("[]().*?^$|{}", c)
2374                            == NULL)) {
2375                            file_magwarn(ms, "no "
2376                                "need to escape "
2377                                "`%c'", c);
2378                        }
2379                    } else {
2380                        file_magwarn(ms,
2381                            "unknown escape sequence: "
2382                            "\\%03o", c);
2383                    }
2384                }
2385                /*FALLTHROUGH*/
2386            /* space, perhaps force people to use \040? */
2387            case ' ':
2388#if 0
2389            /*
2390             * Other things people escape, but shouldn't need to,
2391             * so we disallow them
2392             */
2393            case '\'':
2394            case '"':
2395            case '?':
2396#endif
2397            /* Relations */
2398            case '>':
2399            case '<':
2400            case '&':
2401            case '^':
2402            case '=':
2403            case '!':
2404            /* and baskslash itself */
2405            case '\\':
2406                *p++ = (char) c;
2407                break;
2408
2409            case 'a':
2410                *p++ = '\a';
2411                break;
2412
2413            case 'b':
2414                *p++ = '\b';
2415                break;
2416
2417            case 'f':
2418                *p++ = '\f';
2419                break;
2420
2421            case 'n':
2422                *p++ = '\n';
2423                break;
2424
2425            case 'r':
2426                *p++ = '\r';
2427                break;
2428
2429            case 't':
2430                *p++ = '\t';
2431                break;
2432
2433            case 'v':
2434                *p++ = '\v';
2435                break;
2436
2437            /* \ and up to 3 octal digits */
2438            case '0':
2439            case '1':
2440            case '2':
2441            case '3':
2442            case '4':
2443            case '5':
2444            case '6':
2445            case '7':
2446                val = c - '0';
2447                c = *s++;  /* try for 2 */
2448                if (c >= '0' && c <= '7') {
2449                    val = (val << 3) | (c - '0');
2450                    c = *s++;  /* try for 3 */
2451                    if (c >= '0' && c <= '7')
2452                        val = (val << 3) | (c-'0');
2453                    else
2454                        --s;
2455                }
2456                else
2457                    --s;
2458                *p++ = (char)val;
2459                break;
2460
2461            /* \x and up to 2 hex digits */
2462            case 'x':
2463                val = 'x';  /* Default if no digits */
2464                c = hextoint(*s++); /* Get next char */
2465                if (c >= 0) {
2466                    val = c;
2467                    c = hextoint(*s++);
2468                    if (c >= 0)
2469                        val = (val << 4) + c;
2470                    else
2471                        --s;
2472                } else
2473                    --s;
2474                *p++ = (char)val;
2475                break;
2476            }
2477        } else
2478            *p++ = (char)c;
2479    }
2480out:
2481    *p = '\0';
2482    m->vallen = CAST(unsigned char, (p - origp));
2483    if (m->type == FILE_PSTRING)
2484        m->vallen += (unsigned char)file_pstring_length_size(m);
2485    return s;
2486}
2487
2488
2489/* Single hex char to int; -1 if not a hex char. */
2490private int
2491hextoint(int c)
2492{
2493    if (!isascii((unsigned char) c))
2494        return -1;
2495    if (isdigit((unsigned char) c))
2496        return c - '0';
2497    if ((c >= 'a') && (c <= 'f'))
2498        return c + 10 - 'a';
2499    if (( c>= 'A') && (c <= 'F'))
2500        return c + 10 - 'A';
2501    return -1;
2502}
2503
2504
2505/*
2506 * Print a string containing C character escapes.
2507 */
2508protected void
2509file_showstr(FILE *fp, const char *s, size_t len)
2510{
2511    char    c;
2512
2513    for (;;) {
2514        if (len == ~0U) {
2515            c = *s++;
2516            if (c == '\0')
2517                break;
2518        }
2519        else  {
2520            if (len-- == 0)
2521                break;
2522            c = *s++;
2523        }
2524        if (c >= 040 && c <= 0176)  /* TODO isprint && !iscntrl */
2525            (void) fputc(c, fp);
2526        else {
2527            (void) fputc('\\', fp);
2528            switch (c) {
2529            case '\a':
2530                (void) fputc('a', fp);
2531                break;
2532
2533            case '\b':
2534                (void) fputc('b', fp);
2535                break;
2536
2537            case '\f':
2538                (void) fputc('f', fp);
2539                break;
2540
2541            case '\n':
2542                (void) fputc('n', fp);
2543                break;
2544
2545            case '\r':
2546                (void) fputc('r', fp);
2547                break;
2548
2549            case '\t':
2550                (void) fputc('t', fp);
2551                break;
2552
2553            case '\v':
2554                (void) fputc('v', fp);
2555                break;
2556
2557            default:
2558                (void) fprintf(fp, "%.3o", c & 0377);
2559                break;
2560            }
2561        }
2562    }
2563}
2564
2565/*
2566 * eatsize(): Eat the size spec from a number [eg. 10UL]
2567 */
2568private void
2569eatsize(const char **p)
2570{
2571    const char *l = *p;
2572
2573    if (LOWCASE(*l) == 'u')
2574        l++;
2575
2576    switch (LOWCASE(*l)) {
2577    case 'l':    /* long */
2578    case 's':    /* short */
2579    case 'h':    /* short */
2580    case 'b':    /* char/byte */
2581    case 'c':    /* char/byte */
2582        l++;
2583        /*FALLTHROUGH*/
2584    default:
2585        break;
2586    }
2587
2588    *p = l;
2589}
2590
2591/*
2592 * handle a compiled file.
2593 */
2594
2595private struct magic_map *
2596apprentice_map(struct magic_set *ms, const char *fn)
2597{
2598    uint32_t *ptr;
2599    uint32_t version, entries, nentries;
2600    int needsbyteswap;
2601    char *dbname = NULL;
2602    struct magic_map *map;
2603    size_t i;
2604    php_stream *stream = NULL;
2605    php_stream_statbuf st;
2606
2607
2608
2609    if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
2610        file_oomem(ms, sizeof(*map));
2611        efree(map);
2612        goto error;
2613    }
2614
2615    if (fn == NULL) {
2616        map->p = (void *)&php_magic_database;
2617        goto internal_loaded;
2618    }
2619
2620#ifdef PHP_WIN32
2621    /* Don't bother on windows with php_stream_open_wrapper,
2622    return to give apprentice_load() a chance. */
2623    if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
2624               if (st.sb.st_mode & S_IFDIR) {
2625                       goto error;
2626               }
2627       }
2628#endif
2629
2630    dbname = mkdbname(ms, fn, 0);
2631    if (dbname == NULL)
2632        goto error;
2633
2634        stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
2635
2636    if (!stream) {
2637        goto error;
2638    }
2639
2640    if (php_stream_stat(stream, &st) < 0) {
2641        file_error(ms, errno, "cannot stat `%s'", dbname);
2642        goto error;
2643    }
2644
2645    if (st.sb.st_size < 8) {
2646        file_error(ms, 0, "file `%s' is too small", dbname);
2647        goto error;
2648    }
2649
2650    map->len = (size_t)st.sb.st_size;
2651    if ((map->p = CAST(void *, emalloc(map->len))) == NULL) {
2652        file_oomem(ms, map->len);
2653        goto error;
2654    }
2655    if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
2656        file_badread(ms);
2657        goto error;
2658    }
2659    map->len = 0;
2660#define RET 1
2661
2662    php_stream_close(stream);
2663    stream = NULL;
2664
2665internal_loaded:
2666    ptr = (uint32_t *)(void *)map->p;
2667    if (*ptr != MAGICNO) {
2668        if (swap4(*ptr) != MAGICNO) {
2669            file_error(ms, 0, "bad magic in `%s'", dbname);
2670            goto error;
2671        }
2672        needsbyteswap = 1;
2673    } else
2674        needsbyteswap = 0;
2675    if (needsbyteswap)
2676        version = swap4(ptr[1]);
2677    else
2678        version = ptr[1];
2679    if (version != VERSIONNO) {
2680        file_error(ms, 0, "File %d.%d supports only version %d magic "
2681            "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
2682            VERSIONNO, dbname, version);
2683        goto error;
2684    }
2685
2686    /* php_magic_database is a const, performing writes will segfault. This is for big-endian
2687    machines only, PPC and Sparc specifically. Consider static variable or MINIT in
2688    future. */
2689    if (needsbyteswap && fn == NULL) {
2690        map->p = emalloc(sizeof(php_magic_database));
2691        map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
2692    }
2693
2694    if (NULL != fn) {
2695        nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2696        entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2697        if ((zend_off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
2698            file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
2699                dbname, (unsigned long long)st.sb.st_size,
2700                sizeof(struct magic));
2701            goto error;
2702        }
2703    }
2704    map->magic[0] = CAST(struct magic *, map->p) + 1;
2705    nentries = 0;
2706    for (i = 0; i < MAGIC_SETS; i++) {
2707        if (needsbyteswap)
2708            map->nmagic[i] = swap4(ptr[i + 2]);
2709        else
2710            map->nmagic[i] = ptr[i + 2];
2711        if (i != MAGIC_SETS - 1)
2712            map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2713        nentries += map->nmagic[i];
2714    }
2715    if (NULL != fn && entries != nentries + 1) {
2716        file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2717            dbname, entries, nentries + 1);
2718        goto error;
2719    }
2720
2721    if (needsbyteswap)
2722        for (i = 0; i < MAGIC_SETS; i++)
2723            byteswap(map->magic[i], map->nmagic[i]);
2724
2725    if (dbname) {
2726        efree(dbname);
2727    }
2728    return map;
2729
2730error:
2731    if (stream) {
2732        php_stream_close(stream);
2733    }
2734    apprentice_unmap(map);
2735    if (dbname) {
2736        efree(dbname);
2737    }
2738    return NULL;
2739}
2740
2741private const uint32_t ar[] = {
2742    MAGICNO, VERSIONNO
2743};
2744
2745/*
2746 * handle an mmaped file.
2747 */
2748private int
2749apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
2750{
2751    static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
2752    static const size_t m = sizeof(**map->magic);
2753    int fd = -1;
2754    size_t len;
2755    char *dbname;
2756    int rv = -1;
2757    uint32_t i;
2758    php_stream *stream;
2759
2760
2761    dbname = mkdbname(ms, fn, 0);
2762
2763    if (dbname == NULL)
2764        goto out;
2765
2766    /* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
2767    stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
2768
2769    if (!stream) {
2770        file_error(ms, errno, "cannot open `%s'", dbname);
2771        goto out;
2772    }
2773
2774    if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
2775        file_error(ms, errno, "error writing `%s'", dbname);
2776        goto out;
2777    }
2778
2779    if (php_stream_write(stream, (const char *)map->nmagic, nm) != (ssize_t)nm) {
2780        file_error(ms, errno, "error writing `%s'", dbname);
2781        goto out;
2782    }
2783
2784    assert(nm + sizeof(ar) < m);
2785
2786    if (php_stream_seek(stream,(zend_off_t)sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
2787        file_error(ms, errno, "error seeking `%s'", dbname);
2788        goto out;
2789    }
2790
2791    for (i = 0; i < MAGIC_SETS; i++) {
2792        len = m * map->nmagic[i];
2793        if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
2794            file_error(ms, errno, "error writing `%s'", dbname);
2795            goto out;
2796        }
2797    }
2798
2799    if (stream) {
2800        php_stream_close(stream);
2801    }
2802
2803    rv = 0;
2804out:
2805    efree(dbname);
2806    return rv;
2807}
2808
/* Suffix that mkdbname() looks for on, and appends to, database names. */
private const char ext[] = ".mgc";
2810/*
2811 * make a dbname
2812 */
2813private char *
2814mkdbname(struct magic_set *ms, const char *fn, int strip)
2815{
2816    const char *p, *q;
2817    char *buf;
2818
2819    if (strip) {
2820        if ((p = strrchr(fn, '/')) != NULL)
2821            fn = ++p;
2822    }
2823
2824    for (q = fn; *q; q++)
2825        continue;
2826    /* Look for .mgc */
2827    for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
2828        if (*p != *q)
2829            break;
2830
2831    /* Did not find .mgc, restore q */
2832    if (p >= ext)
2833        while (*q)
2834            q++;
2835
2836    q++;
2837    /* Compatibility with old code that looked in .mime */
2838    if (ms->flags & MAGIC_MIME) {
2839        spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
2840#ifdef PHP_WIN32
2841        if (VCWD_ACCESS(buf, R_OK) == 0) {
2842#else
2843        if (VCWD_ACCESS(buf, R_OK) != -1) {
2844#endif
2845            ms->flags &= MAGIC_MIME_TYPE;
2846            return buf;
2847        }
2848        efree(buf);
2849    }
2850    spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
2851
2852    /* Compatibility with old code that looked in .mime */
2853    if (strstr(p, ".mime") != NULL)
2854        ms->flags &= MAGIC_MIME_TYPE;
2855    return buf;
2856}
2857
2858/*
2859 * Byteswap an mmap'ed file if needed
2860 */
2861private void
2862byteswap(struct magic *magic, uint32_t nmagic)
2863{
2864    uint32_t i;
2865    for (i = 0; i < nmagic; i++)
2866        bs1(&magic[i]);
2867}
2868
2869/*
2870 * swap a short
2871 */
2872private uint16_t
2873swap2(uint16_t sv)
2874{
2875    uint16_t rv;
2876    uint8_t *s = (uint8_t *)(void *)&sv;
2877    uint8_t *d = (uint8_t *)(void *)&rv;
2878    d[0] = s[1];
2879    d[1] = s[0];
2880    return rv;
2881}
2882
2883/*
2884 * swap an int
2885 */
2886private uint32_t
2887swap4(uint32_t sv)
2888{
2889    uint32_t rv;
2890    uint8_t *s = (uint8_t *)(void *)&sv;
2891    uint8_t *d = (uint8_t *)(void *)&rv;
2892    d[0] = s[3];
2893    d[1] = s[2];
2894    d[2] = s[1];
2895    d[3] = s[0];
2896    return rv;
2897}
2898
2899/*
2900 * swap a quad
2901 */
2902private uint64_t
2903swap8(uint64_t sv)
2904{
2905    uint64_t rv;
2906    uint8_t *s = (uint8_t *)(void *)&sv;
2907    uint8_t *d = (uint8_t *)(void *)&rv;
2908#if 0
2909    d[0] = s[3];
2910    d[1] = s[2];
2911    d[2] = s[1];
2912    d[3] = s[0];
2913    d[4] = s[7];
2914    d[5] = s[6];
2915    d[6] = s[5];
2916    d[7] = s[4];
2917#else
2918    d[0] = s[7];
2919    d[1] = s[6];
2920    d[2] = s[5];
2921    d[3] = s[4];
2922    d[4] = s[3];
2923    d[5] = s[2];
2924    d[6] = s[1];
2925    d[7] = s[0];
2926#endif
2927    return rv;
2928}
2929
2930/*
2931 * byteswap a single magic entry
2932 */
2933private void
2934bs1(struct magic *m)
2935{
2936    m->cont_level = swap2(m->cont_level);
2937    m->offset = swap4((uint32_t)m->offset);
2938    m->in_offset = swap4((uint32_t)m->in_offset);
2939    m->lineno = swap4((uint32_t)m->lineno);
2940    if (IS_LIBMAGIC_STRING(m->type)) {
2941        m->str_range = swap4(m->str_range);
2942        m->str_flags = swap4(m->str_flags);
2943    }
2944    else {
2945        m->value.q = swap8(m->value.q);
2946        m->num_mask = swap8(m->num_mask);
2947    }
2948}
2949
2950protected size_t
2951file_pstring_length_size(const struct magic *m)
2952{
2953    switch (m->str_flags & PSTRING_LEN) {
2954    case PSTRING_1_LE:
2955        return 1;
2956    case PSTRING_2_LE:
2957    case PSTRING_2_BE:
2958        return 2;
2959    case PSTRING_4_LE:
2960    case PSTRING_4_BE:
2961        return 4;
2962    default:
2963        abort();    /* Impossible */
2964        return 1;
2965    }
2966}
2967protected size_t
2968file_pstring_get_length(const struct magic *m, const char *s)
2969{
2970    size_t len = 0;
2971
2972    switch (m->str_flags & PSTRING_LEN) {
2973    case PSTRING_1_LE:
2974        len = *s;
2975        break;
2976    case PSTRING_2_LE:
2977        len = (s[1] << 8) | s[0];
2978        break;
2979    case PSTRING_2_BE:
2980        len = (s[0] << 8) | s[1];
2981        break;
2982    case PSTRING_4_LE:
2983        len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
2984        break;
2985    case PSTRING_4_BE:
2986        len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
2987        break;
2988    default:
2989        abort();    /* Impossible */
2990    }
2991
2992    if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
2993        len -= file_pstring_length_size(m);
2994
2995    return len;
2996}
2997
2998protected int
2999file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3000{
3001    uint32_t i, j;
3002    struct mlist *mlist, *ml;
3003
3004    mlist = ms->mlist[1];
3005
3006    for (ml = mlist->next; ml != mlist; ml = ml->next) {
3007        struct magic *ma = ml->magic;
3008        uint32_t nma = ml->nmagic;
3009        for (i = 0; i < nma; i++) {
3010            if (ma[i].type != FILE_NAME)
3011                continue;
3012            if (strcmp(ma[i].value.s, name) == 0) {
3013                v->magic = &ma[i];
3014                for (j = i + 1; j < nma; j++)
3015                    if (ma[j].cont_level == 0)
3016                        break;
3017                v->nmagic = j - i;
3018                return 0;
3019            }
3020        }
3021    }
3022    return -1;
3023}
3024