1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * apprentice - make one pass through /etc/magic, learning its secrets.
30 */
31
32#include "php.h"
33
34#include "file.h"
35
36#ifndef	lint
37FILE_RCSID("@(#)$File: apprentice.c,v 1.230 2015/01/02 21:29:39 christos Exp $")
38#endif	/* lint */
39
40#include "magic.h"
41#include "patchlevel.h"
42#include <stdlib.h>
43
44#if defined(__hpux) && !defined(HAVE_STRTOULL)
45#if SIZEOF_LONG == 8
46# define strtoull strtoul
47#else
48# define strtoull __strtoull
49#endif
50#endif
51
52#ifdef PHP_WIN32
53#include "win32/unistd.h"
54#define strtoull _strtoui64
55#else
56#include <unistd.h>
57#endif
58#include <string.h>
59#include <assert.h>
60#include <ctype.h>
61#include <fcntl.h>
62
63#ifndef SSIZE_MAX
64#define MAXMAGIC_SIZE        ((ssize_t)0x7fffffff)
65#else
66#define MAXMAGIC_SIZE        SSIZE_MAX
67#endif
68
69#define	EATAB {while (isascii((unsigned char) *l) && \
70		      isspace((unsigned char) *l))  ++l;}
71#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
72			tolower((unsigned char) (l)) : (l))
73/*
74 * Work around a bug in headers on Digital Unix.
75 * At least confirmed for: OSF1 V4.0 878
76 */
77#if defined(__osf__) && defined(__DECC)
78#ifdef MAP_FAILED
79#undef MAP_FAILED
80#endif
81#endif
82
83#ifndef MAP_FAILED
84#define MAP_FAILED (void *) -1
85#endif
86
87#ifndef MAP_FILE
88#define MAP_FILE 0
89#endif
90
91#define ALLOC_CHUNK	(size_t)10
92#define ALLOC_INCR	(size_t)200
93
94#define MAP_TYPE_MMAP	0
95#define MAP_TYPE_MALLOC	1
96#define MAP_TYPE_USER	2
97
/*
 * A magic entry being assembled: the array of magic records that make it
 * up and how many of them are in use.
 */
struct magic_entry {
	struct magic *mp;	/* records of this entry; coalesce_entries() copies cont_count of them */
	uint32_t cont_count;	/* number of records stored in mp */
	uint32_t max_count;	/* presumably the allocated capacity of mp -- TODO confirm in parse() */
};
103
/* Growable array of magic entries; addentry() extends it in ALLOC_INCR steps. */
struct magic_entry_set {
	struct magic_entry *me;	/* entry storage, (re)allocated by addentry() */
	uint32_t count;		/* entries currently stored */
	uint32_t max;		/* entries the storage has room for */
};
109
/*
 * A loaded magic database: one magic array per set, plus the buffer the
 * arrays may live in.
 */
struct magic_map {
	void *p;	/* backing buffer; when NULL, apprentice_unmap() frees each magic[] array instead */
	size_t len;	/* presumably the size of the buffer at p -- TODO confirm */
	int type;	/* presumably one of the MAP_TYPE_* values -- TODO confirm */
	struct magic *magic[MAGIC_SETS];	/* magic array of each set */
	uint32_t nmagic[MAGIC_SETS];		/* number of elements in each magic[] array */
};
117
/*
 * Per-type lookup tables, indexed by the FILE_* type code.  They are
 * filled in from type_tbl by init_file_tables().
 */
int file_formats[FILE_NAMES_SIZE];	/* FILE_FMT_* code of each type */
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];	/* keyword of each type */
const size_t file_nnames = FILE_NAMES_SIZE;
122
/*
 * Forward declarations of the file-local helpers, so that they can be
 * defined in any order below.
 */
private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
private int hextoint(int);
private const char *getstr(struct magic_set *, struct magic *, const char *,
    int);
private int parse(struct magic_set *, struct magic_entry *, const char *,
    size_t, int);
private void eatsize(const char **);
private int apprentice_1(struct magic_set *, const char *, int);
private size_t apprentice_magic_strength(const struct magic *);
private int apprentice_sort(const void *, const void *);
private void apprentice_list(struct mlist *, int );
private struct magic_map *apprentice_load(struct magic_set *,
    const char *, int);
private struct mlist *mlist_alloc(void);
private void mlist_free(struct mlist *);
private void byteswap(struct magic *, uint32_t);
private void bs1(struct magic *);
private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private uint64_t swap8(uint64_t);
private char *mkdbname(struct magic_set *, const char *, int);
private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
    size_t);
private struct magic_map *apprentice_map(struct magic_set *, const char *);
private int check_buffer(struct magic_set *, struct magic_map *, const char *);
private void apprentice_unmap(struct magic_map *);
private int apprentice_compile(struct magic_set *, struct magic_map *,
    const char *);
private int check_format_type(const char *, int);
private int check_format(struct magic_set *, struct magic *);
private int get_op(char);
private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
157
158
/* Compared against FILE_MAGICSIZE by apprentice_1() before anything is loaded. */
private size_t magicsize = sizeof(struct magic);

/* Column header written to stderr by apprentice_load() when the action is FILE_CHECK. */
private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
162
/*
 * Dispatch table for "!:" lines in a magic file.  load_1() compares the
 * text following "!:" with each name and calls the matching handler on
 * the current entry.  The table ends with an all-NULL row.
 */
private struct {
	const char *name;	/* keyword expected after "!:" */
	size_t len;		/* strlen(name) */
	int (*fun)(struct magic_set *, struct magic_entry *, const char *);	/* handler; load_1() treats non-zero as an error */
} bang[] = {
	/* DECLARE_FIELD(x) expands to { "x", strlen("x"), parse_x } */
#define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
	DECLARE_FIELD(mime),
	DECLARE_FIELD(apple),
	DECLARE_FIELD(strength),
#undef	DECLARE_FIELD
	{ NULL, 0, NULL }
};
175
176#include "../data_file.c"
177
/* One row of a keyword table: maps a type keyword to its FILE_* codes. */
struct type_tbl_s {
	const char name[16];	/* keyword text; empty in the terminating row */
	const size_t len;	/* length of name; 0 marks the end of a table */
	const int type;		/* FILE_* type code */
	const int format;	/* FILE_FMT_* code associated with the type */
};
184
185/*
186 * XXX - the actual Single UNIX Specification says that "long" means "long",
187 * as in the C data type, but we treat it as meaning "4-byte integer".
188 * Given that the OS X version of file 5.04 did the same, I guess that passes
189 * the actual test; having "long" be dependent on how big a "long" is on
190 * the machine running "file" is silly.
191 */
/*
 * Keyword table for all magic types.  init_file_tables() copies each row
 * into file_names[] and file_formats[] and asserts that the number of
 * rows before the terminator equals FILE_NAMES_SIZE.
 */
static const struct type_tbl_s type_tbl[] = {
	/* XX(s) supplies both the keyword and its length; XX_NULL is the terminator */
# define XX(s)		s, (sizeof(s) - 1)
# define XX_NULL	"", 0
	{ XX("invalid"),	FILE_INVALID,		FILE_FMT_NONE },
	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_NONE },
	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NUM },
	{ XX("qwdate"),		FILE_QWDATE,		FILE_FMT_STR },
	{ XX("leqwdate"),	FILE_LEQWDATE,		FILE_FMT_STR },
	{ XX("beqwdate"),	FILE_BEQWDATE,		FILE_FMT_STR },
	{ XX("name"),		FILE_NAME,		FILE_FMT_NONE },
	{ XX("use"),		FILE_USE,		FILE_FMT_NONE },
	{ XX("clear"),		FILE_CLEAR,		FILE_FMT_NONE },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },	/* terminator: len == 0 */
};
245
246/*
247 * These are not types, and cannot be preceded by "u" to make them
248 * unsigned.
249 */
/* Same row layout as type_tbl; relies on the XX/XX_NULL helpers defined there. */
static const struct type_tbl_s special_tbl[] = {
	{ XX("name"),		FILE_NAME,		FILE_FMT_STR },
	{ XX("use"),		FILE_USE,		FILE_FMT_STR },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },	/* terminator: len == 0 */
};
/* The table helpers are not needed past this point. */
# undef XX
# undef XX_NULL
257
258#ifndef S_ISDIR
259#define S_ISDIR(mode) ((mode) & _S_IFDIR)
260#endif
261
262private int
263get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
264{
265	const struct type_tbl_s *p;
266
267	for (p = tbl; p->len; p++) {
268		if (strncmp(l, p->name, p->len) == 0) {
269			if (t)
270				*t = l + p->len;
271			break;
272		}
273	}
274	return p->type;
275}
276
277private int
278get_standard_integer_type(const char *l, const char **t)
279{
280	int type;
281
282	if (isalpha((unsigned char)l[1])) {
283		switch (l[1]) {
284		case 'C':
285			/* "dC" and "uC" */
286			type = FILE_BYTE;
287			break;
288		case 'S':
289			/* "dS" and "uS" */
290			type = FILE_SHORT;
291			break;
292		case 'I':
293		case 'L':
294			/*
295			 * "dI", "dL", "uI", and "uL".
296			 *
297			 * XXX - the actual Single UNIX Specification says
298			 * that "L" means "long", as in the C data type,
299			 * but we treat it as meaning "4-byte integer".
300			 * Given that the OS X version of file 5.04 did
301			 * the same, I guess that passes the actual SUS
302			 * validation suite; having "dL" be dependent on
303			 * how big a "long" is on the machine running
304			 * "file" is silly.
305			 */
306			type = FILE_LONG;
307			break;
308		case 'Q':
309			/* "dQ" and "uQ" */
310			type = FILE_QUAD;
311			break;
312		default:
313			/* "d{anything else}", "u{anything else}" */
314			return FILE_INVALID;
315		}
316		l += 2;
317	} else if (isdigit((unsigned char)l[1])) {
318		/*
319		 * "d{num}" and "u{num}"; we only support {num} values
320		 * of 1, 2, 4, and 8 - the Single UNIX Specification
321		 * doesn't say anything about whether arbitrary
322		 * values should be supported, but both the Solaris 10
323		 * and OS X Mountain Lion versions of file passed the
324		 * Single UNIX Specification validation suite, and
325		 * neither of them support values bigger than 8 or
326		 * non-power-of-2 values.
327		 */
328		if (isdigit((unsigned char)l[2])) {
329			/* Multi-digit, so > 9 */
330			return FILE_INVALID;
331		}
332		switch (l[1]) {
333		case '1':
334			type = FILE_BYTE;
335			break;
336		case '2':
337			type = FILE_SHORT;
338			break;
339		case '4':
340			type = FILE_LONG;
341			break;
342		case '8':
343			type = FILE_QUAD;
344			break;
345		default:
346			/* XXX - what about 3, 5, 6, or 7? */
347			return FILE_INVALID;
348		}
349		l += 2;
350	} else {
351		/*
352		 * "d" or "u" by itself.
353		 */
354		type = FILE_LONG;
355		++l;
356	}
357	if (t)
358		*t = l;
359	return type;
360}
361
362private void
363init_file_tables(void)
364{
365	static int done = 0;
366	const struct type_tbl_s *p;
367
368	if (done)
369		return;
370	done++;
371
372	for (p = type_tbl; p->len; p++) {
373		assert(p->type < FILE_NAMES_SIZE);
374		file_names[p->type] = p->name;
375		file_formats[p->type] = p->format;
376	}
377	assert(p - type_tbl == FILE_NAMES_SIZE);
378}
379
380private int
381add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
382{
383	struct mlist *ml;
384
385	mlp->map = idx == 0 ? map : NULL;
386	if ((ml = CAST(struct mlist *, emalloc(sizeof(*ml)))) == NULL)
387		return -1;
388
389	ml->map = NULL;
390	ml->magic = map->magic[idx];
391	ml->nmagic = map->nmagic[idx];
392
393	mlp->prev->next = ml;
394	ml->prev = mlp->prev;
395	ml->next = mlp;
396	mlp->prev = ml;
397	return 0;
398}
399
400/*
401 * Handle one file or directory.
402 */
403private int
404apprentice_1(struct magic_set *ms, const char *fn, int action)
405{
406	struct magic_map *map;
407	struct mlist *ml;
408	size_t i;
409
410	if (magicsize != FILE_MAGICSIZE) {
411		file_error(ms, 0, "magic element size %lu != %lu",
412		    (unsigned long)sizeof(*map->magic[0]),
413		    (unsigned long)FILE_MAGICSIZE);
414		return -1;
415	}
416
417	if (action == FILE_COMPILE) {
418		map = apprentice_load(ms, fn, action);
419		if (map == NULL)
420			return -1;
421		return apprentice_compile(ms, map, fn);
422	}
423
424	map = apprentice_map(ms, fn);
425	if (map == NULL) {
426		if (fn) {
427			if (ms->flags & MAGIC_CHECK)
428				file_magwarn(ms, "using regular magic file `%s'", fn);
429			map = apprentice_load(ms, fn, action);
430		}
431		if (map == NULL)
432			return -1;
433	}
434
435	for (i = 0; i < MAGIC_SETS; i++) {
436		if (add_mlist(ms->mlist[i], map, i) == -1) {
437			file_oomem(ms, sizeof(*ml));
438			apprentice_unmap(map);
439			return -1;
440		}
441	}
442
443	if (action == FILE_LIST) {
444		for (i = 0; i < MAGIC_SETS; i++) {
445			printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
446			    i);
447			apprentice_list(ms->mlist[i], BINTEST);
448			printf("Text patterns:\n");
449			apprentice_list(ms->mlist[i], TEXTTEST);
450		}
451	}
452	return 0;
453}
454
455protected void
456file_ms_free(struct magic_set *ms)
457{
458	size_t i;
459	if (ms == NULL)
460		return;
461	for (i = 0; i < MAGIC_SETS; i++)
462		mlist_free(ms->mlist[i]);
463	if (ms->o.pbuf) {
464		efree(ms->o.pbuf);
465	}
466	if (ms->o.buf) {
467		efree(ms->o.buf);
468	}
469	if (ms->c.li) {
470		efree(ms->c.li);
471	}
472	efree(ms);
473}
474
475protected struct magic_set *
476file_ms_alloc(int flags)
477{
478	struct magic_set *ms;
479	size_t i, len;
480
481	if ((ms = CAST(struct magic_set *, ecalloc((size_t)1,
482	    sizeof(struct magic_set)))) == NULL)
483		return NULL;
484
485	if (magic_setflags(ms, flags) == -1) {
486		errno = EINVAL;
487		goto free;
488	}
489
490	ms->o.buf = ms->o.pbuf = NULL;
491	len = (ms->c.len = 10) * sizeof(*ms->c.li);
492
493	if ((ms->c.li = CAST(struct level_info *, emalloc(len))) == NULL)
494		goto free;
495
496	ms->event_flags = 0;
497	ms->error = -1;
498	for (i = 0; i < MAGIC_SETS; i++)
499		ms->mlist[i] = NULL;
500	ms->file = "unknown";
501	ms->line = 0;
502	ms->indir_max = FILE_INDIR_MAX;
503	ms->name_max = FILE_NAME_MAX;
504	ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
505	ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
506	ms->elf_notes_max = FILE_ELF_NOTES_MAX;
507	return ms;
508free:
509	efree(ms);
510	return NULL;
511}
512
513private void
514apprentice_unmap(struct magic_map *map)
515{
516	if (map == NULL)
517		return;
518	if (map->p != php_magic_database) {
519		if (map->p == NULL) {
520			int j;
521			for (j = 0; j < MAGIC_SETS; j++) {
522				if (map->magic[j]) {
523					efree(map->magic[j]);
524				}
525			}
526		} else {
527			efree(map->p);
528		}
529	}
530	efree(map);
531}
532
533private struct mlist *
534mlist_alloc(void)
535{
536	struct mlist *mlist;
537	if ((mlist = CAST(struct mlist *, ecalloc(1, sizeof(*mlist)))) == NULL) {
538		return NULL;
539	}
540	mlist->next = mlist->prev = mlist;
541	return mlist;
542}
543
544private void
545mlist_free(struct mlist *mlist)
546{
547	struct mlist *ml, *next;
548
549	if (mlist == NULL)
550		return;
551
552	ml = mlist->next;
553	for (ml = mlist->next; (next = ml->next) != NULL; ml = next) {
554		if (ml->map)
555			apprentice_unmap(ml->map);
556		efree(ml);
557		if (ml == mlist)
558			break;
559	}
560}
561
562/* const char *fn: list of magic files and directories */
563protected int
564file_apprentice(struct magic_set *ms, const char *fn, int action)
565{
566	char *p, *mfn;
567	int file_err, errs = -1;
568	size_t i;
569
570	if (ms->mlist[0] != NULL)
571		file_reset(ms);
572
573/* XXX disabling default magic loading so the compiled in data is used */
574#if 0
575	if ((fn = magic_getpath(fn, action)) == NULL)
576		return -1;
577#endif
578
579	init_file_tables();
580
581	if (fn == NULL)
582		fn = getenv("MAGIC");
583	if (fn == NULL) {
584		for (i = 0; i < MAGIC_SETS; i++) {
585			mlist_free(ms->mlist[i]);
586			if ((ms->mlist[i] = mlist_alloc()) == NULL) {
587				file_oomem(ms, sizeof(*ms->mlist[i]));
588				return -1;
589			}
590		}
591		return apprentice_1(ms, fn, action);
592	}
593
594	if ((mfn = estrdup(fn)) == NULL) {
595		file_oomem(ms, strlen(fn));
596		return -1;
597	}
598
599	for (i = 0; i < MAGIC_SETS; i++) {
600		mlist_free(ms->mlist[i]);
601		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
602			file_oomem(ms, sizeof(*ms->mlist[i]));
603			while (i-- > 0) {
604				mlist_free(ms->mlist[i]);
605				ms->mlist[i] = NULL;
606			}
607			efree(mfn);
608			return -1;
609		}
610	}
611	fn = mfn;
612
613	while (fn) {
614		p = strchr(fn, PATHSEP);
615		if (p)
616			*p++ = '\0';
617		if (*fn == '\0')
618			break;
619		file_err = apprentice_1(ms, fn, action);
620		errs = MAX(errs, file_err);
621		fn = p;
622	}
623
624	efree(mfn);
625
626	if (errs == -1) {
627		for (i = 0; i < MAGIC_SETS; i++) {
628			mlist_free(ms->mlist[i]);
629			ms->mlist[i] = NULL;
630		}
631		file_error(ms, 0, "could not find any valid magic files!");
632		return -1;
633	}
634
635#if 0
636	/*
637	 * Always leave the database loaded
638	 */
639	if (action == FILE_LOAD)
640		return 0;
641
642	for (i = 0; i < MAGIC_SETS; i++) {
643		mlist_free(ms->mlist[i]);
644		ms->mlist[i] = NULL;
645	}
646#endif
647
648	switch (action) {
649	case FILE_LOAD:
650	case FILE_COMPILE:
651	case FILE_CHECK:
652	case FILE_LIST:
653		return 0;
654	default:
655		file_error(ms, 0, "Invalid action %d", action);
656		return -1;
657	}
658}
659
/*
 * Compute the real length of a magic expression, for the purposes
 * of determining how "strong" a magic expression is (approximating
 * how specific its matches are):
 *	- the magic characters ? * . + ^ $ count 0 unless escaped
 *	- a backslash together with the character after it counts 1
 *	- a [] expression counts 1 (an unterminated one counts 0)
 *	- a {} expression counts 0
 *	- any other character counts 1
 *	- an expression whose total is 0 counts as 1
 */
private size_t
nonmagic(const char *str)
{
	const char *cur = str;
	size_t weight = 0;

	while (*cur != '\0') {
		char ch = *cur;

		if (ch == '\\') {
			/* swallow the escaped character too, if there is one */
			if (cur[1] != '\0')
				cur++;
			weight++;
		} else if (ch == '[') {
			/*
			 * Stop one short of the ']' so that the next round
			 * sees the ']' itself and counts it as 1.
			 */
			while (*cur != '\0' && *cur != ']')
				cur++;
			cur--;
		} else if (ch == '{') {
			/* skip through the '}'; do not step past a NUL */
			while (*cur != '\0' && *cur != '}')
				cur++;
			if (*cur == '\0')
				cur--;
		} else if (strchr("?*.+^$", ch) == NULL) {
			weight++;
		}
		cur++;
	}

	return weight != 0 ? weight : 1;	/* Return at least 1 */
}
708
709/*
710 * Get weight of this magic entry, for sorting purposes.
711 */
712private size_t
713apprentice_magic_strength(const struct magic *m)
714{
715#define MULT 10
716	size_t v, val = 2 * MULT;	/* baseline strength */
717
718	switch (m->type) {
719	case FILE_DEFAULT:	/* make sure this sorts last */
720		if (m->factor_op != FILE_FACTOR_OP_NONE)
721			abort();
722		return 0;
723
724	case FILE_BYTE:
725		val += 1 * MULT;
726		break;
727
728	case FILE_SHORT:
729	case FILE_LESHORT:
730	case FILE_BESHORT:
731		val += 2 * MULT;
732		break;
733
734	case FILE_LONG:
735	case FILE_LELONG:
736	case FILE_BELONG:
737	case FILE_MELONG:
738		val += 4 * MULT;
739		break;
740
741	case FILE_PSTRING:
742	case FILE_STRING:
743		val += m->vallen * MULT;
744		break;
745
746	case FILE_BESTRING16:
747	case FILE_LESTRING16:
748		val += m->vallen * MULT / 2;
749		break;
750
751	case FILE_SEARCH:
752		val += m->vallen * MAX(MULT / m->vallen, 1);
753		break;
754
755	case FILE_REGEX:
756		v = nonmagic(m->value.s);
757		val += v * MAX(MULT / v, 1);
758		break;
759
760	case FILE_DATE:
761	case FILE_LEDATE:
762	case FILE_BEDATE:
763	case FILE_MEDATE:
764	case FILE_LDATE:
765	case FILE_LELDATE:
766	case FILE_BELDATE:
767	case FILE_MELDATE:
768	case FILE_FLOAT:
769	case FILE_BEFLOAT:
770	case FILE_LEFLOAT:
771		val += 4 * MULT;
772		break;
773
774	case FILE_QUAD:
775	case FILE_BEQUAD:
776	case FILE_LEQUAD:
777	case FILE_QDATE:
778	case FILE_LEQDATE:
779	case FILE_BEQDATE:
780	case FILE_QLDATE:
781	case FILE_LEQLDATE:
782	case FILE_BEQLDATE:
783	case FILE_QWDATE:
784	case FILE_LEQWDATE:
785	case FILE_BEQWDATE:
786	case FILE_DOUBLE:
787	case FILE_BEDOUBLE:
788	case FILE_LEDOUBLE:
789		val += 8 * MULT;
790		break;
791
792	case FILE_INDIRECT:
793	case FILE_NAME:
794	case FILE_USE:
795		break;
796
797	default:
798		(void)fprintf(stderr, "Bad type %d\n", m->type);
799		abort();
800	}
801
802	switch (m->reln) {
803	case 'x':	/* matches anything penalize */
804	case '!':       /* matches almost anything penalize */
805		val = 0;
806		break;
807
808	case '=':	/* Exact match, prefer */
809		val += MULT;
810		break;
811
812	case '>':
813	case '<':	/* comparison match reduce strength */
814		val -= 2 * MULT;
815		break;
816
817	case '^':
818	case '&':	/* masking bits, we could count them too */
819		val -= MULT;
820		break;
821
822	default:
823		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
824		abort();
825	}
826
827	if (val == 0)	/* ensure we only return 0 for FILE_DEFAULT */
828		val = 1;
829
830	switch (m->factor_op) {
831	case FILE_FACTOR_OP_NONE:
832		break;
833	case FILE_FACTOR_OP_PLUS:
834		val += m->factor;
835		break;
836	case FILE_FACTOR_OP_MINUS:
837		val -= m->factor;
838		break;
839	case FILE_FACTOR_OP_TIMES:
840		val *= m->factor;
841		break;
842	case FILE_FACTOR_OP_DIV:
843		val /= m->factor;
844		break;
845	default:
846		abort();
847	}
848
849	/*
850	 * Magic entries with no description get a bonus because they depend
851	 * on subsequent magic entries to print something.
852	 */
853	if (m->desc[0] == '\0')
854		val++;
855	return val;
856}
857
858/*
859 * Sort callback for sorting entries by "strength" (basically length)
860 */
861private int
862apprentice_sort(const void *a, const void *b)
863{
864	const struct magic_entry *ma = CAST(const struct magic_entry *, a);
865	const struct magic_entry *mb = CAST(const struct magic_entry *, b);
866	size_t sa = apprentice_magic_strength(ma->mp);
867	size_t sb = apprentice_magic_strength(mb->mp);
868	if (sa == sb)
869		return 0;
870	else if (sa > sb)
871		return -1;
872	else
873		return 1;
874}
875
876/*
877 * Shows sorted patterns list in the order which is used for the matching
878 */
879private void
880apprentice_list(struct mlist *mlist, int mode)
881{
882	uint32_t magindex = 0;
883	struct mlist *ml;
884	for (ml = mlist->next; ml != mlist; ml = ml->next) {
885		for (magindex = 0; magindex < ml->nmagic; magindex++) {
886			struct magic *m = &ml->magic[magindex];
887			if ((m->flag & mode) != mode) {
888				/* Skip sub-tests */
889				while (magindex + 1 < ml->nmagic &&
890				       ml->magic[magindex + 1].cont_level != 0)
891					++magindex;
892				continue; /* Skip to next top-level test*/
893			}
894
895			/*
896			 * Try to iterate over the tree until we find item with
897			 * description/mimetype.
898			 */
899			while (magindex + 1 < ml->nmagic &&
900			       ml->magic[magindex + 1].cont_level != 0 &&
901			       *ml->magic[magindex].desc == '\0' &&
902			       *ml->magic[magindex].mimetype == '\0')
903				magindex++;
904
905			printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
906			    apprentice_magic_strength(m),
907			    ml->magic[magindex].lineno,
908			    ml->magic[magindex].desc,
909			    ml->magic[magindex].mimetype);
910		}
911	}
912}
913
914private void
915set_test_type(struct magic *mstart, struct magic *m)
916{
917	switch (m->type) {
918	case FILE_BYTE:
919	case FILE_SHORT:
920	case FILE_LONG:
921	case FILE_DATE:
922	case FILE_BESHORT:
923	case FILE_BELONG:
924	case FILE_BEDATE:
925	case FILE_LESHORT:
926	case FILE_LELONG:
927	case FILE_LEDATE:
928	case FILE_LDATE:
929	case FILE_BELDATE:
930	case FILE_LELDATE:
931	case FILE_MEDATE:
932	case FILE_MELDATE:
933	case FILE_MELONG:
934	case FILE_QUAD:
935	case FILE_LEQUAD:
936	case FILE_BEQUAD:
937	case FILE_QDATE:
938	case FILE_LEQDATE:
939	case FILE_BEQDATE:
940	case FILE_QLDATE:
941	case FILE_LEQLDATE:
942	case FILE_BEQLDATE:
943	case FILE_QWDATE:
944	case FILE_LEQWDATE:
945	case FILE_BEQWDATE:
946	case FILE_FLOAT:
947	case FILE_BEFLOAT:
948	case FILE_LEFLOAT:
949	case FILE_DOUBLE:
950	case FILE_BEDOUBLE:
951	case FILE_LEDOUBLE:
952		mstart->flag |= BINTEST;
953		break;
954	case FILE_STRING:
955	case FILE_PSTRING:
956	case FILE_BESTRING16:
957	case FILE_LESTRING16:
958		/* Allow text overrides */
959		if (mstart->str_flags & STRING_TEXTTEST)
960			mstart->flag |= TEXTTEST;
961		else
962			mstart->flag |= BINTEST;
963		break;
964	case FILE_REGEX:
965	case FILE_SEARCH:
966		/* Check for override */
967		if (mstart->str_flags & STRING_BINTEST)
968			mstart->flag |= BINTEST;
969		if (mstart->str_flags & STRING_TEXTTEST)
970			mstart->flag |= TEXTTEST;
971
972		if (mstart->flag & (TEXTTEST|BINTEST))
973			break;
974
975		/* binary test if pattern is not text */
976		if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
977		    NULL) <= 0)
978			mstart->flag |= BINTEST;
979		else
980			mstart->flag |= TEXTTEST;
981		break;
982	case FILE_DEFAULT:
983		/* can't deduce anything; we shouldn't see this at the
984		   top level anyway */
985		break;
986	case FILE_INVALID:
987	default:
988		/* invalid search type, but no need to complain here */
989		break;
990	}
991}
992
993private int
994addentry(struct magic_set *ms, struct magic_entry *me,
995   struct magic_entry_set *mset)
996{
997	size_t i = me->mp->type == FILE_NAME ? 1 : 0;
998	if (mset[i].count == mset[i].max) {
999		struct magic_entry *mp;
1000
1001		mset[i].max += ALLOC_INCR;
1002		if ((mp = CAST(struct magic_entry *,
1003		    erealloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
1004		    NULL) {
1005			file_oomem(ms, sizeof(*mp) * mset[i].max);
1006			return -1;
1007		}
1008		(void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1009		    ALLOC_INCR);
1010		mset[i].me = mp;
1011	}
1012	mset[i].me[mset[i].count++] = *me;
1013	memset(me, 0, sizeof(*me));
1014	return 0;
1015}
1016
/*
 * Load and parse one file.
 *
 * The file fn is opened as a PHP stream and read line by line.  Empty
 * lines and lines starting with '#' are skipped, "!:" lines are passed to
 * the matching handler from bang[], and all other lines go to parse().
 * Finished entries are added to mset through addentry().
 *
 * *errs is incremented for every problem met: the file cannot be opened,
 * a "!:" keyword is unknown or has no current entry, a handler fails, or
 * parse() reports an error.  ms->file and ms->line are updated as a side
 * effect.
 */
private void
load_1(struct magic_set *ms, int action, const char *fn, int *errs,
   struct magic_entry_set *mset)
{
	char buffer[BUFSIZ + 1];
	char *line = NULL;
	size_t len;
	size_t lineno = 0;	/* counts newline-terminated lines; handed to parse() */
	struct magic_entry me;	/* entry currently being built */

	php_stream *stream;


	ms->file = fn;
	stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);

	if (stream == NULL) {
		/* a missing file is counted as an error but not reported */
		if (errno != ENOENT)
			file_error(ms, errno, "cannot read magic file `%s'",
				   fn);
		(*errs)++;
		return;
	}

	memset(&me, 0, sizeof(me));
	/* read and parse this file */
	for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) {
		if (len == 0) /* null line, garbage, etc */
			continue;
		if (line[len - 1] == '\n') {
			lineno++;
			line[len - 1] = '\0'; /* delete newline */
		}
		switch (line[0]) {
		case '\0':	/* empty, do not parse */
		case '#':	/* comment, do not parse */
			continue;
		case '!':
			if (line[1] == ':') {
				size_t i;

				/* find the handler whose keyword follows "!:" */
				for (i = 0; bang[i].name != NULL; i++) {
					if ((size_t)(len - 2) > bang[i].len &&
					    memcmp(bang[i].name, line + 2,
					    bang[i].len) == 0)
						break;
				}
				if (bang[i].name == NULL) {
					file_error(ms, 0,
					    "Unknown !: entry `%s'", line);
					(*errs)++;
					continue;
				}
				/* a "!:" line only makes sense after a magic line */
				if (me.mp == NULL) {
					file_error(ms, 0,
					    "No current entry for :!%s type",
						bang[i].name);
					(*errs)++;
					continue;
				}
				/* the handler gets the text after the keyword */
				if ((*bang[i].fun)(ms, &me,
				    line + bang[i].len + 2) != 0) {
					(*errs)++;
					continue;
				}
				continue;
			}
			/* a '!' not followed by ':' is parsed as a magic line */
			/*FALLTHROUGH*/
		default:
		again:
			switch (parse(ms, &me, line, lineno, action)) {
			case 0:
				continue;
			case 1:
				/*
				 * Store the current entry, then parse the same
				 * line again; presumably 1 means the line starts
				 * a new entry -- TODO confirm in parse().
				 */
				(void)addentry(ms, &me, mset);
				goto again;
			default:
				(*errs)++;
				break;
			}
		}
	}
	/* store the entry that was still being built at end of file */
	if (me.mp)
		(void)addentry(ms, &me, mset);
	/* NOTE(review): the read loop only ends once line is NULL, so this looks like it always frees NULL -- confirm */
    efree(line);
	php_stream_close(stream);
}
1107
1108/*
1109 * parse a file or directory of files
1110 * const char *fn: name of magic file or directory
1111 */
private int
cmpstrp(const void *p1, const void *p2)
{
	/*
	 * Each argument points at a string pointer; the strings themselves
	 * are compared with strcmp() and its result is returned.
	 */
	char *const *lhs = (char *const *)p1;
	char *const *rhs = (char *const *)p2;

	return strcmp(*lhs, *rhs);
}
1117
1118
1119private uint32_t
1120set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1121    uint32_t starttest)
1122{
1123	static const char text[] = "text";
1124	static const char binary[] = "binary";
1125	static const size_t len = sizeof(text);
1126
1127	uint32_t i = starttest;
1128
1129	do {
1130		set_test_type(me[starttest].mp, me[i].mp);
1131		if ((ms->flags & MAGIC_DEBUG) == 0)
1132			continue;
1133		(void)fprintf(stderr, "%s%s%s: %s\n",
1134		    me[i].mp->mimetype,
1135		    me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1136		    me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1137		    me[i].mp->flag & BINTEST ? binary : text);
1138		if (me[i].mp->flag & BINTEST) {
1139			char *p = strstr(me[i].mp->desc, text);
1140			if (p && (p == me[i].mp->desc ||
1141			    isspace((unsigned char)p[-1])) &&
1142			    (p + len - me[i].mp->desc == MAXstring
1143			    || (p[len] == '\0' ||
1144			    isspace((unsigned char)p[len]))))
1145				(void)fprintf(stderr, "*** Possible "
1146				    "binary test for text type\n");
1147		}
1148	} while (++i < nme && me[i].mp->cont_level != 0);
1149	return i;
1150}
1151
1152private void
1153set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1154{
1155	uint32_t i;
1156	for (i = 0; i < nme; i++) {
1157		if (me[i].mp->cont_level == 0 &&
1158		    me[i].mp->type == FILE_DEFAULT) {
1159			while (++i < nme)
1160				if (me[i].mp->cont_level == 0)
1161					break;
1162			if (i != nme) {
1163				/* XXX - Ugh! */
1164				ms->line = me[i].mp->lineno;
1165				file_magwarn(ms,
1166				    "level 0 \"default\" did not sort last");
1167			}
1168			return;
1169		}
1170	}
1171}
1172
1173private int
1174coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1175    struct magic **ma, uint32_t *nma)
1176{
1177	uint32_t i, mentrycount = 0;
1178	size_t slen;
1179
1180	for (i = 0; i < nme; i++)
1181		mentrycount += me[i].cont_count;
1182
1183	slen = sizeof(**ma) * mentrycount;
1184	if ((*ma = CAST(struct magic *, emalloc(slen))) == NULL) {
1185		file_oomem(ms, slen);
1186		return -1;
1187	}
1188
1189	mentrycount = 0;
1190	for (i = 0; i < nme; i++) {
1191		(void)memcpy(*ma + mentrycount, me[i].mp,
1192		    me[i].cont_count * sizeof(**ma));
1193		mentrycount += me[i].cont_count;
1194	}
1195	*nma = mentrycount;
1196	return 0;
1197}
1198
1199private void
1200magic_entry_free(struct magic_entry *me, uint32_t nme)
1201{
1202	uint32_t i;
1203	if (me == NULL)
1204		return;
1205	for (i = 0; i < nme; i++)
1206		efree(me[i].mp);
1207	efree(me);
1208}
1209
1210private struct magic_map *
1211apprentice_load(struct magic_set *ms, const char *fn, int action)
1212{
1213	int errs = 0;
1214	uint32_t i, j;
1215	size_t files = 0, maxfiles = 0;
1216	char **filearr = NULL;
1217	zend_stat_t st;
1218	struct magic_map *map;
1219	struct magic_entry_set mset[MAGIC_SETS];
1220	php_stream *dir;
1221	php_stream_dirent d;
1222
1223
1224	memset(mset, 0, sizeof(mset));
1225	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */
1226
1227
1228	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL)
1229	{
1230		file_oomem(ms, sizeof(*map));
1231		return NULL;
1232	}
1233
1234	/* print silly verbose header for USG compat. */
1235	if (action == FILE_CHECK)
1236		(void)fprintf(stderr, "%s\n", usg_hdr);
1237
1238	/* load directory or file */
1239	/* FIXME: Read file names and sort them to prevent
1240	   non-determinism. See Debian bug #488562. */
1241	if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1242		int mflen;
1243		char mfn[MAXPATHLEN];
1244
1245		dir = php_stream_opendir((char *)fn, REPORT_ERRORS, NULL);
1246		if (!dir) {
1247			errs++;
1248			goto out;
1249		}
1250		while (php_stream_readdir(dir, &d)) {
1251			if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) {
1252				file_oomem(ms,
1253				strlen(fn) + strlen(d.d_name) + 2);
1254				errs++;
1255				php_stream_closedir(dir);
1256				goto out;
1257			}
1258			if (zend_stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1259				continue;
1260			}
1261			if (files >= maxfiles) {
1262				size_t mlen;
1263				maxfiles = (maxfiles + 1) * 2;
1264				mlen = maxfiles * sizeof(*filearr);
1265				if ((filearr = CAST(char **,
1266				    erealloc(filearr, mlen))) == NULL) {
1267					file_oomem(ms, mlen);
1268					php_stream_closedir(dir);
1269					errs++;
1270					goto out;
1271				}
1272			}
1273			filearr[files++] = estrndup(mfn, (mflen > sizeof(mfn) - 1)? sizeof(mfn) - 1: mflen);
1274		}
1275		php_stream_closedir(dir);
1276		qsort(filearr, files, sizeof(*filearr), cmpstrp);
1277		for (i = 0; i < files; i++) {
1278			load_1(ms, action, filearr[i], &errs, mset);
1279			efree(filearr[i]);
1280		}
1281		efree(filearr);
1282	} else
1283		load_1(ms, action, fn, &errs, mset);
1284	if (errs)
1285		goto out;
1286
1287	for (j = 0; j < MAGIC_SETS; j++) {
1288		/* Set types of tests */
1289		for (i = 0; i < mset[j].count; ) {
1290			if (mset[j].me[i].mp->cont_level != 0) {
1291				i++;
1292				continue;
1293			}
1294			i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1295		}
1296		qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
1297		    apprentice_sort);
1298
1299		/*
1300		 * Make sure that any level 0 "default" line is last
1301		 * (if one exists).
1302		 */
1303		set_last_default(ms, mset[j].me, mset[j].count);
1304
1305		/* coalesce per file arrays into a single one */
1306		if (coalesce_entries(ms, mset[j].me, mset[j].count,
1307		    &map->magic[j], &map->nmagic[j]) == -1) {
1308			errs++;
1309			goto out;
1310		}
1311	}
1312
1313out:
1314	for (j = 0; j < MAGIC_SETS; j++)
1315		magic_entry_free(mset[j].me, mset[j].count);
1316
1317	if (errs) {
1318		apprentice_unmap(map);
1319		return NULL;
1320	}
1321	return map;
1322}
1323
1324/*
1325 * extend the sign bit if the comparison is to be signed
1326 */
1327protected uint64_t
1328file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1329{
1330	if (!(m->flag & UNSIGNED)) {
1331		switch(m->type) {
1332		/*
1333		 * Do not remove the casts below.  They are
1334		 * vital.  When later compared with the data,
1335		 * the sign extension must have happened.
1336		 */
1337		case FILE_BYTE:
1338			v = (signed char) v;
1339			break;
1340		case FILE_SHORT:
1341		case FILE_BESHORT:
1342		case FILE_LESHORT:
1343			v = (short) v;
1344			break;
1345		case FILE_DATE:
1346		case FILE_BEDATE:
1347		case FILE_LEDATE:
1348		case FILE_MEDATE:
1349		case FILE_LDATE:
1350		case FILE_BELDATE:
1351		case FILE_LELDATE:
1352		case FILE_MELDATE:
1353		case FILE_LONG:
1354		case FILE_BELONG:
1355		case FILE_LELONG:
1356		case FILE_MELONG:
1357		case FILE_FLOAT:
1358		case FILE_BEFLOAT:
1359		case FILE_LEFLOAT:
1360			v = (int32_t) v;
1361			break;
1362		case FILE_QUAD:
1363		case FILE_BEQUAD:
1364		case FILE_LEQUAD:
1365		case FILE_QDATE:
1366		case FILE_QLDATE:
1367		case FILE_QWDATE:
1368		case FILE_BEQDATE:
1369		case FILE_BEQLDATE:
1370		case FILE_BEQWDATE:
1371		case FILE_LEQDATE:
1372		case FILE_LEQLDATE:
1373		case FILE_LEQWDATE:
1374		case FILE_DOUBLE:
1375		case FILE_BEDOUBLE:
1376		case FILE_LEDOUBLE:
1377			v = (int64_t) v;
1378			break;
1379		case FILE_STRING:
1380		case FILE_PSTRING:
1381		case FILE_BESTRING16:
1382		case FILE_LESTRING16:
1383		case FILE_REGEX:
1384		case FILE_SEARCH:
1385		case FILE_DEFAULT:
1386		case FILE_INDIRECT:
1387		case FILE_NAME:
1388		case FILE_USE:
1389		case FILE_CLEAR:
1390			break;
1391		default:
1392			if (ms->flags & MAGIC_CHECK)
1393			    file_magwarn(ms, "cannot happen: m->type=%d\n",
1394				    m->type);
1395			return ~0U;
1396		}
1397	}
1398	return v;
1399}
1400
1401private int
1402string_modifier_check(struct magic_set *ms, struct magic *m)
1403{
1404	if ((ms->flags & MAGIC_CHECK) == 0)
1405		return 0;
1406
1407	if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1408	    (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1409		file_magwarn(ms,
1410		    "'/BHhLl' modifiers are only allowed for pascal strings\n");
1411		return -1;
1412	}
1413	switch (m->type) {
1414	case FILE_BESTRING16:
1415	case FILE_LESTRING16:
1416		if (m->str_flags != 0) {
1417			file_magwarn(ms,
1418			    "no modifiers allowed for 16-bit strings\n");
1419			return -1;
1420		}
1421		break;
1422	case FILE_STRING:
1423	case FILE_PSTRING:
1424		if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1425			file_magwarn(ms,
1426			    "'/%c' only allowed on regex and search\n",
1427			    CHAR_REGEX_OFFSET_START);
1428			return -1;
1429		}
1430		break;
1431	case FILE_SEARCH:
1432		if (m->str_range == 0) {
1433			file_magwarn(ms,
1434			    "missing range; defaulting to %d\n",
1435                            STRING_DEFAULT_RANGE);
1436			m->str_range = STRING_DEFAULT_RANGE;
1437			return -1;
1438		}
1439		break;
1440	case FILE_REGEX:
1441		if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1442			file_magwarn(ms, "'/%c' not allowed on regex\n",
1443			    CHAR_COMPACT_WHITESPACE);
1444			return -1;
1445		}
1446		if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1447			file_magwarn(ms, "'/%c' not allowed on regex\n",
1448			    CHAR_COMPACT_OPTIONAL_WHITESPACE);
1449			return -1;
1450		}
1451		break;
1452	default:
1453		file_magwarn(ms, "coding error: m->type=%d\n",
1454		    m->type);
1455		return -1;
1456	}
1457	return 0;
1458}
1459
1460private int
1461get_op(char c)
1462{
1463	switch (c) {
1464	case '&':
1465		return FILE_OPAND;
1466	case '|':
1467		return FILE_OPOR;
1468	case '^':
1469		return FILE_OPXOR;
1470	case '+':
1471		return FILE_OPADD;
1472	case '-':
1473		return FILE_OPMINUS;
1474	case '*':
1475		return FILE_OPMULTIPLY;
1476	case '/':
1477		return FILE_OPDIVIDE;
1478	case '%':
1479		return FILE_OPMODULO;
1480	default:
1481		return -1;
1482	}
1483}
1484
1485#ifdef ENABLE_CONDITIONALS
1486private int
1487get_cond(const char *l, const char **t)
1488{
1489	static const struct cond_tbl_s {
1490		char name[8];
1491		size_t len;
1492		int cond;
1493	} cond_tbl[] = {
1494		{ "if",		2,	COND_IF },
1495		{ "elif",	4,	COND_ELIF },
1496		{ "else",	4,	COND_ELSE },
1497		{ "",		0,	COND_NONE },
1498	};
1499	const struct cond_tbl_s *p;
1500
1501	for (p = cond_tbl; p->len; p++) {
1502		if (strncmp(l, p->name, p->len) == 0 &&
1503		    isspace((unsigned char)l[p->len])) {
1504			if (t)
1505				*t = l + p->len;
1506			break;
1507		}
1508	}
1509	return p->cond;
1510}
1511
1512private int
1513check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1514{
1515	int last_cond;
1516	last_cond = ms->c.li[cont_level].last_cond;
1517
1518	switch (cond) {
1519	case COND_IF:
1520		if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1521			if (ms->flags & MAGIC_CHECK)
1522				file_magwarn(ms, "syntax error: `if'");
1523			return -1;
1524		}
1525		last_cond = COND_IF;
1526		break;
1527
1528	case COND_ELIF:
1529		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1530			if (ms->flags & MAGIC_CHECK)
1531				file_magwarn(ms, "syntax error: `elif'");
1532			return -1;
1533		}
1534		last_cond = COND_ELIF;
1535		break;
1536
1537	case COND_ELSE:
1538		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1539			if (ms->flags & MAGIC_CHECK)
1540				file_magwarn(ms, "syntax error: `else'");
1541			return -1;
1542		}
1543		last_cond = COND_NONE;
1544		break;
1545
1546	case COND_NONE:
1547		last_cond = COND_NONE;
1548		break;
1549	}
1550
1551	ms->c.li[cont_level].last_cond = last_cond;
1552	return 0;
1553}
1554#endif /* ENABLE_CONDITIONALS */
1555
1556private int
1557parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1558{
1559	const char *l = *lp;
1560
1561	while (!isspace((unsigned char)*++l))
1562		switch (*l) {
1563		case CHAR_INDIRECT_RELATIVE:
1564			m->str_flags |= INDIRECT_RELATIVE;
1565			break;
1566		default:
1567			if (ms->flags & MAGIC_CHECK)
1568				file_magwarn(ms, "indirect modifier `%c' "
1569					"invalid", *l);
1570			*lp = l;
1571			return -1;
1572		}
1573	*lp = l;
1574	return 0;
1575}
1576
1577private void
1578parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1579    int op)
1580{
1581	const char *l = *lp;
1582	char *t;
1583	uint64_t val;
1584
1585	++l;
1586	m->mask_op |= op;
1587	val = (uint64_t)strtoull(l, &t, 0);
1588	l = t;
1589	m->num_mask = file_signextend(ms, m, val);
1590	eatsize(&l);
1591	*lp = l;
1592}
1593
/*
 * Parse the modifiers that follow a string-like type (everything after
 * the '/' that *lp points at on entry, up to the next whitespace).
 *
 * A decimal/octal/hex number sets m->str_range; the CHAR_* modifier
 * characters set the corresponding bits in m->str_flags.  The pascal
 * string length modifiers are accepted only for FILE_PSTRING, except
 * CHAR_PSTRING_4_LE which is also accepted for FILE_REGEX.  Additional
 * '/' separators between modifiers are skipped.
 *
 * Returns 0 on success with *lp at the terminating whitespace.  Returns -1
 * on an invalid modifier (including reaching the end of the string
 * without whitespace) or when string_modifier_check() rejects the result;
 * *lp is then left at the position where scanning stopped.
 */
private int
parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
{
	const char *l = *lp;
	char *t;
	int have_range = 0;

	/* The pre-increment skips the leading '/' on the first iteration. */
	while (!isspace((unsigned char)*++l)) {
		switch (*l) {
		case '0':  case '1':  case '2':
		case '3':  case '4':  case '5':
		case '6':  case '7':  case '8':
		case '9':
			if (have_range && (ms->flags & MAGIC_CHECK))
				file_magwarn(ms, "multiple ranges");
			have_range = 1;
			m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
			if (m->str_range == 0)
				file_magwarn(ms, "zero range");
			/*
			 * Leave l on the last digit; the loop's ++l then
			 * moves to the first character after the number.
			 */
			l = t - 1;
			break;
		case CHAR_COMPACT_WHITESPACE:
			m->str_flags |= STRING_COMPACT_WHITESPACE;
			break;
		case CHAR_COMPACT_OPTIONAL_WHITESPACE:
			m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
			break;
		case CHAR_IGNORE_LOWERCASE:
			m->str_flags |= STRING_IGNORE_LOWERCASE;
			break;
		case CHAR_IGNORE_UPPERCASE:
			m->str_flags |= STRING_IGNORE_UPPERCASE;
			break;
		case CHAR_REGEX_OFFSET_START:
			m->str_flags |= REGEX_OFFSET_START;
			break;
		case CHAR_BINTEST:
			m->str_flags |= STRING_BINTEST;
			break;
		case CHAR_TEXTTEST:
			m->str_flags |= STRING_TEXTTEST;
			break;
		case CHAR_TRIM:
			m->str_flags |= STRING_TRIM;
			break;
		case CHAR_PSTRING_1_LE:
/* Replace the pascal-string length bits, keeping all other flags. */
#define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
			if (m->type != FILE_PSTRING)
				goto bad;
			SET_LENGTH(PSTRING_1_LE);
			break;
		case CHAR_PSTRING_2_BE:
			if (m->type != FILE_PSTRING)
				goto bad;
			SET_LENGTH(PSTRING_2_BE);
			break;
		case CHAR_PSTRING_2_LE:
			if (m->type != FILE_PSTRING)
				goto bad;
			SET_LENGTH(PSTRING_2_LE);
			break;
		case CHAR_PSTRING_4_BE:
			if (m->type != FILE_PSTRING)
				goto bad;
			SET_LENGTH(PSTRING_4_BE);
			break;
		case CHAR_PSTRING_4_LE:
			/* This one character is also accepted on a regex. */
			switch (m->type) {
			case FILE_PSTRING:
			case FILE_REGEX:
				break;
			default:
				goto bad;
			}
			SET_LENGTH(PSTRING_4_LE);
			break;
		case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
			if (m->type != FILE_PSTRING)
				goto bad;
			m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
			break;
		default:
		bad:
			/* Unknown modifier, or one not valid for this type. */
			if (ms->flags & MAGIC_CHECK)
				file_magwarn(ms, "string modifier `%c' "
					"invalid", *l);
			goto out;
		}
		/* allow multiple '/' for readability */
		if (l[1] == '/' && !isspace((unsigned char)l[2]))
			l++;
	}
	/* Cross-check the collected flags against the type. */
	if (string_modifier_check(ms, m) == -1)
		goto out;
	*lp = l;
	return 0;
out:
	*lp = l;
	return -1;
}
1694
1695/*
1696 * parse one line from magic file, put into magic[index++] if valid
1697 */
1698private int
1699parse(struct magic_set *ms, struct magic_entry *me, const char *line,
1700    size_t lineno, int action)
1701{
1702#ifdef ENABLE_CONDITIONALS
1703	static uint32_t last_cont_level = 0;
1704#endif
1705	size_t i;
1706	struct magic *m;
1707	const char *l = line;
1708	char *t;
1709	int op;
1710	uint32_t cont_level;
1711	int32_t diff;
1712
1713	cont_level = 0;
1714
1715	/*
1716	 * Parse the offset.
1717	 */
1718	while (*l == '>') {
1719		++l;		/* step over */
1720		cont_level++;
1721	}
1722#ifdef ENABLE_CONDITIONALS
1723	if (cont_level == 0 || cont_level > last_cont_level)
1724		if (file_check_mem(ms, cont_level) == -1)
1725			return -1;
1726	last_cont_level = cont_level;
1727#endif
1728	if (cont_level != 0) {
1729		if (me->mp == NULL) {
1730			file_magerror(ms, "No current entry for continuation");
1731			return -1;
1732		}
1733		if (me->cont_count == 0) {
1734			file_magerror(ms, "Continuations present with 0 count");
1735			return -1;
1736		}
1737		m = &me->mp[me->cont_count - 1];
1738		diff = (int32_t)cont_level - (int32_t)m->cont_level;
1739		if (diff > 1)
1740			file_magwarn(ms, "New continuation level %u is more "
1741			    "than one larger than current level %u", cont_level,
1742			    m->cont_level);
1743		if (me->cont_count == me->max_count) {
1744			struct magic *nm;
1745			size_t cnt = me->max_count + ALLOC_CHUNK;
1746			if ((nm = CAST(struct magic *, erealloc(me->mp,
1747			    sizeof(*nm) * cnt))) == NULL) {
1748				file_oomem(ms, sizeof(*nm) * cnt);
1749				return -1;
1750			}
1751			me->mp = m = nm;
1752			me->max_count = CAST(uint32_t, cnt);
1753		}
1754		m = &me->mp[me->cont_count++];
1755		(void)memset(m, 0, sizeof(*m));
1756		m->cont_level = cont_level;
1757	} else {
1758		static const size_t len = sizeof(*m) * ALLOC_CHUNK;
1759		if (me->mp != NULL)
1760			return 1;
1761		if ((m = CAST(struct magic *, emalloc(len))) == NULL) {
1762			file_oomem(ms, len);
1763			return -1;
1764		}
1765		me->mp = m;
1766		me->max_count = ALLOC_CHUNK;
1767		(void)memset(m, 0, sizeof(*m));
1768		m->factor_op = FILE_FACTOR_OP_NONE;
1769		m->cont_level = 0;
1770		me->cont_count = 1;
1771	}
1772	m->lineno = CAST(uint32_t, lineno);
1773
1774	if (*l == '&') {  /* m->cont_level == 0 checked below. */
1775                ++l;            /* step over */
1776                m->flag |= OFFADD;
1777        }
1778	if (*l == '(') {
1779		++l;		/* step over */
1780		m->flag |= INDIR;
1781		if (m->flag & OFFADD)
1782			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1783
1784		if (*l == '&') {  /* m->cont_level == 0 checked below */
1785			++l;            /* step over */
1786			m->flag |= OFFADD;
1787		}
1788	}
1789	/* Indirect offsets are not valid at level 0. */
1790	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1791		if (ms->flags & MAGIC_CHECK)
1792			file_magwarn(ms, "relative offset at level 0");
1793
1794	/* get offset, then skip over it */
1795	m->offset = (uint32_t)strtoul(l, &t, 0);
1796        if (l == t)
1797		if (ms->flags & MAGIC_CHECK)
1798			file_magwarn(ms, "offset `%s' invalid", l);
1799        l = t;
1800
1801	if (m->flag & INDIR) {
1802		m->in_type = FILE_LONG;
1803		m->in_offset = 0;
1804		/*
1805		 * read [.lbs][+-]nnnnn)
1806		 */
1807		if (*l == '.') {
1808			l++;
1809			switch (*l) {
1810			case 'l':
1811				m->in_type = FILE_LELONG;
1812				break;
1813			case 'L':
1814				m->in_type = FILE_BELONG;
1815				break;
1816			case 'm':
1817				m->in_type = FILE_MELONG;
1818				break;
1819			case 'h':
1820			case 's':
1821				m->in_type = FILE_LESHORT;
1822				break;
1823			case 'H':
1824			case 'S':
1825				m->in_type = FILE_BESHORT;
1826				break;
1827			case 'c':
1828			case 'b':
1829			case 'C':
1830			case 'B':
1831				m->in_type = FILE_BYTE;
1832				break;
1833			case 'e':
1834			case 'f':
1835			case 'g':
1836				m->in_type = FILE_LEDOUBLE;
1837				break;
1838			case 'E':
1839			case 'F':
1840			case 'G':
1841				m->in_type = FILE_BEDOUBLE;
1842				break;
1843			case 'i':
1844				m->in_type = FILE_LEID3;
1845				break;
1846			case 'I':
1847				m->in_type = FILE_BEID3;
1848				break;
1849			default:
1850				if (ms->flags & MAGIC_CHECK)
1851					file_magwarn(ms,
1852					    "indirect offset type `%c' invalid",
1853					    *l);
1854				break;
1855			}
1856			l++;
1857		}
1858
1859		m->in_op = 0;
1860		if (*l == '~') {
1861			m->in_op |= FILE_OPINVERSE;
1862			l++;
1863		}
1864		if ((op = get_op(*l)) != -1) {
1865			m->in_op |= op;
1866			l++;
1867		}
1868		if (*l == '(') {
1869			m->in_op |= FILE_OPINDIRECT;
1870			l++;
1871		}
1872		if (isdigit((unsigned char)*l) || *l == '-') {
1873			m->in_offset = (int32_t)strtol(l, &t, 0);
1874			if (l == t)
1875				if (ms->flags & MAGIC_CHECK)
1876					file_magwarn(ms,
1877					    "in_offset `%s' invalid", l);
1878			l = t;
1879		}
1880		if (*l++ != ')' ||
1881		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1882			if (ms->flags & MAGIC_CHECK)
1883				file_magwarn(ms,
1884				    "missing ')' in indirect offset");
1885	}
1886	EATAB;
1887
1888#ifdef ENABLE_CONDITIONALS
1889	m->cond = get_cond(l, &l);
1890	if (check_cond(ms, m->cond, cont_level) == -1)
1891		return -1;
1892
1893	EATAB;
1894#endif
1895
1896	/*
1897	 * Parse the type.
1898	 */
1899	if (*l == 'u') {
1900		/*
1901		 * Try it as a keyword type prefixed by "u"; match what
1902		 * follows the "u".  If that fails, try it as an SUS
1903		 * integer type.
1904		 */
1905		m->type = get_type(type_tbl, l + 1, &l);
1906		if (m->type == FILE_INVALID) {
1907			/*
1908			 * Not a keyword type; parse it as an SUS type,
1909			 * 'u' possibly followed by a number or C/S/L.
1910			 */
1911			m->type = get_standard_integer_type(l, &l);
1912		}
1913		/* It's unsigned. */
1914		if (m->type != FILE_INVALID)
1915			m->flag |= UNSIGNED;
1916	} else {
1917		/*
1918		 * Try it as a keyword type.  If that fails, try it as
1919		 * an SUS integer type if it begins with "d" or as an
1920		 * SUS string type if it begins with "s".  In any case,
1921		 * it's not unsigned.
1922		 */
1923		m->type = get_type(type_tbl, l, &l);
1924		if (m->type == FILE_INVALID) {
1925			/*
1926			 * Not a keyword type; parse it as an SUS type,
1927			 * either 'd' possibly followed by a number or
1928			 * C/S/L, or just 's'.
1929			 */
1930			if (*l == 'd')
1931				m->type = get_standard_integer_type(l, &l);
1932			else if (*l == 's' && !isalpha((unsigned char)l[1])) {
1933				m->type = FILE_STRING;
1934		++l;
1935			}
1936		}
1937	}
1938
1939	if (m->type == FILE_INVALID) {
1940		/* Not found - try it as a special keyword. */
1941		m->type = get_type(special_tbl, l, &l);
1942	}
1943
1944	if (m->type == FILE_INVALID) {
1945		if (ms->flags & MAGIC_CHECK)
1946			file_magwarn(ms, "type `%s' invalid", l);
1947		return -1;
1948	}
1949
1950	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1951	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1952
1953	m->mask_op = 0;
1954	if (*l == '~') {
1955		if (!IS_LIBMAGIC_STRING(m->type))
1956			m->mask_op |= FILE_OPINVERSE;
1957		else if (ms->flags & MAGIC_CHECK)
1958			file_magwarn(ms, "'~' invalid for string types");
1959		++l;
1960	}
1961	m->str_range = 0;
1962	m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1963	if ((op = get_op(*l)) != -1) {
1964		if (IS_LIBMAGIC_STRING(m->type)) {
1965			int r;
1966
1967			if (op != FILE_OPDIVIDE) {
1968				if (ms->flags & MAGIC_CHECK)
1969					file_magwarn(ms,
1970					    "invalid string/indirect op: "
1971					    "`%c'", *t);
1972				return -1;
1973			}
1974
1975			if (m->type == FILE_INDIRECT)
1976				r = parse_indirect_modifier(ms, m, &l);
1977			else
1978				r = parse_string_modifier(ms, m, &l);
1979			if (r == -1)
1980				return -1;
1981		} else
1982			parse_op_modifier(ms, m, &l, op);
1983	}
1984
1985	/*
1986	 * We used to set mask to all 1's here, instead let's just not do
1987	 * anything if mask = 0 (unless you have a better idea)
1988	 */
1989	EATAB;
1990
1991	switch (*l) {
1992	case '>':
1993	case '<':
1994  		m->reln = *l;
1995  		++l;
1996		if (*l == '=') {
1997			if (ms->flags & MAGIC_CHECK) {
1998				file_magwarn(ms, "%c= not supported",
1999				    m->reln);
2000				return -1;
2001			}
2002		   ++l;
2003		}
2004		break;
2005	/* Old-style anding: "0 byte &0x80 dynamically linked" */
2006	case '&':
2007	case '^':
2008	case '=':
2009  		m->reln = *l;
2010  		++l;
2011		if (*l == '=') {
2012		   /* HP compat: ignore &= etc. */
2013		   ++l;
2014		}
2015		break;
2016	case '!':
2017		m->reln = *l;
2018		++l;
2019		break;
2020	default:
2021  		m->reln = '=';	/* the default relation */
2022		if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
2023		    isspace((unsigned char)l[1])) || !l[1])) {
2024			m->reln = *l;
2025			++l;
2026		}
2027		break;
2028	}
2029	/*
2030	 * Grab the value part, except for an 'x' reln.
2031	 */
2032	if (m->reln != 'x' && getvalue(ms, m, &l, action))
2033		return -1;
2034
2035	/*
2036	 * TODO finish this macro and start using it!
2037	 * #define offsetcheck {if (offset > HOWMANY-1)
2038	 *	magwarn("offset too big"); }
2039	 */
2040
2041	/*
2042	 * Now get last part - the description
2043	 */
2044	EATAB;
2045	if (l[0] == '\b') {
2046		++l;
2047		m->flag |= NOSPACE;
2048	} else if ((l[0] == '\\') && (l[1] == 'b')) {
2049		++l;
2050		++l;
2051		m->flag |= NOSPACE;
2052	}
2053	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2054		continue;
2055	if (i == sizeof(m->desc)) {
2056		m->desc[sizeof(m->desc) - 1] = '\0';
2057		if (ms->flags & MAGIC_CHECK)
2058			file_magwarn(ms, "description `%s' truncated", m->desc);
2059	}
2060
2061        /*
2062	 * We only do this check while compiling, or if any of the magic
2063	 * files were not compiled.
2064         */
2065        if (ms->flags & MAGIC_CHECK) {
2066		if (check_format(ms, m) == -1)
2067			return -1;
2068	}
2069	m->mimetype[0] = '\0';		/* initialise MIME type to none */
2070	return 0;
2071}
2072
2073/*
2074 * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2075 * if valid
2076 */
2077private int
2078parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
2079{
2080	const char *l = line;
2081	char *el;
2082	unsigned long factor;
2083	struct magic *m = &me->mp[0];
2084
2085	if (m->factor_op != FILE_FACTOR_OP_NONE) {
2086		file_magwarn(ms,
2087		    "Current entry already has a strength type: %c %d",
2088		    m->factor_op, m->factor);
2089		return -1;
2090	}
2091	if (m->type == FILE_NAME) {
2092		file_magwarn(ms, "%s: Strength setting is not supported in "
2093		    "\"name\" magic entries", m->value.s);
2094		return -1;
2095	}
2096	EATAB;
2097	switch (*l) {
2098	case FILE_FACTOR_OP_NONE:
2099	case FILE_FACTOR_OP_PLUS:
2100	case FILE_FACTOR_OP_MINUS:
2101	case FILE_FACTOR_OP_TIMES:
2102	case FILE_FACTOR_OP_DIV:
2103		m->factor_op = *l++;
2104		break;
2105	default:
2106		file_magwarn(ms, "Unknown factor op `%c'", *l);
2107		return -1;
2108	}
2109	EATAB;
2110	factor = strtoul(l, &el, 0);
2111	if (factor > 255) {
2112		file_magwarn(ms, "Too large factor `%lu'", factor);
2113		goto out;
2114	}
2115	if (*el && !isspace((unsigned char)*el)) {
2116		file_magwarn(ms, "Bad factor `%s'", l);
2117		goto out;
2118	}
2119	m->factor = (uint8_t)factor;
2120	if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2121		file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2122		    m->factor_op, m->factor);
2123		goto out;
2124	}
2125	return 0;
2126out:
2127	m->factor_op = FILE_FACTOR_OP_NONE;
2128	m->factor = 0;
2129	return -1;
2130}
2131
2132private int
2133goodchar(unsigned char x, const char *extra)
2134{
2135	return (isascii(x) && isalnum(x)) || strchr(extra, x);
2136}
2137
2138private int
2139parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2140    zend_off_t off, size_t len, const char *name, const char *extra, int nt)
2141{
2142	size_t i;
2143	const char *l = line;
2144	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2145	char *buf = CAST(char *, CAST(void *, m)) + off;
2146
2147	if (buf[0] != '\0') {
2148		len = nt ? strlen(buf) : len;
2149		file_magwarn(ms, "Current entry already has a %s type "
2150		    "`%.*s', new type `%s'", name, (int)len, buf, l);
2151		return -1;
2152	}
2153
2154	if (*m->desc == '\0') {
2155		file_magwarn(ms, "Current entry does not yet have a "
2156		    "description for adding a %s type", name);
2157		return -1;
2158	}
2159
2160	EATAB;
2161	for (i = 0; *l && i < len && goodchar(*l, extra); buf[i++] = *l++)
2162		continue;
2163
2164	if (i == len && *l) {
2165		if (nt)
2166			buf[len - 1] = '\0';
2167		if (ms->flags & MAGIC_CHECK)
2168			file_magwarn(ms, "%s type `%s' truncated %"
2169			    SIZE_T_FORMAT "u", name, line, i);
2170	} else {
2171		if (!isspace((unsigned char)*l) && !goodchar(*l, extra))
2172			file_magwarn(ms, "%s type `%s' has bad char '%c'",
2173			    name, line, *l);
2174		if (nt)
2175			buf[i] = '\0';
2176	}
2177
2178	if (i > 0)
2179		return 0;
2180
2181	file_magerror(ms, "Bad magic entry '%s'", line);
2182	return -1;
2183}
2184
2185/*
2186 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2187 * magic[index - 1]
2188 */
2189private int
2190parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
2191{
2192	struct magic *m = &me->mp[0];
2193
2194	return parse_extra(ms, me, line,
2195	    CAST(off_t, offsetof(struct magic, apple)),
2196	    sizeof(m->apple), "APPLE", "!+-./", 0);
2197}
2198
2199/*
2200 * parse a MIME annotation line from magic file, put into magic[index - 1]
2201 * if valid
2202 */
2203private int
2204parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
2205{
2206	struct magic *m = &me->mp[0];
2207
2208	return parse_extra(ms, me, line,
2209	    CAST(zend_off_t, offsetof(struct magic, mimetype)),
2210	    sizeof(m->mimetype), "MIME", "+-/.", 1);
2211}
2212
/*
 * Check that the printf conversion starting at `ptr' (the text right
 * after a '%') is acceptable for magic type `type'.
 *
 * The format class is looked up in file_formats[type]:
 *   FILE_FMT_QUAD          - [-][.][digits][.][digits] "ll" then one of
 *                            i d u o x X (or c, which is then rejected)
 *   FILE_FMT_NUM           - same without the "ll"; 'c' is accepted only
 *                            for FILE_BYTE
 *   FILE_FMT_FLOAT/DOUBLE  - [-][.][digits][.][digits] then e E f F g G
 *   FILE_FMT_STR           - [-][digits][.digits] then s
 * With STRICT_FORMAT defined, 'l', 'h' and 'hh' length modifiers are
 * additionally parsed and checked against the width of the type.
 *
 * Returns 0 if the conversion is acceptable, -1 if it is not (including
 * an empty string).  Calls abort() for a numeric type that is not listed
 * below or for an unknown format class.  `type' is used as an index
 * without a range check, so the caller has to validate it.
 */
private int
check_format_type(const char *ptr, int type)
{
	int quad = 0, h;
	if (*ptr == '\0') {
		/* Missing format string; bad */
		return -1;
	}

	switch (file_formats[type]) {
	case FILE_FMT_QUAD:
		quad = 1;
		/*FALLTHROUGH*/
	case FILE_FMT_NUM:
		/*
		 * h is the number of 'h' length modifiers that fit the
		 * type: 2 for a byte, 1 for a short, 0 otherwise.
		 */
		if (quad == 0) {
			switch (type) {
			case FILE_BYTE:
				h = 2;
				break;
			case FILE_SHORT:
			case FILE_BESHORT:
			case FILE_LESHORT:
				h = 1;
				break;
			case FILE_LONG:
			case FILE_BELONG:
			case FILE_LELONG:
			case FILE_MELONG:
			case FILE_LEID3:
			case FILE_BEID3:
			case FILE_INDIRECT:
				h = 0;
				break;
			default:
				abort();
			}
		} else
			h = 0;
		/* Skip an optional '-' flag, then width and precision. */
		if (*ptr == '-')
			ptr++;
		if (*ptr == '.')
			ptr++;
		while (isdigit((unsigned char)*ptr)) ptr++;
		if (*ptr == '.')
			ptr++;
		while (isdigit((unsigned char)*ptr)) ptr++;
		/* 64-bit types must use the "ll" length modifier. */
		if (quad) {
			if (*ptr++ != 'l')
				return -1;
			if (*ptr++ != 'l')
				return -1;
		}

		switch (*ptr++) {
#ifdef STRICT_FORMAT 	/* "long" formats are int formats for us */
		/* so don't accept the 'l' modifier */
		case 'l':
			switch (*ptr++) {
			case 'i':
			case 'd':
			case 'u':
			case 'o':
			case 'x':
			case 'X':
				return h != 0 ? -1 : 0;
			default:
				return -1;
			}

		/*
		 * Don't accept h and hh modifiers. They make writing
		 * magic entries more complicated, for very little benefit
		 */
		case 'h':
			/* Each 'h' consumed uses up one unit of h. */
			if (h-- <= 0)
				return -1;
			switch (*ptr++) {
			case 'h':
				if (h-- <= 0)
					return -1;
				switch (*ptr++) {
				case 'i':
				case 'd':
				case 'u':
				case 'o':
				case 'x':
				case 'X':
					return 0;
				default:
					return -1;
				}
			case 'i':
			case 'd':
			case 'u':
			case 'o':
			case 'x':
			case 'X':
				return h != 0 ? -1 : 0;
			default:
				return -1;
			}
#endif
		case 'c':
			/* %c is only valid for byte-sized types (h == 2). */
			return h != 2 ? -1 : 0;
		case 'i':
		case 'd':
		case 'u':
		case 'o':
		case 'x':
		case 'X':
#ifdef STRICT_FORMAT
			return h != 0 ? -1 : 0;
#else
			return 0;
#endif
		default:
			return -1;
		}

	case FILE_FMT_FLOAT:
	case FILE_FMT_DOUBLE:
		/* Skip an optional '-' flag, then width and precision. */
		if (*ptr == '-')
			ptr++;
		if (*ptr == '.')
			ptr++;
		while (isdigit((unsigned char)*ptr)) ptr++;
		if (*ptr == '.')
			ptr++;
		while (isdigit((unsigned char)*ptr)) ptr++;

		switch (*ptr++) {
		case 'e':
		case 'E':
		case 'f':
		case 'F':
		case 'g':
		case 'G':
			return 0;

		default:
			return -1;
		}


	case FILE_FMT_STR:
		/* Optional '-' flag, width, and ".precision". */
		if (*ptr == '-')
			ptr++;
		while (isdigit((unsigned char )*ptr))
			ptr++;
		if (*ptr == '.') {
			ptr++;
			while (isdigit((unsigned char )*ptr))
				ptr++;
		}

		switch (*ptr++) {
		case 's':
			return 0;
		default:
			return -1;
		}

	default:
		/* internal error */
		abort();
	}
	/*NOTREACHED*/
	return -1;
}
2382
2383/*
2384 * Check that the optional printf format in description matches
2385 * the type of the magic.
2386 */
2387private int
2388check_format(struct magic_set *ms, struct magic *m)
2389{
2390	char *ptr;
2391
2392	for (ptr = m->desc; *ptr; ptr++)
2393		if (*ptr == '%')
2394			break;
2395	if (*ptr == '\0') {
2396		/* No format string; ok */
2397		return 1;
2398	}
2399
2400	assert(file_nformats == file_nnames);
2401
2402	if (m->type >= file_nformats) {
2403		file_magwarn(ms, "Internal error inconsistency between "
2404		    "m->type and format strings");
2405		return -1;
2406	}
2407	if (file_formats[m->type] == FILE_FMT_NONE) {
2408		file_magwarn(ms, "No format string for `%s' with description "
2409		    "`%s'", m->desc, file_names[m->type]);
2410		return -1;
2411	}
2412
2413	ptr++;
2414	if (check_format_type(ptr, m->type) == -1) {
2415		/*
2416		 * TODO: this error message is unhelpful if the format
2417		 * string is not one character long
2418		 */
2419		file_magwarn(ms, "Printf format `%c' is not valid for type "
2420		    "`%s' in description `%s'", *ptr ? *ptr : '?',
2421		    file_names[m->type], m->desc);
2422		return -1;
2423	}
2424
2425	for (; *ptr; ptr++) {
2426		if (*ptr == '%') {
2427			file_magwarn(ms,
2428			    "Too many format strings (should have at most one) "
2429			    "for `%s' with description `%s'",
2430			    file_names[m->type], m->desc);
2431			return -1;
2432		}
2433	}
2434	return 0;
2435}
2436
2437/*
2438 * Read a numeric value from a pointer, into the value union of a magic
2439 * pointer, according to the magic type.  Update the string pointer to point
2440 * just after the number read.  Return 0 for success, non-zero for failure.
2441 */
2442private int
2443getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2444{
2445	switch (m->type) {
2446	case FILE_BESTRING16:
2447	case FILE_LESTRING16:
2448	case FILE_STRING:
2449	case FILE_PSTRING:
2450	case FILE_REGEX:
2451	case FILE_SEARCH:
2452	case FILE_NAME:
2453	case FILE_USE:
2454		*p = getstr(ms, m, *p, action == FILE_COMPILE);
2455		if (*p == NULL) {
2456			if (ms->flags & MAGIC_CHECK)
2457				file_magwarn(ms, "cannot get string from `%s'",
2458				    m->value.s);
2459			return -1;
2460		}
2461		if (m->type == FILE_REGEX) {
2462			/*  XXX do we need this? */
2463			/*zval pattern;
2464			int options = 0;
2465			pcre_cache_entry *pce;
2466
2467			convert_libmagic_pattern(&pattern, m->value.s, strlen(m->value.s), options);
2468
2469			if ((pce = pcre_get_compiled_regex_cache(Z_STR(pattern))) == NULL) {
2470				return -1;
2471			}
2472
2473			return 0;*/
2474		}
2475		return 0;
2476	case FILE_FLOAT:
2477	case FILE_BEFLOAT:
2478	case FILE_LEFLOAT:
2479		if (m->reln != 'x') {
2480			char *ep;
2481#ifdef HAVE_STRTOF
2482			m->value.f = strtof(*p, &ep);
2483#else
2484			m->value.f = (float)strtod(*p, &ep);
2485#endif
2486			*p = ep;
2487		}
2488		return 0;
2489	case FILE_DOUBLE:
2490	case FILE_BEDOUBLE:
2491	case FILE_LEDOUBLE:
2492		if (m->reln != 'x') {
2493			char *ep;
2494			m->value.d = strtod(*p, &ep);
2495			*p = ep;
2496		}
2497		return 0;
2498	default:
2499		if (m->reln != 'x') {
2500			char *ep;
2501			m->value.q = file_signextend(ms, m,
2502			    (uint64_t)strtoull(*p, &ep, 0));
2503			*p = ep;
2504			eatsize(p);
2505		}
2506		return 0;
2507	}
2508}
2509
2510/*
2511 * Convert a string containing C character escapes.  Stop at an unescaped
2512 * space or tab.
2513 * Copy the converted version to "m->value.s", and the length in m->vallen.
2514 * Return updated scan pointer as function result. Warn if set.
2515 */
2516private const char *
2517getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2518{
2519	const char *origs = s;
2520	char	*p = m->value.s;
2521	size_t  plen = sizeof(m->value.s);
2522	char 	*origp = p;
2523	char	*pmax = p + plen - 1;
2524	int	c;
2525	int	val;
2526
2527	while ((c = *s++) != '\0') {
2528		if (isspace((unsigned char) c))
2529			break;
2530		if (p >= pmax) {
2531			file_error(ms, 0, "string too long: `%s'", origs);
2532			return NULL;
2533		}
2534		if (c == '\\') {
2535			switch(c = *s++) {
2536
2537			case '\0':
2538				if (warn)
2539					file_magwarn(ms, "incomplete escape");
2540				goto out;
2541
2542			case '\t':
2543				if (warn) {
2544					file_magwarn(ms,
2545					    "escaped tab found, use \\t instead");
2546					warn = 0;	/* already did */
2547				}
2548				/*FALLTHROUGH*/
2549			default:
2550				if (warn) {
2551					if (isprint((unsigned char)c)) {
2552						/* Allow escaping of
2553						 * ``relations'' */
2554						if (strchr("<>&^=!", c) == NULL
2555						    && (m->type != FILE_REGEX ||
2556						    strchr("[]().*?^$|{}", c)
2557						    == NULL)) {
2558							file_magwarn(ms, "no "
2559							    "need to escape "
2560							    "`%c'", c);
2561						}
2562					} else {
2563						file_magwarn(ms,
2564						    "unknown escape sequence: "
2565						    "\\%03o", c);
2566					}
2567				}
2568				/*FALLTHROUGH*/
2569			/* space, perhaps force people to use \040? */
2570			case ' ':
2571#if 0
2572			/*
2573			 * Other things people escape, but shouldn't need to,
2574			 * so we disallow them
2575			 */
2576			case '\'':
2577			case '"':
2578			case '?':
2579#endif
2580			/* Relations */
2581			case '>':
2582			case '<':
2583			case '&':
2584			case '^':
2585			case '=':
2586			case '!':
2587			/* and baskslash itself */
2588			case '\\':
2589				*p++ = (char) c;
2590				break;
2591
2592			case 'a':
2593				*p++ = '\a';
2594				break;
2595
2596			case 'b':
2597				*p++ = '\b';
2598				break;
2599
2600			case 'f':
2601				*p++ = '\f';
2602				break;
2603
2604			case 'n':
2605				*p++ = '\n';
2606				break;
2607
2608			case 'r':
2609				*p++ = '\r';
2610				break;
2611
2612			case 't':
2613				*p++ = '\t';
2614				break;
2615
2616			case 'v':
2617				*p++ = '\v';
2618				break;
2619
2620			/* \ and up to 3 octal digits */
2621			case '0':
2622			case '1':
2623			case '2':
2624			case '3':
2625			case '4':
2626			case '5':
2627			case '6':
2628			case '7':
2629				val = c - '0';
2630				c = *s++;  /* try for 2 */
2631				if (c >= '0' && c <= '7') {
2632					val = (val << 3) | (c - '0');
2633					c = *s++;  /* try for 3 */
2634					if (c >= '0' && c <= '7')
2635						val = (val << 3) | (c-'0');
2636					else
2637						--s;
2638				}
2639				else
2640					--s;
2641				*p++ = (char)val;
2642				break;
2643
2644			/* \x and up to 2 hex digits */
2645			case 'x':
2646				val = 'x';	/* Default if no digits */
2647				c = hextoint(*s++);	/* Get next char */
2648				if (c >= 0) {
2649					val = c;
2650					c = hextoint(*s++);
2651					if (c >= 0)
2652						val = (val << 4) + c;
2653					else
2654						--s;
2655				} else
2656					--s;
2657				*p++ = (char)val;
2658				break;
2659			}
2660		} else
2661			*p++ = (char)c;
2662	}
2663out:
2664	*p = '\0';
2665	m->vallen = CAST(unsigned char, (p - origp));
2666	if (m->type == FILE_PSTRING)
2667		m->vallen += (unsigned char)file_pstring_length_size(m);
2668	return s;
2669}
2670
2671
/*
 * Translate one hexadecimal digit character into its value.
 * c is expected to hold a single char; the result is 0-15 for
 * '0'-'9', 'a'-'f' and 'A'-'F', and -1 for every other character,
 * including anything outside the 7-bit ASCII range.
 */
private int
hextoint(int c)
{
	const unsigned char byte = (unsigned char)c;

	if (byte > 0x7f)		/* not ASCII */
		return -1;
	if (byte >= '0' && byte <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	return -1;
}
2686
2687
/*
 * Write a string to fp with C-style escaping.
 *
 * Bytes in the range 040-0176 are written unchanged.  Any other byte is
 * written as a backslash followed by its letter escape (a, b, f, n, r, t
 * or v) when it has one, or by three octal digits otherwise.
 *
 * When len equals ~0U the string is read up to its terminating NUL;
 * for any other len exactly that many bytes are written.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	for (;;) {
		char	ch;
		char	letter;

		if (len != ~0U) {
			/* counted string */
			if (len-- == 0)
				return;
			ch = *s++;
		} else {
			/* NUL-terminated string */
			ch = *s++;
			if (ch == '\0')
				return;
		}

		/* TODO isprint && !iscntrl */
		if (ch >= 040 && ch <= 0176) {
			(void) fputc(ch, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (ch) {
		case '\a':	letter = 'a';	break;
		case '\b':	letter = 'b';	break;
		case '\f':	letter = 'f';	break;
		case '\n':	letter = 'n';	break;
		case '\r':	letter = 'r';	break;
		case '\t':	letter = 't';	break;
		case '\v':	letter = 'v';	break;
		default:	letter = '\0';	break;
		}

		if (letter != '\0')
			(void) fputc(letter, fp);
		else
			(void) fprintf(fp, "%.3o", ch & 0377);
	}
}
2747
/*
 * eatsize(): skip the size suffix that may follow a number [eg. 10UL].
 * An optional 'u' is consumed first, then at most one of 'l' (long),
 * 's' or 'h' (short), 'b' or 'c' (char/byte); all letters are matched
 * case-insensitively.  *p is advanced past what was consumed.
 */
private void
eatsize(const char **p)
{
	const char *cur = *p;
	int letter;

	if (LOWCASE(*cur) == 'u')
		cur++;

	letter = LOWCASE(*cur);
	if (letter == 'l' || letter == 's' || letter == 'h' ||
	    letter == 'b' || letter == 'c')
		cur++;

	*p = cur;
}
2773
2774/*
2775 * handle a buffer containing a compiled file.
2776 */
2777private struct magic_map *
2778apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
2779{
2780	struct magic_map *map;
2781
2782	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
2783		file_oomem(ms, sizeof(*map));
2784		return NULL;
2785	}
2786	map->len = len;
2787	map->p = buf;
2788	map->type = MAP_TYPE_USER;
2789	if (check_buffer(ms, map, "buffer") != 0) {
2790		apprentice_unmap(map);
2791		return NULL;
2792	}
2793	return map;
2794}
2795
2796/*
2797 * handle a compiled file.
2798 */
2799
2800private struct magic_map *
2801apprentice_map(struct magic_set *ms, const char *fn)
2802{
2803	uint32_t *ptr;
2804	uint32_t version, entries, nentries;
2805	int needsbyteswap;
2806	char *dbname = NULL;
2807	struct magic_map *map;
2808	size_t i;
2809	php_stream *stream = NULL;
2810	php_stream_statbuf st;
2811
2812
2813
2814	if ((map = CAST(struct magic_map *, ecalloc(1, sizeof(*map)))) == NULL) {
2815		file_oomem(ms, sizeof(*map));
2816		return NULL;
2817	}
2818
2819	if (fn == NULL) {
2820		map->p = (void *)&php_magic_database;
2821		goto internal_loaded;
2822	}
2823
2824#ifdef PHP_WIN32
2825	/* Don't bother on windows with php_stream_open_wrapper,
2826	return to give apprentice_load() a chance. */
2827	if (php_stream_stat_path_ex((char *)fn, 0, &st, NULL) == SUCCESS) {
2828               if (st.sb.st_mode & S_IFDIR) {
2829                       return NULL;
2830               }
2831       }
2832#endif
2833
2834	dbname = mkdbname(ms, fn, 0);
2835	if (dbname == NULL)
2836		goto error;
2837
2838		stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL);
2839
2840	if (!stream) {
2841		goto error;
2842	}
2843
2844	if (php_stream_stat(stream, &st) < 0) {
2845		file_error(ms, errno, "cannot stat `%s'", dbname);
2846		goto error;
2847	}
2848	if (st.sb.st_size < 8 || st.sb.st_size > MAXMAGIC_SIZE) {
2849		file_error(ms, 0, "file `%s' is too %s", dbname,
2850		    st.sb.st_size < 8 ? "small" : "large");
2851		goto error;
2852	}
2853
2854	map->len = (size_t)st.sb.st_size;
2855	if ((map->p = CAST(void *, emalloc(map->len))) == NULL) {
2856		file_oomem(ms, map->len);
2857		goto error;
2858	}
2859	if (php_stream_read(stream, map->p, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) {
2860		file_badread(ms);
2861		goto error;
2862	}
2863	map->len = 0;
2864#define RET	1
2865
2866	php_stream_close(stream);
2867	stream = NULL;
2868
2869internal_loaded:
2870	ptr = (uint32_t *)(void *)map->p;
2871	if (*ptr != MAGICNO) {
2872		if (swap4(*ptr) != MAGICNO) {
2873			file_error(ms, 0, "bad magic in `%s'", dbname);
2874			goto error;
2875		}
2876		needsbyteswap = 1;
2877	} else
2878		needsbyteswap = 0;
2879	if (needsbyteswap)
2880		version = swap4(ptr[1]);
2881	else
2882		version = ptr[1];
2883	if (version != VERSIONNO) {
2884		file_error(ms, 0, "File %d.%d supports only version %d magic "
2885		    "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
2886		    VERSIONNO, dbname, version);
2887		goto error;
2888	}
2889
2890	/* php_magic_database is a const, performing writes will segfault. This is for big-endian
2891	machines only, PPC and Sparc specifically. Consider static variable or MINIT in
2892	future. */
2893	if (needsbyteswap && fn == NULL) {
2894		map->p = emalloc(sizeof(php_magic_database));
2895		map->p = memcpy(map->p, php_magic_database, sizeof(php_magic_database));
2896	}
2897
2898	if (NULL != fn) {
2899		nentries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2900		entries = (uint32_t)(st.sb.st_size / sizeof(struct magic));
2901		if ((zend_off_t)(entries * sizeof(struct magic)) != st.sb.st_size) {
2902			file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
2903				dbname, (unsigned long long)st.sb.st_size,
2904				sizeof(struct magic));
2905			goto error;
2906		}
2907	}
2908	map->magic[0] = CAST(struct magic *, map->p) + 1;
2909	nentries = 0;
2910	for (i = 0; i < MAGIC_SETS; i++) {
2911		if (needsbyteswap)
2912			map->nmagic[i] = swap4(ptr[i + 2]);
2913		else
2914			map->nmagic[i] = ptr[i + 2];
2915		if (i != MAGIC_SETS - 1)
2916			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2917		nentries += map->nmagic[i];
2918	}
2919	if (NULL != fn && entries != nentries + 1) {
2920		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2921		    dbname, entries, nentries + 1);
2922		goto error;
2923	}
2924	if (needsbyteswap)
2925		for (i = 0; i < MAGIC_SETS; i++)
2926			byteswap(map->magic[i], map->nmagic[i]);
2927
2928	if (dbname) {
2929		efree(dbname);
2930	}
2931	return map;
2932
2933error:
2934	if (stream) {
2935		php_stream_close(stream);
2936	}
2937	apprentice_unmap(map);
2938	if (dbname) {
2939		efree(dbname);
2940	}
2941	return NULL;
2942}
2943
2944private int
2945check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
2946{
2947	uint32_t *ptr;
2948	uint32_t entries, nentries;
2949	uint32_t version;
2950	int i, needsbyteswap;
2951
2952	ptr = CAST(uint32_t *, map->p);
2953	if (*ptr != MAGICNO) {
2954		if (swap4(*ptr) != MAGICNO) {
2955			file_error(ms, 0, "bad magic in `%s'", dbname);
2956			return -1;
2957		}
2958		needsbyteswap = 1;
2959	} else
2960		needsbyteswap = 0;
2961	if (needsbyteswap)
2962		version = swap4(ptr[1]);
2963	else
2964		version = ptr[1];
2965	if (version != VERSIONNO) {
2966		file_error(ms, 0, "File %s supports only version %d magic "
2967		    "files. `%s' is version %d", FILE_VERSION_MAJOR,
2968		    VERSIONNO, dbname, version);
2969		return -1;
2970	}
2971	entries = (uint32_t)(map->len / sizeof(struct magic));
2972	if ((entries * sizeof(struct magic)) != map->len) {
2973		file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
2974		    "a multiple of %" SIZE_T_FORMAT "u",
2975		    dbname, map->len, sizeof(struct magic));
2976		return -1;
2977	}
2978	map->magic[0] = CAST(struct magic *, map->p) + 1;
2979	nentries = 0;
2980	for (i = 0; i < MAGIC_SETS; i++) {
2981		if (needsbyteswap)
2982			map->nmagic[i] = swap4(ptr[i + 2]);
2983		else
2984			map->nmagic[i] = ptr[i + 2];
2985		if (i != MAGIC_SETS - 1)
2986			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
2987		nentries += map->nmagic[i];
2988	}
2989	if (entries != nentries + 1) {
2990		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
2991		    dbname, entries, nentries + 1);
2992		return -1;
2993	}
2994	if (needsbyteswap)
2995		for (i = 0; i < MAGIC_SETS; i++)
2996			byteswap(map->magic[i], map->nmagic[i]);
2997	return 0;
2998}
2999
3000/*
3001 * handle an mmaped file.
3002 */
3003private int
3004apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3005{
3006	static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3007	static const size_t m = sizeof(**map->magic);
3008	size_t len;
3009	char *dbname;
3010	int rv = -1;
3011	uint32_t i;
3012	union {
3013		struct magic m;
3014		uint32_t h[2 + MAGIC_SETS];
3015	} hdr;
3016	php_stream *stream;
3017
3018
3019	dbname = mkdbname(ms, fn, 0);
3020
3021	if (dbname == NULL)
3022		goto out;
3023
3024	/* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */
3025	stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL);
3026
3027	if (!stream) {
3028		file_error(ms, errno, "cannot open `%s'", dbname);
3029		goto out;
3030	}
3031	memset(&hdr, 0, sizeof(hdr));
3032	hdr.h[0] = MAGICNO;
3033	hdr.h[1] = VERSIONNO;
3034	memcpy(hdr.h + 2, map->nmagic, nm);
3035
3036	if (php_stream_write(stream,(const char *)&hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) {
3037		file_error(ms, errno, "error writing `%s'", dbname);
3038		goto out;
3039	}
3040
3041	for (i = 0; i < MAGIC_SETS; i++) {
3042		len = m * map->nmagic[i];
3043		if (php_stream_write(stream, (const char *)map->magic[i], len) != (ssize_t)len) {
3044			file_error(ms, errno, "error writing `%s'", dbname);
3045			goto out;
3046		}
3047	}
3048
3049	if (stream) {
3050		php_stream_close(stream);
3051	}
3052	rv = 0;
3053out:
3054	efree(dbname);
3055	return rv;
3056}
3057
3058private const char ext[] = ".mgc";
3059/*
3060 * make a dbname
3061 */
3062private char *
3063mkdbname(struct magic_set *ms, const char *fn, int strip)
3064{
3065	const char *p, *q;
3066	char *buf;
3067
3068	if (strip) {
3069		if ((p = strrchr(fn, '/')) != NULL)
3070			fn = ++p;
3071	}
3072
3073	for (q = fn; *q; q++)
3074		continue;
3075	/* Look for .mgc */
3076	for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3077		if (*p != *q)
3078			break;
3079
3080	/* Did not find .mgc, restore q */
3081	if (p >= ext)
3082		while (*q)
3083			q++;
3084
3085	q++;
3086	/* Compatibility with old code that looked in .mime */
3087	if (ms->flags & MAGIC_MIME) {
3088		spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext);
3089#ifdef PHP_WIN32
3090		if (VCWD_ACCESS(buf, R_OK) == 0) {
3091#else
3092		if (VCWD_ACCESS(buf, R_OK) != -1) {
3093#endif
3094			ms->flags &= MAGIC_MIME_TYPE;
3095			return buf;
3096		}
3097		efree(buf);
3098	}
3099	spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext);
3100
3101	/* Compatibility with old code that looked in .mime */
3102	if (strstr(p, ".mime") != NULL)
3103		ms->flags &= MAGIC_MIME_TYPE;
3104	return buf;
3105}
3106
3107/*
3108 * Byteswap an mmap'ed file if needed
3109 */
3110private void
3111byteswap(struct magic *magic, uint32_t nmagic)
3112{
3113	uint32_t i;
3114	for (i = 0; i < nmagic; i++)
3115		bs1(&magic[i]);
3116}
3117
/*
 * Return sv with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
	return (uint16_t)(((sv & 0x00ffU) << 8) | ((sv & 0xff00U) >> 8));
}
3131
/*
 * Return sv with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
	return ((sv & 0x000000ffU) << 24) |
	    ((sv & 0x0000ff00U) << 8) |
	    ((sv & 0x00ff0000U) >> 8) |
	    ((sv & 0xff000000U) >> 24);
}
3147
/*
 * Return sv with the order of its eight bytes fully reversed.
 */
private uint64_t
swap8(uint64_t sv)
{
	uint64_t reversed = 0;
	int n;

	/* Peel bytes off the low end of sv and push them onto the result. */
	for (n = 0; n < 8; n++) {
		reversed = (reversed << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return reversed;
}
3178
3179/*
3180 * byteswap a single magic entry
3181 */
3182private void
3183bs1(struct magic *m)
3184{
3185	m->cont_level = swap2(m->cont_level);
3186	m->offset = swap4((uint32_t)m->offset);
3187	m->in_offset = swap4((uint32_t)m->in_offset);
3188	m->lineno = swap4((uint32_t)m->lineno);
3189	if (IS_LIBMAGIC_STRING(m->type)) {
3190		m->str_range = swap4(m->str_range);
3191		m->str_flags = swap4(m->str_flags);
3192	}
3193	else {
3194		m->value.q = swap8(m->value.q);
3195		m->num_mask = swap8(m->num_mask);
3196	}
3197}
3198
3199protected size_t
3200file_pstring_length_size(const struct magic *m)
3201{
3202	switch (m->str_flags & PSTRING_LEN) {
3203	case PSTRING_1_LE:
3204		return 1;
3205	case PSTRING_2_LE:
3206	case PSTRING_2_BE:
3207		return 2;
3208	case PSTRING_4_LE:
3209	case PSTRING_4_BE:
3210		return 4;
3211	default:
3212		abort();	/* Impossible */
3213		return 1;
3214	}
3215}
3216protected size_t
3217file_pstring_get_length(const struct magic *m, const char *s)
3218{
3219	size_t len = 0;
3220
3221	switch (m->str_flags & PSTRING_LEN) {
3222	case PSTRING_1_LE:
3223		len = *s;
3224		break;
3225	case PSTRING_2_LE:
3226		len = (s[1] << 8) | s[0];
3227		break;
3228	case PSTRING_2_BE:
3229		len = (s[0] << 8) | s[1];
3230		break;
3231	case PSTRING_4_LE:
3232		len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
3233		break;
3234	case PSTRING_4_BE:
3235		len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
3236		break;
3237	default:
3238		abort();	/* Impossible */
3239	}
3240
3241	if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
3242		len -= file_pstring_length_size(m);
3243
3244	return len;
3245}
3246
3247protected int
3248file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3249{
3250	uint32_t i, j;
3251	struct mlist *mlist, *ml;
3252
3253	mlist = ms->mlist[1];
3254
3255	for (ml = mlist->next; ml != mlist; ml = ml->next) {
3256		struct magic *ma = ml->magic;
3257		uint32_t nma = ml->nmagic;
3258		for (i = 0; i < nma; i++) {
3259			if (ma[i].type != FILE_NAME)
3260				continue;
3261			if (strcmp(ma[i].value.s, name) == 0) {
3262				v->magic = &ma[i];
3263				for (j = i + 1; j < nma; j++)
3264				    if (ma[j].cont_level == 0)
3265					    break;
3266				v->nmagic = j - i;
3267				return 0;
3268			}
3269		}
3270	}
3271	return -1;
3272}
3273