1/* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28/* 29 * apprentice - make one pass through /etc/magic, learning its secrets. 30 */ 31 32#include "php.h" 33 34#include "file.h" 35 36#ifndef lint 37FILE_RCSID("@(#)$File: apprentice.c,v 1.173 2011/12/08 12:38:24 rrt Exp $") 38#endif /* lint */ 39 40#include "magic.h" 41#include "patchlevel.h" 42#include <stdlib.h> 43 44#if defined(__hpux) && !defined(HAVE_STRTOULL) 45#if SIZEOF_LONG == 8 46# define strtoull strtoul 47#else 48# define strtoull __strtoull 49#endif 50#endif 51 52#ifdef PHP_WIN32 53#include "win32/unistd.h" 54#if _MSC_VER <= 1300 55# include "win32/php_strtoi64.h" 56#endif 57#define strtoull _strtoui64 58#else 59#include <unistd.h> 60#endif 61 62#include <string.h> 63#include <assert.h> 64#include <ctype.h> 65#include <fcntl.h> 66 67#define EATAB {while (isascii((unsigned char) *l) && \ 68 isspace((unsigned char) *l)) ++l;} 69#define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 70 tolower((unsigned char) (l)) : (l)) 71/* 72 * Work around a bug in headers on Digital Unix. 73 * At least confirmed for: OSF1 V4.0 878 74 */ 75#if defined(__osf__) && defined(__DECC) 76#ifdef MAP_FAILED 77#undef MAP_FAILED 78#endif 79#endif 80 81#ifndef MAP_FAILED 82#define MAP_FAILED (void *) -1 83#endif 84 85#ifndef MAP_FILE 86#define MAP_FILE 0 87#endif 88 89struct magic_entry { 90 struct magic *mp; 91 uint32_t cont_count; 92 uint32_t max_count; 93}; 94 95int file_formats[FILE_NAMES_SIZE]; 96const size_t file_nformats = FILE_NAMES_SIZE; 97const char *file_names[FILE_NAMES_SIZE]; 98const size_t file_nnames = FILE_NAMES_SIZE; 99 100private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 101private int hextoint(int); 102private const char *getstr(struct magic_set *, struct magic *, const char *, 103 int); 104private int parse(struct magic_set *, struct magic_entry **, uint32_t *, 105 const char *, size_t, int); 106private void eatsize(const char **); 107private int apprentice_1(struct magic_set *, const char *, int, struct mlist *); 108private size_t apprentice_magic_strength(const struct magic *); 109private int apprentice_sort(const void *, const void *); 110private void apprentice_list(struct mlist *, int ); 111private int apprentice_load(struct magic_set *, struct magic **, uint32_t *, 112 const char *, int); 113private void byteswap(struct magic *, uint32_t); 114private void bs1(struct magic *); 115private uint16_t swap2(uint16_t); 116private uint32_t swap4(uint32_t); 117private uint64_t swap8(uint64_t); 118private char *mkdbname(struct magic_set *, const char *, int); 119private int apprentice_map(struct magic_set *, struct magic **, uint32_t *, 120 const char *); 121private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *, 122 const char *); 123private int check_format_type(const char *, int); 124private int check_format(struct magic_set *, struct magic *); 125private int get_op(char); 126private int parse_mime(struct magic_set *, struct magic_entry *, const char *); 127private int parse_strength(struct magic_set *, struct magic_entry *, const char *); 128private int parse_apple(struct magic_set *, struct magic_entry *, const char *); 129 130private size_t maxmagic = 0; 131private size_t magicsize = sizeof(struct magic); 132 133private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 134private struct { 135 const char *name; 136 size_t len; 137 int (*fun)(struct magic_set *, struct magic_entry *, const char *); 138} bang[] = { 139#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 140 DECLARE_FIELD(mime), 141 DECLARE_FIELD(apple), 142 DECLARE_FIELD(strength), 143#undef DECLARE_FIELD 144 { NULL, 0, NULL } 145}; 146 147#include "../data_file.c" 148 149static const struct type_tbl_s { 150 const char name[16]; 151 const size_t len; 152 const int type; 153 const int format; 154} type_tbl[] = { 155# define XX(s) s, (sizeof(s) - 1) 156# define XX_NULL "", 0 157 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 158 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 159 { XX("default"), FILE_DEFAULT, FILE_FMT_STR }, 160 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 161 { XX("string"), FILE_STRING, FILE_FMT_STR }, 162 { XX("date"), FILE_DATE, FILE_FMT_STR }, 163 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 164 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 165 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 166 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 167 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 168 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 169 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 170 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 171 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 172 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 173 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 174 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 175 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 176 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 177 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 178 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 179 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 180 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 181 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 182 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 183 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 184 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 185 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 186 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 187 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 188 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 189 { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, 190 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 191 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 192 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 193 { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 194 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 195 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 196 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 197 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NONE }, 198 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 199# undef XX 200# undef XX_NULL 201}; 202 203#ifndef S_ISDIR 204#define S_ISDIR(mode) ((mode) & _S_IFDIR) 205#endif 206 207private int 208get_type(const char *l, const char **t) 209{ 210 const struct type_tbl_s *p; 211 212 for (p = type_tbl; p->len; p++) { 213 if (strncmp(l, p->name, p->len) == 0) { 214 if (t) 215 *t = l + p->len; 216 break; 217 } 218 } 219 return p->type; 220} 221 222private void 223init_file_tables(void) 224{ 225 static int done = 0; 226 const struct type_tbl_s *p; 227 228 if (done) 229 return; 230 done++; 231 232 for (p = type_tbl; p->len; p++) { 233 assert(p->type < FILE_NAMES_SIZE); 234 file_names[p->type] = p->name; 235 file_formats[p->type] = p->format; 236 } 237} 238 239/* 240 * Handle one file or directory. 241 */ 242private int 243apprentice_1(struct magic_set *ms, const char *fn, int action, 244 struct mlist *mlist) 245{ 246 struct magic *magic = NULL; 247 uint32_t nmagic = 0; 248 struct mlist *ml; 249 int rv = -1; 250 int mapped; 251 252 if (magicsize != FILE_MAGICSIZE) { 253 file_error(ms, 0, "magic element size %lu != %lu", 254 (unsigned long)sizeof(*magic), 255 (unsigned long)FILE_MAGICSIZE); 256 return -1; 257 } 258 259 if (action == FILE_COMPILE) { 260 rv = apprentice_load(ms, &magic, &nmagic, fn, action); 261 if (rv != 0) 262 return -1; 263 rv = apprentice_compile(ms, &magic, &nmagic, fn); 264 efree(magic); 265 return rv; 266 } 267 268 if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) { 269 if (fn) { 270 if (ms->flags & MAGIC_CHECK) 271 file_magwarn(ms, "using regular magic file `%s'", fn); 272 rv = apprentice_load(ms, &magic, &nmagic, fn, action); 273 } 274 275 if (rv != 0) 276 return -1; 277 } 278 279 mapped = rv; 280 281 if (magic == NULL) { 282 file_delmagic(magic, mapped, nmagic); 283 return -1; 284 } 285 286 ml = emalloc(sizeof(*ml)); 287 288 ml->magic = magic; 289 ml->nmagic = nmagic; 290 ml->mapped = mapped; 291 292 mlist->prev->next = ml; 293 ml->prev = mlist->prev; 294 ml->next = mlist; 295 mlist->prev = ml; 296 297 if (action == FILE_LIST) { 298 printf("Binary patterns:\n"); 299 apprentice_list(mlist, BINTEST); 300 printf("Text patterns:\n"); 301 apprentice_list(mlist, TEXTTEST); 302 } 303 304 return 0; 305} 306 307protected void 308file_delmagic(struct magic *p, int type, size_t entries) 309{ 310 if (p == NULL) 311 return; 312 switch (type) { 313 case 3: 314 /* Do nothing, it's part of the code segment */ 315 break; 316 317 case 1: 318 p--; 319 /*FALLTHROUGH*/ 320 321 case 0: 322 efree(p); 323 break; 324 325 default: 326 abort(); 327 } 328} 329 330/* const char *fn: list of magic files and directories */ 331protected struct mlist * 332file_apprentice(struct magic_set *ms, const char *fn, int action) 333{ 334 char *p, *mfn; 335 int file_err, errs = -1; 336 struct mlist *mlist; 337/* XXX disabling default magic loading so the compiled in data is used */ 338#if 0 339 if ((fn = magic_getpath(fn, action)) == NULL) 340 return NULL; 341#endif 342 343 init_file_tables(); 344 345 if (fn == NULL) 346 fn = getenv("MAGIC"); 347 if (fn == NULL) { 348 mlist = emalloc(sizeof(*mlist)); 349 mlist->next = mlist->prev = mlist; 350 apprentice_1(ms, fn, action, mlist); 351 return mlist; 352 } 353 354 mfn = estrdup(fn); 355 fn = mfn; 356 357 mlist = emalloc(sizeof(*mlist)); 358 mlist->next = mlist->prev = mlist; 359 360 while (fn) { 361 p = strchr(fn, PATHSEP); 362 if (p) 363 *p++ = '\0'; 364 if (*fn == '\0') 365 break; 366 file_err = apprentice_1(ms, fn, action, mlist); 367 errs = MAX(errs, file_err); 368 fn = p; 369 } 370 if (errs == -1) { 371 efree(mfn); 372 efree(mlist); 373 mlist = NULL; 374 file_error(ms, 0, "could not find any magic files!"); 375 return NULL; 376 } 377 efree(mfn); 378 return mlist; 379} 380 381/* 382 * Get weight of this magic entry, for sorting purposes. 383 */ 384private size_t 385apprentice_magic_strength(const struct magic *m) 386{ 387#define MULT 10 388 size_t val = 2 * MULT; /* baseline strength */ 389 390 switch (m->type) { 391 case FILE_DEFAULT: /* make sure this sorts last */ 392 if (m->factor_op != FILE_FACTOR_OP_NONE) 393 abort(); 394 return 0; 395 396 case FILE_BYTE: 397 val += 1 * MULT; 398 break; 399 400 case FILE_SHORT: 401 case FILE_LESHORT: 402 case FILE_BESHORT: 403 val += 2 * MULT; 404 break; 405 406 case FILE_LONG: 407 case FILE_LELONG: 408 case FILE_BELONG: 409 case FILE_MELONG: 410 val += 4 * MULT; 411 break; 412 413 case FILE_PSTRING: 414 case FILE_STRING: 415 val += m->vallen * MULT; 416 break; 417 418 case FILE_BESTRING16: 419 case FILE_LESTRING16: 420 val += m->vallen * MULT / 2; 421 break; 422 423 case FILE_SEARCH: 424 case FILE_REGEX: 425 val += m->vallen * MAX(MULT / m->vallen, 1); 426 break; 427 428 case FILE_DATE: 429 case FILE_LEDATE: 430 case FILE_BEDATE: 431 case FILE_MEDATE: 432 case FILE_LDATE: 433 case FILE_LELDATE: 434 case FILE_BELDATE: 435 case FILE_MELDATE: 436 case FILE_FLOAT: 437 case FILE_BEFLOAT: 438 case FILE_LEFLOAT: 439 val += 4 * MULT; 440 break; 441 442 case FILE_QUAD: 443 case FILE_BEQUAD: 444 case FILE_LEQUAD: 445 case FILE_QDATE: 446 case FILE_LEQDATE: 447 case FILE_BEQDATE: 448 case FILE_QLDATE: 449 case FILE_LEQLDATE: 450 case FILE_BEQLDATE: 451 case FILE_DOUBLE: 452 case FILE_BEDOUBLE: 453 case FILE_LEDOUBLE: 454 val += 8 * MULT; 455 break; 456 457 default: 458 val = 0; 459 (void)fprintf(stderr, "Bad type %d\n", m->type); 460 abort(); 461 } 462 463 switch (m->reln) { 464 case 'x': /* matches anything penalize */ 465 case '!': /* matches almost anything penalize */ 466 val = 0; 467 break; 468 469 case '=': /* Exact match, prefer */ 470 val += MULT; 471 break; 472 473 case '>': 474 case '<': /* comparison match reduce strength */ 475 val -= 2 * MULT; 476 break; 477 478 case '^': 479 case '&': /* masking bits, we could count them too */ 480 val -= MULT; 481 break; 482 483 default: 484 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 485 abort(); 486 } 487 488 if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ 489 val = 1; 490 491 switch (m->factor_op) { 492 case FILE_FACTOR_OP_NONE: 493 break; 494 case FILE_FACTOR_OP_PLUS: 495 val += m->factor; 496 break; 497 case FILE_FACTOR_OP_MINUS: 498 val -= m->factor; 499 break; 500 case FILE_FACTOR_OP_TIMES: 501 val *= m->factor; 502 break; 503 case FILE_FACTOR_OP_DIV: 504 val /= m->factor; 505 break; 506 default: 507 abort(); 508 } 509 510 511 /* 512 * Magic entries with no description get a bonus because they depend 513 * on subsequent magic entries to print something. 514 */ 515 if (m->desc[0] == '\0') 516 val++; 517 return val; 518} 519 520/* 521 * Sort callback for sorting entries by "strength" (basically length) 522 */ 523private int 524apprentice_sort(const void *a, const void *b) 525{ 526 const struct magic_entry *ma = a; 527 const struct magic_entry *mb = b; 528 size_t sa = apprentice_magic_strength(ma->mp); 529 size_t sb = apprentice_magic_strength(mb->mp); 530 if (sa == sb) 531 return 0; 532 else if (sa > sb) 533 return -1; 534 else 535 return 1; 536} 537 538/* 539 * Shows sorted patterns list in the order which is used for the matching 540 */ 541private void 542apprentice_list(struct mlist *mlist, int mode) 543{ 544 uint32_t magindex = 0; 545 struct mlist *ml; 546 for (ml = mlist->next; ml != mlist; ml = ml->next) { 547 for (magindex = 0; magindex < ml->nmagic; magindex++) { 548 struct magic *m = &ml->magic[magindex]; 549 if ((m->flag & mode) != mode) { 550 /* Skip sub-tests */ 551 while (magindex + 1 < ml->nmagic && 552 ml->magic[magindex + 1].cont_level != 0) 553 ++magindex; 554 continue; /* Skip to next top-level test*/ 555 } 556 557 /* 558 * Try to iterate over the tree until we find item with 559 * description/mimetype. 560 */ 561 while (magindex + 1 < ml->nmagic && 562 ml->magic[magindex + 1].cont_level != 0 && 563 *ml->magic[magindex].desc == '\0' && 564 *ml->magic[magindex].mimetype == '\0') 565 magindex++; 566 567 printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n", 568 apprentice_magic_strength(m), 569 ml->magic[magindex].desc, 570 ml->magic[magindex].mimetype); 571 } 572 } 573} 574 575private void 576set_test_type(struct magic *mstart, struct magic *m) 577{ 578 switch (m->type) { 579 case FILE_BYTE: 580 case FILE_SHORT: 581 case FILE_LONG: 582 case FILE_DATE: 583 case FILE_BESHORT: 584 case FILE_BELONG: 585 case FILE_BEDATE: 586 case FILE_LESHORT: 587 case FILE_LELONG: 588 case FILE_LEDATE: 589 case FILE_LDATE: 590 case FILE_BELDATE: 591 case FILE_LELDATE: 592 case FILE_MEDATE: 593 case FILE_MELDATE: 594 case FILE_MELONG: 595 case FILE_QUAD: 596 case FILE_LEQUAD: 597 case FILE_BEQUAD: 598 case FILE_QDATE: 599 case FILE_LEQDATE: 600 case FILE_BEQDATE: 601 case FILE_QLDATE: 602 case FILE_LEQLDATE: 603 case FILE_BEQLDATE: 604 case FILE_FLOAT: 605 case FILE_BEFLOAT: 606 case FILE_LEFLOAT: 607 case FILE_DOUBLE: 608 case FILE_BEDOUBLE: 609 case FILE_LEDOUBLE: 610 mstart->flag |= BINTEST; 611 break; 612 case FILE_STRING: 613 case FILE_PSTRING: 614 case FILE_BESTRING16: 615 case FILE_LESTRING16: 616 /* Allow text overrides */ 617 if (mstart->str_flags & STRING_TEXTTEST) 618 mstart->flag |= TEXTTEST; 619 else 620 mstart->flag |= BINTEST; 621 break; 622 case FILE_REGEX: 623 case FILE_SEARCH: 624 /* Check for override */ 625 if (mstart->str_flags & STRING_BINTEST) 626 mstart->flag |= BINTEST; 627 if (mstart->str_flags & STRING_TEXTTEST) 628 mstart->flag |= TEXTTEST; 629 630 if (mstart->flag & (TEXTTEST|BINTEST)) 631 break; 632 633 /* binary test if pattern is not text */ 634 if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL, 635 NULL) <= 0) 636 mstart->flag |= BINTEST; 637 else 638 mstart->flag |= TEXTTEST; 639 break; 640 case FILE_DEFAULT: 641 /* can't deduce anything; we shouldn't see this at the 642 top level anyway */ 643 break; 644 case FILE_INVALID: 645 default: 646 /* invalid search type, but no need to complain here */ 647 break; 648 } 649} 650 651/* 652 * Load and parse one file. 653 */ 654private void 655load_1(struct magic_set *ms, int action, const char *fn, int *errs, 656 struct magic_entry **marray, uint32_t *marraycount) 657{ 658 char buffer[BUFSIZ + 1]; 659 char *line = NULL; 660 size_t len; 661 size_t lineno = 0; 662 663 php_stream *stream; 664 665 TSRMLS_FETCH(); 666 667#if PHP_API_VERSION < 20100412 668 stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL); 669#else 670 stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL); 671#endif 672 673 if (stream == NULL) { 674 if (errno != ENOENT) 675 file_error(ms, errno, "cannot read magic file `%s'", 676 fn); 677 (*errs)++; 678 return; 679 } 680 681 /* read and parse this file */ 682#if (PHP_MAJOR_VERSION < 6) 683 for (ms->line = 1; (line = php_stream_get_line(stream, buffer , BUFSIZ, &len)) != NULL; ms->line++) { 684#else 685 for (ms->line = 1; (line = php_stream_get_line(stream, ZSTR(buffer), BUFSIZ, &len)) != NULL; ms->line++) { 686#endif 687 if (len == 0) /* null line, garbage, etc */ 688 continue; 689 if (line[len - 1] == '\n') { 690 lineno++; 691 line[len - 1] = '\0'; /* delete newline */ 692 } 693 switch (line[0]) { 694 case '\0': /* empty, do not parse */ 695 case '#': /* comment, do not parse */ 696 continue; 697 case '!': 698 if (line[1] == ':') { 699 size_t i; 700 701 for (i = 0; bang[i].name != NULL; i++) { 702 if ((size_t)(len - 2) > bang[i].len && 703 memcmp(bang[i].name, line + 2, 704 bang[i].len) == 0) 705 break; 706 } 707 if (bang[i].name == NULL) { 708 file_error(ms, 0, 709 "Unknown !: entry `%s'", line); 710 (*errs)++; 711 continue; 712 } 713 if (*marraycount == 0) { 714 file_error(ms, 0, 715 "No current entry for :!%s type", 716 bang[i].name); 717 (*errs)++; 718 continue; 719 } 720 if ((*bang[i].fun)(ms, 721 &(*marray)[*marraycount - 1], 722 line + bang[i].len + 2) != 0) { 723 (*errs)++; 724 continue; 725 } 726 continue; 727 } 728 /*FALLTHROUGH*/ 729 default: 730 if (parse(ms, marray, marraycount, line, lineno, 731 action) != 0) 732 (*errs)++; 733 break; 734 } 735 } 736 php_stream_close(stream); 737} 738 739/* 740 * parse a file or directory of files 741 * const char *fn: name of magic file or directory 742 */ 743private int 744cmpstrp(const void *p1, const void *p2) 745{ 746 return strcmp(*(char *const *)p1, *(char *const *)p2); 747} 748 749private int 750apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 751 const char *fn, int action) 752{ 753 int errs = 0, mflen = 0; 754 struct magic_entry *marray; 755 uint32_t marraycount, i, mentrycount = 0, starttest; 756 size_t files = 0, maxfiles = 0; 757 char **filearr = NULL, mfn[MAXPATHLEN]; 758 struct stat st; 759 php_stream *dir; 760 php_stream_dirent d; 761 762 TSRMLS_FETCH(); 763 764 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 765 766 maxmagic = MAXMAGIS; 767 marray = ecalloc(maxmagic, sizeof(*marray)); 768 marraycount = 0; 769 770 /* print silly verbose header for USG compat. */ 771 if (action == FILE_CHECK) 772 (void)fprintf(stderr, "%s\n", usg_hdr); 773 774 /* load directory or file */ 775 /* FIXME: Read file names and sort them to prevent 776 non-determinism. See Debian bug #488562. */ 777 if (php_sys_stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 778 int mflen; 779 char mfn[MAXPATHLEN]; 780 781 dir = php_stream_opendir(fn, REPORT_ERRORS, NULL); 782 if (!dir) { 783 errs++; 784 goto out; 785 } 786 while (php_stream_readdir(dir, &d)) { 787 if ((mflen = snprintf(mfn, sizeof(mfn), "%s/%s", fn, d.d_name)) < 0) { 788 file_oomem(ms, strlen(fn) + strlen(d.d_name) + 2); 789 errs++; 790 php_stream_closedir(dir); 791 goto out; 792 } 793 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 794 continue; 795 } 796 if (files >= maxfiles) { 797 size_t mlen; 798 maxfiles = (maxfiles + 1) * 2; 799 mlen = maxfiles * sizeof(*filearr); 800 if ((filearr = CAST(char **, 801 realloc(filearr, mlen))) == NULL) { 802 file_oomem(ms, mlen); 803 php_stream_closedir(dir); 804 errs++; 805 goto out; 806 } 807 } 808 filearr[files++] = estrndup(mfn, mflen); 809 } 810 php_stream_closedir(dir); 811 qsort(filearr, files, sizeof(*filearr), cmpstrp); 812 for (i = 0; i < files; i++) { 813 load_1(ms, action, filearr[i], &errs, &marray, 814 &marraycount); 815 efree(filearr[i]); 816 } 817 free(filearr); 818 } else 819 load_1(ms, action, fn, &errs, &marray, &marraycount); 820 if (errs) 821 goto out; 822 823 /* Set types of tests */ 824 for (i = 0; i < marraycount; ) { 825 if (marray[i].mp->cont_level != 0) { 826 i++; 827 continue; 828 } 829 830 starttest = i; 831 do { 832 static const char text[] = "text"; 833 static const char binary[] = "binary"; 834 static const size_t len = sizeof(text); 835 set_test_type(marray[starttest].mp, marray[i].mp); 836 if ((ms->flags & MAGIC_DEBUG) == 0) 837 continue; 838 (void)fprintf(stderr, "%s%s%s: %s\n", 839 marray[i].mp->mimetype, 840 marray[i].mp->mimetype[0] == '\0' ? "" : "; ", 841 marray[i].mp->desc[0] ? marray[i].mp->desc : 842 "(no description)", 843 marray[i].mp->flag & BINTEST ? binary : text); 844 if (marray[i].mp->flag & BINTEST) { 845 char *p = strstr(marray[i].mp->desc, text); 846 if (p && (p == marray[i].mp->desc || 847 isspace((unsigned char)p[-1])) && 848 (p + len - marray[i].mp->desc == 849 MAXstring || (p[len] == '\0' || 850 isspace((unsigned char)p[len])))) 851 (void)fprintf(stderr, "*** Possible " 852 "binary test for text type\n"); 853 } 854 } while (++i < marraycount && marray[i].mp->cont_level != 0); 855 } 856 857 qsort(marray, marraycount, sizeof(*marray), apprentice_sort); 858 859 /* 860 * Make sure that any level 0 "default" line is last (if one exists). 861 */ 862 for (i = 0; i < marraycount; i++) { 863 if (marray[i].mp->cont_level == 0 && 864 marray[i].mp->type == FILE_DEFAULT) { 865 while (++i < marraycount) 866 if (marray[i].mp->cont_level == 0) 867 break; 868 if (i != marraycount) { 869 /* XXX - Ugh! */ 870 ms->line = marray[i].mp->lineno; 871 file_magwarn(ms, 872 "level 0 \"default\" did not sort last"); 873 } 874 break; 875 } 876 } 877 878 for (i = 0; i < marraycount; i++) 879 mentrycount += marray[i].cont_count; 880 881 *magicp = emalloc(sizeof(**magicp) * mentrycount); 882 883 mentrycount = 0; 884 for (i = 0; i < marraycount; i++) { 885 (void)memcpy(*magicp + mentrycount, marray[i].mp, 886 marray[i].cont_count * sizeof(**magicp)); 887 mentrycount += marray[i].cont_count; 888 } 889out: 890 for (i = 0; i < marraycount; i++) { 891 if (marray[i].mp) { 892 efree(marray[i].mp); 893 } 894 } 895 if (marray) { 896 efree(marray); 897 } 898 if (errs) { 899 *magicp = NULL; 900 *nmagicp = 0; 901 return errs; 902 } else { 903 *nmagicp = mentrycount; 904 return 0; 905 } 906 907} 908 909/* 910 * extend the sign bit if the comparison is to be signed 911 */ 912protected uint64_t 913file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 914{ 915 if (!(m->flag & UNSIGNED)) { 916 switch(m->type) { 917 /* 918 * Do not remove the casts below. They are 919 * vital. When later compared with the data, 920 * the sign extension must have happened. 921 */ 922 case FILE_BYTE: 923 v = (char) v; 924 break; 925 case FILE_SHORT: 926 case FILE_BESHORT: 927 case FILE_LESHORT: 928 v = (short) v; 929 break; 930 case FILE_DATE: 931 case FILE_BEDATE: 932 case FILE_LEDATE: 933 case FILE_MEDATE: 934 case FILE_LDATE: 935 case FILE_BELDATE: 936 case FILE_LELDATE: 937 case FILE_MELDATE: 938 case FILE_LONG: 939 case FILE_BELONG: 940 case FILE_LELONG: 941 case FILE_MELONG: 942 case FILE_FLOAT: 943 case FILE_BEFLOAT: 944 case FILE_LEFLOAT: 945 v = (int32_t) v; 946 break; 947 case FILE_QUAD: 948 case FILE_BEQUAD: 949 case FILE_LEQUAD: 950 case FILE_QDATE: 951 case FILE_QLDATE: 952 case FILE_BEQDATE: 953 case FILE_BEQLDATE: 954 case FILE_LEQDATE: 955 case FILE_LEQLDATE: 956 case FILE_DOUBLE: 957 case FILE_BEDOUBLE: 958 case FILE_LEDOUBLE: 959 v = (int64_t) v; 960 break; 961 case FILE_STRING: 962 case FILE_PSTRING: 963 case FILE_BESTRING16: 964 case FILE_LESTRING16: 965 case FILE_REGEX: 966 case FILE_SEARCH: 967 case FILE_DEFAULT: 968 case FILE_INDIRECT: 969 break; 970 default: 971 if (ms->flags & MAGIC_CHECK) 972 file_magwarn(ms, "cannot happen: m->type=%d\n", 973 m->type); 974 return ~0U; 975 } 976 } 977 return v; 978} 979 980private int 981string_modifier_check(struct magic_set *ms, struct magic *m) 982{ 983 if ((ms->flags & MAGIC_CHECK) == 0) 984 return 0; 985 986 if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { 987 file_magwarn(ms, 988 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 989 return -1; 990 } 991 switch (m->type) { 992 case FILE_BESTRING16: 993 case FILE_LESTRING16: 994 if (m->str_flags != 0) { 995 file_magwarn(ms, 996 "no modifiers allowed for 16-bit strings\n"); 997 return -1; 998 } 999 break; 1000 case FILE_STRING: 1001 case FILE_PSTRING: 1002 if ((m->str_flags & REGEX_OFFSET_START) != 0) { 1003 file_magwarn(ms, 1004 "'/%c' only allowed on regex and search\n", 1005 CHAR_REGEX_OFFSET_START); 1006 return -1; 1007 } 1008 break; 1009 case FILE_SEARCH: 1010 if (m->str_range == 0) { 1011 file_magwarn(ms, 1012 "missing range; defaulting to %d\n", 1013 STRING_DEFAULT_RANGE); 1014 m->str_range = STRING_DEFAULT_RANGE; 1015 return -1; 1016 } 1017 break; 1018 case FILE_REGEX: 1019 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1020 file_magwarn(ms, "'/%c' not allowed on regex\n", 1021 CHAR_COMPACT_WHITESPACE); 1022 return -1; 1023 } 1024 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1025 file_magwarn(ms, "'/%c' not allowed on regex\n", 1026 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1027 return -1; 1028 } 1029 break; 1030 default: 1031 file_magwarn(ms, "coding error: m->type=%d\n", 1032 m->type); 1033 return -1; 1034 } 1035 return 0; 1036} 1037 1038private int 1039get_op(char c) 1040{ 1041 switch (c) { 1042 case '&': 1043 return FILE_OPAND; 1044 case '|': 1045 return FILE_OPOR; 1046 case '^': 1047 return FILE_OPXOR; 1048 case '+': 1049 return FILE_OPADD; 1050 case '-': 1051 return FILE_OPMINUS; 1052 case '*': 1053 return FILE_OPMULTIPLY; 1054 case '/': 1055 return FILE_OPDIVIDE; 1056 case '%': 1057 return FILE_OPMODULO; 1058 default: 1059 return -1; 1060 } 1061} 1062 1063#ifdef ENABLE_CONDITIONALS 1064private int 1065get_cond(const char *l, const char **t) 1066{ 1067 static const struct cond_tbl_s { 1068 char name[8]; 1069 size_t len; 1070 int cond; 1071 } cond_tbl[] = { 1072 { "if", 2, COND_IF }, 1073 { "elif", 4, COND_ELIF }, 1074 { "else", 4, COND_ELSE }, 1075 { "", 0, COND_NONE }, 1076 }; 1077 const struct cond_tbl_s *p; 1078 1079 for (p = cond_tbl; p->len; p++) { 1080 if (strncmp(l, p->name, p->len) == 0 && 1081 isspace((unsigned char)l[p->len])) { 1082 if (t) 1083 *t = l + p->len; 1084 break; 1085 } 1086 } 1087 return p->cond; 1088} 1089 1090private int 1091check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 1092{ 1093 int last_cond; 1094 last_cond = ms->c.li[cont_level].last_cond; 1095 1096 switch (cond) { 1097 case COND_IF: 1098 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1099 if (ms->flags & MAGIC_CHECK) 1100 file_magwarn(ms, "syntax error: `if'"); 1101 return -1; 1102 } 1103 last_cond = COND_IF; 1104 break; 1105 1106 case COND_ELIF: 1107 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1108 if (ms->flags & MAGIC_CHECK) 1109 file_magwarn(ms, "syntax error: `elif'"); 1110 return -1; 1111 } 1112 last_cond = COND_ELIF; 1113 break; 1114 1115 case COND_ELSE: 1116 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1117 if (ms->flags & MAGIC_CHECK) 1118 file_magwarn(ms, "syntax error: `else'"); 1119 return -1; 1120 } 1121 last_cond = COND_NONE; 1122 break; 1123 1124 case COND_NONE: 1125 last_cond = COND_NONE; 1126 break; 1127 } 1128 1129 ms->c.li[cont_level].last_cond = last_cond; 1130 return 0; 1131} 1132#endif /* ENABLE_CONDITIONALS */ 1133 1134/* 1135 * parse one line from magic file, put into magic[index++] if valid 1136 */ 1137private int 1138parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, 1139 const char *line, size_t lineno, int action) 1140{ 1141#ifdef ENABLE_CONDITIONALS 1142 static uint32_t last_cont_level = 0; 1143#endif 1144 size_t i; 1145 struct magic_entry *me; 1146 struct magic *m; 1147 const char *l = line; 1148 char *t; 1149 int op; 1150 uint32_t cont_level; 1151 1152 cont_level = 0; 1153 1154 while (*l == '>') { 1155 ++l; /* step over */ 1156 cont_level++; 1157 } 1158#ifdef ENABLE_CONDITIONALS 1159 if (cont_level == 0 || cont_level > last_cont_level) 1160 if (file_check_mem(ms, cont_level) == -1) 1161 return -1; 1162 last_cont_level = cont_level; 1163#endif 1164 1165#define ALLOC_CHUNK (size_t)10 1166#define ALLOC_INCR (size_t)200 1167 1168 if (cont_level != 0) { 1169 if (*nmentryp == 0) { 1170 file_error(ms, 0, "No current entry for continuation"); 1171 return -1; 1172 } 1173 me = &(*mentryp)[*nmentryp - 1]; 1174 if (me->mp == NULL) { 1175 return -1; 1176 } 1177 if (me->cont_count == me->max_count) { 1178 struct magic *nm; 1179 size_t cnt = me->max_count + ALLOC_CHUNK; 1180 nm = erealloc(me->mp, sizeof(*nm) * cnt); 1181 me->mp = m = nm; 1182 me->max_count = CAST(uint32_t, cnt); 1183 } 1184 m = &me->mp[me->cont_count++]; 1185 (void)memset(m, 0, sizeof(*m)); 1186 m->cont_level = cont_level; 1187 } else { 1188 if (*nmentryp == maxmagic) { 1189 struct magic_entry *mp; 1190 1191 maxmagic += ALLOC_INCR; 1192 mp = erealloc(*mentryp, sizeof(*mp) * maxmagic); 1193 (void)memset(&mp[*nmentryp], 0, sizeof(*mp) * ALLOC_INCR); 1194 *mentryp = mp; 1195 } 1196 me = &(*mentryp)[*nmentryp]; 1197 if (me->mp == NULL) { 1198 m = safe_emalloc(sizeof(*m), ALLOC_CHUNK, 0); 1199 me->mp = m; 1200 me->max_count = ALLOC_CHUNK; 1201 } else 1202 m = me->mp; 1203 (void)memset(m, 0, sizeof(*m)); 1204 m->factor_op = FILE_FACTOR_OP_NONE; 1205 m->cont_level = 0; 1206 me->cont_count = 1; 1207 } 1208 m->lineno = CAST(uint32_t, lineno); 1209 1210 if (*l == '&') { /* m->cont_level == 0 checked below. */ 1211 ++l; /* step over */ 1212 m->flag |= OFFADD; 1213 } 1214 if (*l == '(') { 1215 ++l; /* step over */ 1216 m->flag |= INDIR; 1217 if (m->flag & OFFADD) 1218 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1219 1220 if (*l == '&') { /* m->cont_level == 0 checked below */ 1221 ++l; /* step over */ 1222 m->flag |= OFFADD; 1223 } 1224 } 1225 /* Indirect offsets are not valid at level 0. */ 1226 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) 1227 if (ms->flags & MAGIC_CHECK) 1228 file_magwarn(ms, "relative offset at level 0"); 1229 1230 /* get offset, then skip over it */ 1231 m->offset = (uint32_t)strtoul(l, &t, 0); 1232 if (l == t) 1233 if (ms->flags & MAGIC_CHECK) 1234 file_magwarn(ms, "offset `%s' invalid", l); 1235 l = t; 1236 1237 if (m->flag & INDIR) { 1238 m->in_type = FILE_LONG; 1239 m->in_offset = 0; 1240 /* 1241 * read [.lbs][+-]nnnnn) 1242 */ 1243 if (*l == '.') { 1244 l++; 1245 switch (*l) { 1246 case 'l': 1247 m->in_type = FILE_LELONG; 1248 break; 1249 case 'L': 1250 m->in_type = FILE_BELONG; 1251 break; 1252 case 'm': 1253 m->in_type = FILE_MELONG; 1254 break; 1255 case 'h': 1256 case 's': 1257 m->in_type = FILE_LESHORT; 1258 break; 1259 case 'H': 1260 case 'S': 1261 m->in_type = FILE_BESHORT; 1262 break; 1263 case 'c': 1264 case 'b': 1265 case 'C': 1266 case 'B': 1267 m->in_type = FILE_BYTE; 1268 break; 1269 case 'e': 1270 case 'f': 1271 case 'g': 1272 m->in_type = FILE_LEDOUBLE; 1273 break; 1274 case 'E': 1275 case 'F': 1276 case 'G': 1277 m->in_type = FILE_BEDOUBLE; 1278 break; 1279 case 'i': 1280 m->in_type = FILE_LEID3; 1281 break; 1282 case 'I': 1283 m->in_type = FILE_BEID3; 1284 break; 1285 default: 1286 if (ms->flags & MAGIC_CHECK) 1287 file_magwarn(ms, 1288 "indirect offset type `%c' invalid", 1289 *l); 1290 break; 1291 } 1292 l++; 1293 } 1294 1295 m->in_op = 0; 1296 if (*l == '~') { 1297 m->in_op |= FILE_OPINVERSE; 1298 l++; 1299 } 1300 if ((op = get_op(*l)) != -1) { 1301 m->in_op |= op; 1302 l++; 1303 } 1304 if (*l == '(') { 1305 m->in_op |= FILE_OPINDIRECT; 1306 l++; 1307 } 1308 if (isdigit((unsigned char)*l) || *l == '-') { 1309 m->in_offset = (int32_t)strtol(l, &t, 0); 1310 if (l == t) 1311 if (ms->flags & MAGIC_CHECK) 1312 file_magwarn(ms, 1313 "in_offset `%s' invalid", l); 1314 l = t; 1315 } 1316 if (*l++ != ')' || 1317 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) 1318 if (ms->flags & MAGIC_CHECK) 1319 file_magwarn(ms, 1320 "missing ')' in indirect offset"); 1321 } 1322 EATAB; 1323 1324#ifdef ENABLE_CONDITIONALS 1325 m->cond = get_cond(l, &l); 1326 if (check_cond(ms, m->cond, cont_level) == -1) 1327 return -1; 1328 1329 EATAB; 1330#endif 1331 1332 if (*l == 'u') { 1333 ++l; 1334 m->flag |= UNSIGNED; 1335 } 1336 1337 m->type = get_type(l, &l); 1338 if (m->type == FILE_INVALID) { 1339 if (ms->flags & MAGIC_CHECK) 1340 file_magwarn(ms, "type `%s' invalid", l); 1341 if (me->mp) { 1342 efree(me->mp); 1343 me->mp = NULL; 1344 } 1345 return -1; 1346 } 1347 1348 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1349 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 1350 1351 m->mask_op = 0; 1352 if (*l == '~') { 1353 if (!IS_LIBMAGIC_STRING(m->type)) 1354 m->mask_op |= FILE_OPINVERSE; 1355 else if (ms->flags & MAGIC_CHECK) 1356 file_magwarn(ms, "'~' invalid for string types"); 1357 ++l; 1358 } 1359 m->str_range = 0; 1360 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 1361 if ((op = get_op(*l)) != -1) { 1362 if (!IS_LIBMAGIC_STRING(m->type)) { 1363 uint64_t val; 1364 ++l; 1365 m->mask_op |= op; 1366 val = (uint64_t)strtoull(l, &t, 0); 1367 l = t; 1368 m->num_mask = file_signextend(ms, m, val); 1369 eatsize(&l); 1370 } 1371 else if (op == FILE_OPDIVIDE) { 1372 int have_range = 0; 1373 while (!isspace((unsigned char)*++l)) { 1374 switch (*l) { 1375 case '0': case '1': case '2': 1376 case '3': case '4': case '5': 1377 case '6': case '7': case '8': 1378 case '9': 1379 if (have_range && 1380 (ms->flags & MAGIC_CHECK)) 1381 file_magwarn(ms, 1382 "multiple ranges"); 1383 have_range = 1; 1384 m->str_range = CAST(uint32_t, 1385 strtoul(l, &t, 0)); 1386 if (m->str_range == 0) 1387 file_magwarn(ms, 1388 "zero range"); 1389 l = t - 1; 1390 break; 1391 case CHAR_COMPACT_WHITESPACE: 1392 m->str_flags |= 1393 STRING_COMPACT_WHITESPACE; 1394 break; 1395 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1396 m->str_flags |= 1397 STRING_COMPACT_OPTIONAL_WHITESPACE; 1398 break; 1399 case CHAR_IGNORE_LOWERCASE: 1400 m->str_flags |= STRING_IGNORE_LOWERCASE; 1401 break; 1402 case CHAR_IGNORE_UPPERCASE: 1403 m->str_flags |= STRING_IGNORE_UPPERCASE; 1404 break; 1405 case CHAR_REGEX_OFFSET_START: 1406 m->str_flags |= REGEX_OFFSET_START; 1407 break; 1408 case CHAR_BINTEST: 1409 m->str_flags |= STRING_BINTEST; 1410 break; 1411 case CHAR_TEXTTEST: 1412 m->str_flags |= STRING_TEXTTEST; 1413 break; 1414 case CHAR_PSTRING_1_LE: 1415 if (m->type != FILE_PSTRING) 1416 goto bad; 1417 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; 1418 break; 1419 case CHAR_PSTRING_2_BE: 1420 if (m->type != FILE_PSTRING) 1421 goto bad; 1422 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; 1423 break; 1424 case CHAR_PSTRING_2_LE: 1425 if (m->type != FILE_PSTRING) 1426 goto bad; 1427 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; 1428 break; 1429 case CHAR_PSTRING_4_BE: 1430 if (m->type != FILE_PSTRING) 1431 goto bad; 1432 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; 1433 break; 1434 case CHAR_PSTRING_4_LE: 1435 if (m->type != FILE_PSTRING) 1436 goto bad; 1437 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; 1438 break; 1439 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1440 if (m->type != FILE_PSTRING) 1441 goto bad; 1442 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1443 break; 1444 default: 1445 bad: 1446 if (ms->flags & MAGIC_CHECK) 1447 file_magwarn(ms, 1448 "string extension `%c' " 1449 "invalid", *l); 1450 return -1; 1451 } 1452 /* allow multiple '/' for readability */ 1453 if (l[1] == '/' && 1454 !isspace((unsigned char)l[2])) 1455 l++; 1456 } 1457 if (string_modifier_check(ms, m) == -1) 1458 return -1; 1459 } 1460 else { 1461 if (ms->flags & MAGIC_CHECK) 1462 file_magwarn(ms, "invalid string op: %c", *t); 1463 return -1; 1464 } 1465 } 1466 /* 1467 * We used to set mask to all 1's here, instead let's just not do 1468 * anything if mask = 0 (unless you have a better idea) 1469 */ 1470 EATAB; 1471 1472 switch (*l) { 1473 case '>': 1474 case '<': 1475 m->reln = *l; 1476 ++l; 1477 if (*l == '=') { 1478 if (ms->flags & MAGIC_CHECK) { 1479 file_magwarn(ms, "%c= not supported", 1480 m->reln); 1481 return -1; 1482 } 1483 ++l; 1484 } 1485 break; 1486 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 1487 case '&': 1488 case '^': 1489 case '=': 1490 m->reln = *l; 1491 ++l; 1492 if (*l == '=') { 1493 /* HP compat: ignore &= etc. */ 1494 ++l; 1495 } 1496 break; 1497 case '!': 1498 m->reln = *l; 1499 ++l; 1500 break; 1501 default: 1502 m->reln = '='; /* the default relation */ 1503 if (*l == 'x' && ((isascii((unsigned char)l[1]) && 1504 isspace((unsigned char)l[1])) || !l[1])) { 1505 m->reln = *l; 1506 ++l; 1507 } 1508 break; 1509 } 1510 /* 1511 * Grab the value part, except for an 'x' reln. 1512 */ 1513 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 1514 return -1; 1515 1516 /* 1517 * TODO finish this macro and start using it! 1518 * #define offsetcheck {if (offset > HOWMANY-1) 1519 * magwarn("offset too big"); } 1520 */ 1521 1522 /* 1523 * Now get last part - the description 1524 */ 1525 EATAB; 1526 if (l[0] == '\b') { 1527 ++l; 1528 m->flag |= NOSPACE; 1529 } else if ((l[0] == '\\') && (l[1] == 'b')) { 1530 ++l; 1531 ++l; 1532 m->flag |= NOSPACE; 1533 } 1534 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 1535 continue; 1536 if (i == sizeof(m->desc)) { 1537 m->desc[sizeof(m->desc) - 1] = '\0'; 1538 if (ms->flags & MAGIC_CHECK) 1539 file_magwarn(ms, "description `%s' truncated", m->desc); 1540 } 1541 1542 /* 1543 * We only do this check while compiling, or if any of the magic 1544 * files were not compiled. 1545 */ 1546 if (ms->flags & MAGIC_CHECK) { 1547 if (check_format(ms, m) == -1) 1548 return -1; 1549 } 1550 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 1551 if (m->cont_level == 0) 1552 ++(*nmentryp); /* make room for next */ 1553 return 0; 1554} 1555 1556/* 1557 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 1558 * if valid 1559 */ 1560private int 1561parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) 1562{ 1563 const char *l = line; 1564 char *el; 1565 unsigned long factor; 1566 struct magic *m = &me->mp[0]; 1567 1568 if (m->factor_op != FILE_FACTOR_OP_NONE) { 1569 file_magwarn(ms, 1570 "Current entry already has a strength type: %c %d", 1571 m->factor_op, m->factor); 1572 return -1; 1573 } 1574 EATAB; 1575 switch (*l) { 1576 case FILE_FACTOR_OP_NONE: 1577 case FILE_FACTOR_OP_PLUS: 1578 case FILE_FACTOR_OP_MINUS: 1579 case FILE_FACTOR_OP_TIMES: 1580 case FILE_FACTOR_OP_DIV: 1581 m->factor_op = *l++; 1582 break; 1583 default: 1584 file_magwarn(ms, "Unknown factor op `%c'", *l); 1585 return -1; 1586 } 1587 EATAB; 1588 factor = strtoul(l, &el, 0); 1589 if (factor > 255) { 1590 file_magwarn(ms, "Too large factor `%lu'", factor); 1591 goto out; 1592 } 1593 if (*el && !isspace((unsigned char)*el)) { 1594 file_magwarn(ms, "Bad factor `%s'", l); 1595 goto out; 1596 } 1597 m->factor = (uint8_t)factor; 1598 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 1599 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 1600 m->factor_op, m->factor); 1601 goto out; 1602 } 1603 return 0; 1604out: 1605 m->factor_op = FILE_FACTOR_OP_NONE; 1606 m->factor = 0; 1607 return -1; 1608} 1609 1610/* 1611 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 1612 * magic[index - 1] 1613 */ 1614private int 1615parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) 1616{ 1617 size_t i; 1618 const char *l = line; 1619 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 1620 1621 if (m->apple[0] != '\0') { 1622 file_magwarn(ms, "Current entry already has a APPLE type " 1623 "`%.8s', new type `%s'", m->mimetype, l); 1624 return -1; 1625 } 1626 1627 EATAB; 1628 for (i = 0; *l && ((isascii((unsigned char)*l) && 1629 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 1630 i < sizeof(m->apple); m->apple[i++] = *l++) 1631 continue; 1632 if (i == sizeof(m->apple) && *l) { 1633 /* We don't need to NUL terminate here, printing handles it */ 1634 if (ms->flags & MAGIC_CHECK) 1635 file_magwarn(ms, "APPLE type `%s' truncated %" 1636 SIZE_T_FORMAT "u", line, i); 1637 } 1638 1639 if (i > 0) 1640 return 0; 1641 else 1642 return -1; 1643} 1644 1645/* 1646 * parse a MIME annotation line from magic file, put into magic[index - 1] 1647 * if valid 1648 */ 1649private int 1650parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) 1651{ 1652 size_t i; 1653 const char *l = line; 1654 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 1655 1656 if (m->mimetype[0] != '\0') { 1657 file_magwarn(ms, "Current entry already has a MIME type `%s'," 1658 " new type `%s'", m->mimetype, l); 1659 return -1; 1660 } 1661 1662 EATAB; 1663 for (i = 0; *l && ((isascii((unsigned char)*l) && 1664 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 1665 i < sizeof(m->mimetype); m->mimetype[i++] = *l++) 1666 continue; 1667 if (i == sizeof(m->mimetype)) { 1668 m->mimetype[sizeof(m->mimetype) - 1] = '\0'; 1669 if (ms->flags & MAGIC_CHECK) 1670 file_magwarn(ms, "MIME type `%s' truncated %" 1671 SIZE_T_FORMAT "u", m->mimetype, i); 1672 } else 1673 m->mimetype[i] = '\0'; 1674 1675 if (i > 0) 1676 return 0; 1677 else 1678 return -1; 1679} 1680 1681private int 1682check_format_type(const char *ptr, int type) 1683{ 1684 int quad = 0; 1685 if (*ptr == '\0') { 1686 /* Missing format string; bad */ 1687 return -1; 1688 } 1689 1690 switch (type) { 1691 case FILE_FMT_QUAD: 1692 quad = 1; 1693 /*FALLTHROUGH*/ 1694 case FILE_FMT_NUM: 1695 if (*ptr == '-') 1696 ptr++; 1697 if (*ptr == '.') 1698 ptr++; 1699 while (isdigit((unsigned char)*ptr)) ptr++; 1700 if (*ptr == '.') 1701 ptr++; 1702 while (isdigit((unsigned char)*ptr)) ptr++; 1703 if (quad) { 1704 if (*ptr++ != 'l') 1705 return -1; 1706 if (*ptr++ != 'l') 1707 return -1; 1708 } 1709 1710 switch (*ptr++) { 1711 case 'l': 1712 switch (*ptr++) { 1713 case 'i': 1714 case 'd': 1715 case 'u': 1716 case 'x': 1717 case 'X': 1718 return 0; 1719 default: 1720 return -1; 1721 } 1722 1723 case 'h': 1724 switch (*ptr++) { 1725 case 'h': 1726 switch (*ptr++) { 1727 case 'i': 1728 case 'd': 1729 case 'u': 1730 case 'x': 1731 case 'X': 1732 return 0; 1733 default: 1734 return -1; 1735 } 1736 case 'd': 1737 return 0; 1738 default: 1739 return -1; 1740 } 1741 1742 case 'i': 1743 case 'c': 1744 case 'd': 1745 case 'u': 1746 case 'x': 1747 case 'X': 1748 return 0; 1749 1750 default: 1751 return -1; 1752 } 1753 1754 case FILE_FMT_FLOAT: 1755 case FILE_FMT_DOUBLE: 1756 if (*ptr == '-') 1757 ptr++; 1758 if (*ptr == '.') 1759 ptr++; 1760 while (isdigit((unsigned char)*ptr)) ptr++; 1761 if (*ptr == '.') 1762 ptr++; 1763 while (isdigit((unsigned char)*ptr)) ptr++; 1764 1765 switch (*ptr++) { 1766 case 'e': 1767 case 'E': 1768 case 'f': 1769 case 'F': 1770 case 'g': 1771 case 'G': 1772 return 0; 1773 1774 default: 1775 return -1; 1776 } 1777 1778 1779 case FILE_FMT_STR: 1780 if (*ptr == '-') 1781 ptr++; 1782 while (isdigit((unsigned char )*ptr)) 1783 ptr++; 1784 if (*ptr == '.') { 1785 ptr++; 1786 while (isdigit((unsigned char )*ptr)) 1787 ptr++; 1788 } 1789 1790 switch (*ptr++) { 1791 case 's': 1792 return 0; 1793 default: 1794 return -1; 1795 } 1796 1797 default: 1798 /* internal error */ 1799 abort(); 1800 } 1801 /*NOTREACHED*/ 1802 return -1; 1803} 1804 1805/* 1806 * Check that the optional printf format in description matches 1807 * the type of the magic. 1808 */ 1809private int 1810check_format(struct magic_set *ms, struct magic *m) 1811{ 1812 char *ptr; 1813 1814 for (ptr = m->desc; *ptr; ptr++) 1815 if (*ptr == '%') 1816 break; 1817 if (*ptr == '\0') { 1818 /* No format string; ok */ 1819 return 1; 1820 } 1821 1822 assert(file_nformats == file_nnames); 1823 1824 if (m->type >= file_nformats) { 1825 file_magwarn(ms, "Internal error inconsistency between " 1826 "m->type and format strings"); 1827 return -1; 1828 } 1829 if (file_formats[m->type] == FILE_FMT_NONE) { 1830 file_magwarn(ms, "No format string for `%s' with description " 1831 "`%s'", m->desc, file_names[m->type]); 1832 return -1; 1833 } 1834 1835 ptr++; 1836 if (check_format_type(ptr, file_formats[m->type]) == -1) { 1837 /* 1838 * TODO: this error message is unhelpful if the format 1839 * string is not one character long 1840 */ 1841 file_magwarn(ms, "Printf format `%c' is not valid for type " 1842 "`%s' in description `%s'", *ptr ? *ptr : '?', 1843 file_names[m->type], m->desc); 1844 return -1; 1845 } 1846 1847 for (; *ptr; ptr++) { 1848 if (*ptr == '%') { 1849 file_magwarn(ms, 1850 "Too many format strings (should have at most one) " 1851 "for `%s' with description `%s'", 1852 file_names[m->type], m->desc); 1853 return -1; 1854 } 1855 } 1856 return 0; 1857} 1858 1859/* 1860 * Read a numeric value from a pointer, into the value union of a magic 1861 * pointer, according to the magic type. Update the string pointer to point 1862 * just after the number read. Return 0 for success, non-zero for failure. 1863 */ 1864private int 1865getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 1866{ 1867 switch (m->type) { 1868 case FILE_BESTRING16: 1869 case FILE_LESTRING16: 1870 case FILE_STRING: 1871 case FILE_PSTRING: 1872 case FILE_REGEX: 1873 case FILE_SEARCH: 1874 *p = getstr(ms, m, *p, action == FILE_COMPILE); 1875 if (*p == NULL) { 1876 if (ms->flags & MAGIC_CHECK) 1877 file_magwarn(ms, "cannot get string from `%s'", 1878 m->value.s); 1879 return -1; 1880 } 1881 return 0; 1882 case FILE_FLOAT: 1883 case FILE_BEFLOAT: 1884 case FILE_LEFLOAT: 1885 if (m->reln != 'x') { 1886 char *ep; 1887#ifdef HAVE_STRTOF 1888 m->value.f = strtof(*p, &ep); 1889#else 1890 m->value.f = (float)strtod(*p, &ep); 1891#endif 1892 *p = ep; 1893 } 1894 return 0; 1895 case FILE_DOUBLE: 1896 case FILE_BEDOUBLE: 1897 case FILE_LEDOUBLE: 1898 if (m->reln != 'x') { 1899 char *ep; 1900 m->value.d = strtod(*p, &ep); 1901 *p = ep; 1902 } 1903 return 0; 1904 default: 1905 if (m->reln != 'x') { 1906 char *ep; 1907 m->value.q = file_signextend(ms, m, 1908 (uint64_t)strtoull(*p, &ep, 0)); 1909 *p = ep; 1910 eatsize(p); 1911 } 1912 return 0; 1913 } 1914} 1915 1916/* 1917 * Convert a string containing C character escapes. Stop at an unescaped 1918 * space or tab. 1919 * Copy the converted version to "m->value.s", and the length in m->vallen. 1920 * Return updated scan pointer as function result. Warn if set. 1921 */ 1922private const char * 1923getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) 1924{ 1925 const char *origs = s; 1926 char *p = m->value.s; 1927 size_t plen = sizeof(m->value.s); 1928 char *origp = p; 1929 char *pmax = p + plen - 1; 1930 int c; 1931 int val; 1932 1933 while ((c = *s++) != '\0') { 1934 if (isspace((unsigned char) c)) 1935 break; 1936 if (p >= pmax) { 1937 file_error(ms, 0, "string too long: `%s'", origs); 1938 return NULL; 1939 } 1940 if (c == '\\') { 1941 switch(c = *s++) { 1942 1943 case '\0': 1944 if (warn) 1945 file_magwarn(ms, "incomplete escape"); 1946 goto out; 1947 1948 case '\t': 1949 if (warn) { 1950 file_magwarn(ms, 1951 "escaped tab found, use \\t instead"); 1952 warn = 0; /* already did */ 1953 } 1954 /*FALLTHROUGH*/ 1955 default: 1956 if (warn) { 1957 if (isprint((unsigned char)c)) { 1958 /* Allow escaping of 1959 * ``relations'' */ 1960 if (strchr("<>&^=!", c) == NULL 1961 && (m->type != FILE_REGEX || 1962 strchr("[]().*?^$|{}", c) 1963 == NULL)) { 1964 file_magwarn(ms, "no " 1965 "need to escape " 1966 "`%c'", c); 1967 } 1968 } else { 1969 file_magwarn(ms, 1970 "unknown escape sequence: " 1971 "\\%03o", c); 1972 } 1973 } 1974 /*FALLTHROUGH*/ 1975 /* space, perhaps force people to use \040? */ 1976 case ' ': 1977#if 0 1978 /* 1979 * Other things people escape, but shouldn't need to, 1980 * so we disallow them 1981 */ 1982 case '\'': 1983 case '"': 1984 case '?': 1985#endif 1986 /* Relations */ 1987 case '>': 1988 case '<': 1989 case '&': 1990 case '^': 1991 case '=': 1992 case '!': 1993 /* and baskslash itself */ 1994 case '\\': 1995 *p++ = (char) c; 1996 break; 1997 1998 case 'a': 1999 *p++ = '\a'; 2000 break; 2001 2002 case 'b': 2003 *p++ = '\b'; 2004 break; 2005 2006 case 'f': 2007 *p++ = '\f'; 2008 break; 2009 2010 case 'n': 2011 *p++ = '\n'; 2012 break; 2013 2014 case 'r': 2015 *p++ = '\r'; 2016 break; 2017 2018 case 't': 2019 *p++ = '\t'; 2020 break; 2021 2022 case 'v': 2023 *p++ = '\v'; 2024 break; 2025 2026 /* \ and up to 3 octal digits */ 2027 case '0': 2028 case '1': 2029 case '2': 2030 case '3': 2031 case '4': 2032 case '5': 2033 case '6': 2034 case '7': 2035 val = c - '0'; 2036 c = *s++; /* try for 2 */ 2037 if (c >= '0' && c <= '7') { 2038 val = (val << 3) | (c - '0'); 2039 c = *s++; /* try for 3 */ 2040 if (c >= '0' && c <= '7') 2041 val = (val << 3) | (c-'0'); 2042 else 2043 --s; 2044 } 2045 else 2046 --s; 2047 *p++ = (char)val; 2048 break; 2049 2050 /* \x and up to 2 hex digits */ 2051 case 'x': 2052 val = 'x'; /* Default if no digits */ 2053 c = hextoint(*s++); /* Get next char */ 2054 if (c >= 0) { 2055 val = c; 2056 c = hextoint(*s++); 2057 if (c >= 0) 2058 val = (val << 4) + c; 2059 else 2060 --s; 2061 } else 2062 --s; 2063 *p++ = (char)val; 2064 break; 2065 } 2066 } else 2067 *p++ = (char)c; 2068 } 2069out: 2070 *p = '\0'; 2071 m->vallen = CAST(unsigned char, (p - origp)); 2072 if (m->type == FILE_PSTRING) 2073 m->vallen += (unsigned char)file_pstring_length_size(m); 2074 return s; 2075} 2076 2077 2078/* Single hex char to int; -1 if not a hex char. */ 2079private int 2080hextoint(int c) 2081{ 2082 if (!isascii((unsigned char) c)) 2083 return -1; 2084 if (isdigit((unsigned char) c)) 2085 return c - '0'; 2086 if ((c >= 'a') && (c <= 'f')) 2087 return c + 10 - 'a'; 2088 if (( c>= 'A') && (c <= 'F')) 2089 return c + 10 - 'A'; 2090 return -1; 2091} 2092 2093 2094/* 2095 * Print a string containing C character escapes. 2096 */ 2097protected void 2098file_showstr(FILE *fp, const char *s, size_t len) 2099{ 2100 char c; 2101 2102 for (;;) { 2103 if (len == ~0U) { 2104 c = *s++; 2105 if (c == '\0') 2106 break; 2107 } 2108 else { 2109 if (len-- == 0) 2110 break; 2111 c = *s++; 2112 } 2113 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 2114 (void) fputc(c, fp); 2115 else { 2116 (void) fputc('\\', fp); 2117 switch (c) { 2118 case '\a': 2119 (void) fputc('a', fp); 2120 break; 2121 2122 case '\b': 2123 (void) fputc('b', fp); 2124 break; 2125 2126 case '\f': 2127 (void) fputc('f', fp); 2128 break; 2129 2130 case '\n': 2131 (void) fputc('n', fp); 2132 break; 2133 2134 case '\r': 2135 (void) fputc('r', fp); 2136 break; 2137 2138 case '\t': 2139 (void) fputc('t', fp); 2140 break; 2141 2142 case '\v': 2143 (void) fputc('v', fp); 2144 break; 2145 2146 default: 2147 (void) fprintf(fp, "%.3o", c & 0377); 2148 break; 2149 } 2150 } 2151 } 2152} 2153 2154/* 2155 * eatsize(): Eat the size spec from a number [eg. 10UL] 2156 */ 2157private void 2158eatsize(const char **p) 2159{ 2160 const char *l = *p; 2161 2162 if (LOWCASE(*l) == 'u') 2163 l++; 2164 2165 switch (LOWCASE(*l)) { 2166 case 'l': /* long */ 2167 case 's': /* short */ 2168 case 'h': /* short */ 2169 case 'b': /* char/byte */ 2170 case 'c': /* char/byte */ 2171 l++; 2172 /*FALLTHROUGH*/ 2173 default: 2174 break; 2175 } 2176 2177 *p = l; 2178} 2179 2180/* 2181 * handle a compiled file. 2182 * return -1 = error 2183 * return 1 = memory structure you can free 2184 * return 3 = bundled library from PHP 2185 */ 2186private int 2187apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 2188 const char *fn) 2189{ 2190 uint32_t *ptr; 2191 uint32_t version; 2192 int needsbyteswap; 2193 char *dbname = NULL; 2194 void *mm = NULL; 2195 int ret = 0; 2196 php_stream *stream = NULL; 2197 php_stream_statbuf st; 2198 2199 2200 TSRMLS_FETCH(); 2201 2202 if (fn == NULL) { 2203 mm = (void *)&php_magic_database; 2204 ret = 3; 2205 goto internal_loaded; 2206 } 2207 2208#ifdef PHP_WIN32 2209 /* Don't bother on windows with php_stream_open_wrapper, 2210 return to give apprentice_load() a chance. */ 2211 if (php_stream_stat_path_ex(fn, 0, &st, NULL) == SUCCESS) { 2212 if (st.sb.st_mode & S_IFDIR) { 2213 goto error2; 2214 } 2215 } 2216#endif 2217 2218 dbname = mkdbname(ms, fn, 0); 2219 if (dbname == NULL) 2220 goto error2; 2221 2222#if PHP_API_VERSION < 20100412 2223 stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL); 2224#else 2225 stream = php_stream_open_wrapper((char *)fn, "rb", REPORT_ERRORS, NULL); 2226#endif 2227 2228 if (!stream) { 2229 goto error2; 2230 } 2231 2232 if (php_stream_stat(stream, &st) < 0) { 2233 file_error(ms, errno, "cannot stat `%s'", dbname); 2234 goto error1; 2235 } 2236 2237 if (st.sb.st_size < 8) { 2238 file_error(ms, 0, "file `%s' is too small", dbname); 2239 goto error1; 2240 } 2241 2242 mm = emalloc((size_t)st.sb.st_size); 2243 if (php_stream_read(stream, mm, (size_t)st.sb.st_size) != (size_t)st.sb.st_size) { 2244 file_badread(ms); 2245 ret = 1; 2246 goto error1; 2247 } 2248 ret = 1; 2249 2250 php_stream_close(stream); 2251 stream = NULL; 2252 2253internal_loaded: 2254 *magicp = mm; 2255 ptr = (uint32_t *)(void *)*magicp; 2256 if (*ptr != MAGICNO) { 2257 if (swap4(*ptr) != MAGICNO) { 2258 file_error(ms, 0, "bad magic in `%s'", dbname); 2259 goto error1; 2260 } 2261 needsbyteswap = 1; 2262 } else 2263 needsbyteswap = 0; 2264 if (needsbyteswap) 2265 version = swap4(ptr[1]); 2266 else 2267 version = ptr[1]; 2268 if (version != VERSIONNO) { 2269 file_error(ms, 0, "File %d.%d supports only version %d magic " 2270 "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel, 2271 VERSIONNO, dbname, version); 2272 goto error1; 2273 } 2274 2275 /* php_magic_database is a const, performing writes will segfault. This is for big-endian 2276 machines only, PPC and Sparc specifically. Consider static variable or MINIT in 2277 future. */ 2278 if (needsbyteswap && fn == NULL) { 2279 mm = emalloc(sizeof(php_magic_database)); 2280 mm = memcpy(mm, php_magic_database, sizeof(php_magic_database)); 2281 *magicp = mm; 2282 ret = 1; 2283 } 2284 2285 if (fn == NULL) { 2286 *nmagicp = (sizeof(php_magic_database) / sizeof(struct magic)); 2287 } else { 2288 *nmagicp = (uint32_t)(st.sb.st_size / sizeof(struct magic)); 2289 } 2290 if (*nmagicp > 0) { 2291 (*nmagicp)--; 2292 } 2293 (*magicp)++; 2294 if (needsbyteswap) { 2295 byteswap(*magicp, *nmagicp); 2296 } 2297 2298 if (dbname) { 2299 efree(dbname); 2300 } 2301 return ret; 2302 2303error1: 2304 if (stream) { 2305 php_stream_close(stream); 2306 } 2307 2308 if (mm && ret == 1) { 2309 efree(mm); 2310 } else { 2311 *magicp = NULL; 2312 *nmagicp = 0; 2313 } 2314error2: 2315 if (dbname) { 2316 efree(dbname); 2317 } 2318 return -1; 2319} 2320 2321private const uint32_t ar[] = { 2322 MAGICNO, VERSIONNO 2323}; 2324/* 2325 * handle an mmaped file. 2326 */ 2327private int 2328apprentice_compile(struct magic_set *ms, struct magic **magicp, 2329 uint32_t *nmagicp, const char *fn) 2330{ 2331 char *dbname; 2332 int rv = -1; 2333 php_stream *stream; 2334 2335 TSRMLS_FETCH(); 2336 2337 dbname = mkdbname(ms, fn, 0); 2338 2339 if (dbname == NULL) 2340 goto out; 2341 2342/* wb+ == O_WRONLY|O_CREAT|O_TRUNC|O_BINARY */ 2343#if PHP_API_VERSION < 20100412 2344 stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS|ENFORCE_SAFE_MODE, NULL); 2345#else 2346 stream = php_stream_open_wrapper((char *)fn, "wb+", REPORT_ERRORS, NULL); 2347#endif 2348 2349 if (!stream) { 2350 file_error(ms, errno, "cannot open `%s'", dbname); 2351 goto out; 2352 } 2353 2354 if (php_stream_write(stream, (char *)ar, sizeof(ar)) != (ssize_t)sizeof(ar)) { 2355 file_error(ms, errno, "error writing `%s'", dbname); 2356 goto out; 2357 } 2358 2359 if (php_stream_seek(stream,(off_t)sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) { 2360 file_error(ms, errno, "error seeking `%s'", dbname); 2361 goto out; 2362 } 2363 2364 if (php_stream_write(stream, (char *)*magicp, (sizeof(struct magic) * *nmagicp) != (ssize_t)(sizeof(struct magic) * *nmagicp))) { 2365 file_error(ms, errno, "error writing `%s'", dbname); 2366 goto out; 2367 } 2368 2369 php_stream_close(stream); 2370 2371 rv = 0; 2372out: 2373 efree(dbname); 2374 return rv; 2375} 2376 2377private const char ext[] = ".mgc"; 2378/* 2379 * make a dbname 2380 */ 2381private char * 2382mkdbname(struct magic_set *ms, const char *fn, int strip) 2383{ 2384 const char *p, *q; 2385 char *buf; 2386 TSRMLS_FETCH(); 2387 2388 if (strip) { 2389 if ((p = strrchr(fn, '/')) != NULL) 2390 fn = ++p; 2391 } 2392 2393 for (q = fn; *q; q++) 2394 continue; 2395 /* Look for .mgc */ 2396 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 2397 if (*p != *q) 2398 break; 2399 2400 /* Did not find .mgc, restore q */ 2401 if (p >= ext) 2402 while (*q) 2403 q++; 2404 2405 q++; 2406 /* Compatibility with old code that looked in .mime */ 2407 if (ms->flags & MAGIC_MIME) { 2408 spprintf(&buf, MAXPATHLEN, "%.*s.mime%s", (int)(q - fn), fn, ext); 2409#ifdef PHP_WIN32 2410 if (VCWD_ACCESS(buf, R_OK) == 0) { 2411#else 2412 if (VCWD_ACCESS(buf, R_OK) != -1) { 2413#endif 2414 ms->flags &= MAGIC_MIME_TYPE; 2415 return buf; 2416 } 2417 efree(buf); 2418 } 2419 spprintf(&buf, MAXPATHLEN, "%.*s%s", (int)(q - fn), fn, ext); 2420 2421 /* Compatibility with old code that looked in .mime */ 2422 if (strstr(p, ".mime") != NULL) 2423 ms->flags &= MAGIC_MIME_TYPE; 2424 return buf; 2425} 2426 2427/* 2428 * Byteswap an mmap'ed file if needed 2429 */ 2430private void 2431byteswap(struct magic *magic, uint32_t nmagic) 2432{ 2433 uint32_t i; 2434 for (i = 0; i < nmagic; i++) 2435 bs1(&magic[i]); 2436} 2437 2438/* 2439 * swap a short 2440 */ 2441private uint16_t 2442swap2(uint16_t sv) 2443{ 2444 uint16_t rv; 2445 uint8_t *s = (uint8_t *)(void *)&sv; 2446 uint8_t *d = (uint8_t *)(void *)&rv; 2447 d[0] = s[1]; 2448 d[1] = s[0]; 2449 return rv; 2450} 2451 2452/* 2453 * swap an int 2454 */ 2455private uint32_t 2456swap4(uint32_t sv) 2457{ 2458 uint32_t rv; 2459 uint8_t *s = (uint8_t *)(void *)&sv; 2460 uint8_t *d = (uint8_t *)(void *)&rv; 2461 d[0] = s[3]; 2462 d[1] = s[2]; 2463 d[2] = s[1]; 2464 d[3] = s[0]; 2465 return rv; 2466} 2467 2468/* 2469 * swap a quad 2470 */ 2471private uint64_t 2472swap8(uint64_t sv) 2473{ 2474 uint64_t rv; 2475 uint8_t *s = (uint8_t *)(void *)&sv; 2476 uint8_t *d = (uint8_t *)(void *)&rv; 2477#if 0 2478 d[0] = s[3]; 2479 d[1] = s[2]; 2480 d[2] = s[1]; 2481 d[3] = s[0]; 2482 d[4] = s[7]; 2483 d[5] = s[6]; 2484 d[6] = s[5]; 2485 d[7] = s[4]; 2486#else 2487 d[0] = s[7]; 2488 d[1] = s[6]; 2489 d[2] = s[5]; 2490 d[3] = s[4]; 2491 d[4] = s[3]; 2492 d[5] = s[2]; 2493 d[6] = s[1]; 2494 d[7] = s[0]; 2495#endif 2496 return rv; 2497} 2498 2499/* 2500 * byteswap a single magic entry 2501 */ 2502private void 2503bs1(struct magic *m) 2504{ 2505 m->cont_level = swap2(m->cont_level); 2506 m->offset = swap4((uint32_t)m->offset); 2507 m->in_offset = swap4((uint32_t)m->in_offset); 2508 m->lineno = swap4((uint32_t)m->lineno); 2509 if (IS_LIBMAGIC_STRING(m->type)) { 2510 m->str_range = swap4(m->str_range); 2511 m->str_flags = swap4(m->str_flags); 2512 } 2513 else { 2514 m->value.q = swap8(m->value.q); 2515 m->num_mask = swap8(m->num_mask); 2516 } 2517} 2518 2519protected size_t 2520file_pstring_length_size(const struct magic *m) 2521{ 2522 switch (m->str_flags & PSTRING_LEN) { 2523 case PSTRING_1_LE: 2524 return 1; 2525 case PSTRING_2_LE: 2526 case PSTRING_2_BE: 2527 return 2; 2528 case PSTRING_4_LE: 2529 case PSTRING_4_BE: 2530 return 4; 2531 default: 2532 abort(); /* Impossible */ 2533 return 1; 2534 } 2535} 2536protected size_t 2537file_pstring_get_length(const struct magic *m, const char *s) 2538{ 2539 size_t len = 0; 2540 2541 switch (m->str_flags & PSTRING_LEN) { 2542 case PSTRING_1_LE: 2543 len = *s; 2544 break; 2545 case PSTRING_2_LE: 2546 len = (s[1] << 8) | s[0]; 2547 break; 2548 case PSTRING_2_BE: 2549 len = (s[0] << 8) | s[1]; 2550 break; 2551 case PSTRING_4_LE: 2552 len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; 2553 break; 2554 case PSTRING_4_BE: 2555 len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; 2556 break; 2557 default: 2558 abort(); /* Impossible */ 2559 } 2560 2561 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) 2562 len -= file_pstring_length_size(m); 2563 2564 return len; 2565} 2566