1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Zeev Suraski <zeev@zend.com>                                |
16   |          Jani Taskinen <jani@php.net>                                |
17   |          Marcus Boerger <helly@php.net>                              |
18   |          Nuno Lopes <nlopess@php.net>                                |
19   |          Scott MacVicar <scottmac@php.net>                           |
20   +----------------------------------------------------------------------+
21*/
22
23/* $Id$ */
24
25#include <errno.h>
26#include "zend.h"
27#include "zend_API.h"
28#include "zend_globals.h"
29#include <zend_ini_parser.h>
30#include "zend_ini_scanner.h"
31
/* Scanner tracing hook. With the condition below left at 0, YYDEBUG expands to
 * nothing; changing it to 1 makes YYDEBUG print the state number and the
 * current character via printf(). */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
37
38#include "zend_ini_scanner_defs.h"
39
/* Glue between the re2c-generated scanner and the scanner globals (SCNG). */
#define YYCTYPE   unsigned char
/* allow the scanner to read one null byte after the end of the string (from ZEND_MMAP_AHEAD)
 * so that it will be able to terminate to match the current token (e.g. non-enclosed string) */
#define YYFILL(n) { if (YYCURSOR > YYLIMIT) return 0; }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* Current start condition; the argument is parenthesized so that any
 * expression can be assigned safely. */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Maps a condition name to its yyc-prefixed constant. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Truncate the current token to its first x bytes and rewind the cursor
 * accordingly. x is evaluated twice, so it must not have side effects. */
#define yyless(x)    do {   YYCURSOR = (unsigned char*)yytext + (x); \
                            yyleng   = (unsigned int)(x); } while(0)
60
61/* #define yymore()     goto yymore_restart */
62
/* Compile-time sanity check: ZEND_MMAP_AHEAD must be at least YYMAXFILL + 1.
   If this error is triggered you should increase the ZEND_MMAP_AHEAD value
   in the zend_streams.h file.
   NOTE(review): YYMAXFILL is presumably emitted by re2c in place of the
   "max:re2c" directive below - confirm against the generated scanner. */
/*!max:re2c */
#if ZEND_MMAP_AHEAD < (YYMAXFILL + 1)
# error ZEND_MMAP_AHEAD should be greater than YYMAXFILL
#endif
69
70
71/* How it works (for the core ini directives):
72 * ===========================================
73 *
74 * 1. Scanner scans file for tokens and passes them to parser.
75 * 2. Parser parses the tokens and passes the name/value pairs to the callback
76 *    function which stores them in the configuration hash table.
77 * 3. Later REGISTER_INI_ENTRIES() is called which triggers the actual
78 *    registering of ini entries and uses zend_get_configuration_directive()
79 *    to fetch the previously stored name/value pair from configuration hash table
80 *    and registers the static ini entries which match the name to the value
81 *    into EG(ini_directives) hash table.
 * 4. PATH section entries are applied per-request from bottom to top, each overriding
 *    the previous one if it exists. zend_alter_ini_entry() is called for each entry.
 *    Settings in a PATH section are ZEND_INI_SYSTEM accessible and thus mimic the
 *    php_admin_* directives used within Apache httpd.conf when PHP is compiled as
 *    a module for Apache.
87 * 5. User defined ini files (like .htaccess for apache) are parsed for each request and
88 *    stored in separate hash defined by SAPI.
89 */
90
91/* TODO: (ordered by importance :-)
92 * ===============================================================================
93 *
94 *  - Separate constant lookup totally from plain strings (using CONSTANT pattern)
95 *  - Add #if .. #else .. #endif and ==, !=, <, > , <=, >= operators
96 *  - Add #include "some.ini"
97 *  - Allow variables to refer to options also when using parse_ini_file()
98 *
99 */
100
/* Globals Macros */
/* SCNG(field) is the accessor used throughout this file for scanner state; it
 * is an alias for INI_SCNG. Under ZTS only a resource id is defined here,
 * otherwise the globals structure itself is defined. */
#define SCNG    INI_SCNG
#ifdef ZTS
ZEND_API ts_rsrc_id ini_scanner_globals_id;
#else
ZEND_API zend_ini_scanner_globals ini_scanner_globals;
#endif
108
109/* Eat leading whitespace */
110#define EAT_LEADING_WHITESPACE()                     \
111    while (yytext[0]) {                              \
112        if (yytext[0] == ' ' || yytext[0] == '\t') { \
113            SCNG(yy_text)++;                         \
114            yyleng--;                                \
115        } else {                                     \
116            break;                                   \
117        }                                            \
118    }
119
/* Eat trailing whitespace + extra char.
 * Shrinks yyleng while the last byte of the token is a space, tab, CR, LF or
 * the extra character ch. Passing 'X' as ch disables the extra character. */
#define EAT_TRAILING_WHITESPACE_EX(ch)                          \
    for (; yyleng > 0; yyleng--) {                              \
        const char last_ch_ = yytext[yyleng - 1];               \
        if (last_ch_ == ' '  || last_ch_ == '\t' ||             \
            last_ch_ == '\r' || last_ch_ == '\n') {             \
            continue;                                           \
        }                                                       \
        if (ch != 'X' && last_ch_ == ch) {                      \
            continue;                                           \
        }                                                       \
        break;                                                  \
    }

/* Eat trailing whitespace only (no extra character). */
#define EAT_TRAILING_WHITESPACE()   EAT_TRAILING_WHITESPACE_EX('X')
134
/* Store a new string built from the first len bytes of str into the zval
 * retval. The trailing 1 passed to zend_string_init() is presumably the
 * "persistent" flag - confirm against the zend_string API. */
#define zend_ini_copy_value(retval, str, len)   \
    ZVAL_NEW_STR(retval, zend_string_init(str, len, 1))
137
138
/* Fill ini_lval from str/len and return the token id `type` from the enclosing
 * function. In ZEND_INI_SCANNER_TYPED mode the value goes through
 * zend_ini_copy_typed_value(), which picks the zval type from the token id;
 * in every other mode it is copied as a plain string.
 * Relies on a variable named ini_lval being in scope at the expansion site. */
#define RETURN_TOKEN(type, str, len) {                       \
    if (SCNG(scanner_mode) == ZEND_INI_SCANNER_TYPED) {      \
        zend_ini_copy_typed_value(ini_lval, type, str, len); \
    } else {                                                 \
        zend_ini_copy_value(ini_lval, str, len);             \
    }                                                        \
    return type;                                             \
}
147
148static inline int convert_to_number(zval *retval, const char *str, const int str_len)
149{
150    zend_uchar type;
151    int overflow;
152    zend_long lval;
153    double dval;
154
155    if ((type = is_numeric_string_ex(str, str_len, &lval, &dval, 0, &overflow)) != 0) {
156        if (type == IS_LONG) {
157            ZVAL_LONG(retval, lval);
158            return SUCCESS;
159        } else if (type == IS_DOUBLE && !overflow) {
160            ZVAL_DOUBLE(retval, dval);
161            return SUCCESS;
162        }
163    }
164
165    return FAILURE;
166}
167
168static void zend_ini_copy_typed_value(zval *retval, const int type, const char *str, int len)
169{
170    switch (type) {
171        case BOOL_FALSE:
172        case BOOL_TRUE:
173            ZVAL_BOOL(retval, type == BOOL_TRUE);
174            break;
175
176        case NULL_NULL:
177            ZVAL_NULL(retval);
178            break;
179
180        case TC_NUMBER:
181            if (convert_to_number(retval, str, len) == SUCCESS) {
182                break;
183            }
184            /* intentional fall-through */
185        default:
186            zend_ini_copy_value(retval, str, len);
187    }
188}
189
190static void _yy_push_state(int new_state)
191{
192    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
193    YYSETCONDITION(new_state);
194}
195
/* Convenience wrapper: takes a bare condition name, prefixes it with yyc and
 * pushes the resulting state via _yy_push_state(). */
#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
197
198static void yy_pop_state(void)
199{
200    int *stack_state = zend_stack_top(&SCNG(state_stack));
201    YYSETCONDITION(*stack_state);
202    zend_stack_del_top(&SCNG(state_stack));
203}
204
205static void yy_scan_buffer(char *str, unsigned int len)
206{
207    YYCURSOR = (YYCTYPE*)str;
208    SCNG(yy_start) = YYCURSOR;
209    YYLIMIT  = YYCURSOR + len;
210}
211
/* Shorthand for the filename slot of the scanner globals; it is set in
 * init_ini_scanner() and released in shutdown_ini_scanner(). */
#define ini_filename SCNG(filename)
213
214/* {{{ init_ini_scanner()
215*/
216static int init_ini_scanner(int scanner_mode, zend_file_handle *fh)
217{
218    /* Sanity check */
219    if (scanner_mode != ZEND_INI_SCANNER_NORMAL && scanner_mode != ZEND_INI_SCANNER_RAW && scanner_mode != ZEND_INI_SCANNER_TYPED) {
220        zend_error(E_WARNING, "Invalid scanner mode");
221        return FAILURE;
222    }
223
224    SCNG(lineno) = 1;
225    SCNG(scanner_mode) = scanner_mode;
226    SCNG(yy_in) = fh;
227
228    if (fh != NULL) {
229        ini_filename = zend_strndup(fh->filename, strlen(fh->filename));
230    } else {
231        ini_filename = NULL;
232    }
233
234    zend_stack_init(&SCNG(state_stack), sizeof(int));
235    BEGIN(INITIAL);
236
237    return SUCCESS;
238}
239/* }}} */
240
241/* {{{ shutdown_ini_scanner()
242*/
243void shutdown_ini_scanner(void)
244{
245    zend_stack_destroy(&SCNG(state_stack));
246    if (ini_filename) {
247        free(ini_filename);
248    }
249}
250/* }}} */
251
252/* {{{ zend_ini_scanner_get_lineno()
253*/
254int zend_ini_scanner_get_lineno(void)
255{
256    return SCNG(lineno);
257}
258/* }}} */
259
260/* {{{ zend_ini_scanner_get_filename()
261*/
262char *zend_ini_scanner_get_filename(void)
263{
264    return ini_filename ? ini_filename : "Unknown";
265}
266/* }}} */
267
268/* {{{ zend_ini_open_file_for_scanning()
269*/
270int zend_ini_open_file_for_scanning(zend_file_handle *fh, int scanner_mode)
271{
272    char *buf;
273    size_t size;
274
275    if (zend_stream_fixup(fh, &buf, &size) == FAILURE) {
276        return FAILURE;
277    }
278
279    if (init_ini_scanner(scanner_mode, fh) == FAILURE) {
280        zend_file_handle_dtor(fh);
281        return FAILURE;
282    }
283
284    yy_scan_buffer(buf, (unsigned int)size);
285
286    return SUCCESS;
287}
288/* }}} */
289
290/* {{{ zend_ini_prepare_string_for_scanning()
291*/
292int zend_ini_prepare_string_for_scanning(char *str, int scanner_mode)
293{
294    int len = (int)strlen(str);
295
296    if (init_ini_scanner(scanner_mode, NULL) == FAILURE) {
297        return FAILURE;
298    }
299
300    yy_scan_buffer(str, len);
301
302    return SUCCESS;
303}
304/* }}} */
305
/* {{{ zend_ini_escape_string()
 * Copy str/len into lval as a string and resolve backslash escapes in place.
 *
 * Handling of a backslash followed by a character c:
 *   - c is a double quote equal to quote_type, a backslash or a dollar sign:
 *     the backslash is dropped, c is kept and the string length shrinks by 1.
 *   - c is a double quote different from quote_type, or any other character:
 *     both the backslash and c are kept.
 *   - the backslash is the last byte of the input: it is kept as is.
 * The scanner line counter is advanced for every LF, and for every CR that is
 * not immediately followed by LF, found in the value.
 */
static void zend_ini_escape_string(zval *lval, char *str, int len, char quote_type)
{
    /* s reads, t writes; t never gets ahead of s, so the rewrite is in place */
    register char *s, *t;
    char *end;

    zend_ini_copy_value(lval, str, len);

    /* convert escape sequences */
    s = t = Z_STRVAL_P(lval);
    end = s + Z_STRLEN_P(lval);

    while (s < end) {
        if (*s == '\\') {
            s++;
            if (s >= end) {
                /* trailing backslash with nothing after it: keep it literally */
                *t++ = '\\';
                continue;
            }
            switch (*s) {
                case '"':
                    if (*s != quote_type) {
                        /* not the active quote character: leave the escape untouched */
                        *t++ = '\\';
                        *t++ = *s;
                        break;
                    }
                    /* escaped active quote: fall through and drop the backslash */
                case '\\':
                case '$':
                    *t++ = *s;
                    Z_STRLEN_P(lval)--;
                    break;
                default:
                    /* unknown escape: keep both characters */
                    *t++ = '\\';
                    *t++ = *s;
                    break;
            }
        } else {
            *t++ = *s;
        }
        /* count line breaks inside the value; a CR LF pair is counted once, on the LF */
        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
            SCNG(lineno)++;
        }
        s++;
    }
    /* terminate at the (possibly shorter) new end */
    *t = 0;
}
/* }}} */
354
355int ini_lex(zval *ini_lval)
356{
357restart:
358    SCNG(yy_text) = YYCURSOR;
359
360/* yymore_restart: */
361    /* detect EOF */
362    if (YYCURSOR >= YYLIMIT) {
363        if (YYSTATE == STATE(ST_VALUE) || YYSTATE == STATE(ST_RAW)) {
364            BEGIN(INITIAL);
365            return END_OF_LINE;
366        }
367        return 0;
368    }
369
370    /* Eat any UTF-8 BOM we find in the first 3 bytes */
371    if (YYCURSOR == SCNG(yy_start) && YYCURSOR + 3 < YYLIMIT) {
372        if (memcmp(YYCURSOR, "\xef\xbb\xbf", 3) == 0) {
373            YYCURSOR += 3;
374            goto restart;
375        }
376    }
377/*!re2c
378re2c:yyfill:check = 0;
379LNUM [0-9]+
380DNUM ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*)
381NUMBER [-]?{LNUM}|{DNUM}
382ANY_CHAR (.|[\n\t])
383NEWLINE ("\r"|"\n"|"\r\n")
384TABS_AND_SPACES [ \t]
385WHITESPACE [ \t]+
386CONSTANT [a-zA-Z_][a-zA-Z0-9_]*
387LABEL [^=\n\r\t;&|^$~(){}!"\[]+
388TOKENS [:,.\[\]"'()&|^+-/*=%$!~<>?@{}]
389OPERATORS [&|^~()!]
390DOLLAR_CURLY "${"
391
392SECTION_RAW_CHARS [^\]\n\r]
393SINGLE_QUOTED_CHARS [^']
394RAW_VALUE_CHARS [^\n\r;\000]
395
396LITERAL_DOLLAR ("$"([^{\000]|("\\"{ANY_CHAR})))
397VALUE_CHARS         ([^$= \t\n\r;&|^~()!"'\000]|{LITERAL_DOLLAR})
398SECTION_VALUE_CHARS ([^$\n\r;"'\]\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR})
399
400<!*> := yyleng = YYCURSOR - SCNG(yy_text);
401
402<INITIAL>"[" { /* Section start */
403    /* Enter section data lookup state */
404    if (SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW) {
405        yy_push_state(ST_SECTION_RAW);
406    } else {
407        yy_push_state(ST_SECTION_VALUE);
408    }
409    return TC_SECTION;
410}
411
412<ST_VALUE,ST_SECTION_VALUE,ST_OFFSET>"'"{SINGLE_QUOTED_CHARS}+"'" { /* Raw string */
413    /* Eat leading and trailing single quotes */
414    if (yytext[0] == '\'' && yytext[yyleng - 1] == '\'') {
415        SCNG(yy_text)++;
416        yyleng = yyleng - 2;
417    }
418    RETURN_TOKEN(TC_RAW, yytext, yyleng);
419}
420
421<ST_SECTION_RAW,ST_SECTION_VALUE>"]"{TABS_AND_SPACES}*{NEWLINE}? { /* End of section */
422    BEGIN(INITIAL);
423    SCNG(lineno)++;
424    return ']';
425}
426
427<INITIAL>{LABEL}"["{TABS_AND_SPACES}* { /* Start of option with offset */
428    /* Eat leading whitespace */
429    EAT_LEADING_WHITESPACE();
430
431    /* Eat trailing whitespace and [ */
432    EAT_TRAILING_WHITESPACE_EX('[');
433
434    /* Enter offset lookup state */
435    yy_push_state(ST_OFFSET);
436
437    RETURN_TOKEN(TC_OFFSET, yytext, yyleng);
438}
439
440<ST_OFFSET>{TABS_AND_SPACES}*"]" { /* End of section or an option offset */
441    BEGIN(INITIAL);
442    return ']';
443}
444
445<ST_DOUBLE_QUOTES,ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{DOLLAR_CURLY} { /* Variable start */
446    yy_push_state(ST_VARNAME);
447    return TC_DOLLAR_CURLY;
448}
449
450<ST_VARNAME>{LABEL} { /* Variable name */
451    /* Eat leading whitespace */
452    EAT_LEADING_WHITESPACE();
453
454    /* Eat trailing whitespace */
455    EAT_TRAILING_WHITESPACE();
456
457    RETURN_TOKEN(TC_VARNAME, yytext, yyleng);
458}
459
460<ST_VARNAME>"}" { /* Variable end */
461    yy_pop_state();
462    return '}';
463}
464
465<INITIAL,ST_VALUE>("true"|"on"|"yes"){TABS_AND_SPACES}* { /* TRUE value (when used outside option value/offset this causes parse error!) */
466    RETURN_TOKEN(BOOL_TRUE, "1", 1);
467}
468
469<INITIAL,ST_VALUE>("false"|"off"|"no"|"none"){TABS_AND_SPACES}* { /* FALSE value (when used outside option value/offset this causes parse error!)*/
470    RETURN_TOKEN(BOOL_FALSE, "", 0);
471}
472
473<INITIAL,ST_VALUE>("null"){TABS_AND_SPACES}* {
474    RETURN_TOKEN(NULL_NULL, "", 0);
475}
476
477<INITIAL>{LABEL} { /* Get option name */
478    /* Eat leading whitespace */
479    EAT_LEADING_WHITESPACE();
480
481    /* Eat trailing whitespace */
482    EAT_TRAILING_WHITESPACE();
483
484    RETURN_TOKEN(TC_LABEL, yytext, yyleng);
485}
486
487<INITIAL>{TABS_AND_SPACES}*[=]{TABS_AND_SPACES}* { /* Start option value */
488    if (SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW) {
489        yy_push_state(ST_RAW);
490    } else {
491        yy_push_state(ST_VALUE);
492    }
493    return '=';
494}
495
496<ST_RAW>{RAW_VALUE_CHARS} { /* Raw value, only used when SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW. */
497    unsigned char *sc = NULL;
498    while (YYCURSOR < YYLIMIT) {
499        switch (*YYCURSOR) {
500            case '\n':
501            case '\r':
502                goto end_raw_value_chars;
503                break;
504            case ';':
505                if (sc == NULL) {
506                    sc = YYCURSOR;
507                }
508                /* no break */
509            default:
510                YYCURSOR++;
511                break;
512        }
513    }
514end_raw_value_chars:
515    yyleng = YYCURSOR - SCNG(yy_text);
516
517    /* Eat trailing semicolons */
518    while (yytext[yyleng - 1] == ';') {
519        yyleng--;
520    }
521
522    /* Eat leading and trailing double quotes */
523    if (yytext[0] == '"' && yytext[yyleng - 1] == '"') {
524        SCNG(yy_text)++;
525        yyleng = yyleng - 2;
526    } else if (sc) {
527        YYCURSOR = sc;
528        yyleng = YYCURSOR - SCNG(yy_text);
529    }
530    RETURN_TOKEN(TC_RAW, yytext, yyleng);
531}
532
533<ST_SECTION_RAW>{SECTION_RAW_CHARS}+ { /* Raw value, only used when SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW. */
534    RETURN_TOKEN(TC_RAW, yytext, yyleng);
535}
536
537<ST_VALUE,ST_RAW>{TABS_AND_SPACES}*{NEWLINE} { /* End of option value */
538    BEGIN(INITIAL);
539    SCNG(lineno)++;
540    return END_OF_LINE;
541}
542
543<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{CONSTANT} { /* Get constant option value */
544    RETURN_TOKEN(TC_CONSTANT, yytext, yyleng);
545}
546
547<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{NUMBER} { /* Get number option value as string */
548    RETURN_TOKEN(TC_NUMBER, yytext, yyleng);
549}
550
551<INITIAL>{TOKENS} { /* Disallow these chars outside option values */
552    return yytext[0];
553}
554
555<ST_VALUE>{OPERATORS}{TABS_AND_SPACES}* { /* Boolean operators */
556    return yytext[0];
557}
558
559<ST_VALUE>[=] { /* Make = used in option value to trigger error */
560    yyless(0);
561    BEGIN(INITIAL);
562    return END_OF_LINE;
563}
564
565<ST_VALUE>{VALUE_CHARS}+ { /* Get everything else as option/offset value */
566    RETURN_TOKEN(TC_STRING, yytext, yyleng);
567}
568
569<ST_SECTION_VALUE,ST_OFFSET>{SECTION_VALUE_CHARS}+ { /* Get rest as section/offset value */
570    RETURN_TOKEN(TC_STRING, yytext, yyleng);
571}
572
573<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{TABS_AND_SPACES}*["] { /* Double quoted '"' string start */
574    yy_push_state(ST_DOUBLE_QUOTES);
575    return '"';
576}
577
578<ST_DOUBLE_QUOTES>["]{TABS_AND_SPACES}* { /* Double quoted '"' string ends */
579    yy_pop_state();
580    return '"';
581}
582
583<ST_DOUBLE_QUOTES>[^] { /* Escape double quoted string contents */
584    if (YYCURSOR > YYLIMIT) {
585        return 0;
586    }
587
588    while (YYCURSOR < YYLIMIT) {
589        switch (*YYCURSOR++) {
590            case '"':
591                if (YYCURSOR < YYLIMIT && YYCURSOR[-2] == '\\' && *YYCURSOR != '\r' && *YYCURSOR != '\n') {
592                    continue;
593                }
594                break;
595            case '$':
596                if (*YYCURSOR == '{') {
597                    break;
598                }
599                continue;
600            case '\\':
601                if (YYCURSOR < YYLIMIT && *YYCURSOR != '"') {
602                    YYCURSOR++;
603                }
604                /* fall through */
605            default:
606                continue;
607        }
608
609        YYCURSOR--;
610        break;
611    }
612
613    yyleng = YYCURSOR - SCNG(yy_text);
614
615    zend_ini_escape_string(ini_lval, yytext, yyleng, '"');
616    return TC_QUOTED_STRING;
617}
618
619<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{WHITESPACE} {
620    RETURN_TOKEN(TC_WHITESPACE, yytext, yyleng);
621}
622
623<INITIAL,ST_RAW>{TABS_AND_SPACES}+ {
624    /* eat whitespace */
625    goto restart;
626}
627
628<INITIAL>{TABS_AND_SPACES}*{NEWLINE} {
629    SCNG(lineno)++;
630    return END_OF_LINE;
631}
632
633<INITIAL,ST_VALUE,ST_RAW>{TABS_AND_SPACES}*[;][^\r\n]*{NEWLINE} { /* Comment */
634    BEGIN(INITIAL);
635    SCNG(lineno)++;
636    return END_OF_LINE;
637}
638
639<INITIAL>{TABS_AND_SPACES}*[#][^\r\n]*{NEWLINE} { /* #Comment */
640    zend_error(E_DEPRECATED, "Comments starting with '#' are deprecated in %s on line %d", zend_ini_scanner_get_filename(), SCNG(lineno));
641    BEGIN(INITIAL);
642    SCNG(lineno)++;
643    return END_OF_LINE;
644}
645
646<ST_VALUE,ST_RAW>[^] { /* End of option value (if EOF is reached before EOL */
647    BEGIN(INITIAL);
648    return END_OF_LINE;
649}
650
651<*>[^] {
652    return 0;
653}
654
655*/
656}
657