1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2013 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Zeev Suraski <zeev@zend.com>                                |
16   |          Jani Taskinen <jani@php.net>                                |
17   |          Marcus Boerger <helly@php.net>                              |
18   |          Nuno Lopes <nlopess@php.net>                                |
19   |          Scott MacVicar <scottmac@php.net>                           |
20   +----------------------------------------------------------------------+
21*/
22
23/* $Id$ */
24
25#include <errno.h>
26#include "zend.h"
27#include "zend_globals.h"
28#include <zend_ini_parser.h>
29#include "zend_ini_scanner.h"
30
31#if 0
32# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
33#else
34# define YYDEBUG(s, c)
35#endif
36
37#include "zend_ini_scanner_defs.h"
38
/* re2c interface: character type, refill hook and cursor/limit/marker accessors,
 * all backed by fields of the scanner globals (SCNG). */
#define YYCTYPE   unsigned char
/* allow the scanner to read one null byte after the end of the string (from ZEND_MMAP_AHEAD)
 * so that it will be able to terminate to match the current token (e.g. non-enclosed string) */
#define YYFILL(n) { if (YYCURSOR > YYLIMIT) return 0; }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = s

#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Rewind the cursor so that only the first x bytes of the current token are
 * consumed. The argument is parenthesized so that expressions such as
 * yyless(n - 1) expand correctly. */
#define yyless(x)    do {   YYCURSOR = (unsigned char*)yytext + (x); \
                            yyleng   = (unsigned int)(x); } while(0)
59
60/* #define yymore()     goto yymore_restart */
61
62/* perform sanity check. If this message is triggered you should
63   increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
64/*!max:re2c */
65#if ZEND_MMAP_AHEAD < (YYMAXFILL + 1)
66# error ZEND_MMAP_AHEAD should be greater than YYMAXFILL
67#endif
68
69
70/* How it works (for the core ini directives):
71 * ===========================================
72 *
73 * 1. Scanner scans file for tokens and passes them to parser.
74 * 2. Parser parses the tokens and passes the name/value pairs to the callback
75 *    function which stores them in the configuration hash table.
76 * 3. Later REGISTER_INI_ENTRIES() is called which triggers the actual
77 *    registering of ini entries and uses zend_get_configuration_directive()
78 *    to fetch the previously stored name/value pair from configuration hash table
79 *    and registers the static ini entries which match the name to the value
80 *    into EG(ini_directives) hash table.
 * 4. PATH section entries are used per-request from bottom to top, each overriding
 *    the previous one if one exists. zend_alter_ini_entry() is called for each entry.
 *    Settings in PATH section are ZEND_INI_SYSTEM accessible and thus mimic the
 *    php_admin_* directives used within Apache httpd.conf when PHP is compiled as
 *    a module for Apache.
86 * 5. User defined ini files (like .htaccess for apache) are parsed for each request and
87 *    stored in separate hash defined by SAPI.
88 */
89
90/* TODO: (ordered by importance :-)
91 * ===============================================================================
92 *
93 *  - Separate constant lookup totally from plain strings (using CONSTANT pattern)
94 *  - Add #if .. #else .. #endif and ==, !=, <, > , <=, >= operators
95 *  - Add #include "some.ini"
96 *  - Allow variables to refer to options also when using parse_ini_file()
97 *
98 */
99
/* Globals Macros
 *
 * SCNG(field) is shorthand for INI_SCNG(field); every scanner-state access in
 * this file goes through it. */
#define SCNG    INI_SCNG
#ifdef ZTS
/* ZTS build: this file defines only the resource id for the scanner globals */
ZEND_API ts_rsrc_id ini_scanner_globals_id;
#else
/* non-ZTS build: this file defines the scanner globals structure itself */
ZEND_API zend_ini_scanner_globals ini_scanner_globals;
#endif
107
/* Skip spaces and tabs at the start of the current token by advancing
 * SCNG(yy_text) and shrinking yyleng; stops at the first other byte
 * (including a NUL byte). */
#define EAT_LEADING_WHITESPACE()                            \
    while (yytext[0] == ' ' || yytext[0] == '\t') {         \
        SCNG(yy_text)++;                                    \
        yyleng--;                                           \
    }
118
/* Shrink yyleng while the last byte of the current token is a space, tab,
 * CR or LF, or equals ch. Passing 'X' as ch disables the extra-character
 * match, so only whitespace is trimmed. */
#define EAT_TRAILING_WHITESPACE_EX(ch)                      \
    for (; yyleng > 0; yyleng--) {                          \
        const char yy_last = yytext[yyleng - 1];            \
        if (yy_last != ' '  && yy_last != '\t' &&           \
            yy_last != '\r' && yy_last != '\n' &&           \
            !((ch) != 'X' && yy_last == (ch))) {            \
            break;                                          \
        }                                                   \
    }

/* Trim trailing whitespace only ('X' is the "no extra char" placeholder) */
#define EAT_TRAILING_WHITESPACE()   EAT_TRAILING_WHITESPACE_EX('X')
133
/* Turn retval into an IS_STRING zval holding a zend_strndup() copy of the
 * first len bytes of str. Note that len is evaluated twice. */
#define zend_ini_copy_value(retval, str, len) {          \
    Z_TYPE_P(retval)   = IS_STRING;                      \
    Z_STRLEN_P(retval) = (len);                          \
    Z_STRVAL_P(retval) = zend_strndup((str), (len));     \
}
139
/* Copy len bytes of str into the zval named ini_lval and return the token id
 * `type` from the enclosing function. The expansion site must have a variable
 * called ini_lval in scope; the macro references it directly. */
#define RETURN_TOKEN(type, str, len) {           \
    zend_ini_copy_value(ini_lval, str, len);     \
    return type;                                 \
}
144
145static void _yy_push_state(int new_state TSRMLS_DC)
146{
147    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
148    YYSETCONDITION(new_state);
149}
150
/* Convenience wrapper: callers write yy_push_state(ST_NAME TSRMLS_CC); the
 * single macro argument is pasted after "yyc", so the state name gets its
 * yyc prefix and the trailing TSRMLS_CC tokens are forwarded unchanged. */
#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
152
153static void yy_pop_state(TSRMLS_D)
154{
155    int *stack_state;
156    zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
157    YYSETCONDITION(*stack_state);
158    zend_stack_del_top(&SCNG(state_stack));
159}
160
161static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
162{
163    YYCURSOR = (YYCTYPE*)str;
164    SCNG(yy_start) = YYCURSOR;
165    YYLIMIT  = YYCURSOR + len;
166}
167
/* Name of the file being scanned, kept in the scanner globals; NULL when
 * scanning a string (see init_ini_scanner()). */
#define ini_filename SCNG(filename)
169
170/* {{{ init_ini_scanner()
171*/
172static int init_ini_scanner(int scanner_mode, zend_file_handle *fh TSRMLS_DC)
173{
174    /* Sanity check */
175    if (scanner_mode != ZEND_INI_SCANNER_NORMAL && scanner_mode != ZEND_INI_SCANNER_RAW) {
176        zend_error(E_WARNING, "Invalid scanner mode");
177        return FAILURE;
178    }
179
180    SCNG(lineno) = 1;
181    SCNG(scanner_mode) = scanner_mode;
182    SCNG(yy_in) = fh;
183
184    if (fh != NULL) {
185        ini_filename = zend_strndup(fh->filename, strlen(fh->filename));
186    } else {
187        ini_filename = NULL;
188    }
189
190    zend_stack_init(&SCNG(state_stack));
191    BEGIN(INITIAL);
192
193    return SUCCESS;
194}
195/* }}} */
196
197/* {{{ shutdown_ini_scanner()
198*/
199void shutdown_ini_scanner(TSRMLS_D)
200{
201    zend_stack_destroy(&SCNG(state_stack));
202    if (ini_filename) {
203        free(ini_filename);
204    }
205}
206/* }}} */
207
208/* {{{ zend_ini_scanner_get_lineno()
209*/
210int zend_ini_scanner_get_lineno(TSRMLS_D)
211{
212    return SCNG(lineno);
213}
214/* }}} */
215
216/* {{{ zend_ini_scanner_get_filename()
217*/
218char *zend_ini_scanner_get_filename(TSRMLS_D)
219{
220    return ini_filename ? ini_filename : "Unknown";
221}
222/* }}} */
223
224/* {{{ zend_ini_open_file_for_scanning()
225*/
226int zend_ini_open_file_for_scanning(zend_file_handle *fh, int scanner_mode TSRMLS_DC)
227{
228    char *buf;
229    size_t size;
230
231    if (zend_stream_fixup(fh, &buf, &size TSRMLS_CC) == FAILURE) {
232        return FAILURE;
233    }
234
235    if (init_ini_scanner(scanner_mode, fh TSRMLS_CC) == FAILURE) {
236        zend_file_handle_dtor(fh TSRMLS_CC);
237        return FAILURE;
238    }
239
240    yy_scan_buffer(buf, size TSRMLS_CC);
241
242    return SUCCESS;
243}
244/* }}} */
245
246/* {{{ zend_ini_prepare_string_for_scanning()
247*/
248int zend_ini_prepare_string_for_scanning(char *str, int scanner_mode TSRMLS_DC)
249{
250    int len = strlen(str);
251
252    if (init_ini_scanner(scanner_mode, NULL TSRMLS_CC) == FAILURE) {
253        return FAILURE;
254    }
255
256    yy_scan_buffer(str, len TSRMLS_CC);
257
258    return SUCCESS;
259}
260/* }}} */
261
262/* {{{ zend_ini_escape_string()
263 */
264static void zend_ini_escape_string(zval *lval, char *str, int len, char quote_type TSRMLS_DC)
265{
266    register char *s, *t;
267    char *end;
268
269    zend_ini_copy_value(lval, str, len);
270
271    /* convert escape sequences */
272    s = t = Z_STRVAL_P(lval);
273    end = s + Z_STRLEN_P(lval);
274
275    while (s < end) {
276        if (*s == '\\') {
277            s++;
278            if (s >= end) {
279                *t++ = '\\';
280                continue;
281            }
282            switch (*s) {
283                case '"':
284                    if (*s != quote_type) {
285                        *t++ = '\\';
286                        *t++ = *s;
287                        break;
288                    }
289                case '\\':
290                case '$':
291                    *t++ = *s;
292                    Z_STRLEN_P(lval)--;
293                    break;
294                default:
295                    *t++ = '\\';
296                    *t++ = *s;
297                    break;
298            }
299        } else {
300            *t++ = *s;
301        }
302        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
303            SCNG(lineno)++;
304        }
305        s++;
306    }
307    *t = 0;
308}
309/* }}} */
310
311int ini_lex(zval *ini_lval TSRMLS_DC)
312{
313restart:
314    SCNG(yy_text) = YYCURSOR;
315
316/* yymore_restart: */
317    /* detect EOF */
318    if (YYCURSOR >= YYLIMIT) {
319        if (YYSTATE == STATE(ST_VALUE) || YYSTATE == STATE(ST_RAW)) {
320            BEGIN(INITIAL);
321            return END_OF_LINE;
322        }
323        return 0;
324    }
325
326    /* Eat any UTF-8 BOM we find in the first 3 bytes */
327    if (YYCURSOR == SCNG(yy_start) && YYCURSOR + 3 < YYLIMIT) {
328        if (memcmp(YYCURSOR, "\xef\xbb\xbf", 3) == 0) {
329            YYCURSOR += 3;
330            goto restart;
331        }
332    }
333/*!re2c
334re2c:yyfill:check = 0;
335LNUM [0-9]+
336DNUM ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*)
337NUMBER [-]?{LNUM}|{DNUM}
338ANY_CHAR (.|[\n\t])
339NEWLINE ("\r"|"\n"|"\r\n")
340TABS_AND_SPACES [ \t]
341WHITESPACE [ \t]+
342CONSTANT [a-zA-Z_][a-zA-Z0-9_]*
343LABEL [^=\n\r\t;|&$~(){}!"\[]+
344TOKENS [:,.\[\]"'()|^&+-/*=%$!~<>?@{}]
345OPERATORS [&|~()!]
346DOLLAR_CURLY "${"
347
348SECTION_RAW_CHARS [^\]\n\r]
349SINGLE_QUOTED_CHARS [^']
350RAW_VALUE_CHARS [^\n\r;\000]
351
352LITERAL_DOLLAR ("$"([^{\000]|("\\"{ANY_CHAR})))
353VALUE_CHARS         ([^$= \t\n\r;&|~()!"'\000]|{LITERAL_DOLLAR})
354SECTION_VALUE_CHARS ([^$\n\r;"'\]\\]|("\\"{ANY_CHAR})|{LITERAL_DOLLAR})
355
356<!*> := yyleng = YYCURSOR - SCNG(yy_text);
357
358<INITIAL>"[" { /* Section start */
359    /* Enter section data lookup state */
360    if (SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW) {
361        yy_push_state(ST_SECTION_RAW TSRMLS_CC);
362    } else {
363        yy_push_state(ST_SECTION_VALUE TSRMLS_CC);
364    }
365    return TC_SECTION;
366}
367
368<ST_VALUE,ST_SECTION_VALUE,ST_OFFSET>"'"{SINGLE_QUOTED_CHARS}+"'" { /* Raw string */
369    /* Eat leading and trailing single quotes */
370    if (yytext[0] == '\'' && yytext[yyleng - 1] == '\'') {
371        SCNG(yy_text)++;
372        yyleng = yyleng - 2;
373    }
374    RETURN_TOKEN(TC_RAW, yytext, yyleng);
375}
376
377<ST_SECTION_RAW,ST_SECTION_VALUE>"]"{TABS_AND_SPACES}*{NEWLINE}? { /* End of section */
378    BEGIN(INITIAL);
379    SCNG(lineno)++;
380    return ']';
381}
382
383<INITIAL>{LABEL}"["{TABS_AND_SPACES}* { /* Start of option with offset */
384    /* Eat leading whitespace */
385    EAT_LEADING_WHITESPACE();
386
387    /* Eat trailing whitespace and [ */
388    EAT_TRAILING_WHITESPACE_EX('[');
389
390    /* Enter offset lookup state */
391    yy_push_state(ST_OFFSET TSRMLS_CC);
392
393    RETURN_TOKEN(TC_OFFSET, yytext, yyleng);
394}
395
396<ST_OFFSET>{TABS_AND_SPACES}*"]" { /* End of section or an option offset */
397    BEGIN(INITIAL);
398    return ']';
399}
400
401<ST_DOUBLE_QUOTES,ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{DOLLAR_CURLY} { /* Variable start */
402    yy_push_state(ST_VARNAME TSRMLS_CC);
403    return TC_DOLLAR_CURLY;
404}
405
406<ST_VARNAME>{LABEL} { /* Variable name */
407    /* Eat leading whitespace */
408    EAT_LEADING_WHITESPACE();
409
410    /* Eat trailing whitespace */
411    EAT_TRAILING_WHITESPACE();
412
413    RETURN_TOKEN(TC_VARNAME, yytext, yyleng);
414}
415
416<ST_VARNAME>"}" { /* Variable end */
417    yy_pop_state(TSRMLS_C);
418    return '}';
419}
420
421<INITIAL,ST_VALUE>("true"|"on"|"yes"){TABS_AND_SPACES}* { /* TRUE value (when used outside option value/offset this causes parse error!) */
422    RETURN_TOKEN(BOOL_TRUE, "1", 1);
423}
424
425<INITIAL,ST_VALUE>("false"|"off"|"no"|"none"|"null"){TABS_AND_SPACES}* { /* FALSE value (when used outside option value/offset this causes parse error!)*/
426    RETURN_TOKEN(BOOL_FALSE, "", 0);
427}
428
429<INITIAL>{LABEL} { /* Get option name */
430    /* Eat leading whitespace */
431    EAT_LEADING_WHITESPACE();
432
433    /* Eat trailing whitespace */
434    EAT_TRAILING_WHITESPACE();
435
436    RETURN_TOKEN(TC_LABEL, yytext, yyleng);
437}
438
439<INITIAL>{TABS_AND_SPACES}*[=]{TABS_AND_SPACES}* { /* Start option value */
440    if (SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW) {
441        yy_push_state(ST_RAW TSRMLS_CC);
442    } else {
443        yy_push_state(ST_VALUE TSRMLS_CC);
444    }
445    return '=';
446}
447
448<ST_RAW>{RAW_VALUE_CHARS} { /* Raw value, only used when SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW. */
449    char *sc = NULL;
450    while (YYCURSOR < YYLIMIT) {
451        switch (*YYCURSOR) {
452            case '\n':
453            case '\r':
454                goto end_raw_value_chars;
455                break;
456            case ';':
457                if (sc == NULL) {
458                    sc = YYCURSOR;
459                }
460                /* no break */
461            default:
462                YYCURSOR++;
463                break;
464        }
465    }
466end_raw_value_chars:
467    yyleng = YYCURSOR - SCNG(yy_text);
468
469    /* Eat trailing semicolons */
470    while (yytext[yyleng - 1] == ';') {
471        yyleng--;
472    }
473
474    /* Eat leading and trailing double quotes */
475    if (yytext[0] == '"' && yytext[yyleng - 1] == '"') {
476        SCNG(yy_text)++;
477        yyleng = yyleng - 2;
478    } else if (sc) {
479        YYCURSOR = sc;
480        yyleng = YYCURSOR - SCNG(yy_text);
481    }
482    RETURN_TOKEN(TC_RAW, yytext, yyleng);
483}
484
485<ST_SECTION_RAW>{SECTION_RAW_CHARS}+ { /* Raw value, only used when SCNG(scanner_mode) == ZEND_INI_SCANNER_RAW. */
486    RETURN_TOKEN(TC_RAW, yytext, yyleng);
487}
488
489<ST_VALUE,ST_RAW>{TABS_AND_SPACES}*{NEWLINE} { /* End of option value */
490    BEGIN(INITIAL);
491    SCNG(lineno)++;
492    return END_OF_LINE;
493}
494
495<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{CONSTANT} { /* Get constant option value */
496    RETURN_TOKEN(TC_CONSTANT, yytext, yyleng);
497}
498
499<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{NUMBER} { /* Get number option value as string */
500    RETURN_TOKEN(TC_NUMBER, yytext, yyleng);
501}
502
503<INITIAL>{TOKENS} { /* Disallow these chars outside option values */
504    return yytext[0];
505}
506
507<ST_VALUE>{OPERATORS}{TABS_AND_SPACES}* { /* Boolean operators */
508    return yytext[0];
509}
510
511<ST_VALUE>[=] { /* Make = used in option value to trigger error */
512    yyless(0);
513    BEGIN(INITIAL);
514    return END_OF_LINE;
515}
516
517<ST_VALUE>{VALUE_CHARS}+ { /* Get everything else as option/offset value */
518    RETURN_TOKEN(TC_STRING, yytext, yyleng);
519}
520
521<ST_SECTION_VALUE,ST_OFFSET>{SECTION_VALUE_CHARS}+ { /* Get rest as section/offset value */
522    RETURN_TOKEN(TC_STRING, yytext, yyleng);
523}
524
525<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{TABS_AND_SPACES}*["] { /* Double quoted '"' string start */
526    yy_push_state(ST_DOUBLE_QUOTES TSRMLS_CC);
527    return '"';
528}
529
530<ST_DOUBLE_QUOTES>["]{TABS_AND_SPACES}* { /* Double quoted '"' string ends */
531    yy_pop_state(TSRMLS_C);
532    return '"';
533}
534
535<ST_DOUBLE_QUOTES>[^] { /* Escape double quoted string contents */
536    if (YYCURSOR > YYLIMIT) {
537        return 0;
538    }
539
540    while (YYCURSOR < YYLIMIT) {
541        switch (*YYCURSOR++) {
542            case '"':
543                if (YYCURSOR < YYLIMIT && YYCURSOR[-2] == '\\' && *YYCURSOR != '\r' && *YYCURSOR != '\n') {
544                    continue;
545                }
546                break;
547            case '$':
548                if (*YYCURSOR == '{') {
549                    break;
550                }
551                continue;
552            case '\\':
553                if (YYCURSOR < YYLIMIT && *YYCURSOR != '"') {
554                    YYCURSOR++;
555                }
556                /* fall through */
557            default:
558                continue;
559        }
560
561        YYCURSOR--;
562        break;
563    }
564
565    yyleng = YYCURSOR - SCNG(yy_text);
566
567    zend_ini_escape_string(ini_lval, yytext, yyleng, '"' TSRMLS_CC);
568    return TC_QUOTED_STRING;
569}
570
571<ST_SECTION_VALUE,ST_VALUE,ST_OFFSET>{WHITESPACE} {
572    RETURN_TOKEN(TC_WHITESPACE, yytext, yyleng);
573}
574
575<INITIAL,ST_RAW>{TABS_AND_SPACES}+ {
576    /* eat whitespace */
577    goto restart;
578}
579
580<INITIAL>{TABS_AND_SPACES}*{NEWLINE} {
581    SCNG(lineno)++;
582    return END_OF_LINE;
583}
584
585<INITIAL,ST_VALUE,ST_RAW>{TABS_AND_SPACES}*[;][^\r\n]*{NEWLINE} { /* Comment */
586    BEGIN(INITIAL);
587    SCNG(lineno)++;
588    return END_OF_LINE;
589}
590
591<INITIAL>{TABS_AND_SPACES}*[#][^\r\n]*{NEWLINE} { /* #Comment */
592    zend_error(E_DEPRECATED, "Comments starting with '#' are deprecated in %s on line %d", zend_ini_scanner_get_filename(TSRMLS_C), SCNG(lineno));
593    BEGIN(INITIAL);
594    SCNG(lineno)++;
595    return END_OF_LINE;
596}
597
598<ST_VALUE,ST_RAW>[^] { /* End of option value (if EOF is reached before EOL */
599    BEGIN(INITIAL);
600    return END_OF_LINE;
601}
602
603<*>[^] {
604    return 0;
605}
606
607*/
608}
609