1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Marcus Boerger <helly@php.net>                              |
16   |          Nuno Lopes <nlopess@php.net>                                |
17   |          Scott MacVicar <scottmac@php.net>                           |
18   | Flex version authors:                                                |
19   |          Andi Gutmans <andi@zend.com>                                |
20   |          Zeev Suraski <zeev@zend.com>                                |
21   +----------------------------------------------------------------------+
22*/
23
24/* $Id$ */
25
/* Scanner tracing hook. With "#if 0" YYDEBUG(s, c) expands to nothing;
   flip it to "#if 1" to printf() every (state, char) pair passed to it. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
31
32#include "zend_language_scanner_defs.h"
33
34#include <errno.h>
35#include "zend.h"
36#ifdef PHP_WIN32
37# include <Winuser.h>
38#endif
39#include "zend_alloc.h"
40#include <zend_language_parser.h>
41#include "zend_compile.h"
42#include "zend_language_scanner.h"
43#include "zend_highlight.h"
44#include "zend_constants.h"
45#include "zend_variables.h"
46#include "zend_operators.h"
47#include "zend_API.h"
48#include "zend_strtod.h"
49#include "zend_exceptions.h"
50#include "zend_virtual_cwd.h"
51#include "tsrm_config_common.h"
52
/* Scanner interface: the input character type and the cursor/limit/marker
   pointers, all of which live in the scanner globals (SCNG). */
#define YYCTYPE   unsigned char
/* Returns 0 from the enclosing function once the cursor plus the requested
   look-ahead n reaches the limit plus the ZEND_MMAP_AHEAD padding. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* Current start condition, stored in SCNG(yy_state). */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Maps a bare condition name to its yyc-prefixed identifier. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Keep only the first x bytes of the current token: move the cursor back to
   yytext + x, then shrink yyleng to x. The argument is parenthesized so that
   expression arguments bind as a unit; note it is evaluated twice. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72
73/* perform sanity check. If this message is triggered you should
74   increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75/*!max:re2c */
76#if ZEND_MMAP_AHEAD < YYMAXFILL
77# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78#endif
79
80#ifdef HAVE_STDARG_H
81# include <stdarg.h>
82#endif
83
84#ifdef HAVE_UNISTD_H
85# include <unistd.h>
86#endif
87
/* Globals Macros */
/* SCNG(x) is shorthand for LANG_SCNG(x), the scanner-globals accessor
   (LANG_SCNG itself is defined outside this file section). */
#define SCNG    LANG_SCNG
#ifdef ZTS
/* ZTS build: only a resource id for the scanner globals is defined here. */
ZEND_API ts_rsrc_id language_scanner_globals_id;
#else
/* Non-ZTS build: one scanner-globals structure with external linkage. */
ZEND_API zend_php_scanner_globals language_scanner_globals;
#endif
95
/* Bump CG(zend_lineno) once per line break in the l bytes starting at s.
   "\n", a lone "\r" and the pair "\r\n" each count as a single break. To
   classify a '\r' the following byte is inspected, so the byte at s+l must
   be readable as well. */
#define HANDLE_NEWLINES(s, l)                                                   \
do {                                                                            \
    char *p = (s), *boundary = p+(l);                                           \
                                                                                \
    while (p<boundary) {                                                        \
        if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) {                   \
            CG(zend_lineno)++;                                                  \
        }                                                                       \
        p++;                                                                    \
    }                                                                           \
} while (0)

/* Single-character variant: bump CG(zend_lineno) when c is '\n' or '\r'.
   The argument is parenthesized so expression arguments compare as a unit;
   it may be evaluated twice. The plain-brace form is kept because the call
   sites are not all visible from here. */
#define HANDLE_NEWLINE(c) \
{ \
    if ((c) == '\n' || (c) == '\r') { \
        CG(zend_lineno)++; \
    } \
}
114
/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    CG(doc_comment_len)

/* True for ASCII letters, '_' and any value >= 0x7F.
   NOTE(review): with a plain (possibly signed) char argument, bytes 0x80-0xFF
   compare as negative and are rejected; callers should pass an unsigned char. */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F)

/* Digit classification for octal ('0'-'7') and hexadecimal ('0'-'9', 'a'-'f', 'A'-'F') characters. */
#define ZEND_IS_OCT(c)  ((c)>='0' && (c)<='7')
#define ZEND_IS_HEX(c)  (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
123
124BEGIN_EXTERN_C()
125
126static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127{
128    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129    assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130    return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131}
132
133static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
134{
135    return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
136}
137
138static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
139{
140    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
142}
143
144static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
145{
146    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
147    assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
148    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
150}
151
152
153static void _yy_push_state(int new_state TSRMLS_DC)
154{
155    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
156    YYSETCONDITION(new_state);
157}
158
159#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
161static void yy_pop_state(TSRMLS_D)
162{
163    int *stack_state;
164    zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
165    YYSETCONDITION(*stack_state);
166    zend_stack_del_top(&SCNG(state_stack));
167}
168
169static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
170{
171    YYCURSOR       = (YYCTYPE*)str;
172    YYLIMIT        = YYCURSOR + len;
173    if (!SCNG(yy_start)) {
174        SCNG(yy_start) = YYCURSOR;
175    }
176}
177
178void startup_scanner(TSRMLS_D)
179{
180    CG(parse_error) = 0;
181    CG(doc_comment) = NULL;
182    CG(doc_comment_len) = 0;
183    zend_stack_init(&SCNG(state_stack));
184    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
185}
186
187static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
188    efree(heredoc_label->label);
189}
190
191void shutdown_scanner(TSRMLS_D)
192{
193    CG(parse_error) = 0;
194    RESET_DOC_COMMENT();
195    zend_stack_destroy(&SCNG(state_stack));
196    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
197    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
198}
199
200ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
201{
202    lex_state->yy_leng   = SCNG(yy_leng);
203    lex_state->yy_start  = SCNG(yy_start);
204    lex_state->yy_text   = SCNG(yy_text);
205    lex_state->yy_cursor = SCNG(yy_cursor);
206    lex_state->yy_marker = SCNG(yy_marker);
207    lex_state->yy_limit  = SCNG(yy_limit);
208
209    lex_state->state_stack = SCNG(state_stack);
210    zend_stack_init(&SCNG(state_stack));
211
212    lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
213    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
214
215    lex_state->in = SCNG(yy_in);
216    lex_state->yy_state = YYSTATE;
217    lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
218    lex_state->lineno = CG(zend_lineno);
219
220    lex_state->script_org = SCNG(script_org);
221    lex_state->script_org_size = SCNG(script_org_size);
222    lex_state->script_filtered = SCNG(script_filtered);
223    lex_state->script_filtered_size = SCNG(script_filtered_size);
224    lex_state->input_filter = SCNG(input_filter);
225    lex_state->output_filter = SCNG(output_filter);
226    lex_state->script_encoding = SCNG(script_encoding);
227}
228
229ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
230{
231    SCNG(yy_leng)   = lex_state->yy_leng;
232    SCNG(yy_start)  = lex_state->yy_start;
233    SCNG(yy_text)   = lex_state->yy_text;
234    SCNG(yy_cursor) = lex_state->yy_cursor;
235    SCNG(yy_marker) = lex_state->yy_marker;
236    SCNG(yy_limit)  = lex_state->yy_limit;
237
238    zend_stack_destroy(&SCNG(state_stack));
239    SCNG(state_stack) = lex_state->state_stack;
240
241    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
242    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
243    SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
244
245    SCNG(yy_in) = lex_state->in;
246    YYSETCONDITION(lex_state->yy_state);
247    CG(zend_lineno) = lex_state->lineno;
248    zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
249
250    if (SCNG(script_filtered)) {
251        efree(SCNG(script_filtered));
252        SCNG(script_filtered) = NULL;
253    }
254    SCNG(script_org) = lex_state->script_org;
255    SCNG(script_org_size) = lex_state->script_org_size;
256    SCNG(script_filtered) = lex_state->script_filtered;
257    SCNG(script_filtered_size) = lex_state->script_filtered_size;
258    SCNG(input_filter) = lex_state->input_filter;
259    SCNG(output_filter) = lex_state->output_filter;
260    SCNG(script_encoding) = lex_state->script_encoding;
261
262    RESET_DOC_COMMENT();
263}
264
265ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
266{
267    zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
268    /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
269    file_handle->opened_path = NULL;
270    if (file_handle->free_filename) {
271        file_handle->filename = NULL;
272    }
273}
274
/* Byte order marks as string literals. Some of them contain NUL bytes, so
   they must be compared with memcmp() over sizeof(BOM_x)-1 bytes, never with
   the str* functions. BOM_UTF16_LE is a prefix of BOM_UTF32_LE, so the
   UTF-32 marks have to be tested before the UTF-16 ones. */
#define BOM_UTF32_BE    "\x00\x00\xfe\xff"
#define BOM_UTF32_LE    "\xff\xfe\x00\x00"
#define BOM_UTF16_BE    "\xfe\xff"
#define BOM_UTF16_LE    "\xff\xfe"
#define BOM_UTF8        "\xef\xbb\xbf"
280
281static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
282{
283    const unsigned char *p;
284    int wchar_size = 2;
285    int le = 0;
286
287    /* utf-16 or utf-32? */
288    p = script;
289    while ((p-script) < script_size) {
290        p = memchr(p, 0, script_size-(p-script)-2);
291        if (!p) {
292            break;
293        }
294        if (*(p+1) == '\0' && *(p+2) == '\0') {
295            wchar_size = 4;
296            break;
297        }
298
299        /* searching for UTF-32 specific byte orders, so this will do */
300        p += 4;
301    }
302
303    /* BE or LE? */
304    p = script;
305    while ((p-script) < script_size) {
306        if (*p == '\0' && *(p+wchar_size-1) != '\0') {
307            /* BE */
308            le = 0;
309            break;
310        } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
311            /* LE* */
312            le = 1;
313            break;
314        }
315        p += wchar_size;
316    }
317
318    if (wchar_size == 2) {
319        return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
320    } else {
321        return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
322    }
323
324    return NULL;
325}
326
327static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
328{
329    const zend_encoding *script_encoding = NULL;
330    int bom_size;
331    unsigned char *pos1, *pos2;
332
333    if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
334        return NULL;
335    }
336
337    /* check out BOM */
338    if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
339        script_encoding = zend_multibyte_encoding_utf32be;
340        bom_size = sizeof(BOM_UTF32_BE)-1;
341    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
342        script_encoding = zend_multibyte_encoding_utf32le;
343        bom_size = sizeof(BOM_UTF32_LE)-1;
344    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
345        script_encoding = zend_multibyte_encoding_utf16be;
346        bom_size = sizeof(BOM_UTF16_BE)-1;
347    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
348        script_encoding = zend_multibyte_encoding_utf16le;
349        bom_size = sizeof(BOM_UTF16_LE)-1;
350    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
351        script_encoding = zend_multibyte_encoding_utf8;
352        bom_size = sizeof(BOM_UTF8)-1;
353    }
354
355    if (script_encoding) {
356        /* remove BOM */
357        LANG_SCNG(script_org) += bom_size;
358        LANG_SCNG(script_org_size) -= bom_size;
359
360        return script_encoding;
361    }
362
363    /* script contains NULL bytes -> auto-detection */
364    if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
365        /* check if the NULL byte is after the __HALT_COMPILER(); */
366        pos2 = LANG_SCNG(script_org);
367
368        while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
369            pos2 = memchr(pos2, '_', pos1 - pos2);
370            if (!pos2) break;
371            pos2++;
372            if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
373                pos2 += sizeof("_HALT_COMPILER")-1;
374                while (*pos2 == ' '  ||
375                       *pos2 == '\t' ||
376                       *pos2 == '\r' ||
377                       *pos2 == '\n') {
378                    pos2++;
379                }
380                if (*pos2 == '(') {
381                    pos2++;
382                    while (*pos2 == ' '  ||
383                           *pos2 == '\t' ||
384                           *pos2 == '\r' ||
385                           *pos2 == '\n') {
386                        pos2++;
387                    }
388                    if (*pos2 == ')') {
389                        pos2++;
390                        while (*pos2 == ' '  ||
391                               *pos2 == '\t' ||
392                               *pos2 == '\r' ||
393                               *pos2 == '\n') {
394                            pos2++;
395                        }
396                        if (*pos2 == ';') {
397                            return NULL;
398                        }
399                    }
400                }
401            }
402        }
403        /* make best effort if BOM is missing */
404        return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
405    }
406
407    return NULL;
408}
409
410static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
411{
412    const zend_encoding *script_encoding;
413
414    if (CG(detect_unicode)) {
415        /* check out bom(byte order mark) and see if containing wchars */
416        script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
417        if (script_encoding != NULL) {
418            /* bom or wchar detection is prior to 'script_encoding' option */
419            return script_encoding;
420        }
421    }
422
423    /* if no script_encoding specified, just leave alone */
424    if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
425        return NULL;
426    }
427
428    /* if multiple encodings specified, detect automagically */
429    if (CG(script_encoding_list_size) > 1) {
430        return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
431    }
432
433    return CG(script_encoding_list)[0];
434}
435
436ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
437{
438    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
439    const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
440
441    if (!script_encoding) {
442        return FAILURE;
443    }
444
445    /* judge input/output filter */
446    LANG_SCNG(script_encoding) = script_encoding;
447    LANG_SCNG(input_filter) = NULL;
448    LANG_SCNG(output_filter) = NULL;
449
450    if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
451        if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
452            /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
453            LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
454            LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
455        } else {
456            LANG_SCNG(input_filter) = NULL;
457            LANG_SCNG(output_filter) = NULL;
458        }
459        return SUCCESS;
460    }
461
462    if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
463        LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
464        LANG_SCNG(output_filter) = NULL;
465    } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
466        LANG_SCNG(input_filter) = NULL;
467        LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
468    } else {
469        /* both script and internal encodings are incompatible w/ flex */
470        LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
471        LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
472    }
473
474    return 0;
475}
476
477ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
478{
479    const char *file_path = NULL;
480    char *buf;
481    size_t size, offset = 0;
482
483    /* The shebang line was read, get the current position to obtain the buffer start */
484    if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
485        if ((offset = ftell(file_handle->handle.fp)) == -1) {
486            offset = 0;
487        }
488    }
489
490    if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
491        return FAILURE;
492    }
493
494    zend_llist_add_element(&CG(open_files), file_handle);
495    if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
496        zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
497        size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
498        fh->handle.stream.handle = (void*)(((char*)fh) + diff);
499        file_handle->handle.stream.handle = fh->handle.stream.handle;
500    }
501
502    /* Reset the scanner for scanning the new file */
503    SCNG(yy_in) = file_handle;
504    SCNG(yy_start) = NULL;
505
506    if (size != -1) {
507        if (CG(multibyte)) {
508            SCNG(script_org) = (unsigned char*)buf;
509            SCNG(script_org_size) = size;
510            SCNG(script_filtered) = NULL;
511
512            zend_multibyte_set_filter(NULL TSRMLS_CC);
513
514            if (SCNG(input_filter)) {
515                if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
516                    zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
517                            "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
518                }
519                buf = (char*)SCNG(script_filtered);
520                size = SCNG(script_filtered_size);
521            }
522        }
523        SCNG(yy_start) = (unsigned char *)buf - offset;
524        yy_scan_buffer(buf, size TSRMLS_CC);
525    } else {
526        zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
527    }
528
529    BEGIN(INITIAL);
530
531    if (file_handle->opened_path) {
532        file_path = file_handle->opened_path;
533    } else {
534        file_path = file_handle->filename;
535    }
536
537    zend_set_compiled_filename(file_path TSRMLS_CC);
538
539    if (CG(start_lineno)) {
540        CG(zend_lineno) = CG(start_lineno);
541        CG(start_lineno) = 0;
542    } else {
543        CG(zend_lineno) = 1;
544    }
545
546    RESET_DOC_COMMENT();
547    CG(increment_lineno) = 0;
548    return SUCCESS;
549}
550END_EXTERN_C()
551
552
553ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
554{
555    zend_lex_state original_lex_state;
556    zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
557    zend_op_array *original_active_op_array = CG(active_op_array);
558    zend_op_array *retval=NULL;
559    int compiler_result;
560    zend_bool compilation_successful=0;
561    znode retval_znode;
562    zend_bool original_in_compilation = CG(in_compilation);
563
564    retval_znode.op_type = IS_CONST;
565    INIT_PZVAL(&retval_znode.u.constant);
566    ZVAL_LONG(&retval_znode.u.constant, 1);
567
568    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
569
570    retval = op_array; /* success oriented */
571
572    if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
573        if (type==ZEND_REQUIRE) {
574            zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
575            zend_bailout();
576        } else {
577            zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
578        }
579        compilation_successful=0;
580    } else {
581        init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
582        CG(in_compilation) = 1;
583        CG(active_op_array) = op_array;
584        zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
585        zend_init_compiler_context(TSRMLS_C);
586        compiler_result = zendparse(TSRMLS_C);
587        zend_do_return(&retval_znode, 0 TSRMLS_CC);
588        CG(in_compilation) = original_in_compilation;
589        if (compiler_result != 0) { /* parser error */
590            zend_bailout();
591        }
592        compilation_successful=1;
593    }
594
595    if (retval) {
596        CG(active_op_array) = original_active_op_array;
597        if (compilation_successful) {
598            pass_two(op_array TSRMLS_CC);
599            zend_release_labels(0 TSRMLS_CC);
600        } else {
601            efree(op_array);
602            retval = NULL;
603        }
604    }
605    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
606    return retval;
607}
608
609
610zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
611{
612    zend_file_handle file_handle;
613    zval tmp;
614    zend_op_array *retval;
615    char *opened_path = NULL;
616
617    if (filename->type != IS_STRING) {
618        tmp = *filename;
619        zval_copy_ctor(&tmp);
620        convert_to_string(&tmp);
621        filename = &tmp;
622    }
623    file_handle.filename = Z_STRVAL_P(filename);
624    file_handle.free_filename = 0;
625    file_handle.type = ZEND_HANDLE_FILENAME;
626    file_handle.opened_path = NULL;
627    file_handle.handle.fp = NULL;
628
629    retval = zend_compile_file(&file_handle, type TSRMLS_CC);
630    if (retval && file_handle.handle.stream.handle) {
631        int dummy = 1;
632
633        if (!file_handle.opened_path) {
634            file_handle.opened_path = opened_path = estrndup(Z_STRVAL_P(filename), Z_STRLEN_P(filename));
635        }
636
637        zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
638
639        if (opened_path) {
640            efree(opened_path);
641        }
642    }
643    zend_destroy_file_handle(&file_handle TSRMLS_CC);
644
645    if (filename==&tmp) {
646        zval_dtor(&tmp);
647    }
648    return retval;
649}
650
651ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
652{
653    char *buf;
654    size_t size;
655
656    /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
657    Z_STRVAL_P(str) = str_erealloc(Z_STRVAL_P(str), Z_STRLEN_P(str) + ZEND_MMAP_AHEAD);
658    memset(Z_STRVAL_P(str) + Z_STRLEN_P(str), 0, ZEND_MMAP_AHEAD);
659
660    SCNG(yy_in) = NULL;
661    SCNG(yy_start) = NULL;
662
663    buf = Z_STRVAL_P(str);
664    size = Z_STRLEN_P(str);
665
666    if (CG(multibyte)) {
667        SCNG(script_org) = (unsigned char*)buf;
668        SCNG(script_org_size) = size;
669        SCNG(script_filtered) = NULL;
670
671        zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
672
673        if (SCNG(input_filter)) {
674            if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
675                zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
676                        "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
677            }
678            buf = (char*)SCNG(script_filtered);
679            size = SCNG(script_filtered_size);
680        }
681    }
682
683    yy_scan_buffer(buf, size TSRMLS_CC);
684
685    zend_set_compiled_filename(filename TSRMLS_CC);
686    CG(zend_lineno) = 1;
687    CG(increment_lineno) = 0;
688    RESET_DOC_COMMENT();
689    return SUCCESS;
690}
691
692
693ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
694{
695    size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
696    if (SCNG(input_filter)) {
697        size_t original_offset = offset, length = 0;
698        do {
699            unsigned char *p = NULL;
700            if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
701                return (size_t)-1;
702            }
703            efree(p);
704            if (length > original_offset) {
705                offset--;
706            } else if (length < original_offset) {
707                offset++;
708            }
709        } while (original_offset != length);
710    }
711    return offset;
712}
713
714
715zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
716{
717    zend_lex_state original_lex_state;
718    zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
719    zend_op_array *original_active_op_array = CG(active_op_array);
720    zend_op_array *retval;
721    zval tmp;
722    int compiler_result;
723    zend_bool original_in_compilation = CG(in_compilation);
724
725    if (Z_STRLEN_P(source_string)==0) {
726        efree(op_array);
727        return NULL;
728    }
729
730    CG(in_compilation) = 1;
731
732    tmp = *source_string;
733    zval_copy_ctor(&tmp);
734    convert_to_string(&tmp);
735    source_string = &tmp;
736
737    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
738    if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
739        efree(op_array);
740        retval = NULL;
741    } else {
742        zend_bool orig_interactive = CG(interactive);
743
744        CG(interactive) = 0;
745        init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
746        CG(interactive) = orig_interactive;
747        CG(active_op_array) = op_array;
748        zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
749        zend_init_compiler_context(TSRMLS_C);
750        BEGIN(ST_IN_SCRIPTING);
751        compiler_result = zendparse(TSRMLS_C);
752
753        if (SCNG(script_filtered)) {
754            efree(SCNG(script_filtered));
755            SCNG(script_filtered) = NULL;
756        }
757
758        if (compiler_result != 0) {
759            CG(active_op_array) = original_active_op_array;
760            CG(unclean_shutdown)=1;
761            destroy_op_array(op_array TSRMLS_CC);
762            efree(op_array);
763            retval = NULL;
764        } else {
765            zend_do_return(NULL, 0 TSRMLS_CC);
766            CG(active_op_array) = original_active_op_array;
767            pass_two(op_array TSRMLS_CC);
768            zend_release_labels(0 TSRMLS_CC);
769            retval = op_array;
770        }
771    }
772    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
773    zval_dtor(&tmp);
774    CG(in_compilation) = original_in_compilation;
775    return retval;
776}
777
778
779BEGIN_EXTERN_C()
780int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
781{
782    zend_lex_state original_lex_state;
783    zend_file_handle file_handle;
784
785    file_handle.type = ZEND_HANDLE_FILENAME;
786    file_handle.filename = filename;
787    file_handle.free_filename = 0;
788    file_handle.opened_path = NULL;
789    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
790    if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
791        zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
792        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
793        return FAILURE;
794    }
795    zend_highlight(syntax_highlighter_ini TSRMLS_CC);
796    if (SCNG(script_filtered)) {
797        efree(SCNG(script_filtered));
798        SCNG(script_filtered) = NULL;
799    }
800    zend_destroy_file_handle(&file_handle TSRMLS_CC);
801    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
802    return SUCCESS;
803}
804
805int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
806{
807    zend_lex_state original_lex_state;
808    zval tmp = *str;
809
810    str = &tmp;
811    zval_copy_ctor(str);
812    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
813    if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
814        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
815        return FAILURE;
816    }
817    BEGIN(INITIAL);
818    zend_highlight(syntax_highlighter_ini TSRMLS_CC);
819    if (SCNG(script_filtered)) {
820        efree(SCNG(script_filtered));
821        SCNG(script_filtered) = NULL;
822    }
823    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
824    zval_dtor(str);
825    return SUCCESS;
826}
827
828ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
829{
830    size_t length;
831    unsigned char *new_yy_start;
832
833    /* convert and set */
834    if (!SCNG(input_filter)) {
835        if (SCNG(script_filtered)) {
836            efree(SCNG(script_filtered));
837            SCNG(script_filtered) = NULL;
838        }
839        SCNG(script_filtered_size) = 0;
840        length = SCNG(script_org_size);
841        new_yy_start = SCNG(script_org);
842    } else {
843        if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
844            zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
845                    "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
846        }
847        SCNG(script_filtered) = new_yy_start;
848        SCNG(script_filtered_size) = length;
849    }
850
851    SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
852    SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
853    SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
854    SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
855
856    SCNG(yy_start) = new_yy_start;
857}
858
859
/* Store the `yyleng` bytes starting at `yytext` as the string value of
 * `zendlval`. When an output filter is installed the bytes are passed through
 * it and the filter's result and length are stored; otherwise a plain copy is
 * made with estrndup. Only the string pointer and length are written; the
 * zval's type is left for the caller to set.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement; use it with a trailing semicolon.
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
    do { \
        if (SCNG(output_filter)) { \
            size_t sz = 0; \
            SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
            Z_STRLEN_P(zendlval) = sz; \
        } else { \
            Z_STRVAL_P(zendlval) = (char *) estrndup((yytext), (yyleng)); \
            Z_STRLEN_P(zendlval) = (yyleng); \
        } \
    } while (0)
869
870static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
871{
872    register char *s, *t;
873    char *end;
874
875    ZVAL_STRINGL(zendlval, str, len, 1);
876
877    /* convert escape sequences */
878    s = t = Z_STRVAL_P(zendlval);
879    end = s+Z_STRLEN_P(zendlval);
880    while (s<end) {
881        if (*s=='\\') {
882            s++;
883            if (s >= end) {
884                *t++ = '\\';
885                break;
886            }
887
888            switch(*s) {
889                case 'n':
890                    *t++ = '\n';
891                    Z_STRLEN_P(zendlval)--;
892                    break;
893                case 'r':
894                    *t++ = '\r';
895                    Z_STRLEN_P(zendlval)--;
896                    break;
897                case 't':
898                    *t++ = '\t';
899                    Z_STRLEN_P(zendlval)--;
900                    break;
901                case 'f':
902                    *t++ = '\f';
903                    Z_STRLEN_P(zendlval)--;
904                    break;
905                case 'v':
906                    *t++ = '\v';
907                    Z_STRLEN_P(zendlval)--;
908                    break;
909                case 'e':
910#ifdef PHP_WIN32
911                    *t++ = VK_ESCAPE;
912#else
913                    *t++ = '\e';
914#endif
915                    Z_STRLEN_P(zendlval)--;
916                    break;
917                case '"':
918                case '`':
919                    if (*s != quote_type) {
920                        *t++ = '\\';
921                        *t++ = *s;
922                        break;
923                    }
924                case '\\':
925                case '$':
926                    *t++ = *s;
927                    Z_STRLEN_P(zendlval)--;
928                    break;
929                case 'x':
930                case 'X':
931                    if (ZEND_IS_HEX(*(s+1))) {
932                        char hex_buf[3] = { 0, 0, 0 };
933
934                        Z_STRLEN_P(zendlval)--; /* for the 'x' */
935
936                        hex_buf[0] = *(++s);
937                        Z_STRLEN_P(zendlval)--;
938                        if (ZEND_IS_HEX(*(s+1))) {
939                            hex_buf[1] = *(++s);
940                            Z_STRLEN_P(zendlval)--;
941                        }
942                        *t++ = (char) strtol(hex_buf, NULL, 16);
943                    } else {
944                        *t++ = '\\';
945                        *t++ = *s;
946                    }
947                    break;
948                default:
949                    /* check for an octal */
950                    if (ZEND_IS_OCT(*s)) {
951                        char octal_buf[4] = { 0, 0, 0, 0 };
952
953                        octal_buf[0] = *s;
954                        Z_STRLEN_P(zendlval)--;
955                        if (ZEND_IS_OCT(*(s+1))) {
956                            octal_buf[1] = *(++s);
957                            Z_STRLEN_P(zendlval)--;
958                            if (ZEND_IS_OCT(*(s+1))) {
959                                octal_buf[2] = *(++s);
960                                Z_STRLEN_P(zendlval)--;
961                            }
962                        }
963                        *t++ = (char) strtol(octal_buf, NULL, 8);
964                    } else {
965                        *t++ = '\\';
966                        *t++ = *s;
967                    }
968                    break;
969            }
970        } else {
971            *t++ = *s;
972        }
973
974        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
975            CG(zend_lineno)++;
976        }
977        s++;
978    }
979    *t = 0;
980    if (SCNG(output_filter)) {
981        size_t sz = 0;
982        s = Z_STRVAL_P(zendlval);
983        SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
984        Z_STRLEN_P(zendlval) = sz;
985        efree(s);
986    }
987}
988
989
990int lex_scan(zval *zendlval TSRMLS_DC)
991{
992restart:
993    SCNG(yy_text) = YYCURSOR;
994
995yymore_restart:
996
997/*!re2c
998re2c:yyfill:check = 0;
999LNUM    [0-9]+
1000DNUM    ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1001EXPONENT_DNUM   (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1002HNUM    "0x"[0-9a-fA-F]+
1003BNUM    "0b"[01]+
1004LABEL   [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1005WHITESPACE [ \n\r\t]+
1006TABS_AND_SPACES [ \t]*
1007TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1008ANY_CHAR [^]
1009NEWLINE ("\r"|"\n"|"\r\n")
1010
1011/* compute yyleng before each rule */
1012<!*> := yyleng = YYCURSOR - SCNG(yy_text);
1013
1014<ST_IN_SCRIPTING>"exit" {
1015    return T_EXIT;
1016}
1017
1018<ST_IN_SCRIPTING>"die" {
1019    return T_EXIT;
1020}
1021
1022<ST_IN_SCRIPTING>"function" {
1023    return T_FUNCTION;
1024}
1025
1026<ST_IN_SCRIPTING>"const" {
1027    return T_CONST;
1028}
1029
1030<ST_IN_SCRIPTING>"return" {
1031    return T_RETURN;
1032}
1033
1034<ST_IN_SCRIPTING>"yield" {
1035    return T_YIELD;
1036}
1037
1038<ST_IN_SCRIPTING>"try" {
1039    return T_TRY;
1040}
1041
1042<ST_IN_SCRIPTING>"catch" {
1043    return T_CATCH;
1044}
1045
1046<ST_IN_SCRIPTING>"finally" {
1047    return T_FINALLY;
1048}
1049
1050<ST_IN_SCRIPTING>"throw" {
1051    return T_THROW;
1052}
1053
1054<ST_IN_SCRIPTING>"if" {
1055    return T_IF;
1056}
1057
1058<ST_IN_SCRIPTING>"elseif" {
1059    return T_ELSEIF;
1060}
1061
1062<ST_IN_SCRIPTING>"endif" {
1063    return T_ENDIF;
1064}
1065
1066<ST_IN_SCRIPTING>"else" {
1067    return T_ELSE;
1068}
1069
1070<ST_IN_SCRIPTING>"while" {
1071    return T_WHILE;
1072}
1073
1074<ST_IN_SCRIPTING>"endwhile" {
1075    return T_ENDWHILE;
1076}
1077
1078<ST_IN_SCRIPTING>"do" {
1079    return T_DO;
1080}
1081
1082<ST_IN_SCRIPTING>"for" {
1083    return T_FOR;
1084}
1085
1086<ST_IN_SCRIPTING>"endfor" {
1087    return T_ENDFOR;
1088}
1089
1090<ST_IN_SCRIPTING>"foreach" {
1091    return T_FOREACH;
1092}
1093
1094<ST_IN_SCRIPTING>"endforeach" {
1095    return T_ENDFOREACH;
1096}
1097
1098<ST_IN_SCRIPTING>"declare" {
1099    return T_DECLARE;
1100}
1101
1102<ST_IN_SCRIPTING>"enddeclare" {
1103    return T_ENDDECLARE;
1104}
1105
1106<ST_IN_SCRIPTING>"instanceof" {
1107    return T_INSTANCEOF;
1108}
1109
1110<ST_IN_SCRIPTING>"as" {
1111    return T_AS;
1112}
1113
1114<ST_IN_SCRIPTING>"switch" {
1115    return T_SWITCH;
1116}
1117
1118<ST_IN_SCRIPTING>"endswitch" {
1119    return T_ENDSWITCH;
1120}
1121
1122<ST_IN_SCRIPTING>"case" {
1123    return T_CASE;
1124}
1125
1126<ST_IN_SCRIPTING>"default" {
1127    return T_DEFAULT;
1128}
1129
1130<ST_IN_SCRIPTING>"break" {
1131    return T_BREAK;
1132}
1133
1134<ST_IN_SCRIPTING>"continue" {
1135    return T_CONTINUE;
1136}
1137
1138<ST_IN_SCRIPTING>"goto" {
1139    return T_GOTO;
1140}
1141
1142<ST_IN_SCRIPTING>"echo" {
1143    return T_ECHO;
1144}
1145
1146<ST_IN_SCRIPTING>"print" {
1147    return T_PRINT;
1148}
1149
1150<ST_IN_SCRIPTING>"class" {
1151    return T_CLASS;
1152}
1153
1154<ST_IN_SCRIPTING>"interface" {
1155    return T_INTERFACE;
1156}
1157
1158<ST_IN_SCRIPTING>"trait" {
1159    return T_TRAIT;
1160}
1161
1162<ST_IN_SCRIPTING>"extends" {
1163    return T_EXTENDS;
1164}
1165
1166<ST_IN_SCRIPTING>"implements" {
1167    return T_IMPLEMENTS;
1168}
1169
1170<ST_IN_SCRIPTING>"->" {
1171    yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1172    return T_OBJECT_OPERATOR;
1173}
1174
1175<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1176    ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1177    HANDLE_NEWLINES(yytext, yyleng);
1178    return T_WHITESPACE;
1179}
1180
1181<ST_LOOKING_FOR_PROPERTY>"->" {
1182    return T_OBJECT_OPERATOR;
1183}
1184
1185<ST_LOOKING_FOR_PROPERTY>{LABEL} {
1186    yy_pop_state(TSRMLS_C);
1187    zend_copy_value(zendlval, yytext, yyleng);
1188    zendlval->type = IS_STRING;
1189    return T_STRING;
1190}
1191
1192<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1193    yyless(0);
1194    yy_pop_state(TSRMLS_C);
1195    goto restart;
1196}
1197
1198<ST_IN_SCRIPTING>"::" {
1199    return T_PAAMAYIM_NEKUDOTAYIM;
1200}
1201
1202<ST_IN_SCRIPTING>"\\" {
1203    return T_NS_SEPARATOR;
1204}
1205
1206<ST_IN_SCRIPTING>"..." {
1207    return T_ELLIPSIS;
1208}
1209
1210<ST_IN_SCRIPTING>"new" {
1211    return T_NEW;
1212}
1213
1214<ST_IN_SCRIPTING>"clone" {
1215    return T_CLONE;
1216}
1217
1218<ST_IN_SCRIPTING>"var" {
1219    return T_VAR;
1220}
1221
1222<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1223    return T_INT_CAST;
1224}
1225
1226<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1227    return T_DOUBLE_CAST;
1228}
1229
1230<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1231    return T_STRING_CAST;
1232}
1233
1234<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1235    return T_ARRAY_CAST;
1236}
1237
1238<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1239    return T_OBJECT_CAST;
1240}
1241
1242<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1243    return T_BOOL_CAST;
1244}
1245
1246<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1247    return T_UNSET_CAST;
1248}
1249
1250<ST_IN_SCRIPTING>"eval" {
1251    return T_EVAL;
1252}
1253
1254<ST_IN_SCRIPTING>"include" {
1255    return T_INCLUDE;
1256}
1257
1258<ST_IN_SCRIPTING>"include_once" {
1259    return T_INCLUDE_ONCE;
1260}
1261
1262<ST_IN_SCRIPTING>"require" {
1263    return T_REQUIRE;
1264}
1265
1266<ST_IN_SCRIPTING>"require_once" {
1267    return T_REQUIRE_ONCE;
1268}
1269
1270<ST_IN_SCRIPTING>"namespace" {
1271    return T_NAMESPACE;
1272}
1273
1274<ST_IN_SCRIPTING>"use" {
1275    return T_USE;
1276}
1277
1278<ST_IN_SCRIPTING>"insteadof" {
1279        return T_INSTEADOF;
1280}
1281
1282<ST_IN_SCRIPTING>"global" {
1283    return T_GLOBAL;
1284}
1285
1286<ST_IN_SCRIPTING>"isset" {
1287    return T_ISSET;
1288}
1289
1290<ST_IN_SCRIPTING>"empty" {
1291    return T_EMPTY;
1292}
1293
1294<ST_IN_SCRIPTING>"__halt_compiler" {
1295    return T_HALT_COMPILER;
1296}
1297
1298<ST_IN_SCRIPTING>"static" {
1299    return T_STATIC;
1300}
1301
1302<ST_IN_SCRIPTING>"abstract" {
1303    return T_ABSTRACT;
1304}
1305
1306<ST_IN_SCRIPTING>"final" {
1307    return T_FINAL;
1308}
1309
1310<ST_IN_SCRIPTING>"private" {
1311    return T_PRIVATE;
1312}
1313
1314<ST_IN_SCRIPTING>"protected" {
1315    return T_PROTECTED;
1316}
1317
1318<ST_IN_SCRIPTING>"public" {
1319    return T_PUBLIC;
1320}
1321
1322<ST_IN_SCRIPTING>"unset" {
1323    return T_UNSET;
1324}
1325
1326<ST_IN_SCRIPTING>"=>" {
1327    return T_DOUBLE_ARROW;
1328}
1329
1330<ST_IN_SCRIPTING>"list" {
1331    return T_LIST;
1332}
1333
1334<ST_IN_SCRIPTING>"array" {
1335    return T_ARRAY;
1336}
1337
1338<ST_IN_SCRIPTING>"callable" {
1339 return T_CALLABLE;
1340}
1341
1342<ST_IN_SCRIPTING>"++" {
1343    return T_INC;
1344}
1345
1346<ST_IN_SCRIPTING>"--" {
1347    return T_DEC;
1348}
1349
1350<ST_IN_SCRIPTING>"===" {
1351    return T_IS_IDENTICAL;
1352}
1353
1354<ST_IN_SCRIPTING>"!==" {
1355    return T_IS_NOT_IDENTICAL;
1356}
1357
1358<ST_IN_SCRIPTING>"==" {
1359    return T_IS_EQUAL;
1360}
1361
1362<ST_IN_SCRIPTING>"!="|"<>" {
1363    return T_IS_NOT_EQUAL;
1364}
1365
1366<ST_IN_SCRIPTING>"<=" {
1367    return T_IS_SMALLER_OR_EQUAL;
1368}
1369
1370<ST_IN_SCRIPTING>">=" {
1371    return T_IS_GREATER_OR_EQUAL;
1372}
1373
1374<ST_IN_SCRIPTING>"+=" {
1375    return T_PLUS_EQUAL;
1376}
1377
1378<ST_IN_SCRIPTING>"-=" {
1379    return T_MINUS_EQUAL;
1380}
1381
1382<ST_IN_SCRIPTING>"*=" {
1383    return T_MUL_EQUAL;
1384}
1385
1386<ST_IN_SCRIPTING>"*\*" {
1387    return T_POW;
1388}
1389
1390<ST_IN_SCRIPTING>"*\*=" {
1391    return T_POW_EQUAL;
1392}
1393
1394<ST_IN_SCRIPTING>"/=" {
1395    return T_DIV_EQUAL;
1396}
1397
1398<ST_IN_SCRIPTING>".=" {
1399    return T_CONCAT_EQUAL;
1400}
1401
1402<ST_IN_SCRIPTING>"%=" {
1403    return T_MOD_EQUAL;
1404}
1405
1406<ST_IN_SCRIPTING>"<<=" {
1407    return T_SL_EQUAL;
1408}
1409
1410<ST_IN_SCRIPTING>">>=" {
1411    return T_SR_EQUAL;
1412}
1413
1414<ST_IN_SCRIPTING>"&=" {
1415    return T_AND_EQUAL;
1416}
1417
1418<ST_IN_SCRIPTING>"|=" {
1419    return T_OR_EQUAL;
1420}
1421
1422<ST_IN_SCRIPTING>"^=" {
1423    return T_XOR_EQUAL;
1424}
1425
1426<ST_IN_SCRIPTING>"||" {
1427    return T_BOOLEAN_OR;
1428}
1429
1430<ST_IN_SCRIPTING>"&&" {
1431    return T_BOOLEAN_AND;
1432}
1433
1434<ST_IN_SCRIPTING>"OR" {
1435    return T_LOGICAL_OR;
1436}
1437
1438<ST_IN_SCRIPTING>"AND" {
1439    return T_LOGICAL_AND;
1440}
1441
1442<ST_IN_SCRIPTING>"XOR" {
1443    return T_LOGICAL_XOR;
1444}
1445
1446<ST_IN_SCRIPTING>"<<" {
1447    return T_SL;
1448}
1449
1450<ST_IN_SCRIPTING>">>" {
1451    return T_SR;
1452}
1453
1454<ST_IN_SCRIPTING>{TOKENS} {
1455    return yytext[0];
1456}
1457
1458
1459<ST_IN_SCRIPTING>"{" {
1460    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1461    return '{';
1462}
1463
1464
1465<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1466    yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1467    return T_DOLLAR_OPEN_CURLY_BRACES;
1468}
1469
1470
1471<ST_IN_SCRIPTING>"}" {
1472    RESET_DOC_COMMENT();
1473    if (!zend_stack_is_empty(&SCNG(state_stack))) {
1474        yy_pop_state(TSRMLS_C);
1475    }
1476    return '}';
1477}
1478
1479
1480<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1481    yyless(yyleng - 1);
1482    zend_copy_value(zendlval, yytext, yyleng);
1483    zendlval->type = IS_STRING;
1484    yy_pop_state(TSRMLS_C);
1485    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1486    return T_STRING_VARNAME;
1487}
1488
1489
1490<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1491    yyless(0);
1492    yy_pop_state(TSRMLS_C);
1493    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1494    goto restart;
1495}
1496
1497<ST_IN_SCRIPTING>{BNUM} {
1498    char *bin = yytext + 2; /* Skip "0b" */
1499    int len = yyleng - 2;
1500
1501    /* Skip any leading 0s */
1502    while (*bin == '0') {
1503        ++bin;
1504        --len;
1505    }
1506
1507    if (len < SIZEOF_LONG * 8) {
1508        if (len == 0) {
1509            Z_LVAL_P(zendlval) = 0;
1510        } else {
1511            Z_LVAL_P(zendlval) = strtol(bin, NULL, 2);
1512        }
1513        zendlval->type = IS_LONG;
1514        return T_LNUMBER;
1515    } else {
1516        ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, NULL));
1517        return T_DNUMBER;
1518    }
1519}
1520
1521<ST_IN_SCRIPTING>{LNUM} {
1522    if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1523        Z_LVAL_P(zendlval) = strtol(yytext, NULL, 0);
1524    } else {
1525        errno = 0;
1526        Z_LVAL_P(zendlval) = strtol(yytext, NULL, 0);
1527        if (errno == ERANGE) { /* Overflow */
1528            if (yytext[0] == '0') { /* octal overflow */
1529                Z_DVAL_P(zendlval) = zend_oct_strtod(yytext, NULL);
1530            } else {
1531                Z_DVAL_P(zendlval) = zend_strtod(yytext, NULL);
1532            }
1533            zendlval->type = IS_DOUBLE;
1534            return T_DNUMBER;
1535        }
1536    }
1537
1538    zendlval->type = IS_LONG;
1539    return T_LNUMBER;
1540}
1541
1542<ST_IN_SCRIPTING>{HNUM} {
1543    char *hex = yytext + 2; /* Skip "0x" */
1544    int len = yyleng - 2;
1545
1546    /* Skip any leading 0s */
1547    while (*hex == '0') {
1548        hex++;
1549        len--;
1550    }
1551
1552    if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1553        if (len == 0) {
1554            Z_LVAL_P(zendlval) = 0;
1555        } else {
1556            Z_LVAL_P(zendlval) = strtol(hex, NULL, 16);
1557        }
1558        zendlval->type = IS_LONG;
1559        return T_LNUMBER;
1560    } else {
1561        ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, NULL));
1562        return T_DNUMBER;
1563    }
1564}
1565
1566<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1567    if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1568        ZVAL_LONG(zendlval, strtol(yytext, NULL, 10));
1569    } else {
1570        ZVAL_STRINGL(zendlval, yytext, yyleng, 1);
1571    }
1572    return T_NUM_STRING;
1573}
1574
1575<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1576    ZVAL_STRINGL(zendlval, yytext, yyleng, 1);
1577    return T_NUM_STRING;
1578}
1579
1580<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1581    ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
1582    return T_DNUMBER;
1583}
1584
1585<ST_IN_SCRIPTING>"__CLASS__" {
1586    zend_class_entry *ce = CG(active_class_entry);
1587    if (ce && ZEND_ACC_TRAIT == (ce->ce_flags & ZEND_ACC_TRAIT)) {
1588        /* We create a special __CLASS__ constant that is going to be resolved
1589           at run-time */
1590        Z_STRLEN_P(zendlval) = sizeof("__CLASS__")-1;
1591        Z_STRVAL_P(zendlval) = estrndup("__CLASS__", Z_STRLEN_P(zendlval));
1592        zendlval->type = IS_CONSTANT;
1593    } else {
1594        if (ce && ce->name) {
1595            ZVAL_STRINGL(zendlval, ce->name, ce->name_length, 1);
1596        } else {
1597            ZVAL_EMPTY_STRING(zendlval);
1598        }
1599    }
1600    return T_CLASS_C;
1601}
1602
1603<ST_IN_SCRIPTING>"__TRAIT__" {
1604    zend_class_entry *ce = CG(active_class_entry);
1605    if (ce && ce->name && ZEND_ACC_TRAIT == (ce->ce_flags & ZEND_ACC_TRAIT)) {
1606        ZVAL_STRINGL(zendlval, ce->name, ce->name_length, 1);
1607    } else {
1608        ZVAL_EMPTY_STRING(zendlval);
1609    }
1610    return T_TRAIT_C;
1611}
1612
1613<ST_IN_SCRIPTING>"__FUNCTION__" {
1614    zend_op_array *op_array = CG(active_op_array);
1615    if (op_array && op_array->function_name) {
1616        ZVAL_STRING(zendlval, op_array->function_name, 1);
1617    } else {
1618        ZVAL_EMPTY_STRING(zendlval);
1619    }
1620    return T_FUNC_C;
1621}
1622
1623<ST_IN_SCRIPTING>"__METHOD__" {
1624    const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
1625    const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
1626
1627    Z_STRLEN_P(zendlval) = zend_spprintf(&Z_STRVAL_P(zendlval), 0, "%s%s%s",
1628        class_name ? class_name : "",
1629        class_name && func_name ? "::" : "",
1630        func_name ? func_name : ""
1631        );
1632    zendlval->type = IS_STRING;
1633    return T_METHOD_C;
1634}
1635
1636<ST_IN_SCRIPTING>"__LINE__" {
1637    ZVAL_LONG(zendlval, CG(zend_lineno));
1638    return T_LINE;
1639}
1640
1641<ST_IN_SCRIPTING>"__FILE__" {
1642    char *filename = zend_get_compiled_filename(TSRMLS_C);
1643
1644    if (!filename) {
1645        filename = "";
1646    }
1647    ZVAL_STRING(zendlval, filename, 1);
1648    return T_FILE;
1649}
1650
1651<ST_IN_SCRIPTING>"__DIR__" {
1652    char *filename = zend_get_compiled_filename(TSRMLS_C);
1653    const size_t filename_len = strlen(filename);
1654    char *dirname;
1655
1656    if (!filename) {
1657        filename = "";
1658    }
1659
1660    dirname = estrndup(filename, filename_len);
1661    zend_dirname(dirname, filename_len);
1662
1663    if (strcmp(dirname, ".") == 0) {
1664        dirname = erealloc(dirname, MAXPATHLEN);
1665#if HAVE_GETCWD
1666        VCWD_GETCWD(dirname, MAXPATHLEN);
1667#elif HAVE_GETWD
1668        VCWD_GETWD(dirname);
1669#endif
1670    }
1671
1672    ZVAL_STRING(zendlval, dirname, 0);
1673    return T_DIR;
1674}
1675
1676<ST_IN_SCRIPTING>"__NAMESPACE__" {
1677    if (CG(current_namespace)) {
1678        *zendlval = *CG(current_namespace);
1679        zval_copy_ctor(zendlval);
1680    } else {
1681        ZVAL_EMPTY_STRING(zendlval);
1682    }
1683    return T_NS_C;
1684}
1685
1686<INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1687    YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1688
1689    if (bracket != SCNG(yy_text)) {
1690        /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1691        YYCURSOR = bracket;
1692        goto inline_html;
1693    }
1694
1695    HANDLE_NEWLINES(yytext, yyleng);
1696    ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1697    BEGIN(ST_IN_SCRIPTING);
1698    return T_OPEN_TAG;
1699}
1700
1701
1702<INITIAL>"<%=" {
1703    if (CG(asp_tags)) {
1704        ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1705        BEGIN(ST_IN_SCRIPTING);
1706        return T_OPEN_TAG_WITH_ECHO;
1707    } else {
1708        goto inline_char_handler;
1709    }
1710}
1711
1712
1713<INITIAL>"<?=" {
1714    ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1715    BEGIN(ST_IN_SCRIPTING);
1716    return T_OPEN_TAG_WITH_ECHO;
1717}
1718
1719
1720<INITIAL>"<%" {
1721    if (CG(asp_tags)) {
1722        ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1723        BEGIN(ST_IN_SCRIPTING);
1724        return T_OPEN_TAG;
1725    } else {
1726        goto inline_char_handler;
1727    }
1728}
1729
1730
1731<INITIAL>"<?php"([ \t]|{NEWLINE}) {
1732    ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1733    HANDLE_NEWLINE(yytext[yyleng-1]);
1734    BEGIN(ST_IN_SCRIPTING);
1735    return T_OPEN_TAG;
1736}
1737
1738
1739<INITIAL>"<?" {
1740    if (CG(short_tags)) {
1741        ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1742        BEGIN(ST_IN_SCRIPTING);
1743        return T_OPEN_TAG;
1744    } else {
1745        goto inline_char_handler;
1746    }
1747}
1748
1749<INITIAL>{ANY_CHAR} {
1750    if (YYCURSOR > YYLIMIT) {
1751        return 0;
1752    }
1753
1754inline_char_handler:
1755
1756    while (1) {
1757        YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1758
1759        YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1760
1761        if (YYCURSOR < YYLIMIT) {
1762            switch (*YYCURSOR) {
1763                case '?':
1764                    if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1765                        break;
1766                    }
1767                    continue;
1768                case '%':
1769                    if (CG(asp_tags)) {
1770                        break;
1771                    }
1772                    continue;
1773                case 's':
1774                case 'S':
1775                    /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1776                     * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1777                    YYCURSOR--;
1778                    yymore();
1779                default:
1780                    continue;
1781            }
1782
1783            YYCURSOR--;
1784        }
1785
1786        break;
1787    }
1788
1789inline_html:
1790    yyleng = YYCURSOR - SCNG(yy_text);
1791
1792    if (SCNG(output_filter)) {
1793        int readsize;
1794        size_t sz = 0;
1795        readsize = SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1796        Z_STRLEN_P(zendlval) = sz;
1797        if (readsize < yyleng) {
1798            yyless(readsize);
1799        }
1800    } else {
1801      Z_STRVAL_P(zendlval) = (char *) estrndup(yytext, yyleng);
1802      Z_STRLEN_P(zendlval) = yyleng;
1803    }
1804    zendlval->type = IS_STRING;
1805    HANDLE_NEWLINES(yytext, yyleng);
1806    return T_INLINE_HTML;
1807}
1808
1809
1810/* Make sure a label character follows "->", otherwise there is no property
1811 * and "->" will be taken literally
1812 */
1813<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1814    yyless(yyleng - 3);
1815    yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1816    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1817    zendlval->type = IS_STRING;
1818    return T_VARIABLE;
1819}
1820
1821/* A [ always designates a variable offset, regardless of what follows
1822 */
1823<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1824    yyless(yyleng - 1);
1825    yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1826    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1827    zendlval->type = IS_STRING;
1828    return T_VARIABLE;
1829}
1830
1831<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1832    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1833    zendlval->type = IS_STRING;
1834    return T_VARIABLE;
1835}
1836
1837<ST_VAR_OFFSET>"]" {
1838    yy_pop_state(TSRMLS_C);
1839    return ']';
1840}
1841
1842<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1843    /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1844    return yytext[0];
1845}
1846
1847<ST_VAR_OFFSET>[ \n\r\t\\'#] {
1848    /* Invalid rule to return a more explicit parse error with proper line number */
1849    yyless(0);
1850    yy_pop_state(TSRMLS_C);
1851    return T_ENCAPSED_AND_WHITESPACE;
1852}
1853
1854<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1855    zend_copy_value(zendlval, yytext, yyleng);
1856    zendlval->type = IS_STRING;
1857    return T_STRING;
1858}
1859
1860
1861<ST_IN_SCRIPTING>"#"|"//" {
1862    while (YYCURSOR < YYLIMIT) {
1863        switch (*YYCURSOR++) {
1864            case '\r':
1865                if (*YYCURSOR == '\n') {
1866                    YYCURSOR++;
1867                }
1868                /* fall through */
1869            case '\n':
1870                CG(zend_lineno)++;
1871                break;
1872            case '%':
1873                if (!CG(asp_tags)) {
1874                    continue;
1875                }
1876                /* fall through */
1877            case '?':
1878                if (*YYCURSOR == '>') {
1879                    YYCURSOR--;
1880                    break;
1881                }
1882                /* fall through */
1883            default:
1884                continue;
1885        }
1886
1887        break;
1888    }
1889
1890    yyleng = YYCURSOR - SCNG(yy_text);
1891
1892    return T_COMMENT;
1893}
1894
1895<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1896    int doc_com;
1897
1898    if (yyleng > 2) {
1899        doc_com = 1;
1900        RESET_DOC_COMMENT();
1901    } else {
1902        doc_com = 0;
1903    }
1904
1905    while (YYCURSOR < YYLIMIT) {
1906        if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1907            break;
1908        }
1909    }
1910
1911    if (YYCURSOR < YYLIMIT) {
1912        YYCURSOR++;
1913    } else {
1914        zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1915    }
1916
1917    yyleng = YYCURSOR - SCNG(yy_text);
1918    HANDLE_NEWLINES(yytext, yyleng);
1919
1920    if (doc_com) {
1921        CG(doc_comment) = estrndup(yytext, yyleng);
1922        CG(doc_comment_len) = yyleng;
1923        return T_DOC_COMMENT;
1924    }
1925
1926    return T_COMMENT;
1927}
1928
1929<ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1930    ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1931    BEGIN(INITIAL);
1932    return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1933}
1934
1935
1936<ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1937    if (CG(asp_tags)) {
1938        BEGIN(INITIAL);
1939        ZVAL_STRINGL(zendlval, yytext, yyleng, 0); /* no copying - intentional */
1940        return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1941    } else {
1942        yyless(1);
1943        return yytext[0];
1944    }
1945}
1946
1947
1948<ST_IN_SCRIPTING>b?['] {
1949    register char *s, *t;
1950    char *end;
1951    int bprefix = (yytext[0] != '\'') ? 1 : 0;
1952
1953    while (1) {
1954        if (YYCURSOR < YYLIMIT) {
1955            if (*YYCURSOR == '\'') {
1956                YYCURSOR++;
1957                yyleng = YYCURSOR - SCNG(yy_text);
1958
1959                break;
1960            } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1961                YYCURSOR++;
1962            }
1963        } else {
1964            yyleng = YYLIMIT - SCNG(yy_text);
1965
1966            /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1967             * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1968             * rule, which continued in ST_IN_SCRIPTING state after the quote */
1969            return T_ENCAPSED_AND_WHITESPACE;
1970        }
1971    }
1972
1973    ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2, 1);
1974
1975    /* convert escape sequences */
1976    s = t = Z_STRVAL_P(zendlval);
1977    end = s+Z_STRLEN_P(zendlval);
1978    while (s<end) {
1979        if (*s=='\\') {
1980            s++;
1981
1982            switch(*s) {
1983                case '\\':
1984                case '\'':
1985                    *t++ = *s;
1986                    Z_STRLEN_P(zendlval)--;
1987                    break;
1988                default:
1989                    *t++ = '\\';
1990                    *t++ = *s;
1991                    break;
1992            }
1993        } else {
1994            *t++ = *s;
1995        }
1996
1997        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1998            CG(zend_lineno)++;
1999        }
2000        s++;
2001    }
2002    *t = 0;
2003
2004    if (SCNG(output_filter)) {
2005        size_t sz = 0;
2006        s = Z_STRVAL_P(zendlval);
2007        SCNG(output_filter)((unsigned char **)&Z_STRVAL_P(zendlval), &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
2008        Z_STRLEN_P(zendlval) = sz;
2009        efree(s);
2010    }
2011    return T_CONSTANT_ENCAPSED_STRING;
2012}
2013
2014
2015<ST_IN_SCRIPTING>b?["] {
        /* Rule: an opening double quote, optionally preceded by one prefix character
         * (the 'b' of the pattern), seen in ST_IN_SCRIPTING.
         *
         * The action scans ahead from the current cursor. If the closing '"' is found
         * before any interpolation start, the whole literal is converted with
         * zend_scan_escape_string() and returned as T_CONSTANT_ENCAPSED_STRING.
         * Otherwise only the '"' token is returned, the cursor is rewound to just
         * after the matched opening token, and the scanner enters ST_DOUBLE_QUOTES. */
        /* bprefix is 1 when the match does not start with the quote itself */
2016    int bprefix = (yytext[0] != '"') ? 1 : 0;
2017
2018    while (YYCURSOR < YYLIMIT) {
2019        switch (*YYCURSOR++) {
2020            case '"':
                        /* Closing quote reached first: text between the quotes
                         * (prefix and both quotes excluded) becomes the token value. */
2021                yyleng = YYCURSOR - SCNG(yy_text);
2022                zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2023                return T_CONSTANT_ENCAPSED_STRING;
2024            case '$':
                        /* '$' followed by a label-start character or '{' stops the scan */
2025                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2026                    break;
2027                }
2028                continue;
2029            case '{':
                        /* '{' followed by '$' stops the scan */
2030                if (*YYCURSOR == '$') {
2031                    break;
2032                }
2033                continue;
2034            case '\\':
                        /* Skip the byte after a backslash (if any is left) so that an
                         * escaped '"', '$' or '{' is not examined by this loop. */
2035                if (YYCURSOR < YYLIMIT) {
2036                    YYCURSOR++;
2037                }
2038                /* fall through */
2039            default:
2040                continue;
2041        }
2042
            /* Only reached through a 'break' out of the switch: step back onto the
             * '$' or '{' that stopped the scan and leave the loop. */
2043        YYCURSOR--;
2044        break;
2045    }
2046
2047    /* Remember how much was scanned to save rescanning */
        /* The stored value is the distance from the end of the matched opening token
         * (yy_text + yyleng) to the position where the scan stopped; it is read back
         * with GET_DOUBLE_QUOTES_SCANNED_LENGTH() by the ST_DOUBLE_QUOTES text rule. */
2048    SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2049
        /* Rewind so that scanning resumes right after the matched opening token */
2050    YYCURSOR = SCNG(yy_text) + yyleng;
2051
2052    BEGIN(ST_DOUBLE_QUOTES);
2053    return '"';
2054}
2055
2056
2057<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
        /* Rule: heredoc/nowdoc opener: optional prefix character, "<<<", optional
         * blanks, a label that is bare, single-quoted or double-quoted, then a newline.
         *
         * The action extracts the label, selects ST_NOWDOC (single-quoted label) or
         * ST_HEREDOC (bare or double-quoted label), switches to ST_END_HEREDOC instead
         * when the closing label follows immediately on the next line, pushes the
         * label record on SCNG(heredoc_label_stack) and returns T_START_HEREDOC. */
2058    char *s;
        /* bprefix is 1 when the match does not start with '<' */
2059    int bprefix = (yytext[0] != '<') ? 1 : 0;
2060    zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2061
        /* Count the newline that is part of the matched text */
2062    CG(zend_lineno)++;
        /* Start from the match length minus the prefix, minus 3 for "<<<", minus 1 for
         * the final newline byte, and minus 1 more when that newline is preceded by '\r'.
         * Blanks and quotes are subtracted below. */
2063    heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2064    s = yytext+bprefix+3;
        /* Skip the blanks between "<<<" and the label */
2065    while ((*s == ' ') || (*s == '\t')) {
2066        s++;
2067        heredoc_label->length--;
2068    }
2069
2070    if (*s == '\'') {
                /* Single-quoted label: drop both quotes and scan the body as nowdoc */
2071        s++;
2072        heredoc_label->length -= 2;
2073
2074        BEGIN(ST_NOWDOC);
2075    } else {
2076        if (*s == '"') {
                    /* Double-quoted label: drop both quotes; still a heredoc */
2077            s++;
2078            heredoc_label->length -= 2;
2079        }
2080
2081        BEGIN(ST_HEREDOC);
2082    }
2083
        /* Keep a private copy of the label text; s points into the scanner buffer */
2084    heredoc_label->label = estrndup(s, heredoc_label->length);
2085
2086    /* Check for ending label on the next line */
        /* The label must fit strictly inside the remaining input and match byte for
         * byte at the cursor; an optional ';' may follow, and then a line break must
         * come next. When all of that holds the state chosen above is replaced by
         * ST_END_HEREDOC, so no body text is scanned. */
2087    if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2088        YYCTYPE *end = YYCURSOR + heredoc_label->length;
2089
2090        if (*end == ';') {
2091            end++;
2092        }
2093
2094        if (*end == '\n' || *end == '\r') {
2095            BEGIN(ST_END_HEREDOC);
2096        }
2097    }
2098
        /* The record stays on the stack until the ST_END_HEREDOC rule pops it */
2099    zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2100
2101    return T_START_HEREDOC;
2102}
2103
2104
2105<ST_IN_SCRIPTING>[`] {
        /* Rule: a backquote in ST_IN_SCRIPTING. Enters ST_BACKQUOTE and returns the
         * backquote character itself as the token. */
2106    BEGIN(ST_BACKQUOTE);
2107    return '`';
2108}
2109
2110
2111<ST_END_HEREDOC>{ANY_CHAR} {
        /* Rule: any character while in ST_END_HEREDOC. That state is entered by the
         * heredoc/nowdoc rules once they have verified that the closing label sits at
         * the cursor, so this action consumes the label, discards its record and
         * returns T_END_HEREDOC with the scanner back in ST_IN_SCRIPTING. */
2112    zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2113
        /* The pattern matches a single character, so advance by the rest of the label */
2114    YYCURSOR += heredoc_label->length - 1;
2115    yyleng = heredoc_label->length;
2116
        /* NOTE(review): heredoc_label_dtor() is defined elsewhere; presumably it
         * releases the label string copied with estrndup() -- confirm. The record
         * itself is freed here. */
2117    heredoc_label_dtor(heredoc_label);
2118    efree(heredoc_label);
2119
2120    BEGIN(ST_IN_SCRIPTING);
2121    return T_END_HEREDOC;
2122}
2123
2124
2125<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
        /* Rule: the two-character sequence "{$" inside a double-quoted string, a
         * backquoted string or a heredoc body. Stores '{' as the token's integer value,
         * pushes ST_IN_SCRIPTING as the new scanner state and returns T_CURLY_OPEN. */
2126    Z_LVAL_P(zendlval) = (long) '{';
2127    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
        /* NOTE(review): yyless() is defined elsewhere; presumably yyless(1) keeps only
         * the '{' in this token so the '$' is scanned again in the new state -- confirm. */
2128    yyless(1);
2129    return T_CURLY_OPEN;
2130}
2131
2132
2133<ST_DOUBLE_QUOTES>["] {
        /* Rule: a double quote while in ST_DOUBLE_QUOTES. Switches back to
         * ST_IN_SCRIPTING and returns the quote character as the token. */
2134    BEGIN(ST_IN_SCRIPTING);
2135    return '"';
2136}
2137
2138<ST_BACKQUOTE>[`] {
        /* Rule: a backquote while in ST_BACKQUOTE. Switches back to ST_IN_SCRIPTING
         * and returns the backquote character as the token. */
2139    BEGIN(ST_IN_SCRIPTING);
2140    return '`';
2141}
2142
2143
2144<ST_DOUBLE_QUOTES>{ANY_CHAR} {
        /* Rule: any other character while in ST_DOUBLE_QUOTES. Collects literal text
         * up to (not including) the next '"', a '$' followed by a label-start
         * character or '{', or a '{' followed by '$'. The text is converted with
         * zend_scan_escape_string() and returned as T_ENCAPSED_AND_WHITESPACE. */

        /* Fast path: the opening-quote rule stores, via
         * SET_DOUBLE_QUOTES_SCANNED_LENGTH(), how far it already scanned. Jump
         * straight to that position (minus the one character this pattern consumed),
         * clear the stored length and skip the loop. */
2145    if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2146        YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2147        SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2148
2149        goto double_quotes_scan_done;
2150    }
2151
        /* Cursor already beyond the limit: return 0 (presumably the end-of-input
         * token -- confirm against the parser). */
2152    if (YYCURSOR > YYLIMIT) {
2153        return 0;
2154    }
        /* The matched character was a backslash: also consume the byte it escapes */
2155    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2156        YYCURSOR++;
2157    }
2158
2159    while (YYCURSOR < YYLIMIT) {
2160        switch (*YYCURSOR++) {
2161            case '"':
                        /* Closing quote: stop; it is left for the closing-quote rule */
2162                break;
2163            case '$':
2164                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2165                    break;
2166                }
2167                continue;
2168            case '{':
2169                if (*YYCURSOR == '$') {
2170                    break;
2171                }
2172                continue;
2173            case '\\':
                        /* Skip the escaped byte so it cannot stop the scan */
2174                if (YYCURSOR < YYLIMIT) {
2175                    YYCURSOR++;
2176                }
2177                /* fall through */
2178            default:
2179                continue;
2180        }
2181
            /* Only reached through a 'break' out of the switch: un-read the
             * terminating character and leave the loop. */
2182        YYCURSOR--;
2183        break;
2184    }
2185
2186double_quotes_scan_done:
2187    yyleng = YYCURSOR - SCNG(yy_text);
2188
2189    zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2190    return T_ENCAPSED_AND_WHITESPACE;
2191}
2192
2193
2194<ST_BACKQUOTE>{ANY_CHAR} {
        /* Rule: any other character while in ST_BACKQUOTE. Collects literal text up to
         * (not including) the next backquote, a '$' followed by a label-start
         * character or '{', or a '{' followed by '$'. The text is converted with
         * zend_scan_escape_string() using '`' as the quote type and returned as
         * T_ENCAPSED_AND_WHITESPACE. */

        /* Cursor already beyond the limit: return 0 (presumably the end-of-input
         * token -- confirm against the parser). */
2195    if (YYCURSOR > YYLIMIT) {
2196        return 0;
2197    }
        /* The matched character was a backslash: also consume the byte it escapes */
2198    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2199        YYCURSOR++;
2200    }
2201
2202    while (YYCURSOR < YYLIMIT) {
2203        switch (*YYCURSOR++) {
2204            case '`':
                        /* Closing backquote: stop; it is left for the closing rule */
2205                break;
2206            case '$':
2207                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2208                    break;
2209                }
2210                continue;
2211            case '{':
2212                if (*YYCURSOR == '$') {
2213                    break;
2214                }
2215                continue;
2216            case '\\':
                        /* Skip the escaped byte so it cannot stop the scan */
2217                if (YYCURSOR < YYLIMIT) {
2218                    YYCURSOR++;
2219                }
2220                /* fall through */
2221            default:
2222                continue;
2223        }
2224
            /* Only reached through a 'break' out of the switch: un-read the
             * terminating character and leave the loop. */
2225        YYCURSOR--;
2226        break;
2227    }
2228
2229    yyleng = YYCURSOR - SCNG(yy_text);
2230
2231    zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2232    return T_ENCAPSED_AND_WHITESPACE;
2233}
2234
2235
2236<ST_HEREDOC>{ANY_CHAR} {
        /* Rule: any character while in ST_HEREDOC. Collects heredoc body text until
         * either an interpolation start ('$' followed by a label-start character or
         * '{', or '{' followed by '$') or a line break that is immediately followed by
         * the closing label. The text is converted with zend_scan_escape_string()
         * (quote type 0) and returned as T_ENCAPSED_AND_WHITESPACE. */

        /* Number of line-break bytes directly before the closing label (0, 1 or 2);
         * they stay in yyleng but are left out of the returned string. */
2237    int newline = 0;
2238
        /* Label of the heredoc currently being scanned; only inspected, not popped */
2239    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2240
        /* Cursor already beyond the limit: return 0 (presumably the end-of-input
         * token -- confirm against the parser). */
2241    if (YYCURSOR > YYLIMIT) {
2242        return 0;
2243    }
2244
        /* Step back onto the character the pattern consumed so the loop below
         * examines it like every other character. */
2245    YYCURSOR--;
2246
2247    while (YYCURSOR < YYLIMIT) {
2248        switch (*YYCURSOR++) {
2249            case '\r':
                        /* Treat "\r\n" as a single line break */
2250                if (*YYCURSOR == '\n') {
2251                    YYCURSOR++;
2252                }
2253                /* fall through */
2254            case '\n':
2255                /* Check for ending label on the next line */
                        /* The label must start right after the line break, fit strictly
                         * inside the remaining input and match byte for byte; an
                         * optional ';' may follow, then another line break is required. */
2256                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2257                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2258
2259                    if (*end == ';') {
2260                        end++;
2261                    }
2262
2263                    if (*end == '\n' || *end == '\r') {
2264                        /* newline before label will be subtracted from returned text, but
2265                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2266                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2267                            newline = 2; /* Windows newline */
2268                        } else {
2269                            newline = 1;
2270                        }
2271
2272                        CG(increment_lineno) = 1; /* For newline before label */
                                /* The next token is the closing label itself */
2273                        BEGIN(ST_END_HEREDOC);
2274
2275                        goto heredoc_scan_done;
2276                    }
2277                }
2278                continue;
2279            case '$':
2280                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2281                    break;
2282                }
2283                continue;
2284            case '{':
2285                if (*YYCURSOR == '$') {
2286                    break;
2287                }
2288                continue;
2289            case '\\':
                        /* Skip the escaped byte, except when it is a line break: that
                         * one must still go through the closing-label check above. */
2290                if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2291                    YYCURSOR++;
2292                }
2293                /* fall through */
2294            default:
2295                continue;
2296        }
2297
            /* Only reached through a 'break' out of the switch: un-read the '$' or
             * '{' that starts the interpolation and leave the loop. */
2298        YYCURSOR--;
2299        break;
2300    }
2301
2302heredoc_scan_done:
2303    yyleng = YYCURSOR - SCNG(yy_text);
2304
2305    zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2306    return T_ENCAPSED_AND_WHITESPACE;
2307}
2308
2309
2310<ST_NOWDOC>{ANY_CHAR} {
        /* Rule: any character while in ST_NOWDOC. Collects nowdoc body text until a
         * line break that is immediately followed by the closing label, or until the
         * input limit. '$', '{' and backslash get no special treatment here. The raw
         * text is copied into zendlval as a string and returned as
         * T_ENCAPSED_AND_WHITESPACE. */

        /* Number of line-break bytes directly before the closing label (0, 1 or 2);
         * they stay in yyleng but are left out of the returned string. */
2311    int newline = 0;
2312
        /* Label of the nowdoc currently being scanned; only inspected, not popped */
2313    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2314
        /* Cursor already beyond the limit: return 0 (presumably the end-of-input
         * token -- confirm against the parser). */
2315    if (YYCURSOR > YYLIMIT) {
2316        return 0;
2317    }
2318
        /* Step back onto the character the pattern consumed so the loop below
         * examines it like every other character. */
2319    YYCURSOR--;
2320
2321    while (YYCURSOR < YYLIMIT) {
2322        switch (*YYCURSOR++) {
2323            case '\r':
                        /* Treat "\r\n" as a single line break */
2324                if (*YYCURSOR == '\n') {
2325                    YYCURSOR++;
2326                }
2327                /* fall through */
2328            case '\n':
2329                /* Check for ending label on the next line */
                        /* The label must start right after the line break, fit strictly
                         * inside the remaining input and match byte for byte; an
                         * optional ';' may follow, then another line break is required. */
2330                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2331                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2332
2333                    if (*end == ';') {
2334                        end++;
2335                    }
2336
2337                    if (*end == '\n' || *end == '\r') {
2338                        /* newline before label will be subtracted from returned text, but
2339                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2340                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2341                            newline = 2; /* Windows newline */
2342                        } else {
2343                            newline = 1;
2344                        }
2345
2346                        CG(increment_lineno) = 1; /* For newline before label */
                                /* The next token is the closing label itself */
2347                        BEGIN(ST_END_HEREDOC);
2348
2349                        goto nowdoc_scan_done;
2350                    }
2351                }
2352                /* fall through */
2353            default:
2354                continue;
2355        }
2356    }
2357
2358nowdoc_scan_done:
2359    yyleng = YYCURSOR - SCNG(yy_text);
2360
        /* Copy the text without the line break that precedes the closing label */
2361    zend_copy_value(zendlval, yytext, yyleng - newline);
2362    zendlval->type = IS_STRING;
        /* NOTE(review): HANDLE_NEWLINES is defined elsewhere; presumably it advances
         * the line counter for the line breaks inside the given range -- confirm. */
2363    HANDLE_NEWLINES(yytext, yyleng - newline);
2364    return T_ENCAPSED_AND_WHITESPACE;
2365}
2366
2367
2368<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
        /* Rule: any character in ST_IN_SCRIPTING or ST_VAR_OFFSET that reaches this
         * rule instead of a more specific one. Returns 0 when the cursor is already
         * beyond the limit; otherwise raises a compile warning that shows the
         * character, its numeric value and the current scanner state, then jumps to
         * the 'restart' label (defined outside this rule). */
2369    if (YYCURSOR > YYLIMIT) {
2370        return 0;
2371    }
2372
2373    zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2374    goto restart;
2375}
2376
2377*/
2378}
2379