1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Marcus Boerger <helly@php.net>                              |
16   |          Nuno Lopes <nlopess@php.net>                                |
17   |          Scott MacVicar <scottmac@php.net>                           |
18   | Flex version authors:                                                |
19   |          Andi Gutmans <andi@zend.com>                                |
20   |          Zeev Suraski <zeev@zend.com>                                |
21   +----------------------------------------------------------------------+
22*/
23
24/* $Id$ */
25
/* Debug trace hook taking a scanner state and a character. Flip the "#if 0"
 * to "#if 1" to print every state/character pair; as shipped, YYDEBUG
 * expands to nothing.
 * NOTE(review): presumably invoked by the re2c-generated scanner body, which
 * is not visible here. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
31
32#include "zend_language_scanner_defs.h"
33
34#include <errno.h>
35#include "zend.h"
36#ifdef PHP_WIN32
37# include <Winuser.h>
38#endif
39#include "zend_alloc.h"
40#include <zend_language_parser.h>
41#include "zend_compile.h"
42#include "zend_language_scanner.h"
43#include "zend_highlight.h"
44#include "zend_constants.h"
45#include "zend_variables.h"
46#include "zend_operators.h"
47#include "zend_API.h"
48#include "zend_strtod.h"
49#include "zend_exceptions.h"
50#include "zend_virtual_cwd.h"
51#include "tsrm_config_common.h"
52
/* Scanner interface macros: the cursor, limit and marker all live in the
 * scanner globals (SCNG) and the input is treated as unsigned bytes. */
#define YYCTYPE   unsigned char
/* Makes the enclosing function return 0 once the cursor plus the requested
 * lookahead `n` reaches YYLIMIT + ZEND_MMAP_AHEAD. Note that this macro
 * contains a `return` statement. */
#define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* The current start condition is stored in SCNG(yy_state). */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = s

/* Maps a condition name to its `yyc`-prefixed identifier. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* yyless(x): move the cursor to x bytes past the token start and set the
 * token length to x. `x` is not parenthesized in the expansion. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + x; \
                          yyleng   = (unsigned int)x; } while(0)
/* yymore(): jumps to a `yymore_restart` label that the using function must
 * provide. */
#define yymore()     goto yymore_restart
72
73/* perform sanity check. If this message is triggered you should
74   increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75/*!max:re2c */
76#if ZEND_MMAP_AHEAD < YYMAXFILL
77# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78#endif
79
80#ifdef HAVE_STDARG_H
81# include <stdarg.h>
82#endif
83
84#ifdef HAVE_UNISTD_H
85# include <unistd.h>
86#endif
87
/* Globals Macros */
/* SCNG is this file's shorthand for LANG_SCNG, the scanner-globals accessor. */
#define SCNG    LANG_SCNG
#ifdef ZTS
/* ZTS builds define a resource id for the scanner globals. */
ZEND_API ts_rsrc_id language_scanner_globals_id;
#else
/* Non-ZTS builds define the scanner globals as one plain global object. */
ZEND_API zend_php_scanner_globals language_scanner_globals;
#endif
95
/* Bumps CG(zend_lineno) once for every line break found in the `l` bytes
 * starting at `s`. A '\n' always counts; a '\r' counts only when it is not
 * followed by '\n', so a "\r\n" pair is counted once.
 * NOTE(review): the lookahead reads *(p+1), i.e. one byte past the range when
 * the last byte is '\r' - the buffer must stay readable one byte beyond
 * s + l. */
#define HANDLE_NEWLINES(s, l)                                                   \
do {                                                                            \
    char *p = (s), *boundary = p+(l);                                           \
                                                                                \
    while (p<boundary) {                                                        \
        if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) {                   \
            CG(zend_lineno)++;                                                  \
        }                                                                       \
        p++;                                                                    \
    }                                                                           \
} while (0)
107
/* Bumps CG(zend_lineno) when the single character `c` is '\n' or '\r'.
 * NOTE(review): `c` is unparenthesized and may be evaluated twice, and the
 * expansion is a bare brace block rather than do { } while (0); pass a
 * side-effect-free expression. */
#define HANDLE_NEWLINE(c) \
{ \
    if (c == '\n' || c == '\r') { \
        CG(zend_lineno)++; \
    } \
}
114
/* To save initial string length after scanning to first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

/* True for ASCII letters, '_' and any value >= 0x7F. The argument is
 * evaluated several times. */
#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F)

/* Octal ('0'-'7') and hexadecimal ('0'-'9', 'a'-'f', 'A'-'F') digit tests. */
#define ZEND_IS_OCT(c)  ((c)>='0' && (c)<='7')
#define ZEND_IS_HEX(c)  (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F'))
123
124BEGIN_EXTERN_C()
125
126static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
127{
128    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
129    ZEND_ASSERT(internal_encoding);
130    return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
131}
132
133static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
134{
135    return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
136}
137
138static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
139{
140    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
142}
143
144static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
145{
146    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
147    ZEND_ASSERT(internal_encoding);
148    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149internal_encoding, zend_multibyte_encoding_utf8);
150}
151
152
153static void _yy_push_state(int new_state)
154{
155    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156    YYSETCONDITION(new_state);
157}
158
159#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
161static void yy_pop_state(void)
162{
163    int *stack_state = zend_stack_top(&SCNG(state_stack));
164    YYSETCONDITION(*stack_state);
165    zend_stack_del_top(&SCNG(state_stack));
166}
167
168static void yy_scan_buffer(char *str, unsigned int len)
169{
170    YYCURSOR       = (YYCTYPE*)str;
171    YYLIMIT        = YYCURSOR + len;
172    if (!SCNG(yy_start)) {
173        SCNG(yy_start) = YYCURSOR;
174    }
175}
176
177void startup_scanner(void)
178{
179    CG(parse_error) = 0;
180    CG(doc_comment) = NULL;
181    zend_stack_init(&SCNG(state_stack), sizeof(int));
182    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
183}
184
185static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
186    efree(heredoc_label->label);
187}
188
189void shutdown_scanner(void)
190{
191    CG(parse_error) = 0;
192    RESET_DOC_COMMENT();
193    zend_stack_destroy(&SCNG(state_stack));
194    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
195    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
196}
197
198ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
199{
200    lex_state->yy_leng   = SCNG(yy_leng);
201    lex_state->yy_start  = SCNG(yy_start);
202    lex_state->yy_text   = SCNG(yy_text);
203    lex_state->yy_cursor = SCNG(yy_cursor);
204    lex_state->yy_marker = SCNG(yy_marker);
205    lex_state->yy_limit  = SCNG(yy_limit);
206
207    lex_state->state_stack = SCNG(state_stack);
208    zend_stack_init(&SCNG(state_stack), sizeof(int));
209
210    lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
211    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
212
213    lex_state->in = SCNG(yy_in);
214    lex_state->yy_state = YYSTATE;
215    lex_state->filename = zend_get_compiled_filename();
216    lex_state->lineno = CG(zend_lineno);
217
218    lex_state->script_org = SCNG(script_org);
219    lex_state->script_org_size = SCNG(script_org_size);
220    lex_state->script_filtered = SCNG(script_filtered);
221    lex_state->script_filtered_size = SCNG(script_filtered_size);
222    lex_state->input_filter = SCNG(input_filter);
223    lex_state->output_filter = SCNG(output_filter);
224    lex_state->script_encoding = SCNG(script_encoding);
225}
226
227ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
228{
229    SCNG(yy_leng)   = lex_state->yy_leng;
230    SCNG(yy_start)  = lex_state->yy_start;
231    SCNG(yy_text)   = lex_state->yy_text;
232    SCNG(yy_cursor) = lex_state->yy_cursor;
233    SCNG(yy_marker) = lex_state->yy_marker;
234    SCNG(yy_limit)  = lex_state->yy_limit;
235
236    zend_stack_destroy(&SCNG(state_stack));
237    SCNG(state_stack) = lex_state->state_stack;
238
239    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
240    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
241    SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
242
243    SCNG(yy_in) = lex_state->in;
244    YYSETCONDITION(lex_state->yy_state);
245    CG(zend_lineno) = lex_state->lineno;
246    zend_restore_compiled_filename(lex_state->filename);
247
248    if (SCNG(script_filtered)) {
249        efree(SCNG(script_filtered));
250        SCNG(script_filtered) = NULL;
251    }
252    SCNG(script_org) = lex_state->script_org;
253    SCNG(script_org_size) = lex_state->script_org_size;
254    SCNG(script_filtered) = lex_state->script_filtered;
255    SCNG(script_filtered_size) = lex_state->script_filtered_size;
256    SCNG(input_filter) = lex_state->input_filter;
257    SCNG(output_filter) = lex_state->output_filter;
258    SCNG(script_encoding) = lex_state->script_encoding;
259
260    RESET_DOC_COMMENT();
261}
262
263ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
264{
265    zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
266    /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
267    file_handle->opened_path = NULL;
268    if (file_handle->free_filename) {
269        file_handle->filename = NULL;
270    }
271}
272
/* Byte-order marks compared against the start of a script. The UTF-32 LE
 * mark begins with the same two bytes as the UTF-16 LE mark, so the UTF-32
 * marks have to be tested before the UTF-16 ones. Lengths are taken with
 * sizeof(...)-1 because two of the marks contain NUL bytes. */
#define BOM_UTF32_BE    "\x00\x00\xfe\xff"
#define BOM_UTF32_LE    "\xff\xfe\x00\x00"
#define BOM_UTF16_BE    "\xfe\xff"
#define BOM_UTF16_LE    "\xff\xfe"
#define BOM_UTF8        "\xef\xbb\xbf"
278
279static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
280{
281    const unsigned char *p;
282    int wchar_size = 2;
283    int le = 0;
284
285    /* utf-16 or utf-32? */
286    p = script;
287    assert(p >= script);
288    while ((size_t)(p-script) < script_size) {
289        p = memchr(p, 0, script_size-(p-script)-2);
290        if (!p) {
291            break;
292        }
293        if (*(p+1) == '\0' && *(p+2) == '\0') {
294            wchar_size = 4;
295            break;
296        }
297
298        /* searching for UTF-32 specific byte orders, so this will do */
299        p += 4;
300    }
301
302    /* BE or LE? */
303    p = script;
304    assert(p >= script);
305    while ((size_t)(p-script) < script_size) {
306        if (*p == '\0' && *(p+wchar_size-1) != '\0') {
307            /* BE */
308            le = 0;
309            break;
310        } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
311            /* LE* */
312            le = 1;
313            break;
314        }
315        p += wchar_size;
316    }
317
318    if (wchar_size == 2) {
319        return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
320    } else {
321        return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
322    }
323
324    return NULL;
325}
326
327static const zend_encoding* zend_multibyte_detect_unicode(void)
328{
329    const zend_encoding *script_encoding = NULL;
330    int bom_size;
331    unsigned char *pos1, *pos2;
332
333    if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
334        return NULL;
335    }
336
337    /* check out BOM */
338    if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
339        script_encoding = zend_multibyte_encoding_utf32be;
340        bom_size = sizeof(BOM_UTF32_BE)-1;
341    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
342        script_encoding = zend_multibyte_encoding_utf32le;
343        bom_size = sizeof(BOM_UTF32_LE)-1;
344    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
345        script_encoding = zend_multibyte_encoding_utf16be;
346        bom_size = sizeof(BOM_UTF16_BE)-1;
347    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
348        script_encoding = zend_multibyte_encoding_utf16le;
349        bom_size = sizeof(BOM_UTF16_LE)-1;
350    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
351        script_encoding = zend_multibyte_encoding_utf8;
352        bom_size = sizeof(BOM_UTF8)-1;
353    }
354
355    if (script_encoding) {
356        /* remove BOM */
357        LANG_SCNG(script_org) += bom_size;
358        LANG_SCNG(script_org_size) -= bom_size;
359
360        return script_encoding;
361    }
362
363    /* script contains NULL bytes -> auto-detection */
364    if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
365        /* check if the NULL byte is after the __HALT_COMPILER(); */
366        pos2 = LANG_SCNG(script_org);
367
368        while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
369            pos2 = memchr(pos2, '_', pos1 - pos2);
370            if (!pos2) break;
371            pos2++;
372            if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
373                pos2 += sizeof("_HALT_COMPILER")-1;
374                while (*pos2 == ' '  ||
375                       *pos2 == '\t' ||
376                       *pos2 == '\r' ||
377                       *pos2 == '\n') {
378                    pos2++;
379                }
380                if (*pos2 == '(') {
381                    pos2++;
382                    while (*pos2 == ' '  ||
383                           *pos2 == '\t' ||
384                           *pos2 == '\r' ||
385                           *pos2 == '\n') {
386                        pos2++;
387                    }
388                    if (*pos2 == ')') {
389                        pos2++;
390                        while (*pos2 == ' '  ||
391                               *pos2 == '\t' ||
392                               *pos2 == '\r' ||
393                               *pos2 == '\n') {
394                            pos2++;
395                        }
396                        if (*pos2 == ';') {
397                            return NULL;
398                        }
399                    }
400                }
401            }
402        }
403        /* make best effort if BOM is missing */
404        return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
405    }
406
407    return NULL;
408}
409
410static const zend_encoding* zend_multibyte_find_script_encoding(void)
411{
412    const zend_encoding *script_encoding;
413
414    if (CG(detect_unicode)) {
415        /* check out bom(byte order mark) and see if containing wchars */
416        script_encoding = zend_multibyte_detect_unicode();
417        if (script_encoding != NULL) {
418            /* bom or wchar detection is prior to 'script_encoding' option */
419            return script_encoding;
420        }
421    }
422
423    /* if no script_encoding specified, just leave alone */
424    if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
425        return NULL;
426    }
427
428    /* if multiple encodings specified, detect automagically */
429    if (CG(script_encoding_list_size) > 1) {
430        return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
431    }
432
433    return CG(script_encoding_list)[0];
434}
435
436ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
437{
438    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
439    const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
440
441    if (!script_encoding) {
442        return FAILURE;
443    }
444
445    /* judge input/output filter */
446    LANG_SCNG(script_encoding) = script_encoding;
447    LANG_SCNG(input_filter) = NULL;
448    LANG_SCNG(output_filter) = NULL;
449
450    if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
451        if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
452            /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
453            LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
454            LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
455        } else {
456            LANG_SCNG(input_filter) = NULL;
457            LANG_SCNG(output_filter) = NULL;
458        }
459        return SUCCESS;
460    }
461
462    if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
463        LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
464        LANG_SCNG(output_filter) = NULL;
465    } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
466        LANG_SCNG(input_filter) = NULL;
467        LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
468    } else {
469        /* both script and internal encodings are incompatible w/ flex */
470        LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
471        LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
472    }
473
474    return 0;
475}
476
477ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
478{
479    const char *file_path = NULL;
480    char *buf;
481    size_t size, offset = 0;
482    zend_string *compiled_filename;
483
484    /* The shebang line was read, get the current position to obtain the buffer start */
485    if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
486        if ((offset = ftell(file_handle->handle.fp)) == -1) {
487            offset = 0;
488        }
489    }
490
491    if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
492        return FAILURE;
493    }
494
495    zend_llist_add_element(&CG(open_files), file_handle);
496    if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
497        zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
498        size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
499        fh->handle.stream.handle = (void*)(((char*)fh) + diff);
500        file_handle->handle.stream.handle = fh->handle.stream.handle;
501    }
502
503    /* Reset the scanner for scanning the new file */
504    SCNG(yy_in) = file_handle;
505    SCNG(yy_start) = NULL;
506
507    if (size != -1) {
508        if (CG(multibyte)) {
509            SCNG(script_org) = (unsigned char*)buf;
510            SCNG(script_org_size) = size;
511            SCNG(script_filtered) = NULL;
512
513            zend_multibyte_set_filter(NULL);
514
515            if (SCNG(input_filter)) {
516                if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
517                    zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
518                            "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
519                }
520                buf = (char*)SCNG(script_filtered);
521                size = SCNG(script_filtered_size);
522            }
523        }
524        SCNG(yy_start) = (unsigned char *)buf - offset;
525        yy_scan_buffer(buf, (unsigned int)size);
526    } else {
527        zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
528    }
529
530    BEGIN(INITIAL);
531
532    if (file_handle->opened_path) {
533        file_path = file_handle->opened_path;
534    } else {
535        file_path = file_handle->filename;
536    }
537
538    compiled_filename = zend_string_init(file_path, strlen(file_path), 0);
539    zend_set_compiled_filename(compiled_filename);
540    zend_string_release(compiled_filename);
541
542    if (CG(start_lineno)) {
543        CG(zend_lineno) = CG(start_lineno);
544        CG(start_lineno) = 0;
545    } else {
546        CG(zend_lineno) = 1;
547    }
548
549    RESET_DOC_COMMENT();
550    CG(increment_lineno) = 0;
551    return SUCCESS;
552}
553END_EXTERN_C()
554
555
556ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
557{
558    zend_lex_state original_lex_state;
559    zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
560    zend_op_array *original_active_op_array = CG(active_op_array);
561    int compiler_result;
562    zend_bool compilation_successful=0;
563    zval retval_zv;
564    zend_bool original_in_compilation = CG(in_compilation);
565
566    ZVAL_LONG(&retval_zv, 1);
567
568    zend_save_lexical_state(&original_lex_state);
569
570    if (open_file_for_scanning(file_handle)==FAILURE) {
571        if (type==ZEND_REQUIRE) {
572            zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
573            zend_bailout();
574        } else {
575            zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
576        }
577        compilation_successful=0;
578    } else {
579        init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE);
580        CG(in_compilation) = 1;
581        CG(active_op_array) = op_array;
582        zend_stack_push(&CG(context_stack), (void *) &CG(context));
583        zend_init_compiler_context();
584        CG(ast_arena) = zend_arena_create(1024 * 32);
585        compiler_result = zendparse();
586        if (compiler_result != 0) { /* parser error */
587            zend_bailout();
588        }
589        zend_compile_top_stmt(CG(ast));
590        zend_ast_destroy(CG(ast));
591        zend_arena_destroy(CG(ast_arena));
592        zend_do_end_compilation();
593        zend_emit_final_return(&retval_zv);
594        CG(in_compilation) = original_in_compilation;
595        compilation_successful=1;
596    }
597
598    CG(active_op_array) = original_active_op_array;
599    if (compilation_successful) {
600        pass_two(op_array);
601        zend_release_labels(0);
602    } else {
603        efree_size(op_array, sizeof(zend_op_array));
604        op_array = NULL;
605    }
606
607    zend_restore_lexical_state(&original_lex_state);
608    return op_array;
609}
610
611
612zend_op_array *compile_filename(int type, zval *filename)
613{
614    zend_file_handle file_handle;
615    zval tmp;
616    zend_op_array *retval;
617    char *opened_path = NULL;
618
619    if (Z_TYPE_P(filename) != IS_STRING) {
620        tmp = *filename;
621        zval_copy_ctor(&tmp);
622        convert_to_string(&tmp);
623        filename = &tmp;
624    }
625    file_handle.filename = Z_STRVAL_P(filename);
626    file_handle.free_filename = 0;
627    file_handle.type = ZEND_HANDLE_FILENAME;
628    file_handle.opened_path = NULL;
629    file_handle.handle.fp = NULL;
630
631    retval = zend_compile_file(&file_handle, type);
632    if (retval && file_handle.handle.stream.handle) {
633        if (!file_handle.opened_path) {
634            file_handle.opened_path = opened_path = estrndup(Z_STRVAL_P(filename), Z_STRLEN_P(filename));
635        }
636
637        zend_hash_str_add_empty_element(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path));
638
639        if (opened_path) {
640            efree(opened_path);
641        }
642    }
643    zend_destroy_file_handle(&file_handle);
644
645    if (filename==&tmp) {
646        zval_dtor(&tmp);
647    }
648    return retval;
649}
650
651ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
652{
653    char *buf;
654    size_t size, old_len;
655    zend_string *new_compiled_filename;
656
657    /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
658    old_len = Z_STRLEN_P(str);
659    Z_STR_P(str) = zend_string_realloc(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
660    Z_TYPE_INFO_P(str) = IS_STRING_EX;
661    memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
662
663    SCNG(yy_in) = NULL;
664    SCNG(yy_start) = NULL;
665
666    buf = Z_STRVAL_P(str);
667    size = old_len;
668
669    if (CG(multibyte)) {
670        SCNG(script_org) = (unsigned char*)buf;
671        SCNG(script_org_size) = size;
672        SCNG(script_filtered) = NULL;
673
674        zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
675
676        if (SCNG(input_filter)) {
677            if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
678                zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
679                        "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
680            }
681            buf = (char*)SCNG(script_filtered);
682            size = SCNG(script_filtered_size);
683        }
684    }
685
686    yy_scan_buffer(buf, (unsigned int)size);
687
688    new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
689    zend_set_compiled_filename(new_compiled_filename);
690    zend_string_release(new_compiled_filename);
691    CG(zend_lineno) = 1;
692    CG(increment_lineno) = 0;
693    RESET_DOC_COMMENT();
694    return SUCCESS;
695}
696
697
698ZEND_API size_t zend_get_scanned_file_offset(void)
699{
700    size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
701    if (SCNG(input_filter)) {
702        size_t original_offset = offset, length = 0;
703        do {
704            unsigned char *p = NULL;
705            if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
706                return (size_t)-1;
707            }
708            efree(p);
709            if (length > original_offset) {
710                offset--;
711            } else if (length < original_offset) {
712                offset++;
713            }
714        } while (original_offset != length);
715    }
716    return offset;
717}
718
719
720zend_op_array *compile_string(zval *source_string, char *filename)
721{
722    zend_lex_state original_lex_state;
723    zend_op_array *op_array = NULL;
724    zval tmp;
725    zend_bool original_in_compilation = CG(in_compilation);
726
727    if (Z_STRLEN_P(source_string)==0) {
728        return NULL;
729    }
730
731    ZVAL_DUP(&tmp, source_string);
732    convert_to_string(&tmp);
733    source_string = &tmp;
734
735    CG(in_compilation) = 1;
736    zend_save_lexical_state(&original_lex_state);
737    if (zend_prepare_string_for_scanning(source_string, filename) == SUCCESS) {
738        CG(ast) = NULL;
739        CG(ast_arena) = zend_arena_create(1024 * 32);
740        BEGIN(ST_IN_SCRIPTING);
741
742        if (!zendparse()) {
743            zend_op_array *original_active_op_array = CG(active_op_array);
744            op_array = emalloc(sizeof(zend_op_array));
745            init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE);
746            CG(active_op_array) = op_array;
747
748            zend_stack_push(&CG(context_stack), (void *) &CG(context));
749            zend_init_compiler_context();
750            zend_compile_top_stmt(CG(ast));
751            zend_do_end_compilation();
752            zend_emit_final_return(NULL);
753            pass_two(op_array);
754            zend_release_labels(0);
755
756            CG(active_op_array) = original_active_op_array;
757        }
758
759        zend_ast_destroy(CG(ast));
760        zend_arena_destroy(CG(ast_arena));
761    }
762
763    zend_restore_lexical_state(&original_lex_state);
764    zval_dtor(&tmp);
765    CG(in_compilation) = original_in_compilation;
766    return op_array;
767}
768
769
770BEGIN_EXTERN_C()
771int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
772{
773    zend_lex_state original_lex_state;
774    zend_file_handle file_handle;
775
776    file_handle.type = ZEND_HANDLE_FILENAME;
777    file_handle.filename = filename;
778    file_handle.free_filename = 0;
779    file_handle.opened_path = NULL;
780    zend_save_lexical_state(&original_lex_state);
781    if (open_file_for_scanning(&file_handle)==FAILURE) {
782        zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
783        zend_restore_lexical_state(&original_lex_state);
784        return FAILURE;
785    }
786    zend_highlight(syntax_highlighter_ini);
787    if (SCNG(script_filtered)) {
788        efree(SCNG(script_filtered));
789        SCNG(script_filtered) = NULL;
790    }
791    zend_destroy_file_handle(&file_handle);
792    zend_restore_lexical_state(&original_lex_state);
793    return SUCCESS;
794}
795
796int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
797{
798    zend_lex_state original_lex_state;
799    zval tmp = *str;
800
801    str = &tmp;
802    zval_copy_ctor(str);
803    zend_save_lexical_state(&original_lex_state);
804    if (zend_prepare_string_for_scanning(str, str_name)==FAILURE) {
805        zend_restore_lexical_state(&original_lex_state);
806        return FAILURE;
807    }
808    BEGIN(INITIAL);
809    zend_highlight(syntax_highlighter_ini);
810    if (SCNG(script_filtered)) {
811        efree(SCNG(script_filtered));
812        SCNG(script_filtered) = NULL;
813    }
814    zend_restore_lexical_state(&original_lex_state);
815    zval_dtor(str);
816    return SUCCESS;
817}
818
819ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
820{
821    size_t length;
822    unsigned char *new_yy_start;
823
824    /* convert and set */
825    if (!SCNG(input_filter)) {
826        if (SCNG(script_filtered)) {
827            efree(SCNG(script_filtered));
828            SCNG(script_filtered) = NULL;
829        }
830        SCNG(script_filtered_size) = 0;
831        length = SCNG(script_org_size);
832        new_yy_start = SCNG(script_org);
833    } else {
834        if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
835            zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
836                    "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
837        }
838        if (SCNG(script_filtered)) {
839            efree(SCNG(script_filtered));
840        }
841        SCNG(script_filtered) = new_yy_start;
842        SCNG(script_filtered_size) = length;
843    }
844
845    SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
846    SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
847    SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
848    SCNG(yy_limit) = new_yy_start + length;
849
850    SCNG(yy_start) = new_yy_start;
851}
852
853
/*
 * zend_copy_value(zendlval, yytext, yyleng)
 *
 * Store the yyleng bytes starting at yytext into zendlval as a new string.
 * When an output filter is installed the bytes are first passed through it
 * and the temporary buffer it hands back is freed after the copy.
 *
 * The body is wrapped in do/while(0) so the macro behaves as one statement,
 * and the arguments are parenthesised where they are cast.
 *
 * TODO: avoid reallocation ???
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
    do { \
        if (SCNG(output_filter)) { \
            size_t sz = 0; \
            char *s = NULL; \
            SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)(yytext), (size_t)(yyleng)); \
            ZVAL_STRINGL(zendlval, s, sz); \
            efree(s); \
        } else { \
            ZVAL_STRINGL(zendlval, yytext, yyleng); \
        } \
    } while (0)
865
/*
 * Copy `len` bytes starting at `str` into `zendlval` as a string and decode
 * its backslash escape sequences in place.
 *
 * zendlval   - receives the resulting string value
 * str, len   - raw text of the literal (delimiters already stripped)
 * quote_type - delimiter of the literal; an escaped double quote or backtick
 *              loses its backslash only when it equals quote_type
 *
 * Handled escapes: \n \r \t \f \v \e \\ \$, \x / \X followed by one or two
 * hex digits, \u{hex digits} (encoded as UTF-8) and one to three octal
 * digits.  Any other escape is copied through with its backslash.
 *
 * The read pointer `s` and write pointer `t` walk the same buffer; every
 * escape emits no more bytes than it consumes, and the string length is
 * decremented by the difference.  CG(zend_lineno) is advanced for the
 * newlines encountered.  If an output filter is installed, the decoded
 * string is finally replaced by the filter result.
 */
static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
{
    register char *s, *t;
    char *end;

    ZVAL_STRINGL(zendlval, str, len);

    /* convert escape sequences */
    s = t = Z_STRVAL_P(zendlval);
    end = s+Z_STRLEN_P(zendlval);
    while (s<end) {
        if (*s=='\\') {
            s++;
            /* A lone backslash at the very end is kept literally. */
            if (s >= end) {
                *t++ = '\\';
                break;
            }

            switch(*s) {
                case 'n':
                    *t++ = '\n';
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 'r':
                    *t++ = '\r';
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 't':
                    *t++ = '\t';
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 'f':
                    *t++ = '\f';
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 'v':
                    *t++ = '\v';
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 'e':
#ifdef PHP_WIN32
                    *t++ = VK_ESCAPE;
#else
                    *t++ = '\e';
#endif
                    Z_STRLEN_P(zendlval)--;
                    break;
                case '"':
                case '`':
                    /* Only the delimiter of this literal is unescaped. */
                    if (*s != quote_type) {
                        *t++ = '\\';
                        *t++ = *s;
                        break;
                    }
                    /* fall through: delimiter is unescaped like \\ and \$ */
                case '\\':
                case '$':
                    *t++ = *s;
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 'x':
                case 'X':
                    /* \x needs at least one hex digit, and takes at most two. */
                    if (ZEND_IS_HEX(*(s+1))) {
                        char hex_buf[3] = { 0, 0, 0 };

                        Z_STRLEN_P(zendlval)--; /* for the 'x' */

                        hex_buf[0] = *(++s);
                        Z_STRLEN_P(zendlval)--;
                        if (ZEND_IS_HEX(*(s+1))) {
                            hex_buf[1] = *(++s);
                            Z_STRLEN_P(zendlval)--;
                        }
                        *t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
                    } else {
                        *t++ = '\\';
                        *t++ = *s;
                    }
                    break;
                /* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
                case 'u':
                    {
                        /* cache where we started so we can parse after validating */
                        char *start = s + 1;
                        /* Counts '{', the hex digits and '}'; shadows the `len` parameter. */
                        size_t len = 0;
                        zend_bool valid = 1;
                        unsigned long codepoint;
                        /* Number of UTF-8 bytes written for the codepoint. */
                        size_t byte_len = 0;

                        if (*start != '{') {
                            /* we silently let this pass to avoid breaking code
                             * with JSON in string literals (e.g. "\"\u202e\"")
                             */
                            *t++ = '\\';
                            *t++ = 'u';
                            break;
                        } else {
                            /* on the other hand, invalid \u{blah} errors */
                            s++;
                            len++;
                            s++;
                            /* Stops at '}' or at the first non-hex byte. */
                            while (*s != '}') {
                                if (!ZEND_IS_HEX(*s)) {
                                    valid = 0;
                                    break;
                                } else {
                                    len++;
                                }
                                s++;
                            }
                            if (*s == '}') {
                                valid = 1;
                                len++;
                            }
                        }

                        /* \u{} is invalid */
                        if (len <= 2) {
                            valid = 0;
                        }

                        if (!valid) {
                            zend_error(E_COMPILE_ERROR, "Invalid UTF-8 codepoint escape sequence");
                        }

                        /* start points at '{', so the digits begin at start + 1. */
                        errno = 0;
                        codepoint = strtoul(start + 1, NULL, 16);

                        /* per RFC 3629, UTF-8 can only represent 21 bits */
                        if (codepoint > 0x10FFFF || errno) {
                            zend_error_noreturn(E_COMPILE_ERROR, "Invalid UTF-8 codepoint escape sequence: Codepoint too large");
                        }

                        /* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
                        if (codepoint < 0x80) {
                            byte_len = 1;
                            *t++ = codepoint;
                        } else if (codepoint <= 0x7FF) {
                            byte_len = 2;
                            *t++ = (codepoint >> 6) + 0xC0;
                            *t++ = (codepoint & 0x3F) + 0x80;
                        } else if (codepoint <= 0xFFFF) {
                            byte_len = 3;
                            *t++ = (codepoint >> 12) + 0xE0;
                            *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
                            *t++ = (codepoint & 0x3F) + 0x80;
                        } else if (codepoint <= 0x10FFFF) {
                            byte_len = 4;
                            *t++ = (codepoint >> 18) + 0xF0;
                            *t++ = ((codepoint >> 12) & 0x3F) + 0x80;
                            *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
                            *t++ = (codepoint & 0x3F) + 0x80;
                        }

                        /* Source used 2 + len bytes, output used byte_len. */
                        Z_STRLEN_P(zendlval) -= 2; /* \u */
                        Z_STRLEN_P(zendlval) -= (len - byte_len);
                    }
                    break;
                default:
                    /* check for an octal */
                    if (ZEND_IS_OCT(*s)) {
                        char octal_buf[4] = { 0, 0, 0, 0 };

                        /* One to three octal digits are consumed. */
                        octal_buf[0] = *s;
                        Z_STRLEN_P(zendlval)--;
                        if (ZEND_IS_OCT(*(s+1))) {
                            octal_buf[1] = *(++s);
                            Z_STRLEN_P(zendlval)--;
                            if (ZEND_IS_OCT(*(s+1))) {
                                octal_buf[2] = *(++s);
                                Z_STRLEN_P(zendlval)--;
                            }
                        }
                        *t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
                    } else {
                        /* Unknown escape: keep the backslash and the character. */
                        *t++ = '\\';
                        *t++ = *s;
                    }
                    break;
            }
        } else {
            *t++ = *s;
        }

        /* *s is the last source byte consumed in this iteration; a \r that
         * is followed by \n is not counted, the \n will be. */
        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
            CG(zend_lineno)++;
        }
        s++;
    }
    *t = 0;
    if (SCNG(output_filter)) {
        size_t sz = 0;
        /* Filter result buffer; shadows the `str` parameter. */
        unsigned char *str;
        // TODO: avoid reallocation ???
        s = Z_STRVAL_P(zendlval);
        SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
        /* Release the unfiltered string before storing the filtered copy. */
        zval_ptr_dtor(zendlval);
        ZVAL_STRINGL(zendlval, (char *) str, sz);
        efree(str);
    }
}
1066
1067
/*
 * Scanner entry point: match one token at the current cursor and return its
 * token id.  Rules that carry a value store it in zendlval.
 *
 * `restart` is re-entered by rules that push their input back and switch
 * scanner state instead of producing a token.
 */
int lex_scan(zval *zendlval)
{
restart:
    SCNG(yy_text) = YYCURSOR;
1072
1073/*!re2c
1074re2c:yyfill:check = 0;
1075LNUM    [0-9]+
1076DNUM    ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1077EXPONENT_DNUM   (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1078HNUM    "0x"[0-9a-fA-F]+
1079BNUM    "0b"[01]+
1080LABEL   [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1081WHITESPACE [ \n\r\t]+
1082TABS_AND_SPACES [ \t]*
1083TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1084ANY_CHAR [^]
1085NEWLINE ("\r"|"\n"|"\r\n")
1086
1087/* compute yyleng before each rule */
1088<!*> := yyleng = YYCURSOR - SCNG(yy_text);
1089
/* Statement, control-flow and declaration keywords.  Each rule only returns
 * its token id; exit and die both map to T_EXIT. */
<ST_IN_SCRIPTING>"exit" {
    return T_EXIT;
}

<ST_IN_SCRIPTING>"die" {
    return T_EXIT;
}

<ST_IN_SCRIPTING>"function" {
    return T_FUNCTION;
}

<ST_IN_SCRIPTING>"const" {
    return T_CONST;
}

<ST_IN_SCRIPTING>"return" {
    return T_RETURN;
}

<ST_IN_SCRIPTING>"yield" {
    return T_YIELD;
}

<ST_IN_SCRIPTING>"try" {
    return T_TRY;
}

<ST_IN_SCRIPTING>"catch" {
    return T_CATCH;
}

<ST_IN_SCRIPTING>"finally" {
    return T_FINALLY;
}

<ST_IN_SCRIPTING>"throw" {
    return T_THROW;
}

<ST_IN_SCRIPTING>"if" {
    return T_IF;
}

<ST_IN_SCRIPTING>"elseif" {
    return T_ELSEIF;
}

<ST_IN_SCRIPTING>"endif" {
    return T_ENDIF;
}

<ST_IN_SCRIPTING>"else" {
    return T_ELSE;
}

<ST_IN_SCRIPTING>"while" {
    return T_WHILE;
}

<ST_IN_SCRIPTING>"endwhile" {
    return T_ENDWHILE;
}

<ST_IN_SCRIPTING>"do" {
    return T_DO;
}

<ST_IN_SCRIPTING>"for" {
    return T_FOR;
}

<ST_IN_SCRIPTING>"endfor" {
    return T_ENDFOR;
}

<ST_IN_SCRIPTING>"foreach" {
    return T_FOREACH;
}

<ST_IN_SCRIPTING>"endforeach" {
    return T_ENDFOREACH;
}

<ST_IN_SCRIPTING>"declare" {
    return T_DECLARE;
}

<ST_IN_SCRIPTING>"enddeclare" {
    return T_ENDDECLARE;
}

<ST_IN_SCRIPTING>"instanceof" {
    return T_INSTANCEOF;
}

<ST_IN_SCRIPTING>"as" {
    return T_AS;
}

<ST_IN_SCRIPTING>"switch" {
    return T_SWITCH;
}

<ST_IN_SCRIPTING>"endswitch" {
    return T_ENDSWITCH;
}

<ST_IN_SCRIPTING>"case" {
    return T_CASE;
}

<ST_IN_SCRIPTING>"default" {
    return T_DEFAULT;
}

<ST_IN_SCRIPTING>"break" {
    return T_BREAK;
}

<ST_IN_SCRIPTING>"continue" {
    return T_CONTINUE;
}

<ST_IN_SCRIPTING>"goto" {
    return T_GOTO;
}

<ST_IN_SCRIPTING>"echo" {
    return T_ECHO;
}

<ST_IN_SCRIPTING>"print" {
    return T_PRINT;
}

<ST_IN_SCRIPTING>"class" {
    return T_CLASS;
}

<ST_IN_SCRIPTING>"interface" {
    return T_INTERFACE;
}

<ST_IN_SCRIPTING>"trait" {
    return T_TRAIT;
}

<ST_IN_SCRIPTING>"extends" {
    return T_EXTENDS;
}

<ST_IN_SCRIPTING>"implements" {
    return T_IMPLEMENTS;
}
1245
/* Object operator in code: push ST_LOOKING_FOR_PROPERTY so the label that
 * follows is scanned by the property rules below. */
<ST_IN_SCRIPTING>"->" {
    yy_push_state(ST_LOOKING_FOR_PROPERTY);
    return T_OBJECT_OPERATOR;
}

/* Whitespace token; HANDLE_NEWLINES is given the matched text. */
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
    HANDLE_NEWLINES(yytext, yyleng);
    return T_WHITESPACE;
}

/* A further object operator keeps the scanner in the property state. */
<ST_LOOKING_FOR_PROPERTY>"->" {
    return T_OBJECT_OPERATOR;
}

/* The property name itself: leave the property state and return the label
 * as T_STRING. */
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
    yy_pop_state();
    zend_copy_value(zendlval, yytext, yyleng);
    return T_STRING;
}

/* Anything else: give the character back, leave the property state and
 * rescan it in the previous state. */
<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
    yyless(0);
    yy_pop_state();
    goto restart;
}
1271
/* Scope, namespace, variadic and coalesce operators, then new / clone / var.
 * Each rule only returns its token id. */
<ST_IN_SCRIPTING>"::" {
    return T_PAAMAYIM_NEKUDOTAYIM;
}

<ST_IN_SCRIPTING>"\\" {
    return T_NS_SEPARATOR;
}

<ST_IN_SCRIPTING>"..." {
    return T_ELLIPSIS;
}

<ST_IN_SCRIPTING>"??" {
    return T_COALESCE;
}

<ST_IN_SCRIPTING>"new" {
    return T_NEW;
}

<ST_IN_SCRIPTING>"clone" {
    return T_CLONE;
}

<ST_IN_SCRIPTING>"var" {
    return T_VAR;
}

/* Type casts: a parenthesised type name, with optional spaces or tabs inside
 * the parentheses, is matched as one token.  Several spellings share a
 * token (int/integer, real/double/float, string/binary, bool/boolean). */
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
    return T_INT_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
    return T_DOUBLE_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
    return T_STRING_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
    return T_ARRAY_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
    return T_OBJECT_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
    return T_BOOL_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
    return T_UNSET_CAST;
}
1327
/* Include/namespace keywords, language constructs, member modifiers and
 * type-like keywords.  Each rule only returns its token id. */
<ST_IN_SCRIPTING>"eval" {
    return T_EVAL;
}

<ST_IN_SCRIPTING>"include" {
    return T_INCLUDE;
}

<ST_IN_SCRIPTING>"include_once" {
    return T_INCLUDE_ONCE;
}

<ST_IN_SCRIPTING>"require" {
    return T_REQUIRE;
}

<ST_IN_SCRIPTING>"require_once" {
    return T_REQUIRE_ONCE;
}

<ST_IN_SCRIPTING>"namespace" {
    return T_NAMESPACE;
}

<ST_IN_SCRIPTING>"use" {
    return T_USE;
}

<ST_IN_SCRIPTING>"insteadof" {
        return T_INSTEADOF;
}

<ST_IN_SCRIPTING>"global" {
    return T_GLOBAL;
}

<ST_IN_SCRIPTING>"isset" {
    return T_ISSET;
}

<ST_IN_SCRIPTING>"empty" {
    return T_EMPTY;
}

<ST_IN_SCRIPTING>"__halt_compiler" {
    return T_HALT_COMPILER;
}

<ST_IN_SCRIPTING>"static" {
    return T_STATIC;
}

<ST_IN_SCRIPTING>"abstract" {
    return T_ABSTRACT;
}

<ST_IN_SCRIPTING>"final" {
    return T_FINAL;
}

<ST_IN_SCRIPTING>"private" {
    return T_PRIVATE;
}

<ST_IN_SCRIPTING>"protected" {
    return T_PROTECTED;
}

<ST_IN_SCRIPTING>"public" {
    return T_PUBLIC;
}

<ST_IN_SCRIPTING>"unset" {
    return T_UNSET;
}

<ST_IN_SCRIPTING>"=>" {
    return T_DOUBLE_ARROW;
}

<ST_IN_SCRIPTING>"list" {
    return T_LIST;
}

<ST_IN_SCRIPTING>"array" {
    return T_ARRAY;
}

<ST_IN_SCRIPTING>"callable" {
 return T_CALLABLE;
}
1419
/* Multi-character operators.  Each rule only returns its token id; both
 * spellings of the inequality operator map to T_IS_NOT_EQUAL. */
<ST_IN_SCRIPTING>"++" {
    return T_INC;
}

<ST_IN_SCRIPTING>"--" {
    return T_DEC;
}

<ST_IN_SCRIPTING>"===" {
    return T_IS_IDENTICAL;
}

<ST_IN_SCRIPTING>"!==" {
    return T_IS_NOT_IDENTICAL;
}

<ST_IN_SCRIPTING>"==" {
    return T_IS_EQUAL;
}

<ST_IN_SCRIPTING>"!="|"<>" {
    return T_IS_NOT_EQUAL;
}

<ST_IN_SCRIPTING>"<=" {
    return T_IS_SMALLER_OR_EQUAL;
}

<ST_IN_SCRIPTING>">=" {
    return T_IS_GREATER_OR_EQUAL;
}

<ST_IN_SCRIPTING>"+=" {
    return T_PLUS_EQUAL;
}

<ST_IN_SCRIPTING>"-=" {
    return T_MINUS_EQUAL;
}

<ST_IN_SCRIPTING>"*=" {
    return T_MUL_EQUAL;
}

<ST_IN_SCRIPTING>"*\*" {
    return T_POW;
}

<ST_IN_SCRIPTING>"*\*=" {
    return T_POW_EQUAL;
}

<ST_IN_SCRIPTING>"/=" {
    return T_DIV_EQUAL;
}

<ST_IN_SCRIPTING>".=" {
    return T_CONCAT_EQUAL;
}

<ST_IN_SCRIPTING>"%=" {
    return T_MOD_EQUAL;
}

<ST_IN_SCRIPTING>"<<=" {
    return T_SL_EQUAL;
}

<ST_IN_SCRIPTING>">>=" {
    return T_SR_EQUAL;
}

<ST_IN_SCRIPTING>"&=" {
    return T_AND_EQUAL;
}

<ST_IN_SCRIPTING>"|=" {
    return T_OR_EQUAL;
}

<ST_IN_SCRIPTING>"^=" {
    return T_XOR_EQUAL;
}

<ST_IN_SCRIPTING>"||" {
    return T_BOOLEAN_OR;
}

<ST_IN_SCRIPTING>"&&" {
    return T_BOOLEAN_AND;
}

<ST_IN_SCRIPTING>"OR" {
    return T_LOGICAL_OR;
}

<ST_IN_SCRIPTING>"AND" {
    return T_LOGICAL_AND;
}

<ST_IN_SCRIPTING>"XOR" {
    return T_LOGICAL_XOR;
}

<ST_IN_SCRIPTING>"<<" {
    return T_SL;
}

<ST_IN_SCRIPTING>">>" {
    return T_SR;
}

/* Single-character tokens are returned as their own character code. */
<ST_IN_SCRIPTING>{TOKENS} {
    return yytext[0];
}
1535
1536
/* Opening brace in code: push another ST_IN_SCRIPTING level; the closing
 * brace rule below pops one level. */
<ST_IN_SCRIPTING>"{" {
    yy_push_state(ST_IN_SCRIPTING);
    return '{';
}


/* Dollar followed by an opening brace inside an interpolated string: look
 * for a variable name next. */
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
    yy_push_state(ST_LOOKING_FOR_VARNAME);
    return T_DOLLAR_OPEN_CURLY_BRACES;
}


/* Closing brace in code: reset the pending doc comment and pop one state
 * level, unless the state stack is already empty. */
<ST_IN_SCRIPTING>"}" {
    RESET_DOC_COMMENT();
    if (!zend_stack_is_empty(&SCNG(state_stack))) {
        yy_pop_state();
    }
    return '}';
}


/* A label directly followed by an opening bracket or a closing brace: the
 * trailing character is given back, the label becomes T_STRING_VARNAME and
 * the varname state is replaced by ST_IN_SCRIPTING. */
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
    yyless(yyleng - 1);
    zend_copy_value(zendlval, yytext, yyleng);
    yy_pop_state();
    yy_push_state(ST_IN_SCRIPTING);
    return T_STRING_VARNAME;
}


/* Anything else: give the character back, replace the varname state by
 * ST_IN_SCRIPTING and rescan. */
<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
    yyless(0);
    yy_pop_state();
    yy_push_state(ST_IN_SCRIPTING);
    goto restart;
}
1573
1574<ST_IN_SCRIPTING>{BNUM} {
1575    char *bin = yytext + 2; /* Skip "0b" */
1576    int len = yyleng - 2;
1577
1578    /* Skip any leading 0s */
1579    while (*bin == '0') {
1580        ++bin;
1581        --len;
1582    }
1583
1584    if (len < SIZEOF_ZEND_LONG * 8) {
1585        if (len == 0) {
1586            ZVAL_LONG(zendlval, 0);
1587        } else {
1588            ZVAL_LONG(zendlval, ZEND_STRTOL(bin, NULL, 2));
1589        }
1590        return T_LNUMBER;
1591    } else {
1592        ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, NULL));
1593        return T_DNUMBER;
1594    }
1595}
1596
1597<ST_IN_SCRIPTING>{LNUM} {
1598    if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1599        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, NULL, 0));
1600    } else {
1601        errno = 0;
1602        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, NULL, 0));
1603        if (errno == ERANGE) { /* Overflow */
1604            if (yytext[0] == '0') { /* octal overflow */
1605                ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, NULL));
1606            } else {
1607                ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
1608            }
1609            return T_DNUMBER;
1610        }
1611    }
1612    return T_LNUMBER;
1613}
1614
1615<ST_IN_SCRIPTING>{HNUM} {
1616    char *hex = yytext + 2; /* Skip "0x" */
1617    int len = yyleng - 2;
1618
1619    /* Skip any leading 0s */
1620    while (*hex == '0') {
1621        hex++;
1622        len--;
1623    }
1624
1625    if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1626        if (len == 0) {
1627            ZVAL_LONG(zendlval, 0);
1628        } else {
1629            ZVAL_LONG(zendlval, ZEND_STRTOL(hex, NULL, 16));
1630        }
1631        return T_LNUMBER;
1632    } else {
1633        ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, NULL));
1634        return T_DNUMBER;
1635    }
1636}
1637
/* Array offset inside a string written as a plain decimal without leading
 * zeros: stored as a long when short enough (or, at maximum length, when it
 * compares below long_min_digits), otherwise kept as a string. */
<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
    if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, NULL, 10));
    } else {
        ZVAL_STRINGL(zendlval, yytext, yyleng);
    }
    return T_NUM_STRING;
}

/* Any other numeric spelling (leading zeros, hex, binary) is kept verbatim
 * as a string. */
<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
    ZVAL_STRINGL(zendlval, yytext, yyleng);
    return T_NUM_STRING;
}

/* Floating point literal, with or without exponent. */
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
    ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
    return T_DNUMBER;
}
1656
/* Magic constants.  The scanner only returns the token id; no value is
 * stored in zendlval by these rules. */
<ST_IN_SCRIPTING>"__CLASS__" {
    return T_CLASS_C;
}

<ST_IN_SCRIPTING>"__TRAIT__" {
    return T_TRAIT_C;
}

<ST_IN_SCRIPTING>"__FUNCTION__" {
    return T_FUNC_C;
}

<ST_IN_SCRIPTING>"__METHOD__" {
    return T_METHOD_C;
}

<ST_IN_SCRIPTING>"__LINE__" {
    return T_LINE;
}

<ST_IN_SCRIPTING>"__FILE__" {
    return T_FILE;
}

<ST_IN_SCRIPTING>"__DIR__" {
    return T_DIR;
}

<ST_IN_SCRIPTING>"__NAMESPACE__" {
    return T_NS_C;
}
1688
1689
/* Echo open tag: switch to scripting mode. */
<INITIAL>"<?=" {
    BEGIN(ST_IN_SCRIPTING);
    return T_OPEN_TAG_WITH_ECHO;
}


/* Full open tag followed by one blank or newline; the trailing character is
 * passed to HANDLE_NEWLINE. */
<INITIAL>"<?php"([ \t]|{NEWLINE}) {
    HANDLE_NEWLINE(yytext[yyleng-1]);
    BEGIN(ST_IN_SCRIPTING);
    return T_OPEN_TAG;
}


/* Short open tag: honoured only when CG(short_tags) is set, otherwise the
 * text is handled as inline HTML by the shared handler in the next rule. */
<INITIAL>"<?" {
    if (CG(short_tags)) {
        BEGIN(ST_IN_SCRIPTING);
        return T_OPEN_TAG;
    } else {
        goto inline_char_handler;
    }
}
1711
/* Text outside PHP tags: collect everything up to the next recognised open
 * tag (or the end of input) into one T_INLINE_HTML token. */
<INITIAL>{ANY_CHAR} {
    /* Cursor already beyond the limit: nothing left to scan. */
    if (YYCURSOR > YYLIMIT) {
        return 0;
    }

/* Also entered from the short open tag rule when short tags are disabled. */
inline_char_handler:

    while (1) {
        /* Jump to just after the next less-than sign, or to the limit. */
        YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);

        YYCURSOR = ptr ? ptr + 1 : YYLIMIT;

        if (YYCURSOR >= YYLIMIT) {
            break;
        }

        if (*YYCURSOR == '?') {
            if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */

                /* Step back onto the less-than sign so the tag is scanned next. */
                YYCURSOR--;
                break;
            }
        }
    }

    yyleng = YYCURSOR - SCNG(yy_text);

    if (SCNG(output_filter)) {
        size_t readsize;
        char *s = NULL;
        size_t sz = 0;
        // TODO: avoid reallocation ???
        readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
        ZVAL_STRINGL(zendlval, s, sz);
        efree(s);
        /* NOTE(review): the filter return value is used here as the number
         * of input bytes it consumed; the rest is given back - confirm
         * against the filter contract. */
        if (readsize < yyleng) {
            yyless(readsize);
        }
    } else {
      ZVAL_STRINGL(zendlval, yytext, yyleng);
    }
    HANDLE_NEWLINES(yytext, yyleng);
    return T_INLINE_HTML;
}
1756
1757
/* Make sure a label character follows "->", otherwise there is no property
 * and "->" will be taken literally.
 * The three trailing characters (operator plus first label character) are
 * given back; the value stored is the variable name without the dollar.
 */
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
    yyless(yyleng - 3);
    yy_push_state(ST_LOOKING_FOR_PROPERTY);
    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
    return T_VARIABLE;
}

/* A [ always designates a variable offset, regardless of what follows.
 * The bracket is given back and scanned in ST_VAR_OFFSET.
 */
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
    yyless(yyleng - 1);
    yy_push_state(ST_VAR_OFFSET);
    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
    return T_VARIABLE;
}

/* Plain variable: the value stored is the name without the dollar. */
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
    return T_VARIABLE;
}

/* Closing bracket ends the offset. */
<ST_VAR_OFFSET>"]" {
    yy_pop_state();
    return ']';
}

<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
    /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
    return yytext[0];
}

<ST_VAR_OFFSET>[ \n\r\t\\'#] {
    /* Invalid rule to return a more explicit parse error with proper line number */
    yyless(0);
    yy_pop_state();
    ZVAL_NULL(zendlval);
    return T_ENCAPSED_AND_WHITESPACE;
}

/* Bare label, in code or as a string offset. */
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
    zend_copy_value(zendlval, yytext, yyleng);
    return T_STRING;
}
1804
1805
/* One-line comment: runs to the end of the line (the line terminator is part
 * of the token) or up to, but not including, a closing PHP tag. */
<ST_IN_SCRIPTING>"#"|"//" {
    while (YYCURSOR < YYLIMIT) {
        switch (*YYCURSOR++) {
            case '\r':
                /* CR LF counts as a single line terminator. */
                if (*YYCURSOR == '\n') {
                    YYCURSOR++;
                }
                /* fall through */
            case '\n':
                CG(zend_lineno)++;
                break;
            case '?':
                if (*YYCURSOR == '>') {
                    /* Give the question mark back so the close tag is scanned next. */
                    YYCURSOR--;
                    break;
                }
                /* fall through */
            default:
                continue;
        }

        /* Reached only through a break above: the comment ends here. */
        break;
    }

    yyleng = YYCURSOR - SCNG(yy_text);

    return T_COMMENT;
}
1834
1835<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1836    int doc_com;
1837
1838    if (yyleng > 2) {
1839        doc_com = 1;
1840        RESET_DOC_COMMENT();
1841    } else {
1842        doc_com = 0;
1843    }
1844
1845    while (YYCURSOR < YYLIMIT) {
1846        if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1847            break;
1848        }
1849    }
1850
1851    if (YYCURSOR < YYLIMIT) {
1852        YYCURSOR++;
1853    } else {
1854        zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1855    }
1856
1857    yyleng = YYCURSOR - SCNG(yy_text);
1858    HANDLE_NEWLINES(yytext, yyleng);
1859
1860    if (doc_com) {
1861        CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1862        return T_DOC_COMMENT;
1863    }
1864
1865    return T_COMMENT;
1866}
1867
/* Closing PHP tag, including one optional newline directly after it:
 * return to the INITIAL (inline HTML) state. */
<ST_IN_SCRIPTING>"?>"{NEWLINE}? {
    BEGIN(INITIAL);
    return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
}
1872
1873
1874<ST_IN_SCRIPTING>b?['] {
1875    register char *s, *t;
1876    char *end;
1877    int bprefix = (yytext[0] != '\'') ? 1 : 0;
1878
1879    while (1) {
1880        if (YYCURSOR < YYLIMIT) {
1881            if (*YYCURSOR == '\'') {
1882                YYCURSOR++;
1883                yyleng = YYCURSOR - SCNG(yy_text);
1884
1885                break;
1886            } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1887                YYCURSOR++;
1888            }
1889        } else {
1890            yyleng = YYLIMIT - SCNG(yy_text);
1891
1892            /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1893             * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1894             * rule, which continued in ST_IN_SCRIPTING state after the quote */
1895            ZVAL_NULL(zendlval);
1896            return T_ENCAPSED_AND_WHITESPACE;
1897        }
1898    }
1899
1900    ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
1901
1902    /* convert escape sequences */
1903    s = t = Z_STRVAL_P(zendlval);
1904    end = s+Z_STRLEN_P(zendlval);
1905    while (s<end) {
1906        if (*s=='\\') {
1907            s++;
1908
1909            switch(*s) {
1910                case '\\':
1911                case '\'':
1912                    *t++ = *s;
1913                    Z_STRLEN_P(zendlval)--;
1914                    break;
1915                default:
1916                    *t++ = '\\';
1917                    *t++ = *s;
1918                    break;
1919            }
1920        } else {
1921            *t++ = *s;
1922        }
1923
1924        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1925            CG(zend_lineno)++;
1926        }
1927        s++;
1928    }
1929    *t = 0;
1930
1931    if (SCNG(output_filter)) {
1932        size_t sz = 0;
1933        char *str = NULL;
1934        s = Z_STRVAL_P(zendlval);
1935        // TODO: avoid reallocation ???
1936        SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
1937        ZVAL_STRINGL(zendlval, str, sz);
1938        efree(s);
1939    }
1940    return T_CONSTANT_ENCAPSED_STRING;
1941}
1942
1943
/* Opening double quote, optionally prefixed with b.  The rule looks ahead:
 * if the closing quote is reached before any interpolation marker the whole
 * literal is returned as one constant string; otherwise only the opening
 * quote is returned and scanning continues in ST_DOUBLE_QUOTES. */
<ST_IN_SCRIPTING>b?["] {
    int bprefix = (yytext[0] != '"') ? 1 : 0;

    while (YYCURSOR < YYLIMIT) {
        switch (*YYCURSOR++) {
            case '"':
                /* No interpolation found: decode escapes and finish. */
                yyleng = YYCURSOR - SCNG(yy_text);
                zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"');
                return T_CONSTANT_ENCAPSED_STRING;
            case '$':
                /* Dollar starts interpolation only before a label or a brace. */
                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
                    break;
                }
                continue;
            case '{':
                /* Brace starts interpolation only directly before a dollar. */
                if (*YYCURSOR == '$') {
                    break;
                }
                continue;
            case '\\':
                /* Skip the escaped character. */
                if (YYCURSOR < YYLIMIT) {
                    YYCURSOR++;
                }
                /* fall through */
            default:
                continue;
        }

        /* Interpolation marker found: step back onto it and stop. */
        YYCURSOR--;
        break;
    }

    /* Remember how much was scanned to save rescanning */
    SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);

    /* yyleng still holds the length of the opening match, so this puts the
     * cursor right after the opening quote. */
    YYCURSOR = SCNG(yy_text) + yyleng;

    BEGIN(ST_DOUBLE_QUOTES);
    return '"';
}
1984
1985
/* Heredoc or nowdoc opener, optionally prefixed with b: records the label on
 * the heredoc label stack and selects the follow-up state. */
<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
    char *s;
    int bprefix = (yytext[0] != '<') ? 1 : 0;
    zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));

    /* The opener always ends with a newline. */
    CG(zend_lineno)++;
    /* Start from the match length minus prefix, the three-character operator
     * and the line terminator (two bytes for CR LF); blanks and quotes are
     * subtracted below. */
    heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
    s = yytext+bprefix+3;
    while ((*s == ' ') || (*s == '\t')) {
        s++;
        heredoc_label->length--;
    }

    if (*s == '\'') {
        /* Single-quoted label selects nowdoc. */
        s++;
        heredoc_label->length -= 2;

        BEGIN(ST_NOWDOC);
    } else {
        /* Bare or double-quoted label selects heredoc. */
        if (*s == '"') {
            s++;
            heredoc_label->length -= 2;
        }

        BEGIN(ST_HEREDOC);
    }

    heredoc_label->label = estrndup(s, heredoc_label->length);

    /* Check for ending label on the next line */
    if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
        YYCTYPE *end = YYCURSOR + heredoc_label->length;

        /* An optional semicolon may follow the label. */
        if (*end == ';') {
            end++;
        }

        /* The body is empty: go straight to the end-of-heredoc state. */
        if (*end == '\n' || *end == '\r') {
            BEGIN(ST_END_HEREDOC);
        }
    }

    zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);

    return T_START_HEREDOC;
}
2032
2033
2034<ST_IN_SCRIPTING>[`] {
2035    BEGIN(ST_BACKQUOTE);
2036    return '`';
2037}
2038
2039
2040<ST_END_HEREDOC>{ANY_CHAR} {
2041    zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2042
2043    YYCURSOR += heredoc_label->length - 1;
2044    yyleng = heredoc_label->length;
2045
2046    heredoc_label_dtor(heredoc_label);
2047    efree(heredoc_label);
2048
2049    BEGIN(ST_IN_SCRIPTING);
2050    return T_END_HEREDOC;
2051}
2052
2053
2054<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2055    Z_LVAL_P(zendlval) = (zend_long) '{';
2056    yy_push_state(ST_IN_SCRIPTING);
2057    yyless(1);
2058    return T_CURLY_OPEN;
2059}
2060
2061
<ST_DOUBLE_QUOTES>["] {
    /* A double quote while in ST_DOUBLE_QUOTES: switch back to
     * ST_IN_SCRIPTING and return the quote character as the token. */
    BEGIN(ST_IN_SCRIPTING);
    return '"';
}
2066
<ST_BACKQUOTE>[`] {
    /* A backtick while in ST_BACKQUOTE: switch back to ST_IN_SCRIPTING and
     * return the backtick character as the token. */
    BEGIN(ST_IN_SCRIPTING);
    return '`';
}
2071
2072
2073<ST_DOUBLE_QUOTES>{ANY_CHAR} {
2074    if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2075        YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2076        SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2077
2078        goto double_quotes_scan_done;
2079    }
2080
2081    if (YYCURSOR > YYLIMIT) {
2082        return 0;
2083    }
2084    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2085        YYCURSOR++;
2086    }
2087
2088    while (YYCURSOR < YYLIMIT) {
2089        switch (*YYCURSOR++) {
2090            case '"':
2091                break;
2092            case '$':
2093                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2094                    break;
2095                }
2096                continue;
2097            case '{':
2098                if (*YYCURSOR == '$') {
2099                    break;
2100                }
2101                continue;
2102            case '\\':
2103                if (YYCURSOR < YYLIMIT) {
2104                    YYCURSOR++;
2105                }
2106                /* fall through */
2107            default:
2108                continue;
2109        }
2110
2111        YYCURSOR--;
2112        break;
2113    }
2114
2115double_quotes_scan_done:
2116    yyleng = YYCURSOR - SCNG(yy_text);
2117
2118    zend_scan_escape_string(zendlval, yytext, yyleng, '"');
2119    return T_ENCAPSED_AND_WHITESPACE;
2120}
2121
2122
2123<ST_BACKQUOTE>{ANY_CHAR} {
2124    if (YYCURSOR > YYLIMIT) {
2125        return 0;
2126    }
2127    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2128        YYCURSOR++;
2129    }
2130
2131    while (YYCURSOR < YYLIMIT) {
2132        switch (*YYCURSOR++) {
2133            case '`':
2134                break;
2135            case '$':
2136                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2137                    break;
2138                }
2139                continue;
2140            case '{':
2141                if (*YYCURSOR == '$') {
2142                    break;
2143                }
2144                continue;
2145            case '\\':
2146                if (YYCURSOR < YYLIMIT) {
2147                    YYCURSOR++;
2148                }
2149                /* fall through */
2150            default:
2151                continue;
2152        }
2153
2154        YYCURSOR--;
2155        break;
2156    }
2157
2158    yyleng = YYCURSOR - SCNG(yy_text);
2159
2160    zend_scan_escape_string(zendlval, yytext, yyleng, '`');
2161    return T_ENCAPSED_AND_WHITESPACE;
2162}
2163
2164
2165<ST_HEREDOC>{ANY_CHAR} {
2166    int newline = 0;
2167
2168    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2169
2170    if (YYCURSOR > YYLIMIT) {
2171        return 0;
2172    }
2173
2174    YYCURSOR--;
2175
2176    while (YYCURSOR < YYLIMIT) {
2177        switch (*YYCURSOR++) {
2178            case '\r':
2179                if (*YYCURSOR == '\n') {
2180                    YYCURSOR++;
2181                }
2182                /* fall through */
2183            case '\n':
2184                /* Check for ending label on the next line */
2185                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2186                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2187
2188                    if (*end == ';') {
2189                        end++;
2190                    }
2191
2192                    if (*end == '\n' || *end == '\r') {
2193                        /* newline before label will be subtracted from returned text, but
2194                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2195                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2196                            newline = 2; /* Windows newline */
2197                        } else {
2198                            newline = 1;
2199                        }
2200
2201                        CG(increment_lineno) = 1; /* For newline before label */
2202                        BEGIN(ST_END_HEREDOC);
2203
2204                        goto heredoc_scan_done;
2205                    }
2206                }
2207                continue;
2208            case '$':
2209                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2210                    break;
2211                }
2212                continue;
2213            case '{':
2214                if (*YYCURSOR == '$') {
2215                    break;
2216                }
2217                continue;
2218            case '\\':
2219                if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2220                    YYCURSOR++;
2221                }
2222                /* fall through */
2223            default:
2224                continue;
2225        }
2226
2227        YYCURSOR--;
2228        break;
2229    }
2230
2231heredoc_scan_done:
2232    yyleng = YYCURSOR - SCNG(yy_text);
2233
2234    zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0);
2235    return T_ENCAPSED_AND_WHITESPACE;
2236}
2237
2238
2239<ST_NOWDOC>{ANY_CHAR} {
2240    int newline = 0;
2241
2242    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2243
2244    if (YYCURSOR > YYLIMIT) {
2245        return 0;
2246    }
2247
2248    YYCURSOR--;
2249
2250    while (YYCURSOR < YYLIMIT) {
2251        switch (*YYCURSOR++) {
2252            case '\r':
2253                if (*YYCURSOR == '\n') {
2254                    YYCURSOR++;
2255                }
2256                /* fall through */
2257            case '\n':
2258                /* Check for ending label on the next line */
2259                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2260                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2261
2262                    if (*end == ';') {
2263                        end++;
2264                    }
2265
2266                    if (*end == '\n' || *end == '\r') {
2267                        /* newline before label will be subtracted from returned text, but
2268                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2269                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2270                            newline = 2; /* Windows newline */
2271                        } else {
2272                            newline = 1;
2273                        }
2274
2275                        CG(increment_lineno) = 1; /* For newline before label */
2276                        BEGIN(ST_END_HEREDOC);
2277
2278                        goto nowdoc_scan_done;
2279                    }
2280                }
2281                /* fall through */
2282            default:
2283                continue;
2284        }
2285    }
2286
2287nowdoc_scan_done:
2288    yyleng = YYCURSOR - SCNG(yy_text);
2289
2290    zend_copy_value(zendlval, yytext, yyleng - newline);
2291    HANDLE_NEWLINES(yytext, yyleng - newline);
2292    return T_ENCAPSED_AND_WHITESPACE;
2293}
2294
2295
2296<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2297    if (YYCURSOR > YYLIMIT) {
2298        return 0;
2299    }
2300
2301    zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2302    goto restart;
2303}
2304
2305*/
2306}
2307