1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2015 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Marcus Boerger <helly@php.net>                              |
16   |          Nuno Lopes <nlopess@php.net>                                |
17   |          Scott MacVicar <scottmac@php.net>                           |
18   | Flex version authors:                                                |
19   |          Andi Gutmans <andi@zend.com>                                |
20   |          Zeev Suraski <zeev@zend.com>                                |
21   +----------------------------------------------------------------------+
22*/
23
24/* $Id$ */
25
/*
 * YYDEBUG(s, c) is the scanner tracing hook; it receives a state number and
 * a character. It expands to nothing unless ZEND_SCANNER_DEBUG is defined to
 * a non-zero value at build time (e.g. -DZEND_SCANNER_DEBUG=1), in which
 * case each invocation prints both values to stdout.
 */
#ifndef ZEND_SCANNER_DEBUG
# define ZEND_SCANNER_DEBUG 0
#endif

#if ZEND_SCANNER_DEBUG
# define YYDEBUG(s, c) printf("state: %d char: %c\n", (s), (c))
#else
# define YYDEBUG(s, c)
#endif
31
32#include "zend_language_scanner_defs.h"
33
34#include <errno.h>
35#include "zend.h"
36#ifdef PHP_WIN32
37# include <Winuser.h>
38#endif
39#include "zend_alloc.h"
40#include <zend_language_parser.h>
41#include "zend_compile.h"
42#include "zend_language_scanner.h"
43#include "zend_highlight.h"
44#include "zend_constants.h"
45#include "zend_variables.h"
46#include "zend_operators.h"
47#include "zend_API.h"
48#include "zend_strtod.h"
49#include "zend_exceptions.h"
50#include "zend_virtual_cwd.h"
51#include "tsrm_config_common.h"
52
/*
 * Scanner interface macros. All of them are backed by fields of the scanner
 * globals, reached through SCNG(). Macro arguments are parenthesized so that
 * arbitrary expressions can be passed safely.
 */
#define YYCTYPE   unsigned char
/* Return 0 from the enclosing function once the cursor plus n bytes would
   reach ZEND_MMAP_AHEAD bytes past YYLIMIT. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Map a bare condition name to its yyc-prefixed identifier */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Keep only the first x bytes of the current token: the cursor is moved back
   to yytext + x and yyleng is set to x (x is evaluated twice). */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72
/* Build-time sanity check: ZEND_MMAP_AHEAD must not be smaller than
   YYMAXFILL. If the #error below fires, increase the ZEND_MMAP_AHEAD value
   in the zend_streams.h file. The next line is a re2c directive (presumably
   the one that emits YYMAXFILL) and must be kept verbatim. */
/*!max:re2c */
#if !(ZEND_MMAP_AHEAD >= YYMAXFILL)
# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
#endif
79
80#ifdef HAVE_STDARG_H
81# include <stdarg.h>
82#endif
83
84#ifdef HAVE_UNISTD_H
85# include <unistd.h>
86#endif
87
88/* Globals Macros */
89#define SCNG    LANG_SCNG
90#ifdef ZTS
91ZEND_API ts_rsrc_id language_scanner_globals_id;
92#else
93ZEND_API zend_php_scanner_globals language_scanner_globals;
94#endif
95
/*
 * Increment CG(zend_lineno) once per line break found in the l bytes that
 * start at s. A '\n' always counts; a '\r' counts only when the following
 * byte is not '\n', so a "\r\n" pair is counted once. Note that a '\r' in
 * the last position makes the macro look at the byte just past the range.
 */
#define HANDLE_NEWLINES(s, l)                                                   \
do {                                                                            \
    char *nl_pos = (s);                                                         \
    char *nl_end = nl_pos + (l);                                                \
                                                                                \
    for (; nl_pos < nl_end; nl_pos++) {                                         \
        if (nl_pos[0] == '\n' || (nl_pos[0] == '\r' && nl_pos[1] != '\n')) {    \
            CG(zend_lineno)++;                                                  \
        }                                                                       \
    }                                                                           \
} while (0)
107
/*
 * Increment CG(zend_lineno) when the single character c is '\n' or '\r'.
 * The argument is parenthesized so that any expression (including a
 * conditional expression) is compared as a whole; it may still be evaluated
 * twice, so it must not have side effects.
 */
#define HANDLE_NEWLINE(c) \
{ \
    if ((c) == '\n' || (c) == '\r') { \
        CG(zend_lineno)++; \
    } \
}
114
/* To save initial string length after scanning to first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

/* Non-zero when c is an ASCII letter, an underscore, or any value >= 0x7F */
#define IS_LABEL_START(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z') || '_' == (c) || 0x7F <= (c))

/* Non-zero when c is an octal / hexadecimal digit character */
#define ZEND_IS_OCT(c)  ('0' <= (c) && (c) <= '7')
#define ZEND_IS_HEX(c)  (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
123
124BEGIN_EXTERN_C()
125
126static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
127{
128    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
129    ZEND_ASSERT(internal_encoding);
130    return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
131}
132
133static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
134{
135    return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
136}
137
138static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
139{
140    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
142}
143
144static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
145{
146    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
147    ZEND_ASSERT(internal_encoding);
148    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149internal_encoding, zend_multibyte_encoding_utf8);
150}
151
152
153static void _yy_push_state(int new_state)
154{
155    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156    YYSETCONDITION(new_state);
157}
158
159#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
161static void yy_pop_state(void)
162{
163    int *stack_state = zend_stack_top(&SCNG(state_stack));
164    YYSETCONDITION(*stack_state);
165    zend_stack_del_top(&SCNG(state_stack));
166}
167
168static void yy_scan_buffer(char *str, unsigned int len)
169{
170    YYCURSOR       = (YYCTYPE*)str;
171    YYLIMIT        = YYCURSOR + len;
172    if (!SCNG(yy_start)) {
173        SCNG(yy_start) = YYCURSOR;
174    }
175}
176
177void startup_scanner(void)
178{
179    CG(parse_error) = 0;
180    CG(doc_comment) = NULL;
181    zend_stack_init(&SCNG(state_stack), sizeof(int));
182    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
183}
184
185static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
186    efree(heredoc_label->label);
187}
188
189void shutdown_scanner(void)
190{
191    CG(parse_error) = 0;
192    RESET_DOC_COMMENT();
193    zend_stack_destroy(&SCNG(state_stack));
194    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
195    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
196}
197
198ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
199{
200    lex_state->yy_leng   = SCNG(yy_leng);
201    lex_state->yy_start  = SCNG(yy_start);
202    lex_state->yy_text   = SCNG(yy_text);
203    lex_state->yy_cursor = SCNG(yy_cursor);
204    lex_state->yy_marker = SCNG(yy_marker);
205    lex_state->yy_limit  = SCNG(yy_limit);
206
207    lex_state->state_stack = SCNG(state_stack);
208    zend_stack_init(&SCNG(state_stack), sizeof(int));
209
210    lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
211    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
212
213    lex_state->in = SCNG(yy_in);
214    lex_state->yy_state = YYSTATE;
215    lex_state->filename = zend_get_compiled_filename();
216    lex_state->lineno = CG(zend_lineno);
217
218    lex_state->script_org = SCNG(script_org);
219    lex_state->script_org_size = SCNG(script_org_size);
220    lex_state->script_filtered = SCNG(script_filtered);
221    lex_state->script_filtered_size = SCNG(script_filtered_size);
222    lex_state->input_filter = SCNG(input_filter);
223    lex_state->output_filter = SCNG(output_filter);
224    lex_state->script_encoding = SCNG(script_encoding);
225
226    lex_state->ast = CG(ast);
227    lex_state->ast_arena = CG(ast_arena);
228}
229
230ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
231{
232    SCNG(yy_leng)   = lex_state->yy_leng;
233    SCNG(yy_start)  = lex_state->yy_start;
234    SCNG(yy_text)   = lex_state->yy_text;
235    SCNG(yy_cursor) = lex_state->yy_cursor;
236    SCNG(yy_marker) = lex_state->yy_marker;
237    SCNG(yy_limit)  = lex_state->yy_limit;
238
239    zend_stack_destroy(&SCNG(state_stack));
240    SCNG(state_stack) = lex_state->state_stack;
241
242    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
243    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
244    SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
245
246    SCNG(yy_in) = lex_state->in;
247    YYSETCONDITION(lex_state->yy_state);
248    CG(zend_lineno) = lex_state->lineno;
249    zend_restore_compiled_filename(lex_state->filename);
250
251    if (SCNG(script_filtered)) {
252        efree(SCNG(script_filtered));
253        SCNG(script_filtered) = NULL;
254    }
255    SCNG(script_org) = lex_state->script_org;
256    SCNG(script_org_size) = lex_state->script_org_size;
257    SCNG(script_filtered) = lex_state->script_filtered;
258    SCNG(script_filtered_size) = lex_state->script_filtered_size;
259    SCNG(input_filter) = lex_state->input_filter;
260    SCNG(output_filter) = lex_state->output_filter;
261    SCNG(script_encoding) = lex_state->script_encoding;
262
263    CG(ast) = lex_state->ast;
264    CG(ast_arena) = lex_state->ast_arena;
265
266    RESET_DOC_COMMENT();
267}
268
269ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
270{
271    zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
272    /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
273    file_handle->opened_path = NULL;
274    if (file_handle->free_filename) {
275        file_handle->filename = NULL;
276    }
277}
278
/* Byte order marks, as string literals; use sizeof(X)-1 for the BOM length */
/* UTF-32: 4 bytes */
#define BOM_UTF32_BE    "\x00\x00\xfe\xff"
#define BOM_UTF32_LE    "\xff\xfe\x00\x00"
/* UTF-16: 2 bytes (the LE mark is a prefix of the UTF-32 LE mark) */
#define BOM_UTF16_BE    "\xfe\xff"
#define BOM_UTF16_LE    "\xff\xfe"
/* UTF-8: 3 bytes */
#define BOM_UTF8        "\xef\xbb\xbf"
284
285static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
286{
287    const unsigned char *p;
288    int wchar_size = 2;
289    int le = 0;
290
291    /* utf-16 or utf-32? */
292    p = script;
293    assert(p >= script);
294    while ((size_t)(p-script) < script_size) {
295        p = memchr(p, 0, script_size-(p-script)-2);
296        if (!p) {
297            break;
298        }
299        if (*(p+1) == '\0' && *(p+2) == '\0') {
300            wchar_size = 4;
301            break;
302        }
303
304        /* searching for UTF-32 specific byte orders, so this will do */
305        p += 4;
306    }
307
308    /* BE or LE? */
309    p = script;
310    assert(p >= script);
311    while ((size_t)(p-script) < script_size) {
312        if (*p == '\0' && *(p+wchar_size-1) != '\0') {
313            /* BE */
314            le = 0;
315            break;
316        } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
317            /* LE* */
318            le = 1;
319            break;
320        }
321        p += wchar_size;
322    }
323
324    if (wchar_size == 2) {
325        return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
326    } else {
327        return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
328    }
329
330    return NULL;
331}
332
333static const zend_encoding* zend_multibyte_detect_unicode(void)
334{
335    const zend_encoding *script_encoding = NULL;
336    int bom_size;
337    unsigned char *pos1, *pos2;
338
339    if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
340        return NULL;
341    }
342
343    /* check out BOM */
344    if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
345        script_encoding = zend_multibyte_encoding_utf32be;
346        bom_size = sizeof(BOM_UTF32_BE)-1;
347    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
348        script_encoding = zend_multibyte_encoding_utf32le;
349        bom_size = sizeof(BOM_UTF32_LE)-1;
350    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
351        script_encoding = zend_multibyte_encoding_utf16be;
352        bom_size = sizeof(BOM_UTF16_BE)-1;
353    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
354        script_encoding = zend_multibyte_encoding_utf16le;
355        bom_size = sizeof(BOM_UTF16_LE)-1;
356    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
357        script_encoding = zend_multibyte_encoding_utf8;
358        bom_size = sizeof(BOM_UTF8)-1;
359    }
360
361    if (script_encoding) {
362        /* remove BOM */
363        LANG_SCNG(script_org) += bom_size;
364        LANG_SCNG(script_org_size) -= bom_size;
365
366        return script_encoding;
367    }
368
369    /* script contains NULL bytes -> auto-detection */
370    if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
371        /* check if the NULL byte is after the __HALT_COMPILER(); */
372        pos2 = LANG_SCNG(script_org);
373
374        while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
375            pos2 = memchr(pos2, '_', pos1 - pos2);
376            if (!pos2) break;
377            pos2++;
378            if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
379                pos2 += sizeof("_HALT_COMPILER")-1;
380                while (*pos2 == ' '  ||
381                       *pos2 == '\t' ||
382                       *pos2 == '\r' ||
383                       *pos2 == '\n') {
384                    pos2++;
385                }
386                if (*pos2 == '(') {
387                    pos2++;
388                    while (*pos2 == ' '  ||
389                           *pos2 == '\t' ||
390                           *pos2 == '\r' ||
391                           *pos2 == '\n') {
392                        pos2++;
393                    }
394                    if (*pos2 == ')') {
395                        pos2++;
396                        while (*pos2 == ' '  ||
397                               *pos2 == '\t' ||
398                               *pos2 == '\r' ||
399                               *pos2 == '\n') {
400                            pos2++;
401                        }
402                        if (*pos2 == ';') {
403                            return NULL;
404                        }
405                    }
406                }
407            }
408        }
409        /* make best effort if BOM is missing */
410        return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
411    }
412
413    return NULL;
414}
415
416static const zend_encoding* zend_multibyte_find_script_encoding(void)
417{
418    const zend_encoding *script_encoding;
419
420    if (CG(detect_unicode)) {
421        /* check out bom(byte order mark) and see if containing wchars */
422        script_encoding = zend_multibyte_detect_unicode();
423        if (script_encoding != NULL) {
424            /* bom or wchar detection is prior to 'script_encoding' option */
425            return script_encoding;
426        }
427    }
428
429    /* if no script_encoding specified, just leave alone */
430    if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
431        return NULL;
432    }
433
434    /* if multiple encodings specified, detect automagically */
435    if (CG(script_encoding_list_size) > 1) {
436        return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
437    }
438
439    return CG(script_encoding_list)[0];
440}
441
442ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
443{
444    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
445    const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
446
447    if (!script_encoding) {
448        return FAILURE;
449    }
450
451    /* judge input/output filter */
452    LANG_SCNG(script_encoding) = script_encoding;
453    LANG_SCNG(input_filter) = NULL;
454    LANG_SCNG(output_filter) = NULL;
455
456    if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
457        if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
458            /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
459            LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
460            LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
461        } else {
462            LANG_SCNG(input_filter) = NULL;
463            LANG_SCNG(output_filter) = NULL;
464        }
465        return SUCCESS;
466    }
467
468    if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
469        LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
470        LANG_SCNG(output_filter) = NULL;
471    } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
472        LANG_SCNG(input_filter) = NULL;
473        LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
474    } else {
475        /* both script and internal encodings are incompatible w/ flex */
476        LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
477        LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
478    }
479
480    return 0;
481}
482
483ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
484{
485    char *buf;
486    size_t size, offset = 0;
487    zend_string *compiled_filename;
488
489    /* The shebang line was read, get the current position to obtain the buffer start */
490    if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
491        if ((offset = ftell(file_handle->handle.fp)) == -1) {
492            offset = 0;
493        }
494    }
495
496    if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
497        return FAILURE;
498    }
499
500    zend_llist_add_element(&CG(open_files), file_handle);
501    if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
502        zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
503        size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
504        fh->handle.stream.handle = (void*)(((char*)fh) + diff);
505        file_handle->handle.stream.handle = fh->handle.stream.handle;
506    }
507
508    /* Reset the scanner for scanning the new file */
509    SCNG(yy_in) = file_handle;
510    SCNG(yy_start) = NULL;
511
512    if (size != -1) {
513        if (CG(multibyte)) {
514            SCNG(script_org) = (unsigned char*)buf;
515            SCNG(script_org_size) = size;
516            SCNG(script_filtered) = NULL;
517
518            zend_multibyte_set_filter(NULL);
519
520            if (SCNG(input_filter)) {
521                if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
522                    zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
523                            "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
524                }
525                buf = (char*)SCNG(script_filtered);
526                size = SCNG(script_filtered_size);
527            }
528        }
529        SCNG(yy_start) = (unsigned char *)buf - offset;
530        yy_scan_buffer(buf, (unsigned int)size);
531    } else {
532        zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
533    }
534
535    BEGIN(INITIAL);
536
537    if (file_handle->opened_path) {
538        compiled_filename = zend_string_copy(file_handle->opened_path);
539    } else {
540        compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
541    }
542
543    zend_set_compiled_filename(compiled_filename);
544    zend_string_release(compiled_filename);
545
546    if (CG(start_lineno)) {
547        CG(zend_lineno) = CG(start_lineno);
548        CG(start_lineno) = 0;
549    } else {
550        CG(zend_lineno) = 1;
551    }
552
553    RESET_DOC_COMMENT();
554    CG(increment_lineno) = 0;
555    return SUCCESS;
556}
557END_EXTERN_C()
558
559
560ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
561{
562    zend_lex_state original_lex_state;
563    zend_op_array *op_array = NULL;
564    zend_save_lexical_state(&original_lex_state);
565
566    if (open_file_for_scanning(file_handle)==FAILURE) {
567        if (type==ZEND_REQUIRE) {
568            zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
569            zend_bailout();
570        } else {
571            zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
572        }
573    } else {
574        zend_bool original_in_compilation = CG(in_compilation);
575        CG(in_compilation) = 1;
576
577        CG(ast) = NULL;
578        CG(ast_arena) = zend_arena_create(1024 * 32);
579        if (!zendparse()) {
580            zval retval_zv;
581            zend_op_array *original_active_op_array = CG(active_op_array);
582            op_array = emalloc(sizeof(zend_op_array));
583            init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE);
584            CG(active_op_array) = op_array;
585            ZVAL_LONG(&retval_zv, 1);
586
587            zend_stack_push(&CG(context_stack), (void *) &CG(context));
588            zend_init_compiler_context();
589            if (zend_ast_process) {
590                zend_ast_process(CG(ast));
591            }
592            zend_compile_top_stmt(CG(ast));
593            zend_do_end_compilation();
594            zend_emit_final_return(&retval_zv);
595            pass_two(op_array);
596            zend_release_labels(0);
597
598            CG(active_op_array) = original_active_op_array;
599        }
600
601        zend_ast_destroy(CG(ast));
602        zend_arena_destroy(CG(ast_arena));
603        CG(in_compilation) = original_in_compilation;
604    }
605
606    zend_restore_lexical_state(&original_lex_state);
607    return op_array;
608}
609
610
611zend_op_array *compile_filename(int type, zval *filename)
612{
613    zend_file_handle file_handle;
614    zval tmp;
615    zend_op_array *retval;
616    zend_string *opened_path = NULL;
617
618    if (Z_TYPE_P(filename) != IS_STRING) {
619        tmp = *filename;
620        zval_copy_ctor(&tmp);
621        convert_to_string(&tmp);
622        filename = &tmp;
623    }
624    file_handle.filename = Z_STRVAL_P(filename);
625    file_handle.free_filename = 0;
626    file_handle.type = ZEND_HANDLE_FILENAME;
627    file_handle.opened_path = NULL;
628    file_handle.handle.fp = NULL;
629
630    retval = zend_compile_file(&file_handle, type);
631    if (retval && file_handle.handle.stream.handle) {
632        if (!file_handle.opened_path) {
633            file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
634        }
635
636        zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
637
638        if (opened_path) {
639            zend_string_release(opened_path);
640        }
641    }
642    zend_destroy_file_handle(&file_handle);
643
644    if (filename==&tmp) {
645        zval_dtor(&tmp);
646    }
647    return retval;
648}
649
650ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
651{
652    char *buf;
653    size_t size, old_len;
654    zend_string *new_compiled_filename;
655
656    /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
657    old_len = Z_STRLEN_P(str);
658    Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
659    Z_TYPE_INFO_P(str) = IS_STRING_EX;
660    memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
661
662    SCNG(yy_in) = NULL;
663    SCNG(yy_start) = NULL;
664
665    buf = Z_STRVAL_P(str);
666    size = old_len;
667
668    if (CG(multibyte)) {
669        SCNG(script_org) = (unsigned char*)buf;
670        SCNG(script_org_size) = size;
671        SCNG(script_filtered) = NULL;
672
673        zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
674
675        if (SCNG(input_filter)) {
676            if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
677                zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
678                        "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
679            }
680            buf = (char*)SCNG(script_filtered);
681            size = SCNG(script_filtered_size);
682        }
683    }
684
685    yy_scan_buffer(buf, (unsigned int)size);
686
687    new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
688    zend_set_compiled_filename(new_compiled_filename);
689    zend_string_release(new_compiled_filename);
690    CG(zend_lineno) = 1;
691    CG(increment_lineno) = 0;
692    RESET_DOC_COMMENT();
693    return SUCCESS;
694}
695
696
697ZEND_API size_t zend_get_scanned_file_offset(void)
698{
699    size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
700    if (SCNG(input_filter)) {
701        size_t original_offset = offset, length = 0;
702        do {
703            unsigned char *p = NULL;
704            if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
705                return (size_t)-1;
706            }
707            efree(p);
708            if (length > original_offset) {
709                offset--;
710            } else if (length < original_offset) {
711                offset++;
712            }
713        } while (original_offset != length);
714    }
715    return offset;
716}
717
718
719zend_op_array *compile_string(zval *source_string, char *filename)
720{
721    zend_lex_state original_lex_state;
722    zend_op_array *op_array = NULL;
723    zval tmp;
724    zend_bool original_in_compilation = CG(in_compilation);
725
726    if (Z_STRLEN_P(source_string)==0) {
727        return NULL;
728    }
729
730    ZVAL_DUP(&tmp, source_string);
731    convert_to_string(&tmp);
732    source_string = &tmp;
733
734    CG(in_compilation) = 1;
735    zend_save_lexical_state(&original_lex_state);
736    if (zend_prepare_string_for_scanning(source_string, filename) == SUCCESS) {
737        CG(ast) = NULL;
738        CG(ast_arena) = zend_arena_create(1024 * 32);
739        BEGIN(ST_IN_SCRIPTING);
740
741        if (!zendparse()) {
742            zend_op_array *original_active_op_array = CG(active_op_array);
743            op_array = emalloc(sizeof(zend_op_array));
744            init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE);
745            CG(active_op_array) = op_array;
746
747            zend_stack_push(&CG(context_stack), (void *) &CG(context));
748            zend_init_compiler_context();
749            if (zend_ast_process) {
750                zend_ast_process(CG(ast));
751            }
752            zend_compile_top_stmt(CG(ast));
753            zend_do_end_compilation();
754            zend_emit_final_return(NULL);
755            pass_two(op_array);
756            zend_release_labels(0);
757
758            CG(active_op_array) = original_active_op_array;
759        }
760
761        zend_ast_destroy(CG(ast));
762        zend_arena_destroy(CG(ast_arena));
763    }
764
765    zend_restore_lexical_state(&original_lex_state);
766    zval_dtor(&tmp);
767    CG(in_compilation) = original_in_compilation;
768    return op_array;
769}
770
771
772BEGIN_EXTERN_C()
773int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
774{
775    zend_lex_state original_lex_state;
776    zend_file_handle file_handle;
777
778    file_handle.type = ZEND_HANDLE_FILENAME;
779    file_handle.filename = filename;
780    file_handle.free_filename = 0;
781    file_handle.opened_path = NULL;
782    zend_save_lexical_state(&original_lex_state);
783    if (open_file_for_scanning(&file_handle)==FAILURE) {
784        zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
785        zend_restore_lexical_state(&original_lex_state);
786        return FAILURE;
787    }
788    zend_highlight(syntax_highlighter_ini);
789    if (SCNG(script_filtered)) {
790        efree(SCNG(script_filtered));
791        SCNG(script_filtered) = NULL;
792    }
793    zend_destroy_file_handle(&file_handle);
794    zend_restore_lexical_state(&original_lex_state);
795    return SUCCESS;
796}
797
798int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
799{
800    zend_lex_state original_lex_state;
801    zval tmp = *str;
802
803    str = &tmp;
804    zval_copy_ctor(str);
805    zend_save_lexical_state(&original_lex_state);
806    if (zend_prepare_string_for_scanning(str, str_name)==FAILURE) {
807        zend_restore_lexical_state(&original_lex_state);
808        return FAILURE;
809    }
810    BEGIN(INITIAL);
811    zend_highlight(syntax_highlighter_ini);
812    if (SCNG(script_filtered)) {
813        efree(SCNG(script_filtered));
814        SCNG(script_filtered) = NULL;
815    }
816    zend_restore_lexical_state(&original_lex_state);
817    zval_dtor(str);
818    return SUCCESS;
819}
820
821ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
822{
823    size_t length;
824    unsigned char *new_yy_start;
825
826    /* convert and set */
827    if (!SCNG(input_filter)) {
828        if (SCNG(script_filtered)) {
829            efree(SCNG(script_filtered));
830            SCNG(script_filtered) = NULL;
831        }
832        SCNG(script_filtered_size) = 0;
833        length = SCNG(script_org_size);
834        new_yy_start = SCNG(script_org);
835    } else {
836        if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
837            zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
838                    "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
839        }
840        if (SCNG(script_filtered)) {
841            efree(SCNG(script_filtered));
842        }
843        SCNG(script_filtered) = new_yy_start;
844        SCNG(script_filtered_size) = length;
845    }
846
847    SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
848    SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
849    SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
850    SCNG(yy_limit) = new_yy_start + length;
851
852    SCNG(yy_start) = new_yy_start;
853}
854
855
/* Store the `yyleng` bytes starting at `yytext` in `zendlval` as a new string.
 * When SCNG(output_filter) is set the bytes are passed through it first and
 * the buffer it produced is freed after being copied into the zval.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a single
 * statement (safe inside an unbraced if/else); invoke it with a trailing ';'.
 *
 * TODO: avoid reallocation ???
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
    do { \
        if (SCNG(output_filter)) { \
            size_t sz = 0; \
            char *s = NULL; \
            SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)(yytext), (size_t)(yyleng)); \
            ZVAL_STRINGL(zendlval, s, sz); \
            efree(s); \
        } else { \
            ZVAL_STRINGL(zendlval, (yytext), (yyleng)); \
        } \
    } while (0)
867
868static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
869{
870    register char *s, *t;
871    char *end;
872
873    ZVAL_STRINGL(zendlval, str, len);
874
875    /* convert escape sequences */
876    s = t = Z_STRVAL_P(zendlval);
877    end = s+Z_STRLEN_P(zendlval);
878    while (s<end) {
879        if (*s=='\\') {
880            s++;
881            if (s >= end) {
882                *t++ = '\\';
883                break;
884            }
885
886            switch(*s) {
887                case 'n':
888                    *t++ = '\n';
889                    Z_STRLEN_P(zendlval)--;
890                    break;
891                case 'r':
892                    *t++ = '\r';
893                    Z_STRLEN_P(zendlval)--;
894                    break;
895                case 't':
896                    *t++ = '\t';
897                    Z_STRLEN_P(zendlval)--;
898                    break;
899                case 'f':
900                    *t++ = '\f';
901                    Z_STRLEN_P(zendlval)--;
902                    break;
903                case 'v':
904                    *t++ = '\v';
905                    Z_STRLEN_P(zendlval)--;
906                    break;
907                case 'e':
908#ifdef PHP_WIN32
909                    *t++ = VK_ESCAPE;
910#else
911                    *t++ = '\e';
912#endif
913                    Z_STRLEN_P(zendlval)--;
914                    break;
915                case '"':
916                case '`':
917                    if (*s != quote_type) {
918                        *t++ = '\\';
919                        *t++ = *s;
920                        break;
921                    }
922                case '\\':
923                case '$':
924                    *t++ = *s;
925                    Z_STRLEN_P(zendlval)--;
926                    break;
927                case 'x':
928                case 'X':
929                    if (ZEND_IS_HEX(*(s+1))) {
930                        char hex_buf[3] = { 0, 0, 0 };
931
932                        Z_STRLEN_P(zendlval)--; /* for the 'x' */
933
934                        hex_buf[0] = *(++s);
935                        Z_STRLEN_P(zendlval)--;
936                        if (ZEND_IS_HEX(*(s+1))) {
937                            hex_buf[1] = *(++s);
938                            Z_STRLEN_P(zendlval)--;
939                        }
940                        *t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
941                    } else {
942                        *t++ = '\\';
943                        *t++ = *s;
944                    }
945                    break;
946                /* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
947                case 'u':
948                    {
949                        /* cache where we started so we can parse after validating */
950                        char *start = s + 1;
951                        size_t len = 0;
952                        zend_bool valid = 1;
953                        unsigned long codepoint;
954                        size_t byte_len = 0;
955
956                        if (*start != '{') {
957                            /* we silently let this pass to avoid breaking code
958                             * with JSON in string literals (e.g. "\"\u202e\""
959                             */
960                            *t++ = '\\';
961                            *t++ = 'u';
962                            break;
963                        } else {
964                            /* on the other hand, invalid \u{blah} errors */
965                            s++;
966                            len++;
967                            s++;
968                            while (*s != '}') {
969                                if (!ZEND_IS_HEX(*s)) {
970                                    valid = 0;
971                                    break;
972                                } else {
973                                    len++;
974                                }
975                                s++;
976                            }
977                            if (*s == '}') {
978                                valid = 1;
979                                len++;
980                            }
981                        }
982
983                        /* \u{} is invalid */
984                        if (len <= 2) {
985                            valid = 0;
986                        }
987
988                        if (!valid) {
989                            zend_error(E_COMPILE_ERROR, "Invalid UTF-8 codepoint escape sequence");
990                        }
991
992                        errno = 0;
993                        codepoint = strtoul(start + 1, NULL, 16);
994
995                        /* per RFC 3629, UTF-8 can only represent 21 bits */
996                        if (codepoint > 0x10FFFF || errno) {
997                            zend_error_noreturn(E_COMPILE_ERROR, "Invalid UTF-8 codepoint escape sequence: Codepoint too large");
998                        }
999
1000                        /* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
1001                        if (codepoint < 0x80) {
1002                            byte_len = 1;
1003                            *t++ = codepoint;
1004                        } else if (codepoint <= 0x7FF) {
1005                            byte_len = 2;
1006                            *t++ = (codepoint >> 6) + 0xC0;
1007                            *t++ = (codepoint & 0x3F) + 0x80;
1008                        } else if (codepoint <= 0xFFFF) {
1009                            byte_len = 3;
1010                            *t++ = (codepoint >> 12) + 0xE0;
1011                            *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
1012                            *t++ = (codepoint & 0x3F) + 0x80;
1013                        } else if (codepoint <= 0x10FFFF) {
1014                            byte_len = 4;
1015                            *t++ = (codepoint >> 18) + 0xF0;
1016                            *t++ = ((codepoint >> 12) & 0x3F) + 0x80;
1017                            *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
1018                            *t++ = (codepoint & 0x3F) + 0x80;
1019                        }
1020
1021                        Z_STRLEN_P(zendlval) -= 2; /* \u */
1022                        Z_STRLEN_P(zendlval) -= (len - byte_len);
1023                    }
1024                    break;
1025                default:
1026                    /* check for an octal */
1027                    if (ZEND_IS_OCT(*s)) {
1028                        char octal_buf[4] = { 0, 0, 0, 0 };
1029
1030                        octal_buf[0] = *s;
1031                        Z_STRLEN_P(zendlval)--;
1032                        if (ZEND_IS_OCT(*(s+1))) {
1033                            octal_buf[1] = *(++s);
1034                            Z_STRLEN_P(zendlval)--;
1035                            if (ZEND_IS_OCT(*(s+1))) {
1036                                octal_buf[2] = *(++s);
1037                                Z_STRLEN_P(zendlval)--;
1038                            }
1039                        }
1040                        *t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
1041                    } else {
1042                        *t++ = '\\';
1043                        *t++ = *s;
1044                    }
1045                    break;
1046            }
1047        } else {
1048            *t++ = *s;
1049        }
1050
1051        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1052            CG(zend_lineno)++;
1053        }
1054        s++;
1055    }
1056    *t = 0;
1057    if (SCNG(output_filter)) {
1058        size_t sz = 0;
1059        unsigned char *str;
1060        // TODO: avoid realocation ???
1061        s = Z_STRVAL_P(zendlval);
1062        SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
1063        zval_ptr_dtor(zendlval);
1064        ZVAL_STRINGL(zendlval, (char *) str, sz);
1065        efree(str);
1066    }
1067}
1068
1069
1070int lex_scan(zval *zendlval)
1071{
1072restart:
1073    SCNG(yy_text) = YYCURSOR;
1074
1075/*!re2c
1076re2c:yyfill:check = 0;
1077LNUM    [0-9]+
1078DNUM    ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1079EXPONENT_DNUM   (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1080HNUM    "0x"[0-9a-fA-F]+
1081BNUM    "0b"[01]+
1082LABEL   [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1083WHITESPACE [ \n\r\t]+
1084TABS_AND_SPACES [ \t]*
1085TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1086ANY_CHAR [^]
1087NEWLINE ("\r"|"\n"|"\r\n")
1088
1089/* compute yyleng before each rule */
1090<!*> := yyleng = YYCURSOR - SCNG(yy_text);
1091
1092<ST_IN_SCRIPTING>"exit" {
1093    return T_EXIT;
1094}
1095
1096<ST_IN_SCRIPTING>"die" {
1097    return T_EXIT;
1098}
1099
1100<ST_IN_SCRIPTING>"function" {
1101    return T_FUNCTION;
1102}
1103
1104<ST_IN_SCRIPTING>"const" {
1105    return T_CONST;
1106}
1107
1108<ST_IN_SCRIPTING>"return" {
1109    return T_RETURN;
1110}
1111
1112<ST_IN_SCRIPTING>"yield" {
1113    return T_YIELD;
1114}
1115
1116<ST_IN_SCRIPTING>"try" {
1117    return T_TRY;
1118}
1119
1120<ST_IN_SCRIPTING>"catch" {
1121    return T_CATCH;
1122}
1123
1124<ST_IN_SCRIPTING>"finally" {
1125    return T_FINALLY;
1126}
1127
1128<ST_IN_SCRIPTING>"throw" {
1129    return T_THROW;
1130}
1131
1132<ST_IN_SCRIPTING>"if" {
1133    return T_IF;
1134}
1135
1136<ST_IN_SCRIPTING>"elseif" {
1137    return T_ELSEIF;
1138}
1139
1140<ST_IN_SCRIPTING>"endif" {
1141    return T_ENDIF;
1142}
1143
1144<ST_IN_SCRIPTING>"else" {
1145    return T_ELSE;
1146}
1147
1148<ST_IN_SCRIPTING>"while" {
1149    return T_WHILE;
1150}
1151
1152<ST_IN_SCRIPTING>"endwhile" {
1153    return T_ENDWHILE;
1154}
1155
1156<ST_IN_SCRIPTING>"do" {
1157    return T_DO;
1158}
1159
1160<ST_IN_SCRIPTING>"for" {
1161    return T_FOR;
1162}
1163
1164<ST_IN_SCRIPTING>"endfor" {
1165    return T_ENDFOR;
1166}
1167
1168<ST_IN_SCRIPTING>"foreach" {
1169    return T_FOREACH;
1170}
1171
1172<ST_IN_SCRIPTING>"endforeach" {
1173    return T_ENDFOREACH;
1174}
1175
1176<ST_IN_SCRIPTING>"declare" {
1177    return T_DECLARE;
1178}
1179
1180<ST_IN_SCRIPTING>"enddeclare" {
1181    return T_ENDDECLARE;
1182}
1183
1184<ST_IN_SCRIPTING>"instanceof" {
1185    return T_INSTANCEOF;
1186}
1187
1188<ST_IN_SCRIPTING>"as" {
1189    return T_AS;
1190}
1191
1192<ST_IN_SCRIPTING>"switch" {
1193    return T_SWITCH;
1194}
1195
1196<ST_IN_SCRIPTING>"endswitch" {
1197    return T_ENDSWITCH;
1198}
1199
1200<ST_IN_SCRIPTING>"case" {
1201    return T_CASE;
1202}
1203
1204<ST_IN_SCRIPTING>"default" {
1205    return T_DEFAULT;
1206}
1207
1208<ST_IN_SCRIPTING>"break" {
1209    return T_BREAK;
1210}
1211
1212<ST_IN_SCRIPTING>"continue" {
1213    return T_CONTINUE;
1214}
1215
1216<ST_IN_SCRIPTING>"goto" {
1217    return T_GOTO;
1218}
1219
1220<ST_IN_SCRIPTING>"echo" {
1221    return T_ECHO;
1222}
1223
1224<ST_IN_SCRIPTING>"print" {
1225    return T_PRINT;
1226}
1227
1228<ST_IN_SCRIPTING>"class" {
1229    return T_CLASS;
1230}
1231
1232<ST_IN_SCRIPTING>"interface" {
1233    return T_INTERFACE;
1234}
1235
1236<ST_IN_SCRIPTING>"trait" {
1237    return T_TRAIT;
1238}
1239
1240<ST_IN_SCRIPTING>"extends" {
1241    return T_EXTENDS;
1242}
1243
1244<ST_IN_SCRIPTING>"implements" {
1245    return T_IMPLEMENTS;
1246}
1247
1248<ST_IN_SCRIPTING>"->" {
1249    yy_push_state(ST_LOOKING_FOR_PROPERTY);
1250    return T_OBJECT_OPERATOR;
1251}
1252
1253<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1254    HANDLE_NEWLINES(yytext, yyleng);
1255    return T_WHITESPACE;
1256}
1257
1258<ST_LOOKING_FOR_PROPERTY>"->" {
1259    return T_OBJECT_OPERATOR;
1260}
1261
1262<ST_LOOKING_FOR_PROPERTY>{LABEL} {
1263    yy_pop_state();
1264    zend_copy_value(zendlval, yytext, yyleng);
1265    return T_STRING;
1266}
1267
1268<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1269    yyless(0);
1270    yy_pop_state();
1271    goto restart;
1272}
1273
1274<ST_IN_SCRIPTING>"::" {
1275    return T_PAAMAYIM_NEKUDOTAYIM;
1276}
1277
1278<ST_IN_SCRIPTING>"\\" {
1279    return T_NS_SEPARATOR;
1280}
1281
1282<ST_IN_SCRIPTING>"..." {
1283    return T_ELLIPSIS;
1284}
1285
1286<ST_IN_SCRIPTING>"??" {
1287    return T_COALESCE;
1288}
1289
1290<ST_IN_SCRIPTING>"new" {
1291    return T_NEW;
1292}
1293
1294<ST_IN_SCRIPTING>"clone" {
1295    return T_CLONE;
1296}
1297
1298<ST_IN_SCRIPTING>"var" {
1299    return T_VAR;
1300}
1301
1302<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1303    return T_INT_CAST;
1304}
1305
1306<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1307    return T_DOUBLE_CAST;
1308}
1309
1310<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1311    return T_STRING_CAST;
1312}
1313
1314<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1315    return T_ARRAY_CAST;
1316}
1317
1318<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1319    return T_OBJECT_CAST;
1320}
1321
1322<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1323    return T_BOOL_CAST;
1324}
1325
1326<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1327    return T_UNSET_CAST;
1328}
1329
1330<ST_IN_SCRIPTING>"eval" {
1331    return T_EVAL;
1332}
1333
1334<ST_IN_SCRIPTING>"include" {
1335    return T_INCLUDE;
1336}
1337
1338<ST_IN_SCRIPTING>"include_once" {
1339    return T_INCLUDE_ONCE;
1340}
1341
1342<ST_IN_SCRIPTING>"require" {
1343    return T_REQUIRE;
1344}
1345
1346<ST_IN_SCRIPTING>"require_once" {
1347    return T_REQUIRE_ONCE;
1348}
1349
1350<ST_IN_SCRIPTING>"namespace" {
1351    return T_NAMESPACE;
1352}
1353
1354<ST_IN_SCRIPTING>"use" {
1355    return T_USE;
1356}
1357
1358<ST_IN_SCRIPTING>"insteadof" {
1359        return T_INSTEADOF;
1360}
1361
1362<ST_IN_SCRIPTING>"global" {
1363    return T_GLOBAL;
1364}
1365
1366<ST_IN_SCRIPTING>"isset" {
1367    return T_ISSET;
1368}
1369
1370<ST_IN_SCRIPTING>"empty" {
1371    return T_EMPTY;
1372}
1373
1374<ST_IN_SCRIPTING>"__halt_compiler" {
1375    return T_HALT_COMPILER;
1376}
1377
1378<ST_IN_SCRIPTING>"static" {
1379    return T_STATIC;
1380}
1381
1382<ST_IN_SCRIPTING>"abstract" {
1383    return T_ABSTRACT;
1384}
1385
1386<ST_IN_SCRIPTING>"final" {
1387    return T_FINAL;
1388}
1389
1390<ST_IN_SCRIPTING>"private" {
1391    return T_PRIVATE;
1392}
1393
1394<ST_IN_SCRIPTING>"protected" {
1395    return T_PROTECTED;
1396}
1397
1398<ST_IN_SCRIPTING>"public" {
1399    return T_PUBLIC;
1400}
1401
1402<ST_IN_SCRIPTING>"unset" {
1403    return T_UNSET;
1404}
1405
1406<ST_IN_SCRIPTING>"=>" {
1407    return T_DOUBLE_ARROW;
1408}
1409
1410<ST_IN_SCRIPTING>"list" {
1411    return T_LIST;
1412}
1413
1414<ST_IN_SCRIPTING>"array" {
1415    return T_ARRAY;
1416}
1417
1418<ST_IN_SCRIPTING>"callable" {
1419 return T_CALLABLE;
1420}
1421
1422<ST_IN_SCRIPTING>"++" {
1423    return T_INC;
1424}
1425
1426<ST_IN_SCRIPTING>"--" {
1427    return T_DEC;
1428}
1429
1430<ST_IN_SCRIPTING>"===" {
1431    return T_IS_IDENTICAL;
1432}
1433
1434<ST_IN_SCRIPTING>"!==" {
1435    return T_IS_NOT_IDENTICAL;
1436}
1437
1438<ST_IN_SCRIPTING>"==" {
1439    return T_IS_EQUAL;
1440}
1441
1442<ST_IN_SCRIPTING>"!="|"<>" {
1443    return T_IS_NOT_EQUAL;
1444}
1445
1446<ST_IN_SCRIPTING>"<=>" {
1447    return T_SPACESHIP;
1448}
1449
1450<ST_IN_SCRIPTING>"<=" {
1451    return T_IS_SMALLER_OR_EQUAL;
1452}
1453
1454<ST_IN_SCRIPTING>">=" {
1455    return T_IS_GREATER_OR_EQUAL;
1456}
1457
1458<ST_IN_SCRIPTING>"+=" {
1459    return T_PLUS_EQUAL;
1460}
1461
1462<ST_IN_SCRIPTING>"-=" {
1463    return T_MINUS_EQUAL;
1464}
1465
1466<ST_IN_SCRIPTING>"*=" {
1467    return T_MUL_EQUAL;
1468}
1469
1470<ST_IN_SCRIPTING>"*\*" {
1471    return T_POW;
1472}
1473
1474<ST_IN_SCRIPTING>"*\*=" {
1475    return T_POW_EQUAL;
1476}
1477
1478<ST_IN_SCRIPTING>"/=" {
1479    return T_DIV_EQUAL;
1480}
1481
1482<ST_IN_SCRIPTING>".=" {
1483    return T_CONCAT_EQUAL;
1484}
1485
1486<ST_IN_SCRIPTING>"%=" {
1487    return T_MOD_EQUAL;
1488}
1489
1490<ST_IN_SCRIPTING>"<<=" {
1491    return T_SL_EQUAL;
1492}
1493
1494<ST_IN_SCRIPTING>">>=" {
1495    return T_SR_EQUAL;
1496}
1497
1498<ST_IN_SCRIPTING>"&=" {
1499    return T_AND_EQUAL;
1500}
1501
1502<ST_IN_SCRIPTING>"|=" {
1503    return T_OR_EQUAL;
1504}
1505
1506<ST_IN_SCRIPTING>"^=" {
1507    return T_XOR_EQUAL;
1508}
1509
1510<ST_IN_SCRIPTING>"||" {
1511    return T_BOOLEAN_OR;
1512}
1513
1514<ST_IN_SCRIPTING>"&&" {
1515    return T_BOOLEAN_AND;
1516}
1517
1518<ST_IN_SCRIPTING>"OR" {
1519    return T_LOGICAL_OR;
1520}
1521
1522<ST_IN_SCRIPTING>"AND" {
1523    return T_LOGICAL_AND;
1524}
1525
1526<ST_IN_SCRIPTING>"XOR" {
1527    return T_LOGICAL_XOR;
1528}
1529
1530<ST_IN_SCRIPTING>"<<" {
1531    return T_SL;
1532}
1533
1534<ST_IN_SCRIPTING>">>" {
1535    return T_SR;
1536}
1537
1538<ST_IN_SCRIPTING>{TOKENS} {
1539    return yytext[0];
1540}
1541
1542
1543<ST_IN_SCRIPTING>"{" {
1544    yy_push_state(ST_IN_SCRIPTING);
1545    return '{';
1546}
1547
1548
1549<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1550    yy_push_state(ST_LOOKING_FOR_VARNAME);
1551    return T_DOLLAR_OPEN_CURLY_BRACES;
1552}
1553
1554
1555<ST_IN_SCRIPTING>"}" {
1556    RESET_DOC_COMMENT();
1557    if (!zend_stack_is_empty(&SCNG(state_stack))) {
1558        yy_pop_state();
1559    }
1560    return '}';
1561}
1562
1563
1564<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1565    yyless(yyleng - 1);
1566    zend_copy_value(zendlval, yytext, yyleng);
1567    yy_pop_state();
1568    yy_push_state(ST_IN_SCRIPTING);
1569    return T_STRING_VARNAME;
1570}
1571
1572
1573<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1574    yyless(0);
1575    yy_pop_state();
1576    yy_push_state(ST_IN_SCRIPTING);
1577    goto restart;
1578}
1579
1580<ST_IN_SCRIPTING>{BNUM} {
1581    char *bin = yytext + 2; /* Skip "0b" */
1582    int len = yyleng - 2;
1583    char *end;
1584
1585    /* Skip any leading 0s */
1586    while (*bin == '0') {
1587        ++bin;
1588        --len;
1589    }
1590
1591    if (len < SIZEOF_ZEND_LONG * 8) {
1592        if (len == 0) {
1593            ZVAL_LONG(zendlval, 0);
1594        } else {
1595            errno = 0;
1596            ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1597            ZEND_ASSERT(!errno && end == yytext + yyleng);
1598        }
1599        return T_LNUMBER;
1600    } else {
1601        ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1602        /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1603        ZEND_ASSERT(end == yytext + yyleng);
1604        return T_DNUMBER;
1605    }
1606}
1607
1608<ST_IN_SCRIPTING>{LNUM} {
1609    char *end;
1610    if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1611        errno = 0;
1612        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1613        /* This isn't an assert, we need to ensure 019 isn't valid octal
1614         * Because the lexing itself doesn't do that for us
1615         */
1616        if (end != yytext + yyleng) {
1617            zend_error_noreturn(E_COMPILE_ERROR, "Invalid numeric literal");
1618        }
1619        ZEND_ASSERT(!errno);
1620    } else {
1621        errno = 0;
1622        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1623        if (errno == ERANGE) { /* Overflow */
1624            errno = 0;
1625            if (yytext[0] == '0') { /* octal overflow */
1626                errno = 0;
1627                ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
1628            } else {
1629                ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
1630            }
1631            /* Also not an assert for the same reason */
1632            if (end != yytext + yyleng) {
1633                zend_error_noreturn(E_COMPILE_ERROR, "Invalid numeric literal");
1634            }
1635            ZEND_ASSERT(!errno);
1636            return T_DNUMBER;
1637        }
1638        /* Also not an assert for the same reason */
1639        if (end != yytext + yyleng) {
1640            zend_error_noreturn(E_COMPILE_ERROR, "Invalid numeric literal");
1641        }
1642        ZEND_ASSERT(!errno);
1643    }
1644    return T_LNUMBER;
1645}
1646
1647<ST_IN_SCRIPTING>{HNUM} {
1648    char *hex = yytext + 2; /* Skip "0x" */
1649    int len = yyleng - 2;
1650    char *end;
1651
1652    /* Skip any leading 0s */
1653    while (*hex == '0') {
1654        hex++;
1655        len--;
1656    }
1657
1658    if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1659        if (len == 0) {
1660            ZVAL_LONG(zendlval, 0);
1661        } else {
1662            errno = 0;
1663            ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1664            ZEND_ASSERT(!errno && end == hex + len);
1665        }
1666        return T_LNUMBER;
1667    } else {
1668        ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1669        /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1670        ZEND_ASSERT(end == hex + len);
1671        return T_DNUMBER;
1672    }
1673}
1674
1675<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1676    if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1677        char *end;
1678        errno = 0;
1679        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
1680        if (errno == ERANGE) {
1681            goto string;
1682        }
1683        ZEND_ASSERT(end == yytext + yyleng);
1684    } else {
1685string:
1686        ZVAL_STRINGL(zendlval, yytext, yyleng);
1687    }
1688    return T_NUM_STRING;
1689}
1690
1691<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1692    ZVAL_STRINGL(zendlval, yytext, yyleng);
1693    return T_NUM_STRING;
1694}
1695
1696<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1697    const char *end;
1698
1699    ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
1700    /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1701    ZEND_ASSERT(end == yytext + yyleng);
1702    return T_DNUMBER;
1703}
1704
1705<ST_IN_SCRIPTING>"__CLASS__" {
1706    return T_CLASS_C;
1707}
1708
1709<ST_IN_SCRIPTING>"__TRAIT__" {
1710    return T_TRAIT_C;
1711}
1712
1713<ST_IN_SCRIPTING>"__FUNCTION__" {
1714    return T_FUNC_C;
1715}
1716
1717<ST_IN_SCRIPTING>"__METHOD__" {
1718    return T_METHOD_C;
1719}
1720
1721<ST_IN_SCRIPTING>"__LINE__" {
1722    return T_LINE;
1723}
1724
1725<ST_IN_SCRIPTING>"__FILE__" {
1726    return T_FILE;
1727}
1728
1729<ST_IN_SCRIPTING>"__DIR__" {
1730    return T_DIR;
1731}
1732
1733<ST_IN_SCRIPTING>"__NAMESPACE__" {
1734    return T_NS_C;
1735}
1736
1737
1738<INITIAL>"<?=" {
1739    BEGIN(ST_IN_SCRIPTING);
1740    return T_OPEN_TAG_WITH_ECHO;
1741}
1742
1743
1744<INITIAL>"<?php"([ \t]|{NEWLINE}) {
1745    HANDLE_NEWLINE(yytext[yyleng-1]);
1746    BEGIN(ST_IN_SCRIPTING);
1747    return T_OPEN_TAG;
1748}
1749
1750
1751<INITIAL>"<?" {
1752    if (CG(short_tags)) {
1753        BEGIN(ST_IN_SCRIPTING);
1754        return T_OPEN_TAG;
1755    } else {
1756        goto inline_char_handler;
1757    }
1758}
1759
1760<INITIAL>{ANY_CHAR} {
1761    if (YYCURSOR > YYLIMIT) {
1762        return 0;
1763    }
1764
1765inline_char_handler:
1766
1767    while (1) {
1768        YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1769
1770        YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1771
1772        if (YYCURSOR >= YYLIMIT) {
1773            break;
1774        }
1775
1776        if (*YYCURSOR == '?') {
1777            if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1778
1779                YYCURSOR--;
1780                break;
1781            }
1782        }
1783    }
1784
1785    yyleng = YYCURSOR - SCNG(yy_text);
1786
1787    if (SCNG(output_filter)) {
1788        size_t readsize;
1789        char *s = NULL;
1790        size_t sz = 0;
1791        // TODO: avoid reallocation ???
1792        readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
1793        ZVAL_STRINGL(zendlval, s, sz);
1794        efree(s);
1795        if (readsize < yyleng) {
1796            yyless(readsize);
1797        }
1798    } else {
1799      ZVAL_STRINGL(zendlval, yytext, yyleng);
1800    }
1801    HANDLE_NEWLINES(yytext, yyleng);
1802    return T_INLINE_HTML;
1803}
1804
1805
1806/* Make sure a label character follows "->", otherwise there is no property
1807 * and "->" will be taken literally
1808 */
1809<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1810    yyless(yyleng - 3);
1811    yy_push_state(ST_LOOKING_FOR_PROPERTY);
1812    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1813    return T_VARIABLE;
1814}
1815
1816/* A [ always designates a variable offset, regardless of what follows
1817 */
1818<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1819    yyless(yyleng - 1);
1820    yy_push_state(ST_VAR_OFFSET);
1821    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1822    return T_VARIABLE;
1823}
1824
1825<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1826    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1827    return T_VARIABLE;
1828}
1829
1830<ST_VAR_OFFSET>"]" {
1831    yy_pop_state();
1832    return ']';
1833}
1834
1835<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1836    /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1837    return yytext[0];
1838}
1839
1840<ST_VAR_OFFSET>[ \n\r\t\\'#] {
1841    /* Invalid rule to return a more explicit parse error with proper line number */
1842    yyless(0);
1843    yy_pop_state();
1844    ZVAL_NULL(zendlval);
1845    return T_ENCAPSED_AND_WHITESPACE;
1846}
1847
1848<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1849    zend_copy_value(zendlval, yytext, yyleng);
1850    return T_STRING;
1851}
1852
1853
1854<ST_IN_SCRIPTING>"#"|"//" {
1855    while (YYCURSOR < YYLIMIT) {
1856        switch (*YYCURSOR++) {
1857            case '\r':
1858                if (*YYCURSOR == '\n') {
1859                    YYCURSOR++;
1860                }
1861                /* fall through */
1862            case '\n':
1863                CG(zend_lineno)++;
1864                break;
1865            case '?':
1866                if (*YYCURSOR == '>') {
1867                    YYCURSOR--;
1868                    break;
1869                }
1870                /* fall through */
1871            default:
1872                continue;
1873        }
1874
1875        break;
1876    }
1877
1878    yyleng = YYCURSOR - SCNG(yy_text);
1879
1880    return T_COMMENT;
1881}
1882
1883<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1884    int doc_com;
1885
1886    if (yyleng > 2) {
1887        doc_com = 1;
1888        RESET_DOC_COMMENT();
1889    } else {
1890        doc_com = 0;
1891    }
1892
1893    while (YYCURSOR < YYLIMIT) {
1894        if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1895            break;
1896        }
1897    }
1898
1899    if (YYCURSOR < YYLIMIT) {
1900        YYCURSOR++;
1901    } else {
1902        zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1903    }
1904
1905    yyleng = YYCURSOR - SCNG(yy_text);
1906    HANDLE_NEWLINES(yytext, yyleng);
1907
1908    if (doc_com) {
1909        CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1910        return T_DOC_COMMENT;
1911    }
1912
1913    return T_COMMENT;
1914}
1915
1916<ST_IN_SCRIPTING>"?>"{NEWLINE}? {
1917    BEGIN(INITIAL);
1918    return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1919}
1920
1921
1922<ST_IN_SCRIPTING>b?['] {
1923    register char *s, *t;
1924    char *end;
1925    int bprefix = (yytext[0] != '\'') ? 1 : 0;
1926
1927    while (1) {
1928        if (YYCURSOR < YYLIMIT) {
1929            if (*YYCURSOR == '\'') {
1930                YYCURSOR++;
1931                yyleng = YYCURSOR - SCNG(yy_text);
1932
1933                break;
1934            } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1935                YYCURSOR++;
1936            }
1937        } else {
1938            yyleng = YYLIMIT - SCNG(yy_text);
1939
1940            /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1941             * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1942             * rule, which continued in ST_IN_SCRIPTING state after the quote */
1943            ZVAL_NULL(zendlval);
1944            return T_ENCAPSED_AND_WHITESPACE;
1945        }
1946    }
1947
1948    ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
1949
1950    /* convert escape sequences */
1951    s = t = Z_STRVAL_P(zendlval);
1952    end = s+Z_STRLEN_P(zendlval);
1953    while (s<end) {
1954        if (*s=='\\') {
1955            s++;
1956
1957            switch(*s) {
1958                case '\\':
1959                case '\'':
1960                    *t++ = *s;
1961                    Z_STRLEN_P(zendlval)--;
1962                    break;
1963                default:
1964                    *t++ = '\\';
1965                    *t++ = *s;
1966                    break;
1967            }
1968        } else {
1969            *t++ = *s;
1970        }
1971
1972        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1973            CG(zend_lineno)++;
1974        }
1975        s++;
1976    }
1977    *t = 0;
1978
1979    if (SCNG(output_filter)) {
1980        size_t sz = 0;
1981        char *str = NULL;
1982        s = Z_STRVAL_P(zendlval);
1983        // TODO: avoid reallocation ???
1984        SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
1985        ZVAL_STRINGL(zendlval, str, sz);
1986    }
1987    return T_CONSTANT_ENCAPSED_STRING;
1988}
1989
1990
1991<ST_IN_SCRIPTING>b?["] {
1992    int bprefix = (yytext[0] != '"') ? 1 : 0;
1993
1994    while (YYCURSOR < YYLIMIT) {
1995        switch (*YYCURSOR++) {
1996            case '"':
1997                yyleng = YYCURSOR - SCNG(yy_text);
1998                zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"');
1999                return T_CONSTANT_ENCAPSED_STRING;
2000            case '$':
2001                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2002                    break;
2003                }
2004                continue;
2005            case '{':
2006                if (*YYCURSOR == '$') {
2007                    break;
2008                }
2009                continue;
2010            case '\\':
2011                if (YYCURSOR < YYLIMIT) {
2012                    YYCURSOR++;
2013                }
2014                /* fall through */
2015            default:
2016                continue;
2017        }
2018
2019        YYCURSOR--;
2020        break;
2021    }
2022
2023    /* Remember how much was scanned to save rescanning */
2024    SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2025
2026    YYCURSOR = SCNG(yy_text) + yyleng;
2027
2028    BEGIN(ST_DOUBLE_QUOTES);
2029    return '"';
2030}
2031
2032
2033<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2034    char *s;
2035    int bprefix = (yytext[0] != '<') ? 1 : 0;
2036    zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2037
2038    CG(zend_lineno)++;
2039    heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2040    s = yytext+bprefix+3;
2041    while ((*s == ' ') || (*s == '\t')) {
2042        s++;
2043        heredoc_label->length--;
2044    }
2045
2046    if (*s == '\'') {
2047        s++;
2048        heredoc_label->length -= 2;
2049
2050        BEGIN(ST_NOWDOC);
2051    } else {
2052        if (*s == '"') {
2053            s++;
2054            heredoc_label->length -= 2;
2055        }
2056
2057        BEGIN(ST_HEREDOC);
2058    }
2059
2060    heredoc_label->label = estrndup(s, heredoc_label->length);
2061
2062    /* Check for ending label on the next line */
2063    if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2064        YYCTYPE *end = YYCURSOR + heredoc_label->length;
2065
2066        if (*end == ';') {
2067            end++;
2068        }
2069
2070        if (*end == '\n' || *end == '\r') {
2071            BEGIN(ST_END_HEREDOC);
2072        }
2073    }
2074
2075    zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2076
2077    return T_START_HEREDOC;
2078}
2079
2080
2081<ST_IN_SCRIPTING>[`] {
2082    BEGIN(ST_BACKQUOTE);
2083    return '`';
2084}
2085
2086
2087<ST_END_HEREDOC>{ANY_CHAR} {
2088    zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2089
2090    YYCURSOR += heredoc_label->length - 1;
2091    yyleng = heredoc_label->length;
2092
2093    heredoc_label_dtor(heredoc_label);
2094    efree(heredoc_label);
2095
2096    BEGIN(ST_IN_SCRIPTING);
2097    return T_END_HEREDOC;
2098}
2099
2100
2101<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2102    Z_LVAL_P(zendlval) = (zend_long) '{';
2103    yy_push_state(ST_IN_SCRIPTING);
2104    yyless(1);
2105    return T_CURLY_OPEN;
2106}
2107
2108
2109<ST_DOUBLE_QUOTES>["] {
2110    BEGIN(ST_IN_SCRIPTING);
2111    return '"';
2112}
2113
2114<ST_BACKQUOTE>[`] {
2115    BEGIN(ST_IN_SCRIPTING);
2116    return '`';
2117}
2118
2119
2120<ST_DOUBLE_QUOTES>{ANY_CHAR} {
2121    if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2122        YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2123        SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2124
2125        goto double_quotes_scan_done;
2126    }
2127
2128    if (YYCURSOR > YYLIMIT) {
2129        return 0;
2130    }
2131    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2132        YYCURSOR++;
2133    }
2134
2135    while (YYCURSOR < YYLIMIT) {
2136        switch (*YYCURSOR++) {
2137            case '"':
2138                break;
2139            case '$':
2140                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2141                    break;
2142                }
2143                continue;
2144            case '{':
2145                if (*YYCURSOR == '$') {
2146                    break;
2147                }
2148                continue;
2149            case '\\':
2150                if (YYCURSOR < YYLIMIT) {
2151                    YYCURSOR++;
2152                }
2153                /* fall through */
2154            default:
2155                continue;
2156        }
2157
2158        YYCURSOR--;
2159        break;
2160    }
2161
2162double_quotes_scan_done:
2163    yyleng = YYCURSOR - SCNG(yy_text);
2164
2165    zend_scan_escape_string(zendlval, yytext, yyleng, '"');
2166    return T_ENCAPSED_AND_WHITESPACE;
2167}
2168
2169
2170<ST_BACKQUOTE>{ANY_CHAR} {
2171    if (YYCURSOR > YYLIMIT) {
2172        return 0;
2173    }
2174    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2175        YYCURSOR++;
2176    }
2177
2178    while (YYCURSOR < YYLIMIT) {
2179        switch (*YYCURSOR++) {
2180            case '`':
2181                break;
2182            case '$':
2183                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2184                    break;
2185                }
2186                continue;
2187            case '{':
2188                if (*YYCURSOR == '$') {
2189                    break;
2190                }
2191                continue;
2192            case '\\':
2193                if (YYCURSOR < YYLIMIT) {
2194                    YYCURSOR++;
2195                }
2196                /* fall through */
2197            default:
2198                continue;
2199        }
2200
2201        YYCURSOR--;
2202        break;
2203    }
2204
2205    yyleng = YYCURSOR - SCNG(yy_text);
2206
2207    zend_scan_escape_string(zendlval, yytext, yyleng, '`');
2208    return T_ENCAPSED_AND_WHITESPACE;
2209}
2210
2211
2212<ST_HEREDOC>{ANY_CHAR} {
2213    int newline = 0;
2214
2215    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2216
2217    if (YYCURSOR > YYLIMIT) {
2218        return 0;
2219    }
2220
2221    YYCURSOR--;
2222
2223    while (YYCURSOR < YYLIMIT) {
2224        switch (*YYCURSOR++) {
2225            case '\r':
2226                if (*YYCURSOR == '\n') {
2227                    YYCURSOR++;
2228                }
2229                /* fall through */
2230            case '\n':
2231                /* Check for ending label on the next line */
2232                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2233                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2234
2235                    if (*end == ';') {
2236                        end++;
2237                    }
2238
2239                    if (*end == '\n' || *end == '\r') {
2240                        /* newline before label will be subtracted from returned text, but
2241                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2242                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2243                            newline = 2; /* Windows newline */
2244                        } else {
2245                            newline = 1;
2246                        }
2247
2248                        CG(increment_lineno) = 1; /* For newline before label */
2249                        BEGIN(ST_END_HEREDOC);
2250
2251                        goto heredoc_scan_done;
2252                    }
2253                }
2254                continue;
2255            case '$':
2256                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2257                    break;
2258                }
2259                continue;
2260            case '{':
2261                if (*YYCURSOR == '$') {
2262                    break;
2263                }
2264                continue;
2265            case '\\':
2266                if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2267                    YYCURSOR++;
2268                }
2269                /* fall through */
2270            default:
2271                continue;
2272        }
2273
2274        YYCURSOR--;
2275        break;
2276    }
2277
2278heredoc_scan_done:
2279    yyleng = YYCURSOR - SCNG(yy_text);
2280
2281    zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0);
2282    return T_ENCAPSED_AND_WHITESPACE;
2283}
2284
2285
2286<ST_NOWDOC>{ANY_CHAR} {
2287    int newline = 0;
2288
2289    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2290
2291    if (YYCURSOR > YYLIMIT) {
2292        return 0;
2293    }
2294
2295    YYCURSOR--;
2296
2297    while (YYCURSOR < YYLIMIT) {
2298        switch (*YYCURSOR++) {
2299            case '\r':
2300                if (*YYCURSOR == '\n') {
2301                    YYCURSOR++;
2302                }
2303                /* fall through */
2304            case '\n':
2305                /* Check for ending label on the next line */
2306                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2307                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2308
2309                    if (*end == ';') {
2310                        end++;
2311                    }
2312
2313                    if (*end == '\n' || *end == '\r') {
2314                        /* newline before label will be subtracted from returned text, but
2315                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2316                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2317                            newline = 2; /* Windows newline */
2318                        } else {
2319                            newline = 1;
2320                        }
2321
2322                        CG(increment_lineno) = 1; /* For newline before label */
2323                        BEGIN(ST_END_HEREDOC);
2324
2325                        goto nowdoc_scan_done;
2326                    }
2327                }
2328                /* fall through */
2329            default:
2330                continue;
2331        }
2332    }
2333
2334nowdoc_scan_done:
2335    yyleng = YYCURSOR - SCNG(yy_text);
2336
2337    zend_copy_value(zendlval, yytext, yyleng - newline);
2338    HANDLE_NEWLINES(yytext, yyleng - newline);
2339    return T_ENCAPSED_AND_WHITESPACE;
2340}
2341
2342
2343<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2344    if (YYCURSOR > YYLIMIT) {
2345        return 0;
2346    }
2347
2348    zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2349    goto restart;
2350}
2351
2352*/
2353}
2354