1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2015 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Marcus Boerger <helly@php.net>                              |
16   |          Nuno Lopes <nlopess@php.net>                                |
17   |          Scott MacVicar <scottmac@php.net>                           |
18   | Flex version authors:                                                |
19   |          Andi Gutmans <andi@zend.com>                                |
20   |          Zeev Suraski <zeev@zend.com>                                |
21   +----------------------------------------------------------------------+
22*/
23
24/* $Id$ */
25
/* Scanner tracing hook.  Change the condition below to 1 to make every
   YYDEBUG(state, char) print the state number and the character; with the
   condition at 0 the macro expands to nothing. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
31
32#include "zend_language_scanner_defs.h"
33
34#include <errno.h>
35#include "zend.h"
36#ifdef PHP_WIN32
37# include <Winuser.h>
38#endif
39#include "zend_alloc.h"
40#include <zend_language_parser.h>
41#include "zend_compile.h"
42#include "zend_language_scanner.h"
43#include "zend_highlight.h"
44#include "zend_constants.h"
45#include "zend_variables.h"
46#include "zend_operators.h"
47#include "zend_API.h"
48#include "zend_strtod.h"
49#include "zend_exceptions.h"
50#include "zend_virtual_cwd.h"
51#include "tsrm_config_common.h"
52
/* re2c configuration: the scanner reads unsigned bytes and keeps its
   cursor, limit and marker in the scanner globals. */
#define YYCTYPE   unsigned char
/* Makes the enclosing function return 0 once advancing by n bytes would
   reach YYLIMIT plus the ZEND_MMAP_AHEAD padding. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* the current start condition is stored in the scanner globals as well */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = s

/* maps a condition name onto its yyc-prefixed identifier */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Keep only the first x bytes of the current token: the cursor is moved to
   yytext + x and yyleng becomes x.  The argument is parenthesized in both
   uses so that an expression argument is cast and added as a whole. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72
73/* perform sanity check. If this message is triggered you should
74   increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75/*!max:re2c */
76#if ZEND_MMAP_AHEAD < YYMAXFILL
77# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78#endif
79
80#ifdef HAVE_STDARG_H
81# include <stdarg.h>
82#endif
83
84#ifdef HAVE_UNISTD_H
85# include <unistd.h>
86#endif
87
88/* Globals Macros */
89#define SCNG    LANG_SCNG
90#ifdef ZTS
91ZEND_API ts_rsrc_id language_scanner_globals_id;
92#else
93ZEND_API zend_php_scanner_globals language_scanner_globals;
94#endif
95
/* Walks the l bytes starting at s and bumps CG(zend_lineno) for every line
   break.  A '\n' always counts; a '\r' counts only when the byte after it
   is not '\n', so a "\r\n" pair is counted once. */
#define HANDLE_NEWLINES(s, l)                                                   \
do {                                                                            \
    char *p = (s), *boundary = p+(l);                                           \
                                                                                \
    for (; p < boundary; p++) {                                                 \
        if (*p == '\n') {                                                       \
            CG(zend_lineno)++;                                                  \
        } else if (*p == '\r' && *(p+1) != '\n') {                              \
            CG(zend_lineno)++;                                                  \
        }                                                                       \
    }                                                                           \
} while (0)
107
/* Bumps CG(zend_lineno) when the single character c is '\n' or '\r'.
   The argument is parenthesized so that an expression argument (for
   example a conditional expression) is compared as a whole; note that it
   is still evaluated up to twice. */
#define HANDLE_NEWLINE(c) \
{ \
    if ((c) == '\n' || (c) == '\r') { \
        CG(zend_lineno)++; \
    } \
}
114
/* Store / fetch the string length recorded after scanning a double-quoted
   string up to its first variable. */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

/* True for a-z, A-Z, '_' and any value from 0x7F upwards. */
#define IS_LABEL_START(c) \
    (((c) >= 'a' && (c) <= 'z') || \
     ((c) >= 'A' && (c) <= 'Z') || \
     (c) == '_' || \
     (c) >= 0x7F)

/* Character class checks for octal and hexadecimal digits. */
#define ZEND_IS_OCT(c)  ((c)>='0' && (c)<='7')
#define ZEND_IS_HEX(c) \
    (((c)>='0' && (c)<='9') || \
     ((c)>='a' && (c)<='f') || \
     ((c)>='A' && (c)<='F'))
123
124BEGIN_EXTERN_C()
125
126static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
127{
128    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
129    ZEND_ASSERT(internal_encoding);
130    return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
131}
132
133static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
134{
135    return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
136}
137
138static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
139{
140    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
142}
143
144static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
145{
146    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
147    ZEND_ASSERT(internal_encoding);
148    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149internal_encoding, zend_multibyte_encoding_utf8);
150}
151
152
153static void _yy_push_state(int new_state)
154{
155    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156    YYSETCONDITION(new_state);
157}
158
159#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
161static void yy_pop_state(void)
162{
163    int *stack_state = zend_stack_top(&SCNG(state_stack));
164    YYSETCONDITION(*stack_state);
165    zend_stack_del_top(&SCNG(state_stack));
166}
167
168static void yy_scan_buffer(char *str, unsigned int len)
169{
170    YYCURSOR       = (YYCTYPE*)str;
171    YYLIMIT        = YYCURSOR + len;
172    if (!SCNG(yy_start)) {
173        SCNG(yy_start) = YYCURSOR;
174    }
175}
176
177void startup_scanner(void)
178{
179    CG(parse_error) = 0;
180    CG(doc_comment) = NULL;
181    zend_stack_init(&SCNG(state_stack), sizeof(int));
182    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
183}
184
185static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
186    efree(heredoc_label->label);
187}
188
189void shutdown_scanner(void)
190{
191    CG(parse_error) = 0;
192    RESET_DOC_COMMENT();
193    zend_stack_destroy(&SCNG(state_stack));
194    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
195    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
196    SCNG(on_event) = NULL;
197}
198
199ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
200{
201    lex_state->yy_leng   = SCNG(yy_leng);
202    lex_state->yy_start  = SCNG(yy_start);
203    lex_state->yy_text   = SCNG(yy_text);
204    lex_state->yy_cursor = SCNG(yy_cursor);
205    lex_state->yy_marker = SCNG(yy_marker);
206    lex_state->yy_limit  = SCNG(yy_limit);
207
208    lex_state->state_stack = SCNG(state_stack);
209    zend_stack_init(&SCNG(state_stack), sizeof(int));
210
211    lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
212    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
213
214    lex_state->in = SCNG(yy_in);
215    lex_state->yy_state = YYSTATE;
216    lex_state->filename = zend_get_compiled_filename();
217    lex_state->lineno = CG(zend_lineno);
218
219    lex_state->script_org = SCNG(script_org);
220    lex_state->script_org_size = SCNG(script_org_size);
221    lex_state->script_filtered = SCNG(script_filtered);
222    lex_state->script_filtered_size = SCNG(script_filtered_size);
223    lex_state->input_filter = SCNG(input_filter);
224    lex_state->output_filter = SCNG(output_filter);
225    lex_state->script_encoding = SCNG(script_encoding);
226
227    lex_state->on_event = SCNG(on_event);
228
229    lex_state->ast = CG(ast);
230    lex_state->ast_arena = CG(ast_arena);
231}
232
233ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
234{
235    SCNG(yy_leng)   = lex_state->yy_leng;
236    SCNG(yy_start)  = lex_state->yy_start;
237    SCNG(yy_text)   = lex_state->yy_text;
238    SCNG(yy_cursor) = lex_state->yy_cursor;
239    SCNG(yy_marker) = lex_state->yy_marker;
240    SCNG(yy_limit)  = lex_state->yy_limit;
241
242    zend_stack_destroy(&SCNG(state_stack));
243    SCNG(state_stack) = lex_state->state_stack;
244
245    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
246    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
247    SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
248
249    SCNG(yy_in) = lex_state->in;
250    YYSETCONDITION(lex_state->yy_state);
251    CG(zend_lineno) = lex_state->lineno;
252    zend_restore_compiled_filename(lex_state->filename);
253
254    if (SCNG(script_filtered)) {
255        efree(SCNG(script_filtered));
256        SCNG(script_filtered) = NULL;
257    }
258    SCNG(script_org) = lex_state->script_org;
259    SCNG(script_org_size) = lex_state->script_org_size;
260    SCNG(script_filtered) = lex_state->script_filtered;
261    SCNG(script_filtered_size) = lex_state->script_filtered_size;
262    SCNG(input_filter) = lex_state->input_filter;
263    SCNG(output_filter) = lex_state->output_filter;
264    SCNG(script_encoding) = lex_state->script_encoding;
265
266    SCNG(on_event) = lex_state->on_event;
267
268    CG(ast) = lex_state->ast;
269    CG(ast_arena) = lex_state->ast_arena;
270
271    RESET_DOC_COMMENT();
272}
273
274ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
275{
276    zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
277    /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
278    file_handle->opened_path = NULL;
279    if (file_handle->free_filename) {
280        file_handle->filename = NULL;
281    }
282}
283
284ZEND_API void zend_lex_tstring(zval *zv)
285{
286    if (SCNG(on_event)) SCNG(on_event)(ON_FEEDBACK, T_STRING, 0);
287
288    ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
289}
290
/* Byte order marks checked at the start of a script.  The 4-byte UTF-32
   marks come first because BOM_UTF32_LE begins with the same two bytes as
   BOM_UTF16_LE. */
#define BOM_UTF32_BE    "\x00\x00\xfe\xff"
#define BOM_UTF32_LE    "\xff\xfe\x00\x00"

#define BOM_UTF16_BE    "\xfe\xff"
#define BOM_UTF16_LE    "\xff\xfe"

#define BOM_UTF8        "\xef\xbb\xbf"
296
297static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
298{
299    const unsigned char *p;
300    int wchar_size = 2;
301    int le = 0;
302
303    /* utf-16 or utf-32? */
304    p = script;
305    assert(p >= script);
306    while ((size_t)(p-script) < script_size) {
307        p = memchr(p, 0, script_size-(p-script)-2);
308        if (!p) {
309            break;
310        }
311        if (*(p+1) == '\0' && *(p+2) == '\0') {
312            wchar_size = 4;
313            break;
314        }
315
316        /* searching for UTF-32 specific byte orders, so this will do */
317        p += 4;
318    }
319
320    /* BE or LE? */
321    p = script;
322    assert(p >= script);
323    while ((size_t)(p-script) < script_size) {
324        if (*p == '\0' && *(p+wchar_size-1) != '\0') {
325            /* BE */
326            le = 0;
327            break;
328        } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
329            /* LE* */
330            le = 1;
331            break;
332        }
333        p += wchar_size;
334    }
335
336    if (wchar_size == 2) {
337        return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
338    } else {
339        return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
340    }
341
342    return NULL;
343}
344
345static const zend_encoding* zend_multibyte_detect_unicode(void)
346{
347    const zend_encoding *script_encoding = NULL;
348    int bom_size;
349    unsigned char *pos1, *pos2;
350
351    if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
352        return NULL;
353    }
354
355    /* check out BOM */
356    if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
357        script_encoding = zend_multibyte_encoding_utf32be;
358        bom_size = sizeof(BOM_UTF32_BE)-1;
359    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
360        script_encoding = zend_multibyte_encoding_utf32le;
361        bom_size = sizeof(BOM_UTF32_LE)-1;
362    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
363        script_encoding = zend_multibyte_encoding_utf16be;
364        bom_size = sizeof(BOM_UTF16_BE)-1;
365    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
366        script_encoding = zend_multibyte_encoding_utf16le;
367        bom_size = sizeof(BOM_UTF16_LE)-1;
368    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
369        script_encoding = zend_multibyte_encoding_utf8;
370        bom_size = sizeof(BOM_UTF8)-1;
371    }
372
373    if (script_encoding) {
374        /* remove BOM */
375        LANG_SCNG(script_org) += bom_size;
376        LANG_SCNG(script_org_size) -= bom_size;
377
378        return script_encoding;
379    }
380
381    /* script contains NULL bytes -> auto-detection */
382    if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
383        /* check if the NULL byte is after the __HALT_COMPILER(); */
384        pos2 = LANG_SCNG(script_org);
385
386        while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
387            pos2 = memchr(pos2, '_', pos1 - pos2);
388            if (!pos2) break;
389            pos2++;
390            if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
391                pos2 += sizeof("_HALT_COMPILER")-1;
392                while (*pos2 == ' '  ||
393                       *pos2 == '\t' ||
394                       *pos2 == '\r' ||
395                       *pos2 == '\n') {
396                    pos2++;
397                }
398                if (*pos2 == '(') {
399                    pos2++;
400                    while (*pos2 == ' '  ||
401                           *pos2 == '\t' ||
402                           *pos2 == '\r' ||
403                           *pos2 == '\n') {
404                        pos2++;
405                    }
406                    if (*pos2 == ')') {
407                        pos2++;
408                        while (*pos2 == ' '  ||
409                               *pos2 == '\t' ||
410                               *pos2 == '\r' ||
411                               *pos2 == '\n') {
412                            pos2++;
413                        }
414                        if (*pos2 == ';') {
415                            return NULL;
416                        }
417                    }
418                }
419            }
420        }
421        /* make best effort if BOM is missing */
422        return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
423    }
424
425    return NULL;
426}
427
428static const zend_encoding* zend_multibyte_find_script_encoding(void)
429{
430    const zend_encoding *script_encoding;
431
432    if (CG(detect_unicode)) {
433        /* check out bom(byte order mark) and see if containing wchars */
434        script_encoding = zend_multibyte_detect_unicode();
435        if (script_encoding != NULL) {
436            /* bom or wchar detection is prior to 'script_encoding' option */
437            return script_encoding;
438        }
439    }
440
441    /* if no script_encoding specified, just leave alone */
442    if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
443        return NULL;
444    }
445
446    /* if multiple encodings specified, detect automagically */
447    if (CG(script_encoding_list_size) > 1) {
448        return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
449    }
450
451    return CG(script_encoding_list)[0];
452}
453
454ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
455{
456    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
457    const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
458
459    if (!script_encoding) {
460        return FAILURE;
461    }
462
463    /* judge input/output filter */
464    LANG_SCNG(script_encoding) = script_encoding;
465    LANG_SCNG(input_filter) = NULL;
466    LANG_SCNG(output_filter) = NULL;
467
468    if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
469        if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
470            /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
471            LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
472            LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
473        } else {
474            LANG_SCNG(input_filter) = NULL;
475            LANG_SCNG(output_filter) = NULL;
476        }
477        return SUCCESS;
478    }
479
480    if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
481        LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
482        LANG_SCNG(output_filter) = NULL;
483    } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
484        LANG_SCNG(input_filter) = NULL;
485        LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
486    } else {
487        /* both script and internal encodings are incompatible w/ flex */
488        LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
489        LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
490    }
491
492    return 0;
493}
494
495ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
496{
497    char *buf;
498    size_t size, offset = 0;
499    zend_string *compiled_filename;
500
501    /* The shebang line was read, get the current position to obtain the buffer start */
502    if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
503        if ((offset = ftell(file_handle->handle.fp)) == -1) {
504            offset = 0;
505        }
506    }
507
508    if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
509        return FAILURE;
510    }
511
512    zend_llist_add_element(&CG(open_files), file_handle);
513    if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
514        zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
515        size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
516        fh->handle.stream.handle = (void*)(((char*)fh) + diff);
517        file_handle->handle.stream.handle = fh->handle.stream.handle;
518    }
519
520    /* Reset the scanner for scanning the new file */
521    SCNG(yy_in) = file_handle;
522    SCNG(yy_start) = NULL;
523
524    if (size != -1) {
525        if (CG(multibyte)) {
526            SCNG(script_org) = (unsigned char*)buf;
527            SCNG(script_org_size) = size;
528            SCNG(script_filtered) = NULL;
529
530            zend_multibyte_set_filter(NULL);
531
532            if (SCNG(input_filter)) {
533                if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
534                    zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
535                            "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
536                }
537                buf = (char*)SCNG(script_filtered);
538                size = SCNG(script_filtered_size);
539            }
540        }
541        SCNG(yy_start) = (unsigned char *)buf - offset;
542        yy_scan_buffer(buf, (unsigned int)size);
543    } else {
544        zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
545    }
546
547    BEGIN(INITIAL);
548
549    if (file_handle->opened_path) {
550        compiled_filename = zend_string_copy(file_handle->opened_path);
551    } else {
552        compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
553    }
554
555    zend_set_compiled_filename(compiled_filename);
556    zend_string_release(compiled_filename);
557
558    if (CG(start_lineno)) {
559        CG(zend_lineno) = CG(start_lineno);
560        CG(start_lineno) = 0;
561    } else {
562        CG(zend_lineno) = 1;
563    }
564
565    RESET_DOC_COMMENT();
566    CG(increment_lineno) = 0;
567    return SUCCESS;
568}
569END_EXTERN_C()
570
571
572ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
573{
574    zend_lex_state original_lex_state;
575    zend_op_array *op_array = NULL;
576    zend_save_lexical_state(&original_lex_state);
577
578    if (open_file_for_scanning(file_handle)==FAILURE) {
579        if (type==ZEND_REQUIRE) {
580            zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
581            zend_bailout();
582        } else {
583            zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
584        }
585    } else {
586        zend_bool original_in_compilation = CG(in_compilation);
587        CG(in_compilation) = 1;
588
589        CG(ast) = NULL;
590        CG(ast_arena) = zend_arena_create(1024 * 32);
591        if (!zendparse()) {
592            zval retval_zv;
593            zend_file_context original_file_context;
594            zend_oparray_context original_oparray_context;
595            zend_op_array *original_active_op_array = CG(active_op_array);
596            op_array = emalloc(sizeof(zend_op_array));
597            init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE);
598            CG(active_op_array) = op_array;
599            ZVAL_LONG(&retval_zv, 1);
600
601            if (zend_ast_process) {
602                zend_ast_process(CG(ast));
603            }
604
605            zend_file_context_begin(&original_file_context);
606            zend_oparray_context_begin(&original_oparray_context);
607            zend_compile_top_stmt(CG(ast));
608            zend_emit_final_return(&retval_zv);
609            op_array->line_start = 1;
610            op_array->line_end = CG(zend_lineno);
611            pass_two(op_array);
612            zend_oparray_context_end(&original_oparray_context);
613            zend_file_context_end(&original_file_context);
614
615            CG(active_op_array) = original_active_op_array;
616        }
617
618        zend_ast_destroy(CG(ast));
619        zend_arena_destroy(CG(ast_arena));
620        CG(in_compilation) = original_in_compilation;
621    }
622
623    zend_restore_lexical_state(&original_lex_state);
624    return op_array;
625}
626
627
628zend_op_array *compile_filename(int type, zval *filename)
629{
630    zend_file_handle file_handle;
631    zval tmp;
632    zend_op_array *retval;
633    zend_string *opened_path = NULL;
634
635    if (Z_TYPE_P(filename) != IS_STRING) {
636        tmp = *filename;
637        zval_copy_ctor(&tmp);
638        convert_to_string(&tmp);
639        filename = &tmp;
640    }
641    file_handle.filename = Z_STRVAL_P(filename);
642    file_handle.free_filename = 0;
643    file_handle.type = ZEND_HANDLE_FILENAME;
644    file_handle.opened_path = NULL;
645    file_handle.handle.fp = NULL;
646
647    retval = zend_compile_file(&file_handle, type);
648    if (retval && file_handle.handle.stream.handle) {
649        if (!file_handle.opened_path) {
650            file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
651        }
652
653        zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
654
655        if (opened_path) {
656            zend_string_release(opened_path);
657        }
658    }
659    zend_destroy_file_handle(&file_handle);
660
661    if (filename==&tmp) {
662        zval_dtor(&tmp);
663    }
664    return retval;
665}
666
667ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
668{
669    char *buf;
670    size_t size, old_len;
671    zend_string *new_compiled_filename;
672
673    /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
674    old_len = Z_STRLEN_P(str);
675    Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
676    Z_TYPE_INFO_P(str) = IS_STRING_EX;
677    memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
678
679    SCNG(yy_in) = NULL;
680    SCNG(yy_start) = NULL;
681
682    buf = Z_STRVAL_P(str);
683    size = old_len;
684
685    if (CG(multibyte)) {
686        SCNG(script_org) = (unsigned char*)buf;
687        SCNG(script_org_size) = size;
688        SCNG(script_filtered) = NULL;
689
690        zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
691
692        if (SCNG(input_filter)) {
693            if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
694                zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
695                        "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
696            }
697            buf = (char*)SCNG(script_filtered);
698            size = SCNG(script_filtered_size);
699        }
700    }
701
702    yy_scan_buffer(buf, (unsigned int)size);
703
704    new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
705    zend_set_compiled_filename(new_compiled_filename);
706    zend_string_release(new_compiled_filename);
707    CG(zend_lineno) = 1;
708    CG(increment_lineno) = 0;
709    RESET_DOC_COMMENT();
710    return SUCCESS;
711}
712
713
714ZEND_API size_t zend_get_scanned_file_offset(void)
715{
716    size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
717    if (SCNG(input_filter)) {
718        size_t original_offset = offset, length = 0;
719        do {
720            unsigned char *p = NULL;
721            if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
722                return (size_t)-1;
723            }
724            efree(p);
725            if (length > original_offset) {
726                offset--;
727            } else if (length < original_offset) {
728                offset++;
729            }
730        } while (original_offset != length);
731    }
732    return offset;
733}
734
735
736zend_op_array *compile_string(zval *source_string, char *filename)
737{
738    zend_lex_state original_lex_state;
739    zend_op_array *op_array = NULL;
740    zval tmp;
741    zend_bool original_in_compilation = CG(in_compilation);
742
743    if (Z_STRLEN_P(source_string)==0) {
744        return NULL;
745    }
746
747    ZVAL_DUP(&tmp, source_string);
748    convert_to_string(&tmp);
749    source_string = &tmp;
750
751    CG(in_compilation) = 1;
752    zend_save_lexical_state(&original_lex_state);
753    if (zend_prepare_string_for_scanning(source_string, filename) == SUCCESS) {
754        CG(ast) = NULL;
755        CG(ast_arena) = zend_arena_create(1024 * 32);
756        BEGIN(ST_IN_SCRIPTING);
757
758        if (!zendparse()) {
759            zend_file_context original_file_context;
760            zend_oparray_context original_oparray_context;
761            zend_op_array *original_active_op_array = CG(active_op_array);
762            op_array = emalloc(sizeof(zend_op_array));
763            init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE);
764            CG(active_op_array) = op_array;
765
766            if (zend_ast_process) {
767                zend_ast_process(CG(ast));
768            }
769
770            zend_file_context_begin(&original_file_context);
771            zend_oparray_context_begin(&original_oparray_context);
772            zend_compile_top_stmt(CG(ast));
773            zend_emit_final_return(NULL);
774            op_array->line_start = 1;
775            op_array->line_end = CG(zend_lineno);
776            pass_two(op_array);
777            zend_oparray_context_end(&original_oparray_context);
778            zend_file_context_end(&original_file_context);
779
780            CG(active_op_array) = original_active_op_array;
781        }
782
783        zend_ast_destroy(CG(ast));
784        zend_arena_destroy(CG(ast_arena));
785    }
786
787    zend_restore_lexical_state(&original_lex_state);
788    zval_dtor(&tmp);
789    CG(in_compilation) = original_in_compilation;
790    return op_array;
791}
792
793
794BEGIN_EXTERN_C()
795int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
796{
797    zend_lex_state original_lex_state;
798    zend_file_handle file_handle;
799
800    file_handle.type = ZEND_HANDLE_FILENAME;
801    file_handle.filename = filename;
802    file_handle.free_filename = 0;
803    file_handle.opened_path = NULL;
804    zend_save_lexical_state(&original_lex_state);
805    if (open_file_for_scanning(&file_handle)==FAILURE) {
806        zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
807        zend_restore_lexical_state(&original_lex_state);
808        return FAILURE;
809    }
810    zend_highlight(syntax_highlighter_ini);
811    if (SCNG(script_filtered)) {
812        efree(SCNG(script_filtered));
813        SCNG(script_filtered) = NULL;
814    }
815    zend_destroy_file_handle(&file_handle);
816    zend_restore_lexical_state(&original_lex_state);
817    return SUCCESS;
818}
819
820int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
821{
822    zend_lex_state original_lex_state;
823    zval tmp = *str;
824
825    str = &tmp;
826    zval_copy_ctor(str);
827    zend_save_lexical_state(&original_lex_state);
828    if (zend_prepare_string_for_scanning(str, str_name)==FAILURE) {
829        zend_restore_lexical_state(&original_lex_state);
830        return FAILURE;
831    }
832    BEGIN(INITIAL);
833    zend_highlight(syntax_highlighter_ini);
834    if (SCNG(script_filtered)) {
835        efree(SCNG(script_filtered));
836        SCNG(script_filtered) = NULL;
837    }
838    zend_restore_lexical_state(&original_lex_state);
839    zval_dtor(str);
840    return SUCCESS;
841}
842
843ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
844{
845    size_t length;
846    unsigned char *new_yy_start;
847
848    /* convert and set */
849    if (!SCNG(input_filter)) {
850        if (SCNG(script_filtered)) {
851            efree(SCNG(script_filtered));
852            SCNG(script_filtered) = NULL;
853        }
854        SCNG(script_filtered_size) = 0;
855        length = SCNG(script_org_size);
856        new_yy_start = SCNG(script_org);
857    } else {
858        if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
859            zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
860                    "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
861        }
862        if (SCNG(script_filtered)) {
863            efree(SCNG(script_filtered));
864        }
865        SCNG(script_filtered) = new_yy_start;
866        SCNG(script_filtered_size) = length;
867    }
868
869    SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
870    SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
871    SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
872    SCNG(yy_limit) = new_yy_start + length;
873
874    SCNG(yy_start) = new_yy_start;
875}
876
877
/* Store `yyleng` bytes starting at `yytext` into `zendlval` as a string.
 *
 * When SCNG(output_filter) is set, the bytes are first passed through it;
 * the filter's output buffer is copied into the zval and then freed.
 * Otherwise the raw bytes are copied directly.
 *
 * NOTE(review): this expands to a bare if/else rather than a
 * do { } while (0) block, so it is only safe as a complete statement inside
 * braces; the filter's return value is ignored.
 */
// TODO: avoid reallocation ???
# define zend_copy_value(zendlval, yytext, yyleng) \
    if (SCNG(output_filter)) { \
        size_t sz = 0; \
        char *s = NULL; \
        SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng); \
        ZVAL_STRINGL(zendlval, s, sz); \
        efree(s); \
    } else { \
        ZVAL_STRINGL(zendlval, yytext, yyleng); \
    }
889
890static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
891{
892    register char *s, *t;
893    char *end;
894
895    ZVAL_STRINGL(zendlval, str, len);
896
897    /* convert escape sequences */
898    s = t = Z_STRVAL_P(zendlval);
899    end = s+Z_STRLEN_P(zendlval);
900    while (s<end) {
901        if (*s=='\\') {
902            s++;
903            if (s >= end) {
904                *t++ = '\\';
905                break;
906            }
907
908            switch(*s) {
909                case 'n':
910                    *t++ = '\n';
911                    Z_STRLEN_P(zendlval)--;
912                    break;
913                case 'r':
914                    *t++ = '\r';
915                    Z_STRLEN_P(zendlval)--;
916                    break;
917                case 't':
918                    *t++ = '\t';
919                    Z_STRLEN_P(zendlval)--;
920                    break;
921                case 'f':
922                    *t++ = '\f';
923                    Z_STRLEN_P(zendlval)--;
924                    break;
925                case 'v':
926                    *t++ = '\v';
927                    Z_STRLEN_P(zendlval)--;
928                    break;
929                case 'e':
930#ifdef PHP_WIN32
931                    *t++ = VK_ESCAPE;
932#else
933                    *t++ = '\e';
934#endif
935                    Z_STRLEN_P(zendlval)--;
936                    break;
937                case '"':
938                case '`':
939                    if (*s != quote_type) {
940                        *t++ = '\\';
941                        *t++ = *s;
942                        break;
943                    }
944                case '\\':
945                case '$':
946                    *t++ = *s;
947                    Z_STRLEN_P(zendlval)--;
948                    break;
949                case 'x':
950                case 'X':
951                    if (ZEND_IS_HEX(*(s+1))) {
952                        char hex_buf[3] = { 0, 0, 0 };
953
954                        Z_STRLEN_P(zendlval)--; /* for the 'x' */
955
956                        hex_buf[0] = *(++s);
957                        Z_STRLEN_P(zendlval)--;
958                        if (ZEND_IS_HEX(*(s+1))) {
959                            hex_buf[1] = *(++s);
960                            Z_STRLEN_P(zendlval)--;
961                        }
962                        *t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
963                    } else {
964                        *t++ = '\\';
965                        *t++ = *s;
966                    }
967                    break;
968                /* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
969                case 'u':
970                    {
971                        /* cache where we started so we can parse after validating */
972                        char *start = s + 1;
973                        size_t len = 0;
974                        zend_bool valid = 1;
975                        unsigned long codepoint;
976                        size_t byte_len = 0;
977
978                        if (*start != '{') {
979                            /* we silently let this pass to avoid breaking code
980                             * with JSON in string literals (e.g. "\"\u202e\""
981                             */
982                            *t++ = '\\';
983                            *t++ = 'u';
984                            break;
985                        } else {
986                            /* on the other hand, invalid \u{blah} errors */
987                            s++;
988                            len++;
989                            s++;
990                            while (*s != '}') {
991                                if (!ZEND_IS_HEX(*s)) {
992                                    valid = 0;
993                                    break;
994                                } else {
995                                    len++;
996                                }
997                                s++;
998                            }
999                            if (*s == '}') {
1000                                valid = 1;
1001                                len++;
1002                            }
1003                        }
1004
1005                        /* \u{} is invalid */
1006                        if (len <= 2) {
1007                            valid = 0;
1008                        }
1009
1010                        if (!valid) {
1011                            zend_throw_exception(zend_get_parse_exception(),
1012                                "Invalid UTF-8 codepoint escape sequence", E_PARSE);
1013                            zval_ptr_dtor(zendlval);
1014                            return FAILURE;
1015                        }
1016
1017                        errno = 0;
1018                        codepoint = strtoul(start + 1, NULL, 16);
1019
1020                        /* per RFC 3629, UTF-8 can only represent 21 bits */
1021                        if (codepoint > 0x10FFFF || errno) {
1022                            zend_throw_exception(zend_get_parse_exception(),
1023                                "Invalid UTF-8 codepoint escape sequence: Codepoint too large", E_PARSE);
1024                            zval_ptr_dtor(zendlval);
1025                            return FAILURE;
1026                        }
1027
1028                        /* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
1029                        if (codepoint < 0x80) {
1030                            byte_len = 1;
1031                            *t++ = codepoint;
1032                        } else if (codepoint <= 0x7FF) {
1033                            byte_len = 2;
1034                            *t++ = (codepoint >> 6) + 0xC0;
1035                            *t++ = (codepoint & 0x3F) + 0x80;
1036                        } else if (codepoint <= 0xFFFF) {
1037                            byte_len = 3;
1038                            *t++ = (codepoint >> 12) + 0xE0;
1039                            *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
1040                            *t++ = (codepoint & 0x3F) + 0x80;
1041                        } else if (codepoint <= 0x10FFFF) {
1042                            byte_len = 4;
1043                            *t++ = (codepoint >> 18) + 0xF0;
1044                            *t++ = ((codepoint >> 12) & 0x3F) + 0x80;
1045                            *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
1046                            *t++ = (codepoint & 0x3F) + 0x80;
1047                        }
1048
1049                        Z_STRLEN_P(zendlval) -= 2; /* \u */
1050                        Z_STRLEN_P(zendlval) -= (len - byte_len);
1051                    }
1052                    break;
1053                default:
1054                    /* check for an octal */
1055                    if (ZEND_IS_OCT(*s)) {
1056                        char octal_buf[4] = { 0, 0, 0, 0 };
1057
1058                        octal_buf[0] = *s;
1059                        Z_STRLEN_P(zendlval)--;
1060                        if (ZEND_IS_OCT(*(s+1))) {
1061                            octal_buf[1] = *(++s);
1062                            Z_STRLEN_P(zendlval)--;
1063                            if (ZEND_IS_OCT(*(s+1))) {
1064                                octal_buf[2] = *(++s);
1065                                Z_STRLEN_P(zendlval)--;
1066                            }
1067                        }
1068                        *t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
1069                    } else {
1070                        *t++ = '\\';
1071                        *t++ = *s;
1072                    }
1073                    break;
1074            }
1075        } else {
1076            *t++ = *s;
1077        }
1078
1079        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1080            CG(zend_lineno)++;
1081        }
1082        s++;
1083    }
1084    *t = 0;
1085    if (SCNG(output_filter)) {
1086        size_t sz = 0;
1087        unsigned char *str;
1088        // TODO: avoid realocation ???
1089        s = Z_STRVAL_P(zendlval);
1090        SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
1091        zval_ptr_dtor(zendlval);
1092        ZVAL_STRINGL(zendlval, (char *) str, sz);
1093        efree(str);
1094    }
1095    return SUCCESS;
1096}
1097
1098static zend_always_inline int emit_token(int token, int token_line)
1099{
1100    if(SCNG(on_event)) SCNG(on_event)(ON_TOKEN, token, token_line);
1101
1102    return token;
1103}
1104
/* Return `token` from the enclosing function via emit_token(), reporting the
 * local variable `start_line` as the token's line. Requires `start_line` to
 * be in scope. The expansion already ends in ';', so an invocation followed
 * by ';' yields an extra empty statement. */
#define RETURN_TOKEN(token) return emit_token(token, start_line);
1106
/* Scanner entry point generated from the re2c rules below.
 *
 * Each rule action stores the token's value, if it has one, in `zendlval`
 * and returns the token id through RETURN_TOKEN. `start_line` captures
 * CG(zend_lineno) on entry and is the line reported for the returned token.
 * Rules that consume nothing useful jump back to `restart`, which re-anchors
 * SCNG(yy_text) at the current cursor.
 */
int lex_scan(zval *zendlval)
{

int start_line = CG(zend_lineno);

restart:
    SCNG(yy_text) = YYCURSOR;

/*!re2c
re2c:yyfill:check = 0;
LNUM    [0-9]+
DNUM    ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
EXPONENT_DNUM   (({LNUM}|{DNUM})[eE][+-]?{LNUM})
HNUM    "0x"[0-9a-fA-F]+
BNUM    "0b"[01]+
LABEL   [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
ANY_CHAR [^]
NEWLINE ("\r"|"\n"|"\r\n")

/* compute yyleng before each rule */
<!*> := yyleng = YYCURSOR - SCNG(yy_text);
1131
/* Keyword rules (ST_IN_SCRIPTING): each returns its token id through
 * RETURN_TOKEN and does not touch zendlval. "exit" and "die" both map to
 * T_EXIT. The "yield ... from" rule accepts any run of whitespace between
 * the two words and yields the single token T_YIELD_FROM. */
<ST_IN_SCRIPTING>"exit" {
    RETURN_TOKEN(T_EXIT);
}

<ST_IN_SCRIPTING>"die" {
    RETURN_TOKEN(T_EXIT);
}

<ST_IN_SCRIPTING>"function" {
    RETURN_TOKEN(T_FUNCTION);
}

<ST_IN_SCRIPTING>"const" {
    RETURN_TOKEN(T_CONST);
}

<ST_IN_SCRIPTING>"return" {
    RETURN_TOKEN(T_RETURN);
}

<ST_IN_SCRIPTING>"yield"{WHITESPACE}"from" {
    RETURN_TOKEN(T_YIELD_FROM);
}

<ST_IN_SCRIPTING>"yield" {
    RETURN_TOKEN(T_YIELD);
}
1159
/* Exception-handling keywords (ST_IN_SCRIPTING): plain keyword-to-token
 * mappings with no semantic value. */
<ST_IN_SCRIPTING>"try" {
    RETURN_TOKEN(T_TRY);
}

<ST_IN_SCRIPTING>"catch" {
    RETURN_TOKEN(T_CATCH);
}

<ST_IN_SCRIPTING>"finally" {
    RETURN_TOKEN(T_FINALLY);
}

<ST_IN_SCRIPTING>"throw" {
    RETURN_TOKEN(T_THROW);
}
1175
/* Control-flow keywords (ST_IN_SCRIPTING), including the alternative
 * "end..." block terminators: plain keyword-to-token mappings with no
 * semantic value. */
<ST_IN_SCRIPTING>"if" {
    RETURN_TOKEN(T_IF);
}

<ST_IN_SCRIPTING>"elseif" {
    RETURN_TOKEN(T_ELSEIF);
}

<ST_IN_SCRIPTING>"endif" {
    RETURN_TOKEN(T_ENDIF);
}

<ST_IN_SCRIPTING>"else" {
    RETURN_TOKEN(T_ELSE);
}

<ST_IN_SCRIPTING>"while" {
    RETURN_TOKEN(T_WHILE);
}

<ST_IN_SCRIPTING>"endwhile" {
    RETURN_TOKEN(T_ENDWHILE);
}

<ST_IN_SCRIPTING>"do" {
    RETURN_TOKEN(T_DO);
}

<ST_IN_SCRIPTING>"for" {
    RETURN_TOKEN(T_FOR);
}

<ST_IN_SCRIPTING>"endfor" {
    RETURN_TOKEN(T_ENDFOR);
}

<ST_IN_SCRIPTING>"foreach" {
    RETURN_TOKEN(T_FOREACH);
}

<ST_IN_SCRIPTING>"endforeach" {
    RETURN_TOKEN(T_ENDFOREACH);
}

<ST_IN_SCRIPTING>"declare" {
    RETURN_TOKEN(T_DECLARE);
}

<ST_IN_SCRIPTING>"enddeclare" {
    RETURN_TOKEN(T_ENDDECLARE);
}

<ST_IN_SCRIPTING>"instanceof" {
    RETURN_TOKEN(T_INSTANCEOF);
}

<ST_IN_SCRIPTING>"as" {
    RETURN_TOKEN(T_AS);
}

<ST_IN_SCRIPTING>"switch" {
    RETURN_TOKEN(T_SWITCH);
}

<ST_IN_SCRIPTING>"endswitch" {
    RETURN_TOKEN(T_ENDSWITCH);
}

<ST_IN_SCRIPTING>"case" {
    RETURN_TOKEN(T_CASE);
}

<ST_IN_SCRIPTING>"default" {
    RETURN_TOKEN(T_DEFAULT);
}

<ST_IN_SCRIPTING>"break" {
    RETURN_TOKEN(T_BREAK);
}

<ST_IN_SCRIPTING>"continue" {
    RETURN_TOKEN(T_CONTINUE);
}

<ST_IN_SCRIPTING>"goto" {
    RETURN_TOKEN(T_GOTO);
}
1263
/* Output and class-declaration keywords (ST_IN_SCRIPTING): plain
 * keyword-to-token mappings with no semantic value. */
<ST_IN_SCRIPTING>"echo" {
    RETURN_TOKEN(T_ECHO);
}

<ST_IN_SCRIPTING>"print" {
    RETURN_TOKEN(T_PRINT);
}

<ST_IN_SCRIPTING>"class" {
    RETURN_TOKEN(T_CLASS);
}

<ST_IN_SCRIPTING>"interface" {
    RETURN_TOKEN(T_INTERFACE);
}

<ST_IN_SCRIPTING>"trait" {
    RETURN_TOKEN(T_TRAIT);
}

<ST_IN_SCRIPTING>"extends" {
    RETURN_TOKEN(T_EXTENDS);
}

<ST_IN_SCRIPTING>"implements" {
    RETURN_TOKEN(T_IMPLEMENTS);
}
1291
/* Object operator handling. Seeing "->" in scripting state pushes
 * ST_LOOKING_FOR_PROPERTY, so that the label which follows is returned as a
 * T_STRING by the property rule below instead of being matched by the
 * keyword rules of ST_IN_SCRIPTING. */
<ST_IN_SCRIPTING>"->" {
    yy_push_state(ST_LOOKING_FOR_PROPERTY);
    RETURN_TOKEN(T_OBJECT_OPERATOR);
}

/* Whitespace in either state becomes T_WHITESPACE; HANDLE_NEWLINES is given
 * the matched text (presumably to keep the line counter in step; the macro
 * is defined outside this section). */
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
    HANDLE_NEWLINES(yytext, yyleng);
    RETURN_TOKEN(T_WHITESPACE);
}

/* A further "->" while already looking for a property: the state is kept. */
<ST_LOOKING_FOR_PROPERTY>"->" {
    RETURN_TOKEN(T_OBJECT_OPERATOR);
}

/* The property name itself: leave the property state and return the label
 * text as the token's string value. */
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
    yy_pop_state();
    zend_copy_value(zendlval, yytext, yyleng);
    RETURN_TOKEN(T_STRING);
}

/* Anything else: give the character back, leave the property state and
 * rescan it under the previous state. */
<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
    yyless(0);
    yy_pop_state();
    goto restart;
}
1317
/* Scope resolution, namespace separator, ellipsis, null-coalescing and the
 * "new" / "clone" / "var" keywords (ST_IN_SCRIPTING): plain mappings from
 * the matched text to a token id, with no semantic value. */
<ST_IN_SCRIPTING>"::" {
    RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
}

<ST_IN_SCRIPTING>"\\" {
    RETURN_TOKEN(T_NS_SEPARATOR);
}

<ST_IN_SCRIPTING>"..." {
    RETURN_TOKEN(T_ELLIPSIS);
}

<ST_IN_SCRIPTING>"??" {
    RETURN_TOKEN(T_COALESCE);
}

<ST_IN_SCRIPTING>"new" {
    RETURN_TOKEN(T_NEW);
}

<ST_IN_SCRIPTING>"clone" {
    RETURN_TOKEN(T_CLONE);
}

<ST_IN_SCRIPTING>"var" {
    RETURN_TOKEN(T_VAR);
}
1345
/* Type-cast rules (ST_IN_SCRIPTING). A cast is matched as one token:
 * "(", optional spaces/tabs, the type name, optional spaces/tabs, ")".
 * Several type names are aliases for the same token (int/integer,
 * real/double/float, string/binary, bool/boolean). */
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
    RETURN_TOKEN(T_INT_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
    RETURN_TOKEN(T_DOUBLE_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
    RETURN_TOKEN(T_STRING_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
    RETURN_TOKEN(T_ARRAY_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
    RETURN_TOKEN(T_OBJECT_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
    RETURN_TOKEN(T_BOOL_CAST);
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
    RETURN_TOKEN(T_UNSET_CAST);
}
1373
/* Remaining keywords (ST_IN_SCRIPTING): code inclusion, namespaces,
 * language constructs, member modifiers and type keywords, plus the "=>"
 * double arrow. Each is a plain mapping to a token id with no semantic
 * value. */
<ST_IN_SCRIPTING>"eval" {
    RETURN_TOKEN(T_EVAL);
}

<ST_IN_SCRIPTING>"include" {
    RETURN_TOKEN(T_INCLUDE);
}

<ST_IN_SCRIPTING>"include_once" {
    RETURN_TOKEN(T_INCLUDE_ONCE);
}

<ST_IN_SCRIPTING>"require" {
    RETURN_TOKEN(T_REQUIRE);
}

<ST_IN_SCRIPTING>"require_once" {
    RETURN_TOKEN(T_REQUIRE_ONCE);
}

<ST_IN_SCRIPTING>"namespace" {
    RETURN_TOKEN(T_NAMESPACE);
}

<ST_IN_SCRIPTING>"use" {
    RETURN_TOKEN(T_USE);
}

<ST_IN_SCRIPTING>"insteadof" {
    RETURN_TOKEN(T_INSTEADOF);
}

<ST_IN_SCRIPTING>"global" {
    RETURN_TOKEN(T_GLOBAL);
}

<ST_IN_SCRIPTING>"isset" {
    RETURN_TOKEN(T_ISSET);
}

<ST_IN_SCRIPTING>"empty" {
    RETURN_TOKEN(T_EMPTY);
}

<ST_IN_SCRIPTING>"__halt_compiler" {
    RETURN_TOKEN(T_HALT_COMPILER);
}

<ST_IN_SCRIPTING>"static" {
    RETURN_TOKEN(T_STATIC);
}

<ST_IN_SCRIPTING>"abstract" {
    RETURN_TOKEN(T_ABSTRACT);
}

<ST_IN_SCRIPTING>"final" {
    RETURN_TOKEN(T_FINAL);
}

<ST_IN_SCRIPTING>"private" {
    RETURN_TOKEN(T_PRIVATE);
}

<ST_IN_SCRIPTING>"protected" {
    RETURN_TOKEN(T_PROTECTED);
}

<ST_IN_SCRIPTING>"public" {
    RETURN_TOKEN(T_PUBLIC);
}

<ST_IN_SCRIPTING>"unset" {
    RETURN_TOKEN(T_UNSET);
}

<ST_IN_SCRIPTING>"=>" {
    RETURN_TOKEN(T_DOUBLE_ARROW);
}

<ST_IN_SCRIPTING>"list" {
    RETURN_TOKEN(T_LIST);
}

<ST_IN_SCRIPTING>"array" {
    RETURN_TOKEN(T_ARRAY);
}

<ST_IN_SCRIPTING>"callable" {
    RETURN_TOKEN(T_CALLABLE);
}
1465
/* Multi-character operators (ST_IN_SCRIPTING): increment/decrement,
 * comparison, compound assignment, boolean/logical and shift operators.
 * Each maps the matched text to a token id with no semantic value. Both
 * "!=" and "<>" produce T_IS_NOT_EQUAL. */
<ST_IN_SCRIPTING>"++" {
    RETURN_TOKEN(T_INC);
}

<ST_IN_SCRIPTING>"--" {
    RETURN_TOKEN(T_DEC);
}

<ST_IN_SCRIPTING>"===" {
    RETURN_TOKEN(T_IS_IDENTICAL);
}

<ST_IN_SCRIPTING>"!==" {
    RETURN_TOKEN(T_IS_NOT_IDENTICAL);
}

<ST_IN_SCRIPTING>"==" {
    RETURN_TOKEN(T_IS_EQUAL);
}

<ST_IN_SCRIPTING>"!="|"<>" {
    RETURN_TOKEN(T_IS_NOT_EQUAL);
}

<ST_IN_SCRIPTING>"<=>" {
    RETURN_TOKEN(T_SPACESHIP);
}

<ST_IN_SCRIPTING>"<=" {
    RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
}

<ST_IN_SCRIPTING>">=" {
    RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
}

<ST_IN_SCRIPTING>"+=" {
    RETURN_TOKEN(T_PLUS_EQUAL);
}

<ST_IN_SCRIPTING>"-=" {
    RETURN_TOKEN(T_MINUS_EQUAL);
}

<ST_IN_SCRIPTING>"*=" {
    RETURN_TOKEN(T_MUL_EQUAL);
}

<ST_IN_SCRIPTING>"*\*" {
    RETURN_TOKEN(T_POW);
}

<ST_IN_SCRIPTING>"*\*=" {
    RETURN_TOKEN(T_POW_EQUAL);
}

<ST_IN_SCRIPTING>"/=" {
    RETURN_TOKEN(T_DIV_EQUAL);
}

<ST_IN_SCRIPTING>".=" {
    RETURN_TOKEN(T_CONCAT_EQUAL);
}

<ST_IN_SCRIPTING>"%=" {
    RETURN_TOKEN(T_MOD_EQUAL);
}

<ST_IN_SCRIPTING>"<<=" {
    RETURN_TOKEN(T_SL_EQUAL);
}

<ST_IN_SCRIPTING>">>=" {
    RETURN_TOKEN(T_SR_EQUAL);
}

<ST_IN_SCRIPTING>"&=" {
    RETURN_TOKEN(T_AND_EQUAL);
}

<ST_IN_SCRIPTING>"|=" {
    RETURN_TOKEN(T_OR_EQUAL);
}

<ST_IN_SCRIPTING>"^=" {
    RETURN_TOKEN(T_XOR_EQUAL);
}

<ST_IN_SCRIPTING>"||" {
    RETURN_TOKEN(T_BOOLEAN_OR);
}

<ST_IN_SCRIPTING>"&&" {
    RETURN_TOKEN(T_BOOLEAN_AND);
}

<ST_IN_SCRIPTING>"OR" {
    RETURN_TOKEN(T_LOGICAL_OR);
}

<ST_IN_SCRIPTING>"AND" {
    RETURN_TOKEN(T_LOGICAL_AND);
}

<ST_IN_SCRIPTING>"XOR" {
    RETURN_TOKEN(T_LOGICAL_XOR);
}

<ST_IN_SCRIPTING>"<<" {
    RETURN_TOKEN(T_SL);
}

<ST_IN_SCRIPTING>">>" {
    RETURN_TOKEN(T_SR);
}

/* Single-character tokens: the character itself is the token id. */
<ST_IN_SCRIPTING>{TOKENS} {
    RETURN_TOKEN(yytext[0]);
}
1585
1586
/* "{" in scripting state pushes another ST_IN_SCRIPTING level so that the
 * matching "}" can pop back to whatever state was active before it. */
<ST_IN_SCRIPTING>"{" {
    yy_push_state(ST_IN_SCRIPTING);
    RETURN_TOKEN('{');
}


/* "${" inside an interpolating string: switch to looking for the variable
 * name that follows. */
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
    yy_push_state(ST_LOOKING_FOR_VARNAME);
    RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}


/* "}" resets any pending doc comment and pops one state level; the pop is
 * skipped when the state stack is already empty. */
<ST_IN_SCRIPTING>"}" {
    RESET_DOC_COMMENT();
    if (!zend_stack_is_empty(&SCNG(state_stack))) {
        yy_pop_state();
    }
    RETURN_TOKEN('}');
}


/* A label directly followed by "[" or "}" after "${": the trailing character
 * is handed back via yyless(), the label becomes the token value, and
 * scanning continues in ST_IN_SCRIPTING. */
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
    yyless(yyleng - 1);
    zend_copy_value(zendlval, yytext, yyleng);
    yy_pop_state();
    yy_push_state(ST_IN_SCRIPTING);
    RETURN_TOKEN(T_STRING_VARNAME);
}


/* Anything else after "${": give the character back and rescan it as
 * ordinary script code. */
<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
    yyless(0);
    yy_pop_state();
    yy_push_state(ST_IN_SCRIPTING);
    goto restart;
}
1623
1624<ST_IN_SCRIPTING>{BNUM} {
1625    char *bin = yytext + 2; /* Skip "0b" */
1626    int len = yyleng - 2;
1627    char *end;
1628
1629    /* Skip any leading 0s */
1630    while (*bin == '0') {
1631        ++bin;
1632        --len;
1633    }
1634
1635    if (len < SIZEOF_ZEND_LONG * 8) {
1636        if (len == 0) {
1637            ZVAL_LONG(zendlval, 0);
1638        } else {
1639            errno = 0;
1640            ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1641            ZEND_ASSERT(!errno && end == yytext + yyleng);
1642        }
1643        RETURN_TOKEN(T_LNUMBER);
1644    } else {
1645        ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1646        /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1647        ZEND_ASSERT(end == yytext + yyleng);
1648        RETURN_TOKEN(T_DNUMBER);
1649    }
1650}
1651
1652<ST_IN_SCRIPTING>{LNUM} {
1653    char *end;
1654    if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1655        errno = 0;
1656        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1657        /* This isn't an assert, we need to ensure 019 isn't valid octal
1658         * Because the lexing itself doesn't do that for us
1659         */
1660        if (end != yytext + yyleng) {
1661            zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE);
1662            RETURN_TOKEN(T_ERROR);
1663        }
1664    } else {
1665        errno = 0;
1666        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1667        if (errno == ERANGE) { /* Overflow */
1668            errno = 0;
1669            if (yytext[0] == '0') { /* octal overflow */
1670                errno = 0;
1671                ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
1672            } else {
1673                ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
1674            }
1675            /* Also not an assert for the same reason */
1676            if (end != yytext + yyleng) {
1677                zend_throw_exception(zend_get_parse_exception(),
1678                    "Invalid numeric literal", E_PARSE);
1679                RETURN_TOKEN(T_ERROR);
1680            }
1681            ZEND_ASSERT(!errno);
1682            RETURN_TOKEN(T_DNUMBER);
1683        }
1684        /* Also not an assert for the same reason */
1685        if (end != yytext + yyleng) {
1686            zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE);
1687            RETURN_TOKEN(T_ERROR);
1688        }
1689    }
1690    ZEND_ASSERT(!errno);
1691    RETURN_TOKEN(T_LNUMBER);
1692}
1693
1694<ST_IN_SCRIPTING>{HNUM} {
1695    char *hex = yytext + 2; /* Skip "0x" */
1696    int len = yyleng - 2;
1697    char *end;
1698
1699    /* Skip any leading 0s */
1700    while (*hex == '0') {
1701        hex++;
1702        len--;
1703    }
1704
1705    if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1706        if (len == 0) {
1707            ZVAL_LONG(zendlval, 0);
1708        } else {
1709            errno = 0;
1710            ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1711            ZEND_ASSERT(!errno && end == hex + len);
1712        }
1713        RETURN_TOKEN(T_LNUMBER);
1714    } else {
1715        ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1716        /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1717        ZEND_ASSERT(end == hex + len);
1718        RETURN_TOKEN(T_DNUMBER);
1719    }
1720}
1721
1722<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1723    if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1724        char *end;
1725        errno = 0;
1726        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
1727        if (errno == ERANGE) {
1728            goto string;
1729        }
1730        ZEND_ASSERT(end == yytext + yyleng);
1731    } else {
1732string:
1733        ZVAL_STRINGL(zendlval, yytext, yyleng);
1734    }
1735    RETURN_TOKEN(T_NUM_STRING);
1736}
1737
/* Any other numeric spelling inside a string offset is never converted: the
 * matched text is stored verbatim as the T_NUM_STRING value. */
<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
    ZVAL_STRINGL(zendlval, yytext, yyleng);
    RETURN_TOKEN(T_NUM_STRING);
}
1742
1743<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1744    const char *end;
1745
1746    ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
1747    /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1748    ZEND_ASSERT(end == yytext + yyleng);
1749    RETURN_TOKEN(T_DNUMBER);
1750}
1751
/* Magic constants (ST_IN_SCRIPTING): only the token id is returned here; no
 * value is stored in zendlval by these rules. */
<ST_IN_SCRIPTING>"__CLASS__" {
    RETURN_TOKEN(T_CLASS_C);
}

<ST_IN_SCRIPTING>"__TRAIT__" {
    RETURN_TOKEN(T_TRAIT_C);
}

<ST_IN_SCRIPTING>"__FUNCTION__" {
    RETURN_TOKEN(T_FUNC_C);
}

<ST_IN_SCRIPTING>"__METHOD__" {
    RETURN_TOKEN(T_METHOD_C);
}

<ST_IN_SCRIPTING>"__LINE__" {
    RETURN_TOKEN(T_LINE);
}

<ST_IN_SCRIPTING>"__FILE__" {
    RETURN_TOKEN(T_FILE);
}

<ST_IN_SCRIPTING>"__DIR__" {
    RETURN_TOKEN(T_DIR);
}

<ST_IN_SCRIPTING>"__NAMESPACE__" {
    RETURN_TOKEN(T_NS_C);
}
1783
1784
/* Opening tags (INITIAL state). Each recognised tag switches the scanner to
 * ST_IN_SCRIPTING. */
<INITIAL>"<?=" {
    BEGIN(ST_IN_SCRIPTING);
    RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
}


/* The long tag must be followed by a space, tab or line break; that trailing
 * character is part of the token and is passed to HANDLE_NEWLINE. */
<INITIAL>"<?php"([ \t]|{NEWLINE}) {
    HANDLE_NEWLINE(yytext[yyleng-1]);
    BEGIN(ST_IN_SCRIPTING);
    RETURN_TOKEN(T_OPEN_TAG);
}


/* The short tag only opens a script when CG(short_tags) is enabled;
 * otherwise the text is handed to the inline-HTML handler of the next rule. */
<INITIAL>"<?" {
    if (CG(short_tags)) {
        BEGIN(ST_IN_SCRIPTING);
        RETURN_TOKEN(T_OPEN_TAG);
    } else {
        goto inline_char_handler;
    }
}
1806
/* Inline HTML (INITIAL state): everything up to the next opening tag, or to
 * the end of input, is returned as a single T_INLINE_HTML token. */
<INITIAL>{ANY_CHAR} {
    /* the cursor ran past the limit: nothing is left to scan */
    if (YYCURSOR > YYLIMIT) {
        RETURN_TOKEN(END);
    }

/* also entered from the "<?" rule when short tags are disabled */
inline_char_handler:

    while (1) {
        /* jump to the next '<'; with none left, consume up to the limit */
        YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);

        YYCURSOR = ptr ? ptr + 1 : YYLIMIT;

        if (YYCURSOR >= YYLIMIT) {
            break;
        }

        if (*YYCURSOR == '?') {
            if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */

                /* step back onto the '<' so the tag is scanned next */
                YYCURSOR--;
                break;
            }
        }
    }

    yyleng = YYCURSOR - SCNG(yy_text);

    if (SCNG(output_filter)) {
        size_t readsize;
        char *s = NULL;
        size_t sz = 0;
        // TODO: avoid reallocation ???
        readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
        ZVAL_STRINGL(zendlval, s, sz);
        efree(s);
        /* the filter consumed fewer bytes than were offered: hand the
         * remainder back to the scanner */
        if (readsize < yyleng) {
            yyless(readsize);
        }
    } else {
      ZVAL_STRINGL(zendlval, yytext, yyleng);
    }
    HANDLE_NEWLINES(yytext, yyleng);
    RETURN_TOKEN(T_INLINE_HTML);
}
1851
1852
/* Make sure a label character follows "->", otherwise there is no property
 * and "->" will be taken literally
 */
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
    /* hand back the last three matched characters (the "->" and the first
     * character of the property name) */
    yyless(yyleng - 3);
    yy_push_state(ST_LOOKING_FOR_PROPERTY);
    /* the token value is the variable name without the leading '$' */
    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
    RETURN_TOKEN(T_VARIABLE);
}

/* A [ always designates a variable offset, regardless of what follows
 */
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
    /* hand back the "[" so it is scanned in ST_VAR_OFFSET */
    yyless(yyleng - 1);
    yy_push_state(ST_VAR_OFFSET);
    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
    RETURN_TOKEN(T_VARIABLE);
}

/* A plain variable: the token value is the name without the leading '$'. */
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
    RETURN_TOKEN(T_VARIABLE);
}

/* "]" closes the offset and returns to the enclosing state. */
<ST_VAR_OFFSET>"]" {
    yy_pop_state();
    RETURN_TOKEN(']');
}

<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
    /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
    RETURN_TOKEN(yytext[0]);
}

<ST_VAR_OFFSET>[ \n\r\t\\'#] {
    /* Invalid rule to return a more explicit parse error with proper line number */
    yyless(0);
    yy_pop_state();
    ZVAL_NULL(zendlval);
    RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}

/* A bare label: the token value is the label text. */
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
    zend_copy_value(zendlval, yytext, yyleng);
    RETURN_TOKEN(T_STRING);
}
1899
1900
/* One-line comment introduced by "#" or "//". The comment runs to the end of
 * the line (the line break is included in the token), to a closing tag
 * (which is left for the next scan), or to the end of input. */
<ST_IN_SCRIPTING>"#"|"//" {
    while (YYCURSOR < YYLIMIT) {
        switch (*YYCURSOR++) {
            case '\r':
                /* treat "\r\n" as a single line break */
                if (*YYCURSOR == '\n') {
                    YYCURSOR++;
                }
                /* fall through */
            case '\n':
                CG(zend_lineno)++;
                break;
            case '?':
                if (*YYCURSOR == '>') {
                    /* step back onto the '?' so the closing tag is not
                     * swallowed by the comment */
                    YYCURSOR--;
                    break;
                }
                /* fall through */
            default:
                /* ordinary comment character: keep scanning */
                continue;
        }

        /* reached only through a `break` above: the comment has ended */
        break;
    }

    yyleng = YYCURSOR - SCNG(yy_text);

    RETURN_TOKEN(T_COMMENT);
}
1929
1930<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1931    int doc_com;
1932
1933    if (yyleng > 2) {
1934        doc_com = 1;
1935        RESET_DOC_COMMENT();
1936    } else {
1937        doc_com = 0;
1938    }
1939
1940    while (YYCURSOR < YYLIMIT) {
1941        if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1942            break;
1943        }
1944    }
1945
1946    if (YYCURSOR < YYLIMIT) {
1947        YYCURSOR++;
1948    } else {
1949        zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1950    }
1951
1952    yyleng = YYCURSOR - SCNG(yy_text);
1953    HANDLE_NEWLINES(yytext, yyleng);
1954
1955    if (doc_com) {
1956        CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1957        RETURN_TOKEN(T_DOC_COMMENT);
1958    }
1959
1960    RETURN_TOKEN(T_COMMENT);
1961}
1962
/* Closing tag: an optional line break directly after it is part of the
 * token. The scanner goes back to the INITIAL state. */
<ST_IN_SCRIPTING>"?>"{NEWLINE}? {
    BEGIN(INITIAL);
    RETURN_TOKEN(T_CLOSE_TAG);  /* implicit ';' at php-end tag */
}
1967
1968
1969<ST_IN_SCRIPTING>b?['] {
1970    register char *s, *t;
1971    char *end;
1972    int bprefix = (yytext[0] != '\'') ? 1 : 0;
1973
1974    while (1) {
1975        if (YYCURSOR < YYLIMIT) {
1976            if (*YYCURSOR == '\'') {
1977                YYCURSOR++;
1978                yyleng = YYCURSOR - SCNG(yy_text);
1979
1980                break;
1981            } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1982                YYCURSOR++;
1983            }
1984        } else {
1985            yyleng = YYLIMIT - SCNG(yy_text);
1986
1987            /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1988             * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1989             * rule, which continued in ST_IN_SCRIPTING state after the quote */
1990            ZVAL_NULL(zendlval);
1991            RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
1992        }
1993    }
1994
1995    ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
1996
1997    /* convert escape sequences */
1998    s = t = Z_STRVAL_P(zendlval);
1999    end = s+Z_STRLEN_P(zendlval);
2000    while (s<end) {
2001        if (*s=='\\') {
2002            s++;
2003
2004            switch(*s) {
2005                case '\\':
2006                case '\'':
2007                    *t++ = *s;
2008                    Z_STRLEN_P(zendlval)--;
2009                    break;
2010                default:
2011                    *t++ = '\\';
2012                    *t++ = *s;
2013                    break;
2014            }
2015        } else {
2016            *t++ = *s;
2017        }
2018
2019        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2020            CG(zend_lineno)++;
2021        }
2022        s++;
2023    }
2024    *t = 0;
2025
2026    if (SCNG(output_filter)) {
2027        size_t sz = 0;
2028        char *str = NULL;
2029        s = Z_STRVAL_P(zendlval);
2030        // TODO: avoid reallocation ???
2031        SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
2032        ZVAL_STRINGL(zendlval, str, sz);
2033    }
2034    RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2035}
2036
2037
2038<ST_IN_SCRIPTING>b?["] {
2039    int bprefix = (yytext[0] != '"') ? 1 : 0;
2040
2041    while (YYCURSOR < YYLIMIT) {
2042        switch (*YYCURSOR++) {
2043            case '"':
2044                yyleng = YYCURSOR - SCNG(yy_text);
2045                if (zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == FAILURE) {
2046                    RETURN_TOKEN(T_ERROR);
2047                }
2048                RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
2049            case '$':
2050                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2051                    break;
2052                }
2053                continue;
2054            case '{':
2055                if (*YYCURSOR == '$') {
2056                    break;
2057                }
2058                continue;
2059            case '\\':
2060                if (YYCURSOR < YYLIMIT) {
2061                    YYCURSOR++;
2062                }
2063                /* fall through */
2064            default:
2065                continue;
2066        }
2067
2068        YYCURSOR--;
2069        break;
2070    }
2071
2072    /* Remember how much was scanned to save rescanning */
2073    SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2074
2075    YYCURSOR = SCNG(yy_text) + yyleng;
2076
2077    BEGIN(ST_DOUBLE_QUOTES);
2078    RETURN_TOKEN('"');
2079}
2080
2081
2082<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2083    char *s;
2084    int bprefix = (yytext[0] != '<') ? 1 : 0;
2085    zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2086
2087    CG(zend_lineno)++;
2088    heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2089    s = yytext+bprefix+3;
2090    while ((*s == ' ') || (*s == '\t')) {
2091        s++;
2092        heredoc_label->length--;
2093    }
2094
2095    if (*s == '\'') {
2096        s++;
2097        heredoc_label->length -= 2;
2098
2099        BEGIN(ST_NOWDOC);
2100    } else {
2101        if (*s == '"') {
2102            s++;
2103            heredoc_label->length -= 2;
2104        }
2105
2106        BEGIN(ST_HEREDOC);
2107    }
2108
2109    heredoc_label->label = estrndup(s, heredoc_label->length);
2110
2111    /* Check for ending label on the next line */
2112    if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2113        YYCTYPE *end = YYCURSOR + heredoc_label->length;
2114
2115        if (*end == ';') {
2116            end++;
2117        }
2118
2119        if (*end == '\n' || *end == '\r') {
2120            BEGIN(ST_END_HEREDOC);
2121        }
2122    }
2123
2124    zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2125
2126    RETURN_TOKEN(T_START_HEREDOC);
2127}
2128
2129
2130<ST_IN_SCRIPTING>[`] {
2131    BEGIN(ST_BACKQUOTE);
2132    RETURN_TOKEN('`');
2133}
2134
2135
2136<ST_END_HEREDOC>{ANY_CHAR} {
2137    zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2138
2139    YYCURSOR += heredoc_label->length - 1;
2140    yyleng = heredoc_label->length;
2141
2142    heredoc_label_dtor(heredoc_label);
2143    efree(heredoc_label);
2144
2145    BEGIN(ST_IN_SCRIPTING);
2146    RETURN_TOKEN(T_END_HEREDOC);
2147}
2148
2149
2150<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2151    Z_LVAL_P(zendlval) = (zend_long) '{';
2152    yy_push_state(ST_IN_SCRIPTING);
2153    yyless(1);
2154    RETURN_TOKEN(T_CURLY_OPEN);
2155}
2156
2157
2158<ST_DOUBLE_QUOTES>["] {
2159    BEGIN(ST_IN_SCRIPTING);
2160    RETURN_TOKEN('"');
2161}
2162
2163<ST_BACKQUOTE>[`] {
2164    BEGIN(ST_IN_SCRIPTING);
2165    RETURN_TOKEN('`');
2166}
2167
2168
2169<ST_DOUBLE_QUOTES>{ANY_CHAR} {
2170    if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2171        YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2172        SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2173
2174        goto double_quotes_scan_done;
2175    }
2176
2177    if (YYCURSOR > YYLIMIT) {
2178        RETURN_TOKEN(END);
2179    }
2180    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2181        YYCURSOR++;
2182    }
2183
2184    while (YYCURSOR < YYLIMIT) {
2185        switch (*YYCURSOR++) {
2186            case '"':
2187                break;
2188            case '$':
2189                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2190                    break;
2191                }
2192                continue;
2193            case '{':
2194                if (*YYCURSOR == '$') {
2195                    break;
2196                }
2197                continue;
2198            case '\\':
2199                if (YYCURSOR < YYLIMIT) {
2200                    YYCURSOR++;
2201                }
2202                /* fall through */
2203            default:
2204                continue;
2205        }
2206
2207        YYCURSOR--;
2208        break;
2209    }
2210
2211double_quotes_scan_done:
2212    yyleng = YYCURSOR - SCNG(yy_text);
2213
2214    if (zend_scan_escape_string(zendlval, yytext, yyleng, '"') == FAILURE) {
2215        RETURN_TOKEN(T_ERROR);
2216    }
2217    RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2218}
2219
2220
2221<ST_BACKQUOTE>{ANY_CHAR} {
2222    if (YYCURSOR > YYLIMIT) {
2223        RETURN_TOKEN(END);
2224    }
2225    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2226        YYCURSOR++;
2227    }
2228
2229    while (YYCURSOR < YYLIMIT) {
2230        switch (*YYCURSOR++) {
2231            case '`':
2232                break;
2233            case '$':
2234                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2235                    break;
2236                }
2237                continue;
2238            case '{':
2239                if (*YYCURSOR == '$') {
2240                    break;
2241                }
2242                continue;
2243            case '\\':
2244                if (YYCURSOR < YYLIMIT) {
2245                    YYCURSOR++;
2246                }
2247                /* fall through */
2248            default:
2249                continue;
2250        }
2251
2252        YYCURSOR--;
2253        break;
2254    }
2255
2256    yyleng = YYCURSOR - SCNG(yy_text);
2257
2258    if (zend_scan_escape_string(zendlval, yytext, yyleng, '`') == FAILURE) {
2259        RETURN_TOKEN(T_ERROR);
2260    }
2261    RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2262}
2263
2264
2265<ST_HEREDOC>{ANY_CHAR} {
2266    int newline = 0;
2267
2268    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2269
2270    if (YYCURSOR > YYLIMIT) {
2271        RETURN_TOKEN(END);
2272    }
2273
2274    YYCURSOR--;
2275
2276    while (YYCURSOR < YYLIMIT) {
2277        switch (*YYCURSOR++) {
2278            case '\r':
2279                if (*YYCURSOR == '\n') {
2280                    YYCURSOR++;
2281                }
2282                /* fall through */
2283            case '\n':
2284                /* Check for ending label on the next line */
2285                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2286                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2287
2288                    if (*end == ';') {
2289                        end++;
2290                    }
2291
2292                    if (*end == '\n' || *end == '\r') {
2293                        /* newline before label will be subtracted from returned text, but
2294                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2295                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2296                            newline = 2; /* Windows newline */
2297                        } else {
2298                            newline = 1;
2299                        }
2300
2301                        CG(increment_lineno) = 1; /* For newline before label */
2302                        BEGIN(ST_END_HEREDOC);
2303
2304                        goto heredoc_scan_done;
2305                    }
2306                }
2307                continue;
2308            case '$':
2309                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2310                    break;
2311                }
2312                continue;
2313            case '{':
2314                if (*YYCURSOR == '$') {
2315                    break;
2316                }
2317                continue;
2318            case '\\':
2319                if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2320                    YYCURSOR++;
2321                }
2322                /* fall through */
2323            default:
2324                continue;
2325        }
2326
2327        YYCURSOR--;
2328        break;
2329    }
2330
2331heredoc_scan_done:
2332    yyleng = YYCURSOR - SCNG(yy_text);
2333
2334    if (zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0) == FAILURE) {
2335        RETURN_TOKEN(T_ERROR);
2336    }
2337    RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2338}
2339
2340
2341<ST_NOWDOC>{ANY_CHAR} {
2342    int newline = 0;
2343
2344    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2345
2346    if (YYCURSOR > YYLIMIT) {
2347        RETURN_TOKEN(END);
2348    }
2349
2350    YYCURSOR--;
2351
2352    while (YYCURSOR < YYLIMIT) {
2353        switch (*YYCURSOR++) {
2354            case '\r':
2355                if (*YYCURSOR == '\n') {
2356                    YYCURSOR++;
2357                }
2358                /* fall through */
2359            case '\n':
2360                /* Check for ending label on the next line */
2361                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2362                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2363
2364                    if (*end == ';') {
2365                        end++;
2366                    }
2367
2368                    if (*end == '\n' || *end == '\r') {
2369                        /* newline before label will be subtracted from returned text, but
2370                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2371                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2372                            newline = 2; /* Windows newline */
2373                        } else {
2374                            newline = 1;
2375                        }
2376
2377                        CG(increment_lineno) = 1; /* For newline before label */
2378                        BEGIN(ST_END_HEREDOC);
2379
2380                        goto nowdoc_scan_done;
2381                    }
2382                }
2383                /* fall through */
2384            default:
2385                continue;
2386        }
2387    }
2388
2389nowdoc_scan_done:
2390    yyleng = YYCURSOR - SCNG(yy_text);
2391
2392    zend_copy_value(zendlval, yytext, yyleng - newline);
2393    HANDLE_NEWLINES(yytext, yyleng - newline);
2394    RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
2395}
2396
2397
2398<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2399    if (YYCURSOR > YYLIMIT) {
2400        RETURN_TOKEN(END);
2401    }
2402
2403    zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2404    goto restart;
2405}
2406
2407*/
2408}
2409