1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2015 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Marcus Boerger <helly@php.net>                              |
16   |          Nuno Lopes <nlopess@php.net>                                |
17   |          Scott MacVicar <scottmac@php.net>                           |
18   | Flex version authors:                                                |
19   |          Andi Gutmans <andi@zend.com>                                |
20   |          Zeev Suraski <zeev@zend.com>                                |
21   +----------------------------------------------------------------------+
22*/
23
24/* $Id$ */
25
/* Scanner trace hook. Change the 0 below to 1 to make YYDEBUG print the
   state number and the current character; by default it expands to nothing. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
31
32#include "zend_language_scanner_defs.h"
33
34#include <errno.h>
35#include "zend.h"
36#ifdef PHP_WIN32
37# include <Winuser.h>
38#endif
39#include "zend_alloc.h"
40#include <zend_language_parser.h>
41#include "zend_compile.h"
42#include "zend_language_scanner.h"
43#include "zend_highlight.h"
44#include "zend_constants.h"
45#include "zend_variables.h"
46#include "zend_operators.h"
47#include "zend_API.h"
48#include "zend_strtod.h"
49#include "zend_exceptions.h"
50#include "zend_virtual_cwd.h"
51#include "tsrm_config_common.h"
52
/* Character type handled by the scanner */
#define YYCTYPE   unsigned char

/* Refill hook: makes the enclosing function return 0 once the cursor plus n
   reaches YYLIMIT + ZEND_MMAP_AHEAD */
#define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }

/* Buffer pointers, all stored in the scanner globals */
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* Current scanner condition, stored in the scanner globals */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = s

/* Maps a bare condition name to its yyc-prefixed identifier */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)

/* Truncate the current token to its first x bytes and move the cursor
   back to just after them */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + x; \
                          yyleng   = (unsigned int)x; } while(0)

/* Jump to the yymore_restart label, which must exist in the using function */
#define yymore()     goto yymore_restart
72
/* Perform a sanity check. If this error is triggered, you should
   increase the ZEND_MMAP_AHEAD value in the zend_streams.h file. */
75/*!max:re2c */
76#if ZEND_MMAP_AHEAD < YYMAXFILL
77# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78#endif
79
80#ifdef HAVE_STDARG_H
81# include <stdarg.h>
82#endif
83
84#ifdef HAVE_UNISTD_H
85# include <unistd.h>
86#endif
87
88/* Globals Macros */
89#define SCNG    LANG_SCNG
90#ifdef ZTS
91ZEND_API ts_rsrc_id language_scanner_globals_id;
92#else
93ZEND_API zend_php_scanner_globals language_scanner_globals;
94#endif
95
/* Bump CG(zend_lineno) once for every line break in the l bytes starting at
   s. A '\n' always counts; a '\r' counts only when the next byte is not '\n',
   so "\r\n" is a single break. Note that the '\r' test peeks one byte ahead,
   including when '\r' is the last byte of the range. */
#define HANDLE_NEWLINES(s, l)                                                   \
do {                                                                            \
    char *nl_cur_ = (s), *nl_end_ = nl_cur_ + (l);                              \
                                                                                \
    for (; nl_cur_ < nl_end_; nl_cur_++) {                                      \
        if (*nl_cur_ == '\n') {                                                 \
            CG(zend_lineno)++;                                                  \
        } else if (*nl_cur_ == '\r' && *(nl_cur_ + 1) != '\n') {                \
            CG(zend_lineno)++;                                                  \
        }                                                                       \
    }                                                                           \
} while (0)
107
/* Bump CG(zend_lineno) when the single character c is '\n' or '\r'.
   The argument is parenthesized so that an expression with low-precedence
   operators (e.g. a conditional expression) is compared as a whole.
   c may be evaluated twice, so avoid side effects in the argument.
   The expansion is a bare brace block, as before, so existing call sites
   keep compiling unchanged. */
#define HANDLE_NEWLINE(c) \
{ \
    if ((c) == '\n' || (c) == '\r') { \
        CG(zend_lineno)++; \
    } \
}
114
/* To save initial string length after scanning to first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

/* Non-zero when c is an ASCII letter, '_' or any value >= 0x7F */
#define IS_LABEL_START(c) \
    (((c) >= 'a' && (c) <= 'z') || \
     ((c) >= 'A' && (c) <= 'Z') || \
     (c) == '_' || \
     (c) >= 0x7F)

/* Non-zero when c is an octal digit ('0'..'7') */
#define ZEND_IS_OCT(c)  ((c)>='0' && (c)<='7')

/* Non-zero when c is a hexadecimal digit in either letter case */
#define ZEND_IS_HEX(c) \
    (((c)>='0' && (c)<='9') || \
     ((c)>='a' && (c)<='f') || \
     ((c)>='A' && (c)<='F'))
123
124BEGIN_EXTERN_C()
125
126static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
127{
128    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
129    ZEND_ASSERT(internal_encoding);
130    return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding));
131}
132
133static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
134{
135    return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding));
136}
137
138static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
139{
140    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8);
142}
143
144static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
145{
146    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
147    ZEND_ASSERT(internal_encoding);
148    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149internal_encoding, zend_multibyte_encoding_utf8);
150}
151
152
153static void _yy_push_state(int new_state)
154{
155    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156    YYSETCONDITION(new_state);
157}
158
159#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
161static void yy_pop_state(void)
162{
163    int *stack_state = zend_stack_top(&SCNG(state_stack));
164    YYSETCONDITION(*stack_state);
165    zend_stack_del_top(&SCNG(state_stack));
166}
167
168static void yy_scan_buffer(char *str, unsigned int len)
169{
170    YYCURSOR       = (YYCTYPE*)str;
171    YYLIMIT        = YYCURSOR + len;
172    if (!SCNG(yy_start)) {
173        SCNG(yy_start) = YYCURSOR;
174    }
175}
176
177void startup_scanner(void)
178{
179    CG(parse_error) = 0;
180    CG(doc_comment) = NULL;
181    zend_stack_init(&SCNG(state_stack), sizeof(int));
182    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
183}
184
185static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
186    efree(heredoc_label->label);
187}
188
189void shutdown_scanner(void)
190{
191    CG(parse_error) = 0;
192    RESET_DOC_COMMENT();
193    zend_stack_destroy(&SCNG(state_stack));
194    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
195    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
196}
197
198ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
199{
200    lex_state->yy_leng   = SCNG(yy_leng);
201    lex_state->yy_start  = SCNG(yy_start);
202    lex_state->yy_text   = SCNG(yy_text);
203    lex_state->yy_cursor = SCNG(yy_cursor);
204    lex_state->yy_marker = SCNG(yy_marker);
205    lex_state->yy_limit  = SCNG(yy_limit);
206
207    lex_state->state_stack = SCNG(state_stack);
208    zend_stack_init(&SCNG(state_stack), sizeof(int));
209
210    lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
211    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
212
213    lex_state->in = SCNG(yy_in);
214    lex_state->yy_state = YYSTATE;
215    lex_state->filename = zend_get_compiled_filename();
216    lex_state->lineno = CG(zend_lineno);
217
218    lex_state->script_org = SCNG(script_org);
219    lex_state->script_org_size = SCNG(script_org_size);
220    lex_state->script_filtered = SCNG(script_filtered);
221    lex_state->script_filtered_size = SCNG(script_filtered_size);
222    lex_state->input_filter = SCNG(input_filter);
223    lex_state->output_filter = SCNG(output_filter);
224    lex_state->script_encoding = SCNG(script_encoding);
225
226    lex_state->ast = CG(ast);
227    lex_state->ast_arena = CG(ast_arena);
228}
229
230ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
231{
232    SCNG(yy_leng)   = lex_state->yy_leng;
233    SCNG(yy_start)  = lex_state->yy_start;
234    SCNG(yy_text)   = lex_state->yy_text;
235    SCNG(yy_cursor) = lex_state->yy_cursor;
236    SCNG(yy_marker) = lex_state->yy_marker;
237    SCNG(yy_limit)  = lex_state->yy_limit;
238
239    zend_stack_destroy(&SCNG(state_stack));
240    SCNG(state_stack) = lex_state->state_stack;
241
242    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
243    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
244    SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
245
246    SCNG(yy_in) = lex_state->in;
247    YYSETCONDITION(lex_state->yy_state);
248    CG(zend_lineno) = lex_state->lineno;
249    zend_restore_compiled_filename(lex_state->filename);
250
251    if (SCNG(script_filtered)) {
252        efree(SCNG(script_filtered));
253        SCNG(script_filtered) = NULL;
254    }
255    SCNG(script_org) = lex_state->script_org;
256    SCNG(script_org_size) = lex_state->script_org_size;
257    SCNG(script_filtered) = lex_state->script_filtered;
258    SCNG(script_filtered_size) = lex_state->script_filtered_size;
259    SCNG(input_filter) = lex_state->input_filter;
260    SCNG(output_filter) = lex_state->output_filter;
261    SCNG(script_encoding) = lex_state->script_encoding;
262
263    CG(ast) = lex_state->ast;
264    CG(ast_arena) = lex_state->ast_arena;
265
266    RESET_DOC_COMMENT();
267}
268
269ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
270{
271    zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
272    /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
273    file_handle->opened_path = NULL;
274    if (file_handle->free_filename) {
275        file_handle->filename = NULL;
276    }
277}
278
/* Byte order marks as string literals; use sizeof(...)-1 for their length
   because the UTF-32 marks contain NUL bytes. */
#define BOM_UTF32_BE    "\x00\x00\xfe\xff"   /* 4 bytes */
#define BOM_UTF32_LE    "\xff\xfe\x00\x00"   /* 4 bytes, starts like BOM_UTF16_LE */
#define BOM_UTF16_BE    "\xfe\xff"           /* 2 bytes */
#define BOM_UTF16_LE    "\xff\xfe"           /* 2 bytes */
#define BOM_UTF8        "\xef\xbb\xbf"       /* 3 bytes */
284
285static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size)
286{
287    const unsigned char *p;
288    int wchar_size = 2;
289    int le = 0;
290
291    /* utf-16 or utf-32? */
292    p = script;
293    assert(p >= script);
294    while ((size_t)(p-script) < script_size) {
295        p = memchr(p, 0, script_size-(p-script)-2);
296        if (!p) {
297            break;
298        }
299        if (*(p+1) == '\0' && *(p+2) == '\0') {
300            wchar_size = 4;
301            break;
302        }
303
304        /* searching for UTF-32 specific byte orders, so this will do */
305        p += 4;
306    }
307
308    /* BE or LE? */
309    p = script;
310    assert(p >= script);
311    while ((size_t)(p-script) < script_size) {
312        if (*p == '\0' && *(p+wchar_size-1) != '\0') {
313            /* BE */
314            le = 0;
315            break;
316        } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
317            /* LE* */
318            le = 1;
319            break;
320        }
321        p += wchar_size;
322    }
323
324    if (wchar_size == 2) {
325        return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
326    } else {
327        return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
328    }
329
330    return NULL;
331}
332
333static const zend_encoding* zend_multibyte_detect_unicode(void)
334{
335    const zend_encoding *script_encoding = NULL;
336    int bom_size;
337    unsigned char *pos1, *pos2;
338
339    if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
340        return NULL;
341    }
342
343    /* check out BOM */
344    if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
345        script_encoding = zend_multibyte_encoding_utf32be;
346        bom_size = sizeof(BOM_UTF32_BE)-1;
347    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
348        script_encoding = zend_multibyte_encoding_utf32le;
349        bom_size = sizeof(BOM_UTF32_LE)-1;
350    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
351        script_encoding = zend_multibyte_encoding_utf16be;
352        bom_size = sizeof(BOM_UTF16_BE)-1;
353    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
354        script_encoding = zend_multibyte_encoding_utf16le;
355        bom_size = sizeof(BOM_UTF16_LE)-1;
356    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
357        script_encoding = zend_multibyte_encoding_utf8;
358        bom_size = sizeof(BOM_UTF8)-1;
359    }
360
361    if (script_encoding) {
362        /* remove BOM */
363        LANG_SCNG(script_org) += bom_size;
364        LANG_SCNG(script_org_size) -= bom_size;
365
366        return script_encoding;
367    }
368
369    /* script contains NULL bytes -> auto-detection */
370    if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
371        /* check if the NULL byte is after the __HALT_COMPILER(); */
372        pos2 = LANG_SCNG(script_org);
373
374        while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
375            pos2 = memchr(pos2, '_', pos1 - pos2);
376            if (!pos2) break;
377            pos2++;
378            if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
379                pos2 += sizeof("_HALT_COMPILER")-1;
380                while (*pos2 == ' '  ||
381                       *pos2 == '\t' ||
382                       *pos2 == '\r' ||
383                       *pos2 == '\n') {
384                    pos2++;
385                }
386                if (*pos2 == '(') {
387                    pos2++;
388                    while (*pos2 == ' '  ||
389                           *pos2 == '\t' ||
390                           *pos2 == '\r' ||
391                           *pos2 == '\n') {
392                        pos2++;
393                    }
394                    if (*pos2 == ')') {
395                        pos2++;
396                        while (*pos2 == ' '  ||
397                               *pos2 == '\t' ||
398                               *pos2 == '\r' ||
399                               *pos2 == '\n') {
400                            pos2++;
401                        }
402                        if (*pos2 == ';') {
403                            return NULL;
404                        }
405                    }
406                }
407            }
408        }
409        /* make best effort if BOM is missing */
410        return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size));
411    }
412
413    return NULL;
414}
415
416static const zend_encoding* zend_multibyte_find_script_encoding(void)
417{
418    const zend_encoding *script_encoding;
419
420    if (CG(detect_unicode)) {
421        /* check out bom(byte order mark) and see if containing wchars */
422        script_encoding = zend_multibyte_detect_unicode();
423        if (script_encoding != NULL) {
424            /* bom or wchar detection is prior to 'script_encoding' option */
425            return script_encoding;
426        }
427    }
428
429    /* if no script_encoding specified, just leave alone */
430    if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
431        return NULL;
432    }
433
434    /* if multiple encodings specified, detect automagically */
435    if (CG(script_encoding_list_size) > 1) {
436        return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size));
437    }
438
439    return CG(script_encoding_list)[0];
440}
441
442ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding)
443{
444    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
445    const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding();
446
447    if (!script_encoding) {
448        return FAILURE;
449    }
450
451    /* judge input/output filter */
452    LANG_SCNG(script_encoding) = script_encoding;
453    LANG_SCNG(input_filter) = NULL;
454    LANG_SCNG(output_filter) = NULL;
455
456    if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
457        if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
458            /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
459            LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
460            LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
461        } else {
462            LANG_SCNG(input_filter) = NULL;
463            LANG_SCNG(output_filter) = NULL;
464        }
465        return SUCCESS;
466    }
467
468    if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
469        LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
470        LANG_SCNG(output_filter) = NULL;
471    } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
472        LANG_SCNG(input_filter) = NULL;
473        LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
474    } else {
475        /* both script and internal encodings are incompatible w/ flex */
476        LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
477        LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
478    }
479
480    return 0;
481}
482
483ZEND_API int open_file_for_scanning(zend_file_handle *file_handle)
484{
485    char *buf;
486    size_t size, offset = 0;
487    zend_string *compiled_filename;
488
489    /* The shebang line was read, get the current position to obtain the buffer start */
490    if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
491        if ((offset = ftell(file_handle->handle.fp)) == -1) {
492            offset = 0;
493        }
494    }
495
496    if (zend_stream_fixup(file_handle, &buf, &size) == FAILURE) {
497        return FAILURE;
498    }
499
500    zend_llist_add_element(&CG(open_files), file_handle);
501    if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
502        zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
503        size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
504        fh->handle.stream.handle = (void*)(((char*)fh) + diff);
505        file_handle->handle.stream.handle = fh->handle.stream.handle;
506    }
507
508    /* Reset the scanner for scanning the new file */
509    SCNG(yy_in) = file_handle;
510    SCNG(yy_start) = NULL;
511
512    if (size != -1) {
513        if (CG(multibyte)) {
514            SCNG(script_org) = (unsigned char*)buf;
515            SCNG(script_org_size) = size;
516            SCNG(script_filtered) = NULL;
517
518            zend_multibyte_set_filter(NULL);
519
520            if (SCNG(input_filter)) {
521                if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
522                    zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
523                            "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
524                }
525                buf = (char*)SCNG(script_filtered);
526                size = SCNG(script_filtered_size);
527            }
528        }
529        SCNG(yy_start) = (unsigned char *)buf - offset;
530        yy_scan_buffer(buf, (unsigned int)size);
531    } else {
532        zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
533    }
534
535    BEGIN(INITIAL);
536
537    if (file_handle->opened_path) {
538        compiled_filename = zend_string_copy(file_handle->opened_path);
539    } else {
540        compiled_filename = zend_string_init(file_handle->filename, strlen(file_handle->filename), 0);
541    }
542
543    zend_set_compiled_filename(compiled_filename);
544    zend_string_release(compiled_filename);
545
546    if (CG(start_lineno)) {
547        CG(zend_lineno) = CG(start_lineno);
548        CG(start_lineno) = 0;
549    } else {
550        CG(zend_lineno) = 1;
551    }
552
553    RESET_DOC_COMMENT();
554    CG(increment_lineno) = 0;
555    return SUCCESS;
556}
557END_EXTERN_C()
558
559
560ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type)
561{
562    zend_lex_state original_lex_state;
563    zend_op_array *op_array = NULL;
564    zend_save_lexical_state(&original_lex_state);
565
566    if (open_file_for_scanning(file_handle)==FAILURE) {
567        if (type==ZEND_REQUIRE) {
568            zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename);
569            zend_bailout();
570        } else {
571            zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename);
572        }
573    } else {
574        zend_bool original_in_compilation = CG(in_compilation);
575        CG(in_compilation) = 1;
576
577        CG(ast) = NULL;
578        CG(ast_arena) = zend_arena_create(1024 * 32);
579        if (!zendparse()) {
580            zval retval_zv;
581            zend_file_context original_file_context;
582            zend_oparray_context original_oparray_context;
583            zend_op_array *original_active_op_array = CG(active_op_array);
584            op_array = emalloc(sizeof(zend_op_array));
585            init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE);
586            CG(active_op_array) = op_array;
587            ZVAL_LONG(&retval_zv, 1);
588
589            if (zend_ast_process) {
590                zend_ast_process(CG(ast));
591            }
592
593            zend_file_context_begin(&original_file_context);
594            zend_oparray_context_begin(&original_oparray_context);
595            zend_compile_top_stmt(CG(ast));
596            zend_emit_final_return(&retval_zv);
597            op_array->line_start = 1;
598            op_array->line_end = CG(zend_lineno);
599            pass_two(op_array);
600            zend_oparray_context_end(&original_oparray_context);
601            zend_file_context_end(&original_file_context);
602
603            CG(active_op_array) = original_active_op_array;
604        }
605
606        zend_ast_destroy(CG(ast));
607        zend_arena_destroy(CG(ast_arena));
608        CG(in_compilation) = original_in_compilation;
609    }
610
611    zend_restore_lexical_state(&original_lex_state);
612    return op_array;
613}
614
615
616zend_op_array *compile_filename(int type, zval *filename)
617{
618    zend_file_handle file_handle;
619    zval tmp;
620    zend_op_array *retval;
621    zend_string *opened_path = NULL;
622
623    if (Z_TYPE_P(filename) != IS_STRING) {
624        tmp = *filename;
625        zval_copy_ctor(&tmp);
626        convert_to_string(&tmp);
627        filename = &tmp;
628    }
629    file_handle.filename = Z_STRVAL_P(filename);
630    file_handle.free_filename = 0;
631    file_handle.type = ZEND_HANDLE_FILENAME;
632    file_handle.opened_path = NULL;
633    file_handle.handle.fp = NULL;
634
635    retval = zend_compile_file(&file_handle, type);
636    if (retval && file_handle.handle.stream.handle) {
637        if (!file_handle.opened_path) {
638            file_handle.opened_path = opened_path = zend_string_copy(Z_STR_P(filename));
639        }
640
641        zend_hash_add_empty_element(&EG(included_files), file_handle.opened_path);
642
643        if (opened_path) {
644            zend_string_release(opened_path);
645        }
646    }
647    zend_destroy_file_handle(&file_handle);
648
649    if (filename==&tmp) {
650        zval_dtor(&tmp);
651    }
652    return retval;
653}
654
655ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename)
656{
657    char *buf;
658    size_t size, old_len;
659    zend_string *new_compiled_filename;
660
661    /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
662    old_len = Z_STRLEN_P(str);
663    Z_STR_P(str) = zend_string_extend(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
664    Z_TYPE_INFO_P(str) = IS_STRING_EX;
665    memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
666
667    SCNG(yy_in) = NULL;
668    SCNG(yy_start) = NULL;
669
670    buf = Z_STRVAL_P(str);
671    size = old_len;
672
673    if (CG(multibyte)) {
674        SCNG(script_org) = (unsigned char*)buf;
675        SCNG(script_org_size) = size;
676        SCNG(script_filtered) = NULL;
677
678        zend_multibyte_set_filter(zend_multibyte_get_internal_encoding());
679
680        if (SCNG(input_filter)) {
681            if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size))) {
682                zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
683                        "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
684            }
685            buf = (char*)SCNG(script_filtered);
686            size = SCNG(script_filtered_size);
687        }
688    }
689
690    yy_scan_buffer(buf, (unsigned int)size);
691
692    new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
693    zend_set_compiled_filename(new_compiled_filename);
694    zend_string_release(new_compiled_filename);
695    CG(zend_lineno) = 1;
696    CG(increment_lineno) = 0;
697    RESET_DOC_COMMENT();
698    return SUCCESS;
699}
700
701
702ZEND_API size_t zend_get_scanned_file_offset(void)
703{
704    size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
705    if (SCNG(input_filter)) {
706        size_t original_offset = offset, length = 0;
707        do {
708            unsigned char *p = NULL;
709            if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset)) {
710                return (size_t)-1;
711            }
712            efree(p);
713            if (length > original_offset) {
714                offset--;
715            } else if (length < original_offset) {
716                offset++;
717            }
718        } while (original_offset != length);
719    }
720    return offset;
721}
722
723
724zend_op_array *compile_string(zval *source_string, char *filename)
725{
726    zend_lex_state original_lex_state;
727    zend_op_array *op_array = NULL;
728    zval tmp;
729    zend_bool original_in_compilation = CG(in_compilation);
730
731    if (Z_STRLEN_P(source_string)==0) {
732        return NULL;
733    }
734
735    ZVAL_DUP(&tmp, source_string);
736    convert_to_string(&tmp);
737    source_string = &tmp;
738
739    CG(in_compilation) = 1;
740    zend_save_lexical_state(&original_lex_state);
741    if (zend_prepare_string_for_scanning(source_string, filename) == SUCCESS) {
742        CG(ast) = NULL;
743        CG(ast_arena) = zend_arena_create(1024 * 32);
744        BEGIN(ST_IN_SCRIPTING);
745
746        if (!zendparse()) {
747            zend_file_context original_file_context;
748            zend_oparray_context original_oparray_context;
749            zend_op_array *original_active_op_array = CG(active_op_array);
750            op_array = emalloc(sizeof(zend_op_array));
751            init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE);
752            CG(active_op_array) = op_array;
753
754            if (zend_ast_process) {
755                zend_ast_process(CG(ast));
756            }
757
758            zend_file_context_begin(&original_file_context);
759            zend_oparray_context_begin(&original_oparray_context);
760            zend_compile_top_stmt(CG(ast));
761            zend_emit_final_return(NULL);
762            op_array->line_start = 1;
763            op_array->line_end = CG(zend_lineno);
764            pass_two(op_array);
765            zend_oparray_context_end(&original_oparray_context);
766            zend_file_context_end(&original_file_context);
767
768            CG(active_op_array) = original_active_op_array;
769        }
770
771        zend_ast_destroy(CG(ast));
772        zend_arena_destroy(CG(ast_arena));
773    }
774
775    zend_restore_lexical_state(&original_lex_state);
776    zval_dtor(&tmp);
777    CG(in_compilation) = original_in_compilation;
778    return op_array;
779}
780
781
782BEGIN_EXTERN_C()
783int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini)
784{
785    zend_lex_state original_lex_state;
786    zend_file_handle file_handle;
787
788    file_handle.type = ZEND_HANDLE_FILENAME;
789    file_handle.filename = filename;
790    file_handle.free_filename = 0;
791    file_handle.opened_path = NULL;
792    zend_save_lexical_state(&original_lex_state);
793    if (open_file_for_scanning(&file_handle)==FAILURE) {
794        zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename);
795        zend_restore_lexical_state(&original_lex_state);
796        return FAILURE;
797    }
798    zend_highlight(syntax_highlighter_ini);
799    if (SCNG(script_filtered)) {
800        efree(SCNG(script_filtered));
801        SCNG(script_filtered) = NULL;
802    }
803    zend_destroy_file_handle(&file_handle);
804    zend_restore_lexical_state(&original_lex_state);
805    return SUCCESS;
806}
807
808int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name)
809{
810    zend_lex_state original_lex_state;
811    zval tmp = *str;
812
813    str = &tmp;
814    zval_copy_ctor(str);
815    zend_save_lexical_state(&original_lex_state);
816    if (zend_prepare_string_for_scanning(str, str_name)==FAILURE) {
817        zend_restore_lexical_state(&original_lex_state);
818        return FAILURE;
819    }
820    BEGIN(INITIAL);
821    zend_highlight(syntax_highlighter_ini);
822    if (SCNG(script_filtered)) {
823        efree(SCNG(script_filtered));
824        SCNG(script_filtered) = NULL;
825    }
826    zend_restore_lexical_state(&original_lex_state);
827    zval_dtor(str);
828    return SUCCESS;
829}
830
831ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding)
832{
833    size_t length;
834    unsigned char *new_yy_start;
835
836    /* convert and set */
837    if (!SCNG(input_filter)) {
838        if (SCNG(script_filtered)) {
839            efree(SCNG(script_filtered));
840            SCNG(script_filtered) = NULL;
841        }
842        SCNG(script_filtered_size) = 0;
843        length = SCNG(script_org_size);
844        new_yy_start = SCNG(script_org);
845    } else {
846        if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size))) {
847            zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
848                    "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
849        }
850        if (SCNG(script_filtered)) {
851            efree(SCNG(script_filtered));
852        }
853        SCNG(script_filtered) = new_yy_start;
854        SCNG(script_filtered_size) = length;
855    }
856
857    SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
858    SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
859    SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
860    SCNG(yy_limit) = new_yy_start + length;
861
862    SCNG(yy_start) = new_yy_start;
863}
864
865
/*
 * Store yyleng bytes starting at yytext into zendlval as a string.
 * When an output filter is installed the bytes are passed through it first;
 * the filter's buffer is copied into the zval and then freed.
 *
 * The expansion is a bare if/else statement: use it as a complete statement
 * followed by ';', never as the unbraced body of an `if` that has an `else`.
 * Arguments are parenthesized so that expressions such as `p + 1` are cast
 * and passed as a whole.
 */
// TODO: avoid reallocation ???
# define zend_copy_value(zendlval, yytext, yyleng) \
    if (SCNG(output_filter)) { \
        size_t sz = 0; \
        char *s = NULL; \
        SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)(yytext), (size_t)(yyleng)); \
        ZVAL_STRINGL((zendlval), s, sz); \
        efree(s); \
    } else { \
        ZVAL_STRINGL((zendlval), (yytext), (yyleng)); \
    }
877
878static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type)
879{
880    register char *s, *t;
881    char *end;
882
883    ZVAL_STRINGL(zendlval, str, len);
884
885    /* convert escape sequences */
886    s = t = Z_STRVAL_P(zendlval);
887    end = s+Z_STRLEN_P(zendlval);
888    while (s<end) {
889        if (*s=='\\') {
890            s++;
891            if (s >= end) {
892                *t++ = '\\';
893                break;
894            }
895
896            switch(*s) {
897                case 'n':
898                    *t++ = '\n';
899                    Z_STRLEN_P(zendlval)--;
900                    break;
901                case 'r':
902                    *t++ = '\r';
903                    Z_STRLEN_P(zendlval)--;
904                    break;
905                case 't':
906                    *t++ = '\t';
907                    Z_STRLEN_P(zendlval)--;
908                    break;
909                case 'f':
910                    *t++ = '\f';
911                    Z_STRLEN_P(zendlval)--;
912                    break;
913                case 'v':
914                    *t++ = '\v';
915                    Z_STRLEN_P(zendlval)--;
916                    break;
917                case 'e':
918#ifdef PHP_WIN32
919                    *t++ = VK_ESCAPE;
920#else
921                    *t++ = '\e';
922#endif
923                    Z_STRLEN_P(zendlval)--;
924                    break;
925                case '"':
926                case '`':
927                    if (*s != quote_type) {
928                        *t++ = '\\';
929                        *t++ = *s;
930                        break;
931                    }
932                case '\\':
933                case '$':
934                    *t++ = *s;
935                    Z_STRLEN_P(zendlval)--;
936                    break;
937                case 'x':
938                case 'X':
939                    if (ZEND_IS_HEX(*(s+1))) {
940                        char hex_buf[3] = { 0, 0, 0 };
941
942                        Z_STRLEN_P(zendlval)--; /* for the 'x' */
943
944                        hex_buf[0] = *(++s);
945                        Z_STRLEN_P(zendlval)--;
946                        if (ZEND_IS_HEX(*(s+1))) {
947                            hex_buf[1] = *(++s);
948                            Z_STRLEN_P(zendlval)--;
949                        }
950                        *t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
951                    } else {
952                        *t++ = '\\';
953                        *t++ = *s;
954                    }
955                    break;
956                /* UTF-8 codepoint escape, format: /\\u\{\x+\}/ */
957                case 'u':
958                    {
959                        /* cache where we started so we can parse after validating */
960                        char *start = s + 1;
961                        size_t len = 0;
962                        zend_bool valid = 1;
963                        unsigned long codepoint;
964                        size_t byte_len = 0;
965
966                        if (*start != '{') {
967                            /* we silently let this pass to avoid breaking code
968                             * with JSON in string literals (e.g. "\"\u202e\""
969                             */
970                            *t++ = '\\';
971                            *t++ = 'u';
972                            break;
973                        } else {
974                            /* on the other hand, invalid \u{blah} errors */
975                            s++;
976                            len++;
977                            s++;
978                            while (*s != '}') {
979                                if (!ZEND_IS_HEX(*s)) {
980                                    valid = 0;
981                                    break;
982                                } else {
983                                    len++;
984                                }
985                                s++;
986                            }
987                            if (*s == '}') {
988                                valid = 1;
989                                len++;
990                            }
991                        }
992
993                        /* \u{} is invalid */
994                        if (len <= 2) {
995                            valid = 0;
996                        }
997
998                        if (!valid) {
999                            zend_throw_exception(zend_get_parse_exception(),
1000                                "Invalid UTF-8 codepoint escape sequence", E_PARSE);
1001                            zval_ptr_dtor(zendlval);
1002                            return FAILURE;
1003                        }
1004
1005                        errno = 0;
1006                        codepoint = strtoul(start + 1, NULL, 16);
1007
1008                        /* per RFC 3629, UTF-8 can only represent 21 bits */
1009                        if (codepoint > 0x10FFFF || errno) {
1010                            zend_throw_exception(zend_get_parse_exception(),
1011                                "Invalid UTF-8 codepoint escape sequence: Codepoint too large", E_PARSE);
1012                            zval_ptr_dtor(zendlval);
1013                            return FAILURE;
1014                        }
1015
1016                        /* based on https://en.wikipedia.org/wiki/UTF-8#Sample_code */
1017                        if (codepoint < 0x80) {
1018                            byte_len = 1;
1019                            *t++ = codepoint;
1020                        } else if (codepoint <= 0x7FF) {
1021                            byte_len = 2;
1022                            *t++ = (codepoint >> 6) + 0xC0;
1023                            *t++ = (codepoint & 0x3F) + 0x80;
1024                        } else if (codepoint <= 0xFFFF) {
1025                            byte_len = 3;
1026                            *t++ = (codepoint >> 12) + 0xE0;
1027                            *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
1028                            *t++ = (codepoint & 0x3F) + 0x80;
1029                        } else if (codepoint <= 0x10FFFF) {
1030                            byte_len = 4;
1031                            *t++ = (codepoint >> 18) + 0xF0;
1032                            *t++ = ((codepoint >> 12) & 0x3F) + 0x80;
1033                            *t++ = ((codepoint >> 6) & 0x3F) + 0x80;
1034                            *t++ = (codepoint & 0x3F) + 0x80;
1035                        }
1036
1037                        Z_STRLEN_P(zendlval) -= 2; /* \u */
1038                        Z_STRLEN_P(zendlval) -= (len - byte_len);
1039                    }
1040                    break;
1041                default:
1042                    /* check for an octal */
1043                    if (ZEND_IS_OCT(*s)) {
1044                        char octal_buf[4] = { 0, 0, 0, 0 };
1045
1046                        octal_buf[0] = *s;
1047                        Z_STRLEN_P(zendlval)--;
1048                        if (ZEND_IS_OCT(*(s+1))) {
1049                            octal_buf[1] = *(++s);
1050                            Z_STRLEN_P(zendlval)--;
1051                            if (ZEND_IS_OCT(*(s+1))) {
1052                                octal_buf[2] = *(++s);
1053                                Z_STRLEN_P(zendlval)--;
1054                            }
1055                        }
1056                        *t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
1057                    } else {
1058                        *t++ = '\\';
1059                        *t++ = *s;
1060                    }
1061                    break;
1062            }
1063        } else {
1064            *t++ = *s;
1065        }
1066
1067        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1068            CG(zend_lineno)++;
1069        }
1070        s++;
1071    }
1072    *t = 0;
1073    if (SCNG(output_filter)) {
1074        size_t sz = 0;
1075        unsigned char *str;
1076        // TODO: avoid realocation ???
1077        s = Z_STRVAL_P(zendlval);
1078        SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
1079        zval_ptr_dtor(zendlval);
1080        ZVAL_STRINGL(zendlval, (char *) str, sz);
1081        efree(str);
1082    }
1083    return SUCCESS;
1084}
1085
1086
1087int lex_scan(zval *zendlval)
1088{
1089restart:
1090    SCNG(yy_text) = YYCURSOR;
1091
1092/*!re2c
1093re2c:yyfill:check = 0;
1094LNUM    [0-9]+
1095DNUM    ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1096EXPONENT_DNUM   (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1097HNUM    "0x"[0-9a-fA-F]+
1098BNUM    "0b"[01]+
1099LABEL   [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1100WHITESPACE [ \n\r\t]+
1101TABS_AND_SPACES [ \t]*
1102TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1103ANY_CHAR [^]
1104NEWLINE ("\r"|"\n"|"\r\n")
1105
1106/* compute yyleng before each rule */
1107<!*> := yyleng = YYCURSOR - SCNG(yy_text);
1108
1109<ST_IN_SCRIPTING>"exit" {
1110    return T_EXIT;
1111}
1112
1113<ST_IN_SCRIPTING>"die" {
1114    return T_EXIT;
1115}
1116
1117<ST_IN_SCRIPTING>"function" {
1118    return T_FUNCTION;
1119}
1120
1121<ST_IN_SCRIPTING>"const" {
1122    return T_CONST;
1123}
1124
1125<ST_IN_SCRIPTING>"return" {
1126    return T_RETURN;
1127}
1128
1129<ST_IN_SCRIPTING>"yield"{WHITESPACE}"from" {
1130    return T_YIELD_FROM;
1131}
1132
1133<ST_IN_SCRIPTING>"yield" {
1134    return T_YIELD;
1135}
1136
1137<ST_IN_SCRIPTING>"try" {
1138    return T_TRY;
1139}
1140
1141<ST_IN_SCRIPTING>"catch" {
1142    return T_CATCH;
1143}
1144
1145<ST_IN_SCRIPTING>"finally" {
1146    return T_FINALLY;
1147}
1148
1149<ST_IN_SCRIPTING>"throw" {
1150    return T_THROW;
1151}
1152
1153<ST_IN_SCRIPTING>"if" {
1154    return T_IF;
1155}
1156
1157<ST_IN_SCRIPTING>"elseif" {
1158    return T_ELSEIF;
1159}
1160
1161<ST_IN_SCRIPTING>"endif" {
1162    return T_ENDIF;
1163}
1164
1165<ST_IN_SCRIPTING>"else" {
1166    return T_ELSE;
1167}
1168
1169<ST_IN_SCRIPTING>"while" {
1170    return T_WHILE;
1171}
1172
1173<ST_IN_SCRIPTING>"endwhile" {
1174    return T_ENDWHILE;
1175}
1176
1177<ST_IN_SCRIPTING>"do" {
1178    return T_DO;
1179}
1180
1181<ST_IN_SCRIPTING>"for" {
1182    return T_FOR;
1183}
1184
1185<ST_IN_SCRIPTING>"endfor" {
1186    return T_ENDFOR;
1187}
1188
1189<ST_IN_SCRIPTING>"foreach" {
1190    return T_FOREACH;
1191}
1192
1193<ST_IN_SCRIPTING>"endforeach" {
1194    return T_ENDFOREACH;
1195}
1196
1197<ST_IN_SCRIPTING>"declare" {
1198    return T_DECLARE;
1199}
1200
1201<ST_IN_SCRIPTING>"enddeclare" {
1202    return T_ENDDECLARE;
1203}
1204
1205<ST_IN_SCRIPTING>"instanceof" {
1206    return T_INSTANCEOF;
1207}
1208
1209<ST_IN_SCRIPTING>"as" {
1210    return T_AS;
1211}
1212
1213<ST_IN_SCRIPTING>"switch" {
1214    return T_SWITCH;
1215}
1216
1217<ST_IN_SCRIPTING>"endswitch" {
1218    return T_ENDSWITCH;
1219}
1220
1221<ST_IN_SCRIPTING>"case" {
1222    return T_CASE;
1223}
1224
1225<ST_IN_SCRIPTING>"default" {
1226    return T_DEFAULT;
1227}
1228
1229<ST_IN_SCRIPTING>"break" {
1230    return T_BREAK;
1231}
1232
1233<ST_IN_SCRIPTING>"continue" {
1234    return T_CONTINUE;
1235}
1236
1237<ST_IN_SCRIPTING>"goto" {
1238    return T_GOTO;
1239}
1240
1241<ST_IN_SCRIPTING>"echo" {
1242    return T_ECHO;
1243}
1244
1245<ST_IN_SCRIPTING>"print" {
1246    return T_PRINT;
1247}
1248
1249<ST_IN_SCRIPTING>"class" {
1250    return T_CLASS;
1251}
1252
1253<ST_IN_SCRIPTING>"interface" {
1254    return T_INTERFACE;
1255}
1256
1257<ST_IN_SCRIPTING>"trait" {
1258    return T_TRAIT;
1259}
1260
1261<ST_IN_SCRIPTING>"extends" {
1262    return T_EXTENDS;
1263}
1264
1265<ST_IN_SCRIPTING>"implements" {
1266    return T_IMPLEMENTS;
1267}
1268
1269<ST_IN_SCRIPTING>"->" {
1270    yy_push_state(ST_LOOKING_FOR_PROPERTY);
1271    return T_OBJECT_OPERATOR;
1272}
1273
1274<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1275    HANDLE_NEWLINES(yytext, yyleng);
1276    return T_WHITESPACE;
1277}
1278
1279<ST_LOOKING_FOR_PROPERTY>"->" {
1280    return T_OBJECT_OPERATOR;
1281}
1282
1283<ST_LOOKING_FOR_PROPERTY>{LABEL} {
1284    yy_pop_state();
1285    zend_copy_value(zendlval, yytext, yyleng);
1286    return T_STRING;
1287}
1288
1289<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1290    yyless(0);
1291    yy_pop_state();
1292    goto restart;
1293}
1294
1295<ST_IN_SCRIPTING>"::" {
1296    return T_PAAMAYIM_NEKUDOTAYIM;
1297}
1298
1299<ST_IN_SCRIPTING>"\\" {
1300    return T_NS_SEPARATOR;
1301}
1302
1303<ST_IN_SCRIPTING>"..." {
1304    return T_ELLIPSIS;
1305}
1306
1307<ST_IN_SCRIPTING>"??" {
1308    return T_COALESCE;
1309}
1310
1311<ST_IN_SCRIPTING>"new" {
1312    return T_NEW;
1313}
1314
1315<ST_IN_SCRIPTING>"clone" {
1316    return T_CLONE;
1317}
1318
1319<ST_IN_SCRIPTING>"var" {
1320    return T_VAR;
1321}
1322
1323<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1324    return T_INT_CAST;
1325}
1326
1327<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1328    return T_DOUBLE_CAST;
1329}
1330
1331<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1332    return T_STRING_CAST;
1333}
1334
1335<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1336    return T_ARRAY_CAST;
1337}
1338
1339<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1340    return T_OBJECT_CAST;
1341}
1342
1343<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1344    return T_BOOL_CAST;
1345}
1346
1347<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1348    return T_UNSET_CAST;
1349}
1350
1351<ST_IN_SCRIPTING>"eval" {
1352    return T_EVAL;
1353}
1354
1355<ST_IN_SCRIPTING>"include" {
1356    return T_INCLUDE;
1357}
1358
1359<ST_IN_SCRIPTING>"include_once" {
1360    return T_INCLUDE_ONCE;
1361}
1362
1363<ST_IN_SCRIPTING>"require" {
1364    return T_REQUIRE;
1365}
1366
1367<ST_IN_SCRIPTING>"require_once" {
1368    return T_REQUIRE_ONCE;
1369}
1370
1371<ST_IN_SCRIPTING>"namespace" {
1372    return T_NAMESPACE;
1373}
1374
1375<ST_IN_SCRIPTING>"use" {
1376    return T_USE;
1377}
1378
1379<ST_IN_SCRIPTING>"insteadof" {
1380        return T_INSTEADOF;
1381}
1382
1383<ST_IN_SCRIPTING>"global" {
1384    return T_GLOBAL;
1385}
1386
1387<ST_IN_SCRIPTING>"isset" {
1388    return T_ISSET;
1389}
1390
1391<ST_IN_SCRIPTING>"empty" {
1392    return T_EMPTY;
1393}
1394
1395<ST_IN_SCRIPTING>"__halt_compiler" {
1396    return T_HALT_COMPILER;
1397}
1398
1399<ST_IN_SCRIPTING>"static" {
1400    return T_STATIC;
1401}
1402
1403<ST_IN_SCRIPTING>"abstract" {
1404    return T_ABSTRACT;
1405}
1406
1407<ST_IN_SCRIPTING>"final" {
1408    return T_FINAL;
1409}
1410
1411<ST_IN_SCRIPTING>"private" {
1412    return T_PRIVATE;
1413}
1414
1415<ST_IN_SCRIPTING>"protected" {
1416    return T_PROTECTED;
1417}
1418
1419<ST_IN_SCRIPTING>"public" {
1420    return T_PUBLIC;
1421}
1422
1423<ST_IN_SCRIPTING>"unset" {
1424    return T_UNSET;
1425}
1426
1427<ST_IN_SCRIPTING>"=>" {
1428    return T_DOUBLE_ARROW;
1429}
1430
1431<ST_IN_SCRIPTING>"list" {
1432    return T_LIST;
1433}
1434
1435<ST_IN_SCRIPTING>"array" {
1436    return T_ARRAY;
1437}
1438
1439<ST_IN_SCRIPTING>"callable" {
1440 return T_CALLABLE;
1441}
1442
1443<ST_IN_SCRIPTING>"++" {
1444    return T_INC;
1445}
1446
1447<ST_IN_SCRIPTING>"--" {
1448    return T_DEC;
1449}
1450
1451<ST_IN_SCRIPTING>"===" {
1452    return T_IS_IDENTICAL;
1453}
1454
1455<ST_IN_SCRIPTING>"!==" {
1456    return T_IS_NOT_IDENTICAL;
1457}
1458
1459<ST_IN_SCRIPTING>"==" {
1460    return T_IS_EQUAL;
1461}
1462
1463<ST_IN_SCRIPTING>"!="|"<>" {
1464    return T_IS_NOT_EQUAL;
1465}
1466
1467<ST_IN_SCRIPTING>"<=>" {
1468    return T_SPACESHIP;
1469}
1470
1471<ST_IN_SCRIPTING>"<=" {
1472    return T_IS_SMALLER_OR_EQUAL;
1473}
1474
1475<ST_IN_SCRIPTING>">=" {
1476    return T_IS_GREATER_OR_EQUAL;
1477}
1478
1479<ST_IN_SCRIPTING>"+=" {
1480    return T_PLUS_EQUAL;
1481}
1482
1483<ST_IN_SCRIPTING>"-=" {
1484    return T_MINUS_EQUAL;
1485}
1486
1487<ST_IN_SCRIPTING>"*=" {
1488    return T_MUL_EQUAL;
1489}
1490
1491<ST_IN_SCRIPTING>"*\*" {
1492    return T_POW;
1493}
1494
1495<ST_IN_SCRIPTING>"*\*=" {
1496    return T_POW_EQUAL;
1497}
1498
1499<ST_IN_SCRIPTING>"/=" {
1500    return T_DIV_EQUAL;
1501}
1502
1503<ST_IN_SCRIPTING>".=" {
1504    return T_CONCAT_EQUAL;
1505}
1506
1507<ST_IN_SCRIPTING>"%=" {
1508    return T_MOD_EQUAL;
1509}
1510
1511<ST_IN_SCRIPTING>"<<=" {
1512    return T_SL_EQUAL;
1513}
1514
1515<ST_IN_SCRIPTING>">>=" {
1516    return T_SR_EQUAL;
1517}
1518
1519<ST_IN_SCRIPTING>"&=" {
1520    return T_AND_EQUAL;
1521}
1522
1523<ST_IN_SCRIPTING>"|=" {
1524    return T_OR_EQUAL;
1525}
1526
1527<ST_IN_SCRIPTING>"^=" {
1528    return T_XOR_EQUAL;
1529}
1530
1531<ST_IN_SCRIPTING>"||" {
1532    return T_BOOLEAN_OR;
1533}
1534
1535<ST_IN_SCRIPTING>"&&" {
1536    return T_BOOLEAN_AND;
1537}
1538
1539<ST_IN_SCRIPTING>"OR" {
1540    return T_LOGICAL_OR;
1541}
1542
1543<ST_IN_SCRIPTING>"AND" {
1544    return T_LOGICAL_AND;
1545}
1546
1547<ST_IN_SCRIPTING>"XOR" {
1548    return T_LOGICAL_XOR;
1549}
1550
1551<ST_IN_SCRIPTING>"<<" {
1552    return T_SL;
1553}
1554
1555<ST_IN_SCRIPTING>">>" {
1556    return T_SR;
1557}
1558
1559<ST_IN_SCRIPTING>{TOKENS} {
1560    return yytext[0];
1561}
1562
1563
1564<ST_IN_SCRIPTING>"{" {
1565    yy_push_state(ST_IN_SCRIPTING);
1566    return '{';
1567}
1568
1569
1570<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1571    yy_push_state(ST_LOOKING_FOR_VARNAME);
1572    return T_DOLLAR_OPEN_CURLY_BRACES;
1573}
1574
1575
1576<ST_IN_SCRIPTING>"}" {
1577    RESET_DOC_COMMENT();
1578    if (!zend_stack_is_empty(&SCNG(state_stack))) {
1579        yy_pop_state();
1580    }
1581    return '}';
1582}
1583
1584
1585<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1586    yyless(yyleng - 1);
1587    zend_copy_value(zendlval, yytext, yyleng);
1588    yy_pop_state();
1589    yy_push_state(ST_IN_SCRIPTING);
1590    return T_STRING_VARNAME;
1591}
1592
1593
1594<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1595    yyless(0);
1596    yy_pop_state();
1597    yy_push_state(ST_IN_SCRIPTING);
1598    goto restart;
1599}
1600
1601<ST_IN_SCRIPTING>{BNUM} {
1602    char *bin = yytext + 2; /* Skip "0b" */
1603    int len = yyleng - 2;
1604    char *end;
1605
1606    /* Skip any leading 0s */
1607    while (*bin == '0') {
1608        ++bin;
1609        --len;
1610    }
1611
1612    if (len < SIZEOF_ZEND_LONG * 8) {
1613        if (len == 0) {
1614            ZVAL_LONG(zendlval, 0);
1615        } else {
1616            errno = 0;
1617            ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1618            ZEND_ASSERT(!errno && end == yytext + yyleng);
1619        }
1620        return T_LNUMBER;
1621    } else {
1622        ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
1623        /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1624        ZEND_ASSERT(end == yytext + yyleng);
1625        return T_DNUMBER;
1626    }
1627}
1628
1629<ST_IN_SCRIPTING>{LNUM} {
1630    char *end;
1631    if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1632        errno = 0;
1633        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1634        /* This isn't an assert, we need to ensure 019 isn't valid octal
1635         * Because the lexing itself doesn't do that for us
1636         */
1637        if (end != yytext + yyleng) {
1638            zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE);
1639            return T_ERROR;
1640        }
1641    } else {
1642        errno = 0;
1643        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1644        if (errno == ERANGE) { /* Overflow */
1645            errno = 0;
1646            if (yytext[0] == '0') { /* octal overflow */
1647                errno = 0;
1648                ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
1649            } else {
1650                ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
1651            }
1652            /* Also not an assert for the same reason */
1653            if (end != yytext + yyleng) {
1654                zend_throw_exception(zend_get_parse_exception(),
1655                    "Invalid numeric literal", E_PARSE);
1656                return T_ERROR;
1657            }
1658            ZEND_ASSERT(!errno);
1659            return T_DNUMBER;
1660        }
1661        /* Also not an assert for the same reason */
1662        if (end != yytext + yyleng) {
1663            zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE);
1664            return T_ERROR;
1665        }
1666    }
1667    ZEND_ASSERT(!errno);
1668    return T_LNUMBER;
1669}
1670
1671<ST_IN_SCRIPTING>{HNUM} {
1672    char *hex = yytext + 2; /* Skip "0x" */
1673    int len = yyleng - 2;
1674    char *end;
1675
1676    /* Skip any leading 0s */
1677    while (*hex == '0') {
1678        hex++;
1679        len--;
1680    }
1681
1682    if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1683        if (len == 0) {
1684            ZVAL_LONG(zendlval, 0);
1685        } else {
1686            errno = 0;
1687            ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
1688            ZEND_ASSERT(!errno && end == hex + len);
1689        }
1690        return T_LNUMBER;
1691    } else {
1692        ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
1693        /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1694        ZEND_ASSERT(end == hex + len);
1695        return T_DNUMBER;
1696    }
1697}
1698
1699<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1700    if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1701        char *end;
1702        errno = 0;
1703        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 10));
1704        if (errno == ERANGE) {
1705            goto string;
1706        }
1707        ZEND_ASSERT(end == yytext + yyleng);
1708    } else {
1709string:
1710        ZVAL_STRINGL(zendlval, yytext, yyleng);
1711    }
1712    return T_NUM_STRING;
1713}
1714
1715<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1716    ZVAL_STRINGL(zendlval, yytext, yyleng);
1717    return T_NUM_STRING;
1718}
1719
1720<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1721    const char *end;
1722
1723    ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
1724    /* errno isn't checked since we allow HUGE_VAL/INF overflow */
1725    ZEND_ASSERT(end == yytext + yyleng);
1726    return T_DNUMBER;
1727}
1728
1729<ST_IN_SCRIPTING>"__CLASS__" {
1730    return T_CLASS_C;
1731}
1732
1733<ST_IN_SCRIPTING>"__TRAIT__" {
1734    return T_TRAIT_C;
1735}
1736
1737<ST_IN_SCRIPTING>"__FUNCTION__" {
1738    return T_FUNC_C;
1739}
1740
1741<ST_IN_SCRIPTING>"__METHOD__" {
1742    return T_METHOD_C;
1743}
1744
1745<ST_IN_SCRIPTING>"__LINE__" {
1746    return T_LINE;
1747}
1748
1749<ST_IN_SCRIPTING>"__FILE__" {
1750    return T_FILE;
1751}
1752
1753<ST_IN_SCRIPTING>"__DIR__" {
1754    return T_DIR;
1755}
1756
1757<ST_IN_SCRIPTING>"__NAMESPACE__" {
1758    return T_NS_C;
1759}
1760
1761
1762<INITIAL>"<?=" {
1763    BEGIN(ST_IN_SCRIPTING);
1764    return T_OPEN_TAG_WITH_ECHO;
1765}
1766
1767
1768<INITIAL>"<?php"([ \t]|{NEWLINE}) {
1769    HANDLE_NEWLINE(yytext[yyleng-1]);
1770    BEGIN(ST_IN_SCRIPTING);
1771    return T_OPEN_TAG;
1772}
1773
1774
1775<INITIAL>"<?" {
1776    if (CG(short_tags)) {
1777        BEGIN(ST_IN_SCRIPTING);
1778        return T_OPEN_TAG;
1779    } else {
1780        goto inline_char_handler;
1781    }
1782}
1783
1784<INITIAL>{ANY_CHAR} {
1785    if (YYCURSOR > YYLIMIT) {
1786        return 0;
1787    }
1788
1789inline_char_handler:
1790
1791    while (1) {
1792        YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1793
1794        YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1795
1796        if (YYCURSOR >= YYLIMIT) {
1797            break;
1798        }
1799
1800        if (*YYCURSOR == '?') {
1801            if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1802
1803                YYCURSOR--;
1804                break;
1805            }
1806        }
1807    }
1808
1809    yyleng = YYCURSOR - SCNG(yy_text);
1810
1811    if (SCNG(output_filter)) {
1812        size_t readsize;
1813        char *s = NULL;
1814        size_t sz = 0;
1815        // TODO: avoid reallocation ???
1816        readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng);
1817        ZVAL_STRINGL(zendlval, s, sz);
1818        efree(s);
1819        if (readsize < yyleng) {
1820            yyless(readsize);
1821        }
1822    } else {
1823      ZVAL_STRINGL(zendlval, yytext, yyleng);
1824    }
1825    HANDLE_NEWLINES(yytext, yyleng);
1826    return T_INLINE_HTML;
1827}
1828
1829
1830/* Make sure a label character follows "->", otherwise there is no property
1831 * and "->" will be taken literally
1832 */
1833<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1834    yyless(yyleng - 3);
1835    yy_push_state(ST_LOOKING_FOR_PROPERTY);
1836    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1837    return T_VARIABLE;
1838}
1839
1840/* A [ always designates a variable offset, regardless of what follows
1841 */
1842<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1843    yyless(yyleng - 1);
1844    yy_push_state(ST_VAR_OFFSET);
1845    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1846    return T_VARIABLE;
1847}
1848
1849<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1850    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1851    return T_VARIABLE;
1852}
1853
1854<ST_VAR_OFFSET>"]" {
1855    yy_pop_state();
1856    return ']';
1857}
1858
1859<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1860    /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1861    return yytext[0];
1862}
1863
1864<ST_VAR_OFFSET>[ \n\r\t\\'#] {
1865    /* Invalid rule to return a more explicit parse error with proper line number */
1866    yyless(0);
1867    yy_pop_state();
1868    ZVAL_NULL(zendlval);
1869    return T_ENCAPSED_AND_WHITESPACE;
1870}
1871
1872<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1873    zend_copy_value(zendlval, yytext, yyleng);
1874    return T_STRING;
1875}
1876
1877
1878<ST_IN_SCRIPTING>"#"|"//" {
1879    while (YYCURSOR < YYLIMIT) {
1880        switch (*YYCURSOR++) {
1881            case '\r':
1882                if (*YYCURSOR == '\n') {
1883                    YYCURSOR++;
1884                }
1885                /* fall through */
1886            case '\n':
1887                CG(zend_lineno)++;
1888                break;
1889            case '?':
1890                if (*YYCURSOR == '>') {
1891                    YYCURSOR--;
1892                    break;
1893                }
1894                /* fall through */
1895            default:
1896                continue;
1897        }
1898
1899        break;
1900    }
1901
1902    yyleng = YYCURSOR - SCNG(yy_text);
1903
1904    return T_COMMENT;
1905}
1906
1907<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1908    int doc_com;
1909
1910    if (yyleng > 2) {
1911        doc_com = 1;
1912        RESET_DOC_COMMENT();
1913    } else {
1914        doc_com = 0;
1915    }
1916
1917    while (YYCURSOR < YYLIMIT) {
1918        if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1919            break;
1920        }
1921    }
1922
1923    if (YYCURSOR < YYLIMIT) {
1924        YYCURSOR++;
1925    } else {
1926        zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1927    }
1928
1929    yyleng = YYCURSOR - SCNG(yy_text);
1930    HANDLE_NEWLINES(yytext, yyleng);
1931
1932    if (doc_com) {
1933        CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1934        return T_DOC_COMMENT;
1935    }
1936
1937    return T_COMMENT;
1938}
1939
1940<ST_IN_SCRIPTING>"?>"{NEWLINE}? {
1941    BEGIN(INITIAL);
1942    return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1943}
1944
1945
1946<ST_IN_SCRIPTING>b?['] {
1947    register char *s, *t;
1948    char *end;
1949    int bprefix = (yytext[0] != '\'') ? 1 : 0;
1950
1951    while (1) {
1952        if (YYCURSOR < YYLIMIT) {
1953            if (*YYCURSOR == '\'') {
1954                YYCURSOR++;
1955                yyleng = YYCURSOR - SCNG(yy_text);
1956
1957                break;
1958            } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1959                YYCURSOR++;
1960            }
1961        } else {
1962            yyleng = YYLIMIT - SCNG(yy_text);
1963
1964            /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1965             * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1966             * rule, which continued in ST_IN_SCRIPTING state after the quote */
1967            ZVAL_NULL(zendlval);
1968            return T_ENCAPSED_AND_WHITESPACE;
1969        }
1970    }
1971
1972    ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
1973
1974    /* convert escape sequences */
1975    s = t = Z_STRVAL_P(zendlval);
1976    end = s+Z_STRLEN_P(zendlval);
1977    while (s<end) {
1978        if (*s=='\\') {
1979            s++;
1980
1981            switch(*s) {
1982                case '\\':
1983                case '\'':
1984                    *t++ = *s;
1985                    Z_STRLEN_P(zendlval)--;
1986                    break;
1987                default:
1988                    *t++ = '\\';
1989                    *t++ = *s;
1990                    break;
1991            }
1992        } else {
1993            *t++ = *s;
1994        }
1995
1996        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1997            CG(zend_lineno)++;
1998        }
1999        s++;
2000    }
2001    *t = 0;
2002
2003    if (SCNG(output_filter)) {
2004        size_t sz = 0;
2005        char *str = NULL;
2006        s = Z_STRVAL_P(zendlval);
2007        // TODO: avoid reallocation ???
2008        SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
2009        ZVAL_STRINGL(zendlval, str, sz);
2010    }
2011    return T_CONSTANT_ENCAPSED_STRING;
2012}
2013
2014
/* Opening double quote in scripting state, optionally prefixed with b.
 * The action looks ahead for the closing quote. If it is reached before a
 * dollar sign followed by a label start or an opening brace, or an opening
 * brace followed by a dollar sign, the literal is handed to
 * zend_scan_escape_string() and returned as one T_CONSTANT_ENCAPSED_STRING.
 * Otherwise only the quote itself is returned and the scanner enters
 * ST_DOUBLE_QUOTES. */
2015<ST_IN_SCRIPTING>b?["] {
        /* 1 when the match begins with the prefix character instead of the quote */
2016    int bprefix = (yytext[0] != '"') ? 1 : 0;
2017
2018    while (YYCURSOR < YYLIMIT) {
2019        switch (*YYCURSOR++) {
2020            case '"':
                    /* Closing quote reached first: no interpolation in this literal.
                     * The offset and length skip the prefix and both quotes. */
2021                yyleng = YYCURSOR - SCNG(yy_text);
2022                if (zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == FAILURE) {
2023                    return T_ERROR;
2024                }
2025                return T_CONSTANT_ENCAPSED_STRING;
2026            case '$':
                    /* Dollar sign followed by a label start or an opening brace stops the scan */
2027                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2028                    break;
2029                }
2030                continue;
2031            case '{':
                    /* Opening brace followed by a dollar sign stops the scan */
2032                if (*YYCURSOR == '$') {
2033                    break;
2034                }
2035                continue;
2036            case '\\':
                    /* Step over the character after the backslash so it is not examined */
2037                if (YYCURSOR < YYLIMIT) {
2038                    YYCURSOR++;
2039                }
2040                /* fall through */
2041            default:
2042                continue;
2043        }
2044
            /* Only reached through a break above: back up onto the character that stopped the scan */
2045        YYCURSOR--;
2046        break;
2047    }
2048
2049    /* Remember how much was scanned to save rescanning */
2050    SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2051
        /* Move the cursor back to yy_text + yyleng; yyleng presumably still holds
         * the length of the matched opening token here - TODO confirm */
2052    YYCURSOR = SCNG(yy_text) + yyleng;
2053
2054    BEGIN(ST_DOUBLE_QUOTES);
2055    return '"';
2056}
2057
2058
/* Start of a heredoc or nowdoc: optional b prefix, <<<, optional tabs and
 * spaces, a label that may be wrapped in single or double quotes, and a newline.
 * The action allocates a zend_heredoc_label, stores a copy of the label text and
 * its length, selects ST_NOWDOC for a single-quoted label and ST_HEREDOC
 * otherwise, pushes the label on heredoc_label_stack and returns
 * T_START_HEREDOC. */
2059<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2060    char *s;
        /* 1 when the match begins with the prefix character instead of the first < */
2061    int bprefix = (yytext[0] != '<') ? 1 : 0;
2062    zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2063
2064    CG(zend_lineno)++;
        /* Match length minus the prefix, the three < characters and the trailing
         * newline, which takes two bytes when a carriage return precedes it */
2065    heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2066    s = yytext+bprefix+3;
        /* Leading blanks are not part of the label */
2067    while ((*s == ' ') || (*s == '\t')) {
2068        s++;
2069        heredoc_label->length--;
2070    }
2071
2072    if (*s == '\'') {
            /* Single-quoted label: skip the quote and drop both quotes from the length */
2073        s++;
2074        heredoc_label->length -= 2;
2075
2076        BEGIN(ST_NOWDOC);
2077    } else {
2078        if (*s == '"') {
                /* Double-quoted label: skip the quote and drop both quotes from the length */
2079            s++;
2080            heredoc_label->length -= 2;
2081        }
2082
2083        BEGIN(ST_HEREDOC);
2084    }
2085
2086    heredoc_label->label = estrndup(s, heredoc_label->length);
2087
2088    /* Check for ending label on the next line */
2089    if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2090        YYCTYPE *end = YYCURSOR + heredoc_label->length;
2091
            /* An optional semicolon may follow the label */
2092        if (*end == ';') {
2093            end++;
2094        }
2095
            /* Label directly followed by a line break: the body is empty, so the
             * state chosen above is replaced by ST_END_HEREDOC */
2096        if (*end == '\n' || *end == '\r') {
2097            BEGIN(ST_END_HEREDOC);
2098        }
2099    }
2100
2101    zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2102
2103    return T_START_HEREDOC;
2104}
2105
2106
/* Opening backtick in scripting state: enter ST_BACKQUOTE and return the
 * backtick character itself as the token. */
2107<ST_IN_SCRIPTING>[`] {
2108    BEGIN(ST_BACKQUOTE);
2109    return '`';
2110}
2111
2112
/* Closing heredoc or nowdoc label. Pops the top entry of heredoc_label_stack,
 * makes the token span heredoc_label->length bytes, releases the label and
 * returns to ST_IN_SCRIPTING with T_END_HEREDOC. */
2113<ST_END_HEREDOC>{ANY_CHAR} {
2114    zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2115
        /* The - 1 presumably accounts for the one character already consumed by
         * the {ANY_CHAR} match - TODO confirm */
2116    YYCURSOR += heredoc_label->length - 1;
2117    yyleng = heredoc_label->length;
2118
        /* Length has been read above, so the label can be destroyed and freed now */
2119    heredoc_label_dtor(heredoc_label);
2120    efree(heredoc_label);
2121
2122    BEGIN(ST_IN_SCRIPTING);
2123    return T_END_HEREDOC;
2124}
2125
2126
/* Opening brace immediately followed by a dollar sign inside a double-quoted
 * string, backtick string or heredoc. Stores the brace character in the long
 * slot of zendlval, pushes ST_IN_SCRIPTING on the state stack and returns
 * T_CURLY_OPEN. */
2127<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2128    Z_LVAL_P(zendlval) = (zend_long) '{';
2129    yy_push_state(ST_IN_SCRIPTING);
        /* NOTE(review): yyless(1) presumably keeps only the brace in this token so
         * the dollar sign is scanned again in the new state - confirm against the macro */
2130    yyless(1);
2131    return T_CURLY_OPEN;
2132}
2133
2134
/* Closing double quote: leave ST_DOUBLE_QUOTES for ST_IN_SCRIPTING and return
 * the quote character itself as the token. */
2135<ST_DOUBLE_QUOTES>["] {
2136    BEGIN(ST_IN_SCRIPTING);
2137    return '"';
2138}
2139
/* Closing backtick: leave ST_BACKQUOTE for ST_IN_SCRIPTING and return the
 * backtick character itself as the token. */
2140<ST_BACKQUOTE>[`] {
2141    BEGIN(ST_IN_SCRIPTING);
2142    return '`';
2143}
2144
2145
/* Literal text inside a double-quoted string. The token is extended up to, but
 * not including, the next double quote, a dollar sign followed by a label start
 * or an opening brace, or an opening brace followed by a dollar sign. The text
 * is then passed to zend_scan_escape_string() and returned as
 * T_ENCAPSED_AND_WHITESPACE, or T_ERROR when that call reports FAILURE. */
2146<ST_DOUBLE_QUOTES>{ANY_CHAR} {
        /* A non-zero stored length means this stretch was already scanned (the
         * rule for the opening quote stores one); reuse it once and clear it */
2147    if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2148        YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2149        SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2150
2151        goto double_quotes_scan_done;
2152    }
2153
        /* The match went past YYLIMIT, so no input is left; 0 is presumably the
         * end-of-input result for the caller - TODO confirm */
2154    if (YYCURSOR > YYLIMIT) {
2155        return 0;
2156    }
        /* The matched character is a backslash: also consume the character after it */
2157    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2158        YYCURSOR++;
2159    }
2160
2161    while (YYCURSOR < YYLIMIT) {
2162        switch (*YYCURSOR++) {
2163            case '"':
                    /* Closing quote ends the literal text */
2164                break;
2165            case '$':
2166                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2167                    break;
2168                }
2169                continue;
2170            case '{':
2171                if (*YYCURSOR == '$') {
2172                    break;
2173                }
2174                continue;
2175            case '\\':
                    /* Step over the character after the backslash so it is not examined */
2176                if (YYCURSOR < YYLIMIT) {
2177                    YYCURSOR++;
2178                }
2179                /* fall through */
2180            default:
2181                continue;
2182        }
2183
            /* Only reached through a break above: back up so the terminating
             * character is left out of this token */
2184        YYCURSOR--;
2185        break;
2186    }
2187
2188double_quotes_scan_done:
2189    yyleng = YYCURSOR - SCNG(yy_text);
2190
2191    if (zend_scan_escape_string(zendlval, yytext, yyleng, '"') == FAILURE) {
2192        return T_ERROR;
2193    }
2194    return T_ENCAPSED_AND_WHITESPACE;
2195}
2196
2197
/* Literal text inside a backtick string. The token is extended up to, but not
 * including, the next backtick, a dollar sign followed by a label start or an
 * opening brace, or an opening brace followed by a dollar sign. The text is
 * then passed to zend_scan_escape_string() and returned as
 * T_ENCAPSED_AND_WHITESPACE, or T_ERROR when that call reports FAILURE. */
2198<ST_BACKQUOTE>{ANY_CHAR} {
        /* The match went past YYLIMIT, so no input is left; 0 is presumably the
         * end-of-input result for the caller - TODO confirm */
2199    if (YYCURSOR > YYLIMIT) {
2200        return 0;
2201    }
        /* The matched character is a backslash: also consume the character after it */
2202    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2203        YYCURSOR++;
2204    }
2205
2206    while (YYCURSOR < YYLIMIT) {
2207        switch (*YYCURSOR++) {
2208            case '`':
                    /* Closing backtick ends the literal text */
2209                break;
2210            case '$':
2211                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2212                    break;
2213                }
2214                continue;
2215            case '{':
2216                if (*YYCURSOR == '$') {
2217                    break;
2218                }
2219                continue;
2220            case '\\':
                    /* Step over the character after the backslash so it is not examined */
2221                if (YYCURSOR < YYLIMIT) {
2222                    YYCURSOR++;
2223                }
2224                /* fall through */
2225            default:
2226                continue;
2227        }
2228
            /* Only reached through a break above: back up so the terminating
             * character is left out of this token */
2229        YYCURSOR--;
2230        break;
2231    }
2232
2233    yyleng = YYCURSOR - SCNG(yy_text);
2234
2235    if (zend_scan_escape_string(zendlval, yytext, yyleng, '`') == FAILURE) {
2236        return T_ERROR;
2237    }
2238    return T_ENCAPSED_AND_WHITESPACE;
2239}
2240
2241
/* Text inside a heredoc body. Scans forward until either the closing label is
 * found at the start of a line (optionally followed by a semicolon, then a line
 * break) or an interpolation start is reached: a dollar sign followed by a label
 * start or an opening brace, or an opening brace followed by a dollar sign.
 * The scanned text, without the line break that precedes the closing label, is
 * passed to zend_scan_escape_string() and returned as T_ENCAPSED_AND_WHITESPACE. */
2242<ST_HEREDOC>{ANY_CHAR} {
        /* Bytes of the line break in front of the closing label (0, 1 or 2);
         * they stay in yyleng but are left out of the returned string */
2243    int newline = 0;
2244
        /* Label of the heredoc being scanned; it stays on the stack here */
2245    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2246
        /* The match went past YYLIMIT, so no input is left; 0 is presumably the
         * end-of-input result for the caller - TODO confirm */
2247    if (YYCURSOR > YYLIMIT) {
2248        return 0;
2249    }
2250
        /* Step back so the loop below also examines the character that was matched */
2251    YYCURSOR--;
2252
2253    while (YYCURSOR < YYLIMIT) {
2254        switch (*YYCURSOR++) {
2255            case '\r':
                    /* Treat a carriage return plus line feed as one line break */
2256                if (*YYCURSOR == '\n') {
2257                    YYCURSOR++;
2258                }
2259                /* fall through */
2260            case '\n':
2261                /* Check for ending label on the next line */
2262                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2263                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2264
                        /* An optional semicolon may follow the label */
2265                    if (*end == ';') {
2266                        end++;
2267                    }
2268
2269                    if (*end == '\n' || *end == '\r') {
2270                        /* newline before label will be subtracted from returned text, but
2271                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2272                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2273                            newline = 2; /* Windows newline */
2274                        } else {
2275                            newline = 1;
2276                        }
2277
2278                        CG(increment_lineno) = 1; /* For newline before label */
2279                        BEGIN(ST_END_HEREDOC);
2280
2281                        goto heredoc_scan_done;
2282                    }
2283                }
2284                continue;
2285            case '$':
2286                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2287                    break;
2288                }
2289                continue;
2290            case '{':
2291                if (*YYCURSOR == '$') {
2292                    break;
2293                }
2294                continue;
2295            case '\\':
                    /* Step over the escaped character, except a line break, which
                     * must still go through the line break cases above */
2296                if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2297                    YYCURSOR++;
2298                }
2299                /* fall through */
2300            default:
2301                continue;
2302        }
2303
            /* Only reached through a break above: back up so the character that
             * starts the interpolation is left out of this token */
2304        YYCURSOR--;
2305        break;
2306    }
2307
2308heredoc_scan_done:
2309    yyleng = YYCURSOR - SCNG(yy_text);
2310
2311    if (zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0) == FAILURE) {
2312        return T_ERROR;
2313    }
2314    return T_ENCAPSED_AND_WHITESPACE;
2315}
2316
2317
/* Text inside a nowdoc body. Scans forward until the closing label is found at
 * the start of a line (optionally followed by a semicolon, then a line break)
 * or the input ends. No escape or interpolation handling is done here: the text,
 * without the line break that precedes the closing label, is copied with
 * zend_copy_value() and returned as T_ENCAPSED_AND_WHITESPACE. */
2318<ST_NOWDOC>{ANY_CHAR} {
        /* Bytes of the line break in front of the closing label (0, 1 or 2);
         * they stay in yyleng but are left out of the copied value */
2319    int newline = 0;
2320
        /* Label of the nowdoc being scanned; it stays on the stack here */
2321    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2322
        /* The match went past YYLIMIT, so no input is left; 0 is presumably the
         * end-of-input result for the caller - TODO confirm */
2323    if (YYCURSOR > YYLIMIT) {
2324        return 0;
2325    }
2326
        /* Step back so the loop below also examines the character that was matched */
2327    YYCURSOR--;
2328
2329    while (YYCURSOR < YYLIMIT) {
2330        switch (*YYCURSOR++) {
2331            case '\r':
                    /* Treat a carriage return plus line feed as one line break */
2332                if (*YYCURSOR == '\n') {
2333                    YYCURSOR++;
2334                }
2335                /* fall through */
2336            case '\n':
2337                /* Check for ending label on the next line */
2338                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2339                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2340
                        /* An optional semicolon may follow the label */
2341                    if (*end == ';') {
2342                        end++;
2343                    }
2344
2345                    if (*end == '\n' || *end == '\r') {
2346                        /* newline before label will be subtracted from returned text, but
2347                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2348                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2349                            newline = 2; /* Windows newline */
2350                        } else {
2351                            newline = 1;
2352                        }
2353
2354                        CG(increment_lineno) = 1; /* For newline before label */
2355                        BEGIN(ST_END_HEREDOC);
2356
2357                        goto nowdoc_scan_done;
2358                    }
2359                }
2360                /* fall through */
2361            default:
2362                continue;
2363        }
2364    }
2365
2366nowdoc_scan_done:
2367    yyleng = YYCURSOR - SCNG(yy_text);
2368
2369    zend_copy_value(zendlval, yytext, yyleng - newline);
        /* NOTE(review): HANDLE_NEWLINES presumably updates the line counter for
         * line breaks in the given range - confirm against the macro definition */
2370    HANDLE_NEWLINES(yytext, yyleng - newline);
2371    return T_ENCAPSED_AND_WHITESPACE;
2372}
2373
2374
/* {ANY_CHAR} rule for ST_IN_SCRIPTING and ST_VAR_OFFSET. Raises an
 * E_COMPILE_WARNING that names the character, its numeric value and the current
 * scanner state, then jumps to the restart label, which is defined outside this
 * rule. */
2375<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
        /* The match went past YYLIMIT, so no input is left; 0 is presumably the
         * end-of-input result for the caller - TODO confirm */
2376    if (YYCURSOR > YYLIMIT) {
2377        return 0;
2378    }
2379
2380    zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2381    goto restart;
2382}
2383
2384*/
2385}
2386