1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Marcus Boerger <helly@php.net>                              |
16   |          Nuno Lopes <nlopess@php.net>                                |
17   |          Scott MacVicar <scottmac@php.net>                           |
18   | Flex version authors:                                                |
19   |          Andi Gutmans <andi@zend.com>                                |
20   |          Zeev Suraski <zeev@zend.com>                                |
21   +----------------------------------------------------------------------+
22*/
23
24/* $Id$ */
25
/*
 * YYDEBUG(s, c): trace hook.  Disabled by default (expands to nothing);
 * change the 0 below to 1 to print the state number and character that
 * are passed in.
 */
#define YYDEBUG(s, c)
#if 0
# undef YYDEBUG
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#endif
31
32#include "zend_language_scanner_defs.h"
33
34#include <errno.h>
35#include "zend.h"
36#ifdef PHP_WIN32
37# include <Winuser.h>
38#endif
39#include "zend_alloc.h"
40#include <zend_language_parser.h>
41#include "zend_compile.h"
42#include "zend_language_scanner.h"
43#include "zend_highlight.h"
44#include "zend_constants.h"
45#include "zend_variables.h"
46#include "zend_operators.h"
47#include "zend_API.h"
48#include "zend_strtod.h"
49#include "zend_exceptions.h"
50#include "zend_virtual_cwd.h"
51#include "tsrm_config_common.h"
52
/*
 * Glue between the generated scanner and the scanner globals (SCNG).
 * Macro parameters are parenthesized so that compound expressions passed
 * as arguments keep their intended grouping.
 */

/* The scanner reads its input as unsigned bytes. */
#define YYCTYPE   unsigned char
/*
 * Makes the enclosing function return 0 once the cursor plus the n
 * requested bytes reaches YYLIMIT + ZEND_MMAP_AHEAD.
 */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* Current scanner condition (start state). */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Condition identifiers carry a "yyc" prefix. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/*
 * Keep only the first x bytes of the current token.  The cursor is moved
 * before yyleng is overwritten, so x may itself be written in terms of
 * yyleng.  x is expanded twice and must be free of side effects.
 */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72
73/* perform sanity check. If this message is triggered you should
74   increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75/*!max:re2c */
76#if ZEND_MMAP_AHEAD < YYMAXFILL
77# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78#endif
79
80#ifdef HAVE_STDARG_H
81# include <stdarg.h>
82#endif
83
84#ifdef HAVE_UNISTD_H
85# include <unistd.h>
86#endif
87
88/* Globals Macros */
89#define SCNG    LANG_SCNG
90#ifdef ZTS
91ZEND_API ts_rsrc_id language_scanner_globals_id;
92#else
93ZEND_API zend_php_scanner_globals language_scanner_globals;
94#endif
95
/*
 * Increment CG(zend_lineno) once per line break found in the l bytes that
 * start at s.  A "\n" and a lone "\r" each count; the "\r" of a "\r\n"
 * pair is skipped so that the pair is counted once, at its "\n".
 * Note: the byte after a "\r" is inspected even when that "\r" is the
 * last byte of the span.
 */
#define HANDLE_NEWLINES(s, l)                                                   \
do {                                                                            \
    char *p = (s), *boundary = p+(l);                                           \
                                                                                \
    for (; p < boundary; p++) {                                                 \
        if (*p == '\r' && p[1] == '\n') {                                       \
            continue;                                                           \
        }                                                                       \
        if (*p == '\n' || *p == '\r') {                                         \
            CG(zend_lineno)++;                                                  \
        }                                                                       \
    }                                                                           \
} while (0)
107
/*
 * Increment CG(zend_lineno) when the single character c is a line break
 * ('\n' or '\r').  The parameter is parenthesized so that a compound
 * argument (for example a conditional expression) is compared as a whole.
 * c is expanded twice and must be free of side effects.
 */
#define HANDLE_NEWLINE(c) \
{ \
    if ((c) == '\n' || (c) == '\r') { \
        CG(zend_lineno)++; \
    } \
}
114
/*
 * Store / read back SCNG(scanned_string_len): the length of the leading
 * part of a double-quoted string, i.e. what was scanned up to its first
 * variable.
 */
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
118
/* True for '_', an ASCII letter, or any value >= 0x7F. */
#define IS_LABEL_START(c) ((c) == '_' || ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || (c) >= 0x7F)

/* True for the octal digits '0'..'7'. */
#define ZEND_IS_OCT(c)  ((c) <= '7' && (c) >= '0')
/* True for '0'..'9', 'a'..'f' and 'A'..'F'. */
#define ZEND_IS_HEX(c)  (((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F') || ((c) >= '0' && (c) <= '9'))
123
124BEGIN_EXTERN_C()
125
126static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127{
128    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129    ZEND_ASSERT(internal_encoding);
130    return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131}
132
133static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
134{
135    return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
136}
137
138static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
139{
140    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
142}
143
144static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
145{
146    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
147    ZEND_ASSERT(internal_encoding);
148    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
150}
151
152
153static void _yy_push_state(int new_state TSRMLS_DC)
154{
155    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156    YYSETCONDITION(new_state);
157}
158
159#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
161static void yy_pop_state(TSRMLS_D)
162{
163    int *stack_state = zend_stack_top(&SCNG(state_stack));
164    YYSETCONDITION(*stack_state);
165    zend_stack_del_top(&SCNG(state_stack));
166}
167
168static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
169{
170    YYCURSOR       = (YYCTYPE*)str;
171    YYLIMIT        = YYCURSOR + len;
172    if (!SCNG(yy_start)) {
173        SCNG(yy_start) = YYCURSOR;
174    }
175}
176
177void startup_scanner(TSRMLS_D)
178{
179    CG(parse_error) = 0;
180    CG(doc_comment) = NULL;
181    zend_stack_init(&SCNG(state_stack), sizeof(int));
182    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
183}
184
185static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
186    efree(heredoc_label->label);
187}
188
189void shutdown_scanner(TSRMLS_D)
190{
191    CG(parse_error) = 0;
192    RESET_DOC_COMMENT();
193    zend_stack_destroy(&SCNG(state_stack));
194    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
195    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
196}
197
198ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
199{
200    lex_state->yy_leng   = SCNG(yy_leng);
201    lex_state->yy_start  = SCNG(yy_start);
202    lex_state->yy_text   = SCNG(yy_text);
203    lex_state->yy_cursor = SCNG(yy_cursor);
204    lex_state->yy_marker = SCNG(yy_marker);
205    lex_state->yy_limit  = SCNG(yy_limit);
206
207    lex_state->state_stack = SCNG(state_stack);
208    zend_stack_init(&SCNG(state_stack), sizeof(int));
209
210    lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
211    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
212
213    lex_state->in = SCNG(yy_in);
214    lex_state->yy_state = YYSTATE;
215    lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
216    lex_state->lineno = CG(zend_lineno);
217
218    lex_state->script_org = SCNG(script_org);
219    lex_state->script_org_size = SCNG(script_org_size);
220    lex_state->script_filtered = SCNG(script_filtered);
221    lex_state->script_filtered_size = SCNG(script_filtered_size);
222    lex_state->input_filter = SCNG(input_filter);
223    lex_state->output_filter = SCNG(output_filter);
224    lex_state->script_encoding = SCNG(script_encoding);
225}
226
227ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
228{
229    SCNG(yy_leng)   = lex_state->yy_leng;
230    SCNG(yy_start)  = lex_state->yy_start;
231    SCNG(yy_text)   = lex_state->yy_text;
232    SCNG(yy_cursor) = lex_state->yy_cursor;
233    SCNG(yy_marker) = lex_state->yy_marker;
234    SCNG(yy_limit)  = lex_state->yy_limit;
235
236    zend_stack_destroy(&SCNG(state_stack));
237    SCNG(state_stack) = lex_state->state_stack;
238
239    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
240    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
241    SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
242
243    SCNG(yy_in) = lex_state->in;
244    YYSETCONDITION(lex_state->yy_state);
245    CG(zend_lineno) = lex_state->lineno;
246    zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
247
248    if (SCNG(script_filtered)) {
249        efree(SCNG(script_filtered));
250        SCNG(script_filtered) = NULL;
251    }
252    SCNG(script_org) = lex_state->script_org;
253    SCNG(script_org_size) = lex_state->script_org_size;
254    SCNG(script_filtered) = lex_state->script_filtered;
255    SCNG(script_filtered_size) = lex_state->script_filtered_size;
256    SCNG(input_filter) = lex_state->input_filter;
257    SCNG(output_filter) = lex_state->output_filter;
258    SCNG(script_encoding) = lex_state->script_encoding;
259
260    RESET_DOC_COMMENT();
261}
262
263ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
264{
265    zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
266    /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
267    file_handle->opened_path = NULL;
268    if (file_handle->free_filename) {
269        file_handle->filename = NULL;
270    }
271}
272
/*
 * Byte order marks, as string literals.  Their length is sizeof(...)-1,
 * since the literal's terminating NUL is not part of the mark.  The
 * UTF-16 LE mark is a prefix of the UTF-32 LE mark.
 */
#define BOM_UTF8        "\xef\xbb\xbf"
#define BOM_UTF16_LE    "\xff\xfe"
#define BOM_UTF16_BE    "\xfe\xff"
#define BOM_UTF32_LE    "\xff\xfe\x00\x00"
#define BOM_UTF32_BE    "\x00\x00\xfe\xff"
278
279static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
280{
281    const unsigned char *p;
282    int wchar_size = 2;
283    int le = 0;
284
285    /* utf-16 or utf-32? */
286    p = script;
287    while ((p-script) < script_size) {
288        p = memchr(p, 0, script_size-(p-script)-2);
289        if (!p) {
290            break;
291        }
292        if (*(p+1) == '\0' && *(p+2) == '\0') {
293            wchar_size = 4;
294            break;
295        }
296
297        /* searching for UTF-32 specific byte orders, so this will do */
298        p += 4;
299    }
300
301    /* BE or LE? */
302    p = script;
303    while ((p-script) < script_size) {
304        if (*p == '\0' && *(p+wchar_size-1) != '\0') {
305            /* BE */
306            le = 0;
307            break;
308        } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
309            /* LE* */
310            le = 1;
311            break;
312        }
313        p += wchar_size;
314    }
315
316    if (wchar_size == 2) {
317        return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
318    } else {
319        return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
320    }
321
322    return NULL;
323}
324
325static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
326{
327    const zend_encoding *script_encoding = NULL;
328    int bom_size;
329    unsigned char *pos1, *pos2;
330
331    if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
332        return NULL;
333    }
334
335    /* check out BOM */
336    if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
337        script_encoding = zend_multibyte_encoding_utf32be;
338        bom_size = sizeof(BOM_UTF32_BE)-1;
339    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
340        script_encoding = zend_multibyte_encoding_utf32le;
341        bom_size = sizeof(BOM_UTF32_LE)-1;
342    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
343        script_encoding = zend_multibyte_encoding_utf16be;
344        bom_size = sizeof(BOM_UTF16_BE)-1;
345    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
346        script_encoding = zend_multibyte_encoding_utf16le;
347        bom_size = sizeof(BOM_UTF16_LE)-1;
348    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
349        script_encoding = zend_multibyte_encoding_utf8;
350        bom_size = sizeof(BOM_UTF8)-1;
351    }
352
353    if (script_encoding) {
354        /* remove BOM */
355        LANG_SCNG(script_org) += bom_size;
356        LANG_SCNG(script_org_size) -= bom_size;
357
358        return script_encoding;
359    }
360
361    /* script contains NULL bytes -> auto-detection */
362    if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
363        /* check if the NULL byte is after the __HALT_COMPILER(); */
364        pos2 = LANG_SCNG(script_org);
365
366        while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
367            pos2 = memchr(pos2, '_', pos1 - pos2);
368            if (!pos2) break;
369            pos2++;
370            if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
371                pos2 += sizeof("_HALT_COMPILER")-1;
372                while (*pos2 == ' '  ||
373                       *pos2 == '\t' ||
374                       *pos2 == '\r' ||
375                       *pos2 == '\n') {
376                    pos2++;
377                }
378                if (*pos2 == '(') {
379                    pos2++;
380                    while (*pos2 == ' '  ||
381                           *pos2 == '\t' ||
382                           *pos2 == '\r' ||
383                           *pos2 == '\n') {
384                        pos2++;
385                    }
386                    if (*pos2 == ')') {
387                        pos2++;
388                        while (*pos2 == ' '  ||
389                               *pos2 == '\t' ||
390                               *pos2 == '\r' ||
391                               *pos2 == '\n') {
392                            pos2++;
393                        }
394                        if (*pos2 == ';') {
395                            return NULL;
396                        }
397                    }
398                }
399            }
400        }
401        /* make best effort if BOM is missing */
402        return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
403    }
404
405    return NULL;
406}
407
408static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
409{
410    const zend_encoding *script_encoding;
411
412    if (CG(detect_unicode)) {
413        /* check out bom(byte order mark) and see if containing wchars */
414        script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
415        if (script_encoding != NULL) {
416            /* bom or wchar detection is prior to 'script_encoding' option */
417            return script_encoding;
418        }
419    }
420
421    /* if no script_encoding specified, just leave alone */
422    if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
423        return NULL;
424    }
425
426    /* if multiple encodings specified, detect automagically */
427    if (CG(script_encoding_list_size) > 1) {
428        return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
429    }
430
431    return CG(script_encoding_list)[0];
432}
433
434ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
435{
436    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
437    const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
438
439    if (!script_encoding) {
440        return FAILURE;
441    }
442
443    /* judge input/output filter */
444    LANG_SCNG(script_encoding) = script_encoding;
445    LANG_SCNG(input_filter) = NULL;
446    LANG_SCNG(output_filter) = NULL;
447
448    if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
449        if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
450            /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
451            LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
452            LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
453        } else {
454            LANG_SCNG(input_filter) = NULL;
455            LANG_SCNG(output_filter) = NULL;
456        }
457        return SUCCESS;
458    }
459
460    if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
461        LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
462        LANG_SCNG(output_filter) = NULL;
463    } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
464        LANG_SCNG(input_filter) = NULL;
465        LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
466    } else {
467        /* both script and internal encodings are incompatible w/ flex */
468        LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
469        LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
470    }
471
472    return 0;
473}
474
475ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
476{
477    const char *file_path = NULL;
478    char *buf;
479    size_t size, offset = 0;
480    zend_string *compiled_filename;
481
482    /* The shebang line was read, get the current position to obtain the buffer start */
483    if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
484        if ((offset = ftell(file_handle->handle.fp)) == -1) {
485            offset = 0;
486        }
487    }
488
489    if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
490        return FAILURE;
491    }
492
493    zend_llist_add_element(&CG(open_files), file_handle);
494    if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
495        zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
496        size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
497        fh->handle.stream.handle = (void*)(((char*)fh) + diff);
498        file_handle->handle.stream.handle = fh->handle.stream.handle;
499    }
500
501    /* Reset the scanner for scanning the new file */
502    SCNG(yy_in) = file_handle;
503    SCNG(yy_start) = NULL;
504
505    if (size != -1) {
506        if (CG(multibyte)) {
507            SCNG(script_org) = (unsigned char*)buf;
508            SCNG(script_org_size) = size;
509            SCNG(script_filtered) = NULL;
510
511            zend_multibyte_set_filter(NULL TSRMLS_CC);
512
513            if (SCNG(input_filter)) {
514                if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
515                    zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
516                            "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
517                }
518                buf = (char*)SCNG(script_filtered);
519                size = SCNG(script_filtered_size);
520            }
521        }
522        SCNG(yy_start) = (unsigned char *)buf - offset;
523        yy_scan_buffer(buf, size TSRMLS_CC);
524    } else {
525        zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
526    }
527
528    BEGIN(INITIAL);
529
530    if (file_handle->opened_path) {
531        file_path = file_handle->opened_path;
532    } else {
533        file_path = file_handle->filename;
534    }
535
536    compiled_filename = zend_string_init(file_path, strlen(file_path), 0);
537    zend_set_compiled_filename(compiled_filename TSRMLS_CC);
538    zend_string_release(compiled_filename);
539
540    if (CG(start_lineno)) {
541        CG(zend_lineno) = CG(start_lineno);
542        CG(start_lineno) = 0;
543    } else {
544        CG(zend_lineno) = 1;
545    }
546
547    RESET_DOC_COMMENT();
548    CG(increment_lineno) = 0;
549    return SUCCESS;
550}
551END_EXTERN_C()
552
553
554ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
555{
556    zend_lex_state original_lex_state;
557    zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
558    zend_op_array *original_active_op_array = CG(active_op_array);
559    int compiler_result;
560    zend_bool compilation_successful=0;
561    zval retval_zv;
562    zend_bool original_in_compilation = CG(in_compilation);
563
564    ZVAL_LONG(&retval_zv, 1);
565
566    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
567
568    if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
569        if (type==ZEND_REQUIRE) {
570            zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
571            zend_bailout();
572        } else {
573            zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
574        }
575        compilation_successful=0;
576    } else {
577        init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
578        CG(in_compilation) = 1;
579        CG(active_op_array) = op_array;
580        zend_stack_push(&CG(context_stack), (void *) &CG(context));
581        zend_init_compiler_context(TSRMLS_C);
582        CG(ast_arena) = zend_arena_create(1024 * 32);
583        compiler_result = zendparse(TSRMLS_C);
584        if (compiler_result != 0) { /* parser error */
585            zend_bailout();
586        }
587        zend_compile_top_stmt(CG(ast) TSRMLS_CC);
588        zend_ast_destroy(CG(ast));
589        zend_arena_destroy(CG(ast_arena));
590        zend_do_end_compilation(TSRMLS_C);
591        zend_emit_final_return(&retval_zv TSRMLS_CC);
592        CG(in_compilation) = original_in_compilation;
593        compilation_successful=1;
594    }
595
596    CG(active_op_array) = original_active_op_array;
597    if (compilation_successful) {
598        pass_two(op_array TSRMLS_CC);
599        zend_release_labels(0 TSRMLS_CC);
600    } else {
601        efree_size(op_array, sizeof(zend_op_array));
602        op_array = NULL;
603    }
604
605    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
606    return op_array;
607}
608
609
610zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
611{
612    zend_file_handle file_handle;
613    zval tmp;
614    zend_op_array *retval;
615    char *opened_path = NULL;
616
617    if (Z_TYPE_P(filename) != IS_STRING) {
618        tmp = *filename;
619        zval_copy_ctor(&tmp);
620        convert_to_string(&tmp);
621        filename = &tmp;
622    }
623    file_handle.filename = Z_STRVAL_P(filename);
624    file_handle.free_filename = 0;
625    file_handle.type = ZEND_HANDLE_FILENAME;
626    file_handle.opened_path = NULL;
627    file_handle.handle.fp = NULL;
628
629    retval = zend_compile_file(&file_handle, type TSRMLS_CC);
630    if (retval && file_handle.handle.stream.handle) {
631        if (!file_handle.opened_path) {
632            file_handle.opened_path = opened_path = estrndup(Z_STRVAL_P(filename), Z_STRLEN_P(filename));
633        }
634
635        zend_hash_str_add_empty_element(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path));
636
637        if (opened_path) {
638            efree(opened_path);
639        }
640    }
641    zend_destroy_file_handle(&file_handle TSRMLS_CC);
642
643    if (filename==&tmp) {
644        zval_dtor(&tmp);
645    }
646    return retval;
647}
648
649ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
650{
651    char *buf;
652    size_t size, old_len;
653    zend_string *new_compiled_filename;
654
655    /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
656    old_len = Z_STRLEN_P(str);
657    Z_STR_P(str) = zend_string_realloc(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
658    Z_TYPE_INFO_P(str) = IS_STRING_EX;
659    memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
660
661    SCNG(yy_in) = NULL;
662    SCNG(yy_start) = NULL;
663
664    buf = Z_STRVAL_P(str);
665    size = old_len;
666
667    if (CG(multibyte)) {
668        SCNG(script_org) = (unsigned char*)buf;
669        SCNG(script_org_size) = size;
670        SCNG(script_filtered) = NULL;
671
672        zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
673
674        if (SCNG(input_filter)) {
675            if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
676                zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
677                        "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
678            }
679            buf = (char*)SCNG(script_filtered);
680            size = SCNG(script_filtered_size);
681        }
682    }
683
684    yy_scan_buffer(buf, size TSRMLS_CC);
685
686    new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
687    zend_set_compiled_filename(new_compiled_filename TSRMLS_CC);
688    zend_string_release(new_compiled_filename);
689    CG(zend_lineno) = 1;
690    CG(increment_lineno) = 0;
691    RESET_DOC_COMMENT();
692    return SUCCESS;
693}
694
695
696ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
697{
698    size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
699    if (SCNG(input_filter)) {
700        size_t original_offset = offset, length = 0;
701        do {
702            unsigned char *p = NULL;
703            if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
704                return (size_t)-1;
705            }
706            efree(p);
707            if (length > original_offset) {
708                offset--;
709            } else if (length < original_offset) {
710                offset++;
711            }
712        } while (original_offset != length);
713    }
714    return offset;
715}
716
717
718zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
719{
720    zend_lex_state original_lex_state;
721    zend_op_array *op_array = NULL;
722    zval tmp;
723    zend_bool original_in_compilation = CG(in_compilation);
724
725    if (Z_STRLEN_P(source_string)==0) {
726        return NULL;
727    }
728
729    ZVAL_DUP(&tmp, source_string);
730    convert_to_string(&tmp);
731    source_string = &tmp;
732
733    CG(in_compilation) = 1;
734    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
735    if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC) == SUCCESS) {
736        CG(ast) = NULL;
737        CG(ast_arena) = zend_arena_create(1024 * 32);
738        BEGIN(ST_IN_SCRIPTING);
739
740        if (!zendparse(TSRMLS_C)) {
741            zend_op_array *original_active_op_array = CG(active_op_array);
742            op_array = emalloc(sizeof(zend_op_array));
743            init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
744            CG(active_op_array) = op_array;
745
746            zend_stack_push(&CG(context_stack), (void *) &CG(context));
747            zend_init_compiler_context(TSRMLS_C);
748            zend_compile_top_stmt(CG(ast) TSRMLS_CC);
749            zend_do_end_compilation(TSRMLS_C);
750            zend_emit_final_return(NULL TSRMLS_CC);
751            pass_two(op_array TSRMLS_CC);
752            zend_release_labels(0 TSRMLS_CC);
753
754            CG(active_op_array) = original_active_op_array;
755        }
756
757        zend_ast_destroy(CG(ast));
758        zend_arena_destroy(CG(ast_arena));
759    }
760
761    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
762    zval_dtor(&tmp);
763    CG(in_compilation) = original_in_compilation;
764    return op_array;
765}
766
767
768BEGIN_EXTERN_C()
769int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
770{
771    zend_lex_state original_lex_state;
772    zend_file_handle file_handle;
773
774    file_handle.type = ZEND_HANDLE_FILENAME;
775    file_handle.filename = filename;
776    file_handle.free_filename = 0;
777    file_handle.opened_path = NULL;
778    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
779    if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
780        zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
781        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
782        return FAILURE;
783    }
784    zend_highlight(syntax_highlighter_ini TSRMLS_CC);
785    if (SCNG(script_filtered)) {
786        efree(SCNG(script_filtered));
787        SCNG(script_filtered) = NULL;
788    }
789    zend_destroy_file_handle(&file_handle TSRMLS_CC);
790    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
791    return SUCCESS;
792}
793
794int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
795{
796    zend_lex_state original_lex_state;
797    zval tmp = *str;
798
799    str = &tmp;
800    zval_copy_ctor(str);
801    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
802    if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
803        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
804        return FAILURE;
805    }
806    BEGIN(INITIAL);
807    zend_highlight(syntax_highlighter_ini TSRMLS_CC);
808    if (SCNG(script_filtered)) {
809        efree(SCNG(script_filtered));
810        SCNG(script_filtered) = NULL;
811    }
812    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
813    zval_dtor(str);
814    return SUCCESS;
815}
816
817ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
818{
819    size_t length;
820    unsigned char *new_yy_start;
821
822    /* convert and set */
823    if (!SCNG(input_filter)) {
824        if (SCNG(script_filtered)) {
825            efree(SCNG(script_filtered));
826            SCNG(script_filtered) = NULL;
827        }
828        SCNG(script_filtered_size) = 0;
829        length = SCNG(script_org_size);
830        new_yy_start = SCNG(script_org);
831    } else {
832        if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
833            zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
834                    "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
835        }
836        if (SCNG(script_filtered)) {
837            efree(SCNG(script_filtered));
838        }
839        SCNG(script_filtered) = new_yy_start;
840        SCNG(script_filtered_size) = length;
841    }
842
843    SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
844    SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
845    SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
846    SCNG(yy_limit) = new_yy_start + length;
847
848    SCNG(yy_start) = new_yy_start;
849}
850
851
/*
 * zend_copy_value(zendlval, yytext, yyleng)
 *
 * Store the `yyleng` bytes starting at `yytext` in `zendlval` as a string.
 * When an output filter is installed the bytes are passed through it first,
 * the filtered result is stored instead, and the filter's buffer is freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays safe inside an unbraced if/else.
 */
// TODO: avoid reallocation ???
# define zend_copy_value(zendlval, yytext, yyleng) \
    do { \
        if (SCNG(output_filter)) { \
            size_t sz = 0; \
            char *s = NULL; \
            SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
            ZVAL_STRINGL(zendlval, s, sz); \
            efree(s); \
        } else { \
            ZVAL_STRINGL(zendlval, yytext, yyleng); \
        } \
    } while (0)
863
/*
 * Copy `len` bytes of `str` into `zendlval` as a string and resolve the
 * backslash escape sequences of a double-quoted / backquoted literal in place.
 *
 * `quote_type` is the delimiter of the surrounding literal ('"' or '`'):
 * a backslash-escaped delimiter of that kind is reduced to the bare
 * character, while the other one of the two keeps its backslash.
 *
 * CG(zend_lineno) is advanced for every "\n" and every "\r" not followed by
 * "\n" found at the read position. If an output filter is installed, the
 * unescaped result is finally replaced by its filtered form.
 */
static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
{
    register char *s, *t;
    char *end;

    ZVAL_STRINGL(zendlval, str, len);

    /* convert escape sequences */
    /* s is the read cursor, t the write cursor; both walk the same buffer and
     * the stored length is decremented for every byte an escape removes. */
    s = t = Z_STRVAL_P(zendlval);
    end = s+Z_STRLEN_P(zendlval);
    while (s<end) {
        if (*s=='\\') {
            s++;
            if (s >= end) {
                /* Backslash is the very last byte: keep it literally. */
                *t++ = '\\';
                break;
            }

            switch(*s) {
                case 'n':
                    *t++ = '\n';
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 'r':
                    *t++ = '\r';
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 't':
                    *t++ = '\t';
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 'f':
                    *t++ = '\f';
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 'v':
                    *t++ = '\v';
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 'e':
#ifdef PHP_WIN32
                    *t++ = VK_ESCAPE;
#else
                    *t++ = '\e';
#endif
                    Z_STRLEN_P(zendlval)--;
                    break;
                case '"':
                case '`':
                    /* Only the delimiter of this literal is unescaped; the
                     * other quote character keeps its backslash. */
                    if (*s != quote_type) {
                        *t++ = '\\';
                        *t++ = *s;
                        break;
                    }
                    /* fall through */
                case '\\':
                case '$':
                    *t++ = *s;
                    Z_STRLEN_P(zendlval)--;
                    break;
                case 'x':
                case 'X':
                    /* "\x" followed by one or two hex digits */
                    if (ZEND_IS_HEX(*(s+1))) {
                        char hex_buf[3] = { 0, 0, 0 };

                        Z_STRLEN_P(zendlval)--; /* for the 'x' */

                        hex_buf[0] = *(++s);
                        Z_STRLEN_P(zendlval)--;
                        if (ZEND_IS_HEX(*(s+1))) {
                            hex_buf[1] = *(++s);
                            Z_STRLEN_P(zendlval)--;
                        }
                        *t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
                    } else {
                        /* Not a hex escape: keep backslash and letter as is. */
                        *t++ = '\\';
                        *t++ = *s;
                    }
                    break;
                default:
                    /* check for an octal */
                    /* one to three octal digits */
                    if (ZEND_IS_OCT(*s)) {
                        char octal_buf[4] = { 0, 0, 0, 0 };

                        octal_buf[0] = *s;
                        Z_STRLEN_P(zendlval)--;
                        if (ZEND_IS_OCT(*(s+1))) {
                            octal_buf[1] = *(++s);
                            Z_STRLEN_P(zendlval)--;
                            if (ZEND_IS_OCT(*(s+1))) {
                                octal_buf[2] = *(++s);
                                Z_STRLEN_P(zendlval)--;
                            }
                        }
                        *t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
                    } else {
                        /* Unknown escape: keep backslash and character. */
                        *t++ = '\\';
                        *t++ = *s;
                    }
                    break;
            }
        } else {
            *t++ = *s;
        }

        /* Line counting looks at the byte under the read cursor. */
        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
            CG(zend_lineno)++;
        }
        s++;
    }
    *t = 0;
    if (SCNG(output_filter)) {
        size_t sz = 0;
        unsigned char *str;
        // TODO: avoid reallocation ???
        s = Z_STRVAL_P(zendlval);
        SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
        /* Release the unfiltered value, store the filtered bytes, then free
         * the buffer the filter handed back. */
        zval_ptr_dtor(zendlval);
        ZVAL_STRINGL(zendlval, (char *) str, sz);
        efree(str);
    }
}
985
986
987int lex_scan(zval *zendlval TSRMLS_DC)
988{
989restart:
990    SCNG(yy_text) = YYCURSOR;
991
992/*!re2c
993re2c:yyfill:check = 0;
994LNUM    [0-9]+
995DNUM    ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
996EXPONENT_DNUM   (({LNUM}|{DNUM})[eE][+-]?{LNUM})
997HNUM    "0x"[0-9a-fA-F]+
998BNUM    "0b"[01]+
999LABEL   [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1000WHITESPACE [ \n\r\t]+
1001TABS_AND_SPACES [ \t]*
1002TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1003ANY_CHAR [^]
1004NEWLINE ("\r"|"\n"|"\r\n")
1005
1006/* compute yyleng before each rule */
1007<!*> := yyleng = YYCURSOR - SCNG(yy_text);
1008
1009<ST_IN_SCRIPTING>"exit" {
1010    return T_EXIT;
1011}
1012
1013<ST_IN_SCRIPTING>"die" {
1014    return T_EXIT;
1015}
1016
1017<ST_IN_SCRIPTING>"function" {
1018    return T_FUNCTION;
1019}
1020
1021<ST_IN_SCRIPTING>"const" {
1022    return T_CONST;
1023}
1024
1025<ST_IN_SCRIPTING>"return" {
1026    return T_RETURN;
1027}
1028
1029<ST_IN_SCRIPTING>"yield" {
1030    return T_YIELD;
1031}
1032
1033<ST_IN_SCRIPTING>"try" {
1034    return T_TRY;
1035}
1036
1037<ST_IN_SCRIPTING>"catch" {
1038    return T_CATCH;
1039}
1040
1041<ST_IN_SCRIPTING>"finally" {
1042    return T_FINALLY;
1043}
1044
1045<ST_IN_SCRIPTING>"throw" {
1046    return T_THROW;
1047}
1048
1049<ST_IN_SCRIPTING>"if" {
1050    return T_IF;
1051}
1052
1053<ST_IN_SCRIPTING>"elseif" {
1054    return T_ELSEIF;
1055}
1056
1057<ST_IN_SCRIPTING>"endif" {
1058    return T_ENDIF;
1059}
1060
1061<ST_IN_SCRIPTING>"else" {
1062    return T_ELSE;
1063}
1064
1065<ST_IN_SCRIPTING>"while" {
1066    return T_WHILE;
1067}
1068
1069<ST_IN_SCRIPTING>"endwhile" {
1070    return T_ENDWHILE;
1071}
1072
1073<ST_IN_SCRIPTING>"do" {
1074    return T_DO;
1075}
1076
1077<ST_IN_SCRIPTING>"for" {
1078    return T_FOR;
1079}
1080
1081<ST_IN_SCRIPTING>"endfor" {
1082    return T_ENDFOR;
1083}
1084
1085<ST_IN_SCRIPTING>"foreach" {
1086    return T_FOREACH;
1087}
1088
1089<ST_IN_SCRIPTING>"endforeach" {
1090    return T_ENDFOREACH;
1091}
1092
1093<ST_IN_SCRIPTING>"declare" {
1094    return T_DECLARE;
1095}
1096
1097<ST_IN_SCRIPTING>"enddeclare" {
1098    return T_ENDDECLARE;
1099}
1100
1101<ST_IN_SCRIPTING>"instanceof" {
1102    return T_INSTANCEOF;
1103}
1104
1105<ST_IN_SCRIPTING>"as" {
1106    return T_AS;
1107}
1108
1109<ST_IN_SCRIPTING>"switch" {
1110    return T_SWITCH;
1111}
1112
1113<ST_IN_SCRIPTING>"endswitch" {
1114    return T_ENDSWITCH;
1115}
1116
1117<ST_IN_SCRIPTING>"case" {
1118    return T_CASE;
1119}
1120
1121<ST_IN_SCRIPTING>"default" {
1122    return T_DEFAULT;
1123}
1124
1125<ST_IN_SCRIPTING>"break" {
1126    return T_BREAK;
1127}
1128
1129<ST_IN_SCRIPTING>"continue" {
1130    return T_CONTINUE;
1131}
1132
1133<ST_IN_SCRIPTING>"goto" {
1134    return T_GOTO;
1135}
1136
1137<ST_IN_SCRIPTING>"echo" {
1138    return T_ECHO;
1139}
1140
1141<ST_IN_SCRIPTING>"print" {
1142    return T_PRINT;
1143}
1144
1145<ST_IN_SCRIPTING>"class" {
1146    return T_CLASS;
1147}
1148
1149<ST_IN_SCRIPTING>"interface" {
1150    return T_INTERFACE;
1151}
1152
1153<ST_IN_SCRIPTING>"trait" {
1154    return T_TRAIT;
1155}
1156
1157<ST_IN_SCRIPTING>"extends" {
1158    return T_EXTENDS;
1159}
1160
1161<ST_IN_SCRIPTING>"implements" {
1162    return T_IMPLEMENTS;
1163}
1164
1165<ST_IN_SCRIPTING>"->" {
1166    yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1167    return T_OBJECT_OPERATOR;
1168}
1169
1170<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1171    HANDLE_NEWLINES(yytext, yyleng);
1172    return T_WHITESPACE;
1173}
1174
1175<ST_LOOKING_FOR_PROPERTY>"->" {
1176    return T_OBJECT_OPERATOR;
1177}
1178
1179<ST_LOOKING_FOR_PROPERTY>{LABEL} {
1180    yy_pop_state(TSRMLS_C);
1181    zend_copy_value(zendlval, yytext, yyleng);
1182    return T_STRING;
1183}
1184
1185<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1186    yyless(0);
1187    yy_pop_state(TSRMLS_C);
1188    goto restart;
1189}
1190
1191<ST_IN_SCRIPTING>"::" {
1192    return T_PAAMAYIM_NEKUDOTAYIM;
1193}
1194
1195<ST_IN_SCRIPTING>"\\" {
1196    return T_NS_SEPARATOR;
1197}
1198
1199<ST_IN_SCRIPTING>"..." {
1200    return T_ELLIPSIS;
1201}
1202
1203<ST_IN_SCRIPTING>"??" {
1204    return T_COALESCE;
1205}
1206
1207<ST_IN_SCRIPTING>"new" {
1208    return T_NEW;
1209}
1210
1211<ST_IN_SCRIPTING>"clone" {
1212    return T_CLONE;
1213}
1214
1215<ST_IN_SCRIPTING>"var" {
1216    return T_VAR;
1217}
1218
1219<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1220    return T_INT_CAST;
1221}
1222
1223<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1224    return T_DOUBLE_CAST;
1225}
1226
1227<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1228    return T_STRING_CAST;
1229}
1230
1231<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1232    return T_ARRAY_CAST;
1233}
1234
1235<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1236    return T_OBJECT_CAST;
1237}
1238
1239<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1240    return T_BOOL_CAST;
1241}
1242
1243<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1244    return T_UNSET_CAST;
1245}
1246
1247<ST_IN_SCRIPTING>"eval" {
1248    return T_EVAL;
1249}
1250
1251<ST_IN_SCRIPTING>"include" {
1252    return T_INCLUDE;
1253}
1254
1255<ST_IN_SCRIPTING>"include_once" {
1256    return T_INCLUDE_ONCE;
1257}
1258
1259<ST_IN_SCRIPTING>"require" {
1260    return T_REQUIRE;
1261}
1262
1263<ST_IN_SCRIPTING>"require_once" {
1264    return T_REQUIRE_ONCE;
1265}
1266
1267<ST_IN_SCRIPTING>"namespace" {
1268    return T_NAMESPACE;
1269}
1270
1271<ST_IN_SCRIPTING>"use" {
1272    return T_USE;
1273}
1274
1275<ST_IN_SCRIPTING>"insteadof" {
1276        return T_INSTEADOF;
1277}
1278
1279<ST_IN_SCRIPTING>"global" {
1280    return T_GLOBAL;
1281}
1282
1283<ST_IN_SCRIPTING>"isset" {
1284    return T_ISSET;
1285}
1286
1287<ST_IN_SCRIPTING>"empty" {
1288    return T_EMPTY;
1289}
1290
1291<ST_IN_SCRIPTING>"__halt_compiler" {
1292    return T_HALT_COMPILER;
1293}
1294
1295<ST_IN_SCRIPTING>"static" {
1296    return T_STATIC;
1297}
1298
1299<ST_IN_SCRIPTING>"abstract" {
1300    return T_ABSTRACT;
1301}
1302
1303<ST_IN_SCRIPTING>"final" {
1304    return T_FINAL;
1305}
1306
1307<ST_IN_SCRIPTING>"private" {
1308    return T_PRIVATE;
1309}
1310
1311<ST_IN_SCRIPTING>"protected" {
1312    return T_PROTECTED;
1313}
1314
1315<ST_IN_SCRIPTING>"public" {
1316    return T_PUBLIC;
1317}
1318
1319<ST_IN_SCRIPTING>"unset" {
1320    return T_UNSET;
1321}
1322
1323<ST_IN_SCRIPTING>"=>" {
1324    return T_DOUBLE_ARROW;
1325}
1326
1327<ST_IN_SCRIPTING>"list" {
1328    return T_LIST;
1329}
1330
1331<ST_IN_SCRIPTING>"array" {
1332    return T_ARRAY;
1333}
1334
1335<ST_IN_SCRIPTING>"callable" {
1336 return T_CALLABLE;
1337}
1338
1339<ST_IN_SCRIPTING>"++" {
1340    return T_INC;
1341}
1342
1343<ST_IN_SCRIPTING>"--" {
1344    return T_DEC;
1345}
1346
1347<ST_IN_SCRIPTING>"===" {
1348    return T_IS_IDENTICAL;
1349}
1350
1351<ST_IN_SCRIPTING>"!==" {
1352    return T_IS_NOT_IDENTICAL;
1353}
1354
1355<ST_IN_SCRIPTING>"==" {
1356    return T_IS_EQUAL;
1357}
1358
1359<ST_IN_SCRIPTING>"!="|"<>" {
1360    return T_IS_NOT_EQUAL;
1361}
1362
1363<ST_IN_SCRIPTING>"<=" {
1364    return T_IS_SMALLER_OR_EQUAL;
1365}
1366
1367<ST_IN_SCRIPTING>">=" {
1368    return T_IS_GREATER_OR_EQUAL;
1369}
1370
1371<ST_IN_SCRIPTING>"+=" {
1372    return T_PLUS_EQUAL;
1373}
1374
1375<ST_IN_SCRIPTING>"-=" {
1376    return T_MINUS_EQUAL;
1377}
1378
1379<ST_IN_SCRIPTING>"*=" {
1380    return T_MUL_EQUAL;
1381}
1382
1383<ST_IN_SCRIPTING>"*\*" {
1384    return T_POW;
1385}
1386
1387<ST_IN_SCRIPTING>"*\*=" {
1388    return T_POW_EQUAL;
1389}
1390
1391<ST_IN_SCRIPTING>"/=" {
1392    return T_DIV_EQUAL;
1393}
1394
1395<ST_IN_SCRIPTING>".=" {
1396    return T_CONCAT_EQUAL;
1397}
1398
1399<ST_IN_SCRIPTING>"%=" {
1400    return T_MOD_EQUAL;
1401}
1402
1403<ST_IN_SCRIPTING>"<<=" {
1404    return T_SL_EQUAL;
1405}
1406
1407<ST_IN_SCRIPTING>">>=" {
1408    return T_SR_EQUAL;
1409}
1410
1411<ST_IN_SCRIPTING>"&=" {
1412    return T_AND_EQUAL;
1413}
1414
1415<ST_IN_SCRIPTING>"|=" {
1416    return T_OR_EQUAL;
1417}
1418
1419<ST_IN_SCRIPTING>"^=" {
1420    return T_XOR_EQUAL;
1421}
1422
1423<ST_IN_SCRIPTING>"||" {
1424    return T_BOOLEAN_OR;
1425}
1426
1427<ST_IN_SCRIPTING>"&&" {
1428    return T_BOOLEAN_AND;
1429}
1430
1431<ST_IN_SCRIPTING>"OR" {
1432    return T_LOGICAL_OR;
1433}
1434
1435<ST_IN_SCRIPTING>"AND" {
1436    return T_LOGICAL_AND;
1437}
1438
1439<ST_IN_SCRIPTING>"XOR" {
1440    return T_LOGICAL_XOR;
1441}
1442
1443<ST_IN_SCRIPTING>"<<" {
1444    return T_SL;
1445}
1446
1447<ST_IN_SCRIPTING>">>" {
1448    return T_SR;
1449}
1450
1451<ST_IN_SCRIPTING>{TOKENS} {
1452    return yytext[0];
1453}
1454
1455
1456<ST_IN_SCRIPTING>"{" {
1457    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1458    return '{';
1459}
1460
1461
1462<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1463    yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1464    return T_DOLLAR_OPEN_CURLY_BRACES;
1465}
1466
1467
1468<ST_IN_SCRIPTING>"}" {
1469    RESET_DOC_COMMENT();
1470    if (!zend_stack_is_empty(&SCNG(state_stack))) {
1471        yy_pop_state(TSRMLS_C);
1472    }
1473    return '}';
1474}
1475
1476
1477<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1478    yyless(yyleng - 1);
1479    zend_copy_value(zendlval, yytext, yyleng);
1480    yy_pop_state(TSRMLS_C);
1481    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1482    return T_STRING_VARNAME;
1483}
1484
1485
1486<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1487    yyless(0);
1488    yy_pop_state(TSRMLS_C);
1489    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1490    goto restart;
1491}
1492
1493<ST_IN_SCRIPTING>{BNUM} {
1494    char *bin = yytext + 2; /* Skip "0b" */
1495    int len = yyleng - 2;
1496
1497    /* Skip any leading 0s */
1498    while (*bin == '0') {
1499        ++bin;
1500        --len;
1501    }
1502
1503    if (len < SIZEOF_ZEND_LONG * 8) {
1504        if (len == 0) {
1505            ZVAL_LONG(zendlval, 0);
1506        } else {
1507            ZVAL_LONG(zendlval, ZEND_STRTOL(bin, NULL, 2));
1508        }
1509        return T_LNUMBER;
1510    } else {
1511        ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, NULL));
1512        return T_DNUMBER;
1513    }
1514}
1515
1516<ST_IN_SCRIPTING>{LNUM} {
1517    if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1518        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, NULL, 0));
1519    } else {
1520        errno = 0;
1521        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, NULL, 0));
1522        if (errno == ERANGE) { /* Overflow */
1523            if (yytext[0] == '0') { /* octal overflow */
1524                ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, NULL));
1525            } else {
1526                ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
1527            }
1528            return T_DNUMBER;
1529        }
1530    }
1531    return T_LNUMBER;
1532}
1533
1534<ST_IN_SCRIPTING>{HNUM} {
1535    char *hex = yytext + 2; /* Skip "0x" */
1536    int len = yyleng - 2;
1537
1538    /* Skip any leading 0s */
1539    while (*hex == '0') {
1540        hex++;
1541        len--;
1542    }
1543
1544    if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1545        if (len == 0) {
1546            ZVAL_LONG(zendlval, 0);
1547        } else {
1548            ZVAL_LONG(zendlval, ZEND_STRTOL(hex, NULL, 16));
1549        }
1550        return T_LNUMBER;
1551    } else {
1552        ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, NULL));
1553        return T_DNUMBER;
1554    }
1555}
1556
1557<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1558    if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1559        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, NULL, 10));
1560    } else {
1561        ZVAL_STRINGL(zendlval, yytext, yyleng);
1562    }
1563    return T_NUM_STRING;
1564}
1565
1566<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1567    ZVAL_STRINGL(zendlval, yytext, yyleng);
1568    return T_NUM_STRING;
1569}
1570
1571<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1572    ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
1573    return T_DNUMBER;
1574}
1575
1576<ST_IN_SCRIPTING>"__CLASS__" {
1577    return T_CLASS_C;
1578}
1579
1580<ST_IN_SCRIPTING>"__TRAIT__" {
1581    return T_TRAIT_C;
1582}
1583
1584<ST_IN_SCRIPTING>"__FUNCTION__" {
1585    return T_FUNC_C;
1586}
1587
1588<ST_IN_SCRIPTING>"__METHOD__" {
1589    return T_METHOD_C;
1590}
1591
1592<ST_IN_SCRIPTING>"__LINE__" {
1593    return T_LINE;
1594}
1595
1596<ST_IN_SCRIPTING>"__FILE__" {
1597    return T_FILE;
1598}
1599
1600<ST_IN_SCRIPTING>"__DIR__" {
1601    return T_DIR;
1602}
1603
1604<ST_IN_SCRIPTING>"__NAMESPACE__" {
1605    return T_NS_C;
1606}
1607
1608
1609<INITIAL>"<?=" {
1610    BEGIN(ST_IN_SCRIPTING);
1611    return T_OPEN_TAG_WITH_ECHO;
1612}
1613
1614
1615<INITIAL>"<?php"([ \t]|{NEWLINE}) {
1616    HANDLE_NEWLINE(yytext[yyleng-1]);
1617    BEGIN(ST_IN_SCRIPTING);
1618    return T_OPEN_TAG;
1619}
1620
1621
1622<INITIAL>"<?" {
1623    if (CG(short_tags)) {
1624        BEGIN(ST_IN_SCRIPTING);
1625        return T_OPEN_TAG;
1626    } else {
1627        goto inline_char_handler;
1628    }
1629}
1630
<INITIAL>{ANY_CHAR} {
    /* Inline (non-PHP) content: consume everything up to the next opening
     * tag candidate, or up to the end of the buffer, and return it as one
     * T_INLINE_HTML token. */
    if (YYCURSOR > YYLIMIT) {
        return 0;
    }

/* Also entered by goto from the "<?" rule when short tags are disabled. */
inline_char_handler:

    while (1) {
        YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);

        /* Continue just past the '<' that was found, or at the limit if
         * there is none left. */
        YYCURSOR = ptr ? ptr + 1 : YYLIMIT;

        if (YYCURSOR >= YYLIMIT) {
            break;
        }

        if (*YYCURSOR == '?') {
            if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */

                /* Step back onto the '<' so it is not part of this token. */
                YYCURSOR--;
                break;
            }
        }
    }

    yyleng = YYCURSOR - SCNG(yy_text);

    if (SCNG(output_filter)) {
        int readsize;
        char *s = NULL;
        size_t sz = 0;
        // TODO: avoid reallocation ???
        readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
        ZVAL_STRINGL(zendlval, s, sz);
        efree(s);
        /* The filter's return value is compared against the token length;
         * when it is smaller, the token is cut back to that many bytes. */
        if (readsize < yyleng) {
            yyless(readsize);
        }
    } else {
      ZVAL_STRINGL(zendlval, yytext, yyleng);
    }
    HANDLE_NEWLINES(yytext, yyleng);
    return T_INLINE_HTML;
}
1675
1676
1677/* Make sure a label character follows "->", otherwise there is no property
1678 * and "->" will be taken literally
1679 */
1680<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1681    yyless(yyleng - 3);
1682    yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1683    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1684    return T_VARIABLE;
1685}
1686
1687/* A [ always designates a variable offset, regardless of what follows
1688 */
1689<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1690    yyless(yyleng - 1);
1691    yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1692    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1693    return T_VARIABLE;
1694}
1695
1696<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1697    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1698    return T_VARIABLE;
1699}
1700
1701<ST_VAR_OFFSET>"]" {
1702    yy_pop_state(TSRMLS_C);
1703    return ']';
1704}
1705
1706<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1707    /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1708    return yytext[0];
1709}
1710
1711<ST_VAR_OFFSET>[ \n\r\t\\'#] {
1712    /* Invalid rule to return a more explicit parse error with proper line number */
1713    yyless(0);
1714    yy_pop_state(TSRMLS_C);
1715    ZVAL_NULL(zendlval);
1716    return T_ENCAPSED_AND_WHITESPACE;
1717}
1718
1719<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1720    zend_copy_value(zendlval, yytext, yyleng);
1721    return T_STRING;
1722}
1723
1724
1725<ST_IN_SCRIPTING>"#"|"//" {
1726    while (YYCURSOR < YYLIMIT) {
1727        switch (*YYCURSOR++) {
1728            case '\r':
1729                if (*YYCURSOR == '\n') {
1730                    YYCURSOR++;
1731                }
1732                /* fall through */
1733            case '\n':
1734                CG(zend_lineno)++;
1735                break;
1736            case '?':
1737                if (*YYCURSOR == '>') {
1738                    YYCURSOR--;
1739                    break;
1740                }
1741                /* fall through */
1742            default:
1743                continue;
1744        }
1745
1746        break;
1747    }
1748
1749    yyleng = YYCURSOR - SCNG(yy_text);
1750
1751    return T_COMMENT;
1752}
1753
1754<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1755    int doc_com;
1756
1757    if (yyleng > 2) {
1758        doc_com = 1;
1759        RESET_DOC_COMMENT();
1760    } else {
1761        doc_com = 0;
1762    }
1763
1764    while (YYCURSOR < YYLIMIT) {
1765        if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1766            break;
1767        }
1768    }
1769
1770    if (YYCURSOR < YYLIMIT) {
1771        YYCURSOR++;
1772    } else {
1773        zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1774    }
1775
1776    yyleng = YYCURSOR - SCNG(yy_text);
1777    HANDLE_NEWLINES(yytext, yyleng);
1778
1779    if (doc_com) {
1780        CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1781        return T_DOC_COMMENT;
1782    }
1783
1784    return T_COMMENT;
1785}
1786
1787<ST_IN_SCRIPTING>"?>"{NEWLINE}? {
1788    BEGIN(INITIAL);
1789    return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1790}
1791
1792
1793<ST_IN_SCRIPTING>b?['] {
1794    register char *s, *t;
1795    char *end;
1796    int bprefix = (yytext[0] != '\'') ? 1 : 0;
1797
1798    while (1) {
1799        if (YYCURSOR < YYLIMIT) {
1800            if (*YYCURSOR == '\'') {
1801                YYCURSOR++;
1802                yyleng = YYCURSOR - SCNG(yy_text);
1803
1804                break;
1805            } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1806                YYCURSOR++;
1807            }
1808        } else {
1809            yyleng = YYLIMIT - SCNG(yy_text);
1810
1811            /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1812             * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1813             * rule, which continued in ST_IN_SCRIPTING state after the quote */
1814            ZVAL_NULL(zendlval);
1815            return T_ENCAPSED_AND_WHITESPACE;
1816        }
1817    }
1818
1819    ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
1820
1821    /* convert escape sequences */
1822    s = t = Z_STRVAL_P(zendlval);
1823    end = s+Z_STRLEN_P(zendlval);
1824    while (s<end) {
1825        if (*s=='\\') {
1826            s++;
1827
1828            switch(*s) {
1829                case '\\':
1830                case '\'':
1831                    *t++ = *s;
1832                    Z_STRLEN_P(zendlval)--;
1833                    break;
1834                default:
1835                    *t++ = '\\';
1836                    *t++ = *s;
1837                    break;
1838            }
1839        } else {
1840            *t++ = *s;
1841        }
1842
1843        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1844            CG(zend_lineno)++;
1845        }
1846        s++;
1847    }
1848    *t = 0;
1849
1850    if (SCNG(output_filter)) {
1851        size_t sz = 0;
1852        char *str = NULL;
1853        s = Z_STRVAL_P(zendlval);
1854        // TODO: avoid reallocation ???
1855        SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
1856        ZVAL_STRINGL(zendlval, str, sz);
1857        efree(s);
1858    }
1859    return T_CONSTANT_ENCAPSED_STRING;
1860}
1861
1862
<ST_IN_SCRIPTING>b?["] {
    /* Opening double quote, optionally prefixed with "b".
     *
     * Looks ahead through the literal. If the closing quote is reached
     * without meeting "$" + label start, "${" or "{$", the whole literal is
     * unescaped and returned as T_CONSTANT_ENCAPSED_STRING. Otherwise only
     * the opening quote is returned and the scanner switches to
     * ST_DOUBLE_QUOTES. */
    int bprefix = (yytext[0] != '"') ? 1 : 0;

    while (YYCURSOR < YYLIMIT) {
        switch (*YYCURSOR++) {
            case '"':
                yyleng = YYCURSOR - SCNG(yy_text);
                zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
                return T_CONSTANT_ENCAPSED_STRING;
            case '$':
                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
                    break;
                }
                continue;
            case '{':
                if (*YYCURSOR == '$') {
                    break;
                }
                continue;
            case '\\':
                /* Skip the escaped character. */
                if (YYCURSOR < YYLIMIT) {
                    YYCURSOR++;
                }
                /* fall through */
            default:
                continue;
        }

        /* Reached only via the "break"s above: step back onto the character
         * that starts the interpolation and stop scanning. */
        YYCURSOR--;
        break;
    }

    /* Remember how much was scanned to save rescanning */
    /* (yyleng still holds the length of the matched opening token here) */
    SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);

    /* Rewind to just after the opening token. */
    YYCURSOR = SCNG(yy_text) + yyleng;

    BEGIN(ST_DOUBLE_QUOTES);
    return '"';
}
1903
1904
1905<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
1906    char *s;
1907    int bprefix = (yytext[0] != '<') ? 1 : 0;
1908    zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
1909
1910    CG(zend_lineno)++;
1911    heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
1912    s = yytext+bprefix+3;
1913    while ((*s == ' ') || (*s == '\t')) {
1914        s++;
1915        heredoc_label->length--;
1916    }
1917
1918    if (*s == '\'') {
1919        s++;
1920        heredoc_label->length -= 2;
1921
1922        BEGIN(ST_NOWDOC);
1923    } else {
1924        if (*s == '"') {
1925            s++;
1926            heredoc_label->length -= 2;
1927        }
1928
1929        BEGIN(ST_HEREDOC);
1930    }
1931
1932    heredoc_label->label = estrndup(s, heredoc_label->length);
1933
1934    /* Check for ending label on the next line */
1935    if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
1936        YYCTYPE *end = YYCURSOR + heredoc_label->length;
1937
1938        if (*end == ';') {
1939            end++;
1940        }
1941
1942        if (*end == '\n' || *end == '\r') {
1943            BEGIN(ST_END_HEREDOC);
1944        }
1945    }
1946
1947    zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
1948
1949    return T_START_HEREDOC;
1950}
1951
1952
1953<ST_IN_SCRIPTING>[`] {
1954    BEGIN(ST_BACKQUOTE);
1955    return '`';
1956}
1957
1958
1959<ST_END_HEREDOC>{ANY_CHAR} {
1960    zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
1961
1962    YYCURSOR += heredoc_label->length - 1;
1963    yyleng = heredoc_label->length;
1964
1965    heredoc_label_dtor(heredoc_label);
1966    efree(heredoc_label);
1967
1968    BEGIN(ST_IN_SCRIPTING);
1969    return T_END_HEREDOC;
1970}
1971
1972
1973<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
1974    Z_LVAL_P(zendlval) = (zend_long) '{';
1975    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1976    yyless(1);
1977    return T_CURLY_OPEN;
1978}
1979
1980
1981<ST_DOUBLE_QUOTES>["] {
1982    BEGIN(ST_IN_SCRIPTING);
1983    return '"';
1984}
1985
1986<ST_BACKQUOTE>[`] {
1987    BEGIN(ST_IN_SCRIPTING);
1988    return '`';
1989}
1990
1991
1992<ST_DOUBLE_QUOTES>{ANY_CHAR} {
1993    if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
1994        YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
1995        SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
1996
1997        goto double_quotes_scan_done;
1998    }
1999
2000    if (YYCURSOR > YYLIMIT) {
2001        return 0;
2002    }
2003    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2004        YYCURSOR++;
2005    }
2006
2007    while (YYCURSOR < YYLIMIT) {
2008        switch (*YYCURSOR++) {
2009            case '"':
2010                break;
2011            case '$':
2012                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2013                    break;
2014                }
2015                continue;
2016            case '{':
2017                if (*YYCURSOR == '$') {
2018                    break;
2019                }
2020                continue;
2021            case '\\':
2022                if (YYCURSOR < YYLIMIT) {
2023                    YYCURSOR++;
2024                }
2025                /* fall through */
2026            default:
2027                continue;
2028        }
2029
2030        YYCURSOR--;
2031        break;
2032    }
2033
2034double_quotes_scan_done:
2035    yyleng = YYCURSOR - SCNG(yy_text);
2036
2037    zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2038    return T_ENCAPSED_AND_WHITESPACE;
2039}
2040
2041
2042<ST_BACKQUOTE>{ANY_CHAR} {
2043    if (YYCURSOR > YYLIMIT) {
2044        return 0;
2045    }
2046    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2047        YYCURSOR++;
2048    }
2049
2050    while (YYCURSOR < YYLIMIT) {
2051        switch (*YYCURSOR++) {
2052            case '`':
2053                break;
2054            case '$':
2055                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2056                    break;
2057                }
2058                continue;
2059            case '{':
2060                if (*YYCURSOR == '$') {
2061                    break;
2062                }
2063                continue;
2064            case '\\':
2065                if (YYCURSOR < YYLIMIT) {
2066                    YYCURSOR++;
2067                }
2068                /* fall through */
2069            default:
2070                continue;
2071        }
2072
2073        YYCURSOR--;
2074        break;
2075    }
2076
2077    yyleng = YYCURSOR - SCNG(yy_text);
2078
2079    zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2080    return T_ENCAPSED_AND_WHITESPACE;
2081}
2082
2083
2084<ST_HEREDOC>{ANY_CHAR} {
2085    int newline = 0;
2086
2087    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2088
2089    if (YYCURSOR > YYLIMIT) {
2090        return 0;
2091    }
2092
2093    YYCURSOR--;
2094
2095    while (YYCURSOR < YYLIMIT) {
2096        switch (*YYCURSOR++) {
2097            case '\r':
2098                if (*YYCURSOR == '\n') {
2099                    YYCURSOR++;
2100                }
2101                /* fall through */
2102            case '\n':
2103                /* Check for ending label on the next line */
2104                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2105                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2106
2107                    if (*end == ';') {
2108                        end++;
2109                    }
2110
2111                    if (*end == '\n' || *end == '\r') {
2112                        /* newline before label will be subtracted from returned text, but
2113                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2114                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2115                            newline = 2; /* Windows newline */
2116                        } else {
2117                            newline = 1;
2118                        }
2119
2120                        CG(increment_lineno) = 1; /* For newline before label */
2121                        BEGIN(ST_END_HEREDOC);
2122
2123                        goto heredoc_scan_done;
2124                    }
2125                }
2126                continue;
2127            case '$':
2128                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2129                    break;
2130                }
2131                continue;
2132            case '{':
2133                if (*YYCURSOR == '$') {
2134                    break;
2135                }
2136                continue;
2137            case '\\':
2138                if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2139                    YYCURSOR++;
2140                }
2141                /* fall through */
2142            default:
2143                continue;
2144        }
2145
2146        YYCURSOR--;
2147        break;
2148    }
2149
2150heredoc_scan_done:
2151    yyleng = YYCURSOR - SCNG(yy_text);
2152
2153    zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2154    return T_ENCAPSED_AND_WHITESPACE;
2155}
2156
2157
2158<ST_NOWDOC>{ANY_CHAR} {
2159    int newline = 0;
2160
2161    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2162
2163    if (YYCURSOR > YYLIMIT) {
2164        return 0;
2165    }
2166
2167    YYCURSOR--;
2168
2169    while (YYCURSOR < YYLIMIT) {
2170        switch (*YYCURSOR++) {
2171            case '\r':
2172                if (*YYCURSOR == '\n') {
2173                    YYCURSOR++;
2174                }
2175                /* fall through */
2176            case '\n':
2177                /* Check for ending label on the next line */
2178                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2179                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2180
2181                    if (*end == ';') {
2182                        end++;
2183                    }
2184
2185                    if (*end == '\n' || *end == '\r') {
2186                        /* newline before label will be subtracted from returned text, but
2187                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2188                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2189                            newline = 2; /* Windows newline */
2190                        } else {
2191                            newline = 1;
2192                        }
2193
2194                        CG(increment_lineno) = 1; /* For newline before label */
2195                        BEGIN(ST_END_HEREDOC);
2196
2197                        goto nowdoc_scan_done;
2198                    }
2199                }
2200                /* fall through */
2201            default:
2202                continue;
2203        }
2204    }
2205
2206nowdoc_scan_done:
2207    yyleng = YYCURSOR - SCNG(yy_text);
2208
2209    zend_copy_value(zendlval, yytext, yyleng - newline);
2210    HANDLE_NEWLINES(yytext, yyleng - newline);
2211    return T_ENCAPSED_AND_WHITESPACE;
2212}
2213
2214
2215<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2216    if (YYCURSOR > YYLIMIT) {
2217        return 0;
2218    }
2219
2220    zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2221    goto restart;
2222}
2223
2224*/
2225}
2226