1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Marcus Boerger <helly@php.net>                              |
16   |          Nuno Lopes <nlopess@php.net>                                |
17   |          Scott MacVicar <scottmac@php.net>                           |
18   | Flex version authors:                                                |
19   |          Andi Gutmans <andi@zend.com>                                |
20   |          Zeev Suraski <zeev@zend.com>                                |
21   +----------------------------------------------------------------------+
22*/
23
24/* $Id$ */
25
/*
 * Trace hook taking a scanner state and a character.  It expands to nothing
 * by default; change the "#if 0" below to "#if 1" to have every call print
 * the state/character pair.
 */
#if 0
#define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
#define YYDEBUG(s, c)
#endif
31
32#include "zend_language_scanner_defs.h"
33
34#include <errno.h>
35#include "zend.h"
36#ifdef PHP_WIN32
37# include <Winuser.h>
38#endif
39#include "zend_alloc.h"
40#include <zend_language_parser.h>
41#include "zend_compile.h"
42#include "zend_language_scanner.h"
43#include "zend_highlight.h"
44#include "zend_constants.h"
45#include "zend_variables.h"
46#include "zend_operators.h"
47#include "zend_API.h"
48#include "zend_strtod.h"
49#include "zend_exceptions.h"
50#include "zend_virtual_cwd.h"
51#include "tsrm_config_common.h"
52
/* Character type the scanner operates on. */
#define YYCTYPE   unsigned char
/* Makes the enclosing function return 0 once reading `n` more characters
   would reach past YYLIMIT plus the ZEND_MMAP_AHEAD padding. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
/* Scanner positions live in the scanner globals. */
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* Current start condition; YYGETCONDITION() must stay an lvalue because its
   address is taken when the state is pushed on the state stack. */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* Maps a condition name onto its yyc-prefixed identifier. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Keep only the first `x` characters of the current token.  `x` is
   parenthesized so that arbitrary expressions are safe as argument. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72
/* Compile-time sanity check.  If the #error below fires, increase the
   ZEND_MMAP_AHEAD value in the zend_streams.h file.
   NOTE(review): the directive on the next line is presumably what makes
   re2c emit the YYMAXFILL definition - confirm against the re2c manual. */
/*!max:re2c */
#if YYMAXFILL > ZEND_MMAP_AHEAD
# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
#endif
79
80#ifdef HAVE_STDARG_H
81# include <stdarg.h>
82#endif
83
84#ifdef HAVE_UNISTD_H
85# include <unistd.h>
86#endif
87
88/* Globals Macros */
89#define SCNG    LANG_SCNG
90#ifdef ZTS
91ZEND_API ts_rsrc_id language_scanner_globals_id;
92#else
93ZEND_API zend_php_scanner_globals language_scanner_globals;
94#endif
95
/* Bump CG(zend_lineno) once for every line break among the `l` characters
   starting at `s`.  A "\n" always counts; a "\r" counts only when it is not
   immediately followed by "\n", so "\r\n" is a single break.  Note that the
   byte after a trailing "\r" is inspected even when it lies at `s + l`. */
#define HANDLE_NEWLINES(s, l)                                                   \
do {                                                                            \
    char *p = (s), *boundary = p+(l);                                           \
                                                                                \
    for (; p < boundary; p++) {                                                 \
        if (*p == '\r' ? p[1] != '\n' : *p == '\n') {                           \
            CG(zend_lineno)++;                                                  \
        }                                                                       \
    }                                                                           \
} while (0)
107
/* Bump CG(zend_lineno) when the single character `c` is "\n" or "\r".
   `c` is parenthesized so that compound expressions (e.g. a conditional
   expression) compare as a whole; it is still evaluated up to twice, so it
   must not have side effects.  The brace-only shape is kept so existing call
   sites keep compiling unchanged. */
#define HANDLE_NEWLINE(c) \
{ \
    if ((c) == '\n' || (c) == '\r') { \
        CG(zend_lineno)++; \
    } \
}
114
/* To save initial string length after scanning to first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)

/* True for ASCII letters, '_' and any value >= 0x7F.  The argument is
   evaluated several times, so it must be free of side effects. */
#define IS_LABEL_START(c) \
    (((c) >= 'a' && (c) <= 'z') || \
     ((c) >= 'A' && (c) <= 'Z') || \
     (c) == '_' || \
     (c) >= 0x7F)

/* Digit classification for octal / hexadecimal literals; same caveat about
   repeated evaluation of the argument. */
#define ZEND_IS_OCT(c)  ((c) >= '0' && (c) <= '7')
#define ZEND_IS_HEX(c) \
    (((c) >= '0' && (c) <= '9') || \
     ((c) >= 'a' && (c) <= 'f') || \
     ((c) >= 'A' && (c) <= 'F'))
123
124BEGIN_EXTERN_C()
125
126static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127{
128    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129    assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130    return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131}
132
133static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
134{
135    return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
136}
137
138static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
139{
140    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
142}
143
144static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
145{
146    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
147    assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
148    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
150}
151
152
153static void _yy_push_state(int new_state TSRMLS_DC)
154{
155    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156    YYSETCONDITION(new_state);
157}
158
159#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
161static void yy_pop_state(TSRMLS_D)
162{
163    int *stack_state = zend_stack_top(&SCNG(state_stack));
164    YYSETCONDITION(*stack_state);
165    zend_stack_del_top(&SCNG(state_stack));
166}
167
168static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
169{
170    YYCURSOR       = (YYCTYPE*)str;
171    YYLIMIT        = YYCURSOR + len;
172    if (!SCNG(yy_start)) {
173        SCNG(yy_start) = YYCURSOR;
174    }
175}
176
177void startup_scanner(TSRMLS_D)
178{
179    CG(parse_error) = 0;
180    CG(doc_comment) = NULL;
181    zend_stack_init(&SCNG(state_stack), sizeof(int));
182    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
183}
184
185static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
186    efree(heredoc_label->label);
187}
188
189void shutdown_scanner(TSRMLS_D)
190{
191    CG(parse_error) = 0;
192    RESET_DOC_COMMENT();
193    zend_stack_destroy(&SCNG(state_stack));
194    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
195    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
196}
197
198ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
199{
200    lex_state->yy_leng   = SCNG(yy_leng);
201    lex_state->yy_start  = SCNG(yy_start);
202    lex_state->yy_text   = SCNG(yy_text);
203    lex_state->yy_cursor = SCNG(yy_cursor);
204    lex_state->yy_marker = SCNG(yy_marker);
205    lex_state->yy_limit  = SCNG(yy_limit);
206
207    lex_state->state_stack = SCNG(state_stack);
208    zend_stack_init(&SCNG(state_stack), sizeof(int));
209
210    lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
211    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
212
213    lex_state->in = SCNG(yy_in);
214    lex_state->yy_state = YYSTATE;
215    lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
216    lex_state->lineno = CG(zend_lineno);
217
218    lex_state->script_org = SCNG(script_org);
219    lex_state->script_org_size = SCNG(script_org_size);
220    lex_state->script_filtered = SCNG(script_filtered);
221    lex_state->script_filtered_size = SCNG(script_filtered_size);
222    lex_state->input_filter = SCNG(input_filter);
223    lex_state->output_filter = SCNG(output_filter);
224    lex_state->script_encoding = SCNG(script_encoding);
225}
226
227ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
228{
229    SCNG(yy_leng)   = lex_state->yy_leng;
230    SCNG(yy_start)  = lex_state->yy_start;
231    SCNG(yy_text)   = lex_state->yy_text;
232    SCNG(yy_cursor) = lex_state->yy_cursor;
233    SCNG(yy_marker) = lex_state->yy_marker;
234    SCNG(yy_limit)  = lex_state->yy_limit;
235
236    zend_stack_destroy(&SCNG(state_stack));
237    SCNG(state_stack) = lex_state->state_stack;
238
239    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
240    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
241    SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
242
243    SCNG(yy_in) = lex_state->in;
244    YYSETCONDITION(lex_state->yy_state);
245    CG(zend_lineno) = lex_state->lineno;
246    zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
247
248    if (SCNG(script_filtered)) {
249        efree(SCNG(script_filtered));
250        SCNG(script_filtered) = NULL;
251    }
252    SCNG(script_org) = lex_state->script_org;
253    SCNG(script_org_size) = lex_state->script_org_size;
254    SCNG(script_filtered) = lex_state->script_filtered;
255    SCNG(script_filtered_size) = lex_state->script_filtered_size;
256    SCNG(input_filter) = lex_state->input_filter;
257    SCNG(output_filter) = lex_state->output_filter;
258    SCNG(script_encoding) = lex_state->script_encoding;
259
260    RESET_DOC_COMMENT();
261}
262
263ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
264{
265    zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
266    /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
267    file_handle->opened_path = NULL;
268    if (file_handle->free_filename) {
269        file_handle->filename = NULL;
270    }
271}
272
/* Byte order marks, as string literals.  Use sizeof(...)-1 for their length;
   the UTF-32 marks contain NUL bytes, so strlen() would be wrong. */
#define BOM_UTF32_BE  "\x00\x00\xfe\xff"
#define BOM_UTF32_LE  "\xff\xfe\x00\x00"
#define BOM_UTF16_BE  "\xfe\xff"
#define BOM_UTF16_LE  "\xff\xfe"
#define BOM_UTF8      "\xef\xbb\xbf"
278
279static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
280{
281    const unsigned char *p;
282    int wchar_size = 2;
283    int le = 0;
284
285    /* utf-16 or utf-32? */
286    p = script;
287    while ((p-script) < script_size) {
288        p = memchr(p, 0, script_size-(p-script)-2);
289        if (!p) {
290            break;
291        }
292        if (*(p+1) == '\0' && *(p+2) == '\0') {
293            wchar_size = 4;
294            break;
295        }
296
297        /* searching for UTF-32 specific byte orders, so this will do */
298        p += 4;
299    }
300
301    /* BE or LE? */
302    p = script;
303    while ((p-script) < script_size) {
304        if (*p == '\0' && *(p+wchar_size-1) != '\0') {
305            /* BE */
306            le = 0;
307            break;
308        } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
309            /* LE* */
310            le = 1;
311            break;
312        }
313        p += wchar_size;
314    }
315
316    if (wchar_size == 2) {
317        return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
318    } else {
319        return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
320    }
321
322    return NULL;
323}
324
325static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
326{
327    const zend_encoding *script_encoding = NULL;
328    int bom_size;
329    unsigned char *pos1, *pos2;
330
331    if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
332        return NULL;
333    }
334
335    /* check out BOM */
336    if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
337        script_encoding = zend_multibyte_encoding_utf32be;
338        bom_size = sizeof(BOM_UTF32_BE)-1;
339    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
340        script_encoding = zend_multibyte_encoding_utf32le;
341        bom_size = sizeof(BOM_UTF32_LE)-1;
342    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
343        script_encoding = zend_multibyte_encoding_utf16be;
344        bom_size = sizeof(BOM_UTF16_BE)-1;
345    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
346        script_encoding = zend_multibyte_encoding_utf16le;
347        bom_size = sizeof(BOM_UTF16_LE)-1;
348    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
349        script_encoding = zend_multibyte_encoding_utf8;
350        bom_size = sizeof(BOM_UTF8)-1;
351    }
352
353    if (script_encoding) {
354        /* remove BOM */
355        LANG_SCNG(script_org) += bom_size;
356        LANG_SCNG(script_org_size) -= bom_size;
357
358        return script_encoding;
359    }
360
361    /* script contains NULL bytes -> auto-detection */
362    if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
363        /* check if the NULL byte is after the __HALT_COMPILER(); */
364        pos2 = LANG_SCNG(script_org);
365
366        while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
367            pos2 = memchr(pos2, '_', pos1 - pos2);
368            if (!pos2) break;
369            pos2++;
370            if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
371                pos2 += sizeof("_HALT_COMPILER")-1;
372                while (*pos2 == ' '  ||
373                       *pos2 == '\t' ||
374                       *pos2 == '\r' ||
375                       *pos2 == '\n') {
376                    pos2++;
377                }
378                if (*pos2 == '(') {
379                    pos2++;
380                    while (*pos2 == ' '  ||
381                           *pos2 == '\t' ||
382                           *pos2 == '\r' ||
383                           *pos2 == '\n') {
384                        pos2++;
385                    }
386                    if (*pos2 == ')') {
387                        pos2++;
388                        while (*pos2 == ' '  ||
389                               *pos2 == '\t' ||
390                               *pos2 == '\r' ||
391                               *pos2 == '\n') {
392                            pos2++;
393                        }
394                        if (*pos2 == ';') {
395                            return NULL;
396                        }
397                    }
398                }
399            }
400        }
401        /* make best effort if BOM is missing */
402        return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
403    }
404
405    return NULL;
406}
407
408static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
409{
410    const zend_encoding *script_encoding;
411
412    if (CG(detect_unicode)) {
413        /* check out bom(byte order mark) and see if containing wchars */
414        script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
415        if (script_encoding != NULL) {
416            /* bom or wchar detection is prior to 'script_encoding' option */
417            return script_encoding;
418        }
419    }
420
421    /* if no script_encoding specified, just leave alone */
422    if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
423        return NULL;
424    }
425
426    /* if multiple encodings specified, detect automagically */
427    if (CG(script_encoding_list_size) > 1) {
428        return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
429    }
430
431    return CG(script_encoding_list)[0];
432}
433
434ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
435{
436    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
437    const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
438
439    if (!script_encoding) {
440        return FAILURE;
441    }
442
443    /* judge input/output filter */
444    LANG_SCNG(script_encoding) = script_encoding;
445    LANG_SCNG(input_filter) = NULL;
446    LANG_SCNG(output_filter) = NULL;
447
448    if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
449        if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
450            /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
451            LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
452            LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
453        } else {
454            LANG_SCNG(input_filter) = NULL;
455            LANG_SCNG(output_filter) = NULL;
456        }
457        return SUCCESS;
458    }
459
460    if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
461        LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
462        LANG_SCNG(output_filter) = NULL;
463    } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
464        LANG_SCNG(input_filter) = NULL;
465        LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
466    } else {
467        /* both script and internal encodings are incompatible w/ flex */
468        LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
469        LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
470    }
471
472    return 0;
473}
474
475ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
476{
477    const char *file_path = NULL;
478    char *buf;
479    size_t size, offset = 0;
480    zend_string *compiled_filename;
481
482    /* The shebang line was read, get the current position to obtain the buffer start */
483    if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
484        if ((offset = ftell(file_handle->handle.fp)) == -1) {
485            offset = 0;
486        }
487    }
488
489    if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
490        return FAILURE;
491    }
492
493    zend_llist_add_element(&CG(open_files), file_handle);
494    if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
495        zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
496        size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
497        fh->handle.stream.handle = (void*)(((char*)fh) + diff);
498        file_handle->handle.stream.handle = fh->handle.stream.handle;
499    }
500
501    /* Reset the scanner for scanning the new file */
502    SCNG(yy_in) = file_handle;
503    SCNG(yy_start) = NULL;
504
505    if (size != -1) {
506        if (CG(multibyte)) {
507            SCNG(script_org) = (unsigned char*)buf;
508            SCNG(script_org_size) = size;
509            SCNG(script_filtered) = NULL;
510
511            zend_multibyte_set_filter(NULL TSRMLS_CC);
512
513            if (SCNG(input_filter)) {
514                if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
515                    zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
516                            "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
517                }
518                buf = (char*)SCNG(script_filtered);
519                size = SCNG(script_filtered_size);
520            }
521        }
522        SCNG(yy_start) = (unsigned char *)buf - offset;
523        yy_scan_buffer(buf, size TSRMLS_CC);
524    } else {
525        zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
526    }
527
528    BEGIN(INITIAL);
529
530    if (file_handle->opened_path) {
531        file_path = file_handle->opened_path;
532    } else {
533        file_path = file_handle->filename;
534    }
535
536    compiled_filename = zend_string_init(file_path, strlen(file_path), 0);
537    zend_set_compiled_filename(compiled_filename TSRMLS_CC);
538    zend_string_release(compiled_filename);
539
540    if (CG(start_lineno)) {
541        CG(zend_lineno) = CG(start_lineno);
542        CG(start_lineno) = 0;
543    } else {
544        CG(zend_lineno) = 1;
545    }
546
547    RESET_DOC_COMMENT();
548    CG(increment_lineno) = 0;
549    return SUCCESS;
550}
551END_EXTERN_C()
552
553
554ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
555{
556    zend_lex_state original_lex_state;
557    zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
558    zend_op_array *original_active_op_array = CG(active_op_array);
559    zend_op_array *retval=NULL;
560    int compiler_result;
561    zend_bool compilation_successful=0;
562    zval retval_zv;
563    zend_bool original_in_compilation = CG(in_compilation);
564
565    ZVAL_LONG(&retval_zv, 1);
566
567    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
568
569    retval = op_array; /* success oriented */
570
571    if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
572        if (type==ZEND_REQUIRE) {
573            zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
574            zend_bailout();
575        } else {
576            zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
577        }
578        compilation_successful=0;
579    } else {
580        init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
581        CG(in_compilation) = 1;
582        CG(active_op_array) = op_array;
583        zend_stack_push(&CG(context_stack), (void *) &CG(context));
584        zend_init_compiler_context(TSRMLS_C);
585        CG(ast_arena) = zend_arena_create(1024 * 32);
586        compiler_result = zendparse(TSRMLS_C);
587        if (compiler_result != 0) { /* parser error */
588            zend_bailout();
589        }
590        zend_compile_top_stmt(CG(ast) TSRMLS_CC);
591        zend_ast_destroy(CG(ast));
592        zend_arena_destroy(CG(ast_arena));
593        zend_do_end_compilation(TSRMLS_C);
594        zend_emit_final_return(&retval_zv TSRMLS_CC);
595        CG(in_compilation) = original_in_compilation;
596        compilation_successful=1;
597    }
598
599    if (retval) {
600        CG(active_op_array) = original_active_op_array;
601        if (compilation_successful) {
602            pass_two(op_array TSRMLS_CC);
603            zend_release_labels(0 TSRMLS_CC);
604        } else {
605            efree_size(op_array, sizeof(zend_op_array));
606            retval = NULL;
607        }
608    }
609    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
610    return retval;
611}
612
613
614zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
615{
616    zend_file_handle file_handle;
617    zval tmp;
618    zend_op_array *retval;
619    char *opened_path = NULL;
620
621    if (Z_TYPE_P(filename) != IS_STRING) {
622        tmp = *filename;
623        zval_copy_ctor(&tmp);
624        convert_to_string(&tmp);
625        filename = &tmp;
626    }
627    file_handle.filename = Z_STRVAL_P(filename);
628    file_handle.free_filename = 0;
629    file_handle.type = ZEND_HANDLE_FILENAME;
630    file_handle.opened_path = NULL;
631    file_handle.handle.fp = NULL;
632
633    retval = zend_compile_file(&file_handle, type TSRMLS_CC);
634    if (retval && file_handle.handle.stream.handle) {
635        if (!file_handle.opened_path) {
636            file_handle.opened_path = opened_path = estrndup(Z_STRVAL_P(filename), Z_STRLEN_P(filename));
637        }
638
639        zend_hash_str_add_empty_element(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path));
640
641        if (opened_path) {
642            efree(opened_path);
643        }
644    }
645    zend_destroy_file_handle(&file_handle TSRMLS_CC);
646
647    if (filename==&tmp) {
648        zval_dtor(&tmp);
649    }
650    return retval;
651}
652
653ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
654{
655    char *buf;
656    size_t size, old_len;
657    zend_string *new_compiled_filename;
658
659    /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
660    old_len = Z_STRLEN_P(str);
661    Z_STR_P(str) = zend_string_realloc(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
662    Z_TYPE_INFO_P(str) = IS_STRING_EX;
663    memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
664
665    SCNG(yy_in) = NULL;
666    SCNG(yy_start) = NULL;
667
668    buf = Z_STRVAL_P(str);
669    size = old_len;
670
671    if (CG(multibyte)) {
672        SCNG(script_org) = (unsigned char*)buf;
673        SCNG(script_org_size) = size;
674        SCNG(script_filtered) = NULL;
675
676        zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
677
678        if (SCNG(input_filter)) {
679            if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
680                zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
681                        "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
682            }
683            buf = (char*)SCNG(script_filtered);
684            size = SCNG(script_filtered_size);
685        }
686    }
687
688    yy_scan_buffer(buf, size TSRMLS_CC);
689
690    new_compiled_filename = zend_string_init(filename, strlen(filename), 0);
691    zend_set_compiled_filename(new_compiled_filename TSRMLS_CC);
692    zend_string_release(new_compiled_filename);
693    CG(zend_lineno) = 1;
694    CG(increment_lineno) = 0;
695    RESET_DOC_COMMENT();
696    return SUCCESS;
697}
698
699
700ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
701{
702    size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
703    if (SCNG(input_filter)) {
704        size_t original_offset = offset, length = 0;
705        do {
706            unsigned char *p = NULL;
707            if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
708                return (size_t)-1;
709            }
710            efree(p);
711            if (length > original_offset) {
712                offset--;
713            } else if (length < original_offset) {
714                offset++;
715            }
716        } while (original_offset != length);
717    }
718    return offset;
719}
720
721
722zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
723{
724    zend_lex_state original_lex_state;
725    zend_op_array *op_array = NULL;
726    zval tmp;
727    zend_bool original_in_compilation = CG(in_compilation);
728
729    if (Z_STRLEN_P(source_string)==0) {
730        return NULL;
731    }
732
733    ZVAL_DUP(&tmp, source_string);
734    convert_to_string(&tmp);
735    source_string = &tmp;
736
737    CG(in_compilation) = 1;
738    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
739    if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC) == SUCCESS) {
740        CG(ast) = NULL;
741        CG(ast_arena) = zend_arena_create(1024 * 32);
742        BEGIN(ST_IN_SCRIPTING);
743
744        if (!zendparse(TSRMLS_C)) {
745            zend_op_array *original_active_op_array = CG(active_op_array);
746            op_array = emalloc(sizeof(zend_op_array));
747            init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
748            CG(active_op_array) = op_array;
749
750            zend_stack_push(&CG(context_stack), (void *) &CG(context));
751            zend_init_compiler_context(TSRMLS_C);
752            zend_compile_top_stmt(CG(ast) TSRMLS_CC);
753            zend_do_end_compilation(TSRMLS_C);
754            zend_emit_final_return(NULL TSRMLS_CC);
755            pass_two(op_array TSRMLS_CC);
756            zend_release_labels(0 TSRMLS_CC);
757
758            CG(active_op_array) = original_active_op_array;
759        }
760
761        zend_ast_destroy(CG(ast));
762        zend_arena_destroy(CG(ast_arena));
763    }
764
765    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
766    zval_dtor(&tmp);
767    CG(in_compilation) = original_in_compilation;
768    return op_array;
769}
770
771
772BEGIN_EXTERN_C()
773int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
774{
775    zend_lex_state original_lex_state;
776    zend_file_handle file_handle;
777
778    file_handle.type = ZEND_HANDLE_FILENAME;
779    file_handle.filename = filename;
780    file_handle.free_filename = 0;
781    file_handle.opened_path = NULL;
782    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
783    if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
784        zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
785        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
786        return FAILURE;
787    }
788    zend_highlight(syntax_highlighter_ini TSRMLS_CC);
789    if (SCNG(script_filtered)) {
790        efree(SCNG(script_filtered));
791        SCNG(script_filtered) = NULL;
792    }
793    zend_destroy_file_handle(&file_handle TSRMLS_CC);
794    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
795    return SUCCESS;
796}
797
798int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
799{
800    zend_lex_state original_lex_state;
801    zval tmp = *str;
802
803    str = &tmp;
804    zval_copy_ctor(str);
805    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
806    if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
807        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
808        return FAILURE;
809    }
810    BEGIN(INITIAL);
811    zend_highlight(syntax_highlighter_ini TSRMLS_CC);
812    if (SCNG(script_filtered)) {
813        efree(SCNG(script_filtered));
814        SCNG(script_filtered) = NULL;
815    }
816    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
817    zval_dtor(str);
818    return SUCCESS;
819}
820
821ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
822{
823    size_t length;
824    unsigned char *new_yy_start;
825
826    /* convert and set */
827    if (!SCNG(input_filter)) {
828        if (SCNG(script_filtered)) {
829            efree(SCNG(script_filtered));
830            SCNG(script_filtered) = NULL;
831        }
832        SCNG(script_filtered_size) = 0;
833        length = SCNG(script_org_size);
834        new_yy_start = SCNG(script_org);
835    } else {
836        if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
837            zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
838                    "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
839        }
840        if (SCNG(script_filtered)) {
841            efree(SCNG(script_filtered));
842        }
843        SCNG(script_filtered) = new_yy_start;
844        SCNG(script_filtered_size) = length;
845    }
846
847    SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
848    SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
849    SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
850    SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
851
852    SCNG(yy_start) = new_yy_start;
853}
854
855
/* Set zendlval (via ZVAL_STRINGL) from the yyleng bytes starting at yytext.
 *
 * When an output filter is installed, the text is passed through the filter
 * first; the buffer the filter hands back is released with efree() once
 * ZVAL_STRINGL has been given its contents.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a single
 * statement (it must be followed by ';'), and the text/length arguments are
 * parenthesised where they appear under a cast.
 *
 * TODO: avoid reallocation ???
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
    do { \
        if (SCNG(output_filter)) { \
            size_t sz = 0; \
            char *s = NULL; \
            SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
            ZVAL_STRINGL(zendlval, s, sz); \
            efree(s); \
        } else { \
            ZVAL_STRINGL(zendlval, (yytext), (yyleng)); \
        } \
    } while (0)
867
868static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
869{
870    register char *s, *t;
871    char *end;
872
873    ZVAL_STRINGL(zendlval, str, len);
874
875    /* convert escape sequences */
876    s = t = Z_STRVAL_P(zendlval);
877    end = s+Z_STRLEN_P(zendlval);
878    while (s<end) {
879        if (*s=='\\') {
880            s++;
881            if (s >= end) {
882                *t++ = '\\';
883                break;
884            }
885
886            switch(*s) {
887                case 'n':
888                    *t++ = '\n';
889                    Z_STRLEN_P(zendlval)--;
890                    break;
891                case 'r':
892                    *t++ = '\r';
893                    Z_STRLEN_P(zendlval)--;
894                    break;
895                case 't':
896                    *t++ = '\t';
897                    Z_STRLEN_P(zendlval)--;
898                    break;
899                case 'f':
900                    *t++ = '\f';
901                    Z_STRLEN_P(zendlval)--;
902                    break;
903                case 'v':
904                    *t++ = '\v';
905                    Z_STRLEN_P(zendlval)--;
906                    break;
907                case 'e':
908#ifdef PHP_WIN32
909                    *t++ = VK_ESCAPE;
910#else
911                    *t++ = '\e';
912#endif
913                    Z_STRLEN_P(zendlval)--;
914                    break;
915                case '"':
916                case '`':
917                    if (*s != quote_type) {
918                        *t++ = '\\';
919                        *t++ = *s;
920                        break;
921                    }
922                case '\\':
923                case '$':
924                    *t++ = *s;
925                    Z_STRLEN_P(zendlval)--;
926                    break;
927                case 'x':
928                case 'X':
929                    if (ZEND_IS_HEX(*(s+1))) {
930                        char hex_buf[3] = { 0, 0, 0 };
931
932                        Z_STRLEN_P(zendlval)--; /* for the 'x' */
933
934                        hex_buf[0] = *(++s);
935                        Z_STRLEN_P(zendlval)--;
936                        if (ZEND_IS_HEX(*(s+1))) {
937                            hex_buf[1] = *(++s);
938                            Z_STRLEN_P(zendlval)--;
939                        }
940                        *t++ = (char) ZEND_STRTOL(hex_buf, NULL, 16);
941                    } else {
942                        *t++ = '\\';
943                        *t++ = *s;
944                    }
945                    break;
946                default:
947                    /* check for an octal */
948                    if (ZEND_IS_OCT(*s)) {
949                        char octal_buf[4] = { 0, 0, 0, 0 };
950
951                        octal_buf[0] = *s;
952                        Z_STRLEN_P(zendlval)--;
953                        if (ZEND_IS_OCT(*(s+1))) {
954                            octal_buf[1] = *(++s);
955                            Z_STRLEN_P(zendlval)--;
956                            if (ZEND_IS_OCT(*(s+1))) {
957                                octal_buf[2] = *(++s);
958                                Z_STRLEN_P(zendlval)--;
959                            }
960                        }
961                        *t++ = (char) ZEND_STRTOL(octal_buf, NULL, 8);
962                    } else {
963                        *t++ = '\\';
964                        *t++ = *s;
965                    }
966                    break;
967            }
968        } else {
969            *t++ = *s;
970        }
971
972        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
973            CG(zend_lineno)++;
974        }
975        s++;
976    }
977    *t = 0;
978    if (SCNG(output_filter)) {
979        size_t sz = 0;
980        unsigned char *str;
981        // TODO: avoid realocation ???
982        s = Z_STRVAL_P(zendlval);
983        SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
984        zval_ptr_dtor(zendlval);
985        ZVAL_STRINGL(zendlval, (char *) str, sz);
986        efree(str);
987    }
988}
989
990
991int lex_scan(zval *zendlval TSRMLS_DC)
992{
993restart:
994    SCNG(yy_text) = YYCURSOR;
995
996yymore_restart:
997
998/*!re2c
999re2c:yyfill:check = 0;
1000LNUM    [0-9]+
1001DNUM    ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1002EXPONENT_DNUM   (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1003HNUM    "0x"[0-9a-fA-F]+
1004BNUM    "0b"[01]+
1005LABEL   [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1006WHITESPACE [ \n\r\t]+
1007TABS_AND_SPACES [ \t]*
1008TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1009ANY_CHAR [^]
1010NEWLINE ("\r"|"\n"|"\r\n")
1011
1012/* compute yyleng before each rule */
1013<!*> := yyleng = YYCURSOR - SCNG(yy_text);
1014
1015<ST_IN_SCRIPTING>"exit" {
1016    return T_EXIT;
1017}
1018
1019<ST_IN_SCRIPTING>"die" {
1020    return T_EXIT;
1021}
1022
1023<ST_IN_SCRIPTING>"function" {
1024    return T_FUNCTION;
1025}
1026
1027<ST_IN_SCRIPTING>"const" {
1028    return T_CONST;
1029}
1030
1031<ST_IN_SCRIPTING>"return" {
1032    return T_RETURN;
1033}
1034
1035<ST_IN_SCRIPTING>"yield" {
1036    return T_YIELD;
1037}
1038
1039<ST_IN_SCRIPTING>"try" {
1040    return T_TRY;
1041}
1042
1043<ST_IN_SCRIPTING>"catch" {
1044    return T_CATCH;
1045}
1046
1047<ST_IN_SCRIPTING>"finally" {
1048    return T_FINALLY;
1049}
1050
1051<ST_IN_SCRIPTING>"throw" {
1052    return T_THROW;
1053}
1054
1055<ST_IN_SCRIPTING>"if" {
1056    return T_IF;
1057}
1058
1059<ST_IN_SCRIPTING>"elseif" {
1060    return T_ELSEIF;
1061}
1062
1063<ST_IN_SCRIPTING>"endif" {
1064    return T_ENDIF;
1065}
1066
1067<ST_IN_SCRIPTING>"else" {
1068    return T_ELSE;
1069}
1070
1071<ST_IN_SCRIPTING>"while" {
1072    return T_WHILE;
1073}
1074
1075<ST_IN_SCRIPTING>"endwhile" {
1076    return T_ENDWHILE;
1077}
1078
1079<ST_IN_SCRIPTING>"do" {
1080    return T_DO;
1081}
1082
1083<ST_IN_SCRIPTING>"for" {
1084    return T_FOR;
1085}
1086
1087<ST_IN_SCRIPTING>"endfor" {
1088    return T_ENDFOR;
1089}
1090
1091<ST_IN_SCRIPTING>"foreach" {
1092    return T_FOREACH;
1093}
1094
1095<ST_IN_SCRIPTING>"endforeach" {
1096    return T_ENDFOREACH;
1097}
1098
1099<ST_IN_SCRIPTING>"declare" {
1100    return T_DECLARE;
1101}
1102
1103<ST_IN_SCRIPTING>"enddeclare" {
1104    return T_ENDDECLARE;
1105}
1106
1107<ST_IN_SCRIPTING>"instanceof" {
1108    return T_INSTANCEOF;
1109}
1110
1111<ST_IN_SCRIPTING>"as" {
1112    return T_AS;
1113}
1114
1115<ST_IN_SCRIPTING>"switch" {
1116    return T_SWITCH;
1117}
1118
1119<ST_IN_SCRIPTING>"endswitch" {
1120    return T_ENDSWITCH;
1121}
1122
1123<ST_IN_SCRIPTING>"case" {
1124    return T_CASE;
1125}
1126
1127<ST_IN_SCRIPTING>"default" {
1128    return T_DEFAULT;
1129}
1130
1131<ST_IN_SCRIPTING>"break" {
1132    return T_BREAK;
1133}
1134
1135<ST_IN_SCRIPTING>"continue" {
1136    return T_CONTINUE;
1137}
1138
1139<ST_IN_SCRIPTING>"goto" {
1140    return T_GOTO;
1141}
1142
1143<ST_IN_SCRIPTING>"echo" {
1144    return T_ECHO;
1145}
1146
1147<ST_IN_SCRIPTING>"print" {
1148    return T_PRINT;
1149}
1150
1151<ST_IN_SCRIPTING>"class" {
1152    return T_CLASS;
1153}
1154
1155<ST_IN_SCRIPTING>"interface" {
1156    return T_INTERFACE;
1157}
1158
1159<ST_IN_SCRIPTING>"trait" {
1160    return T_TRAIT;
1161}
1162
1163<ST_IN_SCRIPTING>"extends" {
1164    return T_EXTENDS;
1165}
1166
1167<ST_IN_SCRIPTING>"implements" {
1168    return T_IMPLEMENTS;
1169}
1170
1171<ST_IN_SCRIPTING>"->" {
1172    yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1173    return T_OBJECT_OPERATOR;
1174}
1175
1176<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1177    HANDLE_NEWLINES(yytext, yyleng);
1178    return T_WHITESPACE;
1179}
1180
1181<ST_LOOKING_FOR_PROPERTY>"->" {
1182    return T_OBJECT_OPERATOR;
1183}
1184
1185<ST_LOOKING_FOR_PROPERTY>{LABEL} {
1186    yy_pop_state(TSRMLS_C);
1187    zend_copy_value(zendlval, yytext, yyleng);
1188    return T_STRING;
1189}
1190
1191<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1192    yyless(0);
1193    yy_pop_state(TSRMLS_C);
1194    goto restart;
1195}
1196
1197<ST_IN_SCRIPTING>"::" {
1198    return T_PAAMAYIM_NEKUDOTAYIM;
1199}
1200
1201<ST_IN_SCRIPTING>"\\" {
1202    return T_NS_SEPARATOR;
1203}
1204
1205<ST_IN_SCRIPTING>"..." {
1206    return T_ELLIPSIS;
1207}
1208
1209<ST_IN_SCRIPTING>"new" {
1210    return T_NEW;
1211}
1212
1213<ST_IN_SCRIPTING>"clone" {
1214    return T_CLONE;
1215}
1216
1217<ST_IN_SCRIPTING>"var" {
1218    return T_VAR;
1219}
1220
1221<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1222    return T_INT_CAST;
1223}
1224
1225<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1226    return T_DOUBLE_CAST;
1227}
1228
1229<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1230    return T_STRING_CAST;
1231}
1232
1233<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1234    return T_ARRAY_CAST;
1235}
1236
1237<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1238    return T_OBJECT_CAST;
1239}
1240
1241<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1242    return T_BOOL_CAST;
1243}
1244
1245<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1246    return T_UNSET_CAST;
1247}
1248
1249<ST_IN_SCRIPTING>"eval" {
1250    return T_EVAL;
1251}
1252
1253<ST_IN_SCRIPTING>"include" {
1254    return T_INCLUDE;
1255}
1256
1257<ST_IN_SCRIPTING>"include_once" {
1258    return T_INCLUDE_ONCE;
1259}
1260
1261<ST_IN_SCRIPTING>"require" {
1262    return T_REQUIRE;
1263}
1264
1265<ST_IN_SCRIPTING>"require_once" {
1266    return T_REQUIRE_ONCE;
1267}
1268
1269<ST_IN_SCRIPTING>"namespace" {
1270    return T_NAMESPACE;
1271}
1272
1273<ST_IN_SCRIPTING>"use" {
1274    return T_USE;
1275}
1276
1277<ST_IN_SCRIPTING>"insteadof" {
1278        return T_INSTEADOF;
1279}
1280
1281<ST_IN_SCRIPTING>"global" {
1282    return T_GLOBAL;
1283}
1284
1285<ST_IN_SCRIPTING>"isset" {
1286    return T_ISSET;
1287}
1288
1289<ST_IN_SCRIPTING>"empty" {
1290    return T_EMPTY;
1291}
1292
1293<ST_IN_SCRIPTING>"__halt_compiler" {
1294    return T_HALT_COMPILER;
1295}
1296
1297<ST_IN_SCRIPTING>"static" {
1298    return T_STATIC;
1299}
1300
1301<ST_IN_SCRIPTING>"abstract" {
1302    return T_ABSTRACT;
1303}
1304
1305<ST_IN_SCRIPTING>"final" {
1306    return T_FINAL;
1307}
1308
1309<ST_IN_SCRIPTING>"private" {
1310    return T_PRIVATE;
1311}
1312
1313<ST_IN_SCRIPTING>"protected" {
1314    return T_PROTECTED;
1315}
1316
1317<ST_IN_SCRIPTING>"public" {
1318    return T_PUBLIC;
1319}
1320
1321<ST_IN_SCRIPTING>"unset" {
1322    return T_UNSET;
1323}
1324
1325<ST_IN_SCRIPTING>"=>" {
1326    return T_DOUBLE_ARROW;
1327}
1328
1329<ST_IN_SCRIPTING>"list" {
1330    return T_LIST;
1331}
1332
1333<ST_IN_SCRIPTING>"array" {
1334    return T_ARRAY;
1335}
1336
1337<ST_IN_SCRIPTING>"callable" {
1338 return T_CALLABLE;
1339}
1340
1341<ST_IN_SCRIPTING>"++" {
1342    return T_INC;
1343}
1344
1345<ST_IN_SCRIPTING>"--" {
1346    return T_DEC;
1347}
1348
1349<ST_IN_SCRIPTING>"===" {
1350    return T_IS_IDENTICAL;
1351}
1352
1353<ST_IN_SCRIPTING>"!==" {
1354    return T_IS_NOT_IDENTICAL;
1355}
1356
1357<ST_IN_SCRIPTING>"==" {
1358    return T_IS_EQUAL;
1359}
1360
1361<ST_IN_SCRIPTING>"!="|"<>" {
1362    return T_IS_NOT_EQUAL;
1363}
1364
1365<ST_IN_SCRIPTING>"<=" {
1366    return T_IS_SMALLER_OR_EQUAL;
1367}
1368
1369<ST_IN_SCRIPTING>">=" {
1370    return T_IS_GREATER_OR_EQUAL;
1371}
1372
1373<ST_IN_SCRIPTING>"+=" {
1374    return T_PLUS_EQUAL;
1375}
1376
1377<ST_IN_SCRIPTING>"-=" {
1378    return T_MINUS_EQUAL;
1379}
1380
1381<ST_IN_SCRIPTING>"*=" {
1382    return T_MUL_EQUAL;
1383}
1384
1385<ST_IN_SCRIPTING>"*\*" {
1386    return T_POW;
1387}
1388
1389<ST_IN_SCRIPTING>"*\*=" {
1390    return T_POW_EQUAL;
1391}
1392
1393<ST_IN_SCRIPTING>"/=" {
1394    return T_DIV_EQUAL;
1395}
1396
1397<ST_IN_SCRIPTING>".=" {
1398    return T_CONCAT_EQUAL;
1399}
1400
1401<ST_IN_SCRIPTING>"%=" {
1402    return T_MOD_EQUAL;
1403}
1404
1405<ST_IN_SCRIPTING>"<<=" {
1406    return T_SL_EQUAL;
1407}
1408
1409<ST_IN_SCRIPTING>">>=" {
1410    return T_SR_EQUAL;
1411}
1412
1413<ST_IN_SCRIPTING>"&=" {
1414    return T_AND_EQUAL;
1415}
1416
1417<ST_IN_SCRIPTING>"|=" {
1418    return T_OR_EQUAL;
1419}
1420
1421<ST_IN_SCRIPTING>"^=" {
1422    return T_XOR_EQUAL;
1423}
1424
1425<ST_IN_SCRIPTING>"||" {
1426    return T_BOOLEAN_OR;
1427}
1428
1429<ST_IN_SCRIPTING>"&&" {
1430    return T_BOOLEAN_AND;
1431}
1432
1433<ST_IN_SCRIPTING>"OR" {
1434    return T_LOGICAL_OR;
1435}
1436
1437<ST_IN_SCRIPTING>"AND" {
1438    return T_LOGICAL_AND;
1439}
1440
1441<ST_IN_SCRIPTING>"XOR" {
1442    return T_LOGICAL_XOR;
1443}
1444
1445<ST_IN_SCRIPTING>"<<" {
1446    return T_SL;
1447}
1448
1449<ST_IN_SCRIPTING>">>" {
1450    return T_SR;
1451}
1452
1453<ST_IN_SCRIPTING>{TOKENS} {
1454    return yytext[0];
1455}
1456
1457
1458<ST_IN_SCRIPTING>"{" {
1459    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1460    return '{';
1461}
1462
1463
1464<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1465    yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1466    return T_DOLLAR_OPEN_CURLY_BRACES;
1467}
1468
1469
1470<ST_IN_SCRIPTING>"}" {
1471    RESET_DOC_COMMENT();
1472    if (!zend_stack_is_empty(&SCNG(state_stack))) {
1473        yy_pop_state(TSRMLS_C);
1474    }
1475    return '}';
1476}
1477
1478
1479<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1480    yyless(yyleng - 1);
1481    zend_copy_value(zendlval, yytext, yyleng);
1482    yy_pop_state(TSRMLS_C);
1483    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1484    return T_STRING_VARNAME;
1485}
1486
1487
1488<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1489    yyless(0);
1490    yy_pop_state(TSRMLS_C);
1491    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1492    goto restart;
1493}
1494
1495<ST_IN_SCRIPTING>{BNUM} {
1496    char *bin = yytext + 2; /* Skip "0b" */
1497    int len = yyleng - 2;
1498
1499    /* Skip any leading 0s */
1500    while (*bin == '0') {
1501        ++bin;
1502        --len;
1503    }
1504
1505    if (len < SIZEOF_ZEND_LONG * 8) {
1506        if (len == 0) {
1507            ZVAL_LONG(zendlval, 0);
1508        } else {
1509            ZVAL_LONG(zendlval, ZEND_STRTOL(bin, NULL, 2));
1510        }
1511        return T_LNUMBER;
1512    } else {
1513        ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, NULL));
1514        return T_DNUMBER;
1515    }
1516}
1517
1518<ST_IN_SCRIPTING>{LNUM} {
1519    if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1520        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, NULL, 0));
1521    } else {
1522        errno = 0;
1523        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, NULL, 0));
1524        if (errno == ERANGE) { /* Overflow */
1525            if (yytext[0] == '0') { /* octal overflow */
1526                ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, NULL));
1527            } else {
1528                ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
1529            }
1530            return T_DNUMBER;
1531        }
1532    }
1533    return T_LNUMBER;
1534}
1535
1536<ST_IN_SCRIPTING>{HNUM} {
1537    char *hex = yytext + 2; /* Skip "0x" */
1538    int len = yyleng - 2;
1539
1540    /* Skip any leading 0s */
1541    while (*hex == '0') {
1542        hex++;
1543        len--;
1544    }
1545
1546    if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
1547        if (len == 0) {
1548            ZVAL_LONG(zendlval, 0);
1549        } else {
1550            ZVAL_LONG(zendlval, ZEND_STRTOL(hex, NULL, 16));
1551        }
1552        return T_LNUMBER;
1553    } else {
1554        ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, NULL));
1555        return T_DNUMBER;
1556    }
1557}
1558
1559<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1560    if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1561        ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, NULL, 10));
1562    } else {
1563        ZVAL_STRINGL(zendlval, yytext, yyleng);
1564    }
1565    return T_NUM_STRING;
1566}
1567
1568<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1569    ZVAL_STRINGL(zendlval, yytext, yyleng);
1570    return T_NUM_STRING;
1571}
1572
1573<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1574    ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
1575    return T_DNUMBER;
1576}
1577
1578<ST_IN_SCRIPTING>"__CLASS__" {
1579    return T_CLASS_C;
1580}
1581
1582<ST_IN_SCRIPTING>"__TRAIT__" {
1583    return T_TRAIT_C;
1584}
1585
1586<ST_IN_SCRIPTING>"__FUNCTION__" {
1587    return T_FUNC_C;
1588}
1589
1590<ST_IN_SCRIPTING>"__METHOD__" {
1591    return T_METHOD_C;
1592}
1593
1594<ST_IN_SCRIPTING>"__LINE__" {
1595    return T_LINE;
1596}
1597
1598<ST_IN_SCRIPTING>"__FILE__" {
1599    return T_FILE;
1600}
1601
1602<ST_IN_SCRIPTING>"__DIR__" {
1603    return T_DIR;
1604}
1605
1606<ST_IN_SCRIPTING>"__NAMESPACE__" {
1607    return T_NS_C;
1608}
1609
1610<INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1611    YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1612
1613    if (bracket != SCNG(yy_text)) {
1614        /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1615        YYCURSOR = bracket;
1616        goto inline_html;
1617    }
1618
1619    HANDLE_NEWLINES(yytext, yyleng);
1620    BEGIN(ST_IN_SCRIPTING);
1621    return T_OPEN_TAG;
1622}
1623
1624
1625<INITIAL>"<%=" {
1626    if (CG(asp_tags)) {
1627        BEGIN(ST_IN_SCRIPTING);
1628        return T_OPEN_TAG_WITH_ECHO;
1629    } else {
1630        goto inline_char_handler;
1631    }
1632}
1633
1634
1635<INITIAL>"<?=" {
1636    BEGIN(ST_IN_SCRIPTING);
1637    return T_OPEN_TAG_WITH_ECHO;
1638}
1639
1640
1641<INITIAL>"<%" {
1642    if (CG(asp_tags)) {
1643        BEGIN(ST_IN_SCRIPTING);
1644        return T_OPEN_TAG;
1645    } else {
1646        goto inline_char_handler;
1647    }
1648}
1649
1650
1651<INITIAL>"<?php"([ \t]|{NEWLINE}) {
1652    HANDLE_NEWLINE(yytext[yyleng-1]);
1653    BEGIN(ST_IN_SCRIPTING);
1654    return T_OPEN_TAG;
1655}
1656
1657
1658<INITIAL>"<?" {
1659    if (CG(short_tags)) {
1660        BEGIN(ST_IN_SCRIPTING);
1661        return T_OPEN_TAG;
1662    } else {
1663        goto inline_char_handler;
1664    }
1665}
1666
1667<INITIAL>{ANY_CHAR} {
1668    if (YYCURSOR > YYLIMIT) {
1669        return 0;
1670    }
1671
1672inline_char_handler:
1673
1674    while (1) {
1675        YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1676
1677        YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1678
1679        if (YYCURSOR < YYLIMIT) {
1680            switch (*YYCURSOR) {
1681                case '?':
1682                    if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1683                        break;
1684                    }
1685                    continue;
1686                case '%':
1687                    if (CG(asp_tags)) {
1688                        break;
1689                    }
1690                    continue;
1691                case 's':
1692                case 'S':
1693                    /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1694                     * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1695                    YYCURSOR--;
1696                    yymore();
1697                default:
1698                    continue;
1699            }
1700
1701            YYCURSOR--;
1702        }
1703
1704        break;
1705    }
1706
1707inline_html:
1708    yyleng = YYCURSOR - SCNG(yy_text);
1709
1710    if (SCNG(output_filter)) {
1711        int readsize;
1712        char *s = NULL;
1713        size_t sz = 0;
1714        // TODO: avoid reallocation ???
1715        readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1716        ZVAL_STRINGL(zendlval, s, sz);
1717        efree(s);
1718        if (readsize < yyleng) {
1719            yyless(readsize);
1720        }
1721    } else {
1722      ZVAL_STRINGL(zendlval, yytext, yyleng);
1723    }
1724    HANDLE_NEWLINES(yytext, yyleng);
1725    return T_INLINE_HTML;
1726}
1727
1728
1729/* Make sure a label character follows "->", otherwise there is no property
1730 * and "->" will be taken literally
1731 */
1732<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1733    yyless(yyleng - 3);
1734    yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1735    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1736    return T_VARIABLE;
1737}
1738
1739/* A [ always designates a variable offset, regardless of what follows
1740 */
1741<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1742    yyless(yyleng - 1);
1743    yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1744    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1745    return T_VARIABLE;
1746}
1747
1748<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1749    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1750    return T_VARIABLE;
1751}
1752
1753<ST_VAR_OFFSET>"]" {
1754    yy_pop_state(TSRMLS_C);
1755    return ']';
1756}
1757
1758<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1759    /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1760    return yytext[0];
1761}
1762
1763<ST_VAR_OFFSET>[ \n\r\t\\'#] {
1764    /* Invalid rule to return a more explicit parse error with proper line number */
1765    yyless(0);
1766    yy_pop_state(TSRMLS_C);
1767    return T_ENCAPSED_AND_WHITESPACE;
1768}
1769
1770<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1771    zend_copy_value(zendlval, yytext, yyleng);
1772    return T_STRING;
1773}
1774
1775
1776<ST_IN_SCRIPTING>"#"|"//" {
1777    while (YYCURSOR < YYLIMIT) {
1778        switch (*YYCURSOR++) {
1779            case '\r':
1780                if (*YYCURSOR == '\n') {
1781                    YYCURSOR++;
1782                }
1783                /* fall through */
1784            case '\n':
1785                CG(zend_lineno)++;
1786                break;
1787            case '%':
1788                if (!CG(asp_tags)) {
1789                    continue;
1790                }
1791                /* fall through */
1792            case '?':
1793                if (*YYCURSOR == '>') {
1794                    YYCURSOR--;
1795                    break;
1796                }
1797                /* fall through */
1798            default:
1799                continue;
1800        }
1801
1802        break;
1803    }
1804
1805    yyleng = YYCURSOR - SCNG(yy_text);
1806
1807    return T_COMMENT;
1808}
1809
1810<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1811    int doc_com;
1812
1813    if (yyleng > 2) {
1814        doc_com = 1;
1815        RESET_DOC_COMMENT();
1816    } else {
1817        doc_com = 0;
1818    }
1819
1820    while (YYCURSOR < YYLIMIT) {
1821        if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1822            break;
1823        }
1824    }
1825
1826    if (YYCURSOR < YYLIMIT) {
1827        YYCURSOR++;
1828    } else {
1829        zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1830    }
1831
1832    yyleng = YYCURSOR - SCNG(yy_text);
1833    HANDLE_NEWLINES(yytext, yyleng);
1834
1835    if (doc_com) {
1836        CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
1837        return T_DOC_COMMENT;
1838    }
1839
1840    return T_COMMENT;
1841}
1842
1843<ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1844    BEGIN(INITIAL);
1845    return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1846}
1847
1848
1849<ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1850    if (CG(asp_tags)) {
1851        BEGIN(INITIAL);
1852        return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1853    } else {
1854        yyless(1);
1855        return yytext[0];
1856    }
1857}
1858
1859
1860<ST_IN_SCRIPTING>b?['] {
1861    register char *s, *t;
1862    char *end;
1863    int bprefix = (yytext[0] != '\'') ? 1 : 0;
1864
1865    while (1) {
1866        if (YYCURSOR < YYLIMIT) {
1867            if (*YYCURSOR == '\'') {
1868                YYCURSOR++;
1869                yyleng = YYCURSOR - SCNG(yy_text);
1870
1871                break;
1872            } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1873                YYCURSOR++;
1874            }
1875        } else {
1876            yyleng = YYLIMIT - SCNG(yy_text);
1877
1878            /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1879             * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1880             * rule, which continued in ST_IN_SCRIPTING state after the quote */
1881            return T_ENCAPSED_AND_WHITESPACE;
1882        }
1883    }
1884
1885    ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
1886
1887    /* convert escape sequences */
1888    s = t = Z_STRVAL_P(zendlval);
1889    end = s+Z_STRLEN_P(zendlval);
1890    while (s<end) {
1891        if (*s=='\\') {
1892            s++;
1893
1894            switch(*s) {
1895                case '\\':
1896                case '\'':
1897                    *t++ = *s;
1898                    Z_STRLEN_P(zendlval)--;
1899                    break;
1900                default:
1901                    *t++ = '\\';
1902                    *t++ = *s;
1903                    break;
1904            }
1905        } else {
1906            *t++ = *s;
1907        }
1908
1909        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
1910            CG(zend_lineno)++;
1911        }
1912        s++;
1913    }
1914    *t = 0;
1915
1916    if (SCNG(output_filter)) {
1917        size_t sz = 0;
1918        char *str = NULL;
1919        s = Z_STRVAL_P(zendlval);
1920        // TODO: avoid reallocation ???
1921        SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
1922        ZVAL_STRINGL(zendlval, str, sz);
1923        efree(s);
1924    }
1925    return T_CONSTANT_ENCAPSED_STRING;
1926}
1927
1928
1929<ST_IN_SCRIPTING>b?["] {
1930    int bprefix = (yytext[0] != '"') ? 1 : 0;
1931
1932    while (YYCURSOR < YYLIMIT) {
1933        switch (*YYCURSOR++) {
1934            case '"':
1935                yyleng = YYCURSOR - SCNG(yy_text);
1936                zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
1937                return T_CONSTANT_ENCAPSED_STRING;
1938            case '$':
1939                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
1940                    break;
1941                }
1942                continue;
1943            case '{':
1944                if (*YYCURSOR == '$') {
1945                    break;
1946                }
1947                continue;
1948            case '\\':
1949                if (YYCURSOR < YYLIMIT) {
1950                    YYCURSOR++;
1951                }
1952                /* fall through */
1953            default:
1954                continue;
1955        }
1956
1957        YYCURSOR--;
1958        break;
1959    }
1960
1961    /* Remember how much was scanned to save rescanning */
1962    SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
1963
1964    YYCURSOR = SCNG(yy_text) + yyleng;
1965
1966    BEGIN(ST_DOUBLE_QUOTES);
1967    return '"';
1968}
1969
1970
1971<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
1972    char *s;
1973    int bprefix = (yytext[0] != '<') ? 1 : 0;
1974    zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
1975
1976    CG(zend_lineno)++;
1977    heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
1978    s = yytext+bprefix+3;
1979    while ((*s == ' ') || (*s == '\t')) {
1980        s++;
1981        heredoc_label->length--;
1982    }
1983
1984    if (*s == '\'') {
1985        s++;
1986        heredoc_label->length -= 2;
1987
1988        BEGIN(ST_NOWDOC);
1989    } else {
1990        if (*s == '"') {
1991            s++;
1992            heredoc_label->length -= 2;
1993        }
1994
1995        BEGIN(ST_HEREDOC);
1996    }
1997
1998    heredoc_label->label = estrndup(s, heredoc_label->length);
1999
2000    /* Check for ending label on the next line */
2001    if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2002        YYCTYPE *end = YYCURSOR + heredoc_label->length;
2003
2004        if (*end == ';') {
2005            end++;
2006        }
2007
2008        if (*end == '\n' || *end == '\r') {
2009            BEGIN(ST_END_HEREDOC);
2010        }
2011    }
2012
2013    zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2014
2015    return T_START_HEREDOC;
2016}
2017
2018
2019<ST_IN_SCRIPTING>[`] {
2020    BEGIN(ST_BACKQUOTE);
2021    return '`';
2022}
2023
2024
2025<ST_END_HEREDOC>{ANY_CHAR} {
2026    zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2027
2028    YYCURSOR += heredoc_label->length - 1;
2029    yyleng = heredoc_label->length;
2030
2031    heredoc_label_dtor(heredoc_label);
2032    efree(heredoc_label);
2033
2034    BEGIN(ST_IN_SCRIPTING);
2035    return T_END_HEREDOC;
2036}
2037
2038
2039<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2040    Z_LVAL_P(zendlval) = (zend_long) '{';
2041    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2042    yyless(1);
2043    return T_CURLY_OPEN;
2044}
2045
2046
2047<ST_DOUBLE_QUOTES>["] {
2048    BEGIN(ST_IN_SCRIPTING);
2049    return '"';
2050}
2051
2052<ST_BACKQUOTE>[`] {
2053    BEGIN(ST_IN_SCRIPTING);
2054    return '`';
2055}
2056
2057
/* Literal text inside a double-quoted string. Scans forward until the closing
 * double quote or the start of an interpolation, then returns everything
 * before that point as T_ENCAPSED_AND_WHITESPACE after escape processing. */
2058<ST_DOUBLE_QUOTES>{ANY_CHAR} {
        /* A non-zero scanned length means the extent of this piece of text is
         * already known (presumably recorded by the rule that opened the
         * string, outside this chunk - TODO confirm). Jump to its end, counting
         * the one character assumed to be consumed by the pattern, and reset
         * the value so it is used only once. */
2059    if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2060        YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2061        SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2062
2063        goto double_quotes_scan_done;
2064    }
2065
        /* Cursor is already past the limit: nothing left to scan */
2066    if (YYCURSOR > YYLIMIT) {
2067        return 0;
2068    }
        /* If the character matched by the pattern was a backslash, skip the
         * character it escapes so that character cannot end the scan */
2069    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2070        YYCURSOR++;
2071    }
2072
        /* Inside the switch, break means a stop character was found and falls
         * through to the step-back below; continue moves on to the next
         * character. */
2073    while (YYCURSOR < YYLIMIT) {
2074        switch (*YYCURSOR++) {
2075            case '"':
2076                break;
2077            case '$':
                    /* Only a dollar sign that starts a variable or a braced
                     * expression ends the text.
                     * NOTE(review): YYCURSOR may equal YYLIMIT here, so this
                     * reads the byte at YYLIMIT; assumes it is readable - confirm */
2078                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2079                    break;
2080                }
2081                continue;
2082            case '{':
                    /* An opening brace ends the text only when a dollar sign
                     * follows it */
2083                if (*YYCURSOR == '$') {
2084                    break;
2085                }
2086                continue;
2087            case '\\':
                    /* Skip the escaped character so it is never treated as a
                     * stop character */
2088                if (YYCURSOR < YYLIMIT) {
2089                    YYCURSOR++;
2090                }
2091                /* fall through */
2092            default:
2093                continue;
2094        }
2095
            /* The stop character was consumed by the post-increment above;
             * step back so it is left for the next token */
2096        YYCURSOR--;
2097        break;
2098    }
2099
2100double_quotes_scan_done:
2101    yyleng = YYCURSOR - SCNG(yy_text);
2102
        /* Build the string value in zendlval, passing the double quote as the
         * quote type */
2103    zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2104    return T_ENCAPSED_AND_WHITESPACE;
2105}
2106
2107
/* Literal text inside a backtick string. Scans forward until the closing
 * backtick or the start of an interpolation, then returns everything before
 * that point as T_ENCAPSED_AND_WHITESPACE after escape processing. */
2108<ST_BACKQUOTE>{ANY_CHAR} {
        /* Cursor is already past the limit: nothing left to scan */
2109    if (YYCURSOR > YYLIMIT) {
2110        return 0;
2111    }
        /* If the character matched by the pattern was a backslash, skip the
         * character it escapes so that character cannot end the scan */
2112    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2113        YYCURSOR++;
2114    }
2115
        /* Inside the switch, break means a stop character was found and falls
         * through to the step-back below; continue moves on to the next
         * character. */
2116    while (YYCURSOR < YYLIMIT) {
2117        switch (*YYCURSOR++) {
2118            case '`':
2119                break;
2120            case '$':
                    /* Only a dollar sign that starts a variable or a braced
                     * expression ends the text.
                     * NOTE(review): YYCURSOR may equal YYLIMIT here, so this
                     * reads the byte at YYLIMIT; assumes it is readable - confirm */
2121                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2122                    break;
2123                }
2124                continue;
2125            case '{':
                    /* An opening brace ends the text only when a dollar sign
                     * follows it */
2126                if (*YYCURSOR == '$') {
2127                    break;
2128                }
2129                continue;
2130            case '\\':
                    /* Skip the escaped character so it is never treated as a
                     * stop character */
2131                if (YYCURSOR < YYLIMIT) {
2132                    YYCURSOR++;
2133                }
2134                /* fall through */
2135            default:
2136                continue;
2137        }
2138
            /* The stop character was consumed by the post-increment above;
             * step back so it is left for the next token */
2139        YYCURSOR--;
2140        break;
2141    }
2142
2143    yyleng = YYCURSOR - SCNG(yy_text);
2144
        /* Build the string value in zendlval, passing the backtick as the
         * quote type */
2145    zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2146    return T_ENCAPSED_AND_WHITESPACE;
2147}
2148
2149
/* Literal text inside a heredoc body. Scans forward until the start of an
 * interpolation or until a newline that is directly followed by the closing
 * label, then returns the text as T_ENCAPSED_AND_WHITESPACE after escape
 * processing. When the closing label is found the scanner switches to
 * ST_END_HEREDOC. */
2150<ST_HEREDOC>{ANY_CHAR} {
        /* Number of newline bytes (0, 1 or 2) at the end of the token that are
         * left out of the string value */
2151    int newline = 0;
2152
        /* Label on top of the heredoc label stack */
2153    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2154
        /* Cursor is already past the limit: nothing left to scan */
2155    if (YYCURSOR > YYLIMIT) {
2156        return 0;
2157    }
2158
        /* Give back the character matched by the pattern so the loop examines
         * it too; it may itself be the newline before the closing label */
2159    YYCURSOR--;
2160
        /* Inside the switch, break means an interpolation start was found and
         * falls through to the step-back below; continue moves on to the next
         * character. */
2161    while (YYCURSOR < YYLIMIT) {
2162        switch (*YYCURSOR++) {
2163            case '\r':
                    /* Treat a carriage return plus line feed pair as one newline */
2164                if (*YYCURSOR == '\n') {
2165                    YYCURSOR++;
2166                }
2167                /* fall through */
2168            case '\n':
2169                /* Check for ending label on the next line */
                    /* The label must start right after the newline, be shorter
                     * than the remaining input, and match byte for byte */
2170                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2171                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2172
                        /* One optional semicolon may follow the label */
2173                    if (*end == ';') {
2174                        end++;
2175                    }
2176
                        /* It only counts as the closing label when a newline
                         * comes next */
2177                    if (*end == '\n' || *end == '\r') {
2178                        /* newline before label will be subtracted from returned text, but
2179                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2180                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2181                            newline = 2; /* Windows newline */
2182                        } else {
2183                            newline = 1;
2184                        }
2185
2186                        CG(increment_lineno) = 1; /* For newline before label */
2187                        BEGIN(ST_END_HEREDOC);
2188
2189                        goto heredoc_scan_done;
2190                    }
2191                }
                    /* Not the closing label: keep scanning */
2192                continue;
2193            case '$':
                    /* Only a dollar sign that starts a variable or a braced
                     * expression ends the text.
                     * NOTE(review): YYCURSOR may equal YYLIMIT here, so this
                     * reads the byte at YYLIMIT; assumes it is readable - confirm */
2194                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2195                    break;
2196                }
2197                continue;
2198            case '{':
                    /* An opening brace ends the text only when a dollar sign
                     * follows it */
2199                if (*YYCURSOR == '$') {
2200                    break;
2201                }
2202                continue;
2203            case '\\':
                    /* Skip the escaped character, except a newline, which must
                     * still go through the closing label check above */
2204                if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2205                    YYCURSOR++;
2206                }
2207                /* fall through */
2208            default:
2209                continue;
2210        }
2211
            /* The stop character was consumed by the post-increment above;
             * step back so it is left for the next token */
2212        YYCURSOR--;
2213        break;
2214    }
2215
2216heredoc_scan_done:
        /* The token length includes the newline before the label; the string
         * value built below does not */
2217    yyleng = YYCURSOR - SCNG(yy_text);
2218
2219    zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2220    return T_ENCAPSED_AND_WHITESPACE;
2221}
2222
2223
/* Literal text inside a nowdoc body. Unlike the heredoc rule, no escape or
 * interpolation characters are looked for; the scan only stops at a newline
 * that is directly followed by the closing label, or at the end of the input.
 * The text is copied unmodified and returned as T_ENCAPSED_AND_WHITESPACE.
 * When the closing label is found the scanner switches to ST_END_HEREDOC. */
2224<ST_NOWDOC>{ANY_CHAR} {
        /* Number of newline bytes (0, 1 or 2) at the end of the token that are
         * left out of the string value */
2225    int newline = 0;
2226
        /* Label on top of the heredoc label stack */
2227    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2228
        /* Cursor is already past the limit: nothing left to scan */
2229    if (YYCURSOR > YYLIMIT) {
2230        return 0;
2231    }
2232
        /* Give back the character matched by the pattern so the loop examines
         * it too; it may itself be the newline before the closing label */
2233    YYCURSOR--;
2234
2235    while (YYCURSOR < YYLIMIT) {
2236        switch (*YYCURSOR++) {
2237            case '\r':
                    /* Treat a carriage return plus line feed pair as one newline */
2238                if (*YYCURSOR == '\n') {
2239                    YYCURSOR++;
2240                }
2241                /* fall through */
2242            case '\n':
2243                /* Check for ending label on the next line */
                    /* The label must start right after the newline, be shorter
                     * than the remaining input, and match byte for byte */
2244                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2245                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2246
                        /* One optional semicolon may follow the label */
2247                    if (*end == ';') {
2248                        end++;
2249                    }
2250
                        /* It only counts as the closing label when a newline
                         * comes next */
2251                    if (*end == '\n' || *end == '\r') {
2252                        /* newline before label will be subtracted from returned text, but
2253                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2254                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2255                            newline = 2; /* Windows newline */
2256                        } else {
2257                            newline = 1;
2258                        }
2259
2260                        CG(increment_lineno) = 1; /* For newline before label */
2261                        BEGIN(ST_END_HEREDOC);
2262
2263                        goto nowdoc_scan_done;
2264                    }
2265                }
2266                /* fall through */
2267            default:
2268                continue;
2269        }
2270    }
2271
2272nowdoc_scan_done:
        /* The token length includes the newline before the label; the copied
         * value does not */
2273    yyleng = YYCURSOR - SCNG(yy_text);
2274
2275    zend_copy_value(zendlval, yytext, yyleng - newline);
        /* HANDLE_NEWLINES is defined outside this chunk; presumably it updates
         * the line counter for newlines inside the text - TODO confirm */
2276    HANDLE_NEWLINES(yytext, yyleng - newline);
2277    return T_ENCAPSED_AND_WHITESPACE;
2278}
2279
2280
/* Catch-all for a single character in ST_IN_SCRIPTING or ST_VAR_OFFSET,
 * presumably reached only when no other rule for these states matches.
 * Reports the character as a compile warning and resumes scanning instead of
 * returning a token. */
2281<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
        /* Cursor is already past the limit: nothing left to scan */
2282    if (YYCURSOR > YYLIMIT) {
2283        return 0;
2284    }
2285
        /* The message shows the character, its numeric value and the current
         * scanner state */
2286    zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
        /* The restart label is defined outside this chunk; presumably scanning
         * continues from the current cursor position - TODO confirm */
2287    goto restart;
2288}
2289
2290*/
2291}
2292