1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2014 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Marcus Boerger <helly@php.net>                              |
16   |          Nuno Lopes <nlopess@php.net>                                |
17   |          Scott MacVicar <scottmac@php.net>                           |
18   | Flex version authors:                                                |
19   |          Andi Gutmans <andi@zend.com>                                |
20   |          Zeev Suraski <zeev@zend.com>                                |
21   +----------------------------------------------------------------------+
22*/
23
24/* $Id$ */
25
/* Optional debug trace: when the `#if 0` below is switched on, YYDEBUG prints
 * the state/character pair it is given; otherwise it expands to nothing. */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
#else
# define YYDEBUG(s, c)
#endif
31
32#include "zend_language_scanner_defs.h"
33
34#include <errno.h>
35#include "zend.h"
36#ifdef PHP_WIN32
37# include <Winuser.h>
38#endif
39#include "zend_alloc.h"
40#include <zend_language_parser.h>
41#include "zend_compile.h"
42#include "zend_language_scanner.h"
43#include "zend_highlight.h"
44#include "zend_constants.h"
45#include "zend_variables.h"
46#include "zend_operators.h"
47#include "zend_API.h"
48#include "zend_strtod.h"
49#include "zend_exceptions.h"
50#include "zend_virtual_cwd.h"
51#include "tsrm_config_common.h"
52
/* Scanner interface macros, all backed by fields of the scanner globals (SCNG). */
#define YYCTYPE   unsigned char
/* Makes the enclosing function return 0 once the cursor plus n would reach
 * YYLIMIT + ZEND_MMAP_AHEAD. */
#define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* Current start condition, stored in SCNG(yy_state). */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = s

/* Maps a condition name to its identifier by prefixing it with `yyc`. */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Moves the cursor to x bytes past the token start and sets the token length to x. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + x; \
                          yyleng   = (unsigned int)x; } while(0)
/* Jumps to the `yymore_restart` label, which must exist in the expanding function. */
#define yymore()     goto yymore_restart
72
73/* perform sanity check. If this message is triggered you should
74   increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75/*!max:re2c */
76#if ZEND_MMAP_AHEAD < YYMAXFILL
77# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78#endif
79
80#ifdef HAVE_STDARG_H
81# include <stdarg.h>
82#endif
83
84#ifdef HAVE_UNISTD_H
85# include <unistd.h>
86#endif
87
/* Globals Macros */
/* SCNG is shorthand for LANG_SCNG. Under ZTS only a resource id is defined
 * here; otherwise the scanner globals structure itself is defined. */
#define SCNG    LANG_SCNG
#ifdef ZTS
ZEND_API ts_rsrc_id language_scanner_globals_id;
#else
ZEND_API zend_php_scanner_globals language_scanner_globals;
#endif
95
/*
 * Walks the l bytes starting at s and increments CG(zend_lineno) once per
 * line break: every '\n' counts, and a '\r' counts only when it is not
 * immediately followed by '\n' (so "\r\n" is a single line break).
 * Note: the byte after a trailing '\r' is inspected, i.e. one byte past s+l.
 */
#define HANDLE_NEWLINES(s, l)                                                   \
do {                                                                            \
    char *p = (s), *boundary = p+(l);                                           \
                                                                                \
    for (; p < boundary; p++) {                                                 \
        if (*p == '\n') {                                                       \
            CG(zend_lineno)++;                                                  \
        } else if (*p == '\r' && *(p+1) != '\n') {                              \
            CG(zend_lineno)++;                                                  \
        }                                                                       \
    }                                                                           \
} while (0)
107
/*
 * Increments CG(zend_lineno) when the single character c is '\n' or '\r'.
 * The argument is parenthesized so that compound expressions (for example a
 * conditional expression) are compared as a whole. c may still be evaluated
 * twice, so it must not have side effects.
 */
#define HANDLE_NEWLINE(c) \
{ \
    if ((c) == '\n' || (c) == '\r') { \
        CG(zend_lineno)++; \
    } \
}
114
/* Setter/getter for SCNG(scanned_string_len): used to save the initial string
 * length after scanning up to the first variable. */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    SCNG(scanned_string_len)
118
/* Non-zero when c is an ASCII letter, an underscore, or any value >= 0x7F.
 * The argument is evaluated several times; pass a side-effect-free expression. */
#define IS_LABEL_START(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z') || '_' == (c) || 0x7F <= (c))

/* Digit classification for octal ('0'..'7') and hexadecimal (0-9, a-f, A-F). */
#define ZEND_IS_OCT(c)  ('0' <= (c) && (c) <= '7')
#define ZEND_IS_HEX(c)  (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
123
124BEGIN_EXTERN_C()
125
126static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127{
128    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129    assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130    return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131}
132
133static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
134{
135    return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
136}
137
138static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
139{
140    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
142}
143
144static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
145{
146    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
147    assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
148    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
150}
151
152
153static void _yy_push_state(int new_state TSRMLS_DC)
154{
155    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION());
156    YYSETCONDITION(new_state);
157}
158
159#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
161static void yy_pop_state(TSRMLS_D)
162{
163    int *stack_state = zend_stack_top(&SCNG(state_stack));
164    YYSETCONDITION(*stack_state);
165    zend_stack_del_top(&SCNG(state_stack));
166}
167
168static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
169{
170    YYCURSOR       = (YYCTYPE*)str;
171    YYLIMIT        = YYCURSOR + len;
172    if (!SCNG(yy_start)) {
173        SCNG(yy_start) = YYCURSOR;
174    }
175}
176
177void startup_scanner(TSRMLS_D)
178{
179    CG(parse_error) = 0;
180    CG(doc_comment) = NULL;
181    zend_stack_init(&SCNG(state_stack), sizeof(int));
182    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
183}
184
185static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
186    efree(heredoc_label->label);
187}
188
189void shutdown_scanner(TSRMLS_D)
190{
191    CG(parse_error) = 0;
192    RESET_DOC_COMMENT();
193    zend_stack_destroy(&SCNG(state_stack));
194    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
195    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
196}
197
198ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
199{
200    lex_state->yy_leng   = SCNG(yy_leng);
201    lex_state->yy_start  = SCNG(yy_start);
202    lex_state->yy_text   = SCNG(yy_text);
203    lex_state->yy_cursor = SCNG(yy_cursor);
204    lex_state->yy_marker = SCNG(yy_marker);
205    lex_state->yy_limit  = SCNG(yy_limit);
206
207    lex_state->state_stack = SCNG(state_stack);
208    zend_stack_init(&SCNG(state_stack), sizeof(int));
209
210    lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
211    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
212
213    lex_state->in = SCNG(yy_in);
214    lex_state->yy_state = YYSTATE;
215    lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
216    lex_state->lineno = CG(zend_lineno);
217
218    lex_state->script_org = SCNG(script_org);
219    lex_state->script_org_size = SCNG(script_org_size);
220    lex_state->script_filtered = SCNG(script_filtered);
221    lex_state->script_filtered_size = SCNG(script_filtered_size);
222    lex_state->input_filter = SCNG(input_filter);
223    lex_state->output_filter = SCNG(output_filter);
224    lex_state->script_encoding = SCNG(script_encoding);
225}
226
227ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
228{
229    SCNG(yy_leng)   = lex_state->yy_leng;
230    SCNG(yy_start)  = lex_state->yy_start;
231    SCNG(yy_text)   = lex_state->yy_text;
232    SCNG(yy_cursor) = lex_state->yy_cursor;
233    SCNG(yy_marker) = lex_state->yy_marker;
234    SCNG(yy_limit)  = lex_state->yy_limit;
235
236    zend_stack_destroy(&SCNG(state_stack));
237    SCNG(state_stack) = lex_state->state_stack;
238
239    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
240    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
241    SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
242
243    SCNG(yy_in) = lex_state->in;
244    YYSETCONDITION(lex_state->yy_state);
245    CG(zend_lineno) = lex_state->lineno;
246    zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
247
248    if (SCNG(script_filtered)) {
249        efree(SCNG(script_filtered));
250        SCNG(script_filtered) = NULL;
251    }
252    SCNG(script_org) = lex_state->script_org;
253    SCNG(script_org_size) = lex_state->script_org_size;
254    SCNG(script_filtered) = lex_state->script_filtered;
255    SCNG(script_filtered_size) = lex_state->script_filtered_size;
256    SCNG(input_filter) = lex_state->input_filter;
257    SCNG(output_filter) = lex_state->output_filter;
258    SCNG(script_encoding) = lex_state->script_encoding;
259
260    RESET_DOC_COMMENT();
261}
262
263ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
264{
265    zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
266    /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
267    file_handle->opened_path = NULL;
268    if (file_handle->free_filename) {
269        file_handle->filename = NULL;
270    }
271}
272
/* Byte order marks, as raw byte strings. Compare with sizeof(...)-1 bytes so
 * that the implicit terminating NUL of the literal is not included. */
#define BOM_UTF32_BE    "\x00\x00\xfe\xff"
#define BOM_UTF32_LE    "\xff\xfe\x00\x00"
#define BOM_UTF16_BE    "\xfe\xff"
#define BOM_UTF16_LE    "\xff\xfe"
#define BOM_UTF8        "\xef\xbb\xbf"
278
279static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
280{
281    const unsigned char *p;
282    int wchar_size = 2;
283    int le = 0;
284
285    /* utf-16 or utf-32? */
286    p = script;
287    while ((p-script) < script_size) {
288        p = memchr(p, 0, script_size-(p-script)-2);
289        if (!p) {
290            break;
291        }
292        if (*(p+1) == '\0' && *(p+2) == '\0') {
293            wchar_size = 4;
294            break;
295        }
296
297        /* searching for UTF-32 specific byte orders, so this will do */
298        p += 4;
299    }
300
301    /* BE or LE? */
302    p = script;
303    while ((p-script) < script_size) {
304        if (*p == '\0' && *(p+wchar_size-1) != '\0') {
305            /* BE */
306            le = 0;
307            break;
308        } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
309            /* LE* */
310            le = 1;
311            break;
312        }
313        p += wchar_size;
314    }
315
316    if (wchar_size == 2) {
317        return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
318    } else {
319        return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
320    }
321
322    return NULL;
323}
324
325static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
326{
327    const zend_encoding *script_encoding = NULL;
328    int bom_size;
329    unsigned char *pos1, *pos2;
330
331    if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
332        return NULL;
333    }
334
335    /* check out BOM */
336    if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
337        script_encoding = zend_multibyte_encoding_utf32be;
338        bom_size = sizeof(BOM_UTF32_BE)-1;
339    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
340        script_encoding = zend_multibyte_encoding_utf32le;
341        bom_size = sizeof(BOM_UTF32_LE)-1;
342    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
343        script_encoding = zend_multibyte_encoding_utf16be;
344        bom_size = sizeof(BOM_UTF16_BE)-1;
345    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
346        script_encoding = zend_multibyte_encoding_utf16le;
347        bom_size = sizeof(BOM_UTF16_LE)-1;
348    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
349        script_encoding = zend_multibyte_encoding_utf8;
350        bom_size = sizeof(BOM_UTF8)-1;
351    }
352
353    if (script_encoding) {
354        /* remove BOM */
355        LANG_SCNG(script_org) += bom_size;
356        LANG_SCNG(script_org_size) -= bom_size;
357
358        return script_encoding;
359    }
360
361    /* script contains NULL bytes -> auto-detection */
362    if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
363        /* check if the NULL byte is after the __HALT_COMPILER(); */
364        pos2 = LANG_SCNG(script_org);
365
366        while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
367            pos2 = memchr(pos2, '_', pos1 - pos2);
368            if (!pos2) break;
369            pos2++;
370            if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
371                pos2 += sizeof("_HALT_COMPILER")-1;
372                while (*pos2 == ' '  ||
373                       *pos2 == '\t' ||
374                       *pos2 == '\r' ||
375                       *pos2 == '\n') {
376                    pos2++;
377                }
378                if (*pos2 == '(') {
379                    pos2++;
380                    while (*pos2 == ' '  ||
381                           *pos2 == '\t' ||
382                           *pos2 == '\r' ||
383                           *pos2 == '\n') {
384                        pos2++;
385                    }
386                    if (*pos2 == ')') {
387                        pos2++;
388                        while (*pos2 == ' '  ||
389                               *pos2 == '\t' ||
390                               *pos2 == '\r' ||
391                               *pos2 == '\n') {
392                            pos2++;
393                        }
394                        if (*pos2 == ';') {
395                            return NULL;
396                        }
397                    }
398                }
399            }
400        }
401        /* make best effort if BOM is missing */
402        return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
403    }
404
405    return NULL;
406}
407
408static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
409{
410    const zend_encoding *script_encoding;
411
412    if (CG(detect_unicode)) {
413        /* check out bom(byte order mark) and see if containing wchars */
414        script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
415        if (script_encoding != NULL) {
416            /* bom or wchar detection is prior to 'script_encoding' option */
417            return script_encoding;
418        }
419    }
420
421    /* if no script_encoding specified, just leave alone */
422    if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
423        return NULL;
424    }
425
426    /* if multiple encodings specified, detect automagically */
427    if (CG(script_encoding_list_size) > 1) {
428        return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
429    }
430
431    return CG(script_encoding_list)[0];
432}
433
434ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
435{
436    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
437    const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
438
439    if (!script_encoding) {
440        return FAILURE;
441    }
442
443    /* judge input/output filter */
444    LANG_SCNG(script_encoding) = script_encoding;
445    LANG_SCNG(input_filter) = NULL;
446    LANG_SCNG(output_filter) = NULL;
447
448    if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
449        if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
450            /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
451            LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
452            LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
453        } else {
454            LANG_SCNG(input_filter) = NULL;
455            LANG_SCNG(output_filter) = NULL;
456        }
457        return SUCCESS;
458    }
459
460    if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
461        LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
462        LANG_SCNG(output_filter) = NULL;
463    } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
464        LANG_SCNG(input_filter) = NULL;
465        LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
466    } else {
467        /* both script and internal encodings are incompatible w/ flex */
468        LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
469        LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
470    }
471
472    return 0;
473}
474
475ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
476{
477    const char *file_path = NULL;
478    char *buf;
479    size_t size, offset = 0;
480    zend_string *compiled_filename;
481
482    /* The shebang line was read, get the current position to obtain the buffer start */
483    if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
484        if ((offset = ftell(file_handle->handle.fp)) == -1) {
485            offset = 0;
486        }
487    }
488
489    if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
490        return FAILURE;
491    }
492
493    zend_llist_add_element(&CG(open_files), file_handle);
494    if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
495        zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
496        size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
497        fh->handle.stream.handle = (void*)(((char*)fh) + diff);
498        file_handle->handle.stream.handle = fh->handle.stream.handle;
499    }
500
501    /* Reset the scanner for scanning the new file */
502    SCNG(yy_in) = file_handle;
503    SCNG(yy_start) = NULL;
504
505    if (size != -1) {
506        if (CG(multibyte)) {
507            SCNG(script_org) = (unsigned char*)buf;
508            SCNG(script_org_size) = size;
509            SCNG(script_filtered) = NULL;
510
511            zend_multibyte_set_filter(NULL TSRMLS_CC);
512
513            if (SCNG(input_filter)) {
514                if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
515                    zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
516                            "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
517                }
518                buf = (char*)SCNG(script_filtered);
519                size = SCNG(script_filtered_size);
520            }
521        }
522        SCNG(yy_start) = (unsigned char *)buf - offset;
523        yy_scan_buffer(buf, size TSRMLS_CC);
524    } else {
525        zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
526    }
527
528    BEGIN(INITIAL);
529
530    if (file_handle->opened_path) {
531        file_path = file_handle->opened_path;
532    } else {
533        file_path = file_handle->filename;
534    }
535
536    compiled_filename = STR_INIT(file_path, strlen(file_path), 0);
537    zend_set_compiled_filename(compiled_filename TSRMLS_CC);
538    STR_RELEASE(compiled_filename);
539
540    if (CG(start_lineno)) {
541        CG(zend_lineno) = CG(start_lineno);
542        CG(start_lineno) = 0;
543    } else {
544        CG(zend_lineno) = 1;
545    }
546
547    RESET_DOC_COMMENT();
548    CG(increment_lineno) = 0;
549    return SUCCESS;
550}
551END_EXTERN_C()
552
553
554ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
555{
556    zend_lex_state original_lex_state;
557    zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
558    zend_op_array *original_active_op_array = CG(active_op_array);
559    zend_op_array *retval=NULL;
560    int compiler_result;
561    zend_bool compilation_successful=0;
562    znode retval_znode;
563    zend_bool original_in_compilation = CG(in_compilation);
564
565    retval_znode.op_type = IS_CONST;
566    ZVAL_LONG(&retval_znode.u.constant, 1);
567
568    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
569
570    retval = op_array; /* success oriented */
571
572    if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
573        if (type==ZEND_REQUIRE) {
574            zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
575            zend_bailout();
576        } else {
577            zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
578        }
579        compilation_successful=0;
580    } else {
581        init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
582        CG(in_compilation) = 1;
583        CG(active_op_array) = op_array;
584        zend_stack_push(&CG(context_stack), (void *) &CG(context));
585        zend_init_compiler_context(TSRMLS_C);
586        compiler_result = zendparse(TSRMLS_C);
587        zend_do_return(&retval_znode, 0 TSRMLS_CC);
588        CG(in_compilation) = original_in_compilation;
589        if (compiler_result != 0) { /* parser error */
590            zend_bailout();
591        }
592        compilation_successful=1;
593    }
594
595    if (retval) {
596        CG(active_op_array) = original_active_op_array;
597        if (compilation_successful) {
598            pass_two(op_array TSRMLS_CC);
599            zend_release_labels(0 TSRMLS_CC);
600        } else {
601            efree(op_array);
602            retval = NULL;
603        }
604    }
605    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
606    return retval;
607}
608
609
610zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
611{
612    zend_file_handle file_handle;
613    zval tmp;
614    zend_op_array *retval;
615    char *opened_path = NULL;
616
617    if (Z_TYPE_P(filename) != IS_STRING) {
618        tmp = *filename;
619        zval_copy_ctor(&tmp);
620        convert_to_string(&tmp);
621        filename = &tmp;
622    }
623    file_handle.filename = Z_STRVAL_P(filename);
624    file_handle.free_filename = 0;
625    file_handle.type = ZEND_HANDLE_FILENAME;
626    file_handle.opened_path = NULL;
627    file_handle.handle.fp = NULL;
628
629    retval = zend_compile_file(&file_handle, type TSRMLS_CC);
630    if (retval && file_handle.handle.stream.handle) {
631        if (!file_handle.opened_path) {
632            file_handle.opened_path = opened_path = estrndup(Z_STRVAL_P(filename), Z_STRLEN_P(filename));
633        }
634
635        zend_hash_str_add_empty_element(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path));
636
637        if (opened_path) {
638            efree(opened_path);
639        }
640    }
641    zend_destroy_file_handle(&file_handle TSRMLS_CC);
642
643    if (filename==&tmp) {
644        zval_dtor(&tmp);
645    }
646    return retval;
647}
648
649ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
650{
651    char *buf;
652    size_t size, old_len;
653    zend_string *new_compiled_filename;
654
655    /* enforce ZEND_MMAP_AHEAD trailing NULLs for flex... */
656    old_len = Z_STRLEN_P(str);
657    Z_STR_P(str) = STR_REALLOC(Z_STR_P(str), old_len + ZEND_MMAP_AHEAD, 0);
658    Z_TYPE_INFO_P(str) = IS_STRING_EX;
659    memset(Z_STRVAL_P(str) + old_len, 0, ZEND_MMAP_AHEAD + 1);
660
661    SCNG(yy_in) = NULL;
662    SCNG(yy_start) = NULL;
663
664    buf = Z_STRVAL_P(str);
665    size = old_len;
666
667    if (CG(multibyte)) {
668        SCNG(script_org) = (unsigned char*)buf;
669        SCNG(script_org_size) = size;
670        SCNG(script_filtered) = NULL;
671
672        zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
673
674        if (SCNG(input_filter)) {
675            if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
676                zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
677                        "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
678            }
679            buf = (char*)SCNG(script_filtered);
680            size = SCNG(script_filtered_size);
681        }
682    }
683
684    yy_scan_buffer(buf, size TSRMLS_CC);
685
686    new_compiled_filename = STR_INIT(filename, strlen(filename), 0);
687    zend_set_compiled_filename(new_compiled_filename TSRMLS_CC);
688    STR_RELEASE(new_compiled_filename);
689    CG(zend_lineno) = 1;
690    CG(increment_lineno) = 0;
691    RESET_DOC_COMMENT();
692    return SUCCESS;
693}
694
695
696ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
697{
698    size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
699    if (SCNG(input_filter)) {
700        size_t original_offset = offset, length = 0;
701        do {
702            unsigned char *p = NULL;
703            if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
704                return (size_t)-1;
705            }
706            efree(p);
707            if (length > original_offset) {
708                offset--;
709            } else if (length < original_offset) {
710                offset++;
711            }
712        } while (original_offset != length);
713    }
714    return offset;
715}
716
717
718zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
719{
720    zend_lex_state original_lex_state;
721    zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
722    zend_op_array *original_active_op_array = CG(active_op_array);
723    zend_op_array *retval;
724    zval tmp;
725    int compiler_result;
726    zend_bool original_in_compilation = CG(in_compilation);
727
728    if (Z_STRLEN_P(source_string)==0) {
729        efree(op_array);
730        return NULL;
731    }
732
733    CG(in_compilation) = 1;
734
735    ZVAL_DUP(&tmp, source_string);
736    convert_to_string(&tmp);
737    source_string = &tmp;
738
739    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
740    if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
741        efree(op_array);
742        retval = NULL;
743    } else {
744        zend_bool orig_interactive = CG(interactive);
745
746        CG(interactive) = 0;
747        init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
748        CG(interactive) = orig_interactive;
749        CG(active_op_array) = op_array;
750        zend_stack_push(&CG(context_stack), (void *) &CG(context));
751        zend_init_compiler_context(TSRMLS_C);
752        BEGIN(ST_IN_SCRIPTING);
753        compiler_result = zendparse(TSRMLS_C);
754
755        if (SCNG(script_filtered)) {
756            efree(SCNG(script_filtered));
757            SCNG(script_filtered) = NULL;
758        }
759
760        if (compiler_result != 0) {
761            CG(active_op_array) = original_active_op_array;
762            CG(unclean_shutdown)=1;
763            destroy_op_array(op_array TSRMLS_CC);
764            efree(op_array);
765            retval = NULL;
766        } else {
767            zend_do_return(NULL, 0 TSRMLS_CC);
768            CG(active_op_array) = original_active_op_array;
769            pass_two(op_array TSRMLS_CC);
770            zend_release_labels(0 TSRMLS_CC);
771            retval = op_array;
772        }
773    }
774    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
775    zval_dtor(&tmp);
776    CG(in_compilation) = original_in_compilation;
777    return retval;
778}
779
780
781BEGIN_EXTERN_C()
782int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
783{
784    zend_lex_state original_lex_state;
785    zend_file_handle file_handle;
786
787    file_handle.type = ZEND_HANDLE_FILENAME;
788    file_handle.filename = filename;
789    file_handle.free_filename = 0;
790    file_handle.opened_path = NULL;
791    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
792    if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
793        zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
794        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
795        return FAILURE;
796    }
797    zend_highlight(syntax_highlighter_ini TSRMLS_CC);
798    if (SCNG(script_filtered)) {
799        efree(SCNG(script_filtered));
800        SCNG(script_filtered) = NULL;
801    }
802    zend_destroy_file_handle(&file_handle TSRMLS_CC);
803    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
804    return SUCCESS;
805}
806
807int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
808{
809    zend_lex_state original_lex_state;
810    zval tmp = *str;
811
812    str = &tmp;
813    zval_copy_ctor(str);
814    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
815    if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
816        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
817        return FAILURE;
818    }
819    BEGIN(INITIAL);
820    zend_highlight(syntax_highlighter_ini TSRMLS_CC);
821    if (SCNG(script_filtered)) {
822        efree(SCNG(script_filtered));
823        SCNG(script_filtered) = NULL;
824    }
825    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
826    zval_dtor(str);
827    return SUCCESS;
828}
829
830ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
831{
832    size_t length;
833    unsigned char *new_yy_start;
834
835    /* convert and set */
836    if (!SCNG(input_filter)) {
837        if (SCNG(script_filtered)) {
838            efree(SCNG(script_filtered));
839            SCNG(script_filtered) = NULL;
840        }
841        SCNG(script_filtered_size) = 0;
842        length = SCNG(script_org_size);
843        new_yy_start = SCNG(script_org);
844    } else {
845        if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
846            zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
847                    "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
848        }
849        if (SCNG(script_filtered)) {
850            efree(SCNG(script_filtered));
851        }
852        SCNG(script_filtered) = new_yy_start;
853        SCNG(script_filtered_size) = length;
854    }
855
856    SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
857    SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
858    SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
859    SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
860
861    SCNG(yy_start) = new_yy_start;
862}
863
864
/* Store the yyleng bytes starting at yytext into zendlval as a string.
 * When an output filter is installed the bytes go through it first, the
 * filter's result is stored instead, and the buffer it returned is freed.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement and stays correct inside an unbraced if/else; invoke it with
 * a trailing semicolon.
 *
 * TODO: avoid reallocation ???
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
    do { \
        if (SCNG(output_filter)) { \
            size_t sz = 0; \
            char *s = NULL; \
            SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
            ZVAL_STRINGL(zendlval, s, sz); \
            efree(s); \
        } else { \
            ZVAL_STRINGL(zendlval, yytext, yyleng); \
        } \
    } while (0)
876
/* DUMMY_STRINGL is used by the whitespace and open/close tag rules to give
 * their token a string value. The variant that copies the matched text is
 * compiled out with "#if 0"; the active definition ignores yytext and yyleng
 * and stores an empty string.
 * TODO: some extensions might need the content, but we intentionally don't copy it ???
 */
#if 0
# define DUMMY_STRINGL(zendlval, yytext, yyleng) \
    ZVAL_STRINGL(zendlval, yytext, yyleng)
#else
# define DUMMY_STRINGL(zendlval, yytext, yyleng) \
    ZVAL_EMPTY_STRING(zendlval)
#endif
885
886static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
887{
888    register char *s, *t;
889    char *end;
890
891    ZVAL_STRINGL(zendlval, str, len);
892
893    /* convert escape sequences */
894    s = t = Z_STRVAL_P(zendlval);
895    end = s+Z_STRLEN_P(zendlval);
896    while (s<end) {
897        if (*s=='\\') {
898            s++;
899            if (s >= end) {
900                *t++ = '\\';
901                break;
902            }
903
904            switch(*s) {
905                case 'n':
906                    *t++ = '\n';
907                    Z_STRLEN_P(zendlval)--;
908                    break;
909                case 'r':
910                    *t++ = '\r';
911                    Z_STRLEN_P(zendlval)--;
912                    break;
913                case 't':
914                    *t++ = '\t';
915                    Z_STRLEN_P(zendlval)--;
916                    break;
917                case 'f':
918                    *t++ = '\f';
919                    Z_STRLEN_P(zendlval)--;
920                    break;
921                case 'v':
922                    *t++ = '\v';
923                    Z_STRLEN_P(zendlval)--;
924                    break;
925                case 'e':
926#ifdef PHP_WIN32
927                    *t++ = VK_ESCAPE;
928#else
929                    *t++ = '\e';
930#endif
931                    Z_STRLEN_P(zendlval)--;
932                    break;
933                case '"':
934                case '`':
935                    if (*s != quote_type) {
936                        *t++ = '\\';
937                        *t++ = *s;
938                        break;
939                    }
940                case '\\':
941                case '$':
942                    *t++ = *s;
943                    Z_STRLEN_P(zendlval)--;
944                    break;
945                case 'x':
946                case 'X':
947                    if (ZEND_IS_HEX(*(s+1))) {
948                        char hex_buf[3] = { 0, 0, 0 };
949
950                        Z_STRLEN_P(zendlval)--; /* for the 'x' */
951
952                        hex_buf[0] = *(++s);
953                        Z_STRLEN_P(zendlval)--;
954                        if (ZEND_IS_HEX(*(s+1))) {
955                            hex_buf[1] = *(++s);
956                            Z_STRLEN_P(zendlval)--;
957                        }
958                        *t++ = (char) strtol(hex_buf, NULL, 16);
959                    } else {
960                        *t++ = '\\';
961                        *t++ = *s;
962                    }
963                    break;
964                default:
965                    /* check for an octal */
966                    if (ZEND_IS_OCT(*s)) {
967                        char octal_buf[4] = { 0, 0, 0, 0 };
968
969                        octal_buf[0] = *s;
970                        Z_STRLEN_P(zendlval)--;
971                        if (ZEND_IS_OCT(*(s+1))) {
972                            octal_buf[1] = *(++s);
973                            Z_STRLEN_P(zendlval)--;
974                            if (ZEND_IS_OCT(*(s+1))) {
975                                octal_buf[2] = *(++s);
976                                Z_STRLEN_P(zendlval)--;
977                            }
978                        }
979                        *t++ = (char) strtol(octal_buf, NULL, 8);
980                    } else {
981                        *t++ = '\\';
982                        *t++ = *s;
983                    }
984                    break;
985            }
986        } else {
987            *t++ = *s;
988        }
989
990        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
991            CG(zend_lineno)++;
992        }
993        s++;
994    }
995    *t = 0;
996    if (SCNG(output_filter)) {
997        size_t sz = 0;
998        unsigned char *str;
999        // TODO: avoid realocation ???
1000        s = Z_STRVAL_P(zendlval);
1001        SCNG(output_filter)(&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
1002        zval_ptr_dtor(zendlval);
1003        ZVAL_STRINGL(zendlval, str, sz);
1004        efree(str);
1005    }
1006}
1007
1008
1009int lex_scan(zval *zendlval TSRMLS_DC)
1010{
1011restart:
1012    SCNG(yy_text) = YYCURSOR;
1013
1014yymore_restart:
1015
1016/*!re2c
/* re2c configuration: turn off the generated end-of-input check around YYFILL */
re2c:yyfill:check = 0;
/* Named patterns used by the rules below */
/* decimal integer */
LNUM    [0-9]+
/* decimal number with a fractional point (digits may be missing on one side) */
DNUM    ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
/* integer or decimal number followed by an exponent */
EXPONENT_DNUM   (({LNUM}|{DNUM})[eE][+-]?{LNUM})
/* hexadecimal integer */
HNUM    "0x"[0-9a-fA-F]+
/* binary integer */
BNUM    "0b"[01]+
/* identifier: letter, underscore or byte 0x7f-0xff, then the same plus digits */
LABEL   [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
/* single-character tokens; note that "+-/" inside the class is a range
 * ('+' through '/'), which is how '-' becomes a member */
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
/* any single character */
ANY_CHAR [^]
NEWLINE ("\r"|"\n"|"\r\n")

/* compute yyleng before each rule */
<!*> := yyleng = YYCURSOR - SCNG(yy_text);
1032
/* Reserved words. Each rule below matches one keyword while in
 * ST_IN_SCRIPTING and returns the corresponding parser token; none of them
 * stores a value in zendlval.
 */

/* "exit" and "die" share one token */
<ST_IN_SCRIPTING>"exit" {
    return T_EXIT;
}

<ST_IN_SCRIPTING>"die" {
    return T_EXIT;
}

<ST_IN_SCRIPTING>"function" {
    return T_FUNCTION;
}

<ST_IN_SCRIPTING>"const" {
    return T_CONST;
}

<ST_IN_SCRIPTING>"return" {
    return T_RETURN;
}

<ST_IN_SCRIPTING>"yield" {
    return T_YIELD;
}

/* exception handling */
<ST_IN_SCRIPTING>"try" {
    return T_TRY;
}

<ST_IN_SCRIPTING>"catch" {
    return T_CATCH;
}

<ST_IN_SCRIPTING>"finally" {
    return T_FINALLY;
}

<ST_IN_SCRIPTING>"throw" {
    return T_THROW;
}

/* conditionals and loops, including the alternative "end..." forms */
<ST_IN_SCRIPTING>"if" {
    return T_IF;
}

<ST_IN_SCRIPTING>"elseif" {
    return T_ELSEIF;
}

<ST_IN_SCRIPTING>"endif" {
    return T_ENDIF;
}

<ST_IN_SCRIPTING>"else" {
    return T_ELSE;
}

<ST_IN_SCRIPTING>"while" {
    return T_WHILE;
}

<ST_IN_SCRIPTING>"endwhile" {
    return T_ENDWHILE;
}

<ST_IN_SCRIPTING>"do" {
    return T_DO;
}

<ST_IN_SCRIPTING>"for" {
    return T_FOR;
}

<ST_IN_SCRIPTING>"endfor" {
    return T_ENDFOR;
}

<ST_IN_SCRIPTING>"foreach" {
    return T_FOREACH;
}

<ST_IN_SCRIPTING>"endforeach" {
    return T_ENDFOREACH;
}

<ST_IN_SCRIPTING>"declare" {
    return T_DECLARE;
}

<ST_IN_SCRIPTING>"enddeclare" {
    return T_ENDDECLARE;
}

<ST_IN_SCRIPTING>"instanceof" {
    return T_INSTANCEOF;
}

<ST_IN_SCRIPTING>"as" {
    return T_AS;
}

/* switch and jump statements */
<ST_IN_SCRIPTING>"switch" {
    return T_SWITCH;
}

<ST_IN_SCRIPTING>"endswitch" {
    return T_ENDSWITCH;
}

<ST_IN_SCRIPTING>"case" {
    return T_CASE;
}

<ST_IN_SCRIPTING>"default" {
    return T_DEFAULT;
}

<ST_IN_SCRIPTING>"break" {
    return T_BREAK;
}

<ST_IN_SCRIPTING>"continue" {
    return T_CONTINUE;
}

<ST_IN_SCRIPTING>"goto" {
    return T_GOTO;
}

/* output constructs */
<ST_IN_SCRIPTING>"echo" {
    return T_ECHO;
}

<ST_IN_SCRIPTING>"print" {
    return T_PRINT;
}

/* class-like declarations */
<ST_IN_SCRIPTING>"class" {
    return T_CLASS;
}

<ST_IN_SCRIPTING>"interface" {
    return T_INTERFACE;
}

<ST_IN_SCRIPTING>"trait" {
    return T_TRAIT;
}

<ST_IN_SCRIPTING>"extends" {
    return T_EXTENDS;
}

<ST_IN_SCRIPTING>"implements" {
    return T_IMPLEMENTS;
}
1188
/* "->" in script code: the scanner enters ST_LOOKING_FOR_PROPERTY (pushed on
 * the state stack) so that the next label is read as a property name. */
<ST_IN_SCRIPTING>"->" {
    yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
    return T_OBJECT_OPERATOR;
}

/* Whitespace is a token of its own in both states; its text is not kept
 * (see DUMMY_STRINGL). */
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
    DUMMY_STRINGL(zendlval, yytext, yyleng);
    HANDLE_NEWLINES(yytext, yyleng);
    return T_WHITESPACE;
}

/* A further "->" while already looking for a property: the state is kept */
<ST_LOOKING_FOR_PROPERTY>"->" {
    return T_OBJECT_OPERATOR;
}

/* The property name: leave the state and return the label as T_STRING */
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
    yy_pop_state(TSRMLS_C);
    zend_copy_value(zendlval, yytext, yyleng);
    return T_STRING;
}

/* Anything else: give the character back, leave the state and scan it again
 * under the previous state */
<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
    yyless(0);
    yy_pop_state(TSRMLS_C);
    goto restart;
}
1215
/* More fixed tokens of ST_IN_SCRIPTING; each rule only returns its token. */

<ST_IN_SCRIPTING>"::" {
    return T_PAAMAYIM_NEKUDOTAYIM;
}

<ST_IN_SCRIPTING>"\\" {
    return T_NS_SEPARATOR;
}

<ST_IN_SCRIPTING>"..." {
    return T_ELLIPSIS;
}

<ST_IN_SCRIPTING>"new" {
    return T_NEW;
}

<ST_IN_SCRIPTING>"clone" {
    return T_CLONE;
}

<ST_IN_SCRIPTING>"var" {
    return T_VAR;
}

/* Type casts: "(", optional spaces/tabs, a type name, optional spaces/tabs,
 * ")". Several spellings map to the same token. */
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
    return T_INT_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
    return T_DOUBLE_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
    return T_STRING_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
    return T_ARRAY_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
    return T_OBJECT_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
    return T_BOOL_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
    return T_UNSET_CAST;
}

/* code inclusion and evaluation */
<ST_IN_SCRIPTING>"eval" {
    return T_EVAL;
}

<ST_IN_SCRIPTING>"include" {
    return T_INCLUDE;
}

<ST_IN_SCRIPTING>"include_once" {
    return T_INCLUDE_ONCE;
}

<ST_IN_SCRIPTING>"require" {
    return T_REQUIRE;
}

<ST_IN_SCRIPTING>"require_once" {
    return T_REQUIRE_ONCE;
}

/* namespaces and traits */
<ST_IN_SCRIPTING>"namespace" {
    return T_NAMESPACE;
}

<ST_IN_SCRIPTING>"use" {
    return T_USE;
}

<ST_IN_SCRIPTING>"insteadof" {
        return T_INSTEADOF;
}

<ST_IN_SCRIPTING>"global" {
    return T_GLOBAL;
}

<ST_IN_SCRIPTING>"isset" {
    return T_ISSET;
}

<ST_IN_SCRIPTING>"empty" {
    return T_EMPTY;
}

<ST_IN_SCRIPTING>"__halt_compiler" {
    return T_HALT_COMPILER;
}

/* member modifiers */
<ST_IN_SCRIPTING>"static" {
    return T_STATIC;
}

<ST_IN_SCRIPTING>"abstract" {
    return T_ABSTRACT;
}

<ST_IN_SCRIPTING>"final" {
    return T_FINAL;
}

<ST_IN_SCRIPTING>"private" {
    return T_PRIVATE;
}

<ST_IN_SCRIPTING>"protected" {
    return T_PROTECTED;
}

<ST_IN_SCRIPTING>"public" {
    return T_PUBLIC;
}

<ST_IN_SCRIPTING>"unset" {
    return T_UNSET;
}

<ST_IN_SCRIPTING>"=>" {
    return T_DOUBLE_ARROW;
}

<ST_IN_SCRIPTING>"list" {
    return T_LIST;
}

<ST_IN_SCRIPTING>"array" {
    return T_ARRAY;
}

<ST_IN_SCRIPTING>"callable" {
 return T_CALLABLE;
}
1359
/* Multi-character operators. Each rule returns its token and stores no value. */

<ST_IN_SCRIPTING>"++" {
    return T_INC;
}

<ST_IN_SCRIPTING>"--" {
    return T_DEC;
}

/* comparison */
<ST_IN_SCRIPTING>"===" {
    return T_IS_IDENTICAL;
}

<ST_IN_SCRIPTING>"!==" {
    return T_IS_NOT_IDENTICAL;
}

<ST_IN_SCRIPTING>"==" {
    return T_IS_EQUAL;
}

/* two spellings of "not equal" */
<ST_IN_SCRIPTING>"!="|"<>" {
    return T_IS_NOT_EQUAL;
}

<ST_IN_SCRIPTING>"<=" {
    return T_IS_SMALLER_OR_EQUAL;
}

<ST_IN_SCRIPTING>">=" {
    return T_IS_GREATER_OR_EQUAL;
}

/* compound assignment and power */
<ST_IN_SCRIPTING>"+=" {
    return T_PLUS_EQUAL;
}

<ST_IN_SCRIPTING>"-=" {
    return T_MINUS_EQUAL;
}

<ST_IN_SCRIPTING>"*=" {
    return T_MUL_EQUAL;
}

<ST_IN_SCRIPTING>"*\*" {
    return T_POW;
}

<ST_IN_SCRIPTING>"*\*=" {
    return T_POW_EQUAL;
}

<ST_IN_SCRIPTING>"/=" {
    return T_DIV_EQUAL;
}

<ST_IN_SCRIPTING>".=" {
    return T_CONCAT_EQUAL;
}

<ST_IN_SCRIPTING>"%=" {
    return T_MOD_EQUAL;
}

<ST_IN_SCRIPTING>"<<=" {
    return T_SL_EQUAL;
}

<ST_IN_SCRIPTING>">>=" {
    return T_SR_EQUAL;
}

<ST_IN_SCRIPTING>"&=" {
    return T_AND_EQUAL;
}

<ST_IN_SCRIPTING>"|=" {
    return T_OR_EQUAL;
}

<ST_IN_SCRIPTING>"^=" {
    return T_XOR_EQUAL;
}

/* logical operators, symbolic and word forms.
 * NOTE(review): the word forms are written in upper case; whether lower-case
 * spellings match depends on how re2c is invoked (case handling of quoted
 * strings) - confirm against the build rules. */
<ST_IN_SCRIPTING>"||" {
    return T_BOOLEAN_OR;
}

<ST_IN_SCRIPTING>"&&" {
    return T_BOOLEAN_AND;
}

<ST_IN_SCRIPTING>"OR" {
    return T_LOGICAL_OR;
}

<ST_IN_SCRIPTING>"AND" {
    return T_LOGICAL_AND;
}

<ST_IN_SCRIPTING>"XOR" {
    return T_LOGICAL_XOR;
}

/* shifts */
<ST_IN_SCRIPTING>"<<" {
    return T_SL;
}

<ST_IN_SCRIPTING>">>" {
    return T_SR;
}

/* Any single character of TOKENS is returned as itself */
<ST_IN_SCRIPTING>{TOKENS} {
    return yytext[0];
}
1475
1476
/* "{" pushes another ST_IN_SCRIPTING level so that the matching "}" has a
 * state to pop */
<ST_IN_SCRIPTING>"{" {
    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
    return '{';
}


/* "${" inside an interpolating string: look for a variable name next */
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
    yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
    return T_DOLLAR_OPEN_CURLY_BRACES;
}


/* "}" resets the pending doc comment and returns to the state that was active
 * before the matching "{"; an unmatched "}" (empty stack) pops nothing */
<ST_IN_SCRIPTING>"}" {
    RESET_DOC_COMMENT();
    if (!zend_stack_is_empty(&SCNG(state_stack))) {
        yy_pop_state(TSRMLS_C);
    }
    return '}';
}


/* After "${": a label directly followed by "[" or "}" is a variable name.
 * The trailing character is given back (yyless) before the value is copied,
 * then the state is replaced by ST_IN_SCRIPTING. */
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
    yyless(yyleng - 1);
    zend_copy_value(zendlval, yytext, yyleng);
    yy_pop_state(TSRMLS_C);
    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
    return T_STRING_VARNAME;
}


/* After "${": anything else is an expression; give the character back,
 * switch to ST_IN_SCRIPTING and scan again */
<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
    yyless(0);
    yy_pop_state(TSRMLS_C);
    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
    goto restart;
}
1513
1514<ST_IN_SCRIPTING>{BNUM} {
1515    char *bin = yytext + 2; /* Skip "0b" */
1516    int len = yyleng - 2;
1517
1518    /* Skip any leading 0s */
1519    while (*bin == '0') {
1520        ++bin;
1521        --len;
1522    }
1523
1524    if (len < SIZEOF_LONG * 8) {
1525        if (len == 0) {
1526            ZVAL_LONG(zendlval, 0);
1527        } else {
1528            ZVAL_LONG(zendlval, strtol(bin, NULL, 2));
1529        }
1530        return T_LNUMBER;
1531    } else {
1532        ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, NULL));
1533        return T_DNUMBER;
1534    }
1535}
1536
1537<ST_IN_SCRIPTING>{LNUM} {
1538    if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1539        ZVAL_LONG(zendlval, strtol(yytext, NULL, 0));
1540    } else {
1541        errno = 0;
1542        ZVAL_LONG(zendlval, strtol(yytext, NULL, 0));
1543        if (errno == ERANGE) { /* Overflow */
1544            if (yytext[0] == '0') { /* octal overflow */
1545                ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, NULL));
1546            } else {
1547                ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
1548            }
1549            return T_DNUMBER;
1550        }
1551    }
1552    return T_LNUMBER;
1553}
1554
1555<ST_IN_SCRIPTING>{HNUM} {
1556    char *hex = yytext + 2; /* Skip "0x" */
1557    int len = yyleng - 2;
1558
1559    /* Skip any leading 0s */
1560    while (*hex == '0') {
1561        hex++;
1562        len--;
1563    }
1564
1565    if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1566        if (len == 0) {
1567            ZVAL_LONG(zendlval, 0);
1568        } else {
1569            ZVAL_LONG(zendlval, strtol(hex, NULL, 16));
1570        }
1571        return T_LNUMBER;
1572    } else {
1573        ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, NULL));
1574        return T_DNUMBER;
1575    }
1576}
1577
/* Array offset inside an interpolated string: "0" or a decimal number without
 * leading zeros. It is stored as a long when it is shorter than
 * MAX_LENGTH_OF_LONG - 1 characters, or exactly that long and ordered before
 * long_min_digits by strcmp(); otherwise the text is stored as a string.
 * The token is T_NUM_STRING either way. */
<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
    if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
        ZVAL_LONG(zendlval, strtol(yytext, NULL, 10));
    } else {
        ZVAL_STRINGL(zendlval, yytext, yyleng);
    }
    return T_NUM_STRING;
}

/* Remaining numeric spellings (the ones not matched by the rule above, such
 * as leading zeros, hex and binary) are passed on as their literal text */
<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
    ZVAL_STRINGL(zendlval, yytext, yyleng);
    return T_NUM_STRING;
}

/* Floating point literal, with or without exponent */
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
    ZVAL_DOUBLE(zendlval, zend_strtod(yytext, NULL));
    return T_DNUMBER;
}
1596
1597<ST_IN_SCRIPTING>"__CLASS__" {
1598    zend_class_entry *ce = CG(active_class_entry);
1599    if (ce && ZEND_ACC_TRAIT == (ce->ce_flags & ZEND_ACC_TRAIT)) {
1600        /* We create a special __CLASS__ constant that is going to be resolved
1601           at run-time */
1602        ZVAL_STRINGL(zendlval, "__CLASS__", sizeof("__CLASS__") - 1);
1603        Z_TYPE_INFO_P(zendlval) = IS_CONSTANT_EX;
1604    } else {
1605        if (ce && ce->name) {
1606            ZVAL_STR(zendlval, STR_COPY(ce->name));
1607        } else {
1608            ZVAL_EMPTY_STRING(zendlval);
1609        }
1610    }
1611    return T_CLASS_C;
1612}
1613
1614<ST_IN_SCRIPTING>"__TRAIT__" {
1615    zend_class_entry *ce = CG(active_class_entry);
1616    if (ce && ce->name && ZEND_ACC_TRAIT == (ce->ce_flags & ZEND_ACC_TRAIT)) {
1617        ZVAL_STR(zendlval, STR_COPY(ce->name));
1618    } else {
1619        ZVAL_EMPTY_STRING(zendlval);
1620    }
1621    return T_TRAIT_C;
1622}
1623
1624<ST_IN_SCRIPTING>"__FUNCTION__" {
1625    zend_op_array *op_array = CG(active_op_array);
1626    if (op_array && op_array->function_name) {
1627        ZVAL_STR(zendlval, STR_COPY(op_array->function_name));
1628    } else {
1629        ZVAL_EMPTY_STRING(zendlval);
1630    }
1631    return T_FUNC_C;
1632}
1633
1634<ST_IN_SCRIPTING>"__METHOD__" {
1635    if (CG(active_class_entry)) {
1636        int len = 0;
1637
1638        if (CG(active_class_entry)->name) {
1639            len += CG(active_class_entry)->name->len;
1640        }
1641        if (CG(active_op_array) && CG(active_op_array)->function_name) {
1642            len += sizeof("::")-1;
1643            len += CG(active_op_array)->function_name->len;
1644        }
1645        ZVAL_NEW_STR(zendlval, STR_ALLOC(len, 0));
1646        len = 0;
1647        if (CG(active_class_entry)->name) {
1648            memcpy(Z_STRVAL_P(zendlval), CG(active_class_entry)->name->val, CG(active_class_entry)->name->len);
1649            len += CG(active_class_entry)->name->len;
1650        }
1651        if (CG(active_op_array) && CG(active_op_array)->function_name) {
1652            memcpy(Z_STRVAL_P(zendlval) + len, "::", sizeof("::")-1);
1653            len += sizeof("::")-1;
1654            memcpy(Z_STRVAL_P(zendlval) + len, CG(active_op_array)->function_name->val, CG(active_op_array)->function_name->len);
1655            len += CG(active_op_array)->function_name->len;
1656        }
1657        Z_STRVAL_P(zendlval)[len] = 0;
1658    } else if (CG(active_op_array) && CG(active_op_array)->function_name) {
1659        ZVAL_STR(zendlval, STR_COPY(CG(active_op_array)->function_name));
1660    } else {
1661        ZVAL_EMPTY_STRING(zendlval);
1662    }
1663    return T_METHOD_C;
1664}
1665
/* __LINE__: the scanner's current line number */
<ST_IN_SCRIPTING>"__LINE__" {
    ZVAL_LONG(zendlval, CG(zend_lineno));
    return T_LINE;
}

/* __FILE__: the compiled filename, or the empty string when there is none */
<ST_IN_SCRIPTING>"__FILE__" {
    zend_string *filename = zend_get_compiled_filename(TSRMLS_C);

    if (!filename) {
        ZVAL_EMPTY_STRING(zendlval);
    } else {
        ZVAL_STR(zendlval, STR_COPY(filename));
    }
    return T_FILE;
}

/* __DIR__: the directory part of the compiled filename. A result of "." is
 * replaced by the current working directory where getcwd/getwd is available. */
<ST_IN_SCRIPTING>"__DIR__" {
    zend_string *filename = zend_get_compiled_filename(TSRMLS_C);
    zend_string *dirname;

    if (!filename) {
        filename = STR_EMPTY_ALLOC();
    }

    /* work on a private copy: zend_dirname() is handed the buffer itself */
    dirname = STR_INIT(filename->val, filename->len, 0);
    zend_dirname(dirname->val, dirname->len);

    if (strcmp(dirname->val, ".") == 0) {
        /* make room for a full path before asking for the working directory */
        dirname = STR_REALLOC(dirname, MAXPATHLEN, 0);
#if HAVE_GETCWD
        VCWD_GETCWD(dirname->val, MAXPATHLEN);
#elif HAVE_GETWD
        VCWD_GETWD(dirname->val);
#endif
    }

    /* the buffer was rewritten in place, so the stored length is recomputed */
    dirname->len = strlen(dirname->val);
    ZVAL_STR(zendlval, dirname);
    return T_DIR;
}

/* __NAMESPACE__: a duplicate of the current namespace value, or the empty
 * string when no namespace is set (IS_UNDEF) */
<ST_IN_SCRIPTING>"__NAMESPACE__" {
    if (Z_TYPE(CG(current_namespace)) != IS_UNDEF) {
        ZVAL_DUP(zendlval, &CG(current_namespace));
    } else {
        ZVAL_EMPTY_STRING(zendlval);
    }
    return T_NS_C;
}
1715
/* <script language="php"> (also with single quotes or unquoted): opens PHP
 * code. The last '<' of the match is located first; if it is not at the start
 * of the token, the text before it is emitted as inline HTML (label
 * inline_html in the {ANY_CHAR} rule) and the tag is left for the next call. */
<INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
    YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));

    if (bracket != SCNG(yy_text)) {
        /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
        YYCURSOR = bracket;
        goto inline_html;
    }

    HANDLE_NEWLINES(yytext, yyleng);
    DUMMY_STRINGL(zendlval, yytext, yyleng);
    BEGIN(ST_IN_SCRIPTING);
    return T_OPEN_TAG;
}


/* "<%=" is an echo open tag only when asp_tags is enabled; otherwise it is
 * ordinary HTML text (label inline_char_handler in the {ANY_CHAR} rule) */
<INITIAL>"<%=" {
    if (CG(asp_tags)) {
        DUMMY_STRINGL(zendlval, yytext, yyleng);
        BEGIN(ST_IN_SCRIPTING);
        return T_OPEN_TAG_WITH_ECHO;
    } else {
        goto inline_char_handler;
    }
}


/* "<?=" is accepted unconditionally */
<INITIAL>"<?=" {
    DUMMY_STRINGL(zendlval, yytext, yyleng);
    BEGIN(ST_IN_SCRIPTING);
    return T_OPEN_TAG_WITH_ECHO;
}


/* "<%" is an open tag only when asp_tags is enabled */
<INITIAL>"<%" {
    if (CG(asp_tags)) {
        DUMMY_STRINGL(zendlval, yytext, yyleng);
        BEGIN(ST_IN_SCRIPTING);
        return T_OPEN_TAG;
    } else {
        goto inline_char_handler;
    }
}


/* "<?php" must be followed by a space, a tab or a newline; that trailing
 * character is part of the token and is passed to HANDLE_NEWLINE */
<INITIAL>"<?php"([ \t]|{NEWLINE}) {
    DUMMY_STRINGL(zendlval, yytext, yyleng);
    HANDLE_NEWLINE(yytext[yyleng-1]);
    BEGIN(ST_IN_SCRIPTING);
    return T_OPEN_TAG;
}


/* "<?" is an open tag only when short_tags is enabled */
<INITIAL>"<?" {
    if (CG(short_tags)) {
        DUMMY_STRINGL(zendlval, yytext, yyleng);
        BEGIN(ST_IN_SCRIPTING);
        return T_OPEN_TAG;
    } else {
        goto inline_char_handler;
    }
}
1778
/* Inline HTML: everything outside PHP tags. Starting from one arbitrary
 * character, the action extends the token up to the next place where an open
 * tag may start and returns it as T_INLINE_HTML.
 *
 * The labels inline_char_handler and inline_html are jump targets of the
 * open-tag rules. */
<INITIAL>{ANY_CHAR} {
    /* the cursor has moved past the limit: end of input */
    if (YYCURSOR > YYLIMIT) {
        return 0;
    }

inline_char_handler:

    while (1) {
        /* jump to the character after the next '<', or to the end of input */
        YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);

        YYCURSOR = ptr ? ptr + 1 : YYLIMIT;

        if (YYCURSOR < YYLIMIT) {
            /* look at the character following '<'; "break" ends the HTML
             * chunk here, "continue" keeps searching */
            switch (*YYCURSOR) {
                case '?':
                    if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
                        break;
                    }
                    continue;
                case '%':
                    if (CG(asp_tags)) {
                        break;
                    }
                    continue;
                case 's':
                case 'S':
                    /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
                     * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
                    YYCURSOR--;
                    /* NOTE(review): yymore() is defined outside this chunk; presumably it
                     * re-enters the matcher while keeping the text scanned so far - confirm */
                    yymore();
                default:
                    continue;
            }

            /* step back onto the '<' so the tag starts the next token */
            YYCURSOR--;
        }

        break;
    }

inline_html:
    /* the cursor was moved by hand, so the token length is recomputed */
    yyleng = YYCURSOR - SCNG(yy_text);

    if (SCNG(output_filter)) {
        int readsize;
        char *s = NULL;
        size_t sz = 0;
        // TODO: avoid reallocation ???
        readsize = SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
        ZVAL_STRINGL(zendlval, s, sz);
        efree(s);
        /* the filter's return value is treated as the number of input bytes
         * it handled; the token is cut back to that length if it is shorter */
        if (readsize < yyleng) {
            yyless(readsize);
        }
    } else {
      ZVAL_STRINGL(zendlval, yytext, yyleng);
    }
    HANDLE_NEWLINES(yytext, yyleng);
    return T_INLINE_HTML;
}
1839
1840
/* Make sure a label character follows "->", otherwise there is no property
 * and "->" will be taken literally.
 * The three trailing characters ("->" and the first label character) are
 * given back, so only "$name" is consumed; the value is the name without "$".
 */
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
    yyless(yyleng - 3);
    yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
    return T_VARIABLE;
}

/* A [ always designates a variable offset, regardless of what follows.
 * The "[" itself is given back and scanned again in ST_VAR_OFFSET.
 */
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
    yyless(yyleng - 1);
    yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
    return T_VARIABLE;
}

/* Plain variable: the value is the name without the leading "$" */
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
    return T_VARIABLE;
}

/* "]" closes the offset and returns to the enclosing state */
<ST_VAR_OFFSET>"]" {
    yy_pop_state(TSRMLS_C);
    return ']';
}

<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
    /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
    return yytext[0];
}

<ST_VAR_OFFSET>[ \n\r\t\\'#] {
    /* Invalid rule to return a more explicit parse error with proper line number */
    yyless(0);
    yy_pop_state(TSRMLS_C);
    return T_ENCAPSED_AND_WHITESPACE;
}

/* Bare identifier, in script code or as an unquoted offset key */
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
    zend_copy_value(zendlval, yytext, yyleng);
    return T_STRING;
}
1886
1887
/* One-line comment, introduced by "#" or "//". The action consumes input up
 * to and including the end of the line, or up to (not including) a closing
 * tag, whichever comes first, and returns T_COMMENT. */
<ST_IN_SCRIPTING>"#"|"//" {
    while (YYCURSOR < YYLIMIT) {
        /* "break" inside the switch ends the comment, "continue" keeps scanning */
        switch (*YYCURSOR++) {
            case '\r':
                /* treat "\r\n" as a single line ending */
                if (*YYCURSOR == '\n') {
                    YYCURSOR++;
                }
                /* fall through */
            case '\n':
                CG(zend_lineno)++;
                break;
            case '%':
                /* "%>" only counts as a closing tag with asp_tags enabled */
                if (!CG(asp_tags)) {
                    continue;
                }
                /* fall through */
            case '?':
                if (*YYCURSOR == '>') {
                    /* step back so the closing tag is scanned as its own token */
                    YYCURSOR--;
                    break;
                }
                /* fall through */
            default:
                continue;
        }

        break;
    }

    yyleng = YYCURSOR - SCNG(yy_text);

    return T_COMMENT;
}
1921
/* Block comment. A match longer than two characters means the opener was
 * "/**" plus whitespace, i.e. a doc comment. The action scans forward to the
 * closing star-slash pair; if the input ends first a compile warning is
 * raised and the comment runs to the end of input. */
<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
    int doc_com;

    if (yyleng > 2) {
        doc_com = 1;
        RESET_DOC_COMMENT();
    } else {
        doc_com = 0;
    }

    /* stop with the cursor on the '/' that follows a '*' */
    while (YYCURSOR < YYLIMIT) {
        if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
            break;
        }
    }

    if (YYCURSOR < YYLIMIT) {
        /* consume the closing '/' */
        YYCURSOR++;
    } else {
        zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
    }

    yyleng = YYCURSOR - SCNG(yy_text);
    HANDLE_NEWLINES(yytext, yyleng);

    if (doc_com) {
        /* keep the full comment text in CG(doc_comment) */
        CG(doc_comment) = STR_INIT(yytext, yyleng, 0);
        return T_DOC_COMMENT;
    }

    return T_COMMENT;
}
1954
/* "?>" or "</script>" with one optional trailing newline: back to INITIAL
 * (inline HTML) */
<ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
    DUMMY_STRINGL(zendlval, yytext, yyleng);
    BEGIN(INITIAL);
    return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
}


/* "%>" closes PHP code only when asp_tags is enabled. Otherwise everything
 * after the first character is given back and "%" is returned as itself. */
<ST_IN_SCRIPTING>"%>"{NEWLINE}? {
    if (CG(asp_tags)) {
        BEGIN(INITIAL);
        DUMMY_STRINGL(zendlval, yytext, yyleng);
        return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
    } else {
        yyless(1);
        return yytext[0];
    }
}
1972
1973
1974<ST_IN_SCRIPTING>b?['] {
1975    register char *s, *t;
1976    char *end;
1977    int bprefix = (yytext[0] != '\'') ? 1 : 0;
1978
1979    while (1) {
1980        if (YYCURSOR < YYLIMIT) {
1981            if (*YYCURSOR == '\'') {
1982                YYCURSOR++;
1983                yyleng = YYCURSOR - SCNG(yy_text);
1984
1985                break;
1986            } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
1987                YYCURSOR++;
1988            }
1989        } else {
1990            yyleng = YYLIMIT - SCNG(yy_text);
1991
1992            /* Unclosed single quotes; treat similar to double quotes, but without a separate token
1993             * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
1994             * rule, which continued in ST_IN_SCRIPTING state after the quote */
1995            return T_ENCAPSED_AND_WHITESPACE;
1996        }
1997    }
1998
1999    ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2);
2000
2001    /* convert escape sequences */
2002    s = t = Z_STRVAL_P(zendlval);
2003    end = s+Z_STRLEN_P(zendlval);
2004    while (s<end) {
2005        if (*s=='\\') {
2006            s++;
2007
2008            switch(*s) {
2009                case '\\':
2010                case '\'':
2011                    *t++ = *s;
2012                    Z_STRLEN_P(zendlval)--;
2013                    break;
2014                default:
2015                    *t++ = '\\';
2016                    *t++ = *s;
2017                    break;
2018            }
2019        } else {
2020            *t++ = *s;
2021        }
2022
2023        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2024            CG(zend_lineno)++;
2025        }
2026        s++;
2027    }
2028    *t = 0;
2029
2030    if (SCNG(output_filter)) {
2031        size_t sz = 0;
2032        char *str = NULL;
2033        s = Z_STRVAL_P(zendlval);
2034        // TODO: avoid reallocation ???
2035        SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval) TSRMLS_CC);
2036        ZVAL_STRINGL(zendlval, str, sz);
2037        efree(s);
2038    }
2039    return T_CONSTANT_ENCAPSED_STRING;
2040}
2041
2042
2043<ST_IN_SCRIPTING>b?["] {
2044    int bprefix = (yytext[0] != '"') ? 1 : 0;
2045
2046    while (YYCURSOR < YYLIMIT) {
2047        switch (*YYCURSOR++) {
2048            case '"':
2049                yyleng = YYCURSOR - SCNG(yy_text);
2050                zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2051                return T_CONSTANT_ENCAPSED_STRING;
2052            case '$':
2053                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2054                    break;
2055                }
2056                continue;
2057            case '{':
2058                if (*YYCURSOR == '$') {
2059                    break;
2060                }
2061                continue;
2062            case '\\':
2063                if (YYCURSOR < YYLIMIT) {
2064                    YYCURSOR++;
2065                }
2066                /* fall through */
2067            default:
2068                continue;
2069        }
2070
2071        YYCURSOR--;
2072        break;
2073    }
2074
2075    /* Remember how much was scanned to save rescanning */
2076    SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2077
2078    YYCURSOR = SCNG(yy_text) + yyleng;
2079
2080    BEGIN(ST_DOUBLE_QUOTES);
2081    return '"';
2082}
2083
2084
2085<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2086    char *s;
2087    int bprefix = (yytext[0] != '<') ? 1 : 0;
2088    zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2089
2090    CG(zend_lineno)++;
2091    heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2092    s = yytext+bprefix+3;
2093    while ((*s == ' ') || (*s == '\t')) {
2094        s++;
2095        heredoc_label->length--;
2096    }
2097
2098    if (*s == '\'') {
2099        s++;
2100        heredoc_label->length -= 2;
2101
2102        BEGIN(ST_NOWDOC);
2103    } else {
2104        if (*s == '"') {
2105            s++;
2106            heredoc_label->length -= 2;
2107        }
2108
2109        BEGIN(ST_HEREDOC);
2110    }
2111
2112    heredoc_label->label = estrndup(s, heredoc_label->length);
2113
2114    /* Check for ending label on the next line */
2115    if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2116        YYCTYPE *end = YYCURSOR + heredoc_label->length;
2117
2118        if (*end == ';') {
2119            end++;
2120        }
2121
2122        if (*end == '\n' || *end == '\r') {
2123            BEGIN(ST_END_HEREDOC);
2124        }
2125    }
2126
2127    zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2128
2129    return T_START_HEREDOC;
2130}
2131
2132
2133<ST_IN_SCRIPTING>[`] {
    /* Opening backtick seen in the ST_IN_SCRIPTING condition: switch the
     * scanner to ST_BACKQUOTE and return the backtick character itself as
     * the token. */
2134    BEGIN(ST_BACKQUOTE);
2135    return '`';
2136}
2137
2138
2139<ST_END_HEREDOC>{ANY_CHAR} {
2140    zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2141
2142    YYCURSOR += heredoc_label->length - 1;
2143    yyleng = heredoc_label->length;
2144
2145    heredoc_label_dtor(heredoc_label);
2146    efree(heredoc_label);
2147
2148    BEGIN(ST_IN_SCRIPTING);
2149    return T_END_HEREDOC;
2150}
2151
2152
2153<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
    /* "{$" inside a double-quoted string, backquote string or heredoc:
     * stores '{' as the long value of zendlval, pushes ST_IN_SCRIPTING via
     * yy_push_state() and returns T_CURLY_OPEN.
     * NOTE(review): yyless(1) presumably keeps only the '{' in this token so
     * that the '$' is scanned again in ST_IN_SCRIPTING - confirm against the
     * yyless definition. */
2154    Z_LVAL_P(zendlval) = (long) '{';
2155    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2156    yyless(1);
2157    return T_CURLY_OPEN;
2158}
2159
2160
2161<ST_DOUBLE_QUOTES>["] {
    /* Double quote seen in the ST_DOUBLE_QUOTES condition: switch back to
     * ST_IN_SCRIPTING and return the quote character as the token. */
2162    BEGIN(ST_IN_SCRIPTING);
2163    return '"';
2164}
2165
2166<ST_BACKQUOTE>[`] {
    /* Backtick seen in the ST_BACKQUOTE condition: switch back to
     * ST_IN_SCRIPTING and return the backtick character as the token. */
2167    BEGIN(ST_IN_SCRIPTING);
2168    return '`';
2169}
2170
2171
2172<ST_DOUBLE_QUOTES>{ANY_CHAR} {
2173    if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
2174        YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
2175        SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2176
2177        goto double_quotes_scan_done;
2178    }
2179
2180    if (YYCURSOR > YYLIMIT) {
2181        return 0;
2182    }
2183    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2184        YYCURSOR++;
2185    }
2186
2187    while (YYCURSOR < YYLIMIT) {
2188        switch (*YYCURSOR++) {
2189            case '"':
2190                break;
2191            case '$':
2192                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2193                    break;
2194                }
2195                continue;
2196            case '{':
2197                if (*YYCURSOR == '$') {
2198                    break;
2199                }
2200                continue;
2201            case '\\':
2202                if (YYCURSOR < YYLIMIT) {
2203                    YYCURSOR++;
2204                }
2205                /* fall through */
2206            default:
2207                continue;
2208        }
2209
2210        YYCURSOR--;
2211        break;
2212    }
2213
2214double_quotes_scan_done:
2215    yyleng = YYCURSOR - SCNG(yy_text);
2216
2217    zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2218    return T_ENCAPSED_AND_WHITESPACE;
2219}
2220
2221
2222<ST_BACKQUOTE>{ANY_CHAR} {
2223    if (YYCURSOR > YYLIMIT) {
2224        return 0;
2225    }
2226    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2227        YYCURSOR++;
2228    }
2229
2230    while (YYCURSOR < YYLIMIT) {
2231        switch (*YYCURSOR++) {
2232            case '`':
2233                break;
2234            case '$':
2235                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2236                    break;
2237                }
2238                continue;
2239            case '{':
2240                if (*YYCURSOR == '$') {
2241                    break;
2242                }
2243                continue;
2244            case '\\':
2245                if (YYCURSOR < YYLIMIT) {
2246                    YYCURSOR++;
2247                }
2248                /* fall through */
2249            default:
2250                continue;
2251        }
2252
2253        YYCURSOR--;
2254        break;
2255    }
2256
2257    yyleng = YYCURSOR - SCNG(yy_text);
2258
2259    zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2260    return T_ENCAPSED_AND_WHITESPACE;
2261}
2262
2263
2264<ST_HEREDOC>{ANY_CHAR} {
    /* Literal text inside a heredoc body.
     *
     * Scans forward until one of:
     *   - a newline followed by the label on top of the heredoc label stack,
     *     an optional ';' and another newline (switches to ST_END_HEREDOC);
     *   - a "$label", "${" or "{$" sequence (cursor is left on it);
     *   - YYLIMIT.
     * The scanned run, without the newline that precedes a closing label, is
     * passed to zend_scan_escape_string() and T_ENCAPSED_AND_WHITESPACE is
     * returned. Returns 0 when the cursor has moved past YYLIMIT. */
2265    int newline = 0;
2266
2267    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2268
2269    if (YYCURSOR > YYLIMIT) {
2270        return 0;
2271    }
2272
    /* Step back so the loop below also examines the character the rule matched */
2273    YYCURSOR--;
2274
2275    while (YYCURSOR < YYLIMIT) {
2276        switch (*YYCURSOR++) {
2277            case '\r':
                /* Treat "\r\n" as a single line break */
2278                if (*YYCURSOR == '\n') {
2279                    YYCURSOR++;
2280                }
2281                /* fall through */
2282            case '\n':
2283                /* Check for ending label on the next line */
2284                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2285                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2286
                    /* An optional ';' may directly follow the label */
2287                    if (*end == ';') {
2288                        end++;
2289                    }
2290
                    /* The label only closes the heredoc when a line break follows */
2291                    if (*end == '\n' || *end == '\r') {
2292                        /* newline before label will be subtracted from returned text, but
2293                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2294                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2295                            newline = 2; /* Windows newline */
2296                        } else {
2297                            newline = 1;
2298                        }
2299
2300                        CG(increment_lineno) = 1; /* For newline before label */
2301                        BEGIN(ST_END_HEREDOC);
2302
2303                        goto heredoc_scan_done;
2304                    }
2305                }
2306                continue;
2307            case '$':
                /* "$label" or "${" starts an interpolation: stop before it */
2308                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2309                    break;
2310                }
2311                continue;
2312            case '{':
                /* "{$" starts an interpolation: stop before it */
2313                if (*YYCURSOR == '$') {
2314                    break;
2315                }
2316                continue;
2317            case '\\':
                /* Skip the escaped byte, but never a line break: it must still
                 * reach the '\r' / '\n' cases so the closing label is detected */
2318                if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2319                    YYCURSOR++;
2320                }
2321                /* fall through */
2322            default:
2323                continue;
2324        }
2325
        /* Reached only via `break` above: put back the '$' or '{' */
2326        YYCURSOR--;
2327        break;
2328    }
2329
2330heredoc_scan_done:
2331    yyleng = YYCURSOR - SCNG(yy_text);
2332
2333    zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2334    return T_ENCAPSED_AND_WHITESPACE;
2335}
2336
2337
2338<ST_NOWDOC>{ANY_CHAR} {
    /* Literal text inside a nowdoc body.
     *
     * Scans forward until a newline is followed by the label on top of the
     * heredoc label stack, an optional ';' and another newline (switches to
     * ST_END_HEREDOC), or until YYLIMIT. No '$', '{' or backslash handling is
     * done here. The scanned run, without the newline that precedes a closing
     * label, is copied with zend_copy_value() and T_ENCAPSED_AND_WHITESPACE is
     * returned. Returns 0 when the cursor has moved past YYLIMIT. */
2339    int newline = 0;
2340
2341    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2342
2343    if (YYCURSOR > YYLIMIT) {
2344        return 0;
2345    }
2346
    /* Step back so the loop below also examines the character the rule matched */
2347    YYCURSOR--;
2348
2349    while (YYCURSOR < YYLIMIT) {
2350        switch (*YYCURSOR++) {
2351            case '\r':
                /* Treat "\r\n" as a single line break */
2352                if (*YYCURSOR == '\n') {
2353                    YYCURSOR++;
2354                }
2355                /* fall through */
2356            case '\n':
2357                /* Check for ending label on the next line */
2358                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2359                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2360
                    /* An optional ';' may directly follow the label */
2361                    if (*end == ';') {
2362                        end++;
2363                    }
2364
                    /* The label only closes the nowdoc when a line break follows */
2365                    if (*end == '\n' || *end == '\r') {
2366                        /* newline before label will be subtracted from returned text, but
2367                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2368                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2369                            newline = 2; /* Windows newline */
2370                        } else {
2371                            newline = 1;
2372                        }
2373
2374                        CG(increment_lineno) = 1; /* For newline before label */
2375                        BEGIN(ST_END_HEREDOC);
2376
2377                        goto nowdoc_scan_done;
2378                    }
2379                }
2380                /* fall through */
2381            default:
2382                continue;
2383        }
2384    }
2385
2386nowdoc_scan_done:
2387    yyleng = YYCURSOR - SCNG(yy_text);
2388
    /* The value excludes the newline before the closing label; yyleng keeps it */
2389    zend_copy_value(zendlval, yytext, yyleng - newline);
2390    HANDLE_NEWLINES(yytext, yyleng - newline);
2391    return T_ENCAPSED_AND_WHITESPACE;
2392}
2393
2394
2395<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
    /* Catch-all for ST_IN_SCRIPTING and ST_VAR_OFFSET: a character matched by
     * ANY_CHAR reaches this action. Returns 0 when the cursor has moved past
     * YYLIMIT; otherwise raises an E_COMPILE_WARNING naming the character and
     * the current scanner state, then jumps to the `restart` label (defined
     * outside this section). */
2396    if (YYCURSOR > YYLIMIT) {
2397        return 0;
2398    }
2399
2400    zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2401    goto restart;
2402}
2403
2404*/
2405}
2406