1/*
2   +----------------------------------------------------------------------+
3   | Zend Engine                                                          |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1998-2013 Zend Technologies Ltd. (http://www.zend.com) |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 2.00 of the Zend license,     |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.zend.com/license/2_00.txt.                                |
11   | If you did not receive a copy of the Zend license and are unable to  |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@zend.com so we can mail you a copy immediately.              |
14   +----------------------------------------------------------------------+
15   | Authors: Marcus Boerger <helly@php.net>                              |
16   |          Nuno Lopes <nlopess@php.net>                                |
17   |          Scott MacVicar <scottmac@php.net>                           |
18   | Flex version authors:                                                |
19   |          Andi Gutmans <andi@zend.com>                                |
20   |          Zeev Suraski <zeev@zend.com>                                |
21   +----------------------------------------------------------------------+
22*/
23
24/* $Id$ */
25
/* Debug trace hook. The active definition expands to nothing (its arguments
   are not evaluated); swap the condition by hand to print the state and
   character passed in. */
#if 1
# define YYDEBUG(s, c)
#else
# define YYDEBUG(s, c) printf("state: %d char: %c\n", (s), (c))
#endif
31
32#include "zend_language_scanner_defs.h"
33
34#include <errno.h>
35#include "zend.h"
36#ifdef PHP_WIN32
37# include <Winuser.h>
38#endif
39#include "zend_alloc.h"
40#include <zend_language_parser.h>
41#include "zend_compile.h"
42#include "zend_language_scanner.h"
43#include "zend_highlight.h"
44#include "zend_constants.h"
45#include "zend_variables.h"
46#include "zend_operators.h"
47#include "zend_API.h"
48#include "zend_strtod.h"
49#include "zend_exceptions.h"
50#include "tsrm_virtual_cwd.h"
51#include "tsrm_config_common.h"
52
/* re2c interface: input unit type, plus cursor/limit/marker storage kept in
   the scanner globals. */
#define YYCTYPE   unsigned char
/* Makes the enclosing function return 0 when YYCURSOR + n is at or beyond
   YYLIMIT + ZEND_MMAP_AHEAD. */
#define YYFILL(n) { if ((YYCURSOR + (n)) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } }
#define YYCURSOR  SCNG(yy_cursor)
#define YYLIMIT   SCNG(yy_limit)
#define YYMARKER  SCNG(yy_marker)

/* current scanner condition, stored in the scanner globals */
#define YYGETCONDITION()  SCNG(yy_state)
#define YYSETCONDITION(s) SCNG(yy_state) = (s)

/* condition names are spelled yyc<name> */
#define STATE(name)  yyc##name

/* emulate flex constructs */
#define BEGIN(state) YYSETCONDITION(STATE(state))
#define YYSTATE      YYGETCONDITION()
#define yytext       ((char*)SCNG(yy_text))
#define yyleng       SCNG(yy_leng)
/* Keep only the first x bytes of the current token. The argument is
   parenthesized so that a compound expression (e.g. a conditional) is used
   as a single operand in both assignments. */
#define yyless(x)    do { YYCURSOR = (unsigned char*)yytext + (x); \
                          yyleng   = (unsigned int)(x); } while(0)
#define yymore()     goto yymore_restart
72
73/* perform sanity check. If this message is triggered you should
74   increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */
75/*!max:re2c */
76#if ZEND_MMAP_AHEAD < YYMAXFILL
77# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL
78#endif
79
80#ifdef HAVE_STDARG_H
81# include <stdarg.h>
82#endif
83
84#ifdef HAVE_UNISTD_H
85# include <unistd.h>
86#endif
87
88/* Globals Macros */
89#define SCNG    LANG_SCNG
90#ifdef ZTS
91ZEND_API ts_rsrc_id language_scanner_globals_id;
92#else
93ZEND_API zend_php_scanner_globals language_scanner_globals;
94#endif
95
/* Bump CG(zend_lineno) once per line break in the l bytes starting at s.
   A "\n" and a "\r" not followed by "\n" each count; the "\r" of a "\r\n"
   pair is skipped so that the pair counts once. */
#define HANDLE_NEWLINES(s, l)                                                   \
do {                                                                            \
    char *nl_cur = (s), *nl_end = nl_cur + (l);                                 \
                                                                                \
    for (; nl_cur < nl_end; nl_cur++) {                                         \
        if (*nl_cur == '\r' && *(nl_cur + 1) == '\n') {                         \
            continue;                                                           \
        }                                                                       \
        if (*nl_cur == '\n' || *nl_cur == '\r') {                               \
            CG(zend_lineno)++;                                                  \
        }                                                                       \
    }                                                                           \
} while (0)
107
/* Bump CG(zend_lineno) when the single character c is '\n' or '\r'.
   c is evaluated exactly once, so an argument with side effects (such as
   *p++) behaves as written. The braces-only form is kept on purpose so
   existing call sites keep compiling with or without a trailing semicolon. */
#define HANDLE_NEWLINE(c) \
{ \
    const int zend_nl_ch_ = (c); \
    if (zend_nl_ch_ == '\n' || zend_nl_ch_ == '\r') { \
        CG(zend_lineno)++; \
    } \
}
114
/* CG(doc_comment_len) is reused to remember the length of the initial part of
   a double-quoted string that was scanned up to the first variable. */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH()    CG(doc_comment_len)

/* True for ASCII letters, '_' and any value >= 0x7F. */
#define IS_LABEL_START(c) \
    (((c) >= 'a' && (c) <= 'z') || \
     ((c) >= 'A' && (c) <= 'Z') || \
     (c) == '_' || \
     (c) >= 0x7F)

/* True for the octal digits '0'..'7'. */
#define ZEND_IS_OCT(c) \
    ((c) >= '0' && (c) <= '7')

/* True for '0'..'9', 'a'..'f' and 'A'..'F'. */
#define ZEND_IS_HEX(c) \
    (((c) >= '0' && (c) <= '9') || \
     ((c) >= 'a' && (c) <= 'f') || \
     ((c) >= 'A' && (c) <= 'F'))
123
124BEGIN_EXTERN_C()
125
126static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
127{
128    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
129    assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
130    return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
131}
132
133static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
134{
135    return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
136}
137
138static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
139{
140    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
141LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
142}
143
144static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
145{
146    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
147    assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
148    return zend_multibyte_encoding_converter(to, to_length, from, from_length,
149internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
150}
151
152
153static void _yy_push_state(int new_state TSRMLS_DC)
154{
155    zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
156    YYSETCONDITION(new_state);
157}
158
159#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm)
160
161static void yy_pop_state(TSRMLS_D)
162{
163    int *stack_state;
164    zend_stack_top(&SCNG(state_stack), (void **) &stack_state);
165    YYSETCONDITION(*stack_state);
166    zend_stack_del_top(&SCNG(state_stack));
167}
168
169static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC)
170{
171    YYCURSOR       = (YYCTYPE*)str;
172    YYLIMIT        = YYCURSOR + len;
173    if (!SCNG(yy_start)) {
174        SCNG(yy_start) = YYCURSOR;
175    }
176}
177
178void startup_scanner(TSRMLS_D)
179{
180    CG(parse_error) = 0;
181    CG(doc_comment) = NULL;
182    CG(doc_comment_len) = 0;
183    zend_stack_init(&SCNG(state_stack));
184    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
185}
186
187static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) {
188    efree(heredoc_label->label);
189}
190
191void shutdown_scanner(TSRMLS_D)
192{
193    CG(parse_error) = 0;
194    RESET_DOC_COMMENT();
195    zend_stack_destroy(&SCNG(state_stack));
196    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
197    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
198}
199
200ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
201{
202    lex_state->yy_leng   = SCNG(yy_leng);
203    lex_state->yy_start  = SCNG(yy_start);
204    lex_state->yy_text   = SCNG(yy_text);
205    lex_state->yy_cursor = SCNG(yy_cursor);
206    lex_state->yy_marker = SCNG(yy_marker);
207    lex_state->yy_limit  = SCNG(yy_limit);
208
209    lex_state->state_stack = SCNG(state_stack);
210    zend_stack_init(&SCNG(state_stack));
211
212    lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
213    zend_ptr_stack_init(&SCNG(heredoc_label_stack));
214
215    lex_state->in = SCNG(yy_in);
216    lex_state->yy_state = YYSTATE;
217    lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
218    lex_state->lineno = CG(zend_lineno);
219
220    lex_state->script_org = SCNG(script_org);
221    lex_state->script_org_size = SCNG(script_org_size);
222    lex_state->script_filtered = SCNG(script_filtered);
223    lex_state->script_filtered_size = SCNG(script_filtered_size);
224    lex_state->input_filter = SCNG(input_filter);
225    lex_state->output_filter = SCNG(output_filter);
226    lex_state->script_encoding = SCNG(script_encoding);
227}
228
229ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
230{
231    SCNG(yy_leng)   = lex_state->yy_leng;
232    SCNG(yy_start)  = lex_state->yy_start;
233    SCNG(yy_text)   = lex_state->yy_text;
234    SCNG(yy_cursor) = lex_state->yy_cursor;
235    SCNG(yy_marker) = lex_state->yy_marker;
236    SCNG(yy_limit)  = lex_state->yy_limit;
237
238    zend_stack_destroy(&SCNG(state_stack));
239    SCNG(state_stack) = lex_state->state_stack;
240
241    zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
242    zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
243    SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
244
245    SCNG(yy_in) = lex_state->in;
246    YYSETCONDITION(lex_state->yy_state);
247    CG(zend_lineno) = lex_state->lineno;
248    zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
249
250    if (SCNG(script_filtered)) {
251        efree(SCNG(script_filtered));
252        SCNG(script_filtered) = NULL;
253    }
254    SCNG(script_org) = lex_state->script_org;
255    SCNG(script_org_size) = lex_state->script_org_size;
256    SCNG(script_filtered) = lex_state->script_filtered;
257    SCNG(script_filtered_size) = lex_state->script_filtered_size;
258    SCNG(input_filter) = lex_state->input_filter;
259    SCNG(output_filter) = lex_state->output_filter;
260    SCNG(script_encoding) = lex_state->script_encoding;
261}
262
263ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
264{
265    zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
266    /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */
267    file_handle->opened_path = NULL;
268    if (file_handle->free_filename) {
269        file_handle->filename = NULL;
270    }
271}
272
/* Byte-order-mark byte sequences, written as string literals so that
   sizeof(BOM_x)-1 yields the mark's length in bytes. Note that the UTF-16LE
   mark is a prefix of the UTF-32LE mark. */
#define BOM_UTF32_BE    "\x00\x00\xfe\xff"
#define BOM_UTF32_LE    "\xff\xfe\x00\x00"
#define BOM_UTF16_BE    "\xfe\xff"
#define BOM_UTF16_LE    "\xff\xfe"
#define BOM_UTF8        "\xef\xbb\xbf"
278
279static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
280{
281    const unsigned char *p;
282    int wchar_size = 2;
283    int le = 0;
284
285    /* utf-16 or utf-32? */
286    p = script;
287    while ((p-script) < script_size) {
288        p = memchr(p, 0, script_size-(p-script)-2);
289        if (!p) {
290            break;
291        }
292        if (*(p+1) == '\0' && *(p+2) == '\0') {
293            wchar_size = 4;
294            break;
295        }
296
297        /* searching for UTF-32 specific byte orders, so this will do */
298        p += 4;
299    }
300
301    /* BE or LE? */
302    p = script;
303    while ((p-script) < script_size) {
304        if (*p == '\0' && *(p+wchar_size-1) != '\0') {
305            /* BE */
306            le = 0;
307            break;
308        } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
309            /* LE* */
310            le = 1;
311            break;
312        }
313        p += wchar_size;
314    }
315
316    if (wchar_size == 2) {
317        return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
318    } else {
319        return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
320    }
321
322    return NULL;
323}
324
325static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
326{
327    const zend_encoding *script_encoding = NULL;
328    int bom_size;
329    unsigned char *pos1, *pos2;
330
331    if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
332        return NULL;
333    }
334
335    /* check out BOM */
336    if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
337        script_encoding = zend_multibyte_encoding_utf32be;
338        bom_size = sizeof(BOM_UTF32_BE)-1;
339    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
340        script_encoding = zend_multibyte_encoding_utf32le;
341        bom_size = sizeof(BOM_UTF32_LE)-1;
342    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
343        script_encoding = zend_multibyte_encoding_utf16be;
344        bom_size = sizeof(BOM_UTF16_BE)-1;
345    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
346        script_encoding = zend_multibyte_encoding_utf16le;
347        bom_size = sizeof(BOM_UTF16_LE)-1;
348    } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
349        script_encoding = zend_multibyte_encoding_utf8;
350        bom_size = sizeof(BOM_UTF8)-1;
351    }
352
353    if (script_encoding) {
354        /* remove BOM */
355        LANG_SCNG(script_org) += bom_size;
356        LANG_SCNG(script_org_size) -= bom_size;
357
358        return script_encoding;
359    }
360
361    /* script contains NULL bytes -> auto-detection */
362    if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
363        /* check if the NULL byte is after the __HALT_COMPILER(); */
364        pos2 = LANG_SCNG(script_org);
365
366        while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
367            pos2 = memchr(pos2, '_', pos1 - pos2);
368            if (!pos2) break;
369            pos2++;
370            if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
371                pos2 += sizeof("_HALT_COMPILER")-1;
372                while (*pos2 == ' '  ||
373                       *pos2 == '\t' ||
374                       *pos2 == '\r' ||
375                       *pos2 == '\n') {
376                    pos2++;
377                }
378                if (*pos2 == '(') {
379                    pos2++;
380                    while (*pos2 == ' '  ||
381                           *pos2 == '\t' ||
382                           *pos2 == '\r' ||
383                           *pos2 == '\n') {
384                        pos2++;
385                    }
386                    if (*pos2 == ')') {
387                        pos2++;
388                        while (*pos2 == ' '  ||
389                               *pos2 == '\t' ||
390                               *pos2 == '\r' ||
391                               *pos2 == '\n') {
392                            pos2++;
393                        }
394                        if (*pos2 == ';') {
395                            return NULL;
396                        }
397                    }
398                }
399            }
400        }
401        /* make best effort if BOM is missing */
402        return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
403    }
404
405    return NULL;
406}
407
408static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
409{
410    const zend_encoding *script_encoding;
411
412    if (CG(detect_unicode)) {
413        /* check out bom(byte order mark) and see if containing wchars */
414        script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
415        if (script_encoding != NULL) {
416            /* bom or wchar detection is prior to 'script_encoding' option */
417            return script_encoding;
418        }
419    }
420
421    /* if no script_encoding specified, just leave alone */
422    if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
423        return NULL;
424    }
425
426    /* if multiple encodings specified, detect automagically */
427    if (CG(script_encoding_list_size) > 1) {
428        return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
429    }
430
431    return CG(script_encoding_list)[0];
432}
433
434ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
435{
436    const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
437    const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
438
439    if (!script_encoding) {
440        return FAILURE;
441    }
442
443    /* judge input/output filter */
444    LANG_SCNG(script_encoding) = script_encoding;
445    LANG_SCNG(input_filter) = NULL;
446    LANG_SCNG(output_filter) = NULL;
447
448    if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
449        if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
450            /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
451            LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
452            LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
453        } else {
454            LANG_SCNG(input_filter) = NULL;
455            LANG_SCNG(output_filter) = NULL;
456        }
457        return SUCCESS;
458    }
459
460    if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
461        LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
462        LANG_SCNG(output_filter) = NULL;
463    } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
464        LANG_SCNG(input_filter) = NULL;
465        LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
466    } else {
467        /* both script and internal encodings are incompatible w/ flex */
468        LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
469        LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
470    }
471
472    return 0;
473}
474
475ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
476{
477    const char *file_path = NULL;
478    char *buf;
479    size_t size, offset = 0;
480
481    /* The shebang line was read, get the current position to obtain the buffer start */
482    if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) {
483        if ((offset = ftell(file_handle->handle.fp)) == -1) {
484            offset = 0;
485        }
486    }
487
488    if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) {
489        return FAILURE;
490    }
491
492    zend_llist_add_element(&CG(open_files), file_handle);
493    if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) {
494        zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files));
495        size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle;
496        fh->handle.stream.handle = (void*)(((char*)fh) + diff);
497        file_handle->handle.stream.handle = fh->handle.stream.handle;
498    }
499
500    /* Reset the scanner for scanning the new file */
501    SCNG(yy_in) = file_handle;
502    SCNG(yy_start) = NULL;
503
504    if (size != -1) {
505        if (CG(multibyte)) {
506            SCNG(script_org) = (unsigned char*)buf;
507            SCNG(script_org_size) = size;
508            SCNG(script_filtered) = NULL;
509
510            zend_multibyte_set_filter(NULL TSRMLS_CC);
511
512            if (SCNG(input_filter)) {
513                if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
514                    zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
515                            "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
516                }
517                buf = (char*)SCNG(script_filtered);
518                size = SCNG(script_filtered_size);
519            }
520        }
521        SCNG(yy_start) = (unsigned char *)buf - offset;
522        yy_scan_buffer(buf, size TSRMLS_CC);
523    } else {
524        zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
525    }
526
527    BEGIN(INITIAL);
528
529    if (file_handle->opened_path) {
530        file_path = file_handle->opened_path;
531    } else {
532        file_path = file_handle->filename;
533    }
534
535    zend_set_compiled_filename(file_path TSRMLS_CC);
536
537    if (CG(start_lineno)) {
538        CG(zend_lineno) = CG(start_lineno);
539        CG(start_lineno) = 0;
540    } else {
541        CG(zend_lineno) = 1;
542    }
543
544    CG(increment_lineno) = 0;
545    return SUCCESS;
546}
547END_EXTERN_C()
548
549
550ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int type TSRMLS_DC)
551{
552    zend_lex_state original_lex_state;
553    zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
554    zend_op_array *original_active_op_array = CG(active_op_array);
555    zend_op_array *retval=NULL;
556    int compiler_result;
557    zend_bool compilation_successful=0;
558    znode retval_znode;
559    zend_bool original_in_compilation = CG(in_compilation);
560
561    retval_znode.op_type = IS_CONST;
562    retval_znode.u.constant.type = IS_LONG;
563    retval_znode.u.constant.value.lval = 1;
564    Z_UNSET_ISREF(retval_znode.u.constant);
565    Z_SET_REFCOUNT(retval_znode.u.constant, 1);
566
567    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
568
569    retval = op_array; /* success oriented */
570
571    if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) {
572        if (type==ZEND_REQUIRE) {
573            zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC);
574            zend_bailout();
575        } else {
576            zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC);
577        }
578        compilation_successful=0;
579    } else {
580        init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
581        CG(in_compilation) = 1;
582        CG(active_op_array) = op_array;
583        zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
584        zend_init_compiler_context(TSRMLS_C);
585        compiler_result = zendparse(TSRMLS_C);
586        zend_do_return(&retval_znode, 0 TSRMLS_CC);
587        CG(in_compilation) = original_in_compilation;
588        if (compiler_result != 0) { /* parser error */
589            zend_bailout();
590        }
591        compilation_successful=1;
592    }
593
594    if (retval) {
595        CG(active_op_array) = original_active_op_array;
596        if (compilation_successful) {
597            pass_two(op_array TSRMLS_CC);
598            zend_release_labels(0 TSRMLS_CC);
599        } else {
600            efree(op_array);
601            retval = NULL;
602        }
603    }
604    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
605    return retval;
606}
607
608
609zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
610{
611    zend_file_handle file_handle;
612    zval tmp;
613    zend_op_array *retval;
614    char *opened_path = NULL;
615
616    if (filename->type != IS_STRING) {
617        tmp = *filename;
618        zval_copy_ctor(&tmp);
619        convert_to_string(&tmp);
620        filename = &tmp;
621    }
622    file_handle.filename = filename->value.str.val;
623    file_handle.free_filename = 0;
624    file_handle.type = ZEND_HANDLE_FILENAME;
625    file_handle.opened_path = NULL;
626    file_handle.handle.fp = NULL;
627
628    retval = zend_compile_file(&file_handle, type TSRMLS_CC);
629    if (retval && file_handle.handle.stream.handle) {
630        int dummy = 1;
631
632        if (!file_handle.opened_path) {
633            file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len);
634        }
635
636        zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL);
637
638        if (opened_path) {
639            efree(opened_path);
640        }
641    }
642    zend_destroy_file_handle(&file_handle TSRMLS_CC);
643
644    if (filename==&tmp) {
645        zval_dtor(&tmp);
646    }
647    return retval;
648}
649
650ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
651{
652    char *buf;
653    size_t size;
654
655    /* enforce two trailing NULLs for flex... */
656    if (IS_INTERNED(str->value.str.val)) {
657        char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
658        memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD);
659        str->value.str.val = tmp;
660    } else {
661        str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
662    }
663
664    memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
665
666    SCNG(yy_in) = NULL;
667    SCNG(yy_start) = NULL;
668
669    buf = str->value.str.val;
670    size = str->value.str.len;
671
672    if (CG(multibyte)) {
673        SCNG(script_org) = (unsigned char*)buf;
674        SCNG(script_org_size) = size;
675        SCNG(script_filtered) = NULL;
676
677        zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
678
679        if (SCNG(input_filter)) {
680            if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
681                zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
682                        "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
683            }
684            buf = (char*)SCNG(script_filtered);
685            size = SCNG(script_filtered_size);
686        }
687    }
688
689    yy_scan_buffer(buf, size TSRMLS_CC);
690
691    zend_set_compiled_filename(filename TSRMLS_CC);
692    CG(zend_lineno) = 1;
693    CG(increment_lineno) = 0;
694    return SUCCESS;
695}
696
697
698ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
699{
700    size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
701    if (SCNG(input_filter)) {
702        size_t original_offset = offset, length = 0;
703        do {
704            unsigned char *p = NULL;
705            if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
706                return (size_t)-1;
707            }
708            efree(p);
709            if (length > original_offset) {
710                offset--;
711            } else if (length < original_offset) {
712                offset++;
713            }
714        } while (original_offset != length);
715    }
716    return offset;
717}
718
719
720zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC)
721{
722    zend_lex_state original_lex_state;
723    zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array));
724    zend_op_array *original_active_op_array = CG(active_op_array);
725    zend_op_array *retval;
726    zval tmp;
727    int compiler_result;
728    zend_bool original_in_compilation = CG(in_compilation);
729
730    if (source_string->value.str.len==0) {
731        efree(op_array);
732        return NULL;
733    }
734
735    CG(in_compilation) = 1;
736
737    tmp = *source_string;
738    zval_copy_ctor(&tmp);
739    convert_to_string(&tmp);
740    source_string = &tmp;
741
742    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
743    if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) {
744        efree(op_array);
745        retval = NULL;
746    } else {
747        zend_bool orig_interactive = CG(interactive);
748
749        CG(interactive) = 0;
750        init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
751        CG(interactive) = orig_interactive;
752        CG(active_op_array) = op_array;
753        zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context)));
754        zend_init_compiler_context(TSRMLS_C);
755        BEGIN(ST_IN_SCRIPTING);
756        compiler_result = zendparse(TSRMLS_C);
757
758        if (SCNG(script_filtered)) {
759            efree(SCNG(script_filtered));
760            SCNG(script_filtered) = NULL;
761        }
762
763        if (compiler_result != 0) {
764            CG(active_op_array) = original_active_op_array;
765            CG(unclean_shutdown)=1;
766            destroy_op_array(op_array TSRMLS_CC);
767            efree(op_array);
768            retval = NULL;
769        } else {
770            zend_do_return(NULL, 0 TSRMLS_CC);
771            CG(active_op_array) = original_active_op_array;
772            pass_two(op_array TSRMLS_CC);
773            zend_release_labels(0 TSRMLS_CC);
774            retval = op_array;
775        }
776    }
777    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
778    zval_dtor(&tmp);
779    CG(in_compilation) = original_in_compilation;
780    return retval;
781}
782
783
784BEGIN_EXTERN_C()
785int highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC)
786{
787    zend_lex_state original_lex_state;
788    zend_file_handle file_handle;
789
790    file_handle.type = ZEND_HANDLE_FILENAME;
791    file_handle.filename = filename;
792    file_handle.free_filename = 0;
793    file_handle.opened_path = NULL;
794    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
795    if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) {
796        zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC);
797        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
798        return FAILURE;
799    }
800    zend_highlight(syntax_highlighter_ini TSRMLS_CC);
801    if (SCNG(script_filtered)) {
802        efree(SCNG(script_filtered));
803        SCNG(script_filtered) = NULL;
804    }
805    zend_destroy_file_handle(&file_handle TSRMLS_CC);
806    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
807    return SUCCESS;
808}
809
810int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC)
811{
812    zend_lex_state original_lex_state;
813    zval tmp = *str;
814
815    str = &tmp;
816    zval_copy_ctor(str);
817    zend_save_lexical_state(&original_lex_state TSRMLS_CC);
818    if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) {
819        zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
820        return FAILURE;
821    }
822    BEGIN(INITIAL);
823    zend_highlight(syntax_highlighter_ini TSRMLS_CC);
824    if (SCNG(script_filtered)) {
825        efree(SCNG(script_filtered));
826        SCNG(script_filtered) = NULL;
827    }
828    zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
829    zval_dtor(str);
830    return SUCCESS;
831}
832
833ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
834{
835    size_t length;
836    unsigned char *new_yy_start;
837
838    /* convert and set */
839    if (!SCNG(input_filter)) {
840        if (SCNG(script_filtered)) {
841            efree(SCNG(script_filtered));
842            SCNG(script_filtered) = NULL;
843        }
844        SCNG(script_filtered_size) = 0;
845        length = SCNG(script_org_size);
846        new_yy_start = SCNG(script_org);
847    } else {
848        if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
849            zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
850                    "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
851        }
852        SCNG(script_filtered) = new_yy_start;
853        SCNG(script_filtered_size) = length;
854    }
855
856    SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
857    SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
858    SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
859    SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
860
861    SCNG(yy_start) = new_yy_start;
862}
863
864
/*
 * Store the `yyleng` bytes starting at `yytext` as the string payload
 * (value.str.val / value.str.len) of `zendlval`.
 *
 * When SCNG(output_filter) is set, the bytes are passed through it and the
 * buffer and size it produces are stored; otherwise the bytes are duplicated
 * with estrndup().  The macro does not set zendlval->type.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement (safe
 * in an unbraced if/else); use it with a trailing semicolon.  All arguments
 * are parenthesized so expression arguments such as `yyleng - 1` are cast
 * and evaluated as a whole.
 */
# define zend_copy_value(zendlval, yytext, yyleng) \
    do { \
        if (SCNG(output_filter)) { \
            size_t sz = 0; \
            SCNG(output_filter)((unsigned char **)&((zendlval)->value.str.val), &sz, (unsigned char *)(yytext), (size_t)(yyleng) TSRMLS_CC); \
            (zendlval)->value.str.len = sz; \
        } else { \
            (zendlval)->value.str.val = (char *) estrndup((yytext), (yyleng)); \
            (zendlval)->value.str.len = (yyleng); \
        } \
    } while (0)
874
875static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
876{
877    register char *s, *t;
878    char *end;
879
880    ZVAL_STRINGL(zendlval, str, len, 1);
881
882    /* convert escape sequences */
883    s = t = zendlval->value.str.val;
884    end = s+zendlval->value.str.len;
885    while (s<end) {
886        if (*s=='\\') {
887            s++;
888            if (s >= end) {
889                *t++ = '\\';
890                break;
891            }
892
893            switch(*s) {
894                case 'n':
895                    *t++ = '\n';
896                    zendlval->value.str.len--;
897                    break;
898                case 'r':
899                    *t++ = '\r';
900                    zendlval->value.str.len--;
901                    break;
902                case 't':
903                    *t++ = '\t';
904                    zendlval->value.str.len--;
905                    break;
906                case 'f':
907                    *t++ = '\f';
908                    zendlval->value.str.len--;
909                    break;
910                case 'v':
911                    *t++ = '\v';
912                    zendlval->value.str.len--;
913                    break;
914                case 'e':
915#ifdef PHP_WIN32
916                    *t++ = VK_ESCAPE;
917#else
918                    *t++ = '\e';
919#endif
920                    zendlval->value.str.len--;
921                    break;
922                case '"':
923                case '`':
924                    if (*s != quote_type) {
925                        *t++ = '\\';
926                        *t++ = *s;
927                        break;
928                    }
929                case '\\':
930                case '$':
931                    *t++ = *s;
932                    zendlval->value.str.len--;
933                    break;
934                case 'x':
935                case 'X':
936                    if (ZEND_IS_HEX(*(s+1))) {
937                        char hex_buf[3] = { 0, 0, 0 };
938
939                        zendlval->value.str.len--; /* for the 'x' */
940
941                        hex_buf[0] = *(++s);
942                        zendlval->value.str.len--;
943                        if (ZEND_IS_HEX(*(s+1))) {
944                            hex_buf[1] = *(++s);
945                            zendlval->value.str.len--;
946                        }
947                        *t++ = (char) strtol(hex_buf, NULL, 16);
948                    } else {
949                        *t++ = '\\';
950                        *t++ = *s;
951                    }
952                    break;
953                default:
954                    /* check for an octal */
955                    if (ZEND_IS_OCT(*s)) {
956                        char octal_buf[4] = { 0, 0, 0, 0 };
957
958                        octal_buf[0] = *s;
959                        zendlval->value.str.len--;
960                        if (ZEND_IS_OCT(*(s+1))) {
961                            octal_buf[1] = *(++s);
962                            zendlval->value.str.len--;
963                            if (ZEND_IS_OCT(*(s+1))) {
964                                octal_buf[2] = *(++s);
965                                zendlval->value.str.len--;
966                            }
967                        }
968                        *t++ = (char) strtol(octal_buf, NULL, 8);
969                    } else {
970                        *t++ = '\\';
971                        *t++ = *s;
972                    }
973                    break;
974            }
975        } else {
976            *t++ = *s;
977        }
978
979        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
980            CG(zend_lineno)++;
981        }
982        s++;
983    }
984    *t = 0;
985    if (SCNG(output_filter)) {
986        size_t sz = 0;
987        s = zendlval->value.str.val;
988        SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
989        zendlval->value.str.len = sz;
990        efree(s);
991    }
992}
993
994
995int lex_scan(zval *zendlval TSRMLS_DC)
996{
997restart:
998    SCNG(yy_text) = YYCURSOR;
999
1000yymore_restart:
1001
1002/*!re2c
1003re2c:yyfill:check = 0;
1004LNUM    [0-9]+
1005DNUM    ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1006EXPONENT_DNUM   (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1007HNUM    "0x"[0-9a-fA-F]+
1008BNUM    "0b"[01]+
1009LABEL   [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
1010WHITESPACE [ \n\r\t]+
1011TABS_AND_SPACES [ \t]*
1012TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1013ANY_CHAR [^]
1014NEWLINE ("\r"|"\n"|"\r\n")
1015
1016/* compute yyleng before each rule */
1017<!*> := yyleng = YYCURSOR - SCNG(yy_text);
1018
1019
1020<ST_IN_SCRIPTING>"exit" {
1021    return T_EXIT;
1022}
1023
1024<ST_IN_SCRIPTING>"die" {
1025    return T_EXIT;
1026}
1027
1028<ST_IN_SCRIPTING>"function" {
1029    return T_FUNCTION;
1030}
1031
1032<ST_IN_SCRIPTING>"const" {
1033    return T_CONST;
1034}
1035
1036<ST_IN_SCRIPTING>"return" {
1037    return T_RETURN;
1038}
1039
1040<ST_IN_SCRIPTING>"yield" {
1041    return T_YIELD;
1042}
1043
1044<ST_IN_SCRIPTING>"try" {
1045    return T_TRY;
1046}
1047
1048<ST_IN_SCRIPTING>"catch" {
1049    return T_CATCH;
1050}
1051
1052<ST_IN_SCRIPTING>"finally" {
1053    return T_FINALLY;
1054}
1055
1056<ST_IN_SCRIPTING>"throw" {
1057    return T_THROW;
1058}
1059
1060<ST_IN_SCRIPTING>"if" {
1061    return T_IF;
1062}
1063
1064<ST_IN_SCRIPTING>"elseif" {
1065    return T_ELSEIF;
1066}
1067
1068<ST_IN_SCRIPTING>"endif" {
1069    return T_ENDIF;
1070}
1071
1072<ST_IN_SCRIPTING>"else" {
1073    return T_ELSE;
1074}
1075
1076<ST_IN_SCRIPTING>"while" {
1077    return T_WHILE;
1078}
1079
1080<ST_IN_SCRIPTING>"endwhile" {
1081    return T_ENDWHILE;
1082}
1083
1084<ST_IN_SCRIPTING>"do" {
1085    return T_DO;
1086}
1087
1088<ST_IN_SCRIPTING>"for" {
1089    return T_FOR;
1090}
1091
1092<ST_IN_SCRIPTING>"endfor" {
1093    return T_ENDFOR;
1094}
1095
1096<ST_IN_SCRIPTING>"foreach" {
1097    return T_FOREACH;
1098}
1099
1100<ST_IN_SCRIPTING>"endforeach" {
1101    return T_ENDFOREACH;
1102}
1103
1104<ST_IN_SCRIPTING>"declare" {
1105    return T_DECLARE;
1106}
1107
1108<ST_IN_SCRIPTING>"enddeclare" {
1109    return T_ENDDECLARE;
1110}
1111
1112<ST_IN_SCRIPTING>"instanceof" {
1113    return T_INSTANCEOF;
1114}
1115
1116<ST_IN_SCRIPTING>"as" {
1117    return T_AS;
1118}
1119
1120<ST_IN_SCRIPTING>"switch" {
1121    return T_SWITCH;
1122}
1123
1124<ST_IN_SCRIPTING>"endswitch" {
1125    return T_ENDSWITCH;
1126}
1127
1128<ST_IN_SCRIPTING>"case" {
1129    return T_CASE;
1130}
1131
1132<ST_IN_SCRIPTING>"default" {
1133    return T_DEFAULT;
1134}
1135
1136<ST_IN_SCRIPTING>"break" {
1137    return T_BREAK;
1138}
1139
1140<ST_IN_SCRIPTING>"continue" {
1141    return T_CONTINUE;
1142}
1143
1144<ST_IN_SCRIPTING>"goto" {
1145    return T_GOTO;
1146}
1147
1148<ST_IN_SCRIPTING>"echo" {
1149    return T_ECHO;
1150}
1151
1152<ST_IN_SCRIPTING>"print" {
1153    return T_PRINT;
1154}
1155
1156<ST_IN_SCRIPTING>"class" {
1157    return T_CLASS;
1158}
1159
1160<ST_IN_SCRIPTING>"interface" {
1161    return T_INTERFACE;
1162}
1163
1164<ST_IN_SCRIPTING>"trait" {
1165    return T_TRAIT;
1166}
1167
1168<ST_IN_SCRIPTING>"extends" {
1169    return T_EXTENDS;
1170}
1171
1172<ST_IN_SCRIPTING>"implements" {
1173    return T_IMPLEMENTS;
1174}
1175
1176<ST_IN_SCRIPTING>"->" {
1177    yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1178    return T_OBJECT_OPERATOR;
1179}
1180
1181<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
1182    zendlval->value.str.val = yytext; /* no copying - intentional */
1183    zendlval->value.str.len = yyleng;
1184    zendlval->type = IS_STRING;
1185    HANDLE_NEWLINES(yytext, yyleng);
1186    return T_WHITESPACE;
1187}
1188
1189<ST_LOOKING_FOR_PROPERTY>"->" {
1190    return T_OBJECT_OPERATOR;
1191}
1192
1193<ST_LOOKING_FOR_PROPERTY>{LABEL} {
1194    yy_pop_state(TSRMLS_C);
1195    zend_copy_value(zendlval, yytext, yyleng);
1196    zendlval->type = IS_STRING;
1197    return T_STRING;
1198}
1199
1200<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
1201    yyless(0);
1202    yy_pop_state(TSRMLS_C);
1203    goto restart;
1204}
1205
1206<ST_IN_SCRIPTING>"::" {
1207    return T_PAAMAYIM_NEKUDOTAYIM;
1208}
1209
1210<ST_IN_SCRIPTING>"\\" {
1211    return T_NS_SEPARATOR;
1212}
1213
1214<ST_IN_SCRIPTING>"new" {
1215    return T_NEW;
1216}
1217
1218<ST_IN_SCRIPTING>"clone" {
1219    return T_CLONE;
1220}
1221
1222<ST_IN_SCRIPTING>"var" {
1223    return T_VAR;
1224}
1225
1226<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
1227    return T_INT_CAST;
1228}
1229
1230<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
1231    return T_DOUBLE_CAST;
1232}
1233
1234<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
1235    return T_STRING_CAST;
1236}
1237
1238<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
1239    return T_ARRAY_CAST;
1240}
1241
1242<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
1243    return T_OBJECT_CAST;
1244}
1245
1246<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
1247    return T_BOOL_CAST;
1248}
1249
1250<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
1251    return T_UNSET_CAST;
1252}
1253
1254<ST_IN_SCRIPTING>"eval" {
1255    return T_EVAL;
1256}
1257
1258<ST_IN_SCRIPTING>"include" {
1259    return T_INCLUDE;
1260}
1261
1262<ST_IN_SCRIPTING>"include_once" {
1263    return T_INCLUDE_ONCE;
1264}
1265
1266<ST_IN_SCRIPTING>"require" {
1267    return T_REQUIRE;
1268}
1269
1270<ST_IN_SCRIPTING>"require_once" {
1271    return T_REQUIRE_ONCE;
1272}
1273
1274<ST_IN_SCRIPTING>"namespace" {
1275    return T_NAMESPACE;
1276}
1277
1278<ST_IN_SCRIPTING>"use" {
1279    return T_USE;
1280}
1281
1282<ST_IN_SCRIPTING>"insteadof" {
1283        return T_INSTEADOF;
1284}
1285
1286<ST_IN_SCRIPTING>"global" {
1287    return T_GLOBAL;
1288}
1289
1290<ST_IN_SCRIPTING>"isset" {
1291    return T_ISSET;
1292}
1293
1294<ST_IN_SCRIPTING>"empty" {
1295    return T_EMPTY;
1296}
1297
1298<ST_IN_SCRIPTING>"__halt_compiler" {
1299    return T_HALT_COMPILER;
1300}
1301
1302<ST_IN_SCRIPTING>"static" {
1303    return T_STATIC;
1304}
1305
1306<ST_IN_SCRIPTING>"abstract" {
1307    return T_ABSTRACT;
1308}
1309
1310<ST_IN_SCRIPTING>"final" {
1311    return T_FINAL;
1312}
1313
1314<ST_IN_SCRIPTING>"private" {
1315    return T_PRIVATE;
1316}
1317
1318<ST_IN_SCRIPTING>"protected" {
1319    return T_PROTECTED;
1320}
1321
1322<ST_IN_SCRIPTING>"public" {
1323    return T_PUBLIC;
1324}
1325
1326<ST_IN_SCRIPTING>"unset" {
1327    return T_UNSET;
1328}
1329
1330<ST_IN_SCRIPTING>"=>" {
1331    return T_DOUBLE_ARROW;
1332}
1333
1334<ST_IN_SCRIPTING>"list" {
1335    return T_LIST;
1336}
1337
1338<ST_IN_SCRIPTING>"array" {
1339    return T_ARRAY;
1340}
1341
1342<ST_IN_SCRIPTING>"callable" {
1343 return T_CALLABLE;
1344}
1345
1346<ST_IN_SCRIPTING>"++" {
1347    return T_INC;
1348}
1349
1350<ST_IN_SCRIPTING>"--" {
1351    return T_DEC;
1352}
1353
1354<ST_IN_SCRIPTING>"===" {
1355    return T_IS_IDENTICAL;
1356}
1357
1358<ST_IN_SCRIPTING>"!==" {
1359    return T_IS_NOT_IDENTICAL;
1360}
1361
1362<ST_IN_SCRIPTING>"==" {
1363    return T_IS_EQUAL;
1364}
1365
1366<ST_IN_SCRIPTING>"!="|"<>" {
1367    return T_IS_NOT_EQUAL;
1368}
1369
1370<ST_IN_SCRIPTING>"<=" {
1371    return T_IS_SMALLER_OR_EQUAL;
1372}
1373
1374<ST_IN_SCRIPTING>">=" {
1375    return T_IS_GREATER_OR_EQUAL;
1376}
1377
1378<ST_IN_SCRIPTING>"+=" {
1379    return T_PLUS_EQUAL;
1380}
1381
1382<ST_IN_SCRIPTING>"-=" {
1383    return T_MINUS_EQUAL;
1384}
1385
1386<ST_IN_SCRIPTING>"*=" {
1387    return T_MUL_EQUAL;
1388}
1389
1390<ST_IN_SCRIPTING>"/=" {
1391    return T_DIV_EQUAL;
1392}
1393
1394<ST_IN_SCRIPTING>".=" {
1395    return T_CONCAT_EQUAL;
1396}
1397
1398<ST_IN_SCRIPTING>"%=" {
1399    return T_MOD_EQUAL;
1400}
1401
1402<ST_IN_SCRIPTING>"<<=" {
1403    return T_SL_EQUAL;
1404}
1405
1406<ST_IN_SCRIPTING>">>=" {
1407    return T_SR_EQUAL;
1408}
1409
1410<ST_IN_SCRIPTING>"&=" {
1411    return T_AND_EQUAL;
1412}
1413
1414<ST_IN_SCRIPTING>"|=" {
1415    return T_OR_EQUAL;
1416}
1417
1418<ST_IN_SCRIPTING>"^=" {
1419    return T_XOR_EQUAL;
1420}
1421
1422<ST_IN_SCRIPTING>"||" {
1423    return T_BOOLEAN_OR;
1424}
1425
1426<ST_IN_SCRIPTING>"&&" {
1427    return T_BOOLEAN_AND;
1428}
1429
1430<ST_IN_SCRIPTING>"OR" {
1431    return T_LOGICAL_OR;
1432}
1433
1434<ST_IN_SCRIPTING>"AND" {
1435    return T_LOGICAL_AND;
1436}
1437
1438<ST_IN_SCRIPTING>"XOR" {
1439    return T_LOGICAL_XOR;
1440}
1441
1442<ST_IN_SCRIPTING>"<<" {
1443    return T_SL;
1444}
1445
1446<ST_IN_SCRIPTING>">>" {
1447    return T_SR;
1448}
1449
1450<ST_IN_SCRIPTING>{TOKENS} {
1451    return yytext[0];
1452}
1453
1454
1455<ST_IN_SCRIPTING>"{" {
1456    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1457    return '{';
1458}
1459
1460
1461<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
1462    yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC);
1463    return T_DOLLAR_OPEN_CURLY_BRACES;
1464}
1465
1466
1467<ST_IN_SCRIPTING>"}" {
1468    RESET_DOC_COMMENT();
1469    if (!zend_stack_is_empty(&SCNG(state_stack))) {
1470        yy_pop_state(TSRMLS_C);
1471    }
1472    return '}';
1473}
1474
1475
1476<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
1477    yyless(yyleng - 1);
1478    zend_copy_value(zendlval, yytext, yyleng);
1479    zendlval->type = IS_STRING;
1480    yy_pop_state(TSRMLS_C);
1481    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1482    return T_STRING_VARNAME;
1483}
1484
1485
1486<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} {
1487    yyless(0);
1488    yy_pop_state(TSRMLS_C);
1489    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
1490    goto restart;
1491}
1492
1493<ST_IN_SCRIPTING>{BNUM} {
1494    char *bin = yytext + 2; /* Skip "0b" */
1495    int len = yyleng - 2;
1496
1497    /* Skip any leading 0s */
1498    while (*bin == '0') {
1499        ++bin;
1500        --len;
1501    }
1502
1503    if (len < SIZEOF_LONG * 8) {
1504        if (len == 0) {
1505            zendlval->value.lval = 0;
1506        } else {
1507            zendlval->value.lval = strtol(bin, NULL, 2);
1508        }
1509        zendlval->type = IS_LONG;
1510        return T_LNUMBER;
1511    } else {
1512        zendlval->value.dval = zend_bin_strtod(bin, NULL);
1513        zendlval->type = IS_DOUBLE;
1514        return T_DNUMBER;
1515    }
1516}
1517
1518<ST_IN_SCRIPTING>{LNUM} {
1519    if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1520        zendlval->value.lval = strtol(yytext, NULL, 0);
1521    } else {
1522        errno = 0;
1523        zendlval->value.lval = strtol(yytext, NULL, 0);
1524        if (errno == ERANGE) { /* Overflow */
1525            if (yytext[0] == '0') { /* octal overflow */
1526                zendlval->value.dval = zend_oct_strtod(yytext, NULL);
1527            } else {
1528                zendlval->value.dval = zend_strtod(yytext, NULL);
1529            }
1530            zendlval->type = IS_DOUBLE;
1531            return T_DNUMBER;
1532        }
1533    }
1534
1535    zendlval->type = IS_LONG;
1536    return T_LNUMBER;
1537}
1538
1539<ST_IN_SCRIPTING>{HNUM} {
1540    char *hex = yytext + 2; /* Skip "0x" */
1541    int len = yyleng - 2;
1542
1543    /* Skip any leading 0s */
1544    while (*hex == '0') {
1545        hex++;
1546        len--;
1547    }
1548
1549    if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
1550        if (len == 0) {
1551            zendlval->value.lval = 0;
1552        } else {
1553            zendlval->value.lval = strtol(hex, NULL, 16);
1554        }
1555        zendlval->type = IS_LONG;
1556        return T_LNUMBER;
1557    } else {
1558        zendlval->value.dval = zend_hex_strtod(hex, NULL);
1559        zendlval->type = IS_DOUBLE;
1560        return T_DNUMBER;
1561    }
1562}
1563
1564<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */
1565    if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) {
1566        zendlval->value.lval = strtol(yytext, NULL, 10);
1567        zendlval->type = IS_LONG;
1568    } else {
1569        zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1570        zendlval->value.str.len = yyleng;
1571        zendlval->type = IS_STRING;
1572    }
1573    return T_NUM_STRING;
1574}
1575
1576<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
1577    zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
1578    zendlval->value.str.len = yyleng;
1579    zendlval->type = IS_STRING;
1580    return T_NUM_STRING;
1581}
1582
1583<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
1584    zendlval->value.dval = zend_strtod(yytext, NULL);
1585    zendlval->type = IS_DOUBLE;
1586    return T_DNUMBER;
1587}
1588
1589<ST_IN_SCRIPTING>"__CLASS__" {
1590    const char *class_name = NULL;
1591
1592    if (CG(active_class_entry)
1593        && (ZEND_ACC_TRAIT ==
1594            (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1595        /* We create a special __CLASS__ constant that is going to be resolved
1596           at run-time */
1597        zendlval->value.str.len = sizeof("__CLASS__")-1;
1598        zendlval->value.str.val = estrndup("__CLASS__", zendlval->value.str.len);
1599        zendlval->type = IS_CONSTANT;
1600    } else {
1601        if (CG(active_class_entry)) {
1602            class_name = CG(active_class_entry)->name;
1603        }
1604
1605        if (!class_name) {
1606            class_name = "";
1607        }
1608
1609        zendlval->value.str.len = strlen(class_name);
1610        zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
1611        zendlval->type = IS_STRING;
1612    }
1613    return T_CLASS_C;
1614}
1615
1616<ST_IN_SCRIPTING>"__TRAIT__" {
1617    const char *trait_name = NULL;
1618
1619    if (CG(active_class_entry)
1620        && (ZEND_ACC_TRAIT ==
1621            (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
1622        trait_name = CG(active_class_entry)->name;
1623    }
1624
1625    if (!trait_name) {
1626        trait_name = "";
1627    }
1628
1629    zendlval->value.str.len = strlen(trait_name);
1630    zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len);
1631    zendlval->type = IS_STRING;
1632
1633    return T_TRAIT_C;
1634}
1635
1636<ST_IN_SCRIPTING>"__FUNCTION__" {
1637    const char *func_name = NULL;
1638
1639    if (CG(active_op_array)) {
1640        func_name = CG(active_op_array)->function_name;
1641    }
1642
1643    if (!func_name) {
1644        func_name = "";
1645    }
1646    zendlval->value.str.len = strlen(func_name);
1647    zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len);
1648    zendlval->type = IS_STRING;
1649    return T_FUNC_C;
1650}
1651
1652<ST_IN_SCRIPTING>"__METHOD__" {
1653    const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
1654    const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
1655    size_t len = 0;
1656
1657    if (class_name) {
1658        len += strlen(class_name) + 2;
1659    }
1660    if (func_name) {
1661        len += strlen(func_name);
1662    }
1663
1664    zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s",
1665        class_name ? class_name : "",
1666        class_name && func_name ? "::" : "",
1667        func_name ? func_name : ""
1668        );
1669    zendlval->type = IS_STRING;
1670    return T_METHOD_C;
1671}
1672
1673<ST_IN_SCRIPTING>"__LINE__" {
1674    zendlval->value.lval = CG(zend_lineno);
1675    zendlval->type = IS_LONG;
1676    return T_LINE;
1677}
1678
1679<ST_IN_SCRIPTING>"__FILE__" {
1680    char *filename = zend_get_compiled_filename(TSRMLS_C);
1681
1682    if (!filename) {
1683        filename = "";
1684    }
1685    zendlval->value.str.len = strlen(filename);
1686    zendlval->value.str.val = estrndup(filename, zendlval->value.str.len);
1687    zendlval->type = IS_STRING;
1688    return T_FILE;
1689}
1690
1691<ST_IN_SCRIPTING>"__DIR__" {
1692    char *filename = zend_get_compiled_filename(TSRMLS_C);
1693    const size_t filename_len = strlen(filename);
1694    char *dirname;
1695
1696    if (!filename) {
1697        filename = "";
1698    }
1699
1700    dirname = estrndup(filename, filename_len);
1701    zend_dirname(dirname, filename_len);
1702
1703    if (strcmp(dirname, ".") == 0) {
1704        dirname = erealloc(dirname, MAXPATHLEN);
1705#if HAVE_GETCWD
1706        VCWD_GETCWD(dirname, MAXPATHLEN);
1707#elif HAVE_GETWD
1708        VCWD_GETWD(dirname);
1709#endif
1710    }
1711
1712    zendlval->value.str.len = strlen(dirname);
1713    zendlval->value.str.val = dirname;
1714    zendlval->type = IS_STRING;
1715    return T_DIR;
1716}
1717
1718<ST_IN_SCRIPTING>"__NAMESPACE__" {
1719    if (CG(current_namespace)) {
1720        *zendlval = *CG(current_namespace);
1721        zval_copy_ctor(zendlval);
1722    } else {
1723        ZVAL_EMPTY_STRING(zendlval);
1724    }
1725    return T_NS_C;
1726}
1727
1728<INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
1729    YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
1730
1731    if (bracket != SCNG(yy_text)) {
1732        /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
1733        YYCURSOR = bracket;
1734        goto inline_html;
1735    }
1736
1737    HANDLE_NEWLINES(yytext, yyleng);
1738    zendlval->value.str.val = yytext; /* no copying - intentional */
1739    zendlval->value.str.len = yyleng;
1740    zendlval->type = IS_STRING;
1741    BEGIN(ST_IN_SCRIPTING);
1742    return T_OPEN_TAG;
1743}
1744
1745
1746<INITIAL>"<%=" {
1747    if (CG(asp_tags)) {
1748        zendlval->value.str.val = yytext; /* no copying - intentional */
1749        zendlval->value.str.len = yyleng;
1750        zendlval->type = IS_STRING;
1751        BEGIN(ST_IN_SCRIPTING);
1752        return T_OPEN_TAG_WITH_ECHO;
1753    } else {
1754        goto inline_char_handler;
1755    }
1756}
1757
1758
1759<INITIAL>"<?=" {
1760    zendlval->value.str.val = yytext; /* no copying - intentional */
1761    zendlval->value.str.len = yyleng;
1762    zendlval->type = IS_STRING;
1763    BEGIN(ST_IN_SCRIPTING);
1764    return T_OPEN_TAG_WITH_ECHO;
1765}
1766
1767
1768<INITIAL>"<%" {
1769    if (CG(asp_tags)) {
1770        zendlval->value.str.val = yytext; /* no copying - intentional */
1771        zendlval->value.str.len = yyleng;
1772        zendlval->type = IS_STRING;
1773        BEGIN(ST_IN_SCRIPTING);
1774        return T_OPEN_TAG;
1775    } else {
1776        goto inline_char_handler;
1777    }
1778}
1779
1780
1781<INITIAL>"<?php"([ \t]|{NEWLINE}) {
1782    zendlval->value.str.val = yytext; /* no copying - intentional */
1783    zendlval->value.str.len = yyleng;
1784    zendlval->type = IS_STRING;
1785    HANDLE_NEWLINE(yytext[yyleng-1]);
1786    BEGIN(ST_IN_SCRIPTING);
1787    return T_OPEN_TAG;
1788}
1789
1790
1791<INITIAL>"<?" {
1792    if (CG(short_tags)) {
1793        zendlval->value.str.val = yytext; /* no copying - intentional */
1794        zendlval->value.str.len = yyleng;
1795        zendlval->type = IS_STRING;
1796        BEGIN(ST_IN_SCRIPTING);
1797        return T_OPEN_TAG;
1798    } else {
1799        goto inline_char_handler;
1800    }
1801}
1802
1803<INITIAL>{ANY_CHAR} {
1804    if (YYCURSOR > YYLIMIT) {
1805        return 0;
1806    }
1807
1808inline_char_handler:
1809
1810    while (1) {
1811        YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR);
1812
1813        YYCURSOR = ptr ? ptr + 1 : YYLIMIT;
1814
1815        if (YYCURSOR < YYLIMIT) {
1816            switch (*YYCURSOR) {
1817                case '?':
1818                    if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
1819                        break;
1820                    }
1821                    continue;
1822                case '%':
1823                    if (CG(asp_tags)) {
1824                        break;
1825                    }
1826                    continue;
1827                case 's':
1828                case 'S':
1829                    /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet
1830                     * If it is, the PHP <script> tag rule checks for any HTML scanned before it */
1831                    YYCURSOR--;
1832                    yymore();
1833                default:
1834                    continue;
1835            }
1836
1837            YYCURSOR--;
1838        }
1839
1840        break;
1841    }
1842
1843inline_html:
1844    yyleng = YYCURSOR - SCNG(yy_text);
1845
1846    if (SCNG(output_filter)) {
1847        int readsize;
1848        size_t sz = 0;
1849        readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC);
1850        zendlval->value.str.len = sz;
1851        if (readsize < yyleng) {
1852            yyless(readsize);
1853        }
1854    } else {
1855      zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
1856      zendlval->value.str.len = yyleng;
1857    }
1858    zendlval->type = IS_STRING;
1859    HANDLE_NEWLINES(yytext, yyleng);
1860    return T_INLINE_HTML;
1861}
1862
1863
1864/* Make sure a label character follows "->", otherwise there is no property
1865 * and "->" will be taken literally
1866 */
1867<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
1868    yyless(yyleng - 3);
1869    yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
1870    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1871    zendlval->type = IS_STRING;
1872    return T_VARIABLE;
1873}
1874
1875/* A [ always designates a variable offset, regardless of what follows
1876 */
1877<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
1878    yyless(yyleng - 1);
1879    yy_push_state(ST_VAR_OFFSET TSRMLS_CC);
1880    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1881    zendlval->type = IS_STRING;
1882    return T_VARIABLE;
1883}
1884
1885<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
1886    zend_copy_value(zendlval, (yytext+1), (yyleng-1));
1887    zendlval->type = IS_STRING;
1888    return T_VARIABLE;
1889}
1890
1891<ST_VAR_OFFSET>"]" {
1892    yy_pop_state(TSRMLS_C);
1893    return ']';
1894}
1895
1896<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
1897    /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
1898    return yytext[0];
1899}
1900
1901<ST_VAR_OFFSET>[ \n\r\t\\'#] {
1902    /* Invalid rule to return a more explicit parse error with proper line number */
1903    yyless(0);
1904    yy_pop_state(TSRMLS_C);
1905    return T_ENCAPSED_AND_WHITESPACE;
1906}
1907
1908<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
1909    zend_copy_value(zendlval, yytext, yyleng);
1910    zendlval->type = IS_STRING;
1911    return T_STRING;
1912}
1913
1914
1915<ST_IN_SCRIPTING>"#"|"//" {
1916    while (YYCURSOR < YYLIMIT) {
1917        switch (*YYCURSOR++) {
1918            case '\r':
1919                if (*YYCURSOR == '\n') {
1920                    YYCURSOR++;
1921                }
1922                /* fall through */
1923            case '\n':
1924                CG(zend_lineno)++;
1925                break;
1926            case '%':
1927                if (!CG(asp_tags)) {
1928                    continue;
1929                }
1930                /* fall through */
1931            case '?':
1932                if (*YYCURSOR == '>') {
1933                    YYCURSOR--;
1934                    break;
1935                }
1936                /* fall through */
1937            default:
1938                continue;
1939        }
1940
1941        break;
1942    }
1943
1944    yyleng = YYCURSOR - SCNG(yy_text);
1945
1946    return T_COMMENT;
1947}
1948
1949<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
1950    int doc_com;
1951
1952    if (yyleng > 2) {
1953        doc_com = 1;
1954        RESET_DOC_COMMENT();
1955    } else {
1956        doc_com = 0;
1957    }
1958
1959    while (YYCURSOR < YYLIMIT) {
1960        if (*YYCURSOR++ == '*' && *YYCURSOR == '/') {
1961            break;
1962        }
1963    }
1964
1965    if (YYCURSOR < YYLIMIT) {
1966        YYCURSOR++;
1967    } else {
1968        zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno));
1969    }
1970
1971    yyleng = YYCURSOR - SCNG(yy_text);
1972    HANDLE_NEWLINES(yytext, yyleng);
1973
1974    if (doc_com) {
1975        CG(doc_comment) = estrndup(yytext, yyleng);
1976        CG(doc_comment_len) = yyleng;
1977        return T_DOC_COMMENT;
1978    }
1979
1980    return T_COMMENT;
1981}
1982
1983<ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? {
1984    zendlval->value.str.val = yytext; /* no copying - intentional */
1985    zendlval->value.str.len = yyleng;
1986    zendlval->type = IS_STRING;
1987    BEGIN(INITIAL);
1988    return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1989}
1990
1991
1992<ST_IN_SCRIPTING>"%>"{NEWLINE}? {
1993    if (CG(asp_tags)) {
1994        BEGIN(INITIAL);
1995        zendlval->value.str.len = yyleng;
1996        zendlval->type = IS_STRING;
1997        zendlval->value.str.val = yytext; /* no copying - intentional */
1998        return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
1999    } else {
2000        yyless(1);
2001        return yytext[0];
2002    }
2003}
2004
2005
2006<ST_IN_SCRIPTING>b?['] {
2007    register char *s, *t;
2008    char *end;
2009    int bprefix = (yytext[0] != '\'') ? 1 : 0;
2010
2011    while (1) {
2012        if (YYCURSOR < YYLIMIT) {
2013            if (*YYCURSOR == '\'') {
2014                YYCURSOR++;
2015                yyleng = YYCURSOR - SCNG(yy_text);
2016
2017                break;
2018            } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) {
2019                YYCURSOR++;
2020            }
2021        } else {
2022            yyleng = YYLIMIT - SCNG(yy_text);
2023
2024            /* Unclosed single quotes; treat similar to double quotes, but without a separate token
2025             * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
2026             * rule, which continued in ST_IN_SCRIPTING state after the quote */
2027            return T_ENCAPSED_AND_WHITESPACE;
2028        }
2029    }
2030
2031    zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2);
2032    zendlval->value.str.len = yyleng-bprefix-2;
2033    zendlval->type = IS_STRING;
2034
2035    /* convert escape sequences */
2036    s = t = zendlval->value.str.val;
2037    end = s+zendlval->value.str.len;
2038    while (s<end) {
2039        if (*s=='\\') {
2040            s++;
2041
2042            switch(*s) {
2043                case '\\':
2044                case '\'':
2045                    *t++ = *s;
2046                    zendlval->value.str.len--;
2047                    break;
2048                default:
2049                    *t++ = '\\';
2050                    *t++ = *s;
2051                    break;
2052            }
2053        } else {
2054            *t++ = *s;
2055        }
2056
2057        if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) {
2058            CG(zend_lineno)++;
2059        }
2060        s++;
2061    }
2062    *t = 0;
2063
2064    if (SCNG(output_filter)) {
2065        size_t sz = 0;
2066        s = zendlval->value.str.val;
2067        SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC);
2068        zendlval->value.str.len = sz;
2069        efree(s);
2070    }
2071    return T_CONSTANT_ENCAPSED_STRING;
2072}
2073
2074
/*
 * Opening double quote, optionally prefixed with 'b'.
 *
 * Looks ahead through the string body. If the closing quote is reached before
 * any "$name", "${" or "{$" sequence, the whole literal is handed to
 * zend_scan_escape_string() and returned as T_CONSTANT_ENCAPSED_STRING.
 * Otherwise (interpolation found, or input exhausted) only the opening quote
 * is returned and the scanner enters ST_DOUBLE_QUOTES; the distance already
 * scanned is saved so the ST_DOUBLE_QUOTES text rule can skip it.
 */
2075<ST_IN_SCRIPTING>b?["] {
2076    int bprefix = (yytext[0] != '"') ? 1 : 0;
2077
2078    while (YYCURSOR < YYLIMIT) {
2079        switch (*YYCURSOR++) {
2080            case '"':
                    /* closing quote with no interpolation seen: pass the text between the quotes */
2081                yyleng = YYCURSOR - SCNG(yy_text);
2082                zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC);
2083                return T_CONSTANT_ENCAPSED_STRING;
2084            case '$':
                    /* '$' only matters when a label start or '{' follows it */
2085                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2086                    break;
2087                }
2088                continue;
2089            case '{':
                    /* '{' only matters when '$' follows it */
2090                if (*YYCURSOR == '$') {
2091                    break;
2092                }
2093                continue;
2094            case '\\':
                    /* skip the character after a backslash, if there is one */
2095                if (YYCURSOR < YYLIMIT) {
2096                    YYCURSOR++;
2097                }
2098                /* fall through */
2099            default:
2100                continue;
2101        }
2102
            /* reached only via 'break' from the switch: step back onto the '$' or '{' and stop */
2103        YYCURSOR--;
2104        break;
2105    }
2106
2107    /* Remember how much was scanned to save rescanning */
2108    SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng);
2109
        /* rewind so that the returned token covers only the yyleng characters of the opener */
2110    YYCURSOR = SCNG(yy_text) + yyleng;
2111
2112    BEGIN(ST_DOUBLE_QUOTES);
2113    return '"';
2114}
2115
2116
2117<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} {
2118    char *s;
2119    int bprefix = (yytext[0] != '<') ? 1 : 0;
2120    zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label));
2121
2122    CG(zend_lineno)++;
2123    heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0);
2124    s = yytext+bprefix+3;
2125    while ((*s == ' ') || (*s == '\t')) {
2126        s++;
2127        heredoc_label->length--;
2128    }
2129
2130    if (*s == '\'') {
2131        s++;
2132        heredoc_label->length -= 2;
2133
2134        BEGIN(ST_NOWDOC);
2135    } else {
2136        if (*s == '"') {
2137            s++;
2138            heredoc_label->length -= 2;
2139        }
2140
2141        BEGIN(ST_HEREDOC);
2142    }
2143
2144    heredoc_label->label = estrndup(s, heredoc_label->length);
2145
2146    /* Check for ending label on the next line */
2147    if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) {
2148        YYCTYPE *end = YYCURSOR + heredoc_label->length;
2149
2150        if (*end == ';') {
2151            end++;
2152        }
2153
2154        if (*end == '\n' || *end == '\r') {
2155            BEGIN(ST_END_HEREDOC);
2156        }
2157    }
2158
2159    zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
2160
2161    return T_START_HEREDOC;
2162}
2163
2164
/* Opening backquote in PHP code: enter ST_BACKQUOTE and return the backquote
 * itself as a single-character token. */
2165<ST_IN_SCRIPTING>[`] {
2166    BEGIN(ST_BACKQUOTE);
2167    return '`';
2168}
2169
2170
2171<ST_END_HEREDOC>{ANY_CHAR} {
2172    zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack));
2173
2174    YYCURSOR += heredoc_label->length - 1;
2175    yyleng = heredoc_label->length;
2176
2177    heredoc_label_dtor(heredoc_label);
2178    efree(heredoc_label);
2179
2180    BEGIN(ST_IN_SCRIPTING);
2181    return T_END_HEREDOC;
2182}
2183
2184
2185<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
2186    zendlval->value.lval = (long) '{';
2187    yy_push_state(ST_IN_SCRIPTING TSRMLS_CC);
2188    yyless(1);
2189    return T_CURLY_OPEN;
2190}
2191
2192
/* Closing double quote of an interpolated string: go back to ST_IN_SCRIPTING
 * and return the quote as a single-character token. */
2193<ST_DOUBLE_QUOTES>["] {
2194    BEGIN(ST_IN_SCRIPTING);
2195    return '"';
2196}
2197
/* Closing backquote: go back to ST_IN_SCRIPTING and return the backquote as a
 * single-character token. */
2198<ST_BACKQUOTE>[`] {
2199    BEGIN(ST_IN_SCRIPTING);
2200    return '`';
2201}
2202
2203
/*
 * Literal text inside a double-quoted string.
 *
 * Extends the token up to (not including) the closing quote or the next
 * "$name", "${" or "{$" sequence, passes it to zend_scan_escape_string() with
 * '"' as the quote type and returns T_ENCAPSED_AND_WHITESPACE. When the
 * opening-quote rule has already measured this stretch, the saved length is
 * reused instead of scanning again.
 */
2204<ST_DOUBLE_QUOTES>{ANY_CHAR} {
2205    if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) {
            /* jump straight to the previously found end; the "- 1" presumably
             * accounts for the character {ANY_CHAR} already consumed */
2206        YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1;
            /* the saved length is used once only */
2207        SET_DOUBLE_QUOTES_SCANNED_LENGTH(0);
2208
2209        goto double_quotes_scan_done;
2210    }
2211
        /* cursor beyond the limit: nothing left to scan, return token 0 */
2212    if (YYCURSOR > YYLIMIT) {
2213        return 0;
2214    }
        /* the matched character is a backslash: also take the character after it */
2215    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2216        YYCURSOR++;
2217    }
2218
2219    while (YYCURSOR < YYLIMIT) {
2220        switch (*YYCURSOR++) {
2221            case '"':
                    /* closing quote ends the text run */
2222                break;
2223            case '$':
                    /* '$' ends the run only when a label start or '{' follows */
2224                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2225                    break;
2226                }
2227                continue;
2228            case '{':
                    /* '{' ends the run only when '$' follows */
2229                if (*YYCURSOR == '$') {
2230                    break;
2231                }
2232                continue;
2233            case '\\':
                    /* skip the character after a backslash, if there is one */
2234                if (YYCURSOR < YYLIMIT) {
2235                    YYCURSOR++;
2236                }
2237                /* fall through */
2238            default:
2239                continue;
2240        }
2241
            /* reached only via 'break' from the switch: leave the terminator unconsumed */
2242        YYCURSOR--;
2243        break;
2244    }
2245
2246double_quotes_scan_done:
2247    yyleng = YYCURSOR - SCNG(yy_text);
2248
2249    zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC);
2250    return T_ENCAPSED_AND_WHITESPACE;
2251}
2252
2253
/*
 * Literal text inside a backquote string.
 *
 * Extends the token up to (not including) the closing backquote or the next
 * "$name", "${" or "{$" sequence, passes it to zend_scan_escape_string() with
 * '`' as the quote type and returns T_ENCAPSED_AND_WHITESPACE.
 */
2254<ST_BACKQUOTE>{ANY_CHAR} {
        /* cursor beyond the limit: nothing left to scan, return token 0 */
2255    if (YYCURSOR > YYLIMIT) {
2256        return 0;
2257    }
        /* the matched character is a backslash: also take the character after it */
2258    if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
2259        YYCURSOR++;
2260    }
2261
2262    while (YYCURSOR < YYLIMIT) {
2263        switch (*YYCURSOR++) {
2264            case '`':
                    /* closing backquote ends the text run */
2265                break;
2266            case '$':
                    /* '$' ends the run only when a label start or '{' follows */
2267                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2268                    break;
2269                }
2270                continue;
2271            case '{':
                    /* '{' ends the run only when '$' follows */
2272                if (*YYCURSOR == '$') {
2273                    break;
2274                }
2275                continue;
2276            case '\\':
                    /* skip the character after a backslash, if there is one */
2277                if (YYCURSOR < YYLIMIT) {
2278                    YYCURSOR++;
2279                }
2280                /* fall through */
2281            default:
2282                continue;
2283        }
2284
            /* reached only via 'break' from the switch: leave the terminator unconsumed */
2285        YYCURSOR--;
2286        break;
2287    }
2288
2289    yyleng = YYCURSOR - SCNG(yy_text);
2290
2291    zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC);
2292    return T_ENCAPSED_AND_WHITESPACE;
2293}
2294
2295
/*
 * Literal text inside a heredoc body.
 *
 * Extends the token until one of:
 *   - a line break followed by the current label, an optional ';' and another
 *     line break (switches to ST_END_HEREDOC),
 *   - a "$name", "${" or "{$" sequence (left unconsumed),
 *   - YYLIMIT.
 * The text is passed to zend_scan_escape_string() with quote type 0 and
 * returned as T_ENCAPSED_AND_WHITESPACE. The line break in front of the
 * closing label stays in yytext/yyleng but is excluded from the string value.
 */
2296<ST_HEREDOC>{ANY_CHAR} {
        /* length of the line break preceding the closing label, 0 if none was found */
2297    int newline = 0;
2298
        /* peek at the label of the current heredoc; it stays on the stack */
2299    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2300
        /* cursor beyond the limit: nothing left to scan, return token 0 */
2301    if (YYCURSOR > YYLIMIT) {
2302        return 0;
2303    }
2304
        /* step back so the loop below also examines the character before the
         * cursor (presumably the one {ANY_CHAR} consumed) */
2305    YYCURSOR--;
2306
2307    while (YYCURSOR < YYLIMIT) {
2308        switch (*YYCURSOR++) {
2309            case '\r':
                    /* treat "\r\n" as one line break */
2310                if (*YYCURSOR == '\n') {
2311                    YYCURSOR++;
2312                }
2313                /* fall through */
2314            case '\n':
2315                /* Check for ending label on the next line */
2316                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2317                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2318
                        /* an optional ';' may follow the label */
2319                    if (*end == ';') {
2320                        end++;
2321                    }
2322
                        /* only a label that is followed by a line break closes the heredoc */
2323                    if (*end == '\n' || *end == '\r') {
2324                        /* newline before label will be subtracted from returned text, but
2325                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2326                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2327                            newline = 2; /* Windows newline */
2328                        } else {
2329                            newline = 1;
2330                        }
2331
2332                        CG(increment_lineno) = 1; /* For newline before label */
2333                        BEGIN(ST_END_HEREDOC);
2334
2335                        goto heredoc_scan_done;
2336                    }
2337                }
2338                continue;
2339            case '$':
                    /* '$' ends the run only when a label start or '{' follows */
2340                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
2341                    break;
2342                }
2343                continue;
2344            case '{':
                    /* '{' ends the run only when '$' follows */
2345                if (*YYCURSOR == '$') {
2346                    break;
2347                }
2348                continue;
2349            case '\\':
                    /* skip the character after a backslash, except a line break,
                     * which must still go through the label check above */
2350                if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') {
2351                    YYCURSOR++;
2352                }
2353                /* fall through */
2354            default:
2355                continue;
2356        }
2357
            /* reached only via 'break' from the switch: leave the '$' or '{' unconsumed */
2358        YYCURSOR--;
2359        break;
2360    }
2361
2362heredoc_scan_done:
2363    yyleng = YYCURSOR - SCNG(yy_text);
2364
        /* the string value omits the line break that precedes the closing label */
2365    zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC);
2366    return T_ENCAPSED_AND_WHITESPACE;
2367}
2368
2369
/*
 * Literal text inside a nowdoc body.
 *
 * Extends the token until a line break is followed by the current label, an
 * optional ';' and another line break (switches to ST_END_HEREDOC), or until
 * YYLIMIT. No '$', '{' or backslash handling is done here. The text is copied
 * with zend_copy_value() and returned as T_ENCAPSED_AND_WHITESPACE; the line
 * break in front of the closing label stays in yytext/yyleng but is excluded
 * from the copied value.
 */
2370<ST_NOWDOC>{ANY_CHAR} {
        /* length of the line break preceding the closing label, 0 if none was found */
2371    int newline = 0;
2372
        /* peek at the label of the current nowdoc; it stays on the stack */
2373    zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
2374
        /* cursor beyond the limit: nothing left to scan, return token 0 */
2375    if (YYCURSOR > YYLIMIT) {
2376        return 0;
2377    }
2378
        /* step back so the loop below also examines the character before the
         * cursor (presumably the one {ANY_CHAR} consumed) */
2379    YYCURSOR--;
2380
2381    while (YYCURSOR < YYLIMIT) {
2382        switch (*YYCURSOR++) {
2383            case '\r':
                    /* treat "\r\n" as one line break */
2384                if (*YYCURSOR == '\n') {
2385                    YYCURSOR++;
2386                }
2387                /* fall through */
2388            case '\n':
2389                /* Check for ending label on the next line */
2390                if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) {
2391                    YYCTYPE *end = YYCURSOR + heredoc_label->length;
2392
                        /* an optional ';' may follow the label */
2393                    if (*end == ';') {
2394                        end++;
2395                    }
2396
                        /* only a label that is followed by a line break closes the nowdoc */
2397                    if (*end == '\n' || *end == '\r') {
2398                        /* newline before label will be subtracted from returned text, but
2399                         * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */
2400                        if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') {
2401                            newline = 2; /* Windows newline */
2402                        } else {
2403                            newline = 1;
2404                        }
2405
2406                        CG(increment_lineno) = 1; /* For newline before label */
2407                        BEGIN(ST_END_HEREDOC);
2408
2409                        goto nowdoc_scan_done;
2410                    }
2411                }
2412                /* fall through */
2413            default:
2414                continue;
2415        }
2416    }
2417
2418nowdoc_scan_done:
2419    yyleng = YYCURSOR - SCNG(yy_text);
2420
        /* the copied value and the newline handling both leave out the line
         * break that precedes the closing label */
2421    zend_copy_value(zendlval, yytext, yyleng - newline);
2422    zendlval->type = IS_STRING;
2423    HANDLE_NEWLINES(yytext, yyleng - newline);
2424    return T_ENCAPSED_AND_WHITESPACE;
2425}
2426
2427
2428<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
2429    if (YYCURSOR > YYLIMIT) {
2430        return 0;
2431    }
2432
2433    zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
2434    goto restart;
2435}
2436
2437*/
2438}
2439