1/* 2 +----------------------------------------------------------------------+ 3 | Zend Engine | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1998-2013 Zend Technologies Ltd. (http://www.zend.com) | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 2.00 of the Zend license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.zend.com/license/2_00.txt. | 11 | If you did not receive a copy of the Zend license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@zend.com so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Authors: Marcus Boerger <helly@php.net> | 16 | Nuno Lopes <nlopess@php.net> | 17 | Scott MacVicar <scottmac@php.net> | 18 | Flex version authors: | 19 | Andi Gutmans <andi@zend.com> | 20 | Zeev Suraski <zeev@zend.com> | 21 +----------------------------------------------------------------------+ 22*/ 23 24/* $Id$ */ 25 26#if 0 27# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c) 28#else 29# define YYDEBUG(s, c) 30#endif 31 32#include "zend_language_scanner_defs.h" 33 34#include <errno.h> 35#include "zend.h" 36#ifdef PHP_WIN32 37# include <Winuser.h> 38#endif 39#include "zend_alloc.h" 40#include <zend_language_parser.h> 41#include "zend_compile.h" 42#include "zend_language_scanner.h" 43#include "zend_highlight.h" 44#include "zend_constants.h" 45#include "zend_variables.h" 46#include "zend_operators.h" 47#include "zend_API.h" 48#include "zend_strtod.h" 49#include "zend_exceptions.h" 50#include "tsrm_virtual_cwd.h" 51#include "tsrm_config_common.h" 52 53#define YYCTYPE unsigned char 54#define YYFILL(n) { if ((YYCURSOR + n) >= (YYLIMIT + ZEND_MMAP_AHEAD)) { return 0; } } 55#define YYCURSOR SCNG(yy_cursor) 56#define 
YYLIMIT SCNG(yy_limit) 57#define YYMARKER SCNG(yy_marker) 58 59#define YYGETCONDITION() SCNG(yy_state) 60#define YYSETCONDITION(s) SCNG(yy_state) = s 61 62#define STATE(name) yyc##name 63 64/* emulate flex constructs */ 65#define BEGIN(state) YYSETCONDITION(STATE(state)) 66#define YYSTATE YYGETCONDITION() 67#define yytext ((char*)SCNG(yy_text)) 68#define yyleng SCNG(yy_leng) 69#define yyless(x) do { YYCURSOR = (unsigned char*)yytext + x; \ 70 yyleng = (unsigned int)x; } while(0) 71#define yymore() goto yymore_restart 72 73/* perform sanity check. If this message is triggered you should 74 increase the ZEND_MMAP_AHEAD value in the zend_streams.h file */ 75/*!max:re2c */ 76#if ZEND_MMAP_AHEAD < YYMAXFILL 77# error ZEND_MMAP_AHEAD should be greater than or equal to YYMAXFILL 78#endif 79 80#ifdef HAVE_STDARG_H 81# include <stdarg.h> 82#endif 83 84#ifdef HAVE_UNISTD_H 85# include <unistd.h> 86#endif 87 88/* Globals Macros */ 89#define SCNG LANG_SCNG 90#ifdef ZTS 91ZEND_API ts_rsrc_id language_scanner_globals_id; 92#else 93ZEND_API zend_php_scanner_globals language_scanner_globals; 94#endif 95 96#define HANDLE_NEWLINES(s, l) \ 97do { \ 98 char *p = (s), *boundary = p+(l); \ 99 \ 100 while (p<boundary) { \ 101 if (*p == '\n' || (*p == '\r' && (*(p+1) != '\n'))) { \ 102 CG(zend_lineno)++; \ 103 } \ 104 p++; \ 105 } \ 106} while (0) 107 108#define HANDLE_NEWLINE(c) \ 109{ \ 110 if (c == '\n' || c == '\r') { \ 111 CG(zend_lineno)++; \ 112 } \ 113} 114 115/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */ 116#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len) 117#define GET_DOUBLE_QUOTES_SCANNED_LENGTH() CG(doc_comment_len) 118 119#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F) 120 121#define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7') 122#define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F')) 123 
124BEGIN_EXTERN_C() 125 126static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) 127{ 128 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); 129 assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); 130 return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC); 131} 132 133static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) 134{ 135 return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC); 136} 137 138static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) 139{ 140 return zend_multibyte_encoding_converter(to, to_length, from, from_length, 141LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC); 142} 143 144static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) 145{ 146 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); 147 assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); 148 return zend_multibyte_encoding_converter(to, to_length, from, from_length, 149internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC); 150} 151 152 153static void _yy_push_state(int new_state TSRMLS_DC) 154{ 155 zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int)); 156 YYSETCONDITION(new_state); 157} 158 159#define yy_push_state(state_and_tsrm) _yy_push_state(yyc##state_and_tsrm) 160 161static void yy_pop_state(TSRMLS_D) 162{ 163 int *stack_state; 164 
zend_stack_top(&SCNG(state_stack), (void **) &stack_state); 165 YYSETCONDITION(*stack_state); 166 zend_stack_del_top(&SCNG(state_stack)); 167} 168 169static void yy_scan_buffer(char *str, unsigned int len TSRMLS_DC) 170{ 171 YYCURSOR = (YYCTYPE*)str; 172 YYLIMIT = YYCURSOR + len; 173 if (!SCNG(yy_start)) { 174 SCNG(yy_start) = YYCURSOR; 175 } 176} 177 178void startup_scanner(TSRMLS_D) 179{ 180 CG(parse_error) = 0; 181 CG(doc_comment) = NULL; 182 CG(doc_comment_len) = 0; 183 zend_stack_init(&SCNG(state_stack)); 184 zend_ptr_stack_init(&SCNG(heredoc_label_stack)); 185} 186 187static void heredoc_label_dtor(zend_heredoc_label *heredoc_label) { 188 efree(heredoc_label->label); 189} 190 191void shutdown_scanner(TSRMLS_D) 192{ 193 CG(parse_error) = 0; 194 RESET_DOC_COMMENT(); 195 zend_stack_destroy(&SCNG(state_stack)); 196 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); 197 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); 198} 199 200ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC) 201{ 202 lex_state->yy_leng = SCNG(yy_leng); 203 lex_state->yy_start = SCNG(yy_start); 204 lex_state->yy_text = SCNG(yy_text); 205 lex_state->yy_cursor = SCNG(yy_cursor); 206 lex_state->yy_marker = SCNG(yy_marker); 207 lex_state->yy_limit = SCNG(yy_limit); 208 209 lex_state->state_stack = SCNG(state_stack); 210 zend_stack_init(&SCNG(state_stack)); 211 212 lex_state->heredoc_label_stack = SCNG(heredoc_label_stack); 213 zend_ptr_stack_init(&SCNG(heredoc_label_stack)); 214 215 lex_state->in = SCNG(yy_in); 216 lex_state->yy_state = YYSTATE; 217 lex_state->filename = zend_get_compiled_filename(TSRMLS_C); 218 lex_state->lineno = CG(zend_lineno); 219 220 lex_state->script_org = SCNG(script_org); 221 lex_state->script_org_size = SCNG(script_org_size); 222 lex_state->script_filtered = SCNG(script_filtered); 223 lex_state->script_filtered_size = SCNG(script_filtered_size); 224 lex_state->input_filter = SCNG(input_filter); 225 
lex_state->output_filter = SCNG(output_filter); 226 lex_state->script_encoding = SCNG(script_encoding); 227} 228 229ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) 230{ 231 SCNG(yy_leng) = lex_state->yy_leng; 232 SCNG(yy_start) = lex_state->yy_start; 233 SCNG(yy_text) = lex_state->yy_text; 234 SCNG(yy_cursor) = lex_state->yy_cursor; 235 SCNG(yy_marker) = lex_state->yy_marker; 236 SCNG(yy_limit) = lex_state->yy_limit; 237 238 zend_stack_destroy(&SCNG(state_stack)); 239 SCNG(state_stack) = lex_state->state_stack; 240 241 zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); 242 zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); 243 SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack; 244 245 SCNG(yy_in) = lex_state->in; 246 YYSETCONDITION(lex_state->yy_state); 247 CG(zend_lineno) = lex_state->lineno; 248 zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); 249 250 if (SCNG(script_filtered)) { 251 efree(SCNG(script_filtered)); 252 SCNG(script_filtered) = NULL; 253 } 254 SCNG(script_org) = lex_state->script_org; 255 SCNG(script_org_size) = lex_state->script_org_size; 256 SCNG(script_filtered) = lex_state->script_filtered; 257 SCNG(script_filtered_size) = lex_state->script_filtered_size; 258 SCNG(input_filter) = lex_state->input_filter; 259 SCNG(output_filter) = lex_state->output_filter; 260 SCNG(script_encoding) = lex_state->script_encoding; 261} 262 263ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC) 264{ 265 zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles); 266 /* zend_file_handle_dtor() operates on the copy, so we have to NULLify the original here */ 267 file_handle->opened_path = NULL; 268 if (file_handle->free_filename) { 269 file_handle->filename = NULL; 270 } 271} 272 273#define BOM_UTF32_BE "\x00\x00\xfe\xff" 274#define BOM_UTF32_LE "\xff\xfe\x00\x00" 275#define BOM_UTF16_BE "\xfe\xff" 
276#define BOM_UTF16_LE "\xff\xfe" 277#define BOM_UTF8 "\xef\xbb\xbf" 278 279static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC) 280{ 281 const unsigned char *p; 282 int wchar_size = 2; 283 int le = 0; 284 285 /* utf-16 or utf-32? */ 286 p = script; 287 while ((p-script) < script_size) { 288 p = memchr(p, 0, script_size-(p-script)-2); 289 if (!p) { 290 break; 291 } 292 if (*(p+1) == '\0' && *(p+2) == '\0') { 293 wchar_size = 4; 294 break; 295 } 296 297 /* searching for UTF-32 specific byte orders, so this will do */ 298 p += 4; 299 } 300 301 /* BE or LE? */ 302 p = script; 303 while ((p-script) < script_size) { 304 if (*p == '\0' && *(p+wchar_size-1) != '\0') { 305 /* BE */ 306 le = 0; 307 break; 308 } else if (*p != '\0' && *(p+wchar_size-1) == '\0') { 309 /* LE* */ 310 le = 1; 311 break; 312 } 313 p += wchar_size; 314 } 315 316 if (wchar_size == 2) { 317 return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be; 318 } else { 319 return le ? 
zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be; 320 } 321 322 return NULL; 323} 324 325static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) 326{ 327 const zend_encoding *script_encoding = NULL; 328 int bom_size; 329 unsigned char *pos1, *pos2; 330 331 if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) { 332 return NULL; 333 } 334 335 /* check out BOM */ 336 if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) { 337 script_encoding = zend_multibyte_encoding_utf32be; 338 bom_size = sizeof(BOM_UTF32_BE)-1; 339 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) { 340 script_encoding = zend_multibyte_encoding_utf32le; 341 bom_size = sizeof(BOM_UTF32_LE)-1; 342 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) { 343 script_encoding = zend_multibyte_encoding_utf16be; 344 bom_size = sizeof(BOM_UTF16_BE)-1; 345 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) { 346 script_encoding = zend_multibyte_encoding_utf16le; 347 bom_size = sizeof(BOM_UTF16_LE)-1; 348 } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) { 349 script_encoding = zend_multibyte_encoding_utf8; 350 bom_size = sizeof(BOM_UTF8)-1; 351 } 352 353 if (script_encoding) { 354 /* remove BOM */ 355 LANG_SCNG(script_org) += bom_size; 356 LANG_SCNG(script_org_size) -= bom_size; 357 358 return script_encoding; 359 } 360 361 /* script contains NULL bytes -> auto-detection */ 362 if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) { 363 /* check if the NULL byte is after the __HALT_COMPILER(); */ 364 pos2 = LANG_SCNG(script_org); 365 366 while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) { 367 pos2 = memchr(pos2, '_', pos1 - pos2); 368 if (!pos2) break; 369 pos2++; 370 if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) { 371 pos2 += sizeof("_HALT_COMPILER")-1; 372 while (*pos2 == ' ' || 373 
*pos2 == '\t' || 374 *pos2 == '\r' || 375 *pos2 == '\n') { 376 pos2++; 377 } 378 if (*pos2 == '(') { 379 pos2++; 380 while (*pos2 == ' ' || 381 *pos2 == '\t' || 382 *pos2 == '\r' || 383 *pos2 == '\n') { 384 pos2++; 385 } 386 if (*pos2 == ')') { 387 pos2++; 388 while (*pos2 == ' ' || 389 *pos2 == '\t' || 390 *pos2 == '\r' || 391 *pos2 == '\n') { 392 pos2++; 393 } 394 if (*pos2 == ';') { 395 return NULL; 396 } 397 } 398 } 399 } 400 } 401 /* make best effort if BOM is missing */ 402 return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC); 403 } 404 405 return NULL; 406} 407 408static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D) 409{ 410 const zend_encoding *script_encoding; 411 412 if (CG(detect_unicode)) { 413 /* check out bom(byte order mark) and see if containing wchars */ 414 script_encoding = zend_multibyte_detect_unicode(TSRMLS_C); 415 if (script_encoding != NULL) { 416 /* bom or wchar detection is prior to 'script_encoding' option */ 417 return script_encoding; 418 } 419 } 420 421 /* if no script_encoding specified, just leave alone */ 422 if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) { 423 return NULL; 424 } 425 426 /* if multiple encodings specified, detect automagically */ 427 if (CG(script_encoding_list_size) > 1) { 428 return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC); 429 } 430 431 return CG(script_encoding_list)[0]; 432} 433 434ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC) 435{ 436 const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); 437 const zend_encoding *script_encoding = onetime_encoding ? 
onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C); 438 439 if (!script_encoding) { 440 return FAILURE; 441 } 442 443 /* judge input/output filter */ 444 LANG_SCNG(script_encoding) = script_encoding; 445 LANG_SCNG(input_filter) = NULL; 446 LANG_SCNG(output_filter) = NULL; 447 448 if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) { 449 if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { 450 /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */ 451 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; 452 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script; 453 } else { 454 LANG_SCNG(input_filter) = NULL; 455 LANG_SCNG(output_filter) = NULL; 456 } 457 return SUCCESS; 458 } 459 460 if (zend_multibyte_check_lexer_compatibility(internal_encoding)) { 461 LANG_SCNG(input_filter) = encoding_filter_script_to_internal; 462 LANG_SCNG(output_filter) = NULL; 463 } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { 464 LANG_SCNG(input_filter) = NULL; 465 LANG_SCNG(output_filter) = encoding_filter_script_to_internal; 466 } else { 467 /* both script and internal encodings are incompatible w/ flex */ 468 LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; 469 LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal; 470 } 471 472 return 0; 473} 474 475ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) 476{ 477 const char *file_path = NULL; 478 char *buf; 479 size_t size, offset = 0; 480 481 /* The shebang line was read, get the current position to obtain the buffer start */ 482 if (CG(start_lineno) == 2 && file_handle->type == ZEND_HANDLE_FP && file_handle->handle.fp) { 483 if ((offset = ftell(file_handle->handle.fp)) == -1) { 484 offset = 0; 485 } 486 } 487 488 if (zend_stream_fixup(file_handle, &buf, &size TSRMLS_CC) == FAILURE) { 489 return FAILURE; 490 } 491 492 
zend_llist_add_element(&CG(open_files), file_handle); 493 if (file_handle->handle.stream.handle >= (void*)file_handle && file_handle->handle.stream.handle <= (void*)(file_handle+1)) { 494 zend_file_handle *fh = (zend_file_handle*)zend_llist_get_last(&CG(open_files)); 495 size_t diff = (char*)file_handle->handle.stream.handle - (char*)file_handle; 496 fh->handle.stream.handle = (void*)(((char*)fh) + diff); 497 file_handle->handle.stream.handle = fh->handle.stream.handle; 498 } 499 500 /* Reset the scanner for scanning the new file */ 501 SCNG(yy_in) = file_handle; 502 SCNG(yy_start) = NULL; 503 504 if (size != -1) { 505 if (CG(multibyte)) { 506 SCNG(script_org) = (unsigned char*)buf; 507 SCNG(script_org_size) = size; 508 SCNG(script_filtered) = NULL; 509 510 zend_multibyte_set_filter(NULL TSRMLS_CC); 511 512 if (SCNG(input_filter)) { 513 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { 514 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " 515 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); 516 } 517 buf = (char*)SCNG(script_filtered); 518 size = SCNG(script_filtered_size); 519 } 520 } 521 SCNG(yy_start) = (unsigned char *)buf - offset; 522 yy_scan_buffer(buf, size TSRMLS_CC); 523 } else { 524 zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed"); 525 } 526 527 BEGIN(INITIAL); 528 529 if (file_handle->opened_path) { 530 file_path = file_handle->opened_path; 531 } else { 532 file_path = file_handle->filename; 533 } 534 535 zend_set_compiled_filename(file_path TSRMLS_CC); 536 537 if (CG(start_lineno)) { 538 CG(zend_lineno) = CG(start_lineno); 539 CG(start_lineno) = 0; 540 } else { 541 CG(zend_lineno) = 1; 542 } 543 544 CG(increment_lineno) = 0; 545 return SUCCESS; 546} 547END_EXTERN_C() 548 549 550ZEND_API zend_op_array *compile_file(zend_file_handle *file_handle, int 
type TSRMLS_DC) 551{ 552 zend_lex_state original_lex_state; 553 zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array)); 554 zend_op_array *original_active_op_array = CG(active_op_array); 555 zend_op_array *retval=NULL; 556 int compiler_result; 557 zend_bool compilation_successful=0; 558 znode retval_znode; 559 zend_bool original_in_compilation = CG(in_compilation); 560 561 retval_znode.op_type = IS_CONST; 562 retval_znode.u.constant.type = IS_LONG; 563 retval_znode.u.constant.value.lval = 1; 564 Z_UNSET_ISREF(retval_znode.u.constant); 565 Z_SET_REFCOUNT(retval_znode.u.constant, 1); 566 567 zend_save_lexical_state(&original_lex_state TSRMLS_CC); 568 569 retval = op_array; /* success oriented */ 570 571 if (open_file_for_scanning(file_handle TSRMLS_CC)==FAILURE) { 572 if (type==ZEND_REQUIRE) { 573 zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename TSRMLS_CC); 574 zend_bailout(); 575 } else { 576 zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename TSRMLS_CC); 577 } 578 compilation_successful=0; 579 } else { 580 init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); 581 CG(in_compilation) = 1; 582 CG(active_op_array) = op_array; 583 zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context))); 584 zend_init_compiler_context(TSRMLS_C); 585 compiler_result = zendparse(TSRMLS_C); 586 zend_do_return(&retval_znode, 0 TSRMLS_CC); 587 CG(in_compilation) = original_in_compilation; 588 if (compiler_result != 0) { /* parser error */ 589 zend_bailout(); 590 } 591 compilation_successful=1; 592 } 593 594 if (retval) { 595 CG(active_op_array) = original_active_op_array; 596 if (compilation_successful) { 597 pass_two(op_array TSRMLS_CC); 598 zend_release_labels(0 TSRMLS_CC); 599 } else { 600 efree(op_array); 601 retval = NULL; 602 } 603 } 604 zend_restore_lexical_state(&original_lex_state TSRMLS_CC); 605 return retval; 606} 607 608 609zend_op_array *compile_filename(int type, zval 
*filename TSRMLS_DC) 610{ 611 zend_file_handle file_handle; 612 zval tmp; 613 zend_op_array *retval; 614 char *opened_path = NULL; 615 616 if (filename->type != IS_STRING) { 617 tmp = *filename; 618 zval_copy_ctor(&tmp); 619 convert_to_string(&tmp); 620 filename = &tmp; 621 } 622 file_handle.filename = filename->value.str.val; 623 file_handle.free_filename = 0; 624 file_handle.type = ZEND_HANDLE_FILENAME; 625 file_handle.opened_path = NULL; 626 file_handle.handle.fp = NULL; 627 628 retval = zend_compile_file(&file_handle, type TSRMLS_CC); 629 if (retval && file_handle.handle.stream.handle) { 630 int dummy = 1; 631 632 if (!file_handle.opened_path) { 633 file_handle.opened_path = opened_path = estrndup(filename->value.str.val, filename->value.str.len); 634 } 635 636 zend_hash_add(&EG(included_files), file_handle.opened_path, strlen(file_handle.opened_path)+1, (void *)&dummy, sizeof(int), NULL); 637 638 if (opened_path) { 639 efree(opened_path); 640 } 641 } 642 zend_destroy_file_handle(&file_handle TSRMLS_CC); 643 644 if (filename==&tmp) { 645 zval_dtor(&tmp); 646 } 647 return retval; 648} 649 650ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC) 651{ 652 char *buf; 653 size_t size; 654 655 /* enforce two trailing NULLs for flex... 
*/ 656 if (IS_INTERNED(str->value.str.val)) { 657 char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD); 658 memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD); 659 str->value.str.val = tmp; 660 } else { 661 str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD); 662 } 663 664 memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD); 665 666 SCNG(yy_in) = NULL; 667 SCNG(yy_start) = NULL; 668 669 buf = str->value.str.val; 670 size = str->value.str.len; 671 672 if (CG(multibyte)) { 673 SCNG(script_org) = (unsigned char*)buf; 674 SCNG(script_org_size) = size; 675 SCNG(script_filtered) = NULL; 676 677 zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC); 678 679 if (SCNG(input_filter)) { 680 if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { 681 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " 682 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); 683 } 684 buf = (char*)SCNG(script_filtered); 685 size = SCNG(script_filtered_size); 686 } 687 } 688 689 yy_scan_buffer(buf, size TSRMLS_CC); 690 691 zend_set_compiled_filename(filename TSRMLS_CC); 692 CG(zend_lineno) = 1; 693 CG(increment_lineno) = 0; 694 return SUCCESS; 695} 696 697 698ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) 699{ 700 size_t offset = SCNG(yy_cursor) - SCNG(yy_start); 701 if (SCNG(input_filter)) { 702 size_t original_offset = offset, length = 0; 703 do { 704 unsigned char *p = NULL; 705 if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) { 706 return (size_t)-1; 707 } 708 efree(p); 709 if (length > original_offset) { 710 offset--; 711 } else if (length < original_offset) { 712 offset++; 713 } 714 } while (original_offset != length); 715 } 716 return offset; 717} 718 
719 720zend_op_array *compile_string(zval *source_string, char *filename TSRMLS_DC) 721{ 722 zend_lex_state original_lex_state; 723 zend_op_array *op_array = (zend_op_array *) emalloc(sizeof(zend_op_array)); 724 zend_op_array *original_active_op_array = CG(active_op_array); 725 zend_op_array *retval; 726 zval tmp; 727 int compiler_result; 728 zend_bool original_in_compilation = CG(in_compilation); 729 730 if (source_string->value.str.len==0) { 731 efree(op_array); 732 return NULL; 733 } 734 735 CG(in_compilation) = 1; 736 737 tmp = *source_string; 738 zval_copy_ctor(&tmp); 739 convert_to_string(&tmp); 740 source_string = &tmp; 741 742 zend_save_lexical_state(&original_lex_state TSRMLS_CC); 743 if (zend_prepare_string_for_scanning(source_string, filename TSRMLS_CC)==FAILURE) { 744 efree(op_array); 745 retval = NULL; 746 } else { 747 zend_bool orig_interactive = CG(interactive); 748 749 CG(interactive) = 0; 750 init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); 751 CG(interactive) = orig_interactive; 752 CG(active_op_array) = op_array; 753 zend_stack_push(&CG(context_stack), (void *) &CG(context), sizeof(CG(context))); 754 zend_init_compiler_context(TSRMLS_C); 755 BEGIN(ST_IN_SCRIPTING); 756 compiler_result = zendparse(TSRMLS_C); 757 758 if (SCNG(script_filtered)) { 759 efree(SCNG(script_filtered)); 760 SCNG(script_filtered) = NULL; 761 } 762 763 if (compiler_result != 0) { 764 CG(active_op_array) = original_active_op_array; 765 CG(unclean_shutdown)=1; 766 destroy_op_array(op_array TSRMLS_CC); 767 efree(op_array); 768 retval = NULL; 769 } else { 770 zend_do_return(NULL, 0 TSRMLS_CC); 771 CG(active_op_array) = original_active_op_array; 772 pass_two(op_array TSRMLS_CC); 773 zend_release_labels(0 TSRMLS_CC); 774 retval = op_array; 775 } 776 } 777 zend_restore_lexical_state(&original_lex_state TSRMLS_CC); 778 zval_dtor(&tmp); 779 CG(in_compilation) = original_in_compilation; 780 return retval; 781} 782 783 784BEGIN_EXTERN_C() 785int 
highlight_file(char *filename, zend_syntax_highlighter_ini *syntax_highlighter_ini TSRMLS_DC) 786{ 787 zend_lex_state original_lex_state; 788 zend_file_handle file_handle; 789 790 file_handle.type = ZEND_HANDLE_FILENAME; 791 file_handle.filename = filename; 792 file_handle.free_filename = 0; 793 file_handle.opened_path = NULL; 794 zend_save_lexical_state(&original_lex_state TSRMLS_CC); 795 if (open_file_for_scanning(&file_handle TSRMLS_CC)==FAILURE) { 796 zend_message_dispatcher(ZMSG_FAILED_HIGHLIGHT_FOPEN, filename TSRMLS_CC); 797 zend_restore_lexical_state(&original_lex_state TSRMLS_CC); 798 return FAILURE; 799 } 800 zend_highlight(syntax_highlighter_ini TSRMLS_CC); 801 if (SCNG(script_filtered)) { 802 efree(SCNG(script_filtered)); 803 SCNG(script_filtered) = NULL; 804 } 805 zend_destroy_file_handle(&file_handle TSRMLS_CC); 806 zend_restore_lexical_state(&original_lex_state TSRMLS_CC); 807 return SUCCESS; 808} 809 810int highlight_string(zval *str, zend_syntax_highlighter_ini *syntax_highlighter_ini, char *str_name TSRMLS_DC) 811{ 812 zend_lex_state original_lex_state; 813 zval tmp = *str; 814 815 str = &tmp; 816 zval_copy_ctor(str); 817 zend_save_lexical_state(&original_lex_state TSRMLS_CC); 818 if (zend_prepare_string_for_scanning(str, str_name TSRMLS_CC)==FAILURE) { 819 zend_restore_lexical_state(&original_lex_state TSRMLS_CC); 820 return FAILURE; 821 } 822 BEGIN(INITIAL); 823 zend_highlight(syntax_highlighter_ini TSRMLS_CC); 824 if (SCNG(script_filtered)) { 825 efree(SCNG(script_filtered)); 826 SCNG(script_filtered) = NULL; 827 } 828 zend_restore_lexical_state(&original_lex_state TSRMLS_CC); 829 zval_dtor(str); 830 return SUCCESS; 831} 832 833ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC) 834{ 835 size_t length; 836 unsigned char *new_yy_start; 837 838 /* convert and set */ 839 if (!SCNG(input_filter)) { 840 if (SCNG(script_filtered)) { 841 efree(SCNG(script_filtered)); 842 
SCNG(script_filtered) = NULL; 843 } 844 SCNG(script_filtered_size) = 0; 845 length = SCNG(script_org_size); 846 new_yy_start = SCNG(script_org); 847 } else { 848 if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { 849 zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " 850 "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); 851 } 852 SCNG(script_filtered) = new_yy_start; 853 SCNG(script_filtered_size) = length; 854 } 855 856 SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); 857 SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); 858 SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); 859 SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start)); 860 861 SCNG(yy_start) = new_yy_start; 862} 863 864 865# define zend_copy_value(zendlval, yytext, yyleng) \ 866 if (SCNG(output_filter)) { \ 867 size_t sz = 0; \ 868 SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC); \ 869 zendlval->value.str.len = sz; \ 870 } else { \ 871 zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \ 872 zendlval->value.str.len = yyleng; \ 873 } 874 875static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC) 876{ 877 register char *s, *t; 878 char *end; 879 880 ZVAL_STRINGL(zendlval, str, len, 1); 881 882 /* convert escape sequences */ 883 s = t = zendlval->value.str.val; 884 end = s+zendlval->value.str.len; 885 while (s<end) { 886 if (*s=='\\') { 887 s++; 888 if (s >= end) { 889 *t++ = '\\'; 890 break; 891 } 892 893 switch(*s) { 894 case 'n': 895 *t++ = '\n'; 896 zendlval->value.str.len--; 897 break; 898 case 'r': 899 *t++ = '\r'; 900 zendlval->value.str.len--; 901 break; 902 case 't': 903 *t++ = '\t'; 904 zendlval->value.str.len--; 905 break; 906 case 'f': 907 *t++ = 
'\f'; 908 zendlval->value.str.len--; 909 break; 910 case 'v': 911 *t++ = '\v'; 912 zendlval->value.str.len--; 913 break; 914 case 'e': 915#ifdef PHP_WIN32 916 *t++ = VK_ESCAPE; 917#else 918 *t++ = '\e'; 919#endif 920 zendlval->value.str.len--; 921 break; 922 case '"': 923 case '`': 924 if (*s != quote_type) { 925 *t++ = '\\'; 926 *t++ = *s; 927 break; 928 } 929 case '\\': 930 case '$': 931 *t++ = *s; 932 zendlval->value.str.len--; 933 break; 934 case 'x': 935 case 'X': 936 if (ZEND_IS_HEX(*(s+1))) { 937 char hex_buf[3] = { 0, 0, 0 }; 938 939 zendlval->value.str.len--; /* for the 'x' */ 940 941 hex_buf[0] = *(++s); 942 zendlval->value.str.len--; 943 if (ZEND_IS_HEX(*(s+1))) { 944 hex_buf[1] = *(++s); 945 zendlval->value.str.len--; 946 } 947 *t++ = (char) strtol(hex_buf, NULL, 16); 948 } else { 949 *t++ = '\\'; 950 *t++ = *s; 951 } 952 break; 953 default: 954 /* check for an octal */ 955 if (ZEND_IS_OCT(*s)) { 956 char octal_buf[4] = { 0, 0, 0, 0 }; 957 958 octal_buf[0] = *s; 959 zendlval->value.str.len--; 960 if (ZEND_IS_OCT(*(s+1))) { 961 octal_buf[1] = *(++s); 962 zendlval->value.str.len--; 963 if (ZEND_IS_OCT(*(s+1))) { 964 octal_buf[2] = *(++s); 965 zendlval->value.str.len--; 966 } 967 } 968 *t++ = (char) strtol(octal_buf, NULL, 8); 969 } else { 970 *t++ = '\\'; 971 *t++ = *s; 972 } 973 break; 974 } 975 } else { 976 *t++ = *s; 977 } 978 979 if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { 980 CG(zend_lineno)++; 981 } 982 s++; 983 } 984 *t = 0; 985 if (SCNG(output_filter)) { 986 size_t sz = 0; 987 s = zendlval->value.str.val; 988 SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC); 989 zendlval->value.str.len = sz; 990 efree(s); 991 } 992} 993 994 995int lex_scan(zval *zendlval TSRMLS_DC) 996{ 997restart: 998 SCNG(yy_text) = YYCURSOR; 999 1000yymore_restart: 1001 1002/*!re2c 1003re2c:yyfill:check = 0; 1004LNUM [0-9]+ 1005DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*) 
/* Named patterns used by the rules below (continued) */
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
HNUM "0x"[0-9a-fA-F]+
BNUM "0b"[01]+
LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
/* Single-character tokens returned as themselves. NOTE: inside the class the
 * sequence '+' '-' '/' forms a range from '+' to '/', which also covers the
 * comma, minus and dot characters. */
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
ANY_CHAR [^]
NEWLINE ("\r"|"\n"|"\r\n")

/* compute yyleng before each rule */
<!*> := yyleng = YYCURSOR - SCNG(yy_text);


/* Reserved words. Each rule only returns the parser token; nothing is stored
 * in zendlval. "exit" and "die" share the T_EXIT token. */
<ST_IN_SCRIPTING>"exit" {
	return T_EXIT;
}

<ST_IN_SCRIPTING>"die" {
	return T_EXIT;
}

<ST_IN_SCRIPTING>"function" {
	return T_FUNCTION;
}

<ST_IN_SCRIPTING>"const" {
	return T_CONST;
}

<ST_IN_SCRIPTING>"return" {
	return T_RETURN;
}

<ST_IN_SCRIPTING>"yield" {
	return T_YIELD;
}

<ST_IN_SCRIPTING>"try" {
	return T_TRY;
}

<ST_IN_SCRIPTING>"catch" {
	return T_CATCH;
}

<ST_IN_SCRIPTING>"finally" {
	return T_FINALLY;
}

<ST_IN_SCRIPTING>"throw" {
	return T_THROW;
}

<ST_IN_SCRIPTING>"if" {
	return T_IF;
}

<ST_IN_SCRIPTING>"elseif" {
	return T_ELSEIF;
}

<ST_IN_SCRIPTING>"endif" {
	return T_ENDIF;
}

<ST_IN_SCRIPTING>"else" {
	return T_ELSE;
}

<ST_IN_SCRIPTING>"while" {
	return T_WHILE;
}

<ST_IN_SCRIPTING>"endwhile" {
	return T_ENDWHILE;
}

<ST_IN_SCRIPTING>"do" {
	return T_DO;
}

<ST_IN_SCRIPTING>"for" {
	return T_FOR;
}

<ST_IN_SCRIPTING>"endfor" {
	return T_ENDFOR;
}

<ST_IN_SCRIPTING>"foreach" {
	return T_FOREACH;
}

<ST_IN_SCRIPTING>"endforeach" {
	return T_ENDFOREACH;
}

<ST_IN_SCRIPTING>"declare" {
	return T_DECLARE;
}

<ST_IN_SCRIPTING>"enddeclare" {
	return T_ENDDECLARE;
}

<ST_IN_SCRIPTING>"instanceof" {
	return T_INSTANCEOF;
}

<ST_IN_SCRIPTING>"as" {
	return T_AS;
}

/* More reserved words; each rule only returns its parser token. */
<ST_IN_SCRIPTING>"switch" {
	return T_SWITCH;
}

<ST_IN_SCRIPTING>"endswitch" {
	return T_ENDSWITCH;
}

<ST_IN_SCRIPTING>"case" {
	return T_CASE;
}

<ST_IN_SCRIPTING>"default" {
	return T_DEFAULT;
}

<ST_IN_SCRIPTING>"break" {
	return T_BREAK;
}

<ST_IN_SCRIPTING>"continue" {
	return T_CONTINUE;
}

<ST_IN_SCRIPTING>"goto" {
	return T_GOTO;
}

<ST_IN_SCRIPTING>"echo" {
	return T_ECHO;
}

<ST_IN_SCRIPTING>"print" {
	return T_PRINT;
}

<ST_IN_SCRIPTING>"class" {
	return T_CLASS;
}

<ST_IN_SCRIPTING>"interface" {
	return T_INTERFACE;
}

<ST_IN_SCRIPTING>"trait" {
	return T_TRAIT;
}

<ST_IN_SCRIPTING>"extends" {
	return T_EXTENDS;
}

<ST_IN_SCRIPTING>"implements" {
	return T_IMPLEMENTS;
}

/* After "->" the scanner enters ST_LOOKING_FOR_PROPERTY, where the next label
 * is returned as a plain T_STRING by the {LABEL} rule below instead of being
 * matched against the ST_IN_SCRIPTING keyword rules. */
<ST_IN_SCRIPTING>"->" {
	yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC);
	return T_OBJECT_OPERATOR;
}

/* Whitespace token: zendlval points straight into the scanner buffer, and the
 * line counter is advanced for every line break in the run. */
<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
	zendlval->value.str.val = yytext; /* no copying - intentional */
	zendlval->value.str.len = yyleng;
	zendlval->type = IS_STRING;
	HANDLE_NEWLINES(yytext, yyleng);
	return T_WHITESPACE;
}

/* A second "->" while already looking for a property: no additional state is pushed */
<ST_LOOKING_FOR_PROPERTY>"->" {
	return T_OBJECT_OPERATOR;
}

/* The property name itself: leave the property state and return a copy of the label */
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
	yy_pop_state(TSRMLS_C);
	zend_copy_value(zendlval, yytext, yyleng);
	zendlval->type = IS_STRING;
	return T_STRING;
}

/* Anything else: give the character back (yyless(0)), leave the property
 * state and rescan from the same position in the restored state */
<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
	yyless(0);
	yy_pop_state(TSRMLS_C);
	goto restart;
}

<ST_IN_SCRIPTING>"::" {
	return T_PAAMAYIM_NEKUDOTAYIM;
}

<ST_IN_SCRIPTING>"\\" {
	return T_NS_SEPARATOR;
}

<ST_IN_SCRIPTING>"new" {
	return T_NEW;
}

<ST_IN_SCRIPTING>"clone" {
	return T_CLONE;
}

<ST_IN_SCRIPTING>"var" {
	return T_VAR;
}

/* Cast operators: a type name in parentheses, with optional spaces and tabs
 * between the parentheses and the name. Several spellings map to one token. */
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
	return T_INT_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
	return T_DOUBLE_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
	return T_STRING_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
	return T_ARRAY_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
	return T_OBJECT_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
	return T_BOOL_CAST;
}

<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
	return T_UNSET_CAST;
}

/* Remaining reserved words of this group; token only, no semantic value */
<ST_IN_SCRIPTING>"eval" {
	return T_EVAL;
}

<ST_IN_SCRIPTING>"include" {
	return T_INCLUDE;
}

<ST_IN_SCRIPTING>"include_once" {
	return T_INCLUDE_ONCE;
}

<ST_IN_SCRIPTING>"require" {
	return T_REQUIRE;
}

<ST_IN_SCRIPTING>"require_once" {
	return T_REQUIRE_ONCE;
}

<ST_IN_SCRIPTING>"namespace" {
	return T_NAMESPACE;
}

<ST_IN_SCRIPTING>"use" {
	return T_USE;
}

<ST_IN_SCRIPTING>"insteadof" {
	return T_INSTEADOF;
}

<ST_IN_SCRIPTING>"global" {
	return T_GLOBAL;
}

<ST_IN_SCRIPTING>"isset" {
	return T_ISSET;
}

<ST_IN_SCRIPTING>"empty" {
	return T_EMPTY;
}

<ST_IN_SCRIPTING>"__halt_compiler" {
	return T_HALT_COMPILER;
}

<ST_IN_SCRIPTING>"static" {
	return T_STATIC;
}

<ST_IN_SCRIPTING>"abstract" {
	return T_ABSTRACT;
}

<ST_IN_SCRIPTING>"final" {
	return T_FINAL;
}

<ST_IN_SCRIPTING>"private" {
	return T_PRIVATE;
}

<ST_IN_SCRIPTING>"protected" {
	return T_PROTECTED;
}

1322<ST_IN_SCRIPTING>"public" { 1323 return T_PUBLIC; 1324} 1325 1326<ST_IN_SCRIPTING>"unset" { 1327 return T_UNSET; 1328} 1329 1330<ST_IN_SCRIPTING>"=>" { 1331 return T_DOUBLE_ARROW; 1332} 1333 1334<ST_IN_SCRIPTING>"list" { 1335 return T_LIST; 1336} 1337 1338<ST_IN_SCRIPTING>"array" { 1339 return T_ARRAY; 1340} 1341 1342<ST_IN_SCRIPTING>"callable" { 1343 return T_CALLABLE; 1344} 1345 1346<ST_IN_SCRIPTING>"++" { 1347 return T_INC; 1348} 1349 1350<ST_IN_SCRIPTING>"--" { 1351 return T_DEC; 1352} 1353 1354<ST_IN_SCRIPTING>"===" { 1355 return T_IS_IDENTICAL; 1356} 1357 1358<ST_IN_SCRIPTING>"!==" { 1359 return T_IS_NOT_IDENTICAL; 1360} 1361 1362<ST_IN_SCRIPTING>"==" { 1363 return T_IS_EQUAL; 1364} 1365 1366<ST_IN_SCRIPTING>"!="|"<>" { 1367 return T_IS_NOT_EQUAL; 1368} 1369 1370<ST_IN_SCRIPTING>"<=" { 1371 return T_IS_SMALLER_OR_EQUAL; 1372} 1373 1374<ST_IN_SCRIPTING>">=" { 1375 return T_IS_GREATER_OR_EQUAL; 1376} 1377 1378<ST_IN_SCRIPTING>"+=" { 1379 return T_PLUS_EQUAL; 1380} 1381 1382<ST_IN_SCRIPTING>"-=" { 1383 return T_MINUS_EQUAL; 1384} 1385 1386<ST_IN_SCRIPTING>"*=" { 1387 return T_MUL_EQUAL; 1388} 1389 1390<ST_IN_SCRIPTING>"/=" { 1391 return T_DIV_EQUAL; 1392} 1393 1394<ST_IN_SCRIPTING>".=" { 1395 return T_CONCAT_EQUAL; 1396} 1397 1398<ST_IN_SCRIPTING>"%=" { 1399 return T_MOD_EQUAL; 1400} 1401 1402<ST_IN_SCRIPTING>"<<=" { 1403 return T_SL_EQUAL; 1404} 1405 1406<ST_IN_SCRIPTING>">>=" { 1407 return T_SR_EQUAL; 1408} 1409 1410<ST_IN_SCRIPTING>"&=" { 1411 return T_AND_EQUAL; 1412} 1413 1414<ST_IN_SCRIPTING>"|=" { 1415 return T_OR_EQUAL; 1416} 1417 1418<ST_IN_SCRIPTING>"^=" { 1419 return T_XOR_EQUAL; 1420} 1421 1422<ST_IN_SCRIPTING>"||" { 1423 return T_BOOLEAN_OR; 1424} 1425 1426<ST_IN_SCRIPTING>"&&" { 1427 return T_BOOLEAN_AND; 1428} 1429 1430<ST_IN_SCRIPTING>"OR" { 1431 return T_LOGICAL_OR; 1432} 1433 1434<ST_IN_SCRIPTING>"AND" { 1435 return T_LOGICAL_AND; 1436} 1437 1438<ST_IN_SCRIPTING>"XOR" { 1439 return T_LOGICAL_XOR; 1440} 1441 1442<ST_IN_SCRIPTING>"<<" { 1443 
return T_SL; 1444} 1445 1446<ST_IN_SCRIPTING>">>" { 1447 return T_SR; 1448} 1449 1450<ST_IN_SCRIPTING>{TOKENS} { 1451 return yytext[0]; 1452} 1453 1454 1455<ST_IN_SCRIPTING>"{" { 1456 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); 1457 return '{'; 1458} 1459 1460 1461<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" { 1462 yy_push_state(ST_LOOKING_FOR_VARNAME TSRMLS_CC); 1463 return T_DOLLAR_OPEN_CURLY_BRACES; 1464} 1465 1466 1467<ST_IN_SCRIPTING>"}" { 1468 RESET_DOC_COMMENT(); 1469 if (!zend_stack_is_empty(&SCNG(state_stack))) { 1470 yy_pop_state(TSRMLS_C); 1471 } 1472 return '}'; 1473} 1474 1475 1476<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] { 1477 yyless(yyleng - 1); 1478 zend_copy_value(zendlval, yytext, yyleng); 1479 zendlval->type = IS_STRING; 1480 yy_pop_state(TSRMLS_C); 1481 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); 1482 return T_STRING_VARNAME; 1483} 1484 1485 1486<ST_LOOKING_FOR_VARNAME>{ANY_CHAR} { 1487 yyless(0); 1488 yy_pop_state(TSRMLS_C); 1489 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); 1490 goto restart; 1491} 1492 1493<ST_IN_SCRIPTING>{BNUM} { 1494 char *bin = yytext + 2; /* Skip "0b" */ 1495 int len = yyleng - 2; 1496 1497 /* Skip any leading 0s */ 1498 while (*bin == '0') { 1499 ++bin; 1500 --len; 1501 } 1502 1503 if (len < SIZEOF_LONG * 8) { 1504 if (len == 0) { 1505 zendlval->value.lval = 0; 1506 } else { 1507 zendlval->value.lval = strtol(bin, NULL, 2); 1508 } 1509 zendlval->type = IS_LONG; 1510 return T_LNUMBER; 1511 } else { 1512 zendlval->value.dval = zend_bin_strtod(bin, NULL); 1513 zendlval->type = IS_DOUBLE; 1514 return T_DNUMBER; 1515 } 1516} 1517 1518<ST_IN_SCRIPTING>{LNUM} { 1519 if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */ 1520 zendlval->value.lval = strtol(yytext, NULL, 0); 1521 } else { 1522 errno = 0; 1523 zendlval->value.lval = strtol(yytext, NULL, 0); 1524 if (errno == ERANGE) { /* Overflow */ 1525 if (yytext[0] == '0') { /* octal overflow */ 1526 zendlval->value.dval = zend_oct_strtod(yytext, NULL); 1527 } else { 1528 
zendlval->value.dval = zend_strtod(yytext, NULL); 1529 } 1530 zendlval->type = IS_DOUBLE; 1531 return T_DNUMBER; 1532 } 1533 } 1534 1535 zendlval->type = IS_LONG; 1536 return T_LNUMBER; 1537} 1538 1539<ST_IN_SCRIPTING>{HNUM} { 1540 char *hex = yytext + 2; /* Skip "0x" */ 1541 int len = yyleng - 2; 1542 1543 /* Skip any leading 0s */ 1544 while (*hex == '0') { 1545 hex++; 1546 len--; 1547 } 1548 1549 if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) { 1550 if (len == 0) { 1551 zendlval->value.lval = 0; 1552 } else { 1553 zendlval->value.lval = strtol(hex, NULL, 16); 1554 } 1555 zendlval->type = IS_LONG; 1556 return T_LNUMBER; 1557 } else { 1558 zendlval->value.dval = zend_hex_strtod(hex, NULL); 1559 zendlval->type = IS_DOUBLE; 1560 return T_DNUMBER; 1561 } 1562} 1563 1564<ST_VAR_OFFSET>[0]|([1-9][0-9]*) { /* Offset could be treated as a long */ 1565 if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) { 1566 zendlval->value.lval = strtol(yytext, NULL, 10); 1567 zendlval->type = IS_LONG; 1568 } else { 1569 zendlval->value.str.val = (char *)estrndup(yytext, yyleng); 1570 zendlval->value.str.len = yyleng; 1571 zendlval->type = IS_STRING; 1572 } 1573 return T_NUM_STRING; 1574} 1575 1576<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */ 1577 zendlval->value.str.val = (char *)estrndup(yytext, yyleng); 1578 zendlval->value.str.len = yyleng; 1579 zendlval->type = IS_STRING; 1580 return T_NUM_STRING; 1581} 1582 1583<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} { 1584 zendlval->value.dval = zend_strtod(yytext, NULL); 1585 zendlval->type = IS_DOUBLE; 1586 return T_DNUMBER; 1587} 1588 1589<ST_IN_SCRIPTING>"__CLASS__" { 1590 const char *class_name = NULL; 1591 1592 if (CG(active_class_entry) 1593 && (ZEND_ACC_TRAIT == 1594 (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) { 1595 /* We create a special __CLASS__ constant that is going to be resolved 1596 at run-time */ 1597 
zendlval->value.str.len = sizeof("__CLASS__")-1; 1598 zendlval->value.str.val = estrndup("__CLASS__", zendlval->value.str.len); 1599 zendlval->type = IS_CONSTANT; 1600 } else { 1601 if (CG(active_class_entry)) { 1602 class_name = CG(active_class_entry)->name; 1603 } 1604 1605 if (!class_name) { 1606 class_name = ""; 1607 } 1608 1609 zendlval->value.str.len = strlen(class_name); 1610 zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len); 1611 zendlval->type = IS_STRING; 1612 } 1613 return T_CLASS_C; 1614} 1615 1616<ST_IN_SCRIPTING>"__TRAIT__" { 1617 const char *trait_name = NULL; 1618 1619 if (CG(active_class_entry) 1620 && (ZEND_ACC_TRAIT == 1621 (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) { 1622 trait_name = CG(active_class_entry)->name; 1623 } 1624 1625 if (!trait_name) { 1626 trait_name = ""; 1627 } 1628 1629 zendlval->value.str.len = strlen(trait_name); 1630 zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len); 1631 zendlval->type = IS_STRING; 1632 1633 return T_TRAIT_C; 1634} 1635 1636<ST_IN_SCRIPTING>"__FUNCTION__" { 1637 const char *func_name = NULL; 1638 1639 if (CG(active_op_array)) { 1640 func_name = CG(active_op_array)->function_name; 1641 } 1642 1643 if (!func_name) { 1644 func_name = ""; 1645 } 1646 zendlval->value.str.len = strlen(func_name); 1647 zendlval->value.str.val = estrndup(func_name, zendlval->value.str.len); 1648 zendlval->type = IS_STRING; 1649 return T_FUNC_C; 1650} 1651 1652<ST_IN_SCRIPTING>"__METHOD__" { 1653 const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL; 1654 const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL; 1655 size_t len = 0; 1656 1657 if (class_name) { 1658 len += strlen(class_name) + 2; 1659 } 1660 if (func_name) { 1661 len += strlen(func_name); 1662 } 1663 1664 zendlval->value.str.len = zend_spprintf(&zendlval->value.str.val, 0, "%s%s%s", 1665 class_name ? class_name : "", 1666 class_name && func_name ? 
"::" : "", 1667 func_name ? func_name : "" 1668 ); 1669 zendlval->type = IS_STRING; 1670 return T_METHOD_C; 1671} 1672 1673<ST_IN_SCRIPTING>"__LINE__" { 1674 zendlval->value.lval = CG(zend_lineno); 1675 zendlval->type = IS_LONG; 1676 return T_LINE; 1677} 1678 1679<ST_IN_SCRIPTING>"__FILE__" { 1680 char *filename = zend_get_compiled_filename(TSRMLS_C); 1681 1682 if (!filename) { 1683 filename = ""; 1684 } 1685 zendlval->value.str.len = strlen(filename); 1686 zendlval->value.str.val = estrndup(filename, zendlval->value.str.len); 1687 zendlval->type = IS_STRING; 1688 return T_FILE; 1689} 1690 1691<ST_IN_SCRIPTING>"__DIR__" { 1692 char *filename = zend_get_compiled_filename(TSRMLS_C); 1693 const size_t filename_len = strlen(filename); 1694 char *dirname; 1695 1696 if (!filename) { 1697 filename = ""; 1698 } 1699 1700 dirname = estrndup(filename, filename_len); 1701 zend_dirname(dirname, filename_len); 1702 1703 if (strcmp(dirname, ".") == 0) { 1704 dirname = erealloc(dirname, MAXPATHLEN); 1705#if HAVE_GETCWD 1706 VCWD_GETCWD(dirname, MAXPATHLEN); 1707#elif HAVE_GETWD 1708 VCWD_GETWD(dirname); 1709#endif 1710 } 1711 1712 zendlval->value.str.len = strlen(dirname); 1713 zendlval->value.str.val = dirname; 1714 zendlval->type = IS_STRING; 1715 return T_DIR; 1716} 1717 1718<ST_IN_SCRIPTING>"__NAMESPACE__" { 1719 if (CG(current_namespace)) { 1720 *zendlval = *CG(current_namespace); 1721 zval_copy_ctor(zendlval); 1722 } else { 1723 ZVAL_EMPTY_STRING(zendlval); 1724 } 1725 return T_NS_C; 1726} 1727 1728<INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" { 1729 YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1)); 1730 1731 if (bracket != SCNG(yy_text)) { 1732 /* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */ 1733 YYCURSOR = bracket; 1734 goto inline_html; 1735 } 1736 1737 HANDLE_NEWLINES(yytext, yyleng); 1738 
zendlval->value.str.val = yytext; /* no copying - intentional */ 1739 zendlval->value.str.len = yyleng; 1740 zendlval->type = IS_STRING; 1741 BEGIN(ST_IN_SCRIPTING); 1742 return T_OPEN_TAG; 1743} 1744 1745 1746<INITIAL>"<%=" { 1747 if (CG(asp_tags)) { 1748 zendlval->value.str.val = yytext; /* no copying - intentional */ 1749 zendlval->value.str.len = yyleng; 1750 zendlval->type = IS_STRING; 1751 BEGIN(ST_IN_SCRIPTING); 1752 return T_OPEN_TAG_WITH_ECHO; 1753 } else { 1754 goto inline_char_handler; 1755 } 1756} 1757 1758 1759<INITIAL>"<?=" { 1760 zendlval->value.str.val = yytext; /* no copying - intentional */ 1761 zendlval->value.str.len = yyleng; 1762 zendlval->type = IS_STRING; 1763 BEGIN(ST_IN_SCRIPTING); 1764 return T_OPEN_TAG_WITH_ECHO; 1765} 1766 1767 1768<INITIAL>"<%" { 1769 if (CG(asp_tags)) { 1770 zendlval->value.str.val = yytext; /* no copying - intentional */ 1771 zendlval->value.str.len = yyleng; 1772 zendlval->type = IS_STRING; 1773 BEGIN(ST_IN_SCRIPTING); 1774 return T_OPEN_TAG; 1775 } else { 1776 goto inline_char_handler; 1777 } 1778} 1779 1780 1781<INITIAL>"<?php"([ \t]|{NEWLINE}) { 1782 zendlval->value.str.val = yytext; /* no copying - intentional */ 1783 zendlval->value.str.len = yyleng; 1784 zendlval->type = IS_STRING; 1785 HANDLE_NEWLINE(yytext[yyleng-1]); 1786 BEGIN(ST_IN_SCRIPTING); 1787 return T_OPEN_TAG; 1788} 1789 1790 1791<INITIAL>"<?" { 1792 if (CG(short_tags)) { 1793 zendlval->value.str.val = yytext; /* no copying - intentional */ 1794 zendlval->value.str.len = yyleng; 1795 zendlval->type = IS_STRING; 1796 BEGIN(ST_IN_SCRIPTING); 1797 return T_OPEN_TAG; 1798 } else { 1799 goto inline_char_handler; 1800 } 1801} 1802 1803<INITIAL>{ANY_CHAR} { 1804 if (YYCURSOR > YYLIMIT) { 1805 return 0; 1806 } 1807 1808inline_char_handler: 1809 1810 while (1) { 1811 YYCTYPE *ptr = memchr(YYCURSOR, '<', YYLIMIT - YYCURSOR); 1812 1813 YYCURSOR = ptr ? 
ptr + 1 : YYLIMIT; 1814 1815 if (YYCURSOR < YYLIMIT) { 1816 switch (*YYCURSOR) { 1817 case '?': 1818 if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */ 1819 break; 1820 } 1821 continue; 1822 case '%': 1823 if (CG(asp_tags)) { 1824 break; 1825 } 1826 continue; 1827 case 's': 1828 case 'S': 1829 /* Probably NOT an opening PHP <script> tag, so don't end the HTML chunk yet 1830 * If it is, the PHP <script> tag rule checks for any HTML scanned before it */ 1831 YYCURSOR--; 1832 yymore(); 1833 default: 1834 continue; 1835 } 1836 1837 YYCURSOR--; 1838 } 1839 1840 break; 1841 } 1842 1843inline_html: 1844 yyleng = YYCURSOR - SCNG(yy_text); 1845 1846 if (SCNG(output_filter)) { 1847 int readsize; 1848 size_t sz = 0; 1849 readsize = SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)yytext, (size_t)yyleng TSRMLS_CC); 1850 zendlval->value.str.len = sz; 1851 if (readsize < yyleng) { 1852 yyless(readsize); 1853 } 1854 } else { 1855 zendlval->value.str.val = (char *) estrndup(yytext, yyleng); 1856 zendlval->value.str.len = yyleng; 1857 } 1858 zendlval->type = IS_STRING; 1859 HANDLE_NEWLINES(yytext, yyleng); 1860 return T_INLINE_HTML; 1861} 1862 1863 1864/* Make sure a label character follows "->", otherwise there is no property 1865 * and "->" will be taken literally 1866 */ 1867<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] { 1868 yyless(yyleng - 3); 1869 yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC); 1870 zend_copy_value(zendlval, (yytext+1), (yyleng-1)); 1871 zendlval->type = IS_STRING; 1872 return T_VARIABLE; 1873} 1874 1875/* A [ always designates a variable offset, regardless of what follows 1876 */ 1877<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" { 1878 yyless(yyleng - 1); 1879 yy_push_state(ST_VAR_OFFSET TSRMLS_CC); 1880 zend_copy_value(zendlval, (yytext+1), (yyleng-1)); 1881 zendlval->type = IS_STRING; 1882 
return T_VARIABLE; 1883} 1884 1885<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} { 1886 zend_copy_value(zendlval, (yytext+1), (yyleng-1)); 1887 zendlval->type = IS_STRING; 1888 return T_VARIABLE; 1889} 1890 1891<ST_VAR_OFFSET>"]" { 1892 yy_pop_state(TSRMLS_C); 1893 return ']'; 1894} 1895 1896<ST_VAR_OFFSET>{TOKENS}|[{}"`] { 1897 /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */ 1898 return yytext[0]; 1899} 1900 1901<ST_VAR_OFFSET>[ \n\r\t\\'#] { 1902 /* Invalid rule to return a more explicit parse error with proper line number */ 1903 yyless(0); 1904 yy_pop_state(TSRMLS_C); 1905 return T_ENCAPSED_AND_WHITESPACE; 1906} 1907 1908<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} { 1909 zend_copy_value(zendlval, yytext, yyleng); 1910 zendlval->type = IS_STRING; 1911 return T_STRING; 1912} 1913 1914 1915<ST_IN_SCRIPTING>"#"|"//" { 1916 while (YYCURSOR < YYLIMIT) { 1917 switch (*YYCURSOR++) { 1918 case '\r': 1919 if (*YYCURSOR == '\n') { 1920 YYCURSOR++; 1921 } 1922 /* fall through */ 1923 case '\n': 1924 CG(zend_lineno)++; 1925 break; 1926 case '%': 1927 if (!CG(asp_tags)) { 1928 continue; 1929 } 1930 /* fall through */ 1931 case '?': 1932 if (*YYCURSOR == '>') { 1933 YYCURSOR--; 1934 break; 1935 } 1936 /* fall through */ 1937 default: 1938 continue; 1939 } 1940 1941 break; 1942 } 1943 1944 yyleng = YYCURSOR - SCNG(yy_text); 1945 1946 return T_COMMENT; 1947} 1948 1949<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} { 1950 int doc_com; 1951 1952 if (yyleng > 2) { 1953 doc_com = 1; 1954 RESET_DOC_COMMENT(); 1955 } else { 1956 doc_com = 0; 1957 } 1958 1959 while (YYCURSOR < YYLIMIT) { 1960 if (*YYCURSOR++ == '*' && *YYCURSOR == '/') { 1961 break; 1962 } 1963 } 1964 1965 if (YYCURSOR < YYLIMIT) { 1966 YYCURSOR++; 1967 } else { 1968 zend_error(E_COMPILE_WARNING, "Unterminated comment starting line %d", CG(zend_lineno)); 1969 } 1970 1971 yyleng = YYCURSOR - SCNG(yy_text); 1972 HANDLE_NEWLINES(yytext, yyleng); 1973 
1974 if (doc_com) { 1975 CG(doc_comment) = estrndup(yytext, yyleng); 1976 CG(doc_comment_len) = yyleng; 1977 return T_DOC_COMMENT; 1978 } 1979 1980 return T_COMMENT; 1981} 1982 1983<ST_IN_SCRIPTING>("?>"|"</script"{WHITESPACE}*">"){NEWLINE}? { 1984 zendlval->value.str.val = yytext; /* no copying - intentional */ 1985 zendlval->value.str.len = yyleng; 1986 zendlval->type = IS_STRING; 1987 BEGIN(INITIAL); 1988 return T_CLOSE_TAG; /* implicit ';' at php-end tag */ 1989} 1990 1991 1992<ST_IN_SCRIPTING>"%>"{NEWLINE}? { 1993 if (CG(asp_tags)) { 1994 BEGIN(INITIAL); 1995 zendlval->value.str.len = yyleng; 1996 zendlval->type = IS_STRING; 1997 zendlval->value.str.val = yytext; /* no copying - intentional */ 1998 return T_CLOSE_TAG; /* implicit ';' at php-end tag */ 1999 } else { 2000 yyless(1); 2001 return yytext[0]; 2002 } 2003} 2004 2005 2006<ST_IN_SCRIPTING>b?['] { 2007 register char *s, *t; 2008 char *end; 2009 int bprefix = (yytext[0] != '\'') ? 1 : 0; 2010 2011 while (1) { 2012 if (YYCURSOR < YYLIMIT) { 2013 if (*YYCURSOR == '\'') { 2014 YYCURSOR++; 2015 yyleng = YYCURSOR - SCNG(yy_text); 2016 2017 break; 2018 } else if (*YYCURSOR++ == '\\' && YYCURSOR < YYLIMIT) { 2019 YYCURSOR++; 2020 } 2021 } else { 2022 yyleng = YYLIMIT - SCNG(yy_text); 2023 2024 /* Unclosed single quotes; treat similar to double quotes, but without a separate token 2025 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..." 
2026 * rule, which continued in ST_IN_SCRIPTING state after the quote */ 2027 return T_ENCAPSED_AND_WHITESPACE; 2028 } 2029 } 2030 2031 zendlval->value.str.val = estrndup(yytext+bprefix+1, yyleng-bprefix-2); 2032 zendlval->value.str.len = yyleng-bprefix-2; 2033 zendlval->type = IS_STRING; 2034 2035 /* convert escape sequences */ 2036 s = t = zendlval->value.str.val; 2037 end = s+zendlval->value.str.len; 2038 while (s<end) { 2039 if (*s=='\\') { 2040 s++; 2041 2042 switch(*s) { 2043 case '\\': 2044 case '\'': 2045 *t++ = *s; 2046 zendlval->value.str.len--; 2047 break; 2048 default: 2049 *t++ = '\\'; 2050 *t++ = *s; 2051 break; 2052 } 2053 } else { 2054 *t++ = *s; 2055 } 2056 2057 if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { 2058 CG(zend_lineno)++; 2059 } 2060 s++; 2061 } 2062 *t = 0; 2063 2064 if (SCNG(output_filter)) { 2065 size_t sz = 0; 2066 s = zendlval->value.str.val; 2067 SCNG(output_filter)((unsigned char **)&(zendlval->value.str.val), &sz, (unsigned char *)s, (size_t)zendlval->value.str.len TSRMLS_CC); 2068 zendlval->value.str.len = sz; 2069 efree(s); 2070 } 2071 return T_CONSTANT_ENCAPSED_STRING; 2072} 2073 2074 2075<ST_IN_SCRIPTING>b?["] { 2076 int bprefix = (yytext[0] != '"') ? 
1 : 0; 2077 2078 while (YYCURSOR < YYLIMIT) { 2079 switch (*YYCURSOR++) { 2080 case '"': 2081 yyleng = YYCURSOR - SCNG(yy_text); 2082 zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC); 2083 return T_CONSTANT_ENCAPSED_STRING; 2084 case '$': 2085 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { 2086 break; 2087 } 2088 continue; 2089 case '{': 2090 if (*YYCURSOR == '$') { 2091 break; 2092 } 2093 continue; 2094 case '\\': 2095 if (YYCURSOR < YYLIMIT) { 2096 YYCURSOR++; 2097 } 2098 /* fall through */ 2099 default: 2100 continue; 2101 } 2102 2103 YYCURSOR--; 2104 break; 2105 } 2106 2107 /* Remember how much was scanned to save rescanning */ 2108 SET_DOUBLE_QUOTES_SCANNED_LENGTH(YYCURSOR - SCNG(yy_text) - yyleng); 2109 2110 YYCURSOR = SCNG(yy_text) + yyleng; 2111 2112 BEGIN(ST_DOUBLE_QUOTES); 2113 return '"'; 2114} 2115 2116 2117<ST_IN_SCRIPTING>b?"<<<"{TABS_AND_SPACES}({LABEL}|([']{LABEL}['])|(["]{LABEL}["])){NEWLINE} { 2118 char *s; 2119 int bprefix = (yytext[0] != '<') ? 
1 : 0; 2120 zend_heredoc_label *heredoc_label = emalloc(sizeof(zend_heredoc_label)); 2121 2122 CG(zend_lineno)++; 2123 heredoc_label->length = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0); 2124 s = yytext+bprefix+3; 2125 while ((*s == ' ') || (*s == '\t')) { 2126 s++; 2127 heredoc_label->length--; 2128 } 2129 2130 if (*s == '\'') { 2131 s++; 2132 heredoc_label->length -= 2; 2133 2134 BEGIN(ST_NOWDOC); 2135 } else { 2136 if (*s == '"') { 2137 s++; 2138 heredoc_label->length -= 2; 2139 } 2140 2141 BEGIN(ST_HEREDOC); 2142 } 2143 2144 heredoc_label->label = estrndup(s, heredoc_label->length); 2145 2146 /* Check for ending label on the next line */ 2147 if (heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, s, heredoc_label->length)) { 2148 YYCTYPE *end = YYCURSOR + heredoc_label->length; 2149 2150 if (*end == ';') { 2151 end++; 2152 } 2153 2154 if (*end == '\n' || *end == '\r') { 2155 BEGIN(ST_END_HEREDOC); 2156 } 2157 } 2158 2159 zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label); 2160 2161 return T_START_HEREDOC; 2162} 2163 2164 2165<ST_IN_SCRIPTING>[`] { 2166 BEGIN(ST_BACKQUOTE); 2167 return '`'; 2168} 2169 2170 2171<ST_END_HEREDOC>{ANY_CHAR} { 2172 zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack)); 2173 2174 YYCURSOR += heredoc_label->length - 1; 2175 yyleng = heredoc_label->length; 2176 2177 heredoc_label_dtor(heredoc_label); 2178 efree(heredoc_label); 2179 2180 BEGIN(ST_IN_SCRIPTING); 2181 return T_END_HEREDOC; 2182} 2183 2184 2185<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" { 2186 zendlval->value.lval = (long) '{'; 2187 yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); 2188 yyless(1); 2189 return T_CURLY_OPEN; 2190} 2191 2192 2193<ST_DOUBLE_QUOTES>["] { 2194 BEGIN(ST_IN_SCRIPTING); 2195 return '"'; 2196} 2197 2198<ST_BACKQUOTE>[`] { 2199 BEGIN(ST_IN_SCRIPTING); 2200 return '`'; 2201} 2202 2203 2204<ST_DOUBLE_QUOTES>{ANY_CHAR} { 2205 if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) { 2206 YYCURSOR += 
GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1; 2207 SET_DOUBLE_QUOTES_SCANNED_LENGTH(0); 2208 2209 goto double_quotes_scan_done; 2210 } 2211 2212 if (YYCURSOR > YYLIMIT) { 2213 return 0; 2214 } 2215 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { 2216 YYCURSOR++; 2217 } 2218 2219 while (YYCURSOR < YYLIMIT) { 2220 switch (*YYCURSOR++) { 2221 case '"': 2222 break; 2223 case '$': 2224 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { 2225 break; 2226 } 2227 continue; 2228 case '{': 2229 if (*YYCURSOR == '$') { 2230 break; 2231 } 2232 continue; 2233 case '\\': 2234 if (YYCURSOR < YYLIMIT) { 2235 YYCURSOR++; 2236 } 2237 /* fall through */ 2238 default: 2239 continue; 2240 } 2241 2242 YYCURSOR--; 2243 break; 2244 } 2245 2246double_quotes_scan_done: 2247 yyleng = YYCURSOR - SCNG(yy_text); 2248 2249 zend_scan_escape_string(zendlval, yytext, yyleng, '"' TSRMLS_CC); 2250 return T_ENCAPSED_AND_WHITESPACE; 2251} 2252 2253 2254<ST_BACKQUOTE>{ANY_CHAR} { 2255 if (YYCURSOR > YYLIMIT) { 2256 return 0; 2257 } 2258 if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { 2259 YYCURSOR++; 2260 } 2261 2262 while (YYCURSOR < YYLIMIT) { 2263 switch (*YYCURSOR++) { 2264 case '`': 2265 break; 2266 case '$': 2267 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { 2268 break; 2269 } 2270 continue; 2271 case '{': 2272 if (*YYCURSOR == '$') { 2273 break; 2274 } 2275 continue; 2276 case '\\': 2277 if (YYCURSOR < YYLIMIT) { 2278 YYCURSOR++; 2279 } 2280 /* fall through */ 2281 default: 2282 continue; 2283 } 2284 2285 YYCURSOR--; 2286 break; 2287 } 2288 2289 yyleng = YYCURSOR - SCNG(yy_text); 2290 2291 zend_scan_escape_string(zendlval, yytext, yyleng, '`' TSRMLS_CC); 2292 return T_ENCAPSED_AND_WHITESPACE; 2293} 2294 2295 2296<ST_HEREDOC>{ANY_CHAR} { 2297 int newline = 0; 2298 2299 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack)); 2300 2301 if (YYCURSOR > YYLIMIT) { 2302 return 0; 2303 } 2304 2305 YYCURSOR--; 2306 2307 while (YYCURSOR < YYLIMIT) { 2308 switch (*YYCURSOR++) { 
2309 case '\r': 2310 if (*YYCURSOR == '\n') { 2311 YYCURSOR++; 2312 } 2313 /* fall through */ 2314 case '\n': 2315 /* Check for ending label on the next line */ 2316 if (IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) { 2317 YYCTYPE *end = YYCURSOR + heredoc_label->length; 2318 2319 if (*end == ';') { 2320 end++; 2321 } 2322 2323 if (*end == '\n' || *end == '\r') { 2324 /* newline before label will be subtracted from returned text, but 2325 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */ 2326 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') { 2327 newline = 2; /* Windows newline */ 2328 } else { 2329 newline = 1; 2330 } 2331 2332 CG(increment_lineno) = 1; /* For newline before label */ 2333 BEGIN(ST_END_HEREDOC); 2334 2335 goto heredoc_scan_done; 2336 } 2337 } 2338 continue; 2339 case '$': 2340 if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') { 2341 break; 2342 } 2343 continue; 2344 case '{': 2345 if (*YYCURSOR == '$') { 2346 break; 2347 } 2348 continue; 2349 case '\\': 2350 if (YYCURSOR < YYLIMIT && *YYCURSOR != '\n' && *YYCURSOR != '\r') { 2351 YYCURSOR++; 2352 } 2353 /* fall through */ 2354 default: 2355 continue; 2356 } 2357 2358 YYCURSOR--; 2359 break; 2360 } 2361 2362heredoc_scan_done: 2363 yyleng = YYCURSOR - SCNG(yy_text); 2364 2365 zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0 TSRMLS_CC); 2366 return T_ENCAPSED_AND_WHITESPACE; 2367} 2368 2369 2370<ST_NOWDOC>{ANY_CHAR} { 2371 int newline = 0; 2372 2373 zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack)); 2374 2375 if (YYCURSOR > YYLIMIT) { 2376 return 0; 2377 } 2378 2379 YYCURSOR--; 2380 2381 while (YYCURSOR < YYLIMIT) { 2382 switch (*YYCURSOR++) { 2383 case '\r': 2384 if (*YYCURSOR == '\n') { 2385 YYCURSOR++; 2386 } 2387 /* fall through */ 2388 case '\n': 2389 /* Check for ending label on the next line */ 2390 if 
(IS_LABEL_START(*YYCURSOR) && heredoc_label->length < YYLIMIT - YYCURSOR && !memcmp(YYCURSOR, heredoc_label->label, heredoc_label->length)) { 2391 YYCTYPE *end = YYCURSOR + heredoc_label->length; 2392 2393 if (*end == ';') { 2394 end++; 2395 } 2396 2397 if (*end == '\n' || *end == '\r') { 2398 /* newline before label will be subtracted from returned text, but 2399 * yyleng/yytext will include it, for zend_highlight/strip, tokenizer, etc. */ 2400 if (YYCURSOR[-2] == '\r' && YYCURSOR[-1] == '\n') { 2401 newline = 2; /* Windows newline */ 2402 } else { 2403 newline = 1; 2404 } 2405 2406 CG(increment_lineno) = 1; /* For newline before label */ 2407 BEGIN(ST_END_HEREDOC); 2408 2409 goto nowdoc_scan_done; 2410 } 2411 } 2412 /* fall through */ 2413 default: 2414 continue; 2415 } 2416 } 2417 2418nowdoc_scan_done: 2419 yyleng = YYCURSOR - SCNG(yy_text); 2420 2421 zend_copy_value(zendlval, yytext, yyleng - newline); 2422 zendlval->type = IS_STRING; 2423 HANDLE_NEWLINES(yytext, yyleng - newline); 2424 return T_ENCAPSED_AND_WHITESPACE; 2425} 2426 2427 2428<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} { 2429 if (YYCURSOR > YYLIMIT) { 2430 return 0; 2431 } 2432 2433 zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); 2434 goto restart; 2435} 2436 2437*/ 2438} 2439