1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 5 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1997-2013 The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Author: Andrei Zmievski <andrei@php.net> | 16 +----------------------------------------------------------------------+ 17 */ 18 19/* $Id$ */ 20 21#include "php.h" 22#include "php_ini.h" 23#include "php_globals.h" 24#include "php_pcre.h" 25#include "ext/standard/info.h" 26#include "ext/standard/php_smart_str.h" 27 28#if HAVE_PCRE || HAVE_BUNDLED_PCRE 29 30#include "ext/standard/php_string.h" 31 32#define PREG_PATTERN_ORDER 1 33#define PREG_SET_ORDER 2 34#define PREG_OFFSET_CAPTURE (1<<8) 35 36#define PREG_SPLIT_NO_EMPTY (1<<0) 37#define PREG_SPLIT_DELIM_CAPTURE (1<<1) 38#define PREG_SPLIT_OFFSET_CAPTURE (1<<2) 39 40#define PREG_REPLACE_EVAL (1<<0) 41 42#define PREG_GREP_INVERT (1<<0) 43 44#define PCRE_CACHE_SIZE 4096 45 46enum { 47 PHP_PCRE_NO_ERROR = 0, 48 PHP_PCRE_INTERNAL_ERROR, 49 PHP_PCRE_BACKTRACK_LIMIT_ERROR, 50 PHP_PCRE_RECURSION_LIMIT_ERROR, 51 PHP_PCRE_BAD_UTF8_ERROR, 52 PHP_PCRE_BAD_UTF8_OFFSET_ERROR 53}; 54 55 56ZEND_DECLARE_MODULE_GLOBALS(pcre) 57 58 59static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */ 60{ 61 int preg_code = 0; 62 63 switch (pcre_code) { 64 case PCRE_ERROR_MATCHLIMIT: 65 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR; 66 break; 67 68 case PCRE_ERROR_RECURSIONLIMIT: 69 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR; 70 break; 71 72 case PCRE_ERROR_BADUTF8: 73 preg_code = PHP_PCRE_BAD_UTF8_ERROR; 74 break; 75 76 case PCRE_ERROR_BADUTF8_OFFSET: 77 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR; 78 break; 79 80 default: 81 preg_code = PHP_PCRE_INTERNAL_ERROR; 82 break; 83 } 84 85 PCRE_G(error_code) = preg_code; 86} 87/* }}} */ 88 89static void php_free_pcre_cache(void *data) /* {{{ */ 90{ 91 pcre_cache_entry *pce = (pcre_cache_entry *) data; 92 if (!pce) return; 93 pefree(pce->re, 1); 94 if (pce->extra) pefree(pce->extra, 1); 95#if HAVE_SETLOCALE 96 if ((void*)pce->tables) pefree((void*)pce->tables, 1); 97 pefree(pce->locale, 1); 98#endif 99} 100/* }}} */ 101 102static PHP_GINIT_FUNCTION(pcre) /* {{{ */ 103{ 104 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1); 105 pcre_globals->backtrack_limit = 0; 106 pcre_globals->recursion_limit = 0; 107 pcre_globals->error_code = PHP_PCRE_NO_ERROR; 108} 109/* }}} */ 110 111static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */ 112{ 113 zend_hash_destroy(&pcre_globals->pcre_cache); 114} 115/* }}} */ 116 117PHP_INI_BEGIN() 118 STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals) 119 STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals) 120PHP_INI_END() 121 122 123/* {{{ PHP_MINFO_FUNCTION(pcre) */ 124static PHP_MINFO_FUNCTION(pcre) 125{ 126 php_info_print_table_start(); 127 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" ); 128 php_info_print_table_row(2, "PCRE Library Version", pcre_version() ); 129 php_info_print_table_end(); 130 131 DISPLAY_INI_ENTRIES(); 132} 133/* }}} */ 134 135/* {{{ PHP_MINIT_FUNCTION(pcre) */ 136static PHP_MINIT_FUNCTION(pcre) 137{ 138 REGISTER_INI_ENTRIES(); 139 140 REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); 141 REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); 142 REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); 143 REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT); 144 REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT); 145 REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); 146 REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT); 147 148 REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT); 149 REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT); 150 REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); 151 REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); 152 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT); 153 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT); 154 REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT); 155 156 return SUCCESS; 157} 158/* }}} */ 159 160/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */ 161static PHP_MSHUTDOWN_FUNCTION(pcre) 162{ 163 UNREGISTER_INI_ENTRIES(); 164 165 return SUCCESS; 166} 167/* }}} */ 168 169/* {{{ static pcre_clean_cache */ 170static int pcre_clean_cache(void *data, void *arg TSRMLS_DC) 171{ 172 int *num_clean = (int *)arg; 173 174 if (*num_clean > 0) { 175 (*num_clean)--; 176 return 1; 177 } else { 178 return 0; 179 } 180} 181/* }}} */ 182 183/* {{{ static make_subpats_table */ 184static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC) 185{ 186 pcre_extra *extra = pce->extra; 187 int name_cnt = 0, name_size, ni = 0; 188 int rc; 189 char *name_table; 190 unsigned short name_idx; 191 char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *)); 192 193 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); 194 if (rc < 0) { 195 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); 196 efree(subpat_names); 197 return NULL; 198 } 199 if (name_cnt > 0) { 200 int rc1, rc2; 201 202 rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table); 203 rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); 204 rc = rc2 ? rc2 : rc1; 205 if (rc < 0) { 206 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); 207 efree(subpat_names); 208 return NULL; 209 } 210 211 while (ni++ < name_cnt) { 212 name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1]; 213 subpat_names[name_idx] = name_table + 2; 214 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) { 215 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed"); 216 efree(subpat_names); 217 return NULL; 218 } 219 name_table += name_size; 220 } 221 } 222 223 return subpat_names; 224} 225/* }}} */ 226 227/* {{{ pcre_get_compiled_regex_cache 228 */ 229PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC) 230{ 231 pcre *re = NULL; 232 pcre_extra *extra; 233 int coptions = 0; 234 int soptions = 0; 235 const char *error; 236 int erroffset; 237 char delimiter; 238 char start_delimiter; 239 char end_delimiter; 240 char *p, *pp; 241 char *pattern; 242 int do_study = 0; 243 int poptions = 0; 244 int count = 0; 245 unsigned const char *tables = NULL; 246#if HAVE_SETLOCALE 247 char *locale; 248#endif 249 pcre_cache_entry *pce; 250 pcre_cache_entry new_entry; 251 char *tmp = NULL; 252 253#if HAVE_SETLOCALE 254# if defined(PHP_WIN32) && defined(ZTS) 255 _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); 256# endif 257 locale = setlocale(LC_CTYPE, NULL); 258#endif 259 260 /* Try to lookup the cached regex entry, and if successful, just pass 261 back the compiled pattern, otherwise go on and compile it. */ 262 if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) { 263 /* 264 * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it 265 * is, we flush it and compile the pattern from scratch. 266 */ 267 if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) { 268 zend_hash_clean(&PCRE_G(pcre_cache)); 269 } else { 270#if HAVE_SETLOCALE 271 if (!strcmp(pce->locale, locale)) { 272#endif 273 return pce; 274#if HAVE_SETLOCALE 275 } 276#endif 277 } 278 } 279 280 p = regex; 281 282 /* Parse through the leading whitespace, and display a warning if we 283 get to the end without encountering a delimiter. */ 284 while (isspace((int)*(unsigned char *)p)) p++; 285 if (*p == 0) { 286 php_error_docref(NULL TSRMLS_CC, E_WARNING, 287 p < regex + regex_len ? "Null byte in regex" : "Empty regular expression"); 288 return NULL; 289 } 290 291 /* Get the delimiter and display a warning if it is alphanumeric 292 or a backslash. */ 293 delimiter = *p++; 294 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') { 295 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash"); 296 return NULL; 297 } 298 299 start_delimiter = delimiter; 300 if ((pp = strchr("([{< )]}> )]}>", delimiter))) 301 delimiter = pp[5]; 302 end_delimiter = delimiter; 303 304 pp = p; 305 306 if (start_delimiter == end_delimiter) { 307 /* We need to iterate through the pattern, searching for the ending delimiter, 308 but skipping the backslashed delimiters. If the ending delimiter is not 309 found, display a warning. */ 310 while (*pp != 0) { 311 if (*pp == '\\' && pp[1] != 0) pp++; 312 else if (*pp == delimiter) 313 break; 314 pp++; 315 } 316 } else { 317 /* We iterate through the pattern, searching for the matching ending 318 * delimiter. For each matching starting delimiter, we increment nesting 319 * level, and decrement it for each matching ending delimiter. If we 320 * reach the end of the pattern without matching, display a warning. 321 */ 322 int brackets = 1; /* brackets nesting level */ 323 while (*pp != 0) { 324 if (*pp == '\\' && pp[1] != 0) pp++; 325 else if (*pp == end_delimiter && --brackets <= 0) 326 break; 327 else if (*pp == start_delimiter) 328 brackets++; 329 pp++; 330 } 331 } 332 333 if (*pp == 0) { 334 if (pp < regex + regex_len) { 335 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex"); 336 } else if (start_delimiter == end_delimiter) { 337 php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter); 338 } else { 339 php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", delimiter); 340 } 341 return NULL; 342 } 343 344 /* Make a copy of the actual pattern. */ 345 pattern = estrndup(p, pp-p); 346 347 /* Move on to the options */ 348 pp++; 349 350 /* Parse through the options, setting appropriate flags. Display 351 a warning if we encounter an unknown modifier. */ 352 while (pp < regex + regex_len) { 353 switch (*pp++) { 354 /* Perl compatible options */ 355 case 'i': coptions |= PCRE_CASELESS; break; 356 case 'm': coptions |= PCRE_MULTILINE; break; 357 case 's': coptions |= PCRE_DOTALL; break; 358 case 'x': coptions |= PCRE_EXTENDED; break; 359 360 /* PCRE specific options */ 361 case 'A': coptions |= PCRE_ANCHORED; break; 362 case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break; 363 case 'S': do_study = 1; break; 364 case 'U': coptions |= PCRE_UNGREEDY; break; 365 case 'X': coptions |= PCRE_EXTRA; break; 366 case 'u': coptions |= PCRE_UTF8; 367 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII 368 characters, even in UTF-8 mode. However, this can be changed by setting 369 the PCRE_UCP option. */ 370#ifdef PCRE_UCP 371 coptions |= PCRE_UCP; 372#endif 373 break; 374 375 /* Custom preg options */ 376 case 'e': poptions |= PREG_REPLACE_EVAL; break; 377 378 case ' ': 379 case '\n': 380 break; 381 382 default: 383 if (pp[-1]) { 384 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]); 385 } else { 386 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex"); 387 } 388 efree(pattern); 389 return NULL; 390 } 391 } 392 393#if HAVE_SETLOCALE 394 if (strcmp(locale, "C")) 395 tables = pcre_maketables(); 396#endif 397 398 /* Compile pattern and display a warning if compilation failed. */ 399 re = pcre_compile(pattern, 400 coptions, 401 &error, 402 &erroffset, 403 tables); 404 405 if (re == NULL) { 406 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset); 407 efree(pattern); 408 if (tables) { 409 pefree((void*)tables, 1); 410 } 411 return NULL; 412 } 413 414 /* If study option was specified, study the pattern and 415 store the result in extra for passing to pcre_exec. */ 416 if (do_study) { 417 extra = pcre_study(re, soptions, &error); 418 if (extra) { 419 extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; 420 } 421 if (error != NULL) { 422 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern"); 423 } 424 } else { 425 extra = NULL; 426 } 427 428 efree(pattern); 429 430 /* 431 * If we reached cache limit, clean out the items from the head of the list; 432 * these are supposedly the oldest ones (but not necessarily the least used 433 * ones). 434 */ 435 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) { 436 int num_clean = PCRE_CACHE_SIZE / 8; 437 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC); 438 } 439 440 /* Store the compiled pattern and extra info in the cache. */ 441 new_entry.re = re; 442 new_entry.extra = extra; 443 new_entry.preg_options = poptions; 444 new_entry.compile_options = coptions; 445#if HAVE_SETLOCALE 446 new_entry.locale = pestrdup(locale, 1); 447 new_entry.tables = tables; 448#endif 449 450 /* 451 * Interned strings are not duplicated when stored in HashTable, 452 * but all the interned strings created during HTTP request are removed 453 * at end of request. However PCRE_G(pcre_cache) must be consistent 454 * on the next request as well. So we disable usage of interned strings 455 * as hash keys especually for this table. 456 * See bug #63180 457 */ 458 if (IS_INTERNED(regex)) { 459 regex = tmp = estrndup(regex, regex_len); 460 } 461 462 zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry, 463 sizeof(pcre_cache_entry), (void**)&pce); 464 465 if (tmp) { 466 efree(tmp); 467 } 468 469 return pce; 470} 471/* }}} */ 472 473/* {{{ pcre_get_compiled_regex 474 */ 475PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC) 476{ 477 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); 478 479 if (extra) { 480 *extra = pce ? pce->extra : NULL; 481 } 482 if (preg_options) { 483 *preg_options = pce ? pce->preg_options : 0; 484 } 485 486 return pce ? pce->re : NULL; 487} 488/* }}} */ 489 490/* {{{ pcre_get_compiled_regex_ex 491 */ 492PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC) 493{ 494 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); 495 496 if (extra) { 497 *extra = pce ? pce->extra : NULL; 498 } 499 if (preg_options) { 500 *preg_options = pce ? pce->preg_options : 0; 501 } 502 if (compile_options) { 503 *compile_options = pce ? pce->compile_options : 0; 504 } 505 506 return pce ? pce->re : NULL; 507} 508/* }}} */ 509 510/* {{{ add_offset_pair */ 511static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name) 512{ 513 zval *match_pair; 514 515 ALLOC_ZVAL(match_pair); 516 array_init(match_pair); 517 INIT_PZVAL(match_pair); 518 519 /* Add (match, offset) to the return value */ 520 add_next_index_stringl(match_pair, str, len, 1); 521 add_next_index_long(match_pair, offset); 522 523 if (name) { 524 zval_add_ref(&match_pair); 525 zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL); 526 } 527 zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL); 528} 529/* }}} */ 530 531static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */ 532{ 533 /* parameters */ 534 char *regex; /* Regular expression */ 535 char *subject; /* String to match against */ 536 int regex_len; 537 int subject_len; 538 pcre_cache_entry *pce; /* Compiled regular expression */ 539 zval *subpats = NULL; /* Array for subpatterns */ 540 long flags = 0; /* Match control flags */ 541 long start_offset = 0; /* Where the new search starts */ 542 543 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", ®ex, ®ex_len, 544 &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) { 545 RETURN_FALSE; 546 } 547 548 /* Compile regex or get it from cache. */ 549 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { 550 RETURN_FALSE; 551 } 552 553 php_pcre_match_impl(pce, subject, subject_len, return_value, subpats, 554 global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC); 555} 556/* }}} */ 557 558/* {{{ php_pcre_match_impl() */ 559PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, 560 zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC) 561{ 562 zval *result_set, /* Holds a set of subpatterns after 563 a global match */ 564 **match_sets = NULL; /* An array of sets of matches for each 565 subpattern after a global match */ 566 pcre_extra *extra = pce->extra;/* Holds results of studying */ 567 pcre_extra extra_data; /* Used locally for exec options */ 568 int exoptions = 0; /* Execution options */ 569 int count = 0; /* Count of matched subpatterns */ 570 int *offsets; /* Array of subpattern offsets */ 571 int num_subpats; /* Number of captured subpatterns */ 572 int size_offsets; /* Size of the offsets array */ 573 int matched; /* Has anything matched */ 574 int g_notempty = 0; /* If the match should not be empty */ 575 const char **stringlist; /* Holds list of subpatterns */ 576 char **subpat_names; /* Array for named subpatterns */ 577 int i, rc; 578 int subpats_order; /* Order of subpattern matches */ 579 int offset_capture; /* Capture match offsets: yes/no */ 580 581 /* Overwrite the passed-in value for subpatterns with an empty array. */ 582 if (subpats != NULL) { 583 zval_dtor(subpats); 584 array_init(subpats); 585 } 586 587 subpats_order = global ? PREG_PATTERN_ORDER : 0; 588 589 if (use_flags) { 590 offset_capture = flags & PREG_OFFSET_CAPTURE; 591 592 /* 593 * subpats_order is pre-set to pattern mode so we change it only if 594 * necessary. 595 */ 596 if (flags & 0xff) { 597 subpats_order = flags & 0xff; 598 } 599 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) || 600 (!global && subpats_order != 0)) { 601 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified"); 602 return; 603 } 604 } else { 605 offset_capture = 0; 606 } 607 608 /* Negative offset counts from the end of the string. */ 609 if (start_offset < 0) { 610 start_offset = subject_len + start_offset; 611 if (start_offset < 0) { 612 start_offset = 0; 613 } 614 } 615 616 if (extra == NULL) { 617 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; 618 extra = &extra_data; 619 } 620 extra->match_limit = PCRE_G(backtrack_limit); 621 extra->match_limit_recursion = PCRE_G(recursion_limit); 622 623 /* Calculate the size of the offsets array, and allocate memory for it. */ 624 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); 625 if (rc < 0) { 626 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); 627 RETURN_FALSE; 628 } 629 num_subpats++; 630 size_offsets = num_subpats * 3; 631 632 /* 633 * Build a mapping from subpattern numbers to their names. We will always 634 * allocate the table, even though there may be no named subpatterns. This 635 * avoids somewhat more complicated logic in the inner loops. 636 */ 637 subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC); 638 if (!subpat_names) { 639 RETURN_FALSE; 640 } 641 642 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); 643 644 /* Allocate match sets array and initialize the values. */ 645 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { 646 match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0); 647 for (i=0; i<num_subpats; i++) { 648 ALLOC_ZVAL(match_sets[i]); 649 array_init(match_sets[i]); 650 INIT_PZVAL(match_sets[i]); 651 } 652 } 653 654 matched = 0; 655 PCRE_G(error_code) = PHP_PCRE_NO_ERROR; 656 657 do { 658 /* Execute the regular expression. */ 659 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, 660 exoptions|g_notempty, offsets, size_offsets); 661 662 /* the string was already proved to be valid UTF-8 */ 663 exoptions |= PCRE_NO_UTF8_CHECK; 664 665 /* Check for too many substrings condition. */ 666 if (count == 0) { 667 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); 668 count = size_offsets/3; 669 } 670 671 /* If something has matched */ 672 if (count > 0) { 673 matched++; 674 675 /* If subpatterns array has been passed, fill it in with values. */ 676 if (subpats != NULL) { 677 /* Try to get the list of substrings and display a warning if failed. */ 678 if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) { 679 efree(subpat_names); 680 efree(offsets); 681 if (match_sets) efree(match_sets); 682 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed"); 683 RETURN_FALSE; 684 } 685 686 if (global) { /* global pattern matching */ 687 if (subpats && subpats_order == PREG_PATTERN_ORDER) { 688 /* For each subpattern, insert it into the appropriate array. */ 689 for (i = 0; i < count; i++) { 690 if (offset_capture) { 691 add_offset_pair(match_sets[i], (char *)stringlist[i], 692 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL); 693 } else { 694 add_next_index_stringl(match_sets[i], (char *)stringlist[i], 695 offsets[(i<<1)+1] - offsets[i<<1], 1); 696 } 697 } 698 /* 699 * If the number of captured subpatterns on this run is 700 * less than the total possible number, pad the result 701 * arrays with empty strings. 702 */ 703 if (count < num_subpats) { 704 for (; i < num_subpats; i++) { 705 add_next_index_string(match_sets[i], "", 1); 706 } 707 } 708 } else { 709 /* Allocate the result set array */ 710 ALLOC_ZVAL(result_set); 711 array_init(result_set); 712 INIT_PZVAL(result_set); 713 714 /* Add all the subpatterns to it */ 715 for (i = 0; i < count; i++) { 716 if (offset_capture) { 717 add_offset_pair(result_set, (char *)stringlist[i], 718 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]); 719 } else { 720 if (subpat_names[i]) { 721 add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i], 722 offsets[(i<<1)+1] - offsets[i<<1], 1); 723 } 724 add_next_index_stringl(result_set, (char *)stringlist[i], 725 offsets[(i<<1)+1] - offsets[i<<1], 1); 726 } 727 } 728 /* And add it to the output array */ 729 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL); 730 } 731 } else { /* single pattern matching */ 732 /* For each subpattern, insert it into the subpatterns array. */ 733 for (i = 0; i < count; i++) { 734 if (offset_capture) { 735 add_offset_pair(subpats, (char *)stringlist[i], 736 offsets[(i<<1)+1] - offsets[i<<1], 737 offsets[i<<1], subpat_names[i]); 738 } else { 739 if (subpat_names[i]) { 740 add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i], 741 offsets[(i<<1)+1] - offsets[i<<1], 1); 742 } 743 add_next_index_stringl(subpats, (char *)stringlist[i], 744 offsets[(i<<1)+1] - offsets[i<<1], 1); 745 } 746 } 747 } 748 749 pcre_free((void *) stringlist); 750 } 751 } else if (count == PCRE_ERROR_NOMATCH) { 752 /* If we previously set PCRE_NOTEMPTY after a null match, 753 this is not necessarily the end. We need to advance 754 the start offset, and continue. Fudge the offset values 755 to achieve this, unless we're already at the end of the string. */ 756 if (g_notempty != 0 && start_offset < subject_len) { 757 offsets[0] = start_offset; 758 offsets[1] = start_offset + 1; 759 } else 760 break; 761 } else { 762 pcre_handle_exec_error(count TSRMLS_CC); 763 break; 764 } 765 766 /* If we have matched an empty string, mimic what Perl's /g options does. 767 This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try 768 the match again at the same point. If this fails (picked up above) we 769 advance to the next character. */ 770 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; 771 772 /* Advance to the position right after the last full match */ 773 start_offset = offsets[1]; 774 } while (global); 775 776 /* Add the match sets to the output array and clean up */ 777 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { 778 for (i = 0; i < num_subpats; i++) { 779 if (subpat_names[i]) { 780 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], 781 strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL); 782 Z_ADDREF_P(match_sets[i]); 783 } 784 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL); 785 } 786 efree(match_sets); 787 } 788 789 efree(offsets); 790 efree(subpat_names); 791 792 /* Did we encounter an error? */ 793 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) { 794 RETVAL_LONG(matched); 795 } else { 796 RETVAL_FALSE; 797 } 798} 799/* }}} */ 800 801/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]]) 802 Perform a Perl-style regular expression match */ 803static PHP_FUNCTION(preg_match) 804{ 805 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); 806} 807/* }}} */ 808 809/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]]) 810 Perform a Perl-style global regular expression match */ 811static PHP_FUNCTION(preg_match_all) 812{ 813 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); 814} 815/* }}} */ 816 817/* {{{ preg_get_backref 818 */ 819static int preg_get_backref(char **str, int *backref) 820{ 821 register char in_brace = 0; 822 register char *walk = *str; 823 824 if (walk[1] == 0) 825 return 0; 826 827 if (*walk == '$' && walk[1] == '{') { 828 in_brace = 1; 829 walk++; 830 } 831 walk++; 832 833 if (*walk >= '0' && *walk <= '9') { 834 *backref = *walk - '0'; 835 walk++; 836 } else 837 return 0; 838 839 if (*walk && *walk >= '0' && *walk <= '9') { 840 *backref = *backref * 10 + *walk - '0'; 841 walk++; 842 } 843 844 if (in_brace) { 845 if (*walk == 0 || *walk != '}') 846 return 0; 847 else 848 walk++; 849 } 850 851 *str = walk; 852 return 1; 853} 854/* }}} */ 855 856/* {{{ preg_do_repl_func 857 */ 858static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC) 859{ 860 zval *retval_ptr; /* Function return value */ 861 zval **args[1]; /* Argument to pass to function */ 862 zval *subpats; /* Captured subpatterns */ 863 int result_len; /* Return value length */ 864 int i; 865 866 MAKE_STD_ZVAL(subpats); 867 array_init(subpats); 868 for (i = 0; i < count; i++) { 869 if (subpat_names[i]) { 870 add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1); 871 } 872 add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); 873 } 874 args[0] = &subpats; 875 876 if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) { 877 convert_to_string_ex(&retval_ptr); 878 *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr)); 879 result_len = Z_STRLEN_P(retval_ptr); 880 zval_ptr_dtor(&retval_ptr); 881 } else { 882 if (!EG(exception)) { 883 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function"); 884 } 885 result_len = offsets[1] - offsets[0]; 886 *result = estrndup(&subject[offsets[0]], result_len); 887 } 888 889 zval_ptr_dtor(&subpats); 890 891 return result_len; 892} 893/* }}} */ 894 895/* {{{ preg_do_eval 896 */ 897static int preg_do_eval(char *eval_str, int eval_str_len, char *subject, 898 int *offsets, int count, char **result TSRMLS_DC) 899{ 900 zval retval; /* Return value from evaluation */ 901 char *eval_str_end, /* End of eval string */ 902 *match, /* Current match for a backref */ 903 *esc_match, /* Quote-escaped match */ 904 *walk, /* Used to walk the code string */ 905 *segment, /* Start of segment to append while walking */ 906 walk_last; /* Last walked character */ 907 int match_len; /* Length of the match */ 908 int esc_match_len; /* Length of the quote-escaped match */ 909 int result_len; /* Length of the result of the evaluation */ 910 int backref; /* Current backref */ 911 char *compiled_string_description; 912 smart_str code = {0}; 913 914 eval_str_end = eval_str + eval_str_len; 915 walk = segment = eval_str; 916 walk_last = 0; 917 918 while (walk < eval_str_end) { 919 /* If found a backreference.. */ 920 if ('\\' == *walk || '$' == *walk) { 921 smart_str_appendl(&code, segment, walk - segment); 922 if (walk_last == '\\') { 923 code.c[code.len-1] = *walk++; 924 segment = walk; 925 walk_last = 0; 926 continue; 927 } 928 segment = walk; 929 if (preg_get_backref(&walk, &backref)) { 930 if (backref < count) { 931 /* Find the corresponding string match and substitute it 932 in instead of the backref */ 933 match = subject + offsets[backref<<1]; 934 match_len = offsets[(backref<<1)+1] - offsets[backref<<1]; 935 if (match_len) { 936 esc_match = php_addslashes(match, match_len, &esc_match_len, 0 TSRMLS_CC); 937 } else { 938 esc_match = match; 939 esc_match_len = 0; 940 } 941 } else { 942 esc_match = ""; 943 esc_match_len = 0; 944 } 945 smart_str_appendl(&code, esc_match, esc_match_len); 946 947 segment = walk; 948 949 /* Clean up and reassign */ 950 if (esc_match_len) 951 efree(esc_match); 952 continue; 953 } 954 } 955 walk++; 956 walk_last = walk[-1]; 957 } 958 smart_str_appendl(&code, segment, walk - segment); 959 smart_str_0(&code); 960 961 compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC); 962 /* Run the code */ 963 if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) { 964 efree(compiled_string_description); 965 php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c); 966 /* zend_error() does not return in this case */ 967 } 968 efree(compiled_string_description); 969 convert_to_string(&retval); 970 971 /* Save the return value and its length */ 972 *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval)); 973 result_len = Z_STRLEN(retval); 974 975 /* Clean up */ 976 zval_dtor(&retval); 977 smart_str_free(&code); 978 979 return result_len; 980} 981/* }}} */ 982 983/* {{{ php_pcre_replace 984 */ 985PHPAPI char *php_pcre_replace(char *regex, int regex_len, 986 char *subject, int subject_len, 987 zval *replace_val, int is_callable_replace, 988 int *result_len, int limit, int *replace_count TSRMLS_DC) 989{ 990 pcre_cache_entry *pce; /* Compiled regular expression */ 991 992 /* Compile regex or get it from cache. */ 993 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { 994 return NULL; 995 } 996 997 return php_pcre_replace_impl(pce, subject, subject_len, replace_val, 998 is_callable_replace, result_len, limit, replace_count TSRMLS_CC); 999} 1000/* }}} */ 1001 1002/* {{{ php_pcre_replace_impl() */ 1003PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val, 1004 int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC) 1005{ 1006 pcre_extra *extra = pce->extra;/* Holds results of studying */ 1007 pcre_extra extra_data; /* Used locally for exec options */ 1008 int exoptions = 0; /* Execution options */ 1009 int count = 0; /* Count of matched subpatterns */ 1010 int *offsets; /* Array of subpattern offsets */ 1011 char **subpat_names; /* Array for named subpatterns */ 1012 int num_subpats; /* Number of captured subpatterns */ 1013 int size_offsets; /* Size of the offsets array */ 1014 int new_len; /* Length of needed storage */ 1015 int alloc_len; /* Actual allocated length */ 1016 int eval_result_len=0; /* Length of the eval'ed or 1017 function-returned string */ 1018 int match_len; /* Length of the current match */ 1019 int backref; /* Backreference number */ 1020 int eval; /* If the replacement string should be eval'ed */ 1021 int start_offset; /* Where the new search starts */ 1022 int g_notempty=0; /* If the match should not be empty */ 1023 int replace_len=0; /* Length of replacement string */ 1024 char *result, /* Result of replacement */ 1025 *replace=NULL, /* Replacement string */ 1026 *new_buf, /* Temporary buffer for re-allocation */ 1027 *walkbuf, /* Location of current replacement in the result */ 1028 *walk, /* Used to walk the replacement string */ 1029 *match, /* The current match */ 1030 *piece, /* The current piece of subject */ 1031 *replace_end=NULL, /* End of replacement string */ 1032 *eval_result, /* Result of eval or custom function */ 1033 walk_last; /* Last walked character */ 1034 int rc; 1035 1036 if (extra == NULL) { 1037 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; 1038 extra = &extra_data; 1039 } 1040 extra->match_limit = PCRE_G(backtrack_limit); 1041 extra->match_limit_recursion = PCRE_G(recursion_limit); 1042 1043 eval = pce->preg_options & PREG_REPLACE_EVAL; 1044 if (is_callable_replace) { 1045 if (eval) { 1046 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback"); 1047 return NULL; 1048 } 1049 } else { 1050 replace = Z_STRVAL_P(replace_val); 1051 replace_len = Z_STRLEN_P(replace_val); 1052 replace_end = replace + replace_len; 1053 } 1054 1055 if (eval) { 1056 php_error_docref(NULL TSRMLS_CC, E_DEPRECATED, "The /e modifier is deprecated, use preg_replace_callback instead"); 1057 } 1058 1059 /* Calculate the size of the offsets array, and allocate memory for it. */ 1060 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); 1061 if (rc < 0) { 1062 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); 1063 return NULL; 1064 } 1065 num_subpats++; 1066 size_offsets = num_subpats * 3; 1067 1068 /* 1069 * Build a mapping from subpattern numbers to their names. We will always 1070 * allocate the table, even though there may be no named subpatterns. This 1071 * avoids somewhat more complicated logic in the inner loops. 1072 */ 1073 subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC); 1074 if (!subpat_names) { 1075 return NULL; 1076 } 1077 1078 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); 1079 1080 alloc_len = 2 * subject_len + 1; 1081 result = safe_emalloc(alloc_len, sizeof(char), 0); 1082 1083 /* Initialize */ 1084 match = NULL; 1085 *result_len = 0; 1086 start_offset = 0; 1087 PCRE_G(error_code) = PHP_PCRE_NO_ERROR; 1088 1089 while (1) { 1090 /* Execute the regular expression. */ 1091 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, 1092 exoptions|g_notempty, offsets, size_offsets); 1093 1094 /* the string was already proved to be valid UTF-8 */ 1095 exoptions |= PCRE_NO_UTF8_CHECK; 1096 1097 /* Check for too many substrings condition. */ 1098 if (count == 0) { 1099 php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); 1100 count = size_offsets/3; 1101 } 1102 1103 piece = subject + start_offset; 1104 1105 if (count > 0 && (limit == -1 || limit > 0)) { 1106 if (replace_count) { 1107 ++*replace_count; 1108 } 1109 /* Set the match location in subject */ 1110 match = subject + offsets[0]; 1111 1112 new_len = *result_len + offsets[0] - start_offset; /* part before the match */ 1113 1114 /* If evaluating, do it and add the return string's length */ 1115 if (eval) { 1116 eval_result_len = preg_do_eval(replace, replace_len, subject, 1117 offsets, count, &eval_result TSRMLS_CC); 1118 new_len += eval_result_len; 1119 } else if (is_callable_replace) { 1120 /* Use custom function to get replacement string and its length. */ 1121 eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC); 1122 new_len += eval_result_len; 1123 } else { /* do regular substitution */ 1124 walk = replace; 1125 walk_last = 0; 1126 while (walk < replace_end) { 1127 if ('\\' == *walk || '$' == *walk) { 1128 if (walk_last == '\\') { 1129 walk++; 1130 walk_last = 0; 1131 continue; 1132 } 1133 if (preg_get_backref(&walk, &backref)) { 1134 if (backref < count) 1135 new_len += offsets[(backref<<1)+1] - offsets[backref<<1]; 1136 continue; 1137 } 1138 } 1139 new_len++; 1140 walk++; 1141 walk_last = walk[-1]; 1142 } 1143 } 1144 1145 if (new_len + 1 > alloc_len) { 1146 alloc_len = 1 + alloc_len + 2 * new_len; 1147 new_buf = emalloc(alloc_len); 1148 memcpy(new_buf, result, *result_len); 1149 efree(result); 1150 result = new_buf; 1151 } 1152 /* copy the part of the string before the match */ 1153 memcpy(&result[*result_len], piece, match-piece); 1154 *result_len += match-piece; 1155 1156 /* copy replacement and backrefs */ 1157 walkbuf = result + *result_len; 1158 1159 /* If evaluating or using custom function, copy result to the buffer 1160 * and clean up. */ 1161 if (eval || is_callable_replace) { 1162 memcpy(walkbuf, eval_result, eval_result_len); 1163 *result_len += eval_result_len; 1164 STR_FREE(eval_result); 1165 } else { /* do regular backreference copying */ 1166 walk = replace; 1167 walk_last = 0; 1168 while (walk < replace_end) { 1169 if ('\\' == *walk || '$' == *walk) { 1170 if (walk_last == '\\') { 1171 *(walkbuf-1) = *walk++; 1172 walk_last = 0; 1173 continue; 1174 } 1175 if (preg_get_backref(&walk, &backref)) { 1176 if (backref < count) { 1177 match_len = offsets[(backref<<1)+1] - offsets[backref<<1]; 1178 memcpy(walkbuf, subject + offsets[backref<<1], match_len); 1179 walkbuf += match_len; 1180 } 1181 continue; 1182 } 1183 } 1184 *walkbuf++ = *walk++; 1185 walk_last = walk[-1]; 1186 } 1187 *walkbuf = '\0'; 1188 /* increment the result length by how much we've added to the string */ 1189 *result_len += walkbuf - (result + *result_len); 1190 } 1191 1192 if (limit != -1) 1193 limit--; 1194 1195 } else if (count == PCRE_ERROR_NOMATCH || limit == 0) { 1196 /* If we previously set PCRE_NOTEMPTY after a null match, 1197 this is not necessarily the end. We need to advance 1198 the start offset, and continue. Fudge the offset values 1199 to achieve this, unless we're already at the end of the string. */ 1200 if (g_notempty != 0 && start_offset < subject_len) { 1201 offsets[0] = start_offset; 1202 offsets[1] = start_offset + 1; 1203 memcpy(&result[*result_len], piece, 1); 1204 (*result_len)++; 1205 } else { 1206 new_len = *result_len + subject_len - start_offset; 1207 if (new_len + 1 > alloc_len) { 1208 alloc_len = new_len + 1; /* now we know exactly how long it is */ 1209 new_buf = safe_emalloc(alloc_len, sizeof(char), 0); 1210 memcpy(new_buf, result, *result_len); 1211 efree(result); 1212 result = new_buf; 1213 } 1214 /* stick that last bit of string on our output */ 1215 memcpy(&result[*result_len], piece, subject_len - start_offset); 1216 *result_len += subject_len - start_offset; 1217 result[*result_len] = '\0'; 1218 break; 1219 } 1220 } else { 1221 pcre_handle_exec_error(count TSRMLS_CC); 1222 efree(result); 1223 result = NULL; 1224 break; 1225 } 1226 1227 /* If we have matched an empty string, mimic what Perl's /g options does. 1228 This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try 1229 the match again at the same point. If this fails (picked up above) we 1230 advance to the next character. */ 1231 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; 1232 1233 /* Advance to the next piece. */ 1234 start_offset = offsets[1]; 1235 } 1236 1237 efree(offsets); 1238 efree(subpat_names); 1239 1240 return result; 1241} 1242/* }}} */ 1243 1244/* {{{ php_replace_in_subject 1245 */ 1246static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC) 1247{ 1248 zval **regex_entry, 1249 **replace_entry = NULL, 1250 *replace_value, 1251 empty_replace; 1252 char *subject_value, 1253 *result; 1254 int subject_len; 1255 1256 /* Make sure we're dealing with strings. */ 1257 convert_to_string_ex(subject); 1258 /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */ 1259 ZVAL_STRINGL(&empty_replace, "", 0, 0); 1260 1261 /* If regex is an array */ 1262 if (Z_TYPE_P(regex) == IS_ARRAY) { 1263 /* Duplicate subject string for repeated replacement */ 1264 subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject)); 1265 subject_len = Z_STRLEN_PP(subject); 1266 *result_len = subject_len; 1267 1268 zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex)); 1269 1270 replace_value = replace; 1271 if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) 1272 zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace)); 1273 1274 /* For each entry in the regex array, get the entry */ 1275 while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) { 1276 /* Make sure we're dealing with strings. */ 1277 convert_to_string_ex(regex_entry); 1278 1279 /* If replace is an array and not a callable construct */ 1280 if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) { 1281 /* Get current entry */ 1282 if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) { 1283 if (!is_callable_replace) { 1284 convert_to_string_ex(replace_entry); 1285 } 1286 replace_value = *replace_entry; 1287 zend_hash_move_forward(Z_ARRVAL_P(replace)); 1288 } else { 1289 /* We've run out of replacement strings, so use an empty one */ 1290 replace_value = &empty_replace; 1291 } 1292 } 1293 1294 /* Do the actual replacement and put the result back into subject_value 1295 for further replacements. */ 1296 if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry), 1297 Z_STRLEN_PP(regex_entry), 1298 subject_value, 1299 subject_len, 1300 replace_value, 1301 is_callable_replace, 1302 result_len, 1303 limit, 1304 replace_count TSRMLS_CC)) != NULL) { 1305 efree(subject_value); 1306 subject_value = result; 1307 subject_len = *result_len; 1308 } else { 1309 efree(subject_value); 1310 return NULL; 1311 } 1312 1313 zend_hash_move_forward(Z_ARRVAL_P(regex)); 1314 } 1315 1316 return subject_value; 1317 } else { 1318 result = php_pcre_replace(Z_STRVAL_P(regex), 1319 Z_STRLEN_P(regex), 1320 Z_STRVAL_PP(subject), 1321 Z_STRLEN_PP(subject), 1322 replace, 1323 is_callable_replace, 1324 result_len, 1325 limit, 1326 replace_count TSRMLS_CC); 1327 return result; 1328 } 1329} 1330/* }}} */ 1331 1332/* {{{ preg_replace_impl 1333 */ 1334static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter) 1335{ 1336 zval **regex, 1337 **replace, 1338 **subject, 1339 **subject_entry, 1340 **zcount = NULL; 1341 char *result; 1342 int result_len; 1343 int limit_val = -1; 1344 long limit = -1; 1345 char *string_key; 1346 ulong num_key; 1347 char *callback_name; 1348 int replace_count=0, old_replace_count; 1349 1350 /* Get function parameters and do error-checking. */ 1351 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", ®ex, &replace, &subject, &limit, &zcount) == FAILURE) { 1352 return; 1353 } 1354 1355 if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) { 1356 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array"); 1357 RETURN_FALSE; 1358 } 1359 1360 SEPARATE_ZVAL(replace); 1361 if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) { 1362 convert_to_string_ex(replace); 1363 } 1364 if (is_callable_replace) { 1365 if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) { 1366 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name); 1367 efree(callback_name); 1368 MAKE_COPY_ZVAL(subject, return_value); 1369 return; 1370 } 1371 efree(callback_name); 1372 } 1373 1374 SEPARATE_ZVAL(regex); 1375 SEPARATE_ZVAL(subject); 1376 1377 if (ZEND_NUM_ARGS() > 3) { 1378 limit_val = limit; 1379 } 1380 1381 if (Z_TYPE_PP(regex) != IS_ARRAY) 1382 convert_to_string_ex(regex); 1383 1384 /* if subject is an array */ 1385 if (Z_TYPE_PP(subject) == IS_ARRAY) { 1386 array_init(return_value); 1387 zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject)); 1388 1389 /* For each subject entry, convert it to string, then perform replacement 1390 and add the result to the return_value array. */ 1391 while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) { 1392 SEPARATE_ZVAL(subject_entry); 1393 old_replace_count = replace_count; 1394 if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { 1395 if (!is_filter || replace_count > old_replace_count) { 1396 /* Add to return array */ 1397 switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0)) 1398 { 1399 case HASH_KEY_IS_STRING: 1400 add_assoc_stringl(return_value, string_key, result, result_len, 0); 1401 break; 1402 1403 case HASH_KEY_IS_LONG: 1404 add_index_stringl(return_value, num_key, result, result_len, 0); 1405 break; 1406 } 1407 } else { 1408 efree(result); 1409 } 1410 } 1411 1412 zend_hash_move_forward(Z_ARRVAL_PP(subject)); 1413 } 1414 } else { /* if subject is not an array */ 1415 old_replace_count = replace_count; 1416 if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { 1417 if (!is_filter || replace_count > old_replace_count) { 1418 RETVAL_STRINGL(result, result_len, 0); 1419 } else { 1420 efree(result); 1421 } 1422 } 1423 } 1424 if (ZEND_NUM_ARGS() > 4) { 1425 zval_dtor(*zcount); 1426 ZVAL_LONG(*zcount, replace_count); 1427 } 1428 1429} 1430/* }}} */ 1431 1432/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]]) 1433 Perform Perl-style regular expression replacement. */ 1434static PHP_FUNCTION(preg_replace) 1435{ 1436 preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0); 1437} 1438/* }}} */ 1439 1440/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]]) 1441 Perform Perl-style regular expression replacement using replacement callback. */ 1442static PHP_FUNCTION(preg_replace_callback) 1443{ 1444 preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0); 1445} 1446/* }}} */ 1447 1448/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]]) 1449 Perform Perl-style regular expression replacement and only return matches. */ 1450static PHP_FUNCTION(preg_filter) 1451{ 1452 preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1); 1453} 1454/* }}} */ 1455 1456/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) 1457 Split string into an array using a perl-style regular expression as a delimiter */ 1458static PHP_FUNCTION(preg_split) 1459{ 1460 char *regex; /* Regular expression */ 1461 char *subject; /* String to match against */ 1462 int regex_len; 1463 int subject_len; 1464 long limit_val = -1;/* Integer value of limit */ 1465 long flags = 0; /* Match control flags */ 1466 pcre_cache_entry *pce; /* Compiled regular expression */ 1467 1468 /* Get function parameters and do error checking */ 1469 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len, 1470 &subject, &subject_len, &limit_val, &flags) == FAILURE) { 1471 RETURN_FALSE; 1472 } 1473 1474 /* Compile regex or get it from cache. */ 1475 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { 1476 RETURN_FALSE; 1477 } 1478 1479 php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC); 1480} 1481/* }}} */ 1482 1483/* {{{ php_pcre_split 1484 */ 1485PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, 1486 long limit_val, long flags TSRMLS_DC) 1487{ 1488 pcre_extra *extra = NULL; /* Holds results of studying */ 1489 pcre *re_bump = NULL; /* Regex instance for empty matches */ 1490 pcre_extra *extra_bump = NULL; /* Almost dummy */ 1491 pcre_extra extra_data; /* Used locally for exec options */ 1492 int *offsets; /* Array of subpattern offsets */ 1493 int size_offsets; /* Size of the offsets array */ 1494 int exoptions = 0; /* Execution options */ 1495 int count = 0; /* Count of matched subpatterns */ 1496 int start_offset; /* Where the new search starts */ 1497 int next_offset; /* End of the last delimiter match + 1 */ 1498 int g_notempty = 0; /* If the match should not be empty */ 1499 char *last_match; /* Location of last match */ 1500 int rc; 1501 int no_empty; /* If NO_EMPTY flag is set */ 1502 int delim_capture; /* If delimiters should be captured */ 1503 int offset_capture; /* If offsets should be captured */ 1504 1505 no_empty = flags & PREG_SPLIT_NO_EMPTY; 1506 delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE; 1507 offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE; 1508 1509 if (limit_val == 0) { 1510 limit_val = -1; 1511 } 1512 1513 if (extra == NULL) { 1514 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; 1515 extra = &extra_data; 1516 } 1517 extra->match_limit = PCRE_G(backtrack_limit); 1518 extra->match_limit_recursion = PCRE_G(recursion_limit); 1519 1520 /* Initialize return value */ 1521 array_init(return_value); 1522 1523 /* Calculate the size of the offsets array, and allocate memory for it. */ 1524 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); 1525 if (rc < 0) { 1526 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); 1527 RETURN_FALSE; 1528 } 1529 size_offsets = (size_offsets + 1) * 3; 1530 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); 1531 1532 /* Start at the beginning of the string */ 1533 start_offset = 0; 1534 next_offset = 0; 1535 last_match = subject; 1536 PCRE_G(error_code) = PHP_PCRE_NO_ERROR; 1537 1538 /* Get next piece if no limit or limit not yet reached and something matched*/ 1539 while ((limit_val == -1 || limit_val > 1)) { 1540 count = pcre_exec(pce->re, extra, subject, 1541 subject_len, start_offset, 1542 exoptions|g_notempty, offsets, size_offsets); 1543 1544 /* the string was already proved to be valid UTF-8 */ 1545 exoptions |= PCRE_NO_UTF8_CHECK; 1546 1547 /* Check for too many substrings condition. */ 1548 if (count == 0) { 1549 php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); 1550 count = size_offsets/3; 1551 } 1552 1553 /* If something matched */ 1554 if (count > 0) { 1555 if (!no_empty || &subject[offsets[0]] != last_match) { 1556 1557 if (offset_capture) { 1558 /* Add (match, offset) pair to the return value */ 1559 add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL); 1560 } else { 1561 /* Add the piece to the return value */ 1562 add_next_index_stringl(return_value, last_match, 1563 &subject[offsets[0]]-last_match, 1); 1564 } 1565 1566 /* One less left to do */ 1567 if (limit_val != -1) 1568 limit_val--; 1569 } 1570 1571 last_match = &subject[offsets[1]]; 1572 next_offset = offsets[1]; 1573 1574 if (delim_capture) { 1575 int i, match_len; 1576 for (i = 1; i < count; i++) { 1577 match_len = offsets[(i<<1)+1] - offsets[i<<1]; 1578 /* If we have matched a delimiter */ 1579 if (!no_empty || match_len > 0) { 1580 if (offset_capture) { 1581 add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL); 1582 } else { 1583 add_next_index_stringl(return_value, 1584 &subject[offsets[i<<1]], 1585 match_len, 1); 1586 } 1587 } 1588 } 1589 } 1590 } else if (count == PCRE_ERROR_NOMATCH) { 1591 /* If we previously set PCRE_NOTEMPTY after a null match, 1592 this is not necessarily the end. We need to advance 1593 the start offset, and continue. Fudge the offset values 1594 to achieve this, unless we're already at the end of the string. */ 1595 if (g_notempty != 0 && start_offset < subject_len) { 1596 if (pce->compile_options & PCRE_UTF8) { 1597 if (re_bump == NULL) { 1598 int dummy; 1599 1600 if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) { 1601 RETURN_FALSE; 1602 } 1603 } 1604 count = pcre_exec(re_bump, extra_bump, subject, 1605 subject_len, start_offset, 1606 exoptions, offsets, size_offsets); 1607 if (count < 1) { 1608 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error"); 1609 RETURN_FALSE; 1610 } 1611 } else { 1612 offsets[0] = start_offset; 1613 offsets[1] = start_offset + 1; 1614 } 1615 } else 1616 break; 1617 } else { 1618 pcre_handle_exec_error(count TSRMLS_CC); 1619 break; 1620 } 1621 1622 /* If we have matched an empty string, mimic what Perl's /g options does. 1623 This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try 1624 the match again at the same point. If this fails (picked up above) we 1625 advance to the next character. */ 1626 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0; 1627 1628 /* Advance to the position right after the last full match */ 1629 start_offset = offsets[1]; 1630 } 1631 1632 1633 start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */ 1634 1635 if (!no_empty || start_offset < subject_len) 1636 { 1637 if (offset_capture) { 1638 /* Add the last (match, offset) pair to the return value */ 1639 add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL); 1640 } else { 1641 /* Add the last piece to the return value */ 1642 add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1); 1643 } 1644 } 1645 1646 1647 /* Clean up */ 1648 efree(offsets); 1649} 1650/* }}} */ 1651 1652/* {{{ proto string preg_quote(string str [, string delim_char]) 1653 Quote regular expression characters plus an optional character */ 1654static PHP_FUNCTION(preg_quote) 1655{ 1656 int in_str_len; 1657 char *in_str; /* Input string argument */ 1658 char *in_str_end; /* End of the input string */ 1659 int delim_len = 0; 1660 char *delim = NULL; /* Additional delimiter argument */ 1661 char *out_str, /* Output string with quoted characters */ 1662 *p, /* Iterator for input string */ 1663 *q, /* Iterator for output string */ 1664 delim_char=0, /* Delimiter character to be quoted */ 1665 c; /* Current character */ 1666 zend_bool quote_delim = 0; /* Whether to quote additional delim char */ 1667 1668 /* Get the arguments and check for errors */ 1669 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len, 1670 &delim, &delim_len) == FAILURE) { 1671 return; 1672 } 1673 1674 in_str_end = in_str + in_str_len; 1675 1676 /* Nothing to do if we got an empty string */ 1677 if (in_str == in_str_end) { 1678 RETURN_EMPTY_STRING(); 1679 } 1680 1681 if (delim && *delim) { 1682 delim_char = delim[0]; 1683 quote_delim = 1; 1684 } 1685 1686 /* Allocate enough memory so that even if each character 1687 is quoted, we won't run out of room */ 1688 out_str = safe_emalloc(4, in_str_len, 1); 1689 1690 /* Go through the string and quote necessary characters */ 1691 for(p = in_str, q = out_str; p != in_str_end; p++) { 1692 c = *p; 1693 switch(c) { 1694 case '.': 1695 case '\\': 1696 case '+': 1697 case '*': 1698 case '?': 1699 case '[': 1700 case '^': 1701 case ']': 1702 case '$': 1703 case '(': 1704 case ')': 1705 case '{': 1706 case '}': 1707 case '=': 1708 case '!': 1709 case '>': 1710 case '<': 1711 case '|': 1712 case ':': 1713 case '-': 1714 *q++ = '\\'; 1715 *q++ = c; 1716 break; 1717 1718 case '\0': 1719 *q++ = '\\'; 1720 *q++ = '0'; 1721 *q++ = '0'; 1722 *q++ = '0'; 1723 break; 1724 1725 default: 1726 if (quote_delim && c == delim_char) 1727 *q++ = '\\'; 1728 *q++ = c; 1729 break; 1730 } 1731 } 1732 *q = '\0'; 1733 1734 /* Reallocate string and return it */ 1735 RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0); 1736} 1737/* }}} */ 1738 1739/* {{{ proto array preg_grep(string regex, array input [, int flags]) 1740 Searches array and returns entries which match regex */ 1741static PHP_FUNCTION(preg_grep) 1742{ 1743 char *regex; /* Regular expression */ 1744 int regex_len; 1745 zval *input; /* Input array */ 1746 long flags = 0; /* Match control flags */ 1747 pcre_cache_entry *pce; /* Compiled regular expression */ 1748 1749 /* Get arguments and do error checking */ 1750 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len, 1751 &input, &flags) == FAILURE) { 1752 return; 1753 } 1754 1755 /* Compile regex or get it from cache. */ 1756 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { 1757 RETURN_FALSE; 1758 } 1759 1760 php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC); 1761} 1762/* }}} */ 1763 1764PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */ 1765{ 1766 zval **entry; /* An entry in the input array */ 1767 pcre_extra *extra = pce->extra;/* Holds results of studying */ 1768 pcre_extra extra_data; /* Used locally for exec options */ 1769 int *offsets; /* Array of subpattern offsets */ 1770 int size_offsets; /* Size of the offsets array */ 1771 int count = 0; /* Count of matched subpatterns */ 1772 char *string_key; 1773 ulong num_key; 1774 zend_bool invert; /* Whether to return non-matching 1775 entries */ 1776 int rc; 1777 1778 invert = flags & PREG_GREP_INVERT ? 1 : 0; 1779 1780 if (extra == NULL) { 1781 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; 1782 extra = &extra_data; 1783 } 1784 extra->match_limit = PCRE_G(backtrack_limit); 1785 extra->match_limit_recursion = PCRE_G(recursion_limit); 1786 1787 /* Calculate the size of the offsets array, and allocate memory for it. */ 1788 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); 1789 if (rc < 0) { 1790 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); 1791 RETURN_FALSE; 1792 } 1793 size_offsets = (size_offsets + 1) * 3; 1794 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); 1795 1796 /* Initialize return array */ 1797 array_init(return_value); 1798 1799 PCRE_G(error_code) = PHP_PCRE_NO_ERROR; 1800 1801 /* Go through the input array */ 1802 zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); 1803 while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) { 1804 zval subject = **entry; 1805 1806 if (Z_TYPE_PP(entry) != IS_STRING) { 1807 zval_copy_ctor(&subject); 1808 convert_to_string(&subject); 1809 } 1810 1811 /* Perform the match */ 1812 count = pcre_exec(pce->re, extra, Z_STRVAL(subject), 1813 Z_STRLEN(subject), 0, 1814 0, offsets, size_offsets); 1815 1816 /* Check for too many substrings condition. */ 1817 if (count == 0) { 1818 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); 1819 count = size_offsets/3; 1820 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) { 1821 pcre_handle_exec_error(count TSRMLS_CC); 1822 break; 1823 } 1824 1825 /* If the entry fits our requirements */ 1826 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) { 1827 1828 Z_ADDREF_PP(entry); 1829 1830 /* Add to return array */ 1831 switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0)) 1832 { 1833 case HASH_KEY_IS_STRING: 1834 zend_hash_update(Z_ARRVAL_P(return_value), string_key, 1835 strlen(string_key)+1, entry, sizeof(zval *), NULL); 1836 break; 1837 1838 case HASH_KEY_IS_LONG: 1839 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry, 1840 sizeof(zval *), NULL); 1841 break; 1842 } 1843 } 1844 1845 if (Z_TYPE_PP(entry) != IS_STRING) { 1846 zval_dtor(&subject); 1847 } 1848 1849 zend_hash_move_forward(Z_ARRVAL_P(input)); 1850 } 1851 zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); 1852 /* Clean up */ 1853 efree(offsets); 1854} 1855/* }}} */ 1856 1857/* {{{ proto int preg_last_error() 1858 Returns the error code of the last regexp execution. */ 1859static PHP_FUNCTION(preg_last_error) 1860{ 1861 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) { 1862 return; 1863 } 1864 1865 RETURN_LONG(PCRE_G(error_code)); 1866} 1867/* }}} */ 1868 1869/* {{{ module definition structures */ 1870 1871/* {{{ arginfo */ 1872ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2) 1873 ZEND_ARG_INFO(0, pattern) 1874 ZEND_ARG_INFO(0, subject) 1875 ZEND_ARG_INFO(1, subpatterns) /* array */ 1876 ZEND_ARG_INFO(0, flags) 1877 ZEND_ARG_INFO(0, offset) 1878ZEND_END_ARG_INFO() 1879 1880ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2) 1881 ZEND_ARG_INFO(0, pattern) 1882 ZEND_ARG_INFO(0, subject) 1883 ZEND_ARG_INFO(1, subpatterns) /* array */ 1884 ZEND_ARG_INFO(0, flags) 1885 ZEND_ARG_INFO(0, offset) 1886ZEND_END_ARG_INFO() 1887 1888ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3) 1889 ZEND_ARG_INFO(0, regex) 1890 ZEND_ARG_INFO(0, replace) 1891 ZEND_ARG_INFO(0, subject) 1892 ZEND_ARG_INFO(0, limit) 1893 ZEND_ARG_INFO(1, count) 1894ZEND_END_ARG_INFO() 1895 1896ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3) 1897 ZEND_ARG_INFO(0, regex) 1898 ZEND_ARG_INFO(0, callback) 1899 ZEND_ARG_INFO(0, subject) 1900 ZEND_ARG_INFO(0, limit) 1901 ZEND_ARG_INFO(1, count) 1902ZEND_END_ARG_INFO() 1903 1904ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2) 1905 ZEND_ARG_INFO(0, pattern) 1906 ZEND_ARG_INFO(0, subject) 1907 ZEND_ARG_INFO(0, limit) 1908 ZEND_ARG_INFO(0, flags) 1909ZEND_END_ARG_INFO() 1910 1911ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1) 1912 ZEND_ARG_INFO(0, str) 1913 ZEND_ARG_INFO(0, delim_char) 1914ZEND_END_ARG_INFO() 1915 1916ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2) 1917 ZEND_ARG_INFO(0, regex) 1918 ZEND_ARG_INFO(0, input) /* array */ 1919 ZEND_ARG_INFO(0, flags) 1920ZEND_END_ARG_INFO() 1921 1922ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0) 1923ZEND_END_ARG_INFO() 1924/* }}} */ 1925 1926static const zend_function_entry pcre_functions[] = { 1927 PHP_FE(preg_match, arginfo_preg_match) 1928 PHP_FE(preg_match_all, arginfo_preg_match_all) 1929 PHP_FE(preg_replace, arginfo_preg_replace) 1930 PHP_FE(preg_replace_callback, arginfo_preg_replace_callback) 1931 PHP_FE(preg_filter, arginfo_preg_replace) 1932 PHP_FE(preg_split, arginfo_preg_split) 1933 PHP_FE(preg_quote, arginfo_preg_quote) 1934 PHP_FE(preg_grep, arginfo_preg_grep) 1935 PHP_FE(preg_last_error, arginfo_preg_last_error) 1936 PHP_FE_END 1937}; 1938 1939zend_module_entry pcre_module_entry = { 1940 STANDARD_MODULE_HEADER, 1941 "pcre", 1942 pcre_functions, 1943 PHP_MINIT(pcre), 1944 PHP_MSHUTDOWN(pcre), 1945 NULL, 1946 NULL, 1947 PHP_MINFO(pcre), 1948 NO_VERSION_YET, 1949 PHP_MODULE_GLOBALS(pcre), 1950 PHP_GINIT(pcre), 1951 PHP_GSHUTDOWN(pcre), 1952 NULL, 1953 STANDARD_MODULE_PROPERTIES_EX 1954}; 1955 1956#ifdef COMPILE_DL_PCRE 1957ZEND_GET_MODULE(pcre) 1958#endif 1959 1960/* }}} */ 1961 1962#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */ 1963 1964/* 1965 * Local variables: 1966 * tab-width: 4 1967 * c-basic-offset: 4 1968 * End: 1969 * vim600: sw=4 ts=4 fdm=marker 1970 * vim<600: sw=4 ts=4 1971 */ 1972