1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 5 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1997-2013 The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> | 16 +----------------------------------------------------------------------+ 17 */ 18 19/* $Id$ */ 20 21 22#ifdef HAVE_CONFIG_H 23#include "config.h" 24#endif 25 26#include "php.h" 27#include "php_ini.h" 28 29#if HAVE_MBREGEX 30 31#include "ext/standard/php_smart_str.h" 32#include "ext/standard/info.h" 33#include "php_mbregex.h" 34#include "mbstring.h" 35 36#include "php_onig_compat.h" /* must come prior to the oniguruma header */ 37#include <oniguruma.h> 38#undef UChar 39 40ZEND_EXTERN_MODULE_GLOBALS(mbstring) 41 42struct _zend_mb_regex_globals { 43 OnigEncoding default_mbctype; 44 OnigEncoding current_mbctype; 45 HashTable ht_rc; 46 zval *search_str; 47 zval *search_str_val; 48 unsigned int search_pos; 49 php_mb_regex_t *search_re; 50 OnigRegion *search_regs; 51 OnigOptionType regex_default_options; 52 OnigSyntaxType *regex_default_syntax; 53}; 54 55#define MBREX(g) (MBSTRG(mb_regex_globals)->g) 56 57/* {{{ static void php_mb_regex_free_cache() */ 58static void php_mb_regex_free_cache(php_mb_regex_t **pre) 59{ 60 onig_free(*pre); 61} 62/* }}} */ 63 64/* {{{ _php_mb_regex_globals_ctor */ 65static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC) 66{ 67 pglobals->default_mbctype = ONIG_ENCODING_EUC_JP; 68 pglobals->current_mbctype = ONIG_ENCODING_EUC_JP; 69 zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1); 70 pglobals->search_str = (zval*) NULL; 71 pglobals->search_re = (php_mb_regex_t*)NULL; 72 pglobals->search_pos = 0; 73 pglobals->search_regs = (OnigRegion*)NULL; 74 pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; 75 pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY; 76 return SUCCESS; 77} 78/* }}} */ 79 80/* {{{ _php_mb_regex_globals_dtor */ 81static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC) 82{ 83 zend_hash_destroy(&pglobals->ht_rc); 84} 85/* }}} */ 86 87/* {{{ php_mb_regex_globals_alloc */ 88zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D) 89{ 90 zend_mb_regex_globals *pglobals = pemalloc( 91 sizeof(zend_mb_regex_globals), 1); 92 if (!pglobals) { 93 return NULL; 94 } 95 if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) { 96 pefree(pglobals, 1); 97 return NULL; 98 } 99 return pglobals; 100} 101/* }}} */ 102 103/* {{{ php_mb_regex_globals_free */ 104void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC) 105{ 106 if (!pglobals) { 107 return; 108 } 109 _php_mb_regex_globals_dtor(pglobals TSRMLS_CC); 110 pefree(pglobals, 1); 111} 112/* }}} */ 113 114/* {{{ PHP_MINIT_FUNCTION(mb_regex) */ 115PHP_MINIT_FUNCTION(mb_regex) 116{ 117 onig_init(); 118 return SUCCESS; 119} 120/* }}} */ 121 122/* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */ 123PHP_MSHUTDOWN_FUNCTION(mb_regex) 124{ 125 onig_end(); 126 return SUCCESS; 127} 128/* }}} */ 129 130/* {{{ PHP_RINIT_FUNCTION(mb_regex) */ 131PHP_RINIT_FUNCTION(mb_regex) 132{ 133 return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE; 134} 135/* }}} */ 136 137/* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */ 138PHP_RSHUTDOWN_FUNCTION(mb_regex) 139{ 140 MBREX(current_mbctype) = MBREX(default_mbctype); 141 142 if (MBREX(search_str) != NULL) { 143 zval_ptr_dtor(&MBREX(search_str)); 144 MBREX(search_str) = (zval *)NULL; 145 } 146 MBREX(search_pos) = 0; 147 148 if (MBREX(search_regs) != NULL) { 149 onig_region_free(MBREX(search_regs), 1); 150 MBREX(search_regs) = (OnigRegion *)NULL; 151 } 152 zend_hash_clean(&MBREX(ht_rc)); 153 154 return SUCCESS; 155} 156/* }}} */ 157 158/* {{{ PHP_MINFO_FUNCTION(mb_regex) */ 159PHP_MINFO_FUNCTION(mb_regex) 160{ 161 char buf[32]; 162 php_info_print_table_start(); 163 php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled"); 164 snprintf(buf, sizeof(buf), "%d.%d.%d", 165 ONIGURUMA_VERSION_MAJOR, 166 ONIGURUMA_VERSION_MINOR, 167 ONIGURUMA_VERSION_TEENY); 168#ifdef PHP_ONIG_BUNDLED 169#ifdef USE_COMBINATION_EXPLOSION_CHECK 170 php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On"); 171#else /* USE_COMBINATION_EXPLOSION_CHECK */ 172 php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off"); 173#endif /* USE_COMBINATION_EXPLOSION_CHECK */ 174#endif /* PHP_BUNDLED_ONIG */ 175 php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf); 176 php_info_print_table_end(); 177} 178/* }}} */ 179 180/* 181 * encoding name resolver 182 */ 183 184/* {{{ encoding name map */ 185typedef struct _php_mb_regex_enc_name_map_t { 186 const char *names; 187 OnigEncoding code; 188} php_mb_regex_enc_name_map_t; 189 190php_mb_regex_enc_name_map_t enc_name_map[] = { 191#ifdef ONIG_ENCODING_EUC_JP 192 { 193 "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0", 194 ONIG_ENCODING_EUC_JP 195 }, 196#endif 197#ifdef ONIG_ENCODING_UTF8 198 { 199 "UTF-8\0UTF8\0", 200 ONIG_ENCODING_UTF8 201 }, 202#endif 203#ifdef ONIG_ENCODING_UTF16_BE 204 { 205 "UTF-16\0UTF-16BE\0", 206 ONIG_ENCODING_UTF16_BE 207 }, 208#endif 209#ifdef ONIG_ENCODING_UTF16_LE 210 { 211 "UTF-16LE\0", 212 ONIG_ENCODING_UTF16_LE 213 }, 214#endif 215#ifdef ONIG_ENCODING_UTF32_BE 216 { 217 "UCS-4\0UTF-32\0UTF-32BE\0", 218 ONIG_ENCODING_UTF32_BE 219 }, 220#endif 221#ifdef ONIG_ENCODING_UTF32_LE 222 { 223 "UCS-4LE\0UTF-32LE\0", 224 ONIG_ENCODING_UTF32_LE 225 }, 226#endif 227#ifdef ONIG_ENCODING_SJIS 228 { 229 "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0", 230 ONIG_ENCODING_SJIS 231 }, 232#endif 233#ifdef ONIG_ENCODING_BIG5 234 { 235 "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0", 236 ONIG_ENCODING_BIG5 237 }, 238#endif 239#ifdef ONIG_ENCODING_EUC_CN 240 { 241 "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0", 242 ONIG_ENCODING_EUC_CN 243 }, 244#endif 245#ifdef ONIG_ENCODING_EUC_TW 246 { 247 "EUC-TW\0EUCTW\0EUC_TW\0", 248 ONIG_ENCODING_EUC_TW 249 }, 250#endif 251#ifdef ONIG_ENCODING_EUC_KR 252 { 253 "EUC-KR\0EUCKR\0EUC_KR\0", 254 ONIG_ENCODING_EUC_KR 255 }, 256#endif 257#if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY 258 { 259 "KOI8\0KOI-8\0", 260 ONIG_ENCODING_KOI8 261 }, 262#endif 263#ifdef ONIG_ENCODING_KOI8_R 264 { 265 "KOI8R\0KOI8-R\0KOI-8R\0", 266 ONIG_ENCODING_KOI8_R 267 }, 268#endif 269#ifdef ONIG_ENCODING_ISO_8859_1 270 { 271 "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0", 272 ONIG_ENCODING_ISO_8859_1 273 }, 274#endif 275#ifdef ONIG_ENCODING_ISO_8859_2 276 { 277 "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0", 278 ONIG_ENCODING_ISO_8859_2 279 }, 280#endif 281#ifdef ONIG_ENCODING_ISO_8859_3 282 { 283 "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0", 284 ONIG_ENCODING_ISO_8859_3 285 }, 286#endif 287#ifdef ONIG_ENCODING_ISO_8859_4 288 { 289 "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0", 290 ONIG_ENCODING_ISO_8859_4 291 }, 292#endif 293#ifdef ONIG_ENCODING_ISO_8859_5 294 { 295 "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0", 296 ONIG_ENCODING_ISO_8859_5 297 }, 298#endif 299#ifdef ONIG_ENCODING_ISO_8859_6 300 { 301 "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0", 302 ONIG_ENCODING_ISO_8859_6 303 }, 304#endif 305#ifdef ONIG_ENCODING_ISO_8859_7 306 { 307 "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0", 308 ONIG_ENCODING_ISO_8859_7 309 }, 310#endif 311#ifdef ONIG_ENCODING_ISO_8859_8 312 { 313 "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0", 314 ONIG_ENCODING_ISO_8859_8 315 }, 316#endif 317#ifdef ONIG_ENCODING_ISO_8859_9 318 { 319 "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0", 320 ONIG_ENCODING_ISO_8859_9 321 }, 322#endif 323#ifdef ONIG_ENCODING_ISO_8859_10 324 { 325 "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0", 326 ONIG_ENCODING_ISO_8859_10 327 }, 328#endif 329#ifdef ONIG_ENCODING_ISO_8859_11 330 { 331 "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0", 332 ONIG_ENCODING_ISO_8859_11 333 }, 334#endif 335#ifdef ONIG_ENCODING_ISO_8859_13 336 { 337 "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0", 338 ONIG_ENCODING_ISO_8859_13 339 }, 340#endif 341#ifdef ONIG_ENCODING_ISO_8859_14 342 { 343 "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0", 344 ONIG_ENCODING_ISO_8859_14 345 }, 346#endif 347#ifdef ONIG_ENCODING_ISO_8859_15 348 { 349 "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0", 350 ONIG_ENCODING_ISO_8859_15 351 }, 352#endif 353#ifdef ONIG_ENCODING_ISO_8859_16 354 { 355 "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0", 356 ONIG_ENCODING_ISO_8859_16 357 }, 358#endif 359#ifdef ONIG_ENCODING_ASCII 360 { 361 "ASCII\0US-ASCII\0US_ASCII\0ISO646\0", 362 ONIG_ENCODING_ASCII 363 }, 364#endif 365 { NULL, ONIG_ENCODING_UNDEF } 366}; 367/* }}} */ 368 369/* {{{ php_mb_regex_name2mbctype */ 370static OnigEncoding _php_mb_regex_name2mbctype(const char *pname) 371{ 372 const char *p; 373 php_mb_regex_enc_name_map_t *mapping; 374 375 if (pname == NULL || !*pname) { 376 return ONIG_ENCODING_UNDEF; 377 } 378 379 for (mapping = enc_name_map; mapping->names != NULL; mapping++) { 380 for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) { 381 if (strcasecmp(p, pname) == 0) { 382 return mapping->code; 383 } 384 } 385 } 386 387 return ONIG_ENCODING_UNDEF; 388} 389/* }}} */ 390 391/* {{{ php_mb_regex_mbctype2name */ 392static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype) 393{ 394 php_mb_regex_enc_name_map_t *mapping; 395 396 for (mapping = enc_name_map; mapping->names != NULL; mapping++) { 397 if (mapping->code == mbctype) { 398 return mapping->names; 399 } 400 } 401 402 return NULL; 403} 404/* }}} */ 405 406/* {{{ php_mb_regex_set_mbctype */ 407int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC) 408{ 409 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname); 410 if (mbctype == ONIG_ENCODING_UNDEF) { 411 return FAILURE; 412 } 413 MBREX(current_mbctype) = mbctype; 414 return SUCCESS; 415} 416/* }}} */ 417 418/* {{{ php_mb_regex_set_default_mbctype */ 419int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC) 420{ 421 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname); 422 if (mbctype == ONIG_ENCODING_UNDEF) { 423 return FAILURE; 424 } 425 MBREX(default_mbctype) = mbctype; 426 return SUCCESS; 427} 428/* }}} */ 429 430/* {{{ php_mb_regex_get_mbctype */ 431const char *php_mb_regex_get_mbctype(TSRMLS_D) 432{ 433 return _php_mb_regex_mbctype2name(MBREX(current_mbctype)); 434} 435/* }}} */ 436 437/* {{{ php_mb_regex_get_default_mbctype */ 438const char *php_mb_regex_get_default_mbctype(TSRMLS_D) 439{ 440 return _php_mb_regex_mbctype2name(MBREX(default_mbctype)); 441} 442/* }}} */ 443 444/* 445 * regex cache 446 */ 447/* {{{ php_mbregex_compile_pattern */ 448static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC) 449{ 450 int err_code = 0; 451 int found = 0; 452 php_mb_regex_t *retval = NULL, **rc = NULL; 453 OnigErrorInfo err_info; 454 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; 455 456 found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc); 457 if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) { 458 if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) { 459 onig_error_code_to_str(err_str, err_code, err_info); 460 php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str); 461 retval = NULL; 462 goto out; 463 } 464 zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL); 465 } else if (found == SUCCESS) { 466 retval = *rc; 467 } 468out: 469 return retval; 470} 471/* }}} */ 472 473/* {{{ _php_mb_regex_get_option_string */ 474static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax) 475{ 476 size_t len_left = len; 477 size_t len_req = 0; 478 char *p = str; 479 char c; 480 481 if ((option & ONIG_OPTION_IGNORECASE) != 0) { 482 if (len_left > 0) { 483 --len_left; 484 *(p++) = 'i'; 485 } 486 ++len_req; 487 } 488 489 if ((option & ONIG_OPTION_EXTEND) != 0) { 490 if (len_left > 0) { 491 --len_left; 492 *(p++) = 'x'; 493 } 494 ++len_req; 495 } 496 497 if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) == 498 (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) { 499 if (len_left > 0) { 500 --len_left; 501 *(p++) = 'p'; 502 } 503 ++len_req; 504 } else { 505 if ((option & ONIG_OPTION_MULTILINE) != 0) { 506 if (len_left > 0) { 507 --len_left; 508 *(p++) = 'm'; 509 } 510 ++len_req; 511 } 512 513 if ((option & ONIG_OPTION_SINGLELINE) != 0) { 514 if (len_left > 0) { 515 --len_left; 516 *(p++) = 's'; 517 } 518 ++len_req; 519 } 520 } 521 if ((option & ONIG_OPTION_FIND_LONGEST) != 0) { 522 if (len_left > 0) { 523 --len_left; 524 *(p++) = 'l'; 525 } 526 ++len_req; 527 } 528 if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) { 529 if (len_left > 0) { 530 --len_left; 531 *(p++) = 'n'; 532 } 533 ++len_req; 534 } 535 536 c = 0; 537 538 if (syntax == ONIG_SYNTAX_JAVA) { 539 c = 'j'; 540 } else if (syntax == ONIG_SYNTAX_GNU_REGEX) { 541 c = 'u'; 542 } else if (syntax == ONIG_SYNTAX_GREP) { 543 c = 'g'; 544 } else if (syntax == ONIG_SYNTAX_EMACS) { 545 c = 'c'; 546 } else if (syntax == ONIG_SYNTAX_RUBY) { 547 c = 'r'; 548 } else if (syntax == ONIG_SYNTAX_PERL) { 549 c = 'z'; 550 } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) { 551 c = 'b'; 552 } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) { 553 c = 'd'; 554 } 555 556 if (c != 0) { 557 if (len_left > 0) { 558 --len_left; 559 *(p++) = c; 560 } 561 ++len_req; 562 } 563 564 565 if (len_left > 0) { 566 --len_left; 567 *(p++) = '\0'; 568 } 569 ++len_req; 570 if (len < len_req) { 571 return len_req; 572 } 573 574 return 0; 575} 576/* }}} */ 577 578/* {{{ _php_mb_regex_init_options */ 579static void 580_php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval) 581{ 582 int n; 583 char c; 584 int optm = 0; 585 586 *syntax = ONIG_SYNTAX_RUBY; 587 588 if (parg != NULL) { 589 n = 0; 590 while(n < narg) { 591 c = parg[n++]; 592 switch (c) { 593 case 'i': 594 optm |= ONIG_OPTION_IGNORECASE; 595 break; 596 case 'x': 597 optm |= ONIG_OPTION_EXTEND; 598 break; 599 case 'm': 600 optm |= ONIG_OPTION_MULTILINE; 601 break; 602 case 's': 603 optm |= ONIG_OPTION_SINGLELINE; 604 break; 605 case 'p': 606 optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; 607 break; 608 case 'l': 609 optm |= ONIG_OPTION_FIND_LONGEST; 610 break; 611 case 'n': 612 optm |= ONIG_OPTION_FIND_NOT_EMPTY; 613 break; 614 case 'j': 615 *syntax = ONIG_SYNTAX_JAVA; 616 break; 617 case 'u': 618 *syntax = ONIG_SYNTAX_GNU_REGEX; 619 break; 620 case 'g': 621 *syntax = ONIG_SYNTAX_GREP; 622 break; 623 case 'c': 624 *syntax = ONIG_SYNTAX_EMACS; 625 break; 626 case 'r': 627 *syntax = ONIG_SYNTAX_RUBY; 628 break; 629 case 'z': 630 *syntax = ONIG_SYNTAX_PERL; 631 break; 632 case 'b': 633 *syntax = ONIG_SYNTAX_POSIX_BASIC; 634 break; 635 case 'd': 636 *syntax = ONIG_SYNTAX_POSIX_EXTENDED; 637 break; 638 case 'e': 639 if (eval != NULL) *eval = 1; 640 break; 641 default: 642 break; 643 } 644 } 645 if (option != NULL) *option|=optm; 646 } 647} 648/* }}} */ 649 650/* 651 * php functions 652 */ 653 654/* {{{ proto string mb_regex_encoding([string encoding]) 655 Returns the current encoding for regex as a string. */ 656PHP_FUNCTION(mb_regex_encoding) 657{ 658 size_t argc = ZEND_NUM_ARGS(); 659 char *encoding; 660 int encoding_len; 661 OnigEncoding mbctype; 662 663 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) { 664 return; 665 } 666 667 if (argc == 0) { 668 const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype)); 669 670 if (retval == NULL) { 671 RETURN_FALSE; 672 } 673 674 RETURN_STRING((char *)retval, 1); 675 } else if (argc == 1) { 676 mbctype = _php_mb_regex_name2mbctype(encoding); 677 678 if (mbctype == ONIG_ENCODING_UNDEF) { 679 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); 680 RETURN_FALSE; 681 } 682 683 MBREX(current_mbctype) = mbctype; 684 RETURN_TRUE; 685 } 686} 687/* }}} */ 688 689/* {{{ _php_mb_regex_ereg_exec */ 690static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) 691{ 692 zval **arg_pattern, *array; 693 char *string; 694 int string_len; 695 php_mb_regex_t *re; 696 OnigRegion *regs = NULL; 697 int i, match_len, beg, end; 698 OnigOptionType options; 699 char *str; 700 701 array = NULL; 702 703 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) { 704 RETURN_FALSE; 705 } 706 707 options = MBREX(regex_default_options); 708 if (icase) { 709 options |= ONIG_OPTION_IGNORECASE; 710 } 711 712 /* compile the regular expression from the supplied regex */ 713 if (Z_TYPE_PP(arg_pattern) != IS_STRING) { 714 /* we convert numbers to integers and treat them as a string */ 715 if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) { 716 convert_to_long_ex(arg_pattern); /* get rid of decimal places */ 717 } 718 convert_to_string_ex(arg_pattern); 719 /* don't bother doing an extended regex with just a number */ 720 } 721 722 if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) { 723 php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern"); 724 RETVAL_FALSE; 725 goto out; 726 } 727 728 re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC); 729 if (re == NULL) { 730 RETVAL_FALSE; 731 goto out; 732 } 733 734 regs = onig_region_new(); 735 736 /* actually execute the regular expression */ 737 if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) { 738 RETVAL_FALSE; 739 goto out; 740 } 741 742 match_len = 1; 743 str = string; 744 if (array != NULL) { 745 match_len = regs->end[0] - regs->beg[0]; 746 zval_dtor(array); 747 array_init(array); 748 for (i = 0; i < regs->num_regs; i++) { 749 beg = regs->beg[i]; 750 end = regs->end[i]; 751 if (beg >= 0 && beg < end && end <= string_len) { 752 add_index_stringl(array, i, (char *)&str[beg], end - beg, 1); 753 } else { 754 add_index_bool(array, i, 0); 755 } 756 } 757 } 758 759 if (match_len == 0) { 760 match_len = 1; 761 } 762 RETVAL_LONG(match_len); 763out: 764 if (regs != NULL) { 765 onig_region_free(regs, 1); 766 } 767} 768/* }}} */ 769 770/* {{{ proto int mb_ereg(string pattern, string string [, array registers]) 771 Regular expression match for multibyte string */ 772PHP_FUNCTION(mb_ereg) 773{ 774 _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); 775} 776/* }}} */ 777 778/* {{{ proto int mb_eregi(string pattern, string string [, array registers]) 779 Case-insensitive regular expression match for multibyte string */ 780PHP_FUNCTION(mb_eregi) 781{ 782 _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); 783} 784/* }}} */ 785 786/* {{{ _php_mb_regex_ereg_replace_exec */ 787static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable) 788{ 789 zval **arg_pattern_zval; 790 791 char *arg_pattern; 792 int arg_pattern_len; 793 794 char *replace; 795 int replace_len; 796 797 zend_fcall_info arg_replace_fci; 798 zend_fcall_info_cache arg_replace_fci_cache; 799 800 char *string; 801 int string_len; 802 803 char *p; 804 php_mb_regex_t *re; 805 OnigSyntaxType *syntax; 806 OnigRegion *regs = NULL; 807 smart_str out_buf = { 0 }; 808 smart_str eval_buf = { 0 }; 809 smart_str *pbuf; 810 int i, err, eval, n; 811 OnigUChar *pos; 812 OnigUChar *string_lim; 813 char *description = NULL; 814 char pat_buf[2]; 815 816 const mbfl_encoding *enc; 817 818 { 819 const char *current_enc_name; 820 current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype)); 821 if (current_enc_name == NULL || 822 (enc = mbfl_name2encoding(current_enc_name)) == NULL) { 823 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error"); 824 RETURN_FALSE; 825 } 826 } 827 eval = 0; 828 { 829 char *option_str = NULL; 830 int option_str_len = 0; 831 832 if (!is_callable) { 833 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s", 834 &arg_pattern_zval, 835 &replace, &replace_len, 836 &string, &string_len, 837 &option_str, &option_str_len) == FAILURE) { 838 RETURN_FALSE; 839 } 840 } else { 841 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zfs|s", 842 &arg_pattern_zval, 843 &arg_replace_fci, &arg_replace_fci_cache, 844 &string, &string_len, 845 &option_str, &option_str_len) == FAILURE) { 846 RETURN_FALSE; 847 } 848 } 849 850 if (option_str != NULL) { 851 _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval); 852 } else { 853 options |= MBREX(regex_default_options); 854 syntax = MBREX(regex_default_syntax); 855 } 856 } 857 if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) { 858 arg_pattern = Z_STRVAL_PP(arg_pattern_zval); 859 arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval); 860 } else { 861 /* FIXME: this code is not multibyte aware! */ 862 convert_to_long_ex(arg_pattern_zval); 863 pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval); 864 pat_buf[1] = '\0'; 865 866 arg_pattern = pat_buf; 867 arg_pattern_len = 1; 868 } 869 /* create regex pattern buffer */ 870 re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC); 871 if (re == NULL) { 872 RETURN_FALSE; 873 } 874 875 if (eval || is_callable) { 876 pbuf = &eval_buf; 877 description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC); 878 } else { 879 pbuf = &out_buf; 880 description = NULL; 881 } 882 883 if (is_callable) { 884 if (eval) { 885 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Option 'e' cannot be used with replacement callback"); 886 RETURN_FALSE; 887 } 888 } 889 890 /* do the actual work */ 891 err = 0; 892 pos = (OnigUChar *)string; 893 string_lim = (OnigUChar*)(string + string_len); 894 regs = onig_region_new(); 895 while (err >= 0) { 896 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0); 897 if (err <= -2) { 898 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; 899 onig_error_code_to_str(err_str, err); 900 php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str); 901 break; 902 } 903 if (err >= 0) { 904#if moriyoshi_0 905 if (regs->beg[0] == regs->end[0]) { 906 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression"); 907 break; 908 } 909#endif 910 /* copy the part of the string before the match */ 911 smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos)); 912 913 if (!is_callable) { 914 /* copy replacement and backrefs */ 915 i = 0; 916 p = replace; 917 while (i < replace_len) { 918 int fwd = (int) php_mb_mbchar_bytes_ex(p, enc); 919 n = -1; 920 if ((replace_len - i) >= 2 && fwd == 1 && 921 p[0] == '\\' && p[1] >= '0' && p[1] <= '9') { 922 n = p[1] - '0'; 923 } 924 if (n >= 0 && n < regs->num_regs) { 925 if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) { 926 smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]); 927 } 928 p += 2; 929 i += 2; 930 } else { 931 smart_str_appendl(pbuf, p, fwd); 932 p += fwd; 933 i += fwd; 934 } 935 } 936 } 937 938 if (eval) { 939 zval v; 940 /* null terminate buffer */ 941 smart_str_0(&eval_buf); 942 /* do eval */ 943 if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) { 944 efree(description); 945 php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c); 946 /* zend_error() does not return in this case */ 947 } 948 949 /* result of eval */ 950 convert_to_string(&v); 951 smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v)); 952 /* Clean up */ 953 eval_buf.len = 0; 954 zval_dtor(&v); 955 } else if (is_callable) { 956 zval *retval_ptr; 957 zval **args[1]; 958 zval *subpats; 959 int i; 960 961 MAKE_STD_ZVAL(subpats); 962 array_init(subpats); 963 964 for (i = 0; i < regs->num_regs; i++) { 965 add_next_index_stringl(subpats, string + regs->beg[i], regs->end[i] - regs->beg[i], 1); 966 } 967 968 args[0] = &subpats; 969 /* null terminate buffer */ 970 smart_str_0(&eval_buf); 971 972 arg_replace_fci.param_count = 1; 973 arg_replace_fci.params = args; 974 arg_replace_fci.retval_ptr_ptr = &retval_ptr; 975 if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && arg_replace_fci.retval_ptr_ptr) { 976 convert_to_string_ex(&retval_ptr); 977 smart_str_appendl(&out_buf, Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr)); 978 eval_buf.len = 0; 979 zval_ptr_dtor(&retval_ptr); 980 } else { 981 efree(description); 982 if (!EG(exception)) { 983 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function"); 984 } 985 } 986 zval_ptr_dtor(&subpats); 987 } 988 989 n = regs->end[0]; 990 if ((pos - (OnigUChar *)string) < n) { 991 pos = (OnigUChar *)string + n; 992 } else { 993 if (pos < string_lim) { 994 smart_str_appendl(&out_buf, pos, 1); 995 } 996 pos++; 997 } 998 } else { /* nomatch */ 999 /* stick that last bit of string on our output */ 1000 if (string_lim - pos > 0) { 1001 smart_str_appendl(&out_buf, pos, string_lim - pos); 1002 } 1003 } 1004 onig_region_free(regs, 0); 1005 } 1006 1007 if (description) { 1008 efree(description); 1009 } 1010 if (regs != NULL) { 1011 onig_region_free(regs, 1); 1012 } 1013 smart_str_free(&eval_buf); 1014 1015 if (err <= -2) { 1016 smart_str_free(&out_buf); 1017 RETVAL_FALSE; 1018 } else { 1019 smart_str_appendc(&out_buf, '\0'); 1020 RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0); 1021 } 1022} 1023/* }}} */ 1024 1025/* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option]) 1026 Replace regular expression for multibyte string */ 1027PHP_FUNCTION(mb_ereg_replace) 1028{ 1029 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0); 1030} 1031/* }}} */ 1032 1033/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string) 1034 Case insensitive replace regular expression for multibyte string */ 1035PHP_FUNCTION(mb_eregi_replace) 1036{ 1037 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0); 1038} 1039/* }}} */ 1040 1041/* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option]) 1042 regular expression for multibyte string using replacement callback */ 1043PHP_FUNCTION(mb_ereg_replace_callback) 1044{ 1045 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1); 1046} 1047/* }}} */ 1048 1049/* {{{ proto array mb_split(string pattern, string string [, int limit]) 1050 split multibyte string into array by regular expression */ 1051PHP_FUNCTION(mb_split) 1052{ 1053 char *arg_pattern; 1054 int arg_pattern_len; 1055 php_mb_regex_t *re; 1056 OnigRegion *regs = NULL; 1057 char *string; 1058 OnigUChar *pos, *chunk_pos; 1059 int string_len; 1060 1061 int n, err; 1062 long count = -1; 1063 1064 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) { 1065 RETURN_FALSE; 1066 } 1067 1068 if (count > 0) { 1069 count--; 1070 } 1071 1072 /* create regex pattern buffer */ 1073 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) { 1074 RETURN_FALSE; 1075 } 1076 1077 array_init(return_value); 1078 1079 chunk_pos = pos = (OnigUChar *)string; 1080 err = 0; 1081 regs = onig_region_new(); 1082 /* churn through str, generating array entries as we go */ 1083 while (count != 0 && (pos - (OnigUChar *)string) < string_len) { 1084 int beg, end; 1085 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0); 1086 if (err < 0) { 1087 break; 1088 } 1089 beg = regs->beg[0], end = regs->end[0]; 1090 /* add it to the array */ 1091 if ((pos - (OnigUChar *)string) < end) { 1092 if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) { 1093 add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos), 1); 1094 --count; 1095 } else { 1096 err = -2; 1097 break; 1098 } 1099 /* point at our new starting point */ 1100 chunk_pos = pos = (OnigUChar *)string + end; 1101 } else { 1102 pos++; 1103 } 1104 onig_region_free(regs, 0); 1105 } 1106 1107 onig_region_free(regs, 1); 1108 1109 /* see if we encountered an error */ 1110 if (err <= -2) { 1111 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; 1112 onig_error_code_to_str(err_str, err); 1113 php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str); 1114 zval_dtor(return_value); 1115 RETURN_FALSE; 1116 } 1117 1118 /* otherwise we just have one last element to add to the array */ 1119 n = ((OnigUChar *)(string + string_len) - chunk_pos); 1120 if (n > 0) { 1121 add_next_index_stringl(return_value, (char *)chunk_pos, n, 1); 1122 } else { 1123 add_next_index_stringl(return_value, "", 0, 1); 1124 } 1125} 1126/* }}} */ 1127 1128/* {{{ proto bool mb_ereg_match(string pattern, string string [,string option]) 1129 Regular expression match for multibyte string */ 1130PHP_FUNCTION(mb_ereg_match) 1131{ 1132 char *arg_pattern; 1133 int arg_pattern_len; 1134 1135 char *string; 1136 int string_len; 1137 1138 php_mb_regex_t *re; 1139 OnigSyntaxType *syntax; 1140 OnigOptionType option = 0; 1141 int err; 1142 1143 { 1144 char *option_str = NULL; 1145 int option_str_len = 0; 1146 1147 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", 1148 &arg_pattern, &arg_pattern_len, &string, &string_len, 1149 &option_str, &option_str_len)==FAILURE) { 1150 RETURN_FALSE; 1151 } 1152 1153 if (option_str != NULL) { 1154 _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL); 1155 } else { 1156 option |= MBREX(regex_default_options); 1157 syntax = MBREX(regex_default_syntax); 1158 } 1159 } 1160 1161 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) { 1162 RETURN_FALSE; 1163 } 1164 1165 /* match */ 1166 err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0); 1167 if (err >= 0) { 1168 RETVAL_TRUE; 1169 } else { 1170 RETVAL_FALSE; 1171 } 1172} 1173/* }}} */ 1174 1175/* regex search */ 1176/* {{{ _php_mb_regex_ereg_search_exec */ 1177static void 1178_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) 1179{ 1180 size_t argc = ZEND_NUM_ARGS(); 1181 char *arg_pattern, *arg_options; 1182 int arg_pattern_len, arg_options_len; 1183 int n, i, err, pos, len, beg, end; 1184 OnigOptionType option; 1185 OnigUChar *str; 1186 OnigSyntaxType *syntax; 1187 1188 if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) { 1189 return; 1190 } 1191 1192 option = MBREX(regex_default_options); 1193 1194 if (argc == 2) { 1195 option = 0; 1196 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL); 1197 } 1198 1199 if (argc > 0) { 1200 /* create regex pattern buffer */ 1201 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) { 1202 RETURN_FALSE; 1203 } 1204 } 1205 1206 pos = MBREX(search_pos); 1207 str = NULL; 1208 len = 0; 1209 if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){ 1210 str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str)); 1211 len = Z_STRLEN_P(MBREX(search_str)); 1212 } 1213 1214 if (MBREX(search_re) == NULL) { 1215 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given"); 1216 RETURN_FALSE; 1217 } 1218 1219 if (str == NULL) { 1220 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given"); 1221 RETURN_FALSE; 1222 } 1223 1224 if (MBREX(search_regs)) { 1225 onig_region_free(MBREX(search_regs), 1); 1226 } 1227 MBREX(search_regs) = onig_region_new(); 1228 1229 err = onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0); 1230 if (err == ONIG_MISMATCH) { 1231 MBREX(search_pos) = len; 1232 RETVAL_FALSE; 1233 } else if (err <= -2) { 1234 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; 1235 onig_error_code_to_str(err_str, err); 1236 php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str); 1237 RETVAL_FALSE; 1238 } else { 1239 if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) { 1240 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression"); 1241 } 1242 switch (mode) { 1243 case 1: 1244 array_init(return_value); 1245 beg = MBREX(search_regs)->beg[0]; 1246 end = MBREX(search_regs)->end[0]; 1247 add_next_index_long(return_value, beg); 1248 add_next_index_long(return_value, end - beg); 1249 break; 1250 case 2: 1251 array_init(return_value); 1252 n = MBREX(search_regs)->num_regs; 1253 for (i = 0; i < n; i++) { 1254 beg = MBREX(search_regs)->beg[i]; 1255 end = MBREX(search_regs)->end[i]; 1256 if (beg >= 0 && beg <= end && end <= len) { 1257 add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1); 1258 } else { 1259 add_index_bool(return_value, i, 0); 1260 } 1261 } 1262 break; 1263 default: 1264 RETVAL_TRUE; 1265 break; 1266 } 1267 end = MBREX(search_regs)->end[0]; 1268 if (pos < end) { 1269 MBREX(search_pos) = end; 1270 } else { 1271 MBREX(search_pos) = pos + 1; 1272 } 1273 } 1274 1275 if (err < 0) { 1276 onig_region_free(MBREX(search_regs), 1); 1277 MBREX(search_regs) = (OnigRegion *)NULL; 1278 } 1279} 1280/* }}} */ 1281 1282/* {{{ proto bool mb_ereg_search([string pattern[, string option]]) 1283 Regular expression search for multibyte string */ 1284PHP_FUNCTION(mb_ereg_search) 1285{ 1286 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); 1287} 1288/* }}} */ 1289 1290/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]]) 1291 Regular expression search for multibyte string */ 1292PHP_FUNCTION(mb_ereg_search_pos) 1293{ 1294 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); 1295} 1296/* }}} */ 1297 1298/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]]) 1299 Regular expression search for multibyte string */ 1300PHP_FUNCTION(mb_ereg_search_regs) 1301{ 1302 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2); 1303} 1304/* }}} */ 1305 1306/* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]]) 1307 Initialize string and regular expression for search. */ 1308PHP_FUNCTION(mb_ereg_search_init) 1309{ 1310 size_t argc = ZEND_NUM_ARGS(); 1311 zval *arg_str; 1312 char *arg_pattern = NULL, *arg_options = NULL; 1313 int arg_pattern_len = 0, arg_options_len = 0; 1314 OnigSyntaxType *syntax = NULL; 1315 OnigOptionType option; 1316 1317 if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) { 1318 return; 1319 } 1320 1321 if (argc > 1 && arg_pattern_len == 0) { 1322 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern"); 1323 RETURN_FALSE; 1324 } 1325 1326 option = MBREX(regex_default_options); 1327 syntax = MBREX(regex_default_syntax); 1328 1329 if (argc == 3) { 1330 option = 0; 1331 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL); 1332 } 1333 1334 if (argc > 1) { 1335 /* create regex pattern buffer */ 1336 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) { 1337 RETURN_FALSE; 1338 } 1339 } 1340 1341 if (MBREX(search_str) != NULL) { 1342 zval_ptr_dtor(&MBREX(search_str)); 1343 MBREX(search_str) = (zval *)NULL; 1344 } 1345 1346 MBREX(search_str) = arg_str; 1347 Z_ADDREF_P(MBREX(search_str)); 1348 SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str)); 1349 1350 MBREX(search_pos) = 0; 1351 1352 if (MBREX(search_regs) != NULL) { 1353 onig_region_free(MBREX(search_regs), 1); 1354 MBREX(search_regs) = (OnigRegion *) NULL; 1355 } 1356 1357 RETURN_TRUE; 1358} 1359/* }}} */ 1360 1361/* {{{ proto array mb_ereg_search_getregs(void) 1362 Get matched substring of the last time */ 1363PHP_FUNCTION(mb_ereg_search_getregs) 1364{ 1365 int n, i, len, beg, end; 1366 OnigUChar *str; 1367 1368 if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) { 1369 array_init(return_value); 1370 1371 str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str)); 1372 len = Z_STRLEN_P(MBREX(search_str)); 1373 n = MBREX(search_regs)->num_regs; 1374 for (i = 0; i < n; i++) { 1375 beg = MBREX(search_regs)->beg[i]; 1376 end = MBREX(search_regs)->end[i]; 1377 if (beg >= 0 && beg <= end && end <= len) { 1378 add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1); 1379 } else { 1380 add_index_bool(return_value, i, 0); 1381 } 1382 } 1383 } else { 1384 RETVAL_FALSE; 1385 } 1386} 1387/* }}} */ 1388 1389/* {{{ proto int mb_ereg_search_getpos(void) 1390 Get search start position */ 1391PHP_FUNCTION(mb_ereg_search_getpos) 1392{ 1393 RETVAL_LONG(MBREX(search_pos)); 1394} 1395/* }}} */ 1396 1397/* {{{ proto bool mb_ereg_search_setpos(int position) 1398 Set search start position */ 1399PHP_FUNCTION(mb_ereg_search_setpos) 1400{ 1401 long position; 1402 1403 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) { 1404 return; 1405 } 1406 1407 if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) { 1408 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range"); 1409 MBREX(search_pos) = 0; 1410 RETURN_FALSE; 1411 } 1412 1413 MBREX(search_pos) = position; 1414 RETURN_TRUE; 1415} 1416/* }}} */ 1417 1418/* {{{ php_mb_regex_set_options */ 1419static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC) 1420{ 1421 if (prev_options != NULL) { 1422 *prev_options = MBREX(regex_default_options); 1423 } 1424 if (prev_syntax != NULL) { 1425 *prev_syntax = MBREX(regex_default_syntax); 1426 } 1427 MBREX(regex_default_options) = options; 1428 MBREX(regex_default_syntax) = syntax; 1429} 1430/* }}} */ 1431 1432/* {{{ proto string mb_regex_set_options([string options]) 1433 Set or get the default options for mbregex functions */ 1434PHP_FUNCTION(mb_regex_set_options) 1435{ 1436 OnigOptionType opt; 1437 OnigSyntaxType *syntax; 1438 char *string = NULL; 1439 int string_len; 1440 char buf[16]; 1441 1442 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", 1443 &string, &string_len) == FAILURE) { 1444 RETURN_FALSE; 1445 } 1446 if (string != NULL) { 1447 opt = 0; 1448 syntax = NULL; 1449 _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL); 1450 _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC); 1451 } else { 1452 opt = MBREX(regex_default_options); 1453 syntax = MBREX(regex_default_syntax); 1454 } 1455 _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax); 1456 1457 RETVAL_STRING(buf, 1); 1458} 1459/* }}} */ 1460 1461#endif /* HAVE_MBREGEX */ 1462 1463/* 1464 * Local variables: 1465 * tab-width: 4 1466 * c-basic-offset: 4 1467 * End: 1468 * vim600: fdm=marker 1469 * vim: noet sw=4 ts=4 1470 */ 1471