1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2014 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
25
26#include "php.h"
27#include "php_ini.h"
28
29#if HAVE_MBREGEX
30
31#include "ext/standard/php_smart_str.h"
32#include "ext/standard/info.h"
33#include "php_mbregex.h"
34#include "mbstring.h"
35
36#include "php_onig_compat.h" /* must come prior to the oniguruma header */
37#include <oniguruma.h>
38#undef UChar
39
40ZEND_EXTERN_MODULE_GLOBALS(mbstring)
41
/*
 * State used by the mbregex functions in this file; fields are reached
 * through the MBREX() accessor.
 */
42struct _zend_mb_regex_globals {
43    OnigEncoding default_mbctype;          /* value current_mbctype is reset to at request shutdown */
44    OnigEncoding current_mbctype;          /* encoding handed to the regex compiler for each pattern */
45    HashTable ht_rc;                       /* cache of compiled regexes, keyed by the pattern bytes */
46    zval *search_str;                      /* released and cleared at request shutdown */
47    zval *search_str_val;                  /* not referenced in the visible part of this file */
48    unsigned int search_pos;               /* reset to 0 at request shutdown */
49    php_mb_regex_t *search_re;             /* starts out NULL; users are outside the visible part of this file */
50    OnigRegion *search_regs;               /* freed and cleared at request shutdown */
51    OnigOptionType regex_default_options;  /* options applied when the caller supplies no option string */
52    OnigSyntaxType *regex_default_syntax;  /* syntax applied when the caller supplies no option string */
53};
54
/* Shorthand for field `g` of the mbregex globals, which are reached through
 * the pointer MBSTRG(mb_regex_globals). */
55#define MBREX(g) (MBSTRG(mb_regex_globals)->g)
56
57/* {{{ static void php_mb_regex_free_cache() */
58static void php_mb_regex_free_cache(php_mb_regex_t **pre)
59{
60    onig_free(*pre);
61}
62/* }}} */
63
64/* {{{ _php_mb_regex_globals_ctor */
65static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
66{
67    pglobals->default_mbctype = ONIG_ENCODING_EUC_JP;
68    pglobals->current_mbctype = ONIG_ENCODING_EUC_JP;
69    zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
70    pglobals->search_str = (zval*) NULL;
71    pglobals->search_re = (php_mb_regex_t*)NULL;
72    pglobals->search_pos = 0;
73    pglobals->search_regs = (OnigRegion*)NULL;
74    pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
75    pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
76    return SUCCESS;
77}
78/* }}} */
79
80/* {{{ _php_mb_regex_globals_dtor */
81static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC)
82{
83    zend_hash_destroy(&pglobals->ht_rc);
84}
85/* }}} */
86
87/* {{{ php_mb_regex_globals_alloc */
88zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
89{
90    zend_mb_regex_globals *pglobals = pemalloc(
91            sizeof(zend_mb_regex_globals), 1);
92    if (!pglobals) {
93        return NULL;
94    }
95    if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
96        pefree(pglobals, 1);
97        return NULL;
98    }
99    return pglobals;
100}
101/* }}} */
102
103/* {{{ php_mb_regex_globals_free */
104void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
105{
106    if (!pglobals) {
107        return;
108    }
109    _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
110    pefree(pglobals, 1);
111}
112/* }}} */
113
114/* {{{ PHP_MINIT_FUNCTION(mb_regex) */
115PHP_MINIT_FUNCTION(mb_regex)
116{
117    onig_init();
118    return SUCCESS;
119}
120/* }}} */
121
122/* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
123PHP_MSHUTDOWN_FUNCTION(mb_regex)
124{
125    onig_end();
126    return SUCCESS;
127}
128/* }}} */
129
130/* {{{ PHP_RINIT_FUNCTION(mb_regex) */
131PHP_RINIT_FUNCTION(mb_regex)
132{
133    return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
134}
135/* }}} */
136
137/* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
138PHP_RSHUTDOWN_FUNCTION(mb_regex)
139{
140    MBREX(current_mbctype) = MBREX(default_mbctype);
141
142    if (MBREX(search_str) != NULL) {
143        zval_ptr_dtor(&MBREX(search_str));
144        MBREX(search_str) = (zval *)NULL;
145    }
146    MBREX(search_pos) = 0;
147
148    if (MBREX(search_regs) != NULL) {
149        onig_region_free(MBREX(search_regs), 1);
150        MBREX(search_regs) = (OnigRegion *)NULL;
151    }
152    zend_hash_clean(&MBREX(ht_rc));
153
154    return SUCCESS;
155}
156/* }}} */
157
158/* {{{ PHP_MINFO_FUNCTION(mb_regex) */
159PHP_MINFO_FUNCTION(mb_regex)
160{
161    char buf[32];
162    php_info_print_table_start();
163    php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
164    snprintf(buf, sizeof(buf), "%d.%d.%d",
165            ONIGURUMA_VERSION_MAJOR,
166            ONIGURUMA_VERSION_MINOR,
167            ONIGURUMA_VERSION_TEENY);
168#ifdef PHP_ONIG_BUNDLED
169#ifdef USE_COMBINATION_EXPLOSION_CHECK
170    php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
171#else   /* USE_COMBINATION_EXPLOSION_CHECK */
172    php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
173#endif  /* USE_COMBINATION_EXPLOSION_CHECK */
174#endif /* PHP_BUNDLED_ONIG */
175    php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
176    php_info_print_table_end();
177}
178/* }}} */
179
180/*
181 * encoding name resolver
182 */
183
184/* {{{ encoding name map */
/* One row of the encoding alias table.  `names` holds the aliases packed back
 * to back, each terminated by a NUL byte, with an empty string marking the
 * end of the list; `code` is the Oniguruma encoding those aliases select. */
185typedef struct _php_mb_regex_enc_name_map_t {
186    const char *names;
187    OnigEncoding code;
188} php_mb_regex_enc_name_map_t;
189
190php_mb_regex_enc_name_map_t enc_name_map[] = {
191#ifdef ONIG_ENCODING_EUC_JP
192    {
193        "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
194        ONIG_ENCODING_EUC_JP
195    },
196#endif
197#ifdef ONIG_ENCODING_UTF8
198    {
199        "UTF-8\0UTF8\0",
200        ONIG_ENCODING_UTF8
201    },
202#endif
203#ifdef ONIG_ENCODING_UTF16_BE
204    {
205        "UTF-16\0UTF-16BE\0",
206        ONIG_ENCODING_UTF16_BE
207    },
208#endif
209#ifdef ONIG_ENCODING_UTF16_LE
210    {
211        "UTF-16LE\0",
212        ONIG_ENCODING_UTF16_LE
213    },
214#endif
215#ifdef ONIG_ENCODING_UTF32_BE
216    {
217        "UCS-4\0UTF-32\0UTF-32BE\0",
218        ONIG_ENCODING_UTF32_BE
219    },
220#endif
221#ifdef ONIG_ENCODING_UTF32_LE
222    {
223        "UCS-4LE\0UTF-32LE\0",
224        ONIG_ENCODING_UTF32_LE
225    },
226#endif
227#ifdef ONIG_ENCODING_SJIS
228    {
229        "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
230        ONIG_ENCODING_SJIS
231    },
232#endif
233#ifdef ONIG_ENCODING_BIG5
234    {
235        "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
236        ONIG_ENCODING_BIG5
237    },
238#endif
239#ifdef ONIG_ENCODING_EUC_CN
240    {
241        "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
242        ONIG_ENCODING_EUC_CN
243    },
244#endif
245#ifdef ONIG_ENCODING_EUC_TW
246    {
247        "EUC-TW\0EUCTW\0EUC_TW\0",
248        ONIG_ENCODING_EUC_TW
249    },
250#endif
251#ifdef ONIG_ENCODING_EUC_KR
252    {
253        "EUC-KR\0EUCKR\0EUC_KR\0",
254        ONIG_ENCODING_EUC_KR
255    },
256#endif
257#if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
258    {
259        "KOI8\0KOI-8\0",
260        ONIG_ENCODING_KOI8
261    },
262#endif
263#ifdef ONIG_ENCODING_KOI8_R
264    {
265        "KOI8R\0KOI8-R\0KOI-8R\0",
266        ONIG_ENCODING_KOI8_R
267    },
268#endif
269#ifdef ONIG_ENCODING_ISO_8859_1
270    {
271        "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
272        ONIG_ENCODING_ISO_8859_1
273    },
274#endif
275#ifdef ONIG_ENCODING_ISO_8859_2
276    {
277        "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
278        ONIG_ENCODING_ISO_8859_2
279    },
280#endif
281#ifdef ONIG_ENCODING_ISO_8859_3
282    {
283        "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
284        ONIG_ENCODING_ISO_8859_3
285    },
286#endif
287#ifdef ONIG_ENCODING_ISO_8859_4
288    {
289        "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
290        ONIG_ENCODING_ISO_8859_4
291    },
292#endif
293#ifdef ONIG_ENCODING_ISO_8859_5
294    {
295        "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
296        ONIG_ENCODING_ISO_8859_5
297    },
298#endif
299#ifdef ONIG_ENCODING_ISO_8859_6
300    {
301        "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
302        ONIG_ENCODING_ISO_8859_6
303    },
304#endif
305#ifdef ONIG_ENCODING_ISO_8859_7
306    {
307        "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
308        ONIG_ENCODING_ISO_8859_7
309    },
310#endif
311#ifdef ONIG_ENCODING_ISO_8859_8
312    {
313        "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
314        ONIG_ENCODING_ISO_8859_8
315    },
316#endif
317#ifdef ONIG_ENCODING_ISO_8859_9
318    {
319        "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
320        ONIG_ENCODING_ISO_8859_9
321    },
322#endif
323#ifdef ONIG_ENCODING_ISO_8859_10
324    {
325        "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
326        ONIG_ENCODING_ISO_8859_10
327    },
328#endif
329#ifdef ONIG_ENCODING_ISO_8859_11
330    {
331        "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
332        ONIG_ENCODING_ISO_8859_11
333    },
334#endif
335#ifdef ONIG_ENCODING_ISO_8859_13
336    {
337        "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
338        ONIG_ENCODING_ISO_8859_13
339    },
340#endif
341#ifdef ONIG_ENCODING_ISO_8859_14
342    {
343        "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
344        ONIG_ENCODING_ISO_8859_14
345    },
346#endif
347#ifdef ONIG_ENCODING_ISO_8859_15
348    {
349        "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
350        ONIG_ENCODING_ISO_8859_15
351    },
352#endif
353#ifdef ONIG_ENCODING_ISO_8859_16
354    {
355        "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
356        ONIG_ENCODING_ISO_8859_16
357    },
358#endif
359#ifdef ONIG_ENCODING_ASCII
360    {
361        "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
362        ONIG_ENCODING_ASCII
363    },
364#endif
365    { NULL, ONIG_ENCODING_UNDEF }
366};
367/* }}} */
368
369/* {{{ php_mb_regex_name2mbctype */
370static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
371{
372    const char *p;
373    php_mb_regex_enc_name_map_t *mapping;
374
375    if (pname == NULL || !*pname) {
376        return ONIG_ENCODING_UNDEF;
377    }
378
379    for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
380        for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
381            if (strcasecmp(p, pname) == 0) {
382                return mapping->code;
383            }
384        }
385    }
386
387    return ONIG_ENCODING_UNDEF;
388}
389/* }}} */
390
391/* {{{ php_mb_regex_mbctype2name */
392static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
393{
394    php_mb_regex_enc_name_map_t *mapping;
395
396    for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
397        if (mapping->code == mbctype) {
398            return mapping->names;
399        }
400    }
401
402    return NULL;
403}
404/* }}} */
405
406/* {{{ php_mb_regex_set_mbctype */
407int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
408{
409    OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
410    if (mbctype == ONIG_ENCODING_UNDEF) {
411        return FAILURE;
412    }
413    MBREX(current_mbctype) = mbctype;
414    return SUCCESS;
415}
416/* }}} */
417
418/* {{{ php_mb_regex_set_default_mbctype */
419int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
420{
421    OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
422    if (mbctype == ONIG_ENCODING_UNDEF) {
423        return FAILURE;
424    }
425    MBREX(default_mbctype) = mbctype;
426    return SUCCESS;
427}
428/* }}} */
429
430/* {{{ php_mb_regex_get_mbctype */
431const char *php_mb_regex_get_mbctype(TSRMLS_D)
432{
433    return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
434}
435/* }}} */
436
437/* {{{ php_mb_regex_get_default_mbctype */
438const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
439{
440    return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
441}
442/* }}} */
443
444/*
445 * regex cache
446 */
447/* {{{ php_mbregex_compile_pattern */
448static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
449{
450    int err_code = 0;
451    int found = 0;
452    php_mb_regex_t *retval = NULL, **rc = NULL;
453    OnigErrorInfo err_info;
454    OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
455
456    found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
457    if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
458        if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
459            onig_error_code_to_str(err_str, err_code, err_info);
460            php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
461            retval = NULL;
462            goto out;
463        }
464        zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
465    } else if (found == SUCCESS) {
466        retval = *rc;
467    }
468out:
469    return retval;
470}
471/* }}} */
472
473/* {{{ _php_mb_regex_get_option_string */
474static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
475{
476    size_t len_left = len;
477    size_t len_req = 0;
478    char *p = str;
479    char c;
480
481    if ((option & ONIG_OPTION_IGNORECASE) != 0) {
482        if (len_left > 0) {
483            --len_left;
484            *(p++) = 'i';
485        }
486        ++len_req;
487    }
488
489    if ((option & ONIG_OPTION_EXTEND) != 0) {
490        if (len_left > 0) {
491            --len_left;
492            *(p++) = 'x';
493        }
494        ++len_req;
495    }
496
497    if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
498            (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
499        if (len_left > 0) {
500            --len_left;
501            *(p++) = 'p';
502        }
503        ++len_req;
504    } else {
505        if ((option & ONIG_OPTION_MULTILINE) != 0) {
506            if (len_left > 0) {
507                --len_left;
508                *(p++) = 'm';
509            }
510            ++len_req;
511        }
512
513        if ((option & ONIG_OPTION_SINGLELINE) != 0) {
514            if (len_left > 0) {
515                --len_left;
516                *(p++) = 's';
517            }
518            ++len_req;
519        }
520    }
521    if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
522        if (len_left > 0) {
523            --len_left;
524            *(p++) = 'l';
525        }
526        ++len_req;
527    }
528    if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
529        if (len_left > 0) {
530            --len_left;
531            *(p++) = 'n';
532        }
533        ++len_req;
534    }
535
536    c = 0;
537
538    if (syntax == ONIG_SYNTAX_JAVA) {
539        c = 'j';
540    } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
541        c = 'u';
542    } else if (syntax == ONIG_SYNTAX_GREP) {
543        c = 'g';
544    } else if (syntax == ONIG_SYNTAX_EMACS) {
545        c = 'c';
546    } else if (syntax == ONIG_SYNTAX_RUBY) {
547        c = 'r';
548    } else if (syntax == ONIG_SYNTAX_PERL) {
549        c = 'z';
550    } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
551        c = 'b';
552    } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
553        c = 'd';
554    }
555
556    if (c != 0) {
557        if (len_left > 0) {
558            --len_left;
559            *(p++) = c;
560        }
561        ++len_req;
562    }
563
564
565    if (len_left > 0) {
566        --len_left;
567        *(p++) = '\0';
568    }
569    ++len_req;
570    if (len < len_req) {
571        return len_req;
572    }
573
574    return 0;
575}
576/* }}} */
577
578/* {{{ _php_mb_regex_init_options */
579static void
580_php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval)
581{
582    int n;
583    char c;
584    int optm = 0;
585
586    *syntax = ONIG_SYNTAX_RUBY;
587
588    if (parg != NULL) {
589        n = 0;
590        while(n < narg) {
591            c = parg[n++];
592            switch (c) {
593                case 'i':
594                    optm |= ONIG_OPTION_IGNORECASE;
595                    break;
596                case 'x':
597                    optm |= ONIG_OPTION_EXTEND;
598                    break;
599                case 'm':
600                    optm |= ONIG_OPTION_MULTILINE;
601                    break;
602                case 's':
603                    optm |= ONIG_OPTION_SINGLELINE;
604                    break;
605                case 'p':
606                    optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
607                    break;
608                case 'l':
609                    optm |= ONIG_OPTION_FIND_LONGEST;
610                    break;
611                case 'n':
612                    optm |= ONIG_OPTION_FIND_NOT_EMPTY;
613                    break;
614                case 'j':
615                    *syntax = ONIG_SYNTAX_JAVA;
616                    break;
617                case 'u':
618                    *syntax = ONIG_SYNTAX_GNU_REGEX;
619                    break;
620                case 'g':
621                    *syntax = ONIG_SYNTAX_GREP;
622                    break;
623                case 'c':
624                    *syntax = ONIG_SYNTAX_EMACS;
625                    break;
626                case 'r':
627                    *syntax = ONIG_SYNTAX_RUBY;
628                    break;
629                case 'z':
630                    *syntax = ONIG_SYNTAX_PERL;
631                    break;
632                case 'b':
633                    *syntax = ONIG_SYNTAX_POSIX_BASIC;
634                    break;
635                case 'd':
636                    *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
637                    break;
638                case 'e':
639                    if (eval != NULL) *eval = 1;
640                    break;
641                default:
642                    break;
643            }
644        }
645        if (option != NULL) *option|=optm;
646    }
647}
648/* }}} */
649
650/*
651 * php functions
652 */
653
654/* {{{ proto string mb_regex_encoding([string encoding])
655   Returns the current encoding for regex as a string. */
656PHP_FUNCTION(mb_regex_encoding)
657{
658    size_t argc = ZEND_NUM_ARGS();
659    char *encoding;
660    int encoding_len;
661    OnigEncoding mbctype;
662
663    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
664        return;
665    }
666
667    if (argc == 0) {
668        const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
669
670        if (retval == NULL) {
671            RETURN_FALSE;
672        }
673
674        RETURN_STRING((char *)retval, 1);
675    } else if (argc == 1) {
676        mbctype = _php_mb_regex_name2mbctype(encoding);
677
678        if (mbctype == ONIG_ENCODING_UNDEF) {
679            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
680            RETURN_FALSE;
681        }
682
683        MBREX(current_mbctype) = mbctype;
684        RETURN_TRUE;
685    }
686}
687/* }}} */
688
689/* {{{ _php_mb_regex_ereg_exec */
690static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
691{
692    zval **arg_pattern, *array;
693    char *string;
694    int string_len;
695    php_mb_regex_t *re;
696    OnigRegion *regs = NULL;
697    int i, match_len, beg, end;
698    OnigOptionType options;
699    char *str;
700
701    array = NULL;
702
703    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
704        RETURN_FALSE;
705    }
706
707    options = MBREX(regex_default_options);
708    if (icase) {
709        options |= ONIG_OPTION_IGNORECASE;
710    }
711
712    /* compile the regular expression from the supplied regex */
713    if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
714        /* we convert numbers to integers and treat them as a string */
715        if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
716            convert_to_long_ex(arg_pattern);    /* get rid of decimal places */
717        }
718        convert_to_string_ex(arg_pattern);
719        /* don't bother doing an extended regex with just a number */
720    }
721
722    if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
723        php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
724        RETVAL_FALSE;
725        goto out;
726    }
727
728    re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
729    if (re == NULL) {
730        RETVAL_FALSE;
731        goto out;
732    }
733
734    regs = onig_region_new();
735
736    /* actually execute the regular expression */
737    if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
738        RETVAL_FALSE;
739        goto out;
740    }
741
742    match_len = 1;
743    str = string;
744    if (array != NULL) {
745        match_len = regs->end[0] - regs->beg[0];
746        zval_dtor(array);
747        array_init(array);
748        for (i = 0; i < regs->num_regs; i++) {
749            beg = regs->beg[i];
750            end = regs->end[i];
751            if (beg >= 0 && beg < end && end <= string_len) {
752                add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);
753            } else {
754                add_index_bool(array, i, 0);
755            }
756        }
757    }
758
759    if (match_len == 0) {
760        match_len = 1;
761    }
762    RETVAL_LONG(match_len);
763out:
764    if (regs != NULL) {
765        onig_region_free(regs, 1);
766    }
767}
768/* }}} */
769
770/* {{{ proto int mb_ereg(string pattern, string string [, array registers])
771   Regular expression match for multibyte string */
772PHP_FUNCTION(mb_ereg)
773{
774    _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
775}
776/* }}} */
777
778/* {{{ proto int mb_eregi(string pattern, string string [, array registers])
779   Case-insensitive regular expression match for multibyte string */
780PHP_FUNCTION(mb_eregi)
781{
782    _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
783}
784/* }}} */
785
786/* {{{ _php_mb_regex_ereg_replace_exec */
787static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
788{
789    zval **arg_pattern_zval;
790
791    char *arg_pattern;
792    int arg_pattern_len;
793
794    char *replace;
795    int replace_len;
796
797    zend_fcall_info arg_replace_fci;
798    zend_fcall_info_cache arg_replace_fci_cache;
799
800    char *string;
801    int string_len;
802
803    char *p;
804    php_mb_regex_t *re;
805    OnigSyntaxType *syntax;
806    OnigRegion *regs = NULL;
807    smart_str out_buf = { 0 };
808    smart_str eval_buf = { 0 };
809    smart_str *pbuf;
810    int i, err, eval, n;
811    OnigUChar *pos;
812    OnigUChar *string_lim;
813    char *description = NULL;
814    char pat_buf[2];
815
816    const mbfl_encoding *enc;
817
818    {
819        const char *current_enc_name;
820        current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
821        if (current_enc_name == NULL ||
822            (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
823            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
824            RETURN_FALSE;
825        }
826    }
827    eval = 0;
828    {
829        char *option_str = NULL;
830        int option_str_len = 0;
831
832        if (!is_callable) {
833            if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
834                        &arg_pattern_zval,
835                        &replace, &replace_len,
836                        &string, &string_len,
837                        &option_str, &option_str_len) == FAILURE) {
838                RETURN_FALSE;
839            }
840        } else {
841            if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zfs|s",
842                        &arg_pattern_zval,
843                        &arg_replace_fci, &arg_replace_fci_cache,
844                        &string, &string_len,
845                        &option_str, &option_str_len) == FAILURE) {
846                RETURN_FALSE;
847            }
848        }
849
850        if (option_str != NULL) {
851            _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
852        } else {
853            options |= MBREX(regex_default_options);
854            syntax = MBREX(regex_default_syntax);
855        }
856    }
857    if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
858        arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
859        arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
860    } else {
861        /* FIXME: this code is not multibyte aware! */
862        convert_to_long_ex(arg_pattern_zval);
863        pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval);
864        pat_buf[1] = '\0';
865
866        arg_pattern = pat_buf;
867        arg_pattern_len = 1;
868    }
869    /* create regex pattern buffer */
870    re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
871    if (re == NULL) {
872        RETURN_FALSE;
873    }
874
875    if (eval || is_callable) {
876        pbuf = &eval_buf;
877        description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
878    } else {
879        pbuf = &out_buf;
880        description = NULL;
881    }
882
883    if (is_callable) {
884        if (eval) {
885            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Option 'e' cannot be used with replacement callback");
886            RETURN_FALSE;
887        }
888    }
889
890    /* do the actual work */
891    err = 0;
892    pos = (OnigUChar *)string;
893    string_lim = (OnigUChar*)(string + string_len);
894    regs = onig_region_new();
895    while (err >= 0) {
896        err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
897        if (err <= -2) {
898            OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
899            onig_error_code_to_str(err_str, err);
900            php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
901            break;
902        }
903        if (err >= 0) {
904#if moriyoshi_0
905            if (regs->beg[0] == regs->end[0]) {
906                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
907                break;
908            }
909#endif
910            /* copy the part of the string before the match */
911            smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
912
913            if (!is_callable) {
914                /* copy replacement and backrefs */
915                i = 0;
916                p = replace;
917                while (i < replace_len) {
918                    int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
919                    n = -1;
920                    if ((replace_len - i) >= 2 && fwd == 1 &&
921                    p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
922                        n = p[1] - '0';
923                    }
924                    if (n >= 0 && n < regs->num_regs) {
925                        if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
926                            smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
927                        }
928                        p += 2;
929                        i += 2;
930                    } else {
931                        smart_str_appendl(pbuf, p, fwd);
932                        p += fwd;
933                        i += fwd;
934                    }
935                }
936            }
937
938            if (eval) {
939                zval v;
940                /* null terminate buffer */
941                smart_str_0(&eval_buf);
942                /* do eval */
943                if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
944                    efree(description);
945                    php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
946                    /* zend_error() does not return in this case */
947                }
948
949                /* result of eval */
950                convert_to_string(&v);
951                smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
952                /* Clean up */
953                eval_buf.len = 0;
954                zval_dtor(&v);
955            } else if (is_callable) {
956                zval *retval_ptr;
957                zval **args[1];
958                zval *subpats;
959                int i;
960
961                MAKE_STD_ZVAL(subpats);
962                array_init(subpats);
963
964                for (i = 0; i < regs->num_regs; i++) {
965                    add_next_index_stringl(subpats, string + regs->beg[i], regs->end[i] - regs->beg[i], 1);
966                }
967
968                args[0] = &subpats;
969                /* null terminate buffer */
970                smart_str_0(&eval_buf);
971
972                arg_replace_fci.param_count = 1;
973                arg_replace_fci.params = args;
974                arg_replace_fci.retval_ptr_ptr = &retval_ptr;
975                if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && arg_replace_fci.retval_ptr_ptr) {
976                    convert_to_string_ex(&retval_ptr);
977                    smart_str_appendl(&out_buf, Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
978                    eval_buf.len = 0;
979                    zval_ptr_dtor(&retval_ptr);
980                } else {
981                    efree(description);
982                    if (!EG(exception)) {
983                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
984                    }
985                }
986                zval_ptr_dtor(&subpats);
987            }
988
989            n = regs->end[0];
990            if ((pos - (OnigUChar *)string) < n) {
991                pos = (OnigUChar *)string + n;
992            } else {
993                if (pos < string_lim) {
994                    smart_str_appendl(&out_buf, pos, 1);
995                }
996                pos++;
997            }
998        } else { /* nomatch */
999            /* stick that last bit of string on our output */
1000            if (string_lim - pos > 0) {
1001                smart_str_appendl(&out_buf, pos, string_lim - pos);
1002            }
1003        }
1004        onig_region_free(regs, 0);
1005    }
1006
1007    if (description) {
1008        efree(description);
1009    }
1010    if (regs != NULL) {
1011        onig_region_free(regs, 1);
1012    }
1013    smart_str_free(&eval_buf);
1014
1015    if (err <= -2) {
1016        smart_str_free(&out_buf);
1017        RETVAL_FALSE;
1018    } else {
1019        smart_str_appendc(&out_buf, '\0');
1020        RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
1021    }
1022}
1023/* }}} */
1024
/* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
   Replace regular expression for multibyte string.
   Thin entry point: all of the work is done by _php_mb_regex_ereg_replace_exec(). */
PHP_FUNCTION(mb_ereg_replace)
{
    /* Second argument 0: this entry point contributes no extra Oniguruma
       option bits of its own.
       Third argument 0: mb_ereg_replace_callback() passes 1 here, so 0
       presumably selects plain string replacement -- confirm against the
       helper's signature. */
    _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
}
1031/* }}} */
1032
/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
   Case insensitive replace regular expression for multibyte string.
   Thin entry point: all of the work is done by _php_mb_regex_ereg_replace_exec(). */
PHP_FUNCTION(mb_eregi_replace)
{
    /* Same helper as mb_ereg_replace(), but ONIG_OPTION_IGNORECASE is handed
       over as the option argument to make the match case insensitive.
       Third argument 0: presumably "replacement is a plain string, not a
       callback" -- confirm against the helper's signature. */
    _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
}
1039/* }}} */
1040
/* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
   Replace regular expression for multibyte string using a replacement callback.
   Thin entry point: all of the work is done by _php_mb_regex_ereg_replace_exec(). */
PHP_FUNCTION(mb_ereg_replace_callback)
{
    /* Second argument 0: this entry point contributes no extra Oniguruma
       option bits of its own.
       Third argument 1: the only one of the three replace entry points that
       passes a non-zero value; presumably it tells the helper to treat the
       second PHP argument as a callable -- confirm against the helper. */
    _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
}
1047/* }}} */
1048
1049/* {{{ proto array mb_split(string pattern, string string [, int limit])
1050   split multibyte string into array by regular expression */
1051PHP_FUNCTION(mb_split)
1052{
1053    char *arg_pattern;
1054    int arg_pattern_len;
1055    php_mb_regex_t *re;
1056    OnigRegion *regs = NULL;
1057    char *string;
1058    OnigUChar *pos, *chunk_pos;
1059    int string_len;
1060
1061    int n, err;
1062    long count = -1;
1063
1064    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1065        RETURN_FALSE;
1066    }
1067
1068    if (count > 0) {
1069        count--;
1070    }
1071
1072    /* create regex pattern buffer */
1073    if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1074        RETURN_FALSE;
1075    }
1076
1077    array_init(return_value);
1078
1079    chunk_pos = pos = (OnigUChar *)string;
1080    err = 0;
1081    regs = onig_region_new();
1082    /* churn through str, generating array entries as we go */
1083    while (count != 0 && (pos - (OnigUChar *)string) < string_len) {
1084        int beg, end;
1085        err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1086        if (err < 0) {
1087            break;
1088        }
1089        beg = regs->beg[0], end = regs->end[0];
1090        /* add it to the array */
1091        if ((pos - (OnigUChar *)string) < end) {
1092            if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1093                add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos), 1);
1094                --count;
1095            } else {
1096                err = -2;
1097                break;
1098            }
1099            /* point at our new starting point */
1100            chunk_pos = pos = (OnigUChar *)string + end;
1101        } else {
1102            pos++;
1103        }
1104        onig_region_free(regs, 0);
1105    }
1106
1107    onig_region_free(regs, 1);
1108
1109    /* see if we encountered an error */
1110    if (err <= -2) {
1111        OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1112        onig_error_code_to_str(err_str, err);
1113        php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1114        zval_dtor(return_value);
1115        RETURN_FALSE;
1116    }
1117
1118    /* otherwise we just have one last element to add to the array */
1119    n = ((OnigUChar *)(string + string_len) - chunk_pos);
1120    if (n > 0) {
1121        add_next_index_stringl(return_value, (char *)chunk_pos, n, 1);
1122    } else {
1123        add_next_index_stringl(return_value, "", 0, 1);
1124    }
1125}
1126/* }}} */
1127
1128/* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1129   Regular expression match for multibyte string */
1130PHP_FUNCTION(mb_ereg_match)
1131{
1132    char *arg_pattern;
1133    int arg_pattern_len;
1134
1135    char *string;
1136    int string_len;
1137
1138    php_mb_regex_t *re;
1139    OnigSyntaxType *syntax;
1140    OnigOptionType option = 0;
1141    int err;
1142
1143    {
1144        char *option_str = NULL;
1145        int option_str_len = 0;
1146
1147        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
1148                                  &arg_pattern, &arg_pattern_len, &string, &string_len,
1149                                  &option_str, &option_str_len)==FAILURE) {
1150            RETURN_FALSE;
1151        }
1152
1153        if (option_str != NULL) {
1154            _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1155        } else {
1156            option |= MBREX(regex_default_options);
1157            syntax = MBREX(regex_default_syntax);
1158        }
1159    }
1160
1161    if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1162        RETURN_FALSE;
1163    }
1164
1165    /* match */
1166    err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1167    if (err >= 0) {
1168        RETVAL_TRUE;
1169    } else {
1170        RETVAL_FALSE;
1171    }
1172}
1173/* }}} */
1174
1175/* regex search */
1176/* {{{ _php_mb_regex_ereg_search_exec */
1177static void
1178_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1179{
1180    size_t argc = ZEND_NUM_ARGS();
1181    char *arg_pattern, *arg_options;
1182    int arg_pattern_len, arg_options_len;
1183    int n, i, err, pos, len, beg, end;
1184    OnigOptionType option;
1185    OnigUChar *str;
1186    OnigSyntaxType *syntax;
1187
1188    if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1189        return;
1190    }
1191
1192    option = MBREX(regex_default_options);
1193
1194    if (argc == 2) {
1195        option = 0;
1196        _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1197    }
1198
1199    if (argc > 0) {
1200        /* create regex pattern buffer */
1201        if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1202            RETURN_FALSE;
1203        }
1204    }
1205
1206    pos = MBREX(search_pos);
1207    str = NULL;
1208    len = 0;
1209    if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
1210        str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1211        len = Z_STRLEN_P(MBREX(search_str));
1212    }
1213
1214    if (MBREX(search_re) == NULL) {
1215        php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
1216        RETURN_FALSE;
1217    }
1218
1219    if (str == NULL) {
1220        php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
1221        RETURN_FALSE;
1222    }
1223
1224    if (MBREX(search_regs)) {
1225        onig_region_free(MBREX(search_regs), 1);
1226    }
1227    MBREX(search_regs) = onig_region_new();
1228
1229    err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
1230    if (err == ONIG_MISMATCH) {
1231        MBREX(search_pos) = len;
1232        RETVAL_FALSE;
1233    } else if (err <= -2) {
1234        OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1235        onig_error_code_to_str(err_str, err);
1236        php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1237        RETVAL_FALSE;
1238    } else {
1239        if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
1240            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
1241        }
1242        switch (mode) {
1243        case 1:
1244            array_init(return_value);
1245            beg = MBREX(search_regs)->beg[0];
1246            end = MBREX(search_regs)->end[0];
1247            add_next_index_long(return_value, beg);
1248            add_next_index_long(return_value, end - beg);
1249            break;
1250        case 2:
1251            array_init(return_value);
1252            n = MBREX(search_regs)->num_regs;
1253            for (i = 0; i < n; i++) {
1254                beg = MBREX(search_regs)->beg[i];
1255                end = MBREX(search_regs)->end[i];
1256                if (beg >= 0 && beg <= end && end <= len) {
1257                    add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1258                } else {
1259                    add_index_bool(return_value, i, 0);
1260                }
1261            }
1262            break;
1263        default:
1264            RETVAL_TRUE;
1265            break;
1266        }
1267        end = MBREX(search_regs)->end[0];
1268        if (pos < end) {
1269            MBREX(search_pos) = end;
1270        } else {
1271            MBREX(search_pos) = pos + 1;
1272        }
1273    }
1274
1275    if (err < 0) {
1276        onig_region_free(MBREX(search_regs), 1);
1277        MBREX(search_regs) = (OnigRegion *)NULL;
1278    }
1279}
1280/* }}} */
1281
/* {{{ proto bool mb_ereg_search([string pattern[, string option]])
   Regular expression search for multibyte string.
   Thin entry point around _php_mb_regex_ereg_search_exec(). */
PHP_FUNCTION(mb_ereg_search)
{
    /* mode 0 falls into the helper's default branch: TRUE on a match */
    _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
1288/* }}} */
1289
/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
   Regular expression search for multibyte string, returning the position
   and length of the match.
   Thin entry point around _php_mb_regex_ereg_search_exec(). */
PHP_FUNCTION(mb_ereg_search_pos)
{
    /* mode 1: on a match the helper returns array(beg, end - beg) taken
       from the first search region */
    _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
1296/* }}} */
1297
/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
   Regular expression search for multibyte string, returning the matched
   substrings.
   Thin entry point around _php_mb_regex_ereg_search_exec(). */
PHP_FUNCTION(mb_ereg_search_regs)
{
    /* mode 2: on a match the helper returns one array entry per search
       region (the matched substring, or FALSE for an unusable region) */
    _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
}
1304/* }}} */
1305
1306/* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1307   Initialize string and regular expression for search. */
1308PHP_FUNCTION(mb_ereg_search_init)
1309{
1310    size_t argc = ZEND_NUM_ARGS();
1311    zval *arg_str;
1312    char *arg_pattern = NULL, *arg_options = NULL;
1313    int arg_pattern_len = 0, arg_options_len = 0;
1314    OnigSyntaxType *syntax = NULL;
1315    OnigOptionType option;
1316
1317    if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1318        return;
1319    }
1320
1321    if (argc > 1 && arg_pattern_len == 0) {
1322        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
1323        RETURN_FALSE;
1324    }
1325
1326    option = MBREX(regex_default_options);
1327    syntax = MBREX(regex_default_syntax);
1328
1329    if (argc == 3) {
1330        option = 0;
1331        _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1332    }
1333
1334    if (argc > 1) {
1335        /* create regex pattern buffer */
1336        if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1337            RETURN_FALSE;
1338        }
1339    }
1340
1341    if (MBREX(search_str) != NULL) {
1342        zval_ptr_dtor(&MBREX(search_str));
1343        MBREX(search_str) = (zval *)NULL;
1344    }
1345
1346    MBREX(search_str) = arg_str;
1347    Z_ADDREF_P(MBREX(search_str));
1348    SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
1349
1350    MBREX(search_pos) = 0;
1351
1352    if (MBREX(search_regs) != NULL) {
1353        onig_region_free(MBREX(search_regs), 1);
1354        MBREX(search_regs) = (OnigRegion *) NULL;
1355    }
1356
1357    RETURN_TRUE;
1358}
1359/* }}} */
1360
1361/* {{{ proto array mb_ereg_search_getregs(void)
1362   Get matched substring of the last time */
1363PHP_FUNCTION(mb_ereg_search_getregs)
1364{
1365    int n, i, len, beg, end;
1366    OnigUChar *str;
1367
1368    if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
1369        array_init(return_value);
1370
1371        str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1372        len = Z_STRLEN_P(MBREX(search_str));
1373        n = MBREX(search_regs)->num_regs;
1374        for (i = 0; i < n; i++) {
1375            beg = MBREX(search_regs)->beg[i];
1376            end = MBREX(search_regs)->end[i];
1377            if (beg >= 0 && beg <= end && end <= len) {
1378                add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1379            } else {
1380                add_index_bool(return_value, i, 0);
1381            }
1382        }
1383    } else {
1384        RETVAL_FALSE;
1385    }
1386}
1387/* }}} */
1388
/* {{{ proto int mb_ereg_search_getpos(void)
   Get search start position.
   Returns the current value of MBREX(search_pos) as an integer. */
PHP_FUNCTION(mb_ereg_search_getpos)
{
    /* no arguments are parsed; the stored position is returned as is */
    RETVAL_LONG(MBREX(search_pos));
}
1395/* }}} */
1396
1397/* {{{ proto bool mb_ereg_search_setpos(int position)
1398   Set search start position */
1399PHP_FUNCTION(mb_ereg_search_setpos)
1400{
1401    long position;
1402
1403    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
1404        return;
1405    }
1406
1407    if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
1408        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
1409        MBREX(search_pos) = 0;
1410        RETURN_FALSE;
1411    }
1412
1413    MBREX(search_pos) = position;
1414    RETURN_TRUE;
1415}
1416/* }}} */
1417
1418/* {{{ php_mb_regex_set_options */
1419static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC)
1420{
1421    if (prev_options != NULL) {
1422        *prev_options = MBREX(regex_default_options);
1423    }
1424    if (prev_syntax != NULL) {
1425        *prev_syntax = MBREX(regex_default_syntax);
1426    }
1427    MBREX(regex_default_options) = options;
1428    MBREX(regex_default_syntax) = syntax;
1429}
1430/* }}} */
1431
1432/* {{{ proto string mb_regex_set_options([string options])
1433   Set or get the default options for mbregex functions */
1434PHP_FUNCTION(mb_regex_set_options)
1435{
1436    OnigOptionType opt;
1437    OnigSyntaxType *syntax;
1438    char *string = NULL;
1439    int string_len;
1440    char buf[16];
1441
1442    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
1443                              &string, &string_len) == FAILURE) {
1444        RETURN_FALSE;
1445    }
1446    if (string != NULL) {
1447        opt = 0;
1448        syntax = NULL;
1449        _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1450        _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
1451    } else {
1452        opt = MBREX(regex_default_options);
1453        syntax = MBREX(regex_default_syntax);
1454    }
1455    _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1456
1457    RETVAL_STRING(buf, 1);
1458}
1459/* }}} */
1460
1461#endif  /* HAVE_MBREGEX */
1462
1463/*
1464 * Local variables:
1465 * tab-width: 4
1466 * c-basic-offset: 4
1467 * End:
1468 * vim600: fdm=marker
1469 * vim: noet sw=4 ts=4
1470 */
1471