1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2015 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include "php.h"
22#include "php_ini.h"
23#include "php_globals.h"
24#include "php_pcre.h"
25#include "ext/standard/info.h"
26#include "ext/standard/basic_functions.h"
27#include "zend_smart_str.h"
28
29#if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31#include "ext/standard/php_string.h"
32
/* Result ordering for global matches; kept in the low byte of the flags. */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
/* Bit flag requesting that every match is stored together with its offset. */
#define PREG_OFFSET_CAPTURE         (1 << 8)

/* PREG_SPLIT_* flag bits (exported to userland under the same names). */
#define PREG_SPLIT_NO_EMPTY         (1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE    (1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1 << 2)

/* Internal preg option recorded when the 'e' pattern modifier is present. */
#define PREG_REPLACE_EVAL           (1 << 0)

/* PREG_GREP_INVERT flag bit (exported to userland under the same name). */
#define PREG_GREP_INVERT            (1 << 0)

/* Number of compiled patterns the cache holds before old entries are evicted. */
#define PCRE_CACHE_SIZE 4096
46
/*
 * Error codes stored in PCRE_G(error_code). The numeric values are exported
 * to userland as the PREG_*_ERROR constants, so they must stay stable.
 */
enum {
    PHP_PCRE_NO_ERROR              = 0,
    PHP_PCRE_INTERNAL_ERROR        = 1,
    PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
    PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
    PHP_PCRE_BAD_UTF8_ERROR        = 4,
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5
};
55
56
57ZEND_DECLARE_MODULE_GLOBALS(pcre)
58
59
60static void pcre_handle_exec_error(int pcre_code) /* {{{ */
61{
62    int preg_code = 0;
63
64    switch (pcre_code) {
65        case PCRE_ERROR_MATCHLIMIT:
66            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
67            break;
68
69        case PCRE_ERROR_RECURSIONLIMIT:
70            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
71            break;
72
73        case PCRE_ERROR_BADUTF8:
74            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
75            break;
76
77        case PCRE_ERROR_BADUTF8_OFFSET:
78            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
79            break;
80
81        default:
82            preg_code = PHP_PCRE_INTERNAL_ERROR;
83            break;
84    }
85
86    PCRE_G(error_code) = preg_code;
87}
88/* }}} */
89
90static void php_free_pcre_cache(zval *data) /* {{{ */
91{
92    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
93    if (!pce) return;
94    pefree(pce->re, 1);
95    if (pce->extra) {
96        pcre_free_study(pce->extra);
97    }
98#if HAVE_SETLOCALE
99    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
100    if (pce->locale) {
101        zend_string_release(pce->locale);
102    }
103#endif
104    pefree(pce, 1);
105}
106/* }}} */
107
108static PHP_GINIT_FUNCTION(pcre) /* {{{ */
109{
110    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
111    pcre_globals->backtrack_limit = 0;
112    pcre_globals->recursion_limit = 0;
113    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
114}
115/* }}} */
116
117static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
118{
119    zend_hash_destroy(&pcre_globals->pcre_cache);
120}
121/* }}} */
122
123PHP_INI_BEGIN()
124    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
125    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
126#ifdef PCRE_STUDY_JIT_COMPILE
127    STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
128#endif
129PHP_INI_END()
130
131
132/* {{{ PHP_MINFO_FUNCTION(pcre) */
133static PHP_MINFO_FUNCTION(pcre)
134{
135    int jit_yes = 0;
136
137    php_info_print_table_start();
138    php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
139    php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
140
141    if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
142        php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
143    } else {
144        php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
145    }
146
147    php_info_print_table_end();
148
149    DISPLAY_INI_ENTRIES();
150}
151/* }}} */
152
153/* {{{ PHP_MINIT_FUNCTION(pcre) */
154static PHP_MINIT_FUNCTION(pcre)
155{
156    REGISTER_INI_ENTRIES();
157
158    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
159    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
160    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
161    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
162    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
163    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
164    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
165
166    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
167    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
168    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
169    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
170    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
171    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
172    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
173
174    return SUCCESS;
175}
176/* }}} */
177
178/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
179static PHP_MSHUTDOWN_FUNCTION(pcre)
180{
181    UNREGISTER_INI_ENTRIES();
182
183    return SUCCESS;
184}
185/* }}} */
186
187/* {{{ static pcre_clean_cache */
188static int pcre_clean_cache(zval *data, void *arg)
189{
190    int *num_clean = (int *)arg;
191
192    if (*num_clean > 0) {
193        (*num_clean)--;
194        return 1;
195    } else {
196        return 0;
197    }
198}
199/* }}} */
200
201/* {{{ static make_subpats_table */
202static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
203{
204    pcre_extra *extra = pce->extra;
205    int name_cnt = pce->name_count, name_size, ni = 0;
206    int rc;
207    char *name_table;
208    unsigned short name_idx;
209    char **subpat_names;
210    int rc1, rc2;
211
212    rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
213    rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
214    rc = rc2 ? rc2 : rc1;
215    if (rc < 0) {
216        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
217        return NULL;
218    }
219
220    subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
221    while (ni++ < name_cnt) {
222        name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
223        subpat_names[name_idx] = name_table + 2;
224        if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
225            php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
226            efree(subpat_names);
227            return NULL;
228        }
229        name_table += name_size;
230    }
231    return subpat_names;
232}
233/* }}} */
234
235/* {{{ pcre_get_compiled_regex_cache
236 */
237PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
238{
239    pcre                *re = NULL;
240    pcre_extra          *extra;
241    int                  coptions = 0;
242    int                  soptions = 0;
243    const char          *error;
244    int                  erroffset;
245    char                 delimiter;
246    char                 start_delimiter;
247    char                 end_delimiter;
248    char                *p, *pp;
249    char                *pattern;
250    int                  do_study = 0;
251    int                  poptions = 0;
252    unsigned const char *tables = NULL;
253    pcre_cache_entry    *pce;
254    pcre_cache_entry     new_entry;
255    int                  rc;
256
257    /* Try to lookup the cached regex entry, and if successful, just pass
258       back the compiled pattern, otherwise go on and compile it. */
259    pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
260    if (pce) {
261#if HAVE_SETLOCALE
262        if (pce->locale == BG(locale_string) ||
263            (pce->locale && BG(locale_string) &&
264             pce->locale->len == BG(locale_string)->len &&
265             !memcmp(pce->locale->val, BG(locale_string)->val, pce->locale->len))) {
266            return pce;
267        }
268#else
269        return pce;
270#endif
271    }
272
273    p = regex->val;
274
275    /* Parse through the leading whitespace, and display a warning if we
276       get to the end without encountering a delimiter. */
277    while (isspace((int)*(unsigned char *)p)) p++;
278    if (*p == 0) {
279        php_error_docref(NULL, E_WARNING,
280                         p < regex->val + regex->len ? "Null byte in regex" : "Empty regular expression");
281        return NULL;
282    }
283
284    /* Get the delimiter and display a warning if it is alphanumeric
285       or a backslash. */
286    delimiter = *p++;
287    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
288        php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
289        return NULL;
290    }
291
292    start_delimiter = delimiter;
293    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
294        delimiter = pp[5];
295    end_delimiter = delimiter;
296
297    pp = p;
298
299    if (start_delimiter == end_delimiter) {
300        /* We need to iterate through the pattern, searching for the ending delimiter,
301           but skipping the backslashed delimiters.  If the ending delimiter is not
302           found, display a warning. */
303        while (*pp != 0) {
304            if (*pp == '\\' && pp[1] != 0) pp++;
305            else if (*pp == delimiter)
306                break;
307            pp++;
308        }
309    } else {
310        /* We iterate through the pattern, searching for the matching ending
311         * delimiter. For each matching starting delimiter, we increment nesting
312         * level, and decrement it for each matching ending delimiter. If we
313         * reach the end of the pattern without matching, display a warning.
314         */
315        int brackets = 1;   /* brackets nesting level */
316        while (*pp != 0) {
317            if (*pp == '\\' && pp[1] != 0) pp++;
318            else if (*pp == end_delimiter && --brackets <= 0)
319                break;
320            else if (*pp == start_delimiter)
321                brackets++;
322            pp++;
323        }
324    }
325
326    if (*pp == 0) {
327        if (pp < regex->val + regex->len) {
328            php_error_docref(NULL,E_WARNING, "Null byte in regex");
329        } else if (start_delimiter == end_delimiter) {
330            php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
331        } else {
332            php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
333        }
334        return NULL;
335    }
336
337    /* Make a copy of the actual pattern. */
338    pattern = estrndup(p, pp-p);
339
340    /* Move on to the options */
341    pp++;
342
343    /* Parse through the options, setting appropriate flags.  Display
344       a warning if we encounter an unknown modifier. */
345    while (pp < regex->val + regex->len) {
346        switch (*pp++) {
347            /* Perl compatible options */
348            case 'i':   coptions |= PCRE_CASELESS;      break;
349            case 'm':   coptions |= PCRE_MULTILINE;     break;
350            case 's':   coptions |= PCRE_DOTALL;        break;
351            case 'x':   coptions |= PCRE_EXTENDED;      break;
352
353            /* PCRE specific options */
354            case 'A':   coptions |= PCRE_ANCHORED;      break;
355            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
356            case 'S':   do_study  = 1;                  break;
357            case 'U':   coptions |= PCRE_UNGREEDY;      break;
358            case 'X':   coptions |= PCRE_EXTRA;         break;
359            case 'u':   coptions |= PCRE_UTF8;
360    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
361       characters, even in UTF-8 mode. However, this can be changed by setting
362       the PCRE_UCP option. */
363#ifdef PCRE_UCP
364                        coptions |= PCRE_UCP;
365#endif
366                break;
367
368            /* Custom preg options */
369            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;
370
371            case ' ':
372            case '\n':
373                break;
374
375            default:
376                if (pp[-1]) {
377                    php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
378                } else {
379                    php_error_docref(NULL,E_WARNING, "Null byte in regex");
380                }
381                efree(pattern);
382                return NULL;
383        }
384    }
385
386#if HAVE_SETLOCALE
387    if (BG(locale_string) &&
388        (BG(locale_string)->len != 1 || BG(locale_string)->val[0] != 'C')) {
389        tables = pcre_maketables();
390    }
391#endif
392
393    /* Compile pattern and display a warning if compilation failed. */
394    re = pcre_compile(pattern,
395                      coptions,
396                      &error,
397                      &erroffset,
398                      tables);
399
400    if (re == NULL) {
401        php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
402        efree(pattern);
403        if (tables) {
404            pefree((void*)tables, 1);
405        }
406        return NULL;
407    }
408
409#ifdef PCRE_STUDY_JIT_COMPILE
410    if (PCRE_G(jit)) {
411        /* Enable PCRE JIT compiler */
412        do_study = 1;
413        soptions |= PCRE_STUDY_JIT_COMPILE;
414    }
415#endif
416
417    /* If study option was specified, study the pattern and
418       store the result in extra for passing to pcre_exec. */
419    if (do_study) {
420        extra = pcre_study(re, soptions, &error);
421        if (extra) {
422            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
423            extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
424            extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
425        }
426        if (error != NULL) {
427            php_error_docref(NULL, E_WARNING, "Error while studying pattern");
428        }
429    } else {
430        extra = NULL;
431    }
432
433    efree(pattern);
434
435    /*
436     * If we reached cache limit, clean out the items from the head of the list;
437     * these are supposedly the oldest ones (but not necessarily the least used
438     * ones).
439     */
440    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
441        int num_clean = PCRE_CACHE_SIZE / 8;
442        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
443    }
444
445    /* Store the compiled pattern and extra info in the cache. */
446    new_entry.re = re;
447    new_entry.extra = extra;
448    new_entry.preg_options = poptions;
449    new_entry.compile_options = coptions;
450#if HAVE_SETLOCALE
451    new_entry.locale = BG(locale_string) ?
452        ((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
453            zend_string_copy(BG(locale_string)) :
454            zend_string_init(BG(locale_string)->val, BG(locale_string)->len, 1)) :
455        NULL;
456    new_entry.tables = tables;
457#endif
458
459    rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
460    if (rc < 0) {
461        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
462        return NULL;
463    }
464
465    rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
466    if (rc < 0) {
467        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
468        return NULL;
469    }
470
471    /*
472     * Interned strings are not duplicated when stored in HashTable,
473     * but all the interned strings created during HTTP request are removed
474     * at end of request. However PCRE_G(pcre_cache) must be consistent
475     * on the next request as well. So we disable usage of interned strings
476     * as hash keys especually for this table.
477     * See bug #63180
478     */
479    pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache), regex->val, regex->len, &new_entry, sizeof(pcre_cache_entry));
480
481    return pce;
482}
483/* }}} */
484
485/* {{{ pcre_get_compiled_regex
486 */
487PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
488{
489    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
490
491    if (extra) {
492        *extra = pce ? pce->extra : NULL;
493    }
494    if (preg_options) {
495        *preg_options = pce ? pce->preg_options : 0;
496    }
497
498    return pce ? pce->re : NULL;
499}
500/* }}} */
501
502/* {{{ pcre_get_compiled_regex_ex
503 */
504PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
505{
506    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
507
508    if (extra) {
509        *extra = pce ? pce->extra : NULL;
510    }
511    if (preg_options) {
512        *preg_options = pce ? pce->preg_options : 0;
513    }
514    if (compile_options) {
515        *compile_options = pce ? pce->compile_options : 0;
516    }
517
518    return pce ? pce->re : NULL;
519}
520/* }}} */
521
522/* {{{ add_offset_pair */
523static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
524{
525    zval match_pair, tmp;
526
527    array_init_size(&match_pair, 2);
528
529    /* Add (match, offset) to the return value */
530    ZVAL_STRINGL(&tmp, str, len);
531    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
532    ZVAL_LONG(&tmp, offset);
533    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
534
535    if (name) {
536        Z_ADDREF(match_pair);
537        zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
538    }
539    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
540}
541/* }}} */
542
543static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
544{
545    /* parameters */
546    zend_string      *regex;            /* Regular expression */
547    zend_string      *subject;          /* String to match against */
548    pcre_cache_entry *pce;              /* Compiled regular expression */
549    zval             *subpats = NULL;   /* Array for subpatterns */
550    zend_long         flags = 0;        /* Match control flags */
551    zend_long         start_offset = 0; /* Where the new search starts */
552
553#ifndef FAST_ZPP
554    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
555                              &subject, &subpats, &flags, &start_offset) == FAILURE) {
556        RETURN_FALSE;
557    }
558#else
559    ZEND_PARSE_PARAMETERS_START(2, 5)
560        Z_PARAM_STR(regex)
561        Z_PARAM_STR(subject)
562        Z_PARAM_OPTIONAL
563        Z_PARAM_ZVAL_EX(subpats, 0, 1)
564        Z_PARAM_LONG(flags)
565        Z_PARAM_LONG(start_offset)
566    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
567#endif
568
569    /* Compile regex or get it from cache. */
570    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
571        RETURN_FALSE;
572    }
573
574    php_pcre_match_impl(pce, subject->val, (int)subject->len, return_value, subpats,
575        global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
576}
577/* }}} */
578
579/* {{{ php_pcre_match_impl() */
580PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
581    zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
582{
583    zval             result_set,        /* Holds a set of subpatterns after
584                                           a global match */
585                    *match_sets = NULL; /* An array of sets of matches for each
586                                           subpattern after a global match */
587    pcre_extra      *extra = pce->extra;/* Holds results of studying */
588    pcre_extra       extra_data;        /* Used locally for exec options */
589    int              exoptions = 0;     /* Execution options */
590    int              count = 0;         /* Count of matched subpatterns */
591    int             *offsets;           /* Array of subpattern offsets */
592    int              num_subpats;       /* Number of captured subpatterns */
593    int              size_offsets;      /* Size of the offsets array */
594    int              matched;           /* Has anything matched */
595    int              g_notempty = 0;    /* If the match should not be empty */
596    const char     **stringlist;        /* Holds list of subpatterns */
597    char           **subpat_names;      /* Array for named subpatterns */
598    int              i;
599    int              subpats_order;     /* Order of subpattern matches */
600    int              offset_capture;    /* Capture match offsets: yes/no */
601    unsigned char   *mark = NULL;       /* Target for MARK name */
602    zval            marks;              /* Array of marks for PREG_PATTERN_ORDER */
603    ALLOCA_FLAG(use_heap);
604
605    ZVAL_UNDEF(&marks);
606
607    /* Overwrite the passed-in value for subpatterns with an empty array. */
608    if (subpats != NULL) {
609        zval_dtor(subpats);
610        array_init(subpats);
611    }
612
613    subpats_order = global ? PREG_PATTERN_ORDER : 0;
614
615    if (use_flags) {
616        offset_capture = flags & PREG_OFFSET_CAPTURE;
617
618        /*
619         * subpats_order is pre-set to pattern mode so we change it only if
620         * necessary.
621         */
622        if (flags & 0xff) {
623            subpats_order = flags & 0xff;
624        }
625        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
626            (!global && subpats_order != 0)) {
627            php_error_docref(NULL, E_WARNING, "Invalid flags specified");
628            return;
629        }
630    } else {
631        offset_capture = 0;
632    }
633
634    /* Negative offset counts from the end of the string. */
635    if (start_offset < 0) {
636        start_offset = subject_len + start_offset;
637        if (start_offset < 0) {
638            start_offset = 0;
639        }
640    }
641
642    if (extra == NULL) {
643        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
644        extra = &extra_data;
645    }
646    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
647    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
648#ifdef PCRE_EXTRA_MARK
649    extra->mark = &mark;
650    extra->flags |= PCRE_EXTRA_MARK;
651#endif
652
653    /* Calculate the size of the offsets array, and allocate memory for it. */
654    num_subpats = pce->capture_count + 1;
655    size_offsets = num_subpats * 3;
656
657    /*
658     * Build a mapping from subpattern numbers to their names. We will
659     * allocate the table only if there are any named subpatterns.
660     */
661    subpat_names = NULL;
662    if (pce->name_count > 0) {
663        subpat_names = make_subpats_table(num_subpats, pce);
664        if (!subpat_names) {
665            RETURN_FALSE;
666        }
667    }
668
669    if (size_offsets <= 32) {
670        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
671    } else {
672        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
673    }
674    memset(offsets, 0, size_offsets*sizeof(int));
675    /* Allocate match sets array and initialize the values. */
676    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
677        match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
678        for (i=0; i<num_subpats; i++) {
679            array_init(&match_sets[i]);
680        }
681    }
682
683    matched = 0;
684    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
685
686    do {
687        /* Execute the regular expression. */
688        count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
689                          exoptions|g_notempty, offsets, size_offsets);
690
691        /* the string was already proved to be valid UTF-8 */
692        exoptions |= PCRE_NO_UTF8_CHECK;
693
694        /* Check for too many substrings condition. */
695        if (count == 0) {
696            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
697            count = size_offsets/3;
698        }
699
700        /* If something has matched */
701        if (count > 0) {
702            matched++;
703
704            /* If subpatterns array has been passed, fill it in with values. */
705            if (subpats != NULL) {
706                /* Try to get the list of substrings and display a warning if failed. */
707                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
708                    if (subpat_names) {
709                        efree(subpat_names);
710                    }
711                    if (size_offsets <= 32) {
712                        free_alloca(offsets, use_heap);
713                    } else {
714                        efree(offsets);
715                    }
716                    if (match_sets) efree(match_sets);
717                    php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
718                    RETURN_FALSE;
719                }
720
721                if (global) {   /* global pattern matching */
722                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
723                        /* For each subpattern, insert it into the appropriate array. */
724                        if (offset_capture) {
725                            for (i = 0; i < count; i++) {
726                                add_offset_pair(&match_sets[i], (char *)stringlist[i],
727                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
728                            }
729                        } else {
730                            for (i = 0; i < count; i++) {
731                                add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
732                                                       offsets[(i<<1)+1] - offsets[i<<1]);
733                            }
734                        }
735                        /* Add MARK, if available */
736                        if (mark) {
737                            if (Z_TYPE(marks) == IS_UNDEF) {
738                                array_init(&marks);
739                            }
740                            add_index_string(&marks, matched - 1, (char *) mark);
741                        }
742                        /*
743                         * If the number of captured subpatterns on this run is
744                         * less than the total possible number, pad the result
745                         * arrays with empty strings.
746                         */
747                        if (count < num_subpats) {
748                            for (; i < num_subpats; i++) {
749                                add_next_index_string(&match_sets[i], "");
750                            }
751                        }
752                    } else {
753                        /* Allocate the result set array */
754                        array_init_size(&result_set, count + (mark ? 1 : 0));
755
756                        /* Add all the subpatterns to it */
757                        if (subpat_names) {
758                            if (offset_capture) {
759                                for (i = 0; i < count; i++) {
760                                    add_offset_pair(&result_set, (char *)stringlist[i],
761                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
762                                }
763                            } else {
764                                for (i = 0; i < count; i++) {
765                                    if (subpat_names[i]) {
766                                        add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
767                                                               offsets[(i<<1)+1] - offsets[i<<1]);
768                                    }
769                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
770                                                           offsets[(i<<1)+1] - offsets[i<<1]);
771                                }
772                            }
773                        } else {
774                            if (offset_capture) {
775                                for (i = 0; i < count; i++) {
776                                    add_offset_pair(&result_set, (char *)stringlist[i],
777                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
778                                }
779                            } else {
780                                for (i = 0; i < count; i++) {
781                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
782                                                           offsets[(i<<1)+1] - offsets[i<<1]);
783                                }
784                            }
785                        }
786                        /* Add MARK, if available */
787                        if (mark) {
788                            add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
789                        }
790                        /* And add it to the output array */
791                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
792                    }
793                } else {            /* single pattern matching */
794                    /* For each subpattern, insert it into the subpatterns array. */
795                    if (subpat_names) {
796                        if (offset_capture) {
797                            for (i = 0; i < count; i++) {
798                                add_offset_pair(subpats, (char *)stringlist[i],
799                                                offsets[(i<<1)+1] - offsets[i<<1],
800                                                offsets[i<<1], subpat_names[i]);
801                            }
802                        } else {
803                            for (i = 0; i < count; i++) {
804                                if (subpat_names[i]) {
805                                    add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
806                                                      offsets[(i<<1)+1] - offsets[i<<1]);
807                                }
808                                add_next_index_stringl(subpats, (char *)stringlist[i],
809                                                       offsets[(i<<1)+1] - offsets[i<<1]);
810                            }
811                        }
812                    } else {
813                        if (offset_capture) {
814                            for (i = 0; i < count; i++) {
815                                add_offset_pair(subpats, (char *)stringlist[i],
816                                                offsets[(i<<1)+1] - offsets[i<<1],
817                                                offsets[i<<1], NULL);
818                            }
819                        } else {
820                            for (i = 0; i < count; i++) {
821                                add_next_index_stringl(subpats, (char *)stringlist[i],
822                                                       offsets[(i<<1)+1] - offsets[i<<1]);
823                            }
824                        }
825                    }
826                    /* Add MARK, if available */
827                    if (mark) {
828                        add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
829                    }
830                }
831
832                pcre_free((void *) stringlist);
833            }
834        } else if (count == PCRE_ERROR_NOMATCH) {
835            /* If we previously set PCRE_NOTEMPTY after a null match,
836               this is not necessarily the end. We need to advance
837               the start offset, and continue. Fudge the offset values
838               to achieve this, unless we're already at the end of the string. */
839            if (g_notempty != 0 && start_offset < subject_len) {
840                offsets[0] = (int)start_offset;
841                offsets[1] = (int)(start_offset + 1);
842            } else
843                break;
844        } else {
845            pcre_handle_exec_error(count);
846            break;
847        }
848
849        /* If we have matched an empty string, mimic what Perl's /g options does.
850           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
851           the match again at the same point. If this fails (picked up above) we
852           advance to the next character. */
853        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
854
855        /* Advance to the position right after the last full match */
856        start_offset = offsets[1];
857    } while (global);
858
859    /* Add the match sets to the output array and clean up */
860    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
861        if (subpat_names) {
862            for (i = 0; i < num_subpats; i++) {
863                if (subpat_names[i]) {
864                    zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
865                                     strlen(subpat_names[i]), &match_sets[i]);
866                    Z_ADDREF(match_sets[i]);
867                }
868                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
869            }
870        } else {
871            for (i = 0; i < num_subpats; i++) {
872                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
873            }
874        }
875        efree(match_sets);
876
877        if (Z_TYPE(marks) != IS_UNDEF) {
878            add_assoc_zval(subpats, "MARK", &marks);
879        }
880    }
881
882    if (size_offsets <= 32) {
883        free_alloca(offsets, use_heap);
884    } else {
885        efree(offsets);
886    }
887    if (subpat_names) {
888        efree(subpat_names);
889    }
890
891    /* Did we encounter an error? */
892    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
893        RETVAL_LONG(matched);
894    } else {
895        RETVAL_FALSE;
896    }
897}
898/* }}} */
899
900/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
901   Perform a Perl-style regular expression match */
902static PHP_FUNCTION(preg_match)
903{
904    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
905}
906/* }}} */
907
908/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
909   Perform a Perl-style global regular expression match */
910static PHP_FUNCTION(preg_match_all)
911{
912    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
913}
914/* }}} */
915
/* {{{ preg_get_backref
 *
 * Parse a backreference out of a replacement string.
 *
 * On entry *str points at the introducer character (the callers in this file
 * only invoke it when that character is '\\' or '$'). Accepted forms are the
 * introducer followed by one or two decimal digits, or "${" + one or two
 * digits + "}".
 *
 * Returns 1 on success, storing the group number in *backref and advancing
 * *str past the reference. Returns 0 if no reference is present; *str is then
 * left untouched, but *backref may already hold a partially parsed number.
 */
static int preg_get_backref(char **str, int *backref)
{
    const char *start = *str;
    char       *cursor;
    int         braced;

    /* The introducer is the last character: nothing can follow it. */
    if (start[1] == '\0') {
        return 0;
    }

    /* "${" opens the braced form; skip both characters, otherwise only one. */
    braced = (start[0] == '$' && start[1] == '{');
    cursor = *str + (braced ? 2 : 1);

    /* First digit is mandatory. */
    if (*cursor < '0' || *cursor > '9') {
        return 0;
    }
    *backref = *cursor - '0';
    cursor++;

    /* Second digit is optional. */
    if (*cursor >= '0' && *cursor <= '9') {
        *backref = *backref * 10 + (*cursor - '0');
        cursor++;
    }

    /* The braced form must be closed right after the digits. */
    if (braced) {
        if (*cursor != '}') {
            return 0;
        }
        cursor++;
    }

    *str = cursor;
    return 1;
}
/* }}} */
954
955/* {{{ preg_do_repl_func
956 */
957static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
958{
959    zend_string *result_str;
960    zval         retval;            /* Function return value */
961    zval         args[1];           /* Argument to pass to function */
962    int          i;
963
964    array_init_size(&args[0], count + (mark ? 1 : 0));
965    if (subpat_names) {
966        for (i = 0; i < count; i++) {
967            if (subpat_names[i]) {
968                add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
969            }
970            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
971        }
972    } else {
973        for (i = 0; i < count; i++) {
974            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
975        }
976    }
977    if (mark) {
978        add_assoc_string(&args[0], "MARK", (char *) mark);
979    }
980
981    if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
982        result_str = zval_get_string(&retval);
983        zval_ptr_dtor(&retval);
984    } else {
985        if (!EG(exception)) {
986            php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
987        }
988
989        result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
990    }
991
992    zval_ptr_dtor(&args[0]);
993
994    return result_str;
995}
996/* }}} */
997
998/* {{{ php_pcre_replace
999 */
1000PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1001                              zend_string *subject_str,
1002                              char *subject, int subject_len,
1003                              zval *replace_val, int is_callable_replace,
1004                              int limit, int *replace_count)
1005{
1006    pcre_cache_entry    *pce;               /* Compiled regular expression */
1007
1008    /* Compile regex or get it from cache. */
1009    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1010        return NULL;
1011    }
1012
1013    return php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
1014        is_callable_replace, limit, replace_count);
1015}
1016/* }}} */
1017
1018/* {{{ php_pcre_replace_impl() */
1019PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val,
1020    int is_callable_replace, int limit, int *replace_count)
1021{
1022    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1023    pcre_extra       extra_data;        /* Used locally for exec options */
1024    int              exoptions = 0;     /* Execution options */
1025    int              count = 0;         /* Count of matched subpatterns */
1026    int             *offsets;           /* Array of subpattern offsets */
1027    char            **subpat_names;     /* Array for named subpatterns */
1028    int              num_subpats;       /* Number of captured subpatterns */
1029    int              size_offsets;      /* Size of the offsets array */
1030    int              new_len;           /* Length of needed storage */
1031    int              alloc_len;         /* Actual allocated length */
1032    int              match_len;         /* Length of the current match */
1033    int              backref;           /* Backreference number */
1034    int              start_offset;      /* Where the new search starts */
1035    int              g_notempty=0;      /* If the match should not be empty */
1036    int              replace_len=0;     /* Length of replacement string */
1037    char            *replace=NULL,      /* Replacement string */
1038                    *walkbuf,           /* Location of current replacement in the result */
1039                    *walk,              /* Used to walk the replacement string */
1040                    *match,             /* The current match */
1041                    *piece,             /* The current piece of subject */
1042                    *replace_end=NULL,  /* End of replacement string */
1043                     walk_last;         /* Last walked character */
1044    int              result_len;        /* Length of result */
1045    unsigned char   *mark = NULL;       /* Target for MARK name */
1046    zend_string     *result;            /* Result of replacement */
1047    zend_string     *eval_result=NULL;  /* Result of custom function */
1048    ALLOCA_FLAG(use_heap);
1049
1050    if (extra == NULL) {
1051        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1052        extra = &extra_data;
1053    }
1054    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1055    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1056
1057    if (pce->preg_options & PREG_REPLACE_EVAL) {
1058        php_error_docref(NULL TSRMLS_CC, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1059        return NULL;
1060    }
1061    if (!is_callable_replace) {
1062        replace = Z_STRVAL_P(replace_val);
1063        replace_len = (int)Z_STRLEN_P(replace_val);
1064        replace_end = replace + replace_len;
1065    }
1066
1067    /* Calculate the size of the offsets array, and allocate memory for it. */
1068    num_subpats = pce->capture_count + 1;
1069    size_offsets = num_subpats * 3;
1070    if (size_offsets <= 32) {
1071        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1072    } else {
1073        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1074    }
1075
1076    /*
1077     * Build a mapping from subpattern numbers to their names. We will
1078     * allocate the table only if there are any named subpatterns.
1079     */
1080    subpat_names = NULL;
1081    if (pce->name_count > 0) {
1082        subpat_names = make_subpats_table(num_subpats, pce);
1083        if (!subpat_names) {
1084            return NULL;
1085        }
1086    }
1087
1088    alloc_len = 0;
1089    result = NULL;
1090
1091    /* Initialize */
1092    match = NULL;
1093    start_offset = 0;
1094    result_len = 0;
1095    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1096
1097    while (1) {
1098#ifdef PCRE_EXTRA_MARK
1099        extra->mark = &mark;
1100        extra->flags |= PCRE_EXTRA_MARK;
1101#endif
1102        /* Execute the regular expression. */
1103        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1104                          exoptions|g_notempty, offsets, size_offsets);
1105
1106        /* the string was already proved to be valid UTF-8 */
1107        exoptions |= PCRE_NO_UTF8_CHECK;
1108
1109        /* Check for too many substrings condition. */
1110        if (count == 0) {
1111            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1112            count = size_offsets/3;
1113        }
1114
1115        piece = subject + start_offset;
1116
1117        if (count > 0 && (limit == -1 || limit > 0)) {
1118            if (replace_count) {
1119                ++*replace_count;
1120            }
1121            /* Set the match location in subject */
1122            match = subject + offsets[0];
1123
1124            new_len = result_len + offsets[0] - start_offset; /* part before the match */
1125
1126            if (is_callable_replace) {
1127                /* Use custom function to get replacement string and its length. */
1128                eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1129                new_len += (int)eval_result->len;
1130            } else { /* do regular substitution */
1131                walk = replace;
1132                walk_last = 0;
1133                while (walk < replace_end) {
1134                    if ('\\' == *walk || '$' == *walk) {
1135                        if (walk_last == '\\') {
1136                            walk++;
1137                            walk_last = 0;
1138                            continue;
1139                        }
1140                        if (preg_get_backref(&walk, &backref)) {
1141                            if (backref < count)
1142                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1143                            continue;
1144                        }
1145                    }
1146                    new_len++;
1147                    walk++;
1148                    walk_last = walk[-1];
1149                }
1150            }
1151
1152            if (new_len >= alloc_len) {
1153                if (alloc_len == 0) {
1154                    alloc_len = 2 * subject_len;
1155                    if (new_len >= alloc_len) {
1156                        alloc_len = alloc_len + 2 * new_len;
1157                    }
1158                    result = zend_string_alloc(alloc_len, 0);
1159                } else {
1160                    alloc_len = alloc_len + 2 * new_len;
1161                    result = zend_string_realloc(result, alloc_len, 0);
1162                }
1163            }
1164            /* copy the part of the string before the match */
1165            memcpy(&result->val[result_len], piece, match-piece);
1166            result_len += (int)(match-piece);
1167
1168            /* copy replacement and backrefs */
1169            walkbuf = result->val + result_len;
1170
1171            /* If using custom function, copy result to the buffer and clean up. */
1172            if (is_callable_replace) {
1173                memcpy(walkbuf, eval_result->val, eval_result->len);
1174                result_len += (int)eval_result->len;
1175                if (eval_result) zend_string_release(eval_result);
1176            } else { /* do regular backreference copying */
1177                walk = replace;
1178                walk_last = 0;
1179                while (walk < replace_end) {
1180                    if ('\\' == *walk || '$' == *walk) {
1181                        if (walk_last == '\\') {
1182                            *(walkbuf-1) = *walk++;
1183                            walk_last = 0;
1184                            continue;
1185                        }
1186                        if (preg_get_backref(&walk, &backref)) {
1187                            if (backref < count) {
1188                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1189                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1190                                walkbuf += match_len;
1191                            }
1192                            continue;
1193                        }
1194                    }
1195                    *walkbuf++ = *walk++;
1196                    walk_last = walk[-1];
1197                }
1198                *walkbuf = '\0';
1199                /* increment the result length by how much we've added to the string */
1200                result_len += (int)(walkbuf - (result->val + result_len));
1201            }
1202
1203            if (limit != -1)
1204                limit--;
1205
1206        } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1207            /* If we previously set PCRE_NOTEMPTY after a null match,
1208               this is not necessarily the end. We need to advance
1209               the start offset, and continue. Fudge the offset values
1210               to achieve this, unless we're already at the end of the string. */
1211            if (g_notempty != 0 && start_offset < subject_len) {
1212                offsets[0] = start_offset;
1213                offsets[1] = start_offset + 1;
1214                memcpy(&result->val[result_len], piece, 1);
1215                result_len++;
1216            } else {
1217                if (!result && subject_str) {
1218                    result = zend_string_copy(subject_str);
1219                    break;
1220                }
1221                new_len = result_len + subject_len - start_offset;
1222                if (new_len > alloc_len) {
1223                    alloc_len = new_len; /* now we know exactly how long it is */
1224                    if (NULL != result) {
1225                        result = zend_string_realloc(result, alloc_len, 0);
1226                    } else {
1227                        result = zend_string_alloc(alloc_len, 0);
1228                    }
1229                }
1230                /* stick that last bit of string on our output */
1231                memcpy(&result->val[result_len], piece, subject_len - start_offset);
1232                result_len += subject_len - start_offset;
1233                result->val[result_len] = '\0';
1234                result->len = result_len;
1235                break;
1236            }
1237        } else {
1238            pcre_handle_exec_error(count);
1239            if (result) {
1240                zend_string_free(result);
1241                result = NULL;
1242            }
1243            break;
1244        }
1245
1246        /* If we have matched an empty string, mimic what Perl's /g options does.
1247           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1248           the match again at the same point. If this fails (picked up above) we
1249           advance to the next character. */
1250        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1251
1252        /* Advance to the next piece. */
1253        start_offset = offsets[1];
1254    }
1255
1256    if (size_offsets <= 32) {
1257        free_alloca(offsets, use_heap);
1258    } else {
1259        efree(offsets);
1260    }
1261    if (subpat_names) {
1262        efree(subpat_names);
1263    }
1264
1265    return result;
1266}
1267/* }}} */
1268
1269/* {{{ php_replace_in_subject
1270 */
1271static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1272{
1273    zval        *regex_entry,
1274                *replace_entry = NULL,
1275                *replace_value,
1276                 empty_replace;
1277    zend_string *result;
1278    zend_string *subject_str = zval_get_string(subject);
1279    uint32_t replace_idx;
1280
1281    /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1282    ZVAL_EMPTY_STRING(&empty_replace);
1283
1284    /* If regex is an array */
1285    if (Z_TYPE_P(regex) == IS_ARRAY) {
1286        replace_value = replace;
1287        replace_idx = 0;
1288
1289        /* For each entry in the regex array, get the entry */
1290        ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1291            /* Make sure we're dealing with strings. */
1292            zend_string *regex_str = zval_get_string(regex_entry);
1293
1294            /* If replace is an array and not a callable construct */
1295            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1296                /* Get current entry */
1297                replace_entry = NULL;
1298                while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1299                    if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
1300                        replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
1301                        break;
1302                    }
1303                    replace_idx++;
1304                }
1305                if (replace_entry != NULL) {
1306                    if (!is_callable_replace) {
1307                        convert_to_string_ex(replace_entry);
1308                    }
1309                    replace_value = replace_entry;
1310                    replace_idx++;
1311                } else {
1312                    /* We've run out of replacement strings, so use an empty one */
1313                    replace_value = &empty_replace;
1314                }
1315            }
1316
1317            /* Do the actual replacement and put the result back into subject_str
1318               for further replacements. */
1319            if ((result = php_pcre_replace(regex_str,
1320                                           subject_str,
1321                                           subject_str->val,
1322                                           (int)subject_str->len,
1323                                           replace_value,
1324                                           is_callable_replace,
1325                                           limit,
1326                                           replace_count)) != NULL) {
1327                zend_string_release(subject_str);
1328                subject_str = result;
1329            } else {
1330                zend_string_release(subject_str);
1331                zend_string_release(regex_str);
1332                return NULL;
1333            }
1334
1335            zend_string_release(regex_str);
1336        } ZEND_HASH_FOREACH_END();
1337
1338        return subject_str;
1339    } else {
1340        result = php_pcre_replace(Z_STR_P(regex),
1341                                  subject_str,
1342                                  subject_str->val,
1343                                  (int)subject_str->len,
1344                                  replace,
1345                                  is_callable_replace,
1346                                  limit,
1347                                  replace_count);
1348        zend_string_release(subject_str);
1349        return result;
1350    }
1351}
1352/* }}} */
1353
1354/* {{{ preg_replace_impl
1355 */
1356static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1357{
1358    zval            *regex,
1359                    *replace,
1360                    *subject,
1361                    *subject_entry,
1362                    *zcount = NULL;
1363    int              limit_val = -1;
1364    zend_long        limit = -1;
1365    zend_string     *result;
1366    zend_string     *string_key;
1367    zend_ulong       num_key;
1368    zend_string     *callback_name;
1369    int              replace_count=0, old_replace_count;
1370
1371#ifndef FAST_ZPP
1372    /* Get function parameters and do error-checking. */
1373    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1374        return;
1375    }
1376#else
1377    ZEND_PARSE_PARAMETERS_START(3, 5)
1378        Z_PARAM_ZVAL(regex)
1379        Z_PARAM_ZVAL(replace)
1380        Z_PARAM_ZVAL(subject)
1381        Z_PARAM_OPTIONAL
1382        Z_PARAM_LONG(limit)
1383        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1384    ZEND_PARSE_PARAMETERS_END();
1385#endif
1386
1387    if (!is_callable_replace && Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1388        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1389        RETURN_FALSE;
1390    }
1391
1392    if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1393        SEPARATE_ZVAL(replace);
1394        convert_to_string_ex(replace);
1395    }
1396    if (is_callable_replace) {
1397        if (!zend_is_callable(replace, 0, &callback_name)) {
1398            php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name->val);
1399            zend_string_release(callback_name);
1400            ZVAL_DUP(return_value, subject);
1401            return;
1402        }
1403        zend_string_release(callback_name);
1404    }
1405
1406    if (ZEND_NUM_ARGS() > 3) {
1407        limit_val = (int)limit;
1408    }
1409
1410    if (Z_TYPE_P(regex) != IS_ARRAY) {
1411        SEPARATE_ZVAL(regex);
1412        convert_to_string_ex(regex);
1413    }
1414
1415    /* if subject is an array */
1416    if (Z_TYPE_P(subject) == IS_ARRAY) {
1417        array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1418
1419        /* For each subject entry, convert it to string, then perform replacement
1420           and add the result to the return_value array. */
1421        ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1422            old_replace_count = replace_count;
1423            if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1424                if (!is_filter || replace_count > old_replace_count) {
1425                    /* Add to return array */
1426                    if (string_key) {
1427                        add_assoc_str_ex(return_value, string_key->val, string_key->len, result);
1428                    } else {
1429                        add_index_str(return_value, num_key, result);
1430                    }
1431                } else {
1432                    zend_string_release(result);
1433                }
1434            }
1435        } ZEND_HASH_FOREACH_END();
1436    } else {    /* if subject is not an array */
1437        old_replace_count = replace_count;
1438        if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1439            if (!is_filter || replace_count > old_replace_count) {
1440                RETVAL_STR(result);
1441            } else {
1442                zend_string_release(result);
1443            }
1444        }
1445    }
1446    if (ZEND_NUM_ARGS() > 4) {
1447        zval_dtor(zcount);
1448        ZVAL_LONG(zcount, replace_count);
1449    }
1450
1451}
1452/* }}} */
1453
1454/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1455   Perform Perl-style regular expression replacement. */
1456static PHP_FUNCTION(preg_replace)
1457{
1458    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1459}
1460/* }}} */
1461
1462/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1463   Perform Perl-style regular expression replacement using replacement callback. */
1464static PHP_FUNCTION(preg_replace_callback)
1465{
1466    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1467}
1468/* }}} */
1469
1470/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1471   Perform Perl-style regular expression replacement and only return matches. */
1472static PHP_FUNCTION(preg_filter)
1473{
1474    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1475}
1476/* }}} */
1477
1478/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1479   Split string into an array using a perl-style regular expression as a delimiter */
1480static PHP_FUNCTION(preg_split)
1481{
1482    zend_string         *regex;         /* Regular expression */
1483    zend_string         *subject;       /* String to match against */
1484    zend_long            limit_val = -1;/* Integer value of limit */
1485    zend_long            flags = 0;     /* Match control flags */
1486    pcre_cache_entry    *pce;           /* Compiled regular expression */
1487
1488    /* Get function parameters and do error checking */
1489#ifndef FAST_ZPP
1490    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
1491                              &subject, &limit_val, &flags) == FAILURE) {
1492        RETURN_FALSE;
1493    }
1494#else
1495    ZEND_PARSE_PARAMETERS_START(2, 4)
1496        Z_PARAM_STR(regex)
1497        Z_PARAM_STR(subject)
1498        Z_PARAM_OPTIONAL
1499        Z_PARAM_LONG(limit_val)
1500        Z_PARAM_LONG(flags)
1501    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1502#endif
1503
1504    /* Compile regex or get it from cache. */
1505    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1506        RETURN_FALSE;
1507    }
1508
1509    php_pcre_split_impl(pce, subject->val, (int)subject->len, return_value, (int)limit_val, flags);
1510}
1511/* }}} */
1512
1513/* {{{ php_pcre_split
1514 */
1515PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1516    zend_long limit_val, zend_long flags)
1517{
1518    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1519    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1520    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1521    pcre_extra       extra_data;        /* Used locally for exec options */
1522    int             *offsets;           /* Array of subpattern offsets */
1523    int              size_offsets;      /* Size of the offsets array */
1524    int              exoptions = 0;     /* Execution options */
1525    int              count = 0;         /* Count of matched subpatterns */
1526    int              start_offset;      /* Where the new search starts */
1527    int              next_offset;       /* End of the last delimiter match + 1 */
1528    int              g_notempty = 0;    /* If the match should not be empty */
1529    char            *last_match;        /* Location of last match */
1530    int              no_empty;          /* If NO_EMPTY flag is set */
1531    int              delim_capture;     /* If delimiters should be captured */
1532    int              offset_capture;    /* If offsets should be captured */
1533    zval             tmp;
1534    ALLOCA_FLAG(use_heap);
1535
1536    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1537    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1538    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1539
1540    if (limit_val == 0) {
1541        limit_val = -1;
1542    }
1543
1544    if (extra == NULL) {
1545        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1546        extra = &extra_data;
1547    }
1548    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1549    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1550#ifdef PCRE_EXTRA_MARK
1551    extra->flags &= ~PCRE_EXTRA_MARK;
1552#endif
1553
1554    /* Initialize return value */
1555    array_init(return_value);
1556
1557    /* Calculate the size of the offsets array, and allocate memory for it. */
1558    size_offsets = (pce->capture_count + 1) * 3;
1559    if (size_offsets <= 32) {
1560        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1561    } else {
1562        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1563    }
1564
1565    /* Start at the beginning of the string */
1566    start_offset = 0;
1567    next_offset = 0;
1568    last_match = subject;
1569    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1570
1571    /* Get next piece if no limit or limit not yet reached and something matched*/
1572    while ((limit_val == -1 || limit_val > 1)) {
1573        count = pcre_exec(pce->re, extra, subject,
1574                          subject_len, start_offset,
1575                          exoptions|g_notempty, offsets, size_offsets);
1576
1577        /* the string was already proved to be valid UTF-8 */
1578        exoptions |= PCRE_NO_UTF8_CHECK;
1579
1580        /* Check for too many substrings condition. */
1581        if (count == 0) {
1582            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1583            count = size_offsets/3;
1584        }
1585
1586        /* If something matched */
1587        if (count > 0) {
1588            if (!no_empty || &subject[offsets[0]] != last_match) {
1589
1590                if (offset_capture) {
1591                    /* Add (match, offset) pair to the return value */
1592                    add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1593                } else {
1594                    /* Add the piece to the return value */
1595                    ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1596                    zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1597                }
1598
1599                /* One less left to do */
1600                if (limit_val != -1)
1601                    limit_val--;
1602            }
1603
1604            last_match = &subject[offsets[1]];
1605            next_offset = offsets[1];
1606
1607            if (delim_capture) {
1608                int i, match_len;
1609                for (i = 1; i < count; i++) {
1610                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1611                    /* If we have matched a delimiter */
1612                    if (!no_empty || match_len > 0) {
1613                        if (offset_capture) {
1614                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1615                        } else {
1616                            ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1617                            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1618                        }
1619                    }
1620                }
1621            }
1622        } else if (count == PCRE_ERROR_NOMATCH) {
1623            /* If we previously set PCRE_NOTEMPTY after a null match,
1624               this is not necessarily the end. We need to advance
1625               the start offset, and continue. Fudge the offset values
1626               to achieve this, unless we're already at the end of the string. */
1627            if (g_notempty != 0 && start_offset < subject_len) {
1628                if (pce->compile_options & PCRE_UTF8) {
1629                    if (re_bump == NULL) {
1630                        int dummy;
1631                        zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
1632                        re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy);
1633                        zend_string_release(regex);
1634                        if (re_bump == NULL) {
1635                            RETURN_FALSE;
1636                        }
1637                    }
1638                    count = pcre_exec(re_bump, extra_bump, subject,
1639                              subject_len, start_offset,
1640                              exoptions, offsets, size_offsets);
1641                    if (count < 1) {
1642                        php_error_docref(NULL, E_WARNING, "Unknown error");
1643                        RETURN_FALSE;
1644                    }
1645                } else {
1646                    offsets[0] = start_offset;
1647                    offsets[1] = start_offset + 1;
1648                }
1649            } else
1650                break;
1651        } else {
1652            pcre_handle_exec_error(count);
1653            break;
1654        }
1655
1656        /* If we have matched an empty string, mimic what Perl's /g options does.
1657           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1658           the match again at the same point. If this fails (picked up above) we
1659           advance to the next character. */
1660        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1661
1662        /* Advance to the position right after the last full match */
1663        start_offset = offsets[1];
1664    }
1665
1666
1667    start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1668
1669    if (!no_empty || start_offset < subject_len)
1670    {
1671        if (offset_capture) {
1672            /* Add the last (match, offset) pair to the return value */
1673            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1674        } else {
1675            /* Add the last piece to the return value */
1676            ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1677            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1678        }
1679    }
1680
1681
1682    /* Clean up */
1683    if (size_offsets <= 32) {
1684        free_alloca(offsets, use_heap);
1685    } else {
1686        efree(offsets);
1687    }
1688}
1689/* }}} */
1690
1691/* {{{ proto string preg_quote(string str [, string delim_char])
1692   Quote regular expression characters plus an optional character */
1693static PHP_FUNCTION(preg_quote)
1694{
1695    size_t       in_str_len;
1696    char    *in_str;        /* Input string argument */
1697    char    *in_str_end;    /* End of the input string */
1698    size_t       delim_len = 0;
1699    char    *delim = NULL;  /* Additional delimiter argument */
1700    zend_string *out_str;   /* Output string with quoted characters */
1701    char    *p,             /* Iterator for input string */
1702            *q,             /* Iterator for output string */
1703             delim_char=0,  /* Delimiter character to be quoted */
1704             c;             /* Current character */
1705    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1706
1707    /* Get the arguments and check for errors */
1708#ifndef FAST_ZPP
1709    if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
1710                              &delim, &delim_len) == FAILURE) {
1711        return;
1712    }
1713#else
1714    ZEND_PARSE_PARAMETERS_START(1, 2)
1715        Z_PARAM_STRING(in_str, in_str_len)
1716        Z_PARAM_OPTIONAL
1717        Z_PARAM_STRING(delim, delim_len)
1718    ZEND_PARSE_PARAMETERS_END();
1719#endif
1720
1721    in_str_end = in_str + in_str_len;
1722
1723    /* Nothing to do if we got an empty string */
1724    if (in_str == in_str_end) {
1725        RETURN_EMPTY_STRING();
1726    }
1727
1728    if (delim && *delim) {
1729        delim_char = delim[0];
1730        quote_delim = 1;
1731    }
1732
1733    /* Allocate enough memory so that even if each character
1734       is quoted, we won't run out of room */
1735    out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1736
1737    /* Go through the string and quote necessary characters */
1738    for (p = in_str, q = out_str->val; p != in_str_end; p++) {
1739        c = *p;
1740        switch(c) {
1741            case '.':
1742            case '\\':
1743            case '+':
1744            case '*':
1745            case '?':
1746            case '[':
1747            case '^':
1748            case ']':
1749            case '$':
1750            case '(':
1751            case ')':
1752            case '{':
1753            case '}':
1754            case '=':
1755            case '!':
1756            case '>':
1757            case '<':
1758            case '|':
1759            case ':':
1760            case '-':
1761                *q++ = '\\';
1762                *q++ = c;
1763                break;
1764
1765            case '\0':
1766                *q++ = '\\';
1767                *q++ = '0';
1768                *q++ = '0';
1769                *q++ = '0';
1770                break;
1771
1772            default:
1773                if (quote_delim && c == delim_char)
1774                    *q++ = '\\';
1775                *q++ = c;
1776                break;
1777        }
1778    }
1779    *q = '\0';
1780
1781    /* Reallocate string and return it */
1782    out_str = zend_string_realloc(out_str, q - out_str->val, 0);
1783    RETURN_STR(out_str);
1784}
1785/* }}} */
1786
1787/* {{{ proto array preg_grep(string regex, array input [, int flags])
1788   Searches array and returns entries which match regex */
1789static PHP_FUNCTION(preg_grep)
1790{
1791    zend_string         *regex;         /* Regular expression */
1792    zval                *input;         /* Input array */
1793    zend_long            flags = 0;     /* Match control flags */
1794    pcre_cache_entry    *pce;           /* Compiled regular expression */
1795
1796    /* Get arguments and do error checking */
1797#ifndef FAST_ZPP
1798    if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
1799                              &input, &flags) == FAILURE) {
1800        return;
1801    }
1802#else
1803    ZEND_PARSE_PARAMETERS_START(2, 3)
1804        Z_PARAM_STR(regex)
1805        Z_PARAM_ARRAY(input)
1806        Z_PARAM_OPTIONAL
1807        Z_PARAM_LONG(flags)
1808    ZEND_PARSE_PARAMETERS_END();
1809#endif
1810
1811    /* Compile regex or get it from cache. */
1812    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1813        RETURN_FALSE;
1814    }
1815
1816    php_pcre_grep_impl(pce, input, return_value, flags);
1817}
1818/* }}} */
1819
1820PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
1821{
1822    zval            *entry;             /* An entry in the input array */
1823    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1824    pcre_extra       extra_data;        /* Used locally for exec options */
1825    int             *offsets;           /* Array of subpattern offsets */
1826    int              size_offsets;      /* Size of the offsets array */
1827    int              count = 0;         /* Count of matched subpatterns */
1828    zend_string     *string_key;
1829    zend_ulong       num_key;
1830    zend_bool        invert;            /* Whether to return non-matching
1831                                           entries */
1832    ALLOCA_FLAG(use_heap);
1833
1834    invert = flags & PREG_GREP_INVERT ? 1 : 0;
1835
1836    if (extra == NULL) {
1837        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1838        extra = &extra_data;
1839    }
1840    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1841    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1842#ifdef PCRE_EXTRA_MARK
1843    extra->flags &= ~PCRE_EXTRA_MARK;
1844#endif
1845
1846    /* Calculate the size of the offsets array, and allocate memory for it. */
1847    size_offsets = (pce->capture_count + 1) * 3;
1848    if (size_offsets <= 32) {
1849        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1850    } else {
1851        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1852    }
1853
1854    /* Initialize return array */
1855    array_init(return_value);
1856
1857    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1858
1859    /* Go through the input array */
1860    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
1861        zend_string *subject_str = zval_get_string(entry);
1862
1863        /* Perform the match */
1864        count = pcre_exec(pce->re, extra, subject_str->val,
1865                          (int)subject_str->len, 0,
1866                          0, offsets, size_offsets);
1867
1868        /* Check for too many substrings condition. */
1869        if (count == 0) {
1870            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1871            count = size_offsets/3;
1872        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1873            pcre_handle_exec_error(count);
1874            zend_string_release(subject_str);
1875            break;
1876        }
1877
1878        /* If the entry fits our requirements */
1879        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1880            if (Z_REFCOUNTED_P(entry)) {
1881                Z_ADDREF_P(entry);
1882            }
1883
1884            /* Add to return array */
1885            if (string_key) {
1886                zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
1887            } else {
1888                zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
1889            }
1890        }
1891
1892        zend_string_release(subject_str);
1893    } ZEND_HASH_FOREACH_END();
1894
1895    /* Clean up */
1896    if (size_offsets <= 32) {
1897        free_alloca(offsets, use_heap);
1898    } else {
1899        efree(offsets);
1900    }
1901}
1902/* }}} */
1903
1904/* {{{ proto int preg_last_error()
1905   Returns the error code of the last regexp execution. */
1906static PHP_FUNCTION(preg_last_error)
1907{
1908#ifndef FAST_ZPP
1909    if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
1910        return;
1911    }
1912#else
1913    ZEND_PARSE_PARAMETERS_START(0, 0)
1914    ZEND_PARSE_PARAMETERS_END();
1915#endif
1916
1917    RETURN_LONG(PCRE_G(error_code));
1918}
1919/* }}} */
1920
1921/* {{{ module definition structures */
1922
1923/* {{{ arginfo */
/* Argument descriptors for the preg_* functions. Reading the Zend arginfo
   macros: the last argument of ZEND_BEGIN_ARG_INFO_EX is the number of
   required parameters, and the first argument of ZEND_ARG_INFO is 1 for a
   parameter passed by reference, 0 otherwise. */

/* preg_match: pattern and subject required; subpatterns is by reference. */
1924ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
1925    ZEND_ARG_INFO(0, pattern)
1926    ZEND_ARG_INFO(0, subject)
1927    ZEND_ARG_INFO(1, subpatterns) /* array */
1928    ZEND_ARG_INFO(0, flags)
1929    ZEND_ARG_INFO(0, offset)
1930ZEND_END_ARG_INFO()
1931
/* preg_match_all: same parameter list as preg_match. */
1932ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
1933    ZEND_ARG_INFO(0, pattern)
1934    ZEND_ARG_INFO(0, subject)
1935    ZEND_ARG_INFO(1, subpatterns) /* array */
1936    ZEND_ARG_INFO(0, flags)
1937    ZEND_ARG_INFO(0, offset)
1938ZEND_END_ARG_INFO()
1939
/* preg_replace (also used for preg_filter in the function table): three
   required parameters; count is by reference. */
1940ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
1941    ZEND_ARG_INFO(0, regex)
1942    ZEND_ARG_INFO(0, replace)
1943    ZEND_ARG_INFO(0, subject)
1944    ZEND_ARG_INFO(0, limit)
1945    ZEND_ARG_INFO(1, count)
1946ZEND_END_ARG_INFO()
1947
/* preg_replace_callback: like preg_replace, with a callback in place of the
   replacement. */
1948ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
1949    ZEND_ARG_INFO(0, regex)
1950    ZEND_ARG_INFO(0, callback)
1951    ZEND_ARG_INFO(0, subject)
1952    ZEND_ARG_INFO(0, limit)
1953    ZEND_ARG_INFO(1, count)
1954ZEND_END_ARG_INFO()
1955
/* preg_split: pattern and subject required; limit and flags optional. */
1956ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
1957    ZEND_ARG_INFO(0, pattern)
1958    ZEND_ARG_INFO(0, subject)
1959    ZEND_ARG_INFO(0, limit)
1960    ZEND_ARG_INFO(0, flags)
1961ZEND_END_ARG_INFO()
1962
/* preg_quote: str required; delim_char optional. */
1963ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
1964    ZEND_ARG_INFO(0, str)
1965    ZEND_ARG_INFO(0, delim_char)
1966ZEND_END_ARG_INFO()
1967
/* preg_grep: regex and input required; flags optional. */
1968ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
1969    ZEND_ARG_INFO(0, regex)
1970    ZEND_ARG_INFO(0, input) /* array */
1971    ZEND_ARG_INFO(0, flags)
1972ZEND_END_ARG_INFO()
1973
/* preg_last_error: declares no parameters. */
1974ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
1975ZEND_END_ARG_INFO()
1976/* }}} */
1977
/* Function table of the extension: each PHP_FE entry pairs a function name
   with the arginfo describing its parameters. preg_filter has no arginfo of
   its own and reuses arginfo_preg_replace. PHP_FE_END terminates the table. */
1978static const zend_function_entry pcre_functions[] = {
1979    PHP_FE(preg_match,              arginfo_preg_match)
1980    PHP_FE(preg_match_all,          arginfo_preg_match_all)
1981    PHP_FE(preg_replace,            arginfo_preg_replace)
1982    PHP_FE(preg_replace_callback,   arginfo_preg_replace_callback)
1983    PHP_FE(preg_filter,             arginfo_preg_replace)
1984    PHP_FE(preg_split,              arginfo_preg_split)
1985    PHP_FE(preg_quote,              arginfo_preg_quote)
1986    PHP_FE(preg_grep,               arginfo_preg_grep)
1987    PHP_FE(preg_last_error,         arginfo_preg_last_error)
1988    PHP_FE_END
1989};
1990
/* Module descriptor for the "pcre" extension. The initialiser is positional;
   the per-slot notes below follow the usual zend_module_entry field order
   (NOTE(review): confirm against zend_modules.h for this PHP version). */
1991zend_module_entry pcre_module_entry = {
1992    STANDARD_MODULE_HEADER,
1993   "pcre",                         /* extension name */
1994    pcre_functions,                 /* function table defined above */
1995    PHP_MINIT(pcre),                /* module startup */
1996    PHP_MSHUTDOWN(pcre),            /* module shutdown */
1997    NULL,                           /* presumably request startup: none */
1998    NULL,                           /* presumably request shutdown: none */
1999    PHP_MINFO(pcre),                /* module info */
2000    NO_VERSION_YET,                 /* no version string assigned */
2001    PHP_MODULE_GLOBALS(pcre),       /* module globals */
2002    PHP_GINIT(pcre),                /* globals constructor */
2003    PHP_GSHUTDOWN(pcre),            /* globals destructor */
2004    NULL,                           /* presumably post-deactivate hook: none */
2005    STANDARD_MODULE_PROPERTIES_EX
2006};
2007
/* Only emitted when COMPILE_DL_PCRE is defined. */
2008#ifdef COMPILE_DL_PCRE
2009ZEND_GET_MODULE(pcre)
2010#endif
2011
2012/* }}} */
2013
2014#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2015
2016/*
2017 * Local variables:
2018 * tab-width: 4
2019 * c-basic-offset: 4
2020 * End:
2021 * vim600: sw=4 ts=4 fdm=marker
2022 * vim<600: sw=4 ts=4
2023 */
2024