1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2014 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include "php.h"
22#include "php_ini.h"
23#include "php_globals.h"
24#include "php_pcre.h"
25#include "ext/standard/info.h"
26#include "ext/standard/basic_functions.h"
27#include "zend_smart_str.h"
28
29#if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31#include "ext/standard/php_string.h"
32
/* Result ordering for global matching; selected through the low byte of the flags. */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
/* Flag bit: report each match as a (match, offset) pair. */
#define PREG_OFFSET_CAPTURE         (1 << 8)

/* Flag bits exported to userland as the PREG_SPLIT_* constants. */
#define PREG_SPLIT_NO_EMPTY         (1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE    (1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1 << 2)

/* Internal option bit recorded when a pattern carries the 'e' modifier. */
#define PREG_REPLACE_EVAL           (1 << 0)

/* Flag bit exported to userland as PREG_GREP_INVERT. */
#define PREG_GREP_INVERT            (1 << 0)

/* Number of compiled patterns kept in the cache before old entries are evicted. */
#define PCRE_CACHE_SIZE 4096
46
/* Error codes stored in PCRE_G(error_code) and exported as the PREG_*_ERROR
 * constants. The numeric values are part of the userland contract. */
enum {
    PHP_PCRE_NO_ERROR              = 0,
    PHP_PCRE_INTERNAL_ERROR        = 1,
    PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
    PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
    PHP_PCRE_BAD_UTF8_ERROR        = 4,
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5
};
55
56
57ZEND_DECLARE_MODULE_GLOBALS(pcre)
58
59
60static void pcre_handle_exec_error(int pcre_code) /* {{{ */
61{
62    int preg_code = 0;
63
64    switch (pcre_code) {
65        case PCRE_ERROR_MATCHLIMIT:
66            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
67            break;
68
69        case PCRE_ERROR_RECURSIONLIMIT:
70            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
71            break;
72
73        case PCRE_ERROR_BADUTF8:
74            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
75            break;
76
77        case PCRE_ERROR_BADUTF8_OFFSET:
78            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
79            break;
80
81        default:
82            preg_code = PHP_PCRE_INTERNAL_ERROR;
83            break;
84    }
85
86    PCRE_G(error_code) = preg_code;
87}
88/* }}} */
89
90static void php_free_pcre_cache(zval *data) /* {{{ */
91{
92    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
93    if (!pce) return;
94    pefree(pce->re, 1);
95    if (pce->extra) {
96        pcre_free_study(pce->extra);
97    }
98#if HAVE_SETLOCALE
99    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
100    if (pce->locale) {
101        zend_string_release(pce->locale);
102    }
103#endif
104    pefree(pce, 1);
105}
106/* }}} */
107
108static PHP_GINIT_FUNCTION(pcre) /* {{{ */
109{
110    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
111    pcre_globals->backtrack_limit = 0;
112    pcre_globals->recursion_limit = 0;
113    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
114}
115/* }}} */
116
117static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
118{
119    zend_hash_destroy(&pcre_globals->pcre_cache);
120}
121/* }}} */
122
123PHP_INI_BEGIN()
124    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
125    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
126#ifdef PCRE_STUDY_JIT_COMPILE
127    STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
128#endif
129PHP_INI_END()
130
131
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Prints the module information table (support status and the version string
 * reported by the PCRE library), followed by the module's INI entries. */
static PHP_MINFO_FUNCTION(pcre)
{
    const char *library_version = pcre_version();

    php_info_print_table_start();
    php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");
    php_info_print_table_row(2, "PCRE Library Version", library_version);
    php_info_print_table_end();

    DISPLAY_INI_ENTRIES();
}
142/* }}} */
143
144/* {{{ PHP_MINIT_FUNCTION(pcre) */
145static PHP_MINIT_FUNCTION(pcre)
146{
147    REGISTER_INI_ENTRIES();
148
149    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
150    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
151    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
152    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
153    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
154    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
155    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
156
157    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
158    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
159    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
160    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
161    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
162    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
163    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
164
165    return SUCCESS;
166}
167/* }}} */
168
169/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
170static PHP_MSHUTDOWN_FUNCTION(pcre)
171{
172    UNREGISTER_INI_ENTRIES();
173
174    return SUCCESS;
175}
176/* }}} */
177
178/* {{{ static pcre_clean_cache */
179static int pcre_clean_cache(zval *data, void *arg)
180{
181    int *num_clean = (int *)arg;
182
183    if (*num_clean > 0) {
184        (*num_clean)--;
185        return 1;
186    } else {
187        return 0;
188    }
189}
190/* }}} */
191
192/* {{{ static make_subpats_table */
193static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
194{
195    pcre_extra *extra = pce->extra;
196    int name_cnt = pce->name_count, name_size, ni = 0;
197    int rc;
198    char *name_table;
199    unsigned short name_idx;
200    char **subpat_names;
201    int rc1, rc2;
202
203    rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
204    rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
205    rc = rc2 ? rc2 : rc1;
206    if (rc < 0) {
207        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
208        return NULL;
209    }
210
211    subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
212    while (ni++ < name_cnt) {
213        name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
214        subpat_names[name_idx] = name_table + 2;
215        if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
216            php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
217            efree(subpat_names);
218            return NULL;
219        }
220        name_table += name_size;
221    }
222    return subpat_names;
223}
224/* }}} */
225
226/* {{{ pcre_get_compiled_regex_cache
227 */
228PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
229{
230    pcre                *re = NULL;
231    pcre_extra          *extra;
232    int                  coptions = 0;
233    int                  soptions = 0;
234    const char          *error;
235    int                  erroffset;
236    char                 delimiter;
237    char                 start_delimiter;
238    char                 end_delimiter;
239    char                *p, *pp;
240    char                *pattern;
241    int                  do_study = 0;
242    int                  poptions = 0;
243    unsigned const char *tables = NULL;
244    pcre_cache_entry    *pce;
245    pcre_cache_entry     new_entry;
246    int                  rc;
247
248    /* Try to lookup the cached regex entry, and if successful, just pass
249       back the compiled pattern, otherwise go on and compile it. */
250    pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
251    if (pce) {
252#if HAVE_SETLOCALE
253        if (pce->locale == BG(locale_string) ||
254            (pce->locale && BG(locale_string) &&
255             pce->locale->len == BG(locale_string)->len &&
256             !memcmp(pce->locale->val, BG(locale_string)->val, pce->locale->len))) {
257            return pce;
258        }
259#else
260        return pce;
261#endif
262    }
263
264    p = regex->val;
265
266    /* Parse through the leading whitespace, and display a warning if we
267       get to the end without encountering a delimiter. */
268    while (isspace((int)*(unsigned char *)p)) p++;
269    if (*p == 0) {
270        php_error_docref(NULL, E_WARNING,
271                         p < regex->val + regex->len ? "Null byte in regex" : "Empty regular expression");
272        return NULL;
273    }
274
275    /* Get the delimiter and display a warning if it is alphanumeric
276       or a backslash. */
277    delimiter = *p++;
278    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
279        php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
280        return NULL;
281    }
282
283    start_delimiter = delimiter;
284    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
285        delimiter = pp[5];
286    end_delimiter = delimiter;
287
288    pp = p;
289
290    if (start_delimiter == end_delimiter) {
291        /* We need to iterate through the pattern, searching for the ending delimiter,
292           but skipping the backslashed delimiters.  If the ending delimiter is not
293           found, display a warning. */
294        while (*pp != 0) {
295            if (*pp == '\\' && pp[1] != 0) pp++;
296            else if (*pp == delimiter)
297                break;
298            pp++;
299        }
300    } else {
301        /* We iterate through the pattern, searching for the matching ending
302         * delimiter. For each matching starting delimiter, we increment nesting
303         * level, and decrement it for each matching ending delimiter. If we
304         * reach the end of the pattern without matching, display a warning.
305         */
306        int brackets = 1;   /* brackets nesting level */
307        while (*pp != 0) {
308            if (*pp == '\\' && pp[1] != 0) pp++;
309            else if (*pp == end_delimiter && --brackets <= 0)
310                break;
311            else if (*pp == start_delimiter)
312                brackets++;
313            pp++;
314        }
315    }
316
317    if (*pp == 0) {
318        if (pp < regex->val + regex->len) {
319            php_error_docref(NULL,E_WARNING, "Null byte in regex");
320        } else if (start_delimiter == end_delimiter) {
321            php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
322        } else {
323            php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
324        }
325        return NULL;
326    }
327
328    /* Make a copy of the actual pattern. */
329    pattern = estrndup(p, pp-p);
330
331    /* Move on to the options */
332    pp++;
333
334    /* Parse through the options, setting appropriate flags.  Display
335       a warning if we encounter an unknown modifier. */
336    while (pp < regex->val + regex->len) {
337        switch (*pp++) {
338            /* Perl compatible options */
339            case 'i':   coptions |= PCRE_CASELESS;      break;
340            case 'm':   coptions |= PCRE_MULTILINE;     break;
341            case 's':   coptions |= PCRE_DOTALL;        break;
342            case 'x':   coptions |= PCRE_EXTENDED;      break;
343
344            /* PCRE specific options */
345            case 'A':   coptions |= PCRE_ANCHORED;      break;
346            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
347            case 'S':   do_study  = 1;                  break;
348            case 'U':   coptions |= PCRE_UNGREEDY;      break;
349            case 'X':   coptions |= PCRE_EXTRA;         break;
350            case 'u':   coptions |= PCRE_UTF8;
351    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
352       characters, even in UTF-8 mode. However, this can be changed by setting
353       the PCRE_UCP option. */
354#ifdef PCRE_UCP
355                        coptions |= PCRE_UCP;
356#endif
357                break;
358
359            /* Custom preg options */
360            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;
361
362            case ' ':
363            case '\n':
364                break;
365
366            default:
367                if (pp[-1]) {
368                    php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
369                } else {
370                    php_error_docref(NULL,E_WARNING, "Null byte in regex");
371                }
372                efree(pattern);
373                return NULL;
374        }
375    }
376
377#if HAVE_SETLOCALE
378    if (BG(locale_string) &&
379        (!BG(locale_string)->len != 1 || !BG(locale_string)->val[0] != 'C')) {
380        tables = pcre_maketables();
381    }
382#endif
383
384    /* Compile pattern and display a warning if compilation failed. */
385    re = pcre_compile(pattern,
386                      coptions,
387                      &error,
388                      &erroffset,
389                      tables);
390
391    if (re == NULL) {
392        php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
393        efree(pattern);
394        if (tables) {
395            pefree((void*)tables, 1);
396        }
397        return NULL;
398    }
399
400#ifdef PCRE_STUDY_JIT_COMPILE
401    if (PCRE_G(jit)) {
402        /* Enable PCRE JIT compiler */
403        do_study = 1;
404        soptions |= PCRE_STUDY_JIT_COMPILE;
405    }
406#endif
407
408    /* If study option was specified, study the pattern and
409       store the result in extra for passing to pcre_exec. */
410    if (do_study) {
411        extra = pcre_study(re, soptions, &error);
412        if (extra) {
413            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
414            extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
415            extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
416        }
417        if (error != NULL) {
418            php_error_docref(NULL, E_WARNING, "Error while studying pattern");
419        }
420    } else {
421        extra = NULL;
422    }
423
424    efree(pattern);
425
426    /*
427     * If we reached cache limit, clean out the items from the head of the list;
428     * these are supposedly the oldest ones (but not necessarily the least used
429     * ones).
430     */
431    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
432        int num_clean = PCRE_CACHE_SIZE / 8;
433        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
434    }
435
436    /* Store the compiled pattern and extra info in the cache. */
437    new_entry.re = re;
438    new_entry.extra = extra;
439    new_entry.preg_options = poptions;
440    new_entry.compile_options = coptions;
441#if HAVE_SETLOCALE
442    new_entry.locale = BG(locale_string) ? zend_string_dup(BG(locale_string), 1) : NULL;
443    new_entry.tables = tables;
444#endif
445
446    rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
447    if (rc < 0) {
448        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
449        return NULL;
450    }
451
452    rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
453    if (rc < 0) {
454        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
455        return NULL;
456    }
457
458    /*
459     * Interned strings are not duplicated when stored in HashTable,
460     * but all the interned strings created during HTTP request are removed
461     * at end of request. However PCRE_G(pcre_cache) must be consistent
462     * on the next request as well. So we disable usage of interned strings
463     * as hash keys especually for this table.
464     * See bug #63180
465     */
466    pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache), regex->val, regex->len, &new_entry, sizeof(pcre_cache_entry));
467
468    return pce;
469}
470/* }}} */
471
472/* {{{ pcre_get_compiled_regex
473 */
474PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
475{
476    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
477
478    if (extra) {
479        *extra = pce ? pce->extra : NULL;
480    }
481    if (preg_options) {
482        *preg_options = pce ? pce->preg_options : 0;
483    }
484
485    return pce ? pce->re : NULL;
486}
487/* }}} */
488
489/* {{{ pcre_get_compiled_regex_ex
490 */
491PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
492{
493    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
494
495    if (extra) {
496        *extra = pce ? pce->extra : NULL;
497    }
498    if (preg_options) {
499        *preg_options = pce ? pce->preg_options : 0;
500    }
501    if (compile_options) {
502        *compile_options = pce ? pce->compile_options : 0;
503    }
504
505    return pce ? pce->re : NULL;
506}
507/* }}} */
508
509/* {{{ add_offset_pair */
510static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
511{
512    zval match_pair, tmp;
513
514    array_init_size(&match_pair, 2);
515
516    /* Add (match, offset) to the return value */
517    ZVAL_STRINGL(&tmp, str, len);
518    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
519    ZVAL_LONG(&tmp, offset);
520    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
521
522    if (name) {
523        Z_ADDREF(match_pair);
524        zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
525    }
526    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
527}
528/* }}} */
529
530static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
531{
532    /* parameters */
533    zend_string      *regex;            /* Regular expression */
534    zend_string      *subject;          /* String to match against */
535    pcre_cache_entry *pce;              /* Compiled regular expression */
536    zval             *subpats = NULL;   /* Array for subpatterns */
537    zend_long         flags = 0;        /* Match control flags */
538    zend_long         start_offset = 0; /* Where the new search starts */
539
540#ifndef FAST_ZPP
541    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
542                              &subject, &subpats, &flags, &start_offset) == FAILURE) {
543        RETURN_FALSE;
544    }
545#else
546    ZEND_PARSE_PARAMETERS_START(2, 5)
547        Z_PARAM_STR(regex)
548        Z_PARAM_STR(subject)
549        Z_PARAM_OPTIONAL
550        Z_PARAM_ZVAL_EX(subpats, 0, 1)
551        Z_PARAM_LONG(flags)
552        Z_PARAM_LONG(start_offset)
553    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
554#endif
555
556    /* Compile regex or get it from cache. */
557    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
558        RETURN_FALSE;
559    }
560
561    php_pcre_match_impl(pce, subject->val, (int)subject->len, return_value, subpats,
562        global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
563}
564/* }}} */
565
566/* {{{ php_pcre_match_impl() */
567PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
568    zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
569{
570    zval             result_set,        /* Holds a set of subpatterns after
571                                           a global match */
572                    *match_sets = NULL; /* An array of sets of matches for each
573                                           subpattern after a global match */
574    pcre_extra      *extra = pce->extra;/* Holds results of studying */
575    pcre_extra       extra_data;        /* Used locally for exec options */
576    int              exoptions = 0;     /* Execution options */
577    int              count = 0;         /* Count of matched subpatterns */
578    int             *offsets;           /* Array of subpattern offsets */
579    int              num_subpats;       /* Number of captured subpatterns */
580    int              size_offsets;      /* Size of the offsets array */
581    int              matched;           /* Has anything matched */
582    int              g_notempty = 0;    /* If the match should not be empty */
583    const char     **stringlist;        /* Holds list of subpatterns */
584    char           **subpat_names;      /* Array for named subpatterns */
585    int              i;
586    int              subpats_order;     /* Order of subpattern matches */
587    int              offset_capture;    /* Capture match offsets: yes/no */
588    unsigned char   *mark = NULL;       /* Target for MARK name */
589    zval            marks;              /* Array of marks for PREG_PATTERN_ORDER */
590    ALLOCA_FLAG(use_heap);
591
592    ZVAL_UNDEF(&marks);
593
594    /* Overwrite the passed-in value for subpatterns with an empty array. */
595    if (subpats != NULL) {
596        zval_dtor(subpats);
597        array_init(subpats);
598    }
599
600    subpats_order = global ? PREG_PATTERN_ORDER : 0;
601
602    if (use_flags) {
603        offset_capture = flags & PREG_OFFSET_CAPTURE;
604
605        /*
606         * subpats_order is pre-set to pattern mode so we change it only if
607         * necessary.
608         */
609        if (flags & 0xff) {
610            subpats_order = flags & 0xff;
611        }
612        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
613            (!global && subpats_order != 0)) {
614            php_error_docref(NULL, E_WARNING, "Invalid flags specified");
615            return;
616        }
617    } else {
618        offset_capture = 0;
619    }
620
621    /* Negative offset counts from the end of the string. */
622    if (start_offset < 0) {
623        start_offset = subject_len + start_offset;
624        if (start_offset < 0) {
625            start_offset = 0;
626        }
627    }
628
629    if (extra == NULL) {
630        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
631        extra = &extra_data;
632    }
633    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
634    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
635#ifdef PCRE_EXTRA_MARK
636    extra->mark = &mark;
637    extra->flags |= PCRE_EXTRA_MARK;
638#endif
639
640    /* Calculate the size of the offsets array, and allocate memory for it. */
641    num_subpats = pce->capture_count + 1;
642    size_offsets = num_subpats * 3;
643
644    /*
645     * Build a mapping from subpattern numbers to their names. We will
646     * allocate the table only if there are any named subpatterns.
647     */
648    subpat_names = NULL;
649    if (pce->name_count > 0) {
650        subpat_names = make_subpats_table(num_subpats, pce);
651        if (!subpat_names) {
652            RETURN_FALSE;
653        }
654    }
655
656    if (size_offsets <= 32) {
657        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
658    } else {
659        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
660    }
661    memset(offsets, 0, size_offsets*sizeof(int));
662    /* Allocate match sets array and initialize the values. */
663    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
664        match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
665        for (i=0; i<num_subpats; i++) {
666            array_init(&match_sets[i]);
667        }
668    }
669
670    matched = 0;
671    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
672
673    do {
674        /* Execute the regular expression. */
675        count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
676                          exoptions|g_notempty, offsets, size_offsets);
677
678        /* the string was already proved to be valid UTF-8 */
679        exoptions |= PCRE_NO_UTF8_CHECK;
680
681        /* Check for too many substrings condition. */
682        if (count == 0) {
683            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
684            count = size_offsets/3;
685        }
686
687        /* If something has matched */
688        if (count > 0) {
689            matched++;
690
691            /* If subpatterns array has been passed, fill it in with values. */
692            if (subpats != NULL) {
693                /* Try to get the list of substrings and display a warning if failed. */
694                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
695                    if (subpat_names) {
696                        efree(subpat_names);
697                    }
698                    if (size_offsets <= 32) {
699                        free_alloca(offsets, use_heap);
700                    } else {
701                        efree(offsets);
702                    }
703                    if (match_sets) efree(match_sets);
704                    php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
705                    RETURN_FALSE;
706                }
707
708                if (global) {   /* global pattern matching */
709                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
710                        /* For each subpattern, insert it into the appropriate array. */
711                        if (offset_capture) {
712                            for (i = 0; i < count; i++) {
713                                add_offset_pair(&match_sets[i], (char *)stringlist[i],
714                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
715                            }
716                        } else {
717                            for (i = 0; i < count; i++) {
718                                add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
719                                                       offsets[(i<<1)+1] - offsets[i<<1]);
720                            }
721                        }
722                        /* Add MARK, if available */
723                        if (mark) {
724                            if (Z_TYPE(marks) == IS_UNDEF) {
725                                array_init(&marks);
726                            }
727                            add_index_string(&marks, matched - 1, (char *) mark);
728                        }
729                        /*
730                         * If the number of captured subpatterns on this run is
731                         * less than the total possible number, pad the result
732                         * arrays with empty strings.
733                         */
734                        if (count < num_subpats) {
735                            for (; i < num_subpats; i++) {
736                                add_next_index_string(&match_sets[i], "");
737                            }
738                        }
739                    } else {
740                        /* Allocate the result set array */
741                        array_init_size(&result_set, count + (mark ? 1 : 0));
742
743                        /* Add all the subpatterns to it */
744                        if (subpat_names) {
745                            if (offset_capture) {
746                                for (i = 0; i < count; i++) {
747                                    add_offset_pair(&result_set, (char *)stringlist[i],
748                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
749                                }
750                            } else {
751                                for (i = 0; i < count; i++) {
752                                    if (subpat_names[i]) {
753                                        add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
754                                                               offsets[(i<<1)+1] - offsets[i<<1]);
755                                    }
756                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
757                                                           offsets[(i<<1)+1] - offsets[i<<1]);
758                                }
759                            }
760                        } else {
761                            if (offset_capture) {
762                                for (i = 0; i < count; i++) {
763                                    add_offset_pair(&result_set, (char *)stringlist[i],
764                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
765                                }
766                            } else {
767                                for (i = 0; i < count; i++) {
768                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
769                                                           offsets[(i<<1)+1] - offsets[i<<1]);
770                                }
771                            }
772                        }
773                        /* Add MARK, if available */
774                        if (mark) {
775                            add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
776                        }
777                        /* And add it to the output array */
778                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
779                    }
780                } else {            /* single pattern matching */
781                    /* For each subpattern, insert it into the subpatterns array. */
782                    if (subpat_names) {
783                        if (offset_capture) {
784                            for (i = 0; i < count; i++) {
785                                add_offset_pair(subpats, (char *)stringlist[i],
786                                                offsets[(i<<1)+1] - offsets[i<<1],
787                                                offsets[i<<1], subpat_names[i]);
788                            }
789                        } else {
790                            for (i = 0; i < count; i++) {
791                                if (subpat_names[i]) {
792                                    add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
793                                                      offsets[(i<<1)+1] - offsets[i<<1]);
794                                }
795                                add_next_index_stringl(subpats, (char *)stringlist[i],
796                                                       offsets[(i<<1)+1] - offsets[i<<1]);
797                            }
798                        }
799                    } else {
800                        if (offset_capture) {
801                            for (i = 0; i < count; i++) {
802                                add_offset_pair(subpats, (char *)stringlist[i],
803                                                offsets[(i<<1)+1] - offsets[i<<1],
804                                                offsets[i<<1], NULL);
805                            }
806                        } else {
807                            for (i = 0; i < count; i++) {
808                                add_next_index_stringl(subpats, (char *)stringlist[i],
809                                                       offsets[(i<<1)+1] - offsets[i<<1]);
810                            }
811                        }
812                    }
813                    /* Add MARK, if available */
814                    if (mark) {
815                        add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
816                    }
817                }
818
819                pcre_free((void *) stringlist);
820            }
821        } else if (count == PCRE_ERROR_NOMATCH) {
822            /* If we previously set PCRE_NOTEMPTY after a null match,
823               this is not necessarily the end. We need to advance
824               the start offset, and continue. Fudge the offset values
825               to achieve this, unless we're already at the end of the string. */
826            if (g_notempty != 0 && start_offset < subject_len) {
827                offsets[0] = (int)start_offset;
828                offsets[1] = (int)(start_offset + 1);
829            } else
830                break;
831        } else {
832            pcre_handle_exec_error(count);
833            break;
834        }
835
836        /* If we have matched an empty string, mimic what Perl's /g options does.
837           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
838           the match again at the same point. If this fails (picked up above) we
839           advance to the next character. */
840        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
841
842        /* Advance to the position right after the last full match */
843        start_offset = offsets[1];
844    } while (global);
845
846    /* Add the match sets to the output array and clean up */
847    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
848        if (subpat_names) {
849            for (i = 0; i < num_subpats; i++) {
850                if (subpat_names[i]) {
851                    zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
852                                     strlen(subpat_names[i]), &match_sets[i]);
853                    Z_ADDREF(match_sets[i]);
854                }
855                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
856            }
857        } else {
858            for (i = 0; i < num_subpats; i++) {
859                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
860            }
861        }
862        efree(match_sets);
863
864        if (Z_TYPE(marks) != IS_UNDEF) {
865            add_assoc_zval(subpats, "MARK", &marks);
866        }
867    }
868
869    if (size_offsets <= 32) {
870        free_alloca(offsets, use_heap);
871    } else {
872        efree(offsets);
873    }
874    if (subpat_names) {
875        efree(subpat_names);
876    }
877
878    /* Did we encounter an error? */
879    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
880        RETVAL_LONG(matched);
881    } else {
882        RETVAL_FALSE;
883    }
884}
885/* }}} */
886
887/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
888   Perform a Perl-style regular expression match */
889static PHP_FUNCTION(preg_match)
890{
891    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
892}
893/* }}} */
894
895/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
896   Perform a Perl-style global regular expression match */
897static PHP_FUNCTION(preg_match_all)
898{
899    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
900}
901/* }}} */
902
/* {{{ preg_get_backref
 * Try to read a back-reference starting at **str.
 *
 * Accepted shapes are an introducer character followed by one or two decimal
 * digits, or "${" + one or two digits + "}". The introducer itself is not
 * validated here except that the braced form requires it to be '$'.
 *
 * str      in/out: points at the introducer; moved past the reference on success,
 *          left untouched on failure.
 * backref  out: receives the parsed number. It may already have been written
 *          when a braced reference turns out to lack its closing '}'.
 *
 * Returns 1 when a reference was consumed, 0 otherwise.
 */
static int preg_get_backref(char **str, int *backref)
{
    char *cursor = *str;
    int   braced;

    /* An introducer that is the last character cannot start a reference. */
    if (cursor[1] == '\0') {
        return 0;
    }

    /* Only the dollar form may wrap its number in braces. */
    braced = (cursor[0] == '$' && cursor[1] == '{');
    cursor += braced ? 2 : 1;

    /* The first digit is mandatory. */
    if (*cursor < '0' || *cursor > '9') {
        return 0;
    }
    *backref = *cursor++ - '0';

    /* A second digit is optional, giving a range of 0..99. */
    if (*cursor >= '0' && *cursor <= '9') {
        *backref = *backref * 10 + (*cursor++ - '0');
    }

    /* A braced reference must be closed right after the digits. */
    if (braced) {
        if (*cursor != '}') {
            return 0;
        }
        cursor++;
    }

    *str = cursor;
    return 1;
}
/* }}} */
941
942/* {{{ preg_do_repl_func
943 */
944static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
945{
946    zend_string *result_str;
947    zval         retval;            /* Function return value */
948    zval         args[1];           /* Argument to pass to function */
949    int          i;
950
951    array_init_size(&args[0], count + (mark ? 1 : 0));
952    if (subpat_names) {
953        for (i = 0; i < count; i++) {
954            if (subpat_names[i]) {
955                add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
956            }
957            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
958        }
959    } else {
960        for (i = 0; i < count; i++) {
961            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
962        }
963    }
964    if (mark) {
965        add_assoc_string(&args[0], "MARK", (char *) mark);
966    }
967
968    if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
969        result_str = zval_get_string(&retval);
970        zval_ptr_dtor(&retval);
971    } else {
972        if (!EG(exception)) {
973            php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
974        }
975
976        result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
977    }
978
979    zval_ptr_dtor(&args[0]);
980
981    return result_str;
982}
983/* }}} */
984
985/* {{{ preg_do_eval
986 */
987static zend_string *preg_do_eval(char *eval_str, int eval_str_len, char *subject,
988                        int *offsets, int count)
989{
990    zval         retval;            /* Return value from evaluation */
991    char        *eval_str_end,      /* End of eval string */
992                *match,             /* Current match for a backref */
993                *walk,              /* Used to walk the code string */
994                *segment,           /* Start of segment to append while walking */
995                 walk_last;         /* Last walked character */
996    int          match_len;         /* Length of the match */
997    int          backref;           /* Current backref */
998    zend_string *esc_match;         /* Quote-escaped match */
999    zend_string *result_str;
1000    char        *compiled_string_description;
1001    smart_str    code = {0};
1002
1003    eval_str_end = eval_str + eval_str_len;
1004    walk = segment = eval_str;
1005    walk_last = 0;
1006
1007    while (walk < eval_str_end) {
1008        /* If found a backreference.. */
1009        if ('\\' == *walk || '$' == *walk) {
1010            smart_str_appendl(&code, segment, walk - segment);
1011            if (walk_last == '\\') {
1012                code.s->val[code.s->len-1] = *walk++;
1013                segment = walk;
1014                walk_last = 0;
1015                continue;
1016            }
1017            segment = walk;
1018            if (preg_get_backref(&walk, &backref)) {
1019                if (backref < count) {
1020                    /* Find the corresponding string match and substitute it
1021                       in instead of the backref */
1022                    match = subject + offsets[backref<<1];
1023                    match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1024                    if (match_len) {
1025                        esc_match = php_addslashes(match, match_len, 0);
1026                    } else {
1027                        esc_match = zend_string_init(match, match_len, 0);
1028                    }
1029                } else {
1030                    esc_match = STR_EMPTY_ALLOC();
1031                }
1032                smart_str_appendl(&code, esc_match->val, esc_match->len);
1033
1034                segment = walk;
1035
1036                /* Clean up and reassign */
1037                zend_string_release(esc_match);
1038                continue;
1039            }
1040        }
1041        walk++;
1042        walk_last = walk[-1];
1043    }
1044    smart_str_appendl(&code, segment, walk - segment);
1045    smart_str_0(&code);
1046
1047    compiled_string_description = zend_make_compiled_string_description("regexp code");
1048    /* Run the code */
1049    if (zend_eval_stringl(code.s->val, code.s->len, &retval, compiled_string_description) == FAILURE) {
1050        efree(compiled_string_description);
1051        php_error_docref(NULL,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.s->val);
1052        /* zend_error() does not return in this case */
1053    }
1054    efree(compiled_string_description);
1055
1056    /* Save the return string */
1057    result_str = zval_get_string(&retval);
1058
1059    /* Clean up */
1060    zval_dtor(&retval);
1061    smart_str_free(&code);
1062
1063    return result_str;
1064}
1065/* }}} */
1066
1067/* {{{ php_pcre_replace
1068 */
1069PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1070                              char *subject, int subject_len,
1071                              zval *replace_val, int is_callable_replace,
1072                              int limit, int *replace_count)
1073{
1074    pcre_cache_entry    *pce;               /* Compiled regular expression */
1075
1076    /* Compile regex or get it from cache. */
1077    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1078        return NULL;
1079    }
1080
1081    return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
1082        is_callable_replace, limit, replace_count);
1083}
1084/* }}} */
1085
1086/* {{{ php_pcre_replace_impl() */
1087PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
1088    int is_callable_replace, int limit, int *replace_count)
1089{
1090    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1091    pcre_extra       extra_data;        /* Used locally for exec options */
1092    int              exoptions = 0;     /* Execution options */
1093    int              count = 0;         /* Count of matched subpatterns */
1094    int             *offsets;           /* Array of subpattern offsets */
1095    char            **subpat_names;     /* Array for named subpatterns */
1096    int              num_subpats;       /* Number of captured subpatterns */
1097    int              size_offsets;      /* Size of the offsets array */
1098    int              new_len;           /* Length of needed storage */
1099    int              alloc_len;         /* Actual allocated length */
1100    int              match_len;         /* Length of the current match */
1101    int              backref;           /* Backreference number */
1102    int              eval;              /* If the replacement string should be eval'ed */
1103    int              start_offset;      /* Where the new search starts */
1104    int              g_notempty=0;      /* If the match should not be empty */
1105    int              replace_len=0;     /* Length of replacement string */
1106    char            *replace=NULL,      /* Replacement string */
1107                    *walkbuf,           /* Location of current replacement in the result */
1108                    *walk,              /* Used to walk the replacement string */
1109                    *match,             /* The current match */
1110                    *piece,             /* The current piece of subject */
1111                    *replace_end=NULL,  /* End of replacement string */
1112                     walk_last;         /* Last walked character */
1113    int              result_len;        /* Length of result */
1114    unsigned char   *mark = NULL;       /* Target for MARK name */
1115    zend_string     *result;            /* Result of replacement */
1116    zend_string     *eval_result=NULL;  /* Result of eval or custom function */
1117    ALLOCA_FLAG(use_heap);
1118
1119    if (extra == NULL) {
1120        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1121        extra = &extra_data;
1122    }
1123    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1124    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1125
1126    eval = pce->preg_options & PREG_REPLACE_EVAL;
1127    if (is_callable_replace) {
1128        if (eval) {
1129            php_error_docref(NULL, E_WARNING, "Modifier /e cannot be used with replacement callback");
1130            return NULL;
1131        }
1132    } else {
1133        replace = Z_STRVAL_P(replace_val);
1134        replace_len = (int)Z_STRLEN_P(replace_val);
1135        replace_end = replace + replace_len;
1136    }
1137
1138    if (eval) {
1139        php_error_docref(NULL, E_DEPRECATED, "The /e modifier is deprecated, use preg_replace_callback instead");
1140    }
1141
1142    /* Calculate the size of the offsets array, and allocate memory for it. */
1143    num_subpats = pce->capture_count + 1;
1144    size_offsets = num_subpats * 3;
1145    if (size_offsets <= 32) {
1146        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1147    } else {
1148        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1149    }
1150
1151    /*
1152     * Build a mapping from subpattern numbers to their names. We will
1153     * allocate the table only if there are any named subpatterns.
1154     */
1155    subpat_names = NULL;
1156    if (pce->name_count > 0) {
1157        subpat_names = make_subpats_table(num_subpats, pce);
1158        if (!subpat_names) {
1159            return NULL;
1160        }
1161    }
1162
1163    alloc_len = 2 * subject_len;
1164    result = zend_string_alloc(alloc_len * sizeof(char), 0);
1165
1166    /* Initialize */
1167    match = NULL;
1168    start_offset = 0;
1169    result_len = 0;
1170    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1171
1172    while (1) {
1173#ifdef PCRE_EXTRA_MARK
1174        extra->mark = &mark;
1175        extra->flags |= PCRE_EXTRA_MARK;
1176#endif
1177        /* Execute the regular expression. */
1178        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1179                          exoptions|g_notempty, offsets, size_offsets);
1180
1181        /* the string was already proved to be valid UTF-8 */
1182        exoptions |= PCRE_NO_UTF8_CHECK;
1183
1184        /* Check for too many substrings condition. */
1185        if (count == 0) {
1186            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1187            count = size_offsets/3;
1188        }
1189
1190        piece = subject + start_offset;
1191
1192        if (count > 0 && (limit == -1 || limit > 0)) {
1193            if (replace_count) {
1194                ++*replace_count;
1195            }
1196            /* Set the match location in subject */
1197            match = subject + offsets[0];
1198
1199            new_len = result_len + offsets[0] - start_offset; /* part before the match */
1200
1201            /* If evaluating, do it and add the return string's length */
1202            if (eval) {
1203                eval_result = preg_do_eval(replace, replace_len, subject,
1204                                               offsets, count);
1205                new_len += (int)eval_result->len;
1206            } else if (is_callable_replace) {
1207                /* Use custom function to get replacement string and its length. */
1208                eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1209                new_len += (int)eval_result->len;
1210            } else { /* do regular substitution */
1211                walk = replace;
1212                walk_last = 0;
1213                while (walk < replace_end) {
1214                    if ('\\' == *walk || '$' == *walk) {
1215                        if (walk_last == '\\') {
1216                            walk++;
1217                            walk_last = 0;
1218                            continue;
1219                        }
1220                        if (preg_get_backref(&walk, &backref)) {
1221                            if (backref < count)
1222                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1223                            continue;
1224                        }
1225                    }
1226                    new_len++;
1227                    walk++;
1228                    walk_last = walk[-1];
1229                }
1230            }
1231
1232            if (new_len > alloc_len) {
1233                alloc_len = alloc_len + 2 * new_len;
1234                result = zend_string_realloc(result, alloc_len, 0);
1235            }
1236            /* copy the part of the string before the match */
1237            memcpy(&result->val[result_len], piece, match-piece);
1238            result_len += (int)(match-piece);
1239
1240            /* copy replacement and backrefs */
1241            walkbuf = result->val + result_len;
1242
1243            /* If evaluating or using custom function, copy result to the buffer
1244             * and clean up. */
1245            if (eval || is_callable_replace) {
1246                memcpy(walkbuf, eval_result->val, eval_result->len);
1247                result_len += (int)eval_result->len;
1248                if (eval_result) zend_string_release(eval_result);
1249            } else { /* do regular backreference copying */
1250                walk = replace;
1251                walk_last = 0;
1252                while (walk < replace_end) {
1253                    if ('\\' == *walk || '$' == *walk) {
1254                        if (walk_last == '\\') {
1255                            *(walkbuf-1) = *walk++;
1256                            walk_last = 0;
1257                            continue;
1258                        }
1259                        if (preg_get_backref(&walk, &backref)) {
1260                            if (backref < count) {
1261                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1262                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1263                                walkbuf += match_len;
1264                            }
1265                            continue;
1266                        }
1267                    }
1268                    *walkbuf++ = *walk++;
1269                    walk_last = walk[-1];
1270                }
1271                *walkbuf = '\0';
1272                /* increment the result length by how much we've added to the string */
1273                result_len += (int)(walkbuf - (result->val + result_len));
1274            }
1275
1276            if (limit != -1)
1277                limit--;
1278
1279        } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1280            /* If we previously set PCRE_NOTEMPTY after a null match,
1281               this is not necessarily the end. We need to advance
1282               the start offset, and continue. Fudge the offset values
1283               to achieve this, unless we're already at the end of the string. */
1284            if (g_notempty != 0 && start_offset < subject_len) {
1285                offsets[0] = start_offset;
1286                offsets[1] = start_offset + 1;
1287                memcpy(&result->val[result_len], piece, 1);
1288                result_len++;
1289            } else {
1290                new_len = result_len + subject_len - start_offset;
1291                if (new_len > alloc_len) {
1292                    alloc_len = new_len; /* now we know exactly how long it is */
1293                    result = zend_string_realloc(result, alloc_len, 0);
1294                }
1295                /* stick that last bit of string on our output */
1296                memcpy(&result->val[result_len], piece, subject_len - start_offset);
1297                result_len += subject_len - start_offset;
1298                result->val[result_len] = '\0';
1299                break;
1300            }
1301        } else {
1302            pcre_handle_exec_error(count);
1303            zend_string_free(result);
1304            result = NULL;
1305            break;
1306        }
1307
1308        /* If we have matched an empty string, mimic what Perl's /g options does.
1309           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1310           the match again at the same point. If this fails (picked up above) we
1311           advance to the next character. */
1312        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1313
1314        /* Advance to the next piece. */
1315        start_offset = offsets[1];
1316    }
1317
1318    if (result) {
1319        result->len = result_len;
1320    }
1321    if (size_offsets <= 32) {
1322        free_alloca(offsets, use_heap);
1323    } else {
1324        efree(offsets);
1325    }
1326    if (subpat_names) {
1327        efree(subpat_names);
1328    }
1329
1330    return result;
1331}
1332/* }}} */
1333
1334/* {{{ php_replace_in_subject
1335 */
1336static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1337{
1338    zval        *regex_entry,
1339                *replace_entry = NULL,
1340                *replace_value,
1341                 empty_replace;
1342    zend_string *result;
1343    zend_string *subject_str = zval_get_string(subject);
1344    uint32_t replace_idx;
1345
1346    /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1347    ZVAL_EMPTY_STRING(&empty_replace);
1348
1349    /* If regex is an array */
1350    if (Z_TYPE_P(regex) == IS_ARRAY) {
1351        replace_value = replace;
1352        replace_idx = 0;
1353
1354        /* For each entry in the regex array, get the entry */
1355        ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1356            /* Make sure we're dealing with strings. */
1357            zend_string *regex_str = zval_get_string(regex_entry);
1358
1359            /* If replace is an array and not a callable construct */
1360            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1361                /* Get current entry */
1362                replace_entry = NULL;
1363                while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1364                    if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
1365                        replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
1366                        break;
1367                    }
1368                    replace_idx++;
1369                }
1370                if (replace_entry != NULL) {
1371                    if (!is_callable_replace) {
1372                        convert_to_string_ex(replace_entry);
1373                    }
1374                    replace_value = replace_entry;
1375                    replace_idx++;
1376                } else {
1377                    /* We've run out of replacement strings, so use an empty one */
1378                    replace_value = &empty_replace;
1379                }
1380            }
1381
1382            /* Do the actual replacement and put the result back into subject_str
1383               for further replacements. */
1384            if ((result = php_pcre_replace(regex_str,
1385                                           subject_str->val,
1386                                           (int)subject_str->len,
1387                                           replace_value,
1388                                           is_callable_replace,
1389                                           limit,
1390                                           replace_count)) != NULL) {
1391                zend_string_release(subject_str);
1392                subject_str = result;
1393            } else {
1394                zend_string_release(subject_str);
1395                zend_string_release(regex_str);
1396                return NULL;
1397            }
1398
1399            zend_string_release(regex_str);
1400        } ZEND_HASH_FOREACH_END();
1401
1402        return subject_str;
1403    } else {
1404        result = php_pcre_replace(Z_STR_P(regex),
1405                                  subject_str->val,
1406                                  (int)subject_str->len,
1407                                  replace,
1408                                  is_callable_replace,
1409                                  limit,
1410                                  replace_count);
1411        zend_string_release(subject_str);
1412        return result;
1413    }
1414}
1415/* }}} */
1416
1417/* {{{ preg_replace_impl
1418 */
1419static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1420{
1421    zval            *regex,
1422                    *replace,
1423                    *subject,
1424                    *subject_entry,
1425                    *zcount = NULL;
1426    int              limit_val = -1;
1427    zend_long        limit = -1;
1428    zend_string     *result;
1429    zend_string     *string_key;
1430    zend_ulong       num_key;
1431    zend_string     *callback_name;
1432    int              replace_count=0, old_replace_count;
1433
1434#ifndef FAST_ZPP
1435    /* Get function parameters and do error-checking. */
1436    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1437        return;
1438    }
1439#else
1440    ZEND_PARSE_PARAMETERS_START(3, 5)
1441        Z_PARAM_ZVAL(regex)
1442        Z_PARAM_ZVAL(replace)
1443        Z_PARAM_ZVAL(subject)
1444        Z_PARAM_OPTIONAL
1445        Z_PARAM_LONG(limit)
1446        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1447    ZEND_PARSE_PARAMETERS_END();
1448#endif
1449
1450    if (!is_callable_replace && Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1451        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1452        RETURN_FALSE;
1453    }
1454
1455    if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1456        SEPARATE_ZVAL(replace);
1457        convert_to_string_ex(replace);
1458    }
1459    if (is_callable_replace) {
1460        if (!zend_is_callable(replace, 0, &callback_name)) {
1461            php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name->val);
1462            zend_string_release(callback_name);
1463            ZVAL_DUP(return_value, subject);
1464            return;
1465        }
1466        zend_string_release(callback_name);
1467    }
1468
1469    if (ZEND_NUM_ARGS() > 3) {
1470        limit_val = (int)limit;
1471    }
1472
1473    if (Z_TYPE_P(regex) != IS_ARRAY) {
1474        SEPARATE_ZVAL(regex);
1475        convert_to_string_ex(regex);
1476    }
1477
1478    /* if subject is an array */
1479    if (Z_TYPE_P(subject) == IS_ARRAY) {
1480        array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1481
1482        /* For each subject entry, convert it to string, then perform replacement
1483           and add the result to the return_value array. */
1484        ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1485            old_replace_count = replace_count;
1486            if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1487                if (!is_filter || replace_count > old_replace_count) {
1488                    /* Add to return array */
1489                    if (string_key) {
1490                        add_assoc_str_ex(return_value, string_key->val, string_key->len, result);
1491                    } else {
1492                        add_index_str(return_value, num_key, result);
1493                    }
1494                } else {
1495                    zend_string_release(result);
1496                }
1497            }
1498        } ZEND_HASH_FOREACH_END();
1499    } else {    /* if subject is not an array */
1500        old_replace_count = replace_count;
1501        if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1502            if (!is_filter || replace_count > old_replace_count) {
1503                RETVAL_STR(result);
1504            } else {
1505                zend_string_release(result);
1506            }
1507        }
1508    }
1509    if (ZEND_NUM_ARGS() > 4) {
1510        zval_dtor(zcount);
1511        ZVAL_LONG(zcount, replace_count);
1512    }
1513
1514}
1515/* }}} */
1516
1517/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1518   Perform Perl-style regular expression replacement. */
1519static PHP_FUNCTION(preg_replace)
1520{
1521    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1522}
1523/* }}} */
1524
1525/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1526   Perform Perl-style regular expression replacement using replacement callback. */
1527static PHP_FUNCTION(preg_replace_callback)
1528{
1529    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1530}
1531/* }}} */
1532
1533/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1534   Perform Perl-style regular expression replacement and only return matches. */
1535static PHP_FUNCTION(preg_filter)
1536{
1537    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1538}
1539/* }}} */
1540
1541/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1542   Split string into an array using a perl-style regular expression as a delimiter */
1543static PHP_FUNCTION(preg_split)
1544{
1545    zend_string         *regex;         /* Regular expression */
1546    zend_string         *subject;       /* String to match against */
1547    zend_long            limit_val = -1;/* Integer value of limit */
1548    zend_long            flags = 0;     /* Match control flags */
1549    pcre_cache_entry    *pce;           /* Compiled regular expression */
1550
1551    /* Get function parameters and do error checking */
1552#ifndef FAST_ZPP
1553    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
1554                              &subject, &limit_val, &flags) == FAILURE) {
1555        RETURN_FALSE;
1556    }
1557#else
1558    ZEND_PARSE_PARAMETERS_START(2, 4)
1559        Z_PARAM_STR(regex)
1560        Z_PARAM_STR(subject)
1561        Z_PARAM_OPTIONAL
1562        Z_PARAM_LONG(limit_val)
1563        Z_PARAM_LONG(flags)
1564    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1565#endif
1566
1567    /* Compile regex or get it from cache. */
1568    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1569        RETURN_FALSE;
1570    }
1571
1572    php_pcre_split_impl(pce, subject->val, (int)subject->len, return_value, (int)limit_val, flags);
1573}
1574/* }}} */
1575
1576/* {{{ php_pcre_split
1577 */
1578PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1579    zend_long limit_val, zend_long flags)
1580{
1581    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1582    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1583    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1584    pcre_extra       extra_data;        /* Used locally for exec options */
1585    int             *offsets;           /* Array of subpattern offsets */
1586    int              size_offsets;      /* Size of the offsets array */
1587    int              exoptions = 0;     /* Execution options */
1588    int              count = 0;         /* Count of matched subpatterns */
1589    int              start_offset;      /* Where the new search starts */
1590    int              next_offset;       /* End of the last delimiter match + 1 */
1591    int              g_notempty = 0;    /* If the match should not be empty */
1592    char            *last_match;        /* Location of last match */
1593    int              no_empty;          /* If NO_EMPTY flag is set */
1594    int              delim_capture;     /* If delimiters should be captured */
1595    int              offset_capture;    /* If offsets should be captured */
1596    zval             tmp;
1597    ALLOCA_FLAG(use_heap);
1598
1599    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1600    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1601    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1602
1603    if (limit_val == 0) {
1604        limit_val = -1;
1605    }
1606
1607    if (extra == NULL) {
1608        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1609        extra = &extra_data;
1610    }
1611    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1612    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1613#ifdef PCRE_EXTRA_MARK
1614    extra->flags &= ~PCRE_EXTRA_MARK;
1615#endif
1616
1617    /* Initialize return value */
1618    array_init(return_value);
1619
1620    /* Calculate the size of the offsets array, and allocate memory for it. */
1621    size_offsets = (pce->capture_count + 1) * 3;
1622    if (size_offsets <= 32) {
1623        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1624    } else {
1625        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1626    }
1627
1628    /* Start at the beginning of the string */
1629    start_offset = 0;
1630    next_offset = 0;
1631    last_match = subject;
1632    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1633
1634    /* Get next piece if no limit or limit not yet reached and something matched*/
1635    while ((limit_val == -1 || limit_val > 1)) {
1636        count = pcre_exec(pce->re, extra, subject,
1637                          subject_len, start_offset,
1638                          exoptions|g_notempty, offsets, size_offsets);
1639
1640        /* the string was already proved to be valid UTF-8 */
1641        exoptions |= PCRE_NO_UTF8_CHECK;
1642
1643        /* Check for too many substrings condition. */
1644        if (count == 0) {
1645            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1646            count = size_offsets/3;
1647        }
1648
1649        /* If something matched */
1650        if (count > 0) {
1651            if (!no_empty || &subject[offsets[0]] != last_match) {
1652
1653                if (offset_capture) {
1654                    /* Add (match, offset) pair to the return value */
1655                    add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1656                } else {
1657                    /* Add the piece to the return value */
1658                    ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1659                    zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1660                }
1661
1662                /* One less left to do */
1663                if (limit_val != -1)
1664                    limit_val--;
1665            }
1666
1667            last_match = &subject[offsets[1]];
1668            next_offset = offsets[1];
1669
1670            if (delim_capture) {
1671                int i, match_len;
1672                for (i = 1; i < count; i++) {
1673                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1674                    /* If we have matched a delimiter */
1675                    if (!no_empty || match_len > 0) {
1676                        if (offset_capture) {
1677                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1678                        } else {
1679                            ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1680                            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1681                        }
1682                    }
1683                }
1684            }
1685        } else if (count == PCRE_ERROR_NOMATCH) {
1686            /* If we previously set PCRE_NOTEMPTY after a null match,
1687               this is not necessarily the end. We need to advance
1688               the start offset, and continue. Fudge the offset values
1689               to achieve this, unless we're already at the end of the string. */
1690            if (g_notempty != 0 && start_offset < subject_len) {
1691                if (pce->compile_options & PCRE_UTF8) {
1692                    if (re_bump == NULL) {
1693                        int dummy;
1694                        zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
1695                        re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy);
1696                        zend_string_release(regex);
1697                        if (re_bump == NULL) {
1698                            RETURN_FALSE;
1699                        }
1700                    }
1701                    count = pcre_exec(re_bump, extra_bump, subject,
1702                              subject_len, start_offset,
1703                              exoptions, offsets, size_offsets);
1704                    if (count < 1) {
1705                        php_error_docref(NULL, E_WARNING, "Unknown error");
1706                        RETURN_FALSE;
1707                    }
1708                } else {
1709                    offsets[0] = start_offset;
1710                    offsets[1] = start_offset + 1;
1711                }
1712            } else
1713                break;
1714        } else {
1715            pcre_handle_exec_error(count);
1716            break;
1717        }
1718
1719        /* If we have matched an empty string, mimic what Perl's /g options does.
1720           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1721           the match again at the same point. If this fails (picked up above) we
1722           advance to the next character. */
1723        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1724
1725        /* Advance to the position right after the last full match */
1726        start_offset = offsets[1];
1727    }
1728
1729
1730    start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1731
1732    if (!no_empty || start_offset < subject_len)
1733    {
1734        if (offset_capture) {
1735            /* Add the last (match, offset) pair to the return value */
1736            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1737        } else {
1738            /* Add the last piece to the return value */
1739            ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1740            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1741        }
1742    }
1743
1744
1745    /* Clean up */
1746    if (size_offsets <= 32) {
1747        free_alloca(offsets, use_heap);
1748    } else {
1749        efree(offsets);
1750    }
1751}
1752/* }}} */
1753
1754/* {{{ proto string preg_quote(string str [, string delim_char])
1755   Quote regular expression characters plus an optional character */
1756static PHP_FUNCTION(preg_quote)
1757{
1758    size_t       in_str_len;
1759    char    *in_str;        /* Input string argument */
1760    char    *in_str_end;    /* End of the input string */
1761    size_t       delim_len = 0;
1762    char    *delim = NULL;  /* Additional delimiter argument */
1763    zend_string *out_str;   /* Output string with quoted characters */
1764    char    *p,             /* Iterator for input string */
1765            *q,             /* Iterator for output string */
1766             delim_char=0,  /* Delimiter character to be quoted */
1767             c;             /* Current character */
1768    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1769
1770    /* Get the arguments and check for errors */
1771#ifndef FAST_ZPP
1772    if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
1773                              &delim, &delim_len) == FAILURE) {
1774        return;
1775    }
1776#else
1777    ZEND_PARSE_PARAMETERS_START(1, 2)
1778        Z_PARAM_STRING(in_str, in_str_len)
1779        Z_PARAM_OPTIONAL
1780        Z_PARAM_STRING(delim, delim_len)
1781    ZEND_PARSE_PARAMETERS_END();
1782#endif
1783
1784    in_str_end = in_str + in_str_len;
1785
1786    /* Nothing to do if we got an empty string */
1787    if (in_str == in_str_end) {
1788        RETURN_EMPTY_STRING();
1789    }
1790
1791    if (delim && *delim) {
1792        delim_char = delim[0];
1793        quote_delim = 1;
1794    }
1795
1796    /* Allocate enough memory so that even if each character
1797       is quoted, we won't run out of room */
1798    out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1799
1800    /* Go through the string and quote necessary characters */
1801    for (p = in_str, q = out_str->val; p != in_str_end; p++) {
1802        c = *p;
1803        switch(c) {
1804            case '.':
1805            case '\\':
1806            case '+':
1807            case '*':
1808            case '?':
1809            case '[':
1810            case '^':
1811            case ']':
1812            case '$':
1813            case '(':
1814            case ')':
1815            case '{':
1816            case '}':
1817            case '=':
1818            case '!':
1819            case '>':
1820            case '<':
1821            case '|':
1822            case ':':
1823            case '-':
1824                *q++ = '\\';
1825                *q++ = c;
1826                break;
1827
1828            case '\0':
1829                *q++ = '\\';
1830                *q++ = '0';
1831                *q++ = '0';
1832                *q++ = '0';
1833                break;
1834
1835            default:
1836                if (quote_delim && c == delim_char)
1837                    *q++ = '\\';
1838                *q++ = c;
1839                break;
1840        }
1841    }
1842    *q = '\0';
1843
1844    /* Reallocate string and return it */
1845    out_str = zend_string_realloc(out_str, q - out_str->val, 0);
1846    RETURN_STR(out_str);
1847}
1848/* }}} */
1849
1850/* {{{ proto array preg_grep(string regex, array input [, int flags])
1851   Searches array and returns entries which match regex */
1852static PHP_FUNCTION(preg_grep)
1853{
1854    zend_string         *regex;         /* Regular expression */
1855    zval                *input;         /* Input array */
1856    zend_long            flags = 0;     /* Match control flags */
1857    pcre_cache_entry    *pce;           /* Compiled regular expression */
1858
1859    /* Get arguments and do error checking */
1860#ifndef FAST_ZPP
1861    if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
1862                              &input, &flags) == FAILURE) {
1863        return;
1864    }
1865#else
1866    ZEND_PARSE_PARAMETERS_START(2, 3)
1867        Z_PARAM_STR(regex)
1868        Z_PARAM_ARRAY(input)
1869        Z_PARAM_OPTIONAL
1870        Z_PARAM_LONG(flags)
1871    ZEND_PARSE_PARAMETERS_END();
1872#endif
1873
1874    /* Compile regex or get it from cache. */
1875    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1876        RETURN_FALSE;
1877    }
1878
1879    php_pcre_grep_impl(pce, input, return_value, flags);
1880}
1881/* }}} */
1882
1883PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
1884{
1885    zval            *entry;             /* An entry in the input array */
1886    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1887    pcre_extra       extra_data;        /* Used locally for exec options */
1888    int             *offsets;           /* Array of subpattern offsets */
1889    int              size_offsets;      /* Size of the offsets array */
1890    int              count = 0;         /* Count of matched subpatterns */
1891    zend_string     *string_key;
1892    zend_ulong       num_key;
1893    zend_bool        invert;            /* Whether to return non-matching
1894                                           entries */
1895    ALLOCA_FLAG(use_heap);
1896
1897    invert = flags & PREG_GREP_INVERT ? 1 : 0;
1898
1899    if (extra == NULL) {
1900        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1901        extra = &extra_data;
1902    }
1903    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1904    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1905#ifdef PCRE_EXTRA_MARK
1906    extra->flags &= ~PCRE_EXTRA_MARK;
1907#endif
1908
1909    /* Calculate the size of the offsets array, and allocate memory for it. */
1910    size_offsets = (pce->capture_count + 1) * 3;
1911    if (size_offsets <= 32) {
1912        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1913    } else {
1914        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1915    }
1916
1917    /* Initialize return array */
1918    array_init(return_value);
1919
1920    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1921
1922    /* Go through the input array */
1923    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
1924        zend_string *subject_str = zval_get_string(entry);
1925
1926        /* Perform the match */
1927        count = pcre_exec(pce->re, extra, subject_str->val,
1928                          (int)subject_str->len, 0,
1929                          0, offsets, size_offsets);
1930
1931        /* Check for too many substrings condition. */
1932        if (count == 0) {
1933            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1934            count = size_offsets/3;
1935        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1936            pcre_handle_exec_error(count);
1937            zend_string_release(subject_str);
1938            break;
1939        }
1940
1941        /* If the entry fits our requirements */
1942        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1943            if (Z_REFCOUNTED_P(entry)) {
1944                Z_ADDREF_P(entry);
1945            }
1946
1947            /* Add to return array */
1948            if (string_key) {
1949                zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
1950            } else {
1951                zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
1952            }
1953        }
1954
1955        zend_string_release(subject_str);
1956    } ZEND_HASH_FOREACH_END();
1957
1958    /* Clean up */
1959    if (size_offsets <= 32) {
1960        free_alloca(offsets, use_heap);
1961    } else {
1962        efree(offsets);
1963    }
1964}
1965/* }}} */
1966
/* {{{ proto int preg_last_error()
   Returns the error code of the last regexp execution.
   Takes no arguments; the value returned is the current content of
   PCRE_G(error_code), which the matching routines in this file reset to
   PHP_PCRE_NO_ERROR before they start executing a pattern. */
static PHP_FUNCTION(preg_last_error)
{
    /* Accept the call only when no arguments were passed. */
#ifndef FAST_ZPP
    if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
        return;
    }
#else
    ZEND_PARSE_PARAMETERS_START(0, 0)
    ZEND_PARSE_PARAMETERS_END();
#endif

    RETURN_LONG(PCRE_G(error_code));
}
1982/* }}} */
1983
1984/* {{{ module definition structures */
1985
/* {{{ arginfo */
/* Argument descriptions for the functions registered in pcre_functions[].
   In each table the last argument of ZEND_BEGIN_ARG_INFO_EX equals the number
   of non-optional parameters in the matching proto comment, and the first
   argument of ZEND_ARG_INFO is 1 exactly for the parameters that are written
   back to the caller (subpatterns, count) and 0 otherwise. */

/* preg_match: pattern and subject are required. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_match_all: same parameter list as preg_match. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_replace (also used for preg_filter in pcre_functions[]). */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback: like preg_replace, with a callback as second parameter. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_split: limit and flags are optional. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_quote: delim_char is optional. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

/* preg_grep: flags is optional. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error: declares no parameters. */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
/* }}} */
2040
/* Function table of the extension: pairs every PHP_FUNCTION defined in this
   file with its argument description. preg_filter deliberately reuses
   arginfo_preg_replace because both functions share one parameter list.
   The table is terminated by PHP_FE_END. */
static const zend_function_entry pcre_functions[] = {
    PHP_FE(preg_match,              arginfo_preg_match)
    PHP_FE(preg_match_all,          arginfo_preg_match_all)
    PHP_FE(preg_replace,            arginfo_preg_replace)
    PHP_FE(preg_replace_callback,   arginfo_preg_replace_callback)
    PHP_FE(preg_filter,             arginfo_preg_replace)
    PHP_FE(preg_split,              arginfo_preg_split)
    PHP_FE(preg_quote,              arginfo_preg_quote)
    PHP_FE(preg_grep,               arginfo_preg_grep)
    PHP_FE(preg_last_error,         arginfo_preg_last_error)
    PHP_FE_END
};
2053
/* Module descriptor of the "pcre" extension: ties together the function
   table above, the module startup/shutdown and info callbacks, and the
   module globals with their init/shutdown callbacks.
   NOTE(review): the two NULL slots between PHP_MSHUTDOWN and PHP_MINFO are
   presumably the per-request startup/shutdown hooks, and the NULL before
   STANDARD_MODULE_PROPERTIES_EX a post-deactivate hook -- confirm against
   the zend_module_entry definition. */
zend_module_entry pcre_module_entry = {
    STANDARD_MODULE_HEADER,
   "pcre",
    pcre_functions,
    PHP_MINIT(pcre),
    PHP_MSHUTDOWN(pcre),
    NULL,
    NULL,
    PHP_MINFO(pcre),
    NO_VERSION_YET,
    PHP_MODULE_GLOBALS(pcre),
    PHP_GINIT(pcre),
    PHP_GSHUTDOWN(pcre),
    NULL,
    STANDARD_MODULE_PROPERTIES_EX
};

/* Emitted only when COMPILE_DL_PCRE is defined. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
2074
2075/* }}} */
2076
2077#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2078
2079/*
2080 * Local variables:
2081 * tab-width: 4
2082 * c-basic-offset: 4
2083 * End:
2084 * vim600: sw=4 ts=4 fdm=marker
2085 * vim<600: sw=4 ts=4
2086 */
2087