1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2015 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
#include "php.h"
#include "php_ini.h"
#include "php_globals.h"
#include "php_pcre.h"
#include "ext/standard/info.h"
#include "ext/standard/basic_functions.h"
#include "zend_smart_str.h"

#include <limits.h>
28
29#if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31#include "ext/standard/php_string.h"
32
/*
 * Flags accepted by the matching functions. The ordering mode is carried in
 * the low byte of the flags word (the matcher extracts it with `flags & 0xff`),
 * while PREG_OFFSET_CAPTURE is a separate bit above that byte.
 */
#define PREG_PATTERN_ORDER          0x01
#define PREG_SET_ORDER              0x02
#define PREG_OFFSET_CAPTURE         0x100

/* Bit flags for splitting. */
#define PREG_SPLIT_NO_EMPTY         0x01
#define PREG_SPLIT_DELIM_CAPTURE    0x02
#define PREG_SPLIT_OFFSET_CAPTURE   0x04

/* Internal option bit recorded when the pattern carries the 'e' modifier. */
#define PREG_REPLACE_EVAL           0x01

/* Bit flag for grep. */
#define PREG_GREP_INVERT            0x01

/* Number of compiled patterns the per-process cache holds before it is thinned out. */
#define PCRE_CACHE_SIZE 4096
46
/*
 * Error codes stored in PCRE_G(error_code) and exported to userland as the
 * PREG_*_ERROR constants. The numeric values are part of the public
 * interface, so they are spelled out explicitly.
 */
enum {
    PHP_PCRE_NO_ERROR              = 0,
    PHP_PCRE_INTERNAL_ERROR        = 1,
    PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
    PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
    PHP_PCRE_BAD_UTF8_ERROR        = 4,
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5
};
55
56
/* Module globals for the pcre extension: the compiled-regex cache, the
 * backtrack/recursion limits and the last error code (see the GINIT handler). */
ZEND_DECLARE_MODULE_GLOBALS(pcre)
58
59
60static void pcre_handle_exec_error(int pcre_code) /* {{{ */
61{
62    int preg_code = 0;
63
64    switch (pcre_code) {
65        case PCRE_ERROR_MATCHLIMIT:
66            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
67            break;
68
69        case PCRE_ERROR_RECURSIONLIMIT:
70            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
71            break;
72
73        case PCRE_ERROR_BADUTF8:
74            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
75            break;
76
77        case PCRE_ERROR_BADUTF8_OFFSET:
78            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
79            break;
80
81        default:
82            preg_code = PHP_PCRE_INTERNAL_ERROR;
83            break;
84    }
85
86    PCRE_G(error_code) = preg_code;
87}
88/* }}} */
89
90static void php_free_pcre_cache(zval *data) /* {{{ */
91{
92    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
93    if (!pce) return;
94    pcre_free(pce->re);
95    if (pce->extra) {
96        pcre_free_study(pce->extra);
97    }
98#if HAVE_SETLOCALE
99    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
100    if (pce->locale) {
101        zend_string_release(pce->locale);
102    }
103#endif
104    pefree(pce, 1);
105}
106/* }}} */
107
108static PHP_GINIT_FUNCTION(pcre) /* {{{ */
109{
110    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
111    pcre_globals->backtrack_limit = 0;
112    pcre_globals->recursion_limit = 0;
113    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
114}
115/* }}} */
116
/* Globals destructor: tears down the compiled-regex cache that GINIT created. */
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
    /* The table was initialised with php_free_pcre_cache as its element
     * destructor, so destroying it is expected to release every cached entry. */
    zend_hash_destroy(&pcre_globals->pcre_cache);
}
/* }}} */
122
/* INI settings of the pcre extension. Each entry is bound to the field of
 * the same name in zend_pcre_globals. */
PHP_INI_BEGIN()
    /* Copied into pcre_extra.match_limit before matching. */
    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
    /* Copied into pcre_extra.match_limit_recursion before matching. */
    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef PCRE_STUDY_JIT_COMPILE
    /* Only available when the PCRE headers provide PCRE_STUDY_JIT_COMPILE;
     * when set, patterns are studied with JIT compilation enabled. */
    STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
130
131
132/* {{{ PHP_MINFO_FUNCTION(pcre) */
133static PHP_MINFO_FUNCTION(pcre)
134{
135    int jit_yes = 0;
136
137    php_info_print_table_start();
138    php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
139    php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
140
141    if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
142        php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
143    } else {
144        php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
145    }
146
147    php_info_print_table_end();
148
149    DISPLAY_INI_ENTRIES();
150}
151/* }}} */
152
153/* {{{ PHP_MINIT_FUNCTION(pcre) */
154static PHP_MINIT_FUNCTION(pcre)
155{
156    REGISTER_INI_ENTRIES();
157
158    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
159    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
160    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
161    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
162    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
163    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
164    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
165
166    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
167    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
168    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
169    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
170    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
171    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
172    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
173
174    return SUCCESS;
175}
176/* }}} */
177
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
 * Module shutdown: removes the INI entries registered in MINIT. Always
 * reports SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
    UNREGISTER_INI_ENTRIES();

    return SUCCESS;
}
/* }}} */
186
187/* {{{ static pcre_clean_cache */
188static int pcre_clean_cache(zval *data, void *arg)
189{
190    int *num_clean = (int *)arg;
191
192    if (*num_clean > 0) {
193        (*num_clean)--;
194        return 1;
195    } else {
196        return 0;
197    }
198}
199/* }}} */
200
201/* {{{ static make_subpats_table */
202static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
203{
204    pcre_extra *extra = pce->extra;
205    int name_cnt = pce->name_count, name_size, ni = 0;
206    int rc;
207    char *name_table;
208    unsigned short name_idx;
209    char **subpat_names;
210    int rc1, rc2;
211
212    rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
213    rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
214    rc = rc2 ? rc2 : rc1;
215    if (rc < 0) {
216        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
217        return NULL;
218    }
219
220    subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
221    while (ni++ < name_cnt) {
222        name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
223        subpat_names[name_idx] = name_table + 2;
224        if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
225            php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
226            efree(subpat_names);
227            return NULL;
228        }
229        name_table += name_size;
230    }
231    return subpat_names;
232}
233/* }}} */
234
235/* {{{ pcre_get_compiled_regex_cache
236 */
237PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
238{
239    pcre                *re = NULL;
240    pcre_extra          *extra;
241    int                  coptions = 0;
242    int                  soptions = 0;
243    const char          *error;
244    int                  erroffset;
245    char                 delimiter;
246    char                 start_delimiter;
247    char                 end_delimiter;
248    char                *p, *pp;
249    char                *pattern;
250    int                  do_study = 0;
251    int                  poptions = 0;
252    unsigned const char *tables = NULL;
253    pcre_cache_entry    *pce;
254    pcre_cache_entry     new_entry;
255    int                  rc;
256
257    /* Try to lookup the cached regex entry, and if successful, just pass
258       back the compiled pattern, otherwise go on and compile it. */
259    pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
260    if (pce) {
261#if HAVE_SETLOCALE
262        if (pce->locale == BG(locale_string) ||
263            (pce->locale && BG(locale_string) &&
264             pce->locale->len == BG(locale_string)->len &&
265             !memcmp(pce->locale->val, BG(locale_string)->val, pce->locale->len))) {
266            return pce;
267        }
268#else
269        return pce;
270#endif
271    }
272
273    p = regex->val;
274
275    /* Parse through the leading whitespace, and display a warning if we
276       get to the end without encountering a delimiter. */
277    while (isspace((int)*(unsigned char *)p)) p++;
278    if (*p == 0) {
279        php_error_docref(NULL, E_WARNING,
280                         p < regex->val + regex->len ? "Null byte in regex" : "Empty regular expression");
281        return NULL;
282    }
283
284    /* Get the delimiter and display a warning if it is alphanumeric
285       or a backslash. */
286    delimiter = *p++;
287    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
288        php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
289        return NULL;
290    }
291
292    start_delimiter = delimiter;
293    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
294        delimiter = pp[5];
295    end_delimiter = delimiter;
296
297    pp = p;
298
299    if (start_delimiter == end_delimiter) {
300        /* We need to iterate through the pattern, searching for the ending delimiter,
301           but skipping the backslashed delimiters.  If the ending delimiter is not
302           found, display a warning. */
303        while (*pp != 0) {
304            if (*pp == '\\' && pp[1] != 0) pp++;
305            else if (*pp == delimiter)
306                break;
307            pp++;
308        }
309    } else {
310        /* We iterate through the pattern, searching for the matching ending
311         * delimiter. For each matching starting delimiter, we increment nesting
312         * level, and decrement it for each matching ending delimiter. If we
313         * reach the end of the pattern without matching, display a warning.
314         */
315        int brackets = 1;   /* brackets nesting level */
316        while (*pp != 0) {
317            if (*pp == '\\' && pp[1] != 0) pp++;
318            else if (*pp == end_delimiter && --brackets <= 0)
319                break;
320            else if (*pp == start_delimiter)
321                brackets++;
322            pp++;
323        }
324    }
325
326    if (*pp == 0) {
327        if (pp < regex->val + regex->len) {
328            php_error_docref(NULL,E_WARNING, "Null byte in regex");
329        } else if (start_delimiter == end_delimiter) {
330            php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
331        } else {
332            php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
333        }
334        return NULL;
335    }
336
337    /* Make a copy of the actual pattern. */
338    pattern = estrndup(p, pp-p);
339
340    /* Move on to the options */
341    pp++;
342
343    /* Parse through the options, setting appropriate flags.  Display
344       a warning if we encounter an unknown modifier. */
345    while (pp < regex->val + regex->len) {
346        switch (*pp++) {
347            /* Perl compatible options */
348            case 'i':   coptions |= PCRE_CASELESS;      break;
349            case 'm':   coptions |= PCRE_MULTILINE;     break;
350            case 's':   coptions |= PCRE_DOTALL;        break;
351            case 'x':   coptions |= PCRE_EXTENDED;      break;
352
353            /* PCRE specific options */
354            case 'A':   coptions |= PCRE_ANCHORED;      break;
355            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
356            case 'S':   do_study  = 1;                  break;
357            case 'U':   coptions |= PCRE_UNGREEDY;      break;
358            case 'X':   coptions |= PCRE_EXTRA;         break;
359            case 'u':   coptions |= PCRE_UTF8;
360    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
361       characters, even in UTF-8 mode. However, this can be changed by setting
362       the PCRE_UCP option. */
363#ifdef PCRE_UCP
364                        coptions |= PCRE_UCP;
365#endif
366                break;
367
368            /* Custom preg options */
369            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;
370
371            case ' ':
372            case '\n':
373                break;
374
375            default:
376                if (pp[-1]) {
377                    php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
378                } else {
379                    php_error_docref(NULL,E_WARNING, "Null byte in regex");
380                }
381                efree(pattern);
382                return NULL;
383        }
384    }
385
386#if HAVE_SETLOCALE
387    if (BG(locale_string) &&
388        (BG(locale_string)->len != 1 || BG(locale_string)->val[0] != 'C')) {
389        tables = pcre_maketables();
390    }
391#endif
392
393    /* Compile pattern and display a warning if compilation failed. */
394    re = pcre_compile(pattern,
395                      coptions,
396                      &error,
397                      &erroffset,
398                      tables);
399
400    if (re == NULL) {
401        php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
402        efree(pattern);
403        if (tables) {
404            pefree((void*)tables, 1);
405        }
406        return NULL;
407    }
408
409#ifdef PCRE_STUDY_JIT_COMPILE
410    if (PCRE_G(jit)) {
411        /* Enable PCRE JIT compiler */
412        do_study = 1;
413        soptions |= PCRE_STUDY_JIT_COMPILE;
414    }
415#endif
416
417    /* If study option was specified, study the pattern and
418       store the result in extra for passing to pcre_exec. */
419    if (do_study) {
420        extra = pcre_study(re, soptions, &error);
421        if (extra) {
422            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
423            extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
424            extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
425        }
426        if (error != NULL) {
427            php_error_docref(NULL, E_WARNING, "Error while studying pattern");
428        }
429    } else {
430        extra = NULL;
431    }
432
433    efree(pattern);
434
435    /*
436     * If we reached cache limit, clean out the items from the head of the list;
437     * these are supposedly the oldest ones (but not necessarily the least used
438     * ones).
439     */
440    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
441        int num_clean = PCRE_CACHE_SIZE / 8;
442        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
443    }
444
445    /* Store the compiled pattern and extra info in the cache. */
446    new_entry.re = re;
447    new_entry.extra = extra;
448    new_entry.preg_options = poptions;
449    new_entry.compile_options = coptions;
450#if HAVE_SETLOCALE
451    new_entry.locale = BG(locale_string) ?
452        ((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
453            zend_string_copy(BG(locale_string)) :
454            zend_string_init(BG(locale_string)->val, BG(locale_string)->len, 1)) :
455        NULL;
456    new_entry.tables = tables;
457#endif
458
459    rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
460    if (rc < 0) {
461        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
462        return NULL;
463    }
464
465    rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
466    if (rc < 0) {
467        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
468        return NULL;
469    }
470
471    /*
472     * Interned strings are not duplicated when stored in HashTable,
473     * but all the interned strings created during HTTP request are removed
474     * at end of request. However PCRE_G(pcre_cache) must be consistent
475     * on the next request as well. So we disable usage of interned strings
476     * as hash keys especually for this table.
477     * See bug #63180
478     */
479    pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache), regex->val, regex->len, &new_entry, sizeof(pcre_cache_entry));
480
481    return pce;
482}
483/* }}} */
484
485/* {{{ pcre_get_compiled_regex
486 */
487PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
488{
489    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
490
491    if (extra) {
492        *extra = pce ? pce->extra : NULL;
493    }
494    if (preg_options) {
495        *preg_options = pce ? pce->preg_options : 0;
496    }
497
498    return pce ? pce->re : NULL;
499}
500/* }}} */
501
502/* {{{ pcre_get_compiled_regex_ex
503 */
504PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
505{
506    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
507
508    if (extra) {
509        *extra = pce ? pce->extra : NULL;
510    }
511    if (preg_options) {
512        *preg_options = pce ? pce->preg_options : 0;
513    }
514    if (compile_options) {
515        *compile_options = pce ? pce->compile_options : 0;
516    }
517
518    return pce ? pce->re : NULL;
519}
520/* }}} */
521
522/* {{{ add_offset_pair */
523static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
524{
525    zval match_pair, tmp;
526
527    array_init_size(&match_pair, 2);
528
529    /* Add (match, offset) to the return value */
530    ZVAL_STRINGL(&tmp, str, len);
531    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
532    ZVAL_LONG(&tmp, offset);
533    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
534
535    if (name) {
536        Z_ADDREF(match_pair);
537        zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
538    }
539    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
540}
541/* }}} */
542
543static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
544{
545    /* parameters */
546    zend_string      *regex;            /* Regular expression */
547    zend_string      *subject;          /* String to match against */
548    pcre_cache_entry *pce;              /* Compiled regular expression */
549    zval             *subpats = NULL;   /* Array for subpatterns */
550    zend_long         flags = 0;        /* Match control flags */
551    zend_long         start_offset = 0; /* Where the new search starts */
552
553#ifndef FAST_ZPP
554    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
555                              &subject, &subpats, &flags, &start_offset) == FAILURE) {
556        RETURN_FALSE;
557    }
558#else
559    ZEND_PARSE_PARAMETERS_START(2, 5)
560        Z_PARAM_STR(regex)
561        Z_PARAM_STR(subject)
562        Z_PARAM_OPTIONAL
563        Z_PARAM_ZVAL_EX(subpats, 0, 1)
564        Z_PARAM_LONG(flags)
565        Z_PARAM_LONG(start_offset)
566    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
567#endif
568
569    /* Compile regex or get it from cache. */
570    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
571        RETURN_FALSE;
572    }
573
574    php_pcre_match_impl(pce, subject->val, (int)subject->len, return_value, subpats,
575        global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
576}
577/* }}} */
578
579/* {{{ php_pcre_match_impl() */
580PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
581    zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
582{
583    zval             result_set,        /* Holds a set of subpatterns after
584                                           a global match */
585                    *match_sets = NULL; /* An array of sets of matches for each
586                                           subpattern after a global match */
587    pcre_extra      *extra = pce->extra;/* Holds results of studying */
588    pcre_extra       extra_data;        /* Used locally for exec options */
589    int              exoptions = 0;     /* Execution options */
590    int              count = 0;         /* Count of matched subpatterns */
591    int             *offsets;           /* Array of subpattern offsets */
592    int              num_subpats;       /* Number of captured subpatterns */
593    int              size_offsets;      /* Size of the offsets array */
594    int              matched;           /* Has anything matched */
595    int              g_notempty = 0;    /* If the match should not be empty */
596    const char     **stringlist;        /* Holds list of subpatterns */
597    char           **subpat_names;      /* Array for named subpatterns */
598    int              i;
599    int              subpats_order;     /* Order of subpattern matches */
600    int              offset_capture;    /* Capture match offsets: yes/no */
601    unsigned char   *mark = NULL;       /* Target for MARK name */
602    zval            marks;              /* Array of marks for PREG_PATTERN_ORDER */
603    ALLOCA_FLAG(use_heap);
604
605    ZVAL_UNDEF(&marks);
606
607    /* Overwrite the passed-in value for subpatterns with an empty array. */
608    if (subpats != NULL) {
609        zval_dtor(subpats);
610        array_init(subpats);
611    }
612
613    subpats_order = global ? PREG_PATTERN_ORDER : 0;
614
615    if (use_flags) {
616        offset_capture = flags & PREG_OFFSET_CAPTURE;
617
618        /*
619         * subpats_order is pre-set to pattern mode so we change it only if
620         * necessary.
621         */
622        if (flags & 0xff) {
623            subpats_order = flags & 0xff;
624        }
625        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
626            (!global && subpats_order != 0)) {
627            php_error_docref(NULL, E_WARNING, "Invalid flags specified");
628            return;
629        }
630    } else {
631        offset_capture = 0;
632    }
633
634    /* Negative offset counts from the end of the string. */
635    if (start_offset < 0) {
636        start_offset = subject_len + start_offset;
637        if (start_offset < 0) {
638            start_offset = 0;
639        }
640    }
641
642    if (extra == NULL) {
643        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
644        extra = &extra_data;
645    }
646    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
647    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
648#ifdef PCRE_EXTRA_MARK
649    extra->mark = &mark;
650    extra->flags |= PCRE_EXTRA_MARK;
651#endif
652
653    /* Calculate the size of the offsets array, and allocate memory for it. */
654    num_subpats = pce->capture_count + 1;
655    size_offsets = num_subpats * 3;
656
657    /*
658     * Build a mapping from subpattern numbers to their names. We will
659     * allocate the table only if there are any named subpatterns.
660     */
661    subpat_names = NULL;
662    if (pce->name_count > 0) {
663        subpat_names = make_subpats_table(num_subpats, pce);
664        if (!subpat_names) {
665            RETURN_FALSE;
666        }
667    }
668
669    if (size_offsets <= 32) {
670        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
671    } else {
672        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
673    }
674    memset(offsets, 0, size_offsets*sizeof(int));
675    /* Allocate match sets array and initialize the values. */
676    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
677        match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
678        for (i=0; i<num_subpats; i++) {
679            array_init(&match_sets[i]);
680        }
681    }
682
683    matched = 0;
684    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
685
686    do {
687        /* Execute the regular expression. */
688        count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
689                          exoptions|g_notempty, offsets, size_offsets);
690
691        /* the string was already proved to be valid UTF-8 */
692        exoptions |= PCRE_NO_UTF8_CHECK;
693
694        /* Check for too many substrings condition. */
695        if (count == 0) {
696            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
697            count = size_offsets/3;
698        }
699
700        /* If something has matched */
701        if (count > 0) {
702            matched++;
703
704            /* If subpatterns array has been passed, fill it in with values. */
705            if (subpats != NULL) {
706                /* Try to get the list of substrings and display a warning if failed. */
707                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
708                    if (subpat_names) {
709                        efree(subpat_names);
710                    }
711                    if (size_offsets <= 32) {
712                        free_alloca(offsets, use_heap);
713                    } else {
714                        efree(offsets);
715                    }
716                    if (match_sets) efree(match_sets);
717                    php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
718                    RETURN_FALSE;
719                }
720
721                if (global) {   /* global pattern matching */
722                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
723                        /* For each subpattern, insert it into the appropriate array. */
724                        if (offset_capture) {
725                            for (i = 0; i < count; i++) {
726                                add_offset_pair(&match_sets[i], (char *)stringlist[i],
727                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
728                            }
729                        } else {
730                            for (i = 0; i < count; i++) {
731                                add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
732                                                       offsets[(i<<1)+1] - offsets[i<<1]);
733                            }
734                        }
735                        /* Add MARK, if available */
736                        if (mark) {
737                            if (Z_TYPE(marks) == IS_UNDEF) {
738                                array_init(&marks);
739                            }
740                            add_index_string(&marks, matched - 1, (char *) mark);
741                        }
742                        /*
743                         * If the number of captured subpatterns on this run is
744                         * less than the total possible number, pad the result
745                         * arrays with empty strings.
746                         */
747                        if (count < num_subpats) {
748                            for (; i < num_subpats; i++) {
749                                add_next_index_string(&match_sets[i], "");
750                            }
751                        }
752                    } else {
753                        /* Allocate the result set array */
754                        array_init_size(&result_set, count + (mark ? 1 : 0));
755
756                        /* Add all the subpatterns to it */
757                        if (subpat_names) {
758                            if (offset_capture) {
759                                for (i = 0; i < count; i++) {
760                                    add_offset_pair(&result_set, (char *)stringlist[i],
761                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
762                                }
763                            } else {
764                                for (i = 0; i < count; i++) {
765                                    if (subpat_names[i]) {
766                                        add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
767                                                               offsets[(i<<1)+1] - offsets[i<<1]);
768                                    }
769                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
770                                                           offsets[(i<<1)+1] - offsets[i<<1]);
771                                }
772                            }
773                        } else {
774                            if (offset_capture) {
775                                for (i = 0; i < count; i++) {
776                                    add_offset_pair(&result_set, (char *)stringlist[i],
777                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
778                                }
779                            } else {
780                                for (i = 0; i < count; i++) {
781                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
782                                                           offsets[(i<<1)+1] - offsets[i<<1]);
783                                }
784                            }
785                        }
786                        /* Add MARK, if available */
787                        if (mark) {
788                            add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
789                        }
790                        /* And add it to the output array */
791                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
792                    }
793                } else {            /* single pattern matching */
794                    /* For each subpattern, insert it into the subpatterns array. */
795                    if (subpat_names) {
796                        if (offset_capture) {
797                            for (i = 0; i < count; i++) {
798                                add_offset_pair(subpats, (char *)stringlist[i],
799                                                offsets[(i<<1)+1] - offsets[i<<1],
800                                                offsets[i<<1], subpat_names[i]);
801                            }
802                        } else {
803                            for (i = 0; i < count; i++) {
804                                if (subpat_names[i]) {
805                                    add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
806                                                      offsets[(i<<1)+1] - offsets[i<<1]);
807                                }
808                                add_next_index_stringl(subpats, (char *)stringlist[i],
809                                                       offsets[(i<<1)+1] - offsets[i<<1]);
810                            }
811                        }
812                    } else {
813                        if (offset_capture) {
814                            for (i = 0; i < count; i++) {
815                                add_offset_pair(subpats, (char *)stringlist[i],
816                                                offsets[(i<<1)+1] - offsets[i<<1],
817                                                offsets[i<<1], NULL);
818                            }
819                        } else {
820                            for (i = 0; i < count; i++) {
821                                add_next_index_stringl(subpats, (char *)stringlist[i],
822                                                       offsets[(i<<1)+1] - offsets[i<<1]);
823                            }
824                        }
825                    }
826                    /* Add MARK, if available */
827                    if (mark) {
828                        add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
829                    }
830                }
831
832                pcre_free((void *) stringlist);
833            }
834        } else if (count == PCRE_ERROR_NOMATCH) {
835            /* If we previously set PCRE_NOTEMPTY after a null match,
836               this is not necessarily the end. We need to advance
837               the start offset, and continue. Fudge the offset values
838               to achieve this, unless we're already at the end of the string. */
839            if (g_notempty != 0 && start_offset < subject_len) {
840                offsets[0] = (int)start_offset;
841                offsets[1] = (int)(start_offset + 1);
842            } else
843                break;
844        } else {
845            pcre_handle_exec_error(count);
846            break;
847        }
848
849        /* If we have matched an empty string, mimic what Perl's /g options does.
850           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
851           the match again at the same point. If this fails (picked up above) we
852           advance to the next character. */
853        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
854
855        /* Advance to the position right after the last full match */
856        start_offset = offsets[1];
857    } while (global);
858
859    /* Add the match sets to the output array and clean up */
860    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
861        if (subpat_names) {
862            for (i = 0; i < num_subpats; i++) {
863                if (subpat_names[i]) {
864                    zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
865                                     strlen(subpat_names[i]), &match_sets[i]);
866                    Z_ADDREF(match_sets[i]);
867                }
868                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
869            }
870        } else {
871            for (i = 0; i < num_subpats; i++) {
872                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
873            }
874        }
875        efree(match_sets);
876
877        if (Z_TYPE(marks) != IS_UNDEF) {
878            add_assoc_zval(subpats, "MARK", &marks);
879        }
880    }
881
882    if (size_offsets <= 32) {
883        free_alloca(offsets, use_heap);
884    } else {
885        efree(offsets);
886    }
887    if (subpat_names) {
888        efree(subpat_names);
889    }
890
891    /* Did we encounter an error? */
892    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
893        RETVAL_LONG(matched);
894    } else {
895        RETVAL_FALSE;
896    }
897}
898/* }}} */
899
900/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
901   Perform a Perl-style regular expression match */
902static PHP_FUNCTION(preg_match)
903{
904    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
905}
906/* }}} */
907
908/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
909   Perform a Perl-style global regular expression match */
910static PHP_FUNCTION(preg_match_all)
911{
912    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
913}
914/* }}} */
915
/* {{{ preg_get_backref
 * Parse a backreference token located at *str. The first character is the
 * introducer (the callers in this file only invoke it on a backslash or '$');
 * it is followed by one or two decimal digits. When the introducer is '$',
 * the digits may be wrapped in braces: "${N}".
 *
 * On success the parsed number is stored in *backref, *str is advanced past
 * the whole token and 1 is returned. On failure 0 is returned and *str is
 * left untouched; *backref may nevertheless already hold the digits that
 * were parsed before the failure was detected.
 *
 * The string must be NUL-terminated.
 */
static int preg_get_backref(char **str, int *backref)
{
    char *cursor = *str;
    int   braced;

    /* An introducer that is the last character cannot start a reference. */
    if (cursor[1] == '\0') {
        return 0;
    }

    /* Only the '$' introducer may be followed by an opening brace. */
    braced = (cursor[0] == '$' && cursor[1] == '{');
    cursor += braced ? 2 : 1;

    /* The first digit is mandatory. */
    if (*cursor < '0' || *cursor > '9') {
        return 0;
    }
    *backref = *cursor - '0';
    cursor++;

    /* A second digit is optional. */
    if (*cursor >= '0' && *cursor <= '9') {
        *backref = *backref * 10 + *cursor - '0';
        cursor++;
    }

    /* The braced form must be closed. */
    if (braced) {
        if (*cursor != '}') {
            return 0;
        }
        cursor++;
    }

    *str = cursor;
    return 1;
}
/* }}} */
954
955/* {{{ preg_do_repl_func
956 */
957static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
958{
959    zend_string *result_str;
960    zval         retval;            /* Function return value */
961    zval         args[1];           /* Argument to pass to function */
962    int          i;
963
964    array_init_size(&args[0], count + (mark ? 1 : 0));
965    if (subpat_names) {
966        for (i = 0; i < count; i++) {
967            if (subpat_names[i]) {
968                add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
969            }
970            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
971        }
972    } else {
973        for (i = 0; i < count; i++) {
974            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
975        }
976    }
977    if (mark) {
978        add_assoc_string(&args[0], "MARK", (char *) mark);
979    }
980
981    if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
982        result_str = zval_get_string(&retval);
983        zval_ptr_dtor(&retval);
984    } else {
985        if (!EG(exception)) {
986            php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
987        }
988
989        result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
990    }
991
992    zval_ptr_dtor(&args[0]);
993
994    return result_str;
995}
996/* }}} */
997
998/* {{{ php_pcre_replace
999 */
1000PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1001                              zend_string *subject_str,
1002                              char *subject, int subject_len,
1003                              zval *replace_val, int is_callable_replace,
1004                              int limit, int *replace_count)
1005{
1006    pcre_cache_entry    *pce;               /* Compiled regular expression */
1007
1008    /* Compile regex or get it from cache. */
1009    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1010        return NULL;
1011    }
1012
1013    return php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
1014        is_callable_replace, limit, replace_count);
1015}
1016/* }}} */
1017
1018/* {{{ php_pcre_replace_impl() */
1019PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val,
1020    int is_callable_replace, int limit, int *replace_count)
1021{
1022    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1023    pcre_extra       extra_data;        /* Used locally for exec options */
1024    int              exoptions = 0;     /* Execution options */
1025    int              count = 0;         /* Count of matched subpatterns */
1026    int             *offsets;           /* Array of subpattern offsets */
1027    char            **subpat_names;     /* Array for named subpatterns */
1028    int              num_subpats;       /* Number of captured subpatterns */
1029    int              size_offsets;      /* Size of the offsets array */
1030    int              new_len;           /* Length of needed storage */
1031    int              alloc_len;         /* Actual allocated length */
1032    int              match_len;         /* Length of the current match */
1033    int              backref;           /* Backreference number */
1034    int              start_offset;      /* Where the new search starts */
1035    int              g_notempty=0;      /* If the match should not be empty */
1036    int              replace_len=0;     /* Length of replacement string */
1037    char            *replace=NULL,      /* Replacement string */
1038                    *walkbuf,           /* Location of current replacement in the result */
1039                    *walk,              /* Used to walk the replacement string */
1040                    *match,             /* The current match */
1041                    *piece,             /* The current piece of subject */
1042                    *replace_end=NULL,  /* End of replacement string */
1043                     walk_last;         /* Last walked character */
1044    int              result_len;        /* Length of result */
1045    unsigned char   *mark = NULL;       /* Target for MARK name */
1046    zend_string     *result;            /* Result of replacement */
1047    zend_string     *eval_result=NULL;  /* Result of custom function */
1048    ALLOCA_FLAG(use_heap);
1049
1050    if (extra == NULL) {
1051        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1052        extra = &extra_data;
1053    }
1054    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1055    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1056
1057    if (pce->preg_options & PREG_REPLACE_EVAL) {
1058        php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1059        return NULL;
1060    }
1061    if (!is_callable_replace) {
1062        replace = Z_STRVAL_P(replace_val);
1063        replace_len = (int)Z_STRLEN_P(replace_val);
1064        replace_end = replace + replace_len;
1065    }
1066
1067    /* Calculate the size of the offsets array, and allocate memory for it. */
1068    num_subpats = pce->capture_count + 1;
1069    size_offsets = num_subpats * 3;
1070    if (size_offsets <= 32) {
1071        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1072    } else {
1073        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1074    }
1075
1076    /*
1077     * Build a mapping from subpattern numbers to their names. We will
1078     * allocate the table only if there are any named subpatterns.
1079     */
1080    subpat_names = NULL;
1081    if (pce->name_count > 0) {
1082        subpat_names = make_subpats_table(num_subpats, pce);
1083        if (!subpat_names) {
1084            return NULL;
1085        }
1086    }
1087
1088    alloc_len = 0;
1089    result = NULL;
1090
1091    /* Initialize */
1092    match = NULL;
1093    start_offset = 0;
1094    result_len = 0;
1095    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1096
1097    while (1) {
1098#ifdef PCRE_EXTRA_MARK
1099        extra->mark = &mark;
1100        extra->flags |= PCRE_EXTRA_MARK;
1101#endif
1102        /* Execute the regular expression. */
1103        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1104                          exoptions|g_notempty, offsets, size_offsets);
1105
1106        /* the string was already proved to be valid UTF-8 */
1107        exoptions |= PCRE_NO_UTF8_CHECK;
1108
1109        /* Check for too many substrings condition. */
1110        if (count == 0) {
1111            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1112            count = size_offsets/3;
1113        }
1114
1115        piece = subject + start_offset;
1116
1117        if (count > 0 && (limit == -1 || limit > 0)) {
1118            if (replace_count) {
1119                ++*replace_count;
1120            }
1121            /* Set the match location in subject */
1122            match = subject + offsets[0];
1123
1124            new_len = result_len + offsets[0] - start_offset; /* part before the match */
1125
1126            if (is_callable_replace) {
1127                /* Use custom function to get replacement string and its length. */
1128                eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1129                new_len += (int)eval_result->len;
1130            } else { /* do regular substitution */
1131                walk = replace;
1132                walk_last = 0;
1133                while (walk < replace_end) {
1134                    if ('\\' == *walk || '$' == *walk) {
1135                        if (walk_last == '\\') {
1136                            walk++;
1137                            walk_last = 0;
1138                            continue;
1139                        }
1140                        if (preg_get_backref(&walk, &backref)) {
1141                            if (backref < count)
1142                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1143                            continue;
1144                        }
1145                    }
1146                    new_len++;
1147                    walk++;
1148                    walk_last = walk[-1];
1149                }
1150            }
1151
1152            if (new_len >= alloc_len) {
1153                if (alloc_len == 0) {
1154                    alloc_len = 2 * subject_len;
1155                    if (new_len >= alloc_len) {
1156                        alloc_len = alloc_len + 2 * new_len;
1157                    }
1158                    result = zend_string_alloc(alloc_len, 0);
1159                } else {
1160                    alloc_len = alloc_len + 2 * new_len;
1161                    result = zend_string_extend(result, alloc_len, 0);
1162                }
1163            }
1164            /* copy the part of the string before the match */
1165            memcpy(&result->val[result_len], piece, match-piece);
1166            result_len += (int)(match-piece);
1167
1168            /* copy replacement and backrefs */
1169            walkbuf = result->val + result_len;
1170
1171            /* If using custom function, copy result to the buffer and clean up. */
1172            if (is_callable_replace) {
1173                memcpy(walkbuf, eval_result->val, eval_result->len);
1174                result_len += (int)eval_result->len;
1175                if (eval_result) zend_string_release(eval_result);
1176            } else { /* do regular backreference copying */
1177                walk = replace;
1178                walk_last = 0;
1179                while (walk < replace_end) {
1180                    if ('\\' == *walk || '$' == *walk) {
1181                        if (walk_last == '\\') {
1182                            *(walkbuf-1) = *walk++;
1183                            walk_last = 0;
1184                            continue;
1185                        }
1186                        if (preg_get_backref(&walk, &backref)) {
1187                            if (backref < count) {
1188                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1189                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1190                                walkbuf += match_len;
1191                            }
1192                            continue;
1193                        }
1194                    }
1195                    *walkbuf++ = *walk++;
1196                    walk_last = walk[-1];
1197                }
1198                *walkbuf = '\0';
1199                /* increment the result length by how much we've added to the string */
1200                result_len += (int)(walkbuf - (result->val + result_len));
1201            }
1202
1203            if (limit != -1)
1204                limit--;
1205
1206        } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1207            /* If we previously set PCRE_NOTEMPTY after a null match,
1208               this is not necessarily the end. We need to advance
1209               the start offset, and continue. Fudge the offset values
1210               to achieve this, unless we're already at the end of the string. */
1211            if (g_notempty != 0 && start_offset < subject_len) {
1212                offsets[0] = start_offset;
1213                offsets[1] = start_offset + 1;
1214                memcpy(&result->val[result_len], piece, 1);
1215                result_len++;
1216            } else {
1217                if (!result && subject_str) {
1218                    result = zend_string_copy(subject_str);
1219                    break;
1220                }
1221                new_len = result_len + subject_len - start_offset;
1222                if (new_len > alloc_len) {
1223                    alloc_len = new_len; /* now we know exactly how long it is */
1224                    if (NULL != result) {
1225                        result = zend_string_realloc(result, alloc_len, 0);
1226                    } else {
1227                        result = zend_string_alloc(alloc_len, 0);
1228                    }
1229                }
1230                /* stick that last bit of string on our output */
1231                memcpy(&result->val[result_len], piece, subject_len - start_offset);
1232                result_len += subject_len - start_offset;
1233                result->val[result_len] = '\0';
1234                result->len = result_len;
1235                break;
1236            }
1237        } else {
1238            pcre_handle_exec_error(count);
1239            if (result) {
1240                zend_string_free(result);
1241                result = NULL;
1242            }
1243            break;
1244        }
1245
1246        /* If we have matched an empty string, mimic what Perl's /g options does.
1247           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1248           the match again at the same point. If this fails (picked up above) we
1249           advance to the next character. */
1250        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1251
1252        /* Advance to the next piece. */
1253        start_offset = offsets[1];
1254    }
1255
1256    if (size_offsets <= 32) {
1257        free_alloca(offsets, use_heap);
1258    } else {
1259        efree(offsets);
1260    }
1261    if (subpat_names) {
1262        efree(subpat_names);
1263    }
1264
1265    return result;
1266}
1267/* }}} */
1268
1269/* {{{ php_replace_in_subject
1270 */
1271static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1272{
1273    zval        *regex_entry,
1274                *replace_entry = NULL,
1275                *replace_value,
1276                 empty_replace;
1277    zend_string *result;
1278    uint32_t replace_idx;
1279    zend_string *subject_str = zval_get_string(subject);
1280
1281    /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1282    ZVAL_EMPTY_STRING(&empty_replace);
1283
1284    /* If regex is an array */
1285    if (Z_TYPE_P(regex) == IS_ARRAY) {
1286        replace_value = replace;
1287        replace_idx = 0;
1288
1289        /* For each entry in the regex array, get the entry */
1290        ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1291            /* Make sure we're dealing with strings. */
1292            zend_string *regex_str = zval_get_string(regex_entry);
1293
1294            /* If replace is an array and not a callable construct */
1295            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1296                /* Get current entry */
1297                replace_entry = NULL;
1298                while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1299                    if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
1300                        replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
1301                        break;
1302                    }
1303                    replace_idx++;
1304                }
1305                if (replace_entry != NULL) {
1306                    if (!is_callable_replace) {
1307                        convert_to_string_ex(replace_entry);
1308                    }
1309                    replace_value = replace_entry;
1310                    replace_idx++;
1311                } else {
1312                    /* We've run out of replacement strings, so use an empty one */
1313                    replace_value = &empty_replace;
1314                }
1315            }
1316
1317            /* Do the actual replacement and put the result back into subject_str
1318               for further replacements. */
1319            if ((result = php_pcre_replace(regex_str,
1320                                           subject_str,
1321                                           subject_str->val,
1322                                           (int)subject_str->len,
1323                                           replace_value,
1324                                           is_callable_replace,
1325                                           limit,
1326                                           replace_count)) != NULL) {
1327                zend_string_release(subject_str);
1328                subject_str = result;
1329            } else {
1330                zend_string_release(subject_str);
1331                zend_string_release(regex_str);
1332                return NULL;
1333            }
1334
1335            zend_string_release(regex_str);
1336        } ZEND_HASH_FOREACH_END();
1337
1338        return subject_str;
1339    } else {
1340        result = php_pcre_replace(Z_STR_P(regex),
1341                                  subject_str,
1342                                  subject_str->val,
1343                                  (int)subject_str->len,
1344                                  replace,
1345                                  is_callable_replace,
1346                                  limit,
1347                                  replace_count);
1348        zend_string_release(subject_str);
1349        return result;
1350    }
1351}
1352/* }}} */
1353
1354/* {{{ preg_replace_impl
1355 */
1356static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
1357{
1358    zval        *subject_entry;
1359    zend_string *result;
1360    zend_string *string_key;
1361    zend_ulong   num_key;
1362    int          replace_count = 0, old_replace_count;
1363
1364    if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1365        SEPARATE_ZVAL(replace);
1366        convert_to_string_ex(replace);
1367    }
1368
1369    if (Z_TYPE_P(regex) != IS_ARRAY) {
1370        SEPARATE_ZVAL(regex);
1371        convert_to_string_ex(regex);
1372    }
1373
1374    /* if subject is an array */
1375    if (Z_TYPE_P(subject) == IS_ARRAY) {
1376        array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1377
1378        /* For each subject entry, convert it to string, then perform replacement
1379           and add the result to the return_value array. */
1380        ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1381            old_replace_count = replace_count;
1382            if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1383                if (!is_filter || replace_count > old_replace_count) {
1384                    /* Add to return array */
1385                    if (string_key) {
1386                        add_assoc_str_ex(return_value, string_key->val, string_key->len, result);
1387                    } else {
1388                        add_index_str(return_value, num_key, result);
1389                    }
1390                } else {
1391                    zend_string_release(result);
1392                }
1393            }
1394        } ZEND_HASH_FOREACH_END();
1395    } else {
1396        /* if subject is not an array */
1397        old_replace_count = replace_count;
1398        if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1399            if (!is_filter || replace_count > old_replace_count) {
1400                RETVAL_STR(result);
1401            } else {
1402                zend_string_release(result);
1403            }
1404        }
1405    }
1406
1407    return replace_count;
1408}
1409/* }}} */
1410
1411/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1412   Perform Perl-style regular expression replacement. */
1413static PHP_FUNCTION(preg_replace)
1414{
1415    zval *regex, *replace, *subject, *zcount = NULL;
1416    zend_long limit = -1;
1417    int replace_count;
1418
1419#ifndef FAST_ZPP
1420    /* Get function parameters and do error-checking. */
1421    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1422        return;
1423    }
1424#else
1425    ZEND_PARSE_PARAMETERS_START(3, 5)
1426        Z_PARAM_ZVAL(regex)
1427        Z_PARAM_ZVAL(replace)
1428        Z_PARAM_ZVAL(subject)
1429        Z_PARAM_OPTIONAL
1430        Z_PARAM_LONG(limit)
1431        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1432    ZEND_PARSE_PARAMETERS_END();
1433#endif
1434
1435    if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1436        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1437        RETURN_FALSE;
1438    }
1439
1440    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0);
1441    if (zcount) {
1442        zval_dtor(zcount);
1443        ZVAL_LONG(zcount, replace_count);
1444    }
1445}
1446/* }}} */
1447
1448/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1449   Perform Perl-style regular expression replacement using replacement callback. */
1450static PHP_FUNCTION(preg_replace_callback)
1451{
1452    zval *regex, *replace, *subject, *zcount = NULL;
1453    zend_long limit = -1;
1454    zend_string *callback_name;
1455    int replace_count;
1456
1457#ifndef FAST_ZPP
1458    /* Get function parameters and do error-checking. */
1459    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1460        return;
1461    }
1462#else
1463    ZEND_PARSE_PARAMETERS_START(3, 5)
1464        Z_PARAM_ZVAL(regex)
1465        Z_PARAM_ZVAL(replace)
1466        Z_PARAM_ZVAL(subject)
1467        Z_PARAM_OPTIONAL
1468        Z_PARAM_LONG(limit)
1469        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1470    ZEND_PARSE_PARAMETERS_END();
1471#endif
1472
1473    if (!zend_is_callable(replace, 0, &callback_name)) {
1474        php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name->val);
1475        zend_string_release(callback_name);
1476        ZVAL_COPY(return_value, subject);
1477        return;
1478    }
1479    zend_string_release(callback_name);
1480
1481    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0);
1482    if (zcount) {
1483        zval_dtor(zcount);
1484        ZVAL_LONG(zcount, replace_count);
1485    }
1486}
1487/* }}} */
1488
1489/* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
1490   Perform Perl-style regular expression replacement using replacement callback. */
1491static PHP_FUNCTION(preg_replace_callback_array)
1492{
1493    zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
1494    zend_long limit = -1;
1495    zend_ulong num_idx;
1496    zend_string *str_idx;
1497    zend_string *callback_name;
1498    int replace_count = 0;
1499
1500#ifndef FAST_ZPP
1501    /* Get function parameters and do error-checking. */
1502    if (zend_parse_parameters(ZEND_NUM_ARGS(), "az|lz/", &pattern, &subject, &limit, &zcount) == FAILURE) {
1503        return;
1504    }
1505#else
1506    ZEND_PARSE_PARAMETERS_START(2, 4)
1507        Z_PARAM_ARRAY(pattern)
1508        Z_PARAM_ZVAL(subject)
1509        Z_PARAM_OPTIONAL
1510        Z_PARAM_LONG(limit)
1511        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1512    ZEND_PARSE_PARAMETERS_END();
1513#endif
1514
1515    ZVAL_UNDEF(&zv);
1516    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(pattern), num_idx, str_idx, replace) {
1517        if (str_idx) {
1518            ZVAL_STR_COPY(&regex, str_idx);
1519        } else {
1520            php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
1521            zval_ptr_dtor(return_value);
1522            RETURN_NULL();
1523        }
1524
1525        if (!zend_is_callable(replace, 0, &callback_name)) {
1526            php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", callback_name->val);
1527            zend_string_release(callback_name);
1528            zval_ptr_dtor(&regex);
1529            zval_ptr_dtor(return_value);
1530            ZVAL_COPY(return_value, subject);
1531            return;
1532        }
1533        zend_string_release(callback_name);
1534
1535        if (Z_ISNULL_P(return_value)) {
1536            replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0);
1537        } else {
1538            replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0);
1539            zval_ptr_dtor(return_value);
1540        }
1541
1542        zval_ptr_dtor(&regex);
1543
1544        if (Z_ISUNDEF(zv)) {
1545            RETURN_NULL();
1546        }
1547
1548        ZVAL_COPY_VALUE(return_value, &zv);
1549
1550        if (UNEXPECTED(EG(exception))) {
1551            zval_ptr_dtor(return_value);
1552            RETURN_NULL();
1553        }
1554    } ZEND_HASH_FOREACH_END();
1555
1556    if (zcount) {
1557        zval_dtor(zcount);
1558        ZVAL_LONG(zcount, replace_count);
1559    }
1560}
1561/* }}} */
1562
1563/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1564   Perform Perl-style regular expression replacement and only return matches. */
1565static PHP_FUNCTION(preg_filter)
1566{
1567    zval *regex, *replace, *subject, *zcount = NULL;
1568    zend_long limit = -1;
1569    int replace_count;
1570
1571#ifndef FAST_ZPP
1572    /* Get function parameters and do error-checking. */
1573    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1574        return;
1575    }
1576#else
1577    ZEND_PARSE_PARAMETERS_START(3, 5)
1578        Z_PARAM_ZVAL(regex)
1579        Z_PARAM_ZVAL(replace)
1580        Z_PARAM_ZVAL(subject)
1581        Z_PARAM_OPTIONAL
1582        Z_PARAM_LONG(limit)
1583        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1584    ZEND_PARSE_PARAMETERS_END();
1585#endif
1586
1587    if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1588        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1589        RETURN_FALSE;
1590    }
1591
1592    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1);
1593    if (zcount) {
1594        zval_dtor(zcount);
1595        ZVAL_LONG(zcount, replace_count);
1596    }
1597}
1598/* }}} */
1599
1600/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1601   Split string into an array using a perl-style regular expression as a delimiter */
1602static PHP_FUNCTION(preg_split)
1603{
1604    zend_string         *regex;         /* Regular expression */
1605    zend_string         *subject;       /* String to match against */
1606    zend_long            limit_val = -1;/* Integer value of limit */
1607    zend_long            flags = 0;     /* Match control flags */
1608    pcre_cache_entry    *pce;           /* Compiled regular expression */
1609
1610    /* Get function parameters and do error checking */
1611#ifndef FAST_ZPP
1612    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
1613                              &subject, &limit_val, &flags) == FAILURE) {
1614        RETURN_FALSE;
1615    }
1616#else
1617    ZEND_PARSE_PARAMETERS_START(2, 4)
1618        Z_PARAM_STR(regex)
1619        Z_PARAM_STR(subject)
1620        Z_PARAM_OPTIONAL
1621        Z_PARAM_LONG(limit_val)
1622        Z_PARAM_LONG(flags)
1623    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1624#endif
1625
1626    /* Compile regex or get it from cache. */
1627    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1628        RETURN_FALSE;
1629    }
1630
1631    php_pcre_split_impl(pce, subject->val, (int)subject->len, return_value, (int)limit_val, flags);
1632}
1633/* }}} */
1634
1635/* {{{ php_pcre_split
1636 */
1637PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1638    zend_long limit_val, zend_long flags)
1639{
1640    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1641    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1642    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1643    pcre_extra       extra_data;        /* Used locally for exec options */
1644    int             *offsets;           /* Array of subpattern offsets */
1645    int              size_offsets;      /* Size of the offsets array */
1646    int              exoptions = 0;     /* Execution options */
1647    int              count = 0;         /* Count of matched subpatterns */
1648    int              start_offset;      /* Where the new search starts */
1649    int              next_offset;       /* End of the last delimiter match + 1 */
1650    int              g_notempty = 0;    /* If the match should not be empty */
1651    char            *last_match;        /* Location of last match */
1652    int              no_empty;          /* If NO_EMPTY flag is set */
1653    int              delim_capture;     /* If delimiters should be captured */
1654    int              offset_capture;    /* If offsets should be captured */
1655    zval             tmp;
1656    ALLOCA_FLAG(use_heap);
1657
1658    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1659    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1660    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1661
1662    if (limit_val == 0) {
1663        limit_val = -1;
1664    }
1665
1666    if (extra == NULL) {
1667        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1668        extra = &extra_data;
1669    }
1670    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1671    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1672#ifdef PCRE_EXTRA_MARK
1673    extra->flags &= ~PCRE_EXTRA_MARK;
1674#endif
1675
1676    /* Initialize return value */
1677    array_init(return_value);
1678
1679    /* Calculate the size of the offsets array, and allocate memory for it. */
1680    size_offsets = (pce->capture_count + 1) * 3;
1681    if (size_offsets <= 32) {
1682        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1683    } else {
1684        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1685    }
1686
1687    /* Start at the beginning of the string */
1688    start_offset = 0;
1689    next_offset = 0;
1690    last_match = subject;
1691    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1692
1693    /* Get next piece if no limit or limit not yet reached and something matched*/
1694    while ((limit_val == -1 || limit_val > 1)) {
1695        count = pcre_exec(pce->re, extra, subject,
1696                          subject_len, start_offset,
1697                          exoptions|g_notempty, offsets, size_offsets);
1698
1699        /* the string was already proved to be valid UTF-8 */
1700        exoptions |= PCRE_NO_UTF8_CHECK;
1701
1702        /* Check for too many substrings condition. */
1703        if (count == 0) {
1704            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1705            count = size_offsets/3;
1706        }
1707
1708        /* If something matched */
1709        if (count > 0) {
1710            if (!no_empty || &subject[offsets[0]] != last_match) {
1711
1712                if (offset_capture) {
1713                    /* Add (match, offset) pair to the return value */
1714                    add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1715                } else {
1716                    /* Add the piece to the return value */
1717                    ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1718                    zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1719                }
1720
1721                /* One less left to do */
1722                if (limit_val != -1)
1723                    limit_val--;
1724            }
1725
1726            last_match = &subject[offsets[1]];
1727            next_offset = offsets[1];
1728
1729            if (delim_capture) {
1730                int i, match_len;
1731                for (i = 1; i < count; i++) {
1732                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1733                    /* If we have matched a delimiter */
1734                    if (!no_empty || match_len > 0) {
1735                        if (offset_capture) {
1736                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1737                        } else {
1738                            ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1739                            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1740                        }
1741                    }
1742                }
1743            }
1744        } else if (count == PCRE_ERROR_NOMATCH) {
1745            /* If we previously set PCRE_NOTEMPTY after a null match,
1746               this is not necessarily the end. We need to advance
1747               the start offset, and continue. Fudge the offset values
1748               to achieve this, unless we're already at the end of the string. */
1749            if (g_notempty != 0 && start_offset < subject_len) {
1750                if (pce->compile_options & PCRE_UTF8) {
1751                    if (re_bump == NULL) {
1752                        int dummy;
1753                        zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
1754                        re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy);
1755                        zend_string_release(regex);
1756                        if (re_bump == NULL) {
1757                            RETURN_FALSE;
1758                        }
1759                    }
1760                    count = pcre_exec(re_bump, extra_bump, subject,
1761                              subject_len, start_offset,
1762                              exoptions, offsets, size_offsets);
1763                    if (count < 1) {
1764                        php_error_docref(NULL, E_WARNING, "Unknown error");
1765                        RETURN_FALSE;
1766                    }
1767                } else {
1768                    offsets[0] = start_offset;
1769                    offsets[1] = start_offset + 1;
1770                }
1771            } else
1772                break;
1773        } else {
1774            pcre_handle_exec_error(count);
1775            break;
1776        }
1777
1778        /* If we have matched an empty string, mimic what Perl's /g options does.
1779           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1780           the match again at the same point. If this fails (picked up above) we
1781           advance to the next character. */
1782        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1783
1784        /* Advance to the position right after the last full match */
1785        start_offset = offsets[1];
1786    }
1787
1788
1789    start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1790
1791    if (!no_empty || start_offset < subject_len)
1792    {
1793        if (offset_capture) {
1794            /* Add the last (match, offset) pair to the return value */
1795            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1796        } else {
1797            /* Add the last piece to the return value */
1798            ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1799            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1800        }
1801    }
1802
1803
1804    /* Clean up */
1805    if (size_offsets <= 32) {
1806        free_alloca(offsets, use_heap);
1807    } else {
1808        efree(offsets);
1809    }
1810}
1811/* }}} */
1812
1813/* {{{ proto string preg_quote(string str [, string delim_char])
1814   Quote regular expression characters plus an optional character */
1815static PHP_FUNCTION(preg_quote)
1816{
1817    size_t       in_str_len;
1818    char    *in_str;        /* Input string argument */
1819    char    *in_str_end;    /* End of the input string */
1820    size_t       delim_len = 0;
1821    char    *delim = NULL;  /* Additional delimiter argument */
1822    zend_string *out_str;   /* Output string with quoted characters */
1823    char    *p,             /* Iterator for input string */
1824            *q,             /* Iterator for output string */
1825             delim_char=0,  /* Delimiter character to be quoted */
1826             c;             /* Current character */
1827    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1828
1829    /* Get the arguments and check for errors */
1830#ifndef FAST_ZPP
1831    if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
1832                              &delim, &delim_len) == FAILURE) {
1833        return;
1834    }
1835#else
1836    ZEND_PARSE_PARAMETERS_START(1, 2)
1837        Z_PARAM_STRING(in_str, in_str_len)
1838        Z_PARAM_OPTIONAL
1839        Z_PARAM_STRING(delim, delim_len)
1840    ZEND_PARSE_PARAMETERS_END();
1841#endif
1842
1843    in_str_end = in_str + in_str_len;
1844
1845    /* Nothing to do if we got an empty string */
1846    if (in_str == in_str_end) {
1847        RETURN_EMPTY_STRING();
1848    }
1849
1850    if (delim && *delim) {
1851        delim_char = delim[0];
1852        quote_delim = 1;
1853    }
1854
1855    /* Allocate enough memory so that even if each character
1856       is quoted, we won't run out of room */
1857    out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1858
1859    /* Go through the string and quote necessary characters */
1860    for (p = in_str, q = out_str->val; p != in_str_end; p++) {
1861        c = *p;
1862        switch(c) {
1863            case '.':
1864            case '\\':
1865            case '+':
1866            case '*':
1867            case '?':
1868            case '[':
1869            case '^':
1870            case ']':
1871            case '$':
1872            case '(':
1873            case ')':
1874            case '{':
1875            case '}':
1876            case '=':
1877            case '!':
1878            case '>':
1879            case '<':
1880            case '|':
1881            case ':':
1882            case '-':
1883                *q++ = '\\';
1884                *q++ = c;
1885                break;
1886
1887            case '\0':
1888                *q++ = '\\';
1889                *q++ = '0';
1890                *q++ = '0';
1891                *q++ = '0';
1892                break;
1893
1894            default:
1895                if (quote_delim && c == delim_char)
1896                    *q++ = '\\';
1897                *q++ = c;
1898                break;
1899        }
1900    }
1901    *q = '\0';
1902
1903    /* Reallocate string and return it */
1904    out_str = zend_string_truncate(out_str, q - out_str->val, 0);
1905    RETURN_NEW_STR(out_str);
1906}
1907/* }}} */
1908
1909/* {{{ proto array preg_grep(string regex, array input [, int flags])
1910   Searches array and returns entries which match regex */
1911static PHP_FUNCTION(preg_grep)
1912{
1913    zend_string         *regex;         /* Regular expression */
1914    zval                *input;         /* Input array */
1915    zend_long            flags = 0;     /* Match control flags */
1916    pcre_cache_entry    *pce;           /* Compiled regular expression */
1917
1918    /* Get arguments and do error checking */
1919#ifndef FAST_ZPP
1920    if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
1921                              &input, &flags) == FAILURE) {
1922        return;
1923    }
1924#else
1925    ZEND_PARSE_PARAMETERS_START(2, 3)
1926        Z_PARAM_STR(regex)
1927        Z_PARAM_ARRAY(input)
1928        Z_PARAM_OPTIONAL
1929        Z_PARAM_LONG(flags)
1930    ZEND_PARSE_PARAMETERS_END();
1931#endif
1932
1933    /* Compile regex or get it from cache. */
1934    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1935        RETURN_FALSE;
1936    }
1937
1938    php_pcre_grep_impl(pce, input, return_value, flags);
1939}
1940/* }}} */
1941
1942PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
1943{
1944    zval            *entry;             /* An entry in the input array */
1945    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1946    pcre_extra       extra_data;        /* Used locally for exec options */
1947    int             *offsets;           /* Array of subpattern offsets */
1948    int              size_offsets;      /* Size of the offsets array */
1949    int              count = 0;         /* Count of matched subpatterns */
1950    zend_string     *string_key;
1951    zend_ulong       num_key;
1952    zend_bool        invert;            /* Whether to return non-matching
1953                                           entries */
1954    ALLOCA_FLAG(use_heap);
1955
1956    invert = flags & PREG_GREP_INVERT ? 1 : 0;
1957
1958    if (extra == NULL) {
1959        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1960        extra = &extra_data;
1961    }
1962    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1963    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1964#ifdef PCRE_EXTRA_MARK
1965    extra->flags &= ~PCRE_EXTRA_MARK;
1966#endif
1967
1968    /* Calculate the size of the offsets array, and allocate memory for it. */
1969    size_offsets = (pce->capture_count + 1) * 3;
1970    if (size_offsets <= 32) {
1971        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1972    } else {
1973        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1974    }
1975
1976    /* Initialize return array */
1977    array_init(return_value);
1978
1979    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1980
1981    /* Go through the input array */
1982    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
1983        zend_string *subject_str = zval_get_string(entry);
1984
1985        /* Perform the match */
1986        count = pcre_exec(pce->re, extra, subject_str->val,
1987                          (int)subject_str->len, 0,
1988                          0, offsets, size_offsets);
1989
1990        /* Check for too many substrings condition. */
1991        if (count == 0) {
1992            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1993            count = size_offsets/3;
1994        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1995            pcre_handle_exec_error(count);
1996            zend_string_release(subject_str);
1997            break;
1998        }
1999
2000        /* If the entry fits our requirements */
2001        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
2002            if (Z_REFCOUNTED_P(entry)) {
2003                Z_ADDREF_P(entry);
2004            }
2005
2006            /* Add to return array */
2007            if (string_key) {
2008                zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2009            } else {
2010                zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2011            }
2012        }
2013
2014        zend_string_release(subject_str);
2015    } ZEND_HASH_FOREACH_END();
2016
2017    /* Clean up */
2018    if (size_offsets <= 32) {
2019        free_alloca(offsets, use_heap);
2020    } else {
2021        efree(offsets);
2022    }
2023}
2024/* }}} */
2025
2026/* {{{ proto int preg_last_error()
2027   Returns the error code of the last regexp execution. */
2028static PHP_FUNCTION(preg_last_error)
2029{
2030#ifndef FAST_ZPP
2031    if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
2032        return;
2033    }
2034#else
2035    ZEND_PARSE_PARAMETERS_START(0, 0)
2036    ZEND_PARSE_PARAMETERS_END();
2037#endif
2038
2039    RETURN_LONG(PCRE_G(error_code));
2040}
2041/* }}} */
2042
2043/* {{{ module definition structures */
2044
2045/* {{{ arginfo */
/*
 * Argument descriptors referenced by the pcre_functions table below. Each
 * descriptor lists one function's parameters in call order. In these tables
 * the first ZEND_ARG_INFO argument is 1 only for the parameters the functions
 * write back to (subpatterns, count) and 0 otherwise. The last
 * ZEND_BEGIN_ARG_INFO_EX argument equals the number of mandatory parameters;
 * for the functions visible in this part of the file it matches the minimum
 * given to ZEND_PARSE_PARAMETERS_START.
 */
/* preg_match(pattern, subject [, &subpatterns [, flags [, offset]]]) */
2046ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
2047    ZEND_ARG_INFO(0, pattern)
2048    ZEND_ARG_INFO(0, subject)
2049    ZEND_ARG_INFO(1, subpatterns) /* array, passed by reference */
2050    ZEND_ARG_INFO(0, flags)
2051    ZEND_ARG_INFO(0, offset)
2052ZEND_END_ARG_INFO()
2053
/* preg_match_all(): same parameter list as preg_match() */
2054ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
2055    ZEND_ARG_INFO(0, pattern)
2056    ZEND_ARG_INFO(0, subject)
2057    ZEND_ARG_INFO(1, subpatterns) /* array, passed by reference */
2058    ZEND_ARG_INFO(0, flags)
2059    ZEND_ARG_INFO(0, offset)
2060ZEND_END_ARG_INFO()
2061
/* preg_replace(regex, replace, subject [, limit [, &count]]); also used for preg_filter() */
2062ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
2063    ZEND_ARG_INFO(0, regex)
2064    ZEND_ARG_INFO(0, replace)
2065    ZEND_ARG_INFO(0, subject)
2066    ZEND_ARG_INFO(0, limit)
2067    ZEND_ARG_INFO(1, count)
2068ZEND_END_ARG_INFO()
2069
/* preg_replace_callback(regex, callback, subject [, limit [, &count]]) */
2070ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
2071    ZEND_ARG_INFO(0, regex)
2072    ZEND_ARG_INFO(0, callback)
2073    ZEND_ARG_INFO(0, subject)
2074    ZEND_ARG_INFO(0, limit)
2075    ZEND_ARG_INFO(1, count)
2076ZEND_END_ARG_INFO()
2077
/* preg_replace_callback_array(pattern, subject [, limit [, &count]]) */
2078ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
2079    ZEND_ARG_INFO(0, pattern)
2080    ZEND_ARG_INFO(0, subject)
2081    ZEND_ARG_INFO(0, limit)
2082    ZEND_ARG_INFO(1, count)
2083ZEND_END_ARG_INFO()
2084
/* preg_split(pattern, subject [, limit [, flags]]) */
2085ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
2086    ZEND_ARG_INFO(0, pattern)
2087    ZEND_ARG_INFO(0, subject)
2088    ZEND_ARG_INFO(0, limit)
2089    ZEND_ARG_INFO(0, flags)
2090ZEND_END_ARG_INFO()
2091
/* preg_quote(str [, delim_char]) */
2092ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
2093    ZEND_ARG_INFO(0, str)
2094    ZEND_ARG_INFO(0, delim_char)
2095ZEND_END_ARG_INFO()
2096
/* preg_grep(regex, input [, flags]) */
2097ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
2098    ZEND_ARG_INFO(0, regex)
2099    ZEND_ARG_INFO(0, input) /* array */
2100    ZEND_ARG_INFO(0, flags)
2101ZEND_END_ARG_INFO()
2102
/* preg_last_error(): takes no parameters */
2103ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
2104ZEND_END_ARG_INFO()
2105/* }}} */
2106
/* Function table of the extension: pairs every preg_* function implemented in
   this file with its argument descriptor. preg_filter reuses
   arginfo_preg_replace, whose parameter list (regex, replace, subject, limit,
   count) is the same as preg_filter's. PHP_FE_END terminates the table. */
2107static const zend_function_entry pcre_functions[] = {
2108    PHP_FE(preg_match,                  arginfo_preg_match)
2109    PHP_FE(preg_match_all,              arginfo_preg_match_all)
2110    PHP_FE(preg_replace,                arginfo_preg_replace)
2111    PHP_FE(preg_replace_callback,       arginfo_preg_replace_callback)
2112    PHP_FE(preg_replace_callback_array, arginfo_preg_replace_callback_array)
2113    PHP_FE(preg_filter,                 arginfo_preg_replace)
2114    PHP_FE(preg_split,                  arginfo_preg_split)
2115    PHP_FE(preg_quote,                  arginfo_preg_quote)
2116    PHP_FE(preg_grep,                   arginfo_preg_grep)
2117    PHP_FE(preg_last_error,             arginfo_preg_last_error)
2118    PHP_FE_END
2119};
2120
/* Module descriptor for the "pcre" extension: ties together the function
   table above, the module start-up/shutdown and info callbacks, the version
   string and the module-globals constructor/destructor.
   NOTE(review): the meaning of the NULL slots is taken from the usual
   zend_module_entry layout - confirm against zend_modules.h. */
2121zend_module_entry pcre_module_entry = {
2122    STANDARD_MODULE_HEADER,
2123   "pcre",
2124    pcre_functions,
2125    PHP_MINIT(pcre),
2126    PHP_MSHUTDOWN(pcre),
2127    NULL, /* presumably the per-request start-up hook: none */
2128    NULL, /* presumably the per-request shutdown hook: none */
2129    PHP_MINFO(pcre),
2130    PHP_PCRE_VERSION,
2131    PHP_MODULE_GLOBALS(pcre),
2132    PHP_GINIT(pcre),
2133    PHP_GSHUTDOWN(pcre),
2134    NULL, /* presumably the post-deactivate hook: none */
2135    STANDARD_MODULE_PROPERTIES_EX
2136};
2137
/* Only when COMPILE_DL_PCRE is defined: expand ZEND_GET_MODULE for this
   module. NOTE(review): presumably this emits the entry point through which a
   dynamically loaded build exposes pcre_module_entry - confirm against the
   macro definition. */
2138#ifdef COMPILE_DL_PCRE
2139ZEND_GET_MODULE(pcre)
2140#endif
2141
2142/* }}} */
2143
2144#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2145
2146/*
2147 * Local variables:
2148 * tab-width: 4
2149 * c-basic-offset: 4
2150 * End:
2151 * vim600: sw=4 ts=4 fdm=marker
2152 * vim<600: sw=4 ts=4
2153 */
2154