1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2015 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
#include "php.h"
#include "php_ini.h"
#include "php_globals.h"
#include "php_pcre.h"
#include "ext/standard/info.h"
#include "ext/standard/basic_functions.h"
#include "zend_smart_str.h"

#include <limits.h>
28
29#if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31#include "ext/standard/php_string.h"
32
/* Result ordering for global matches. php_pcre_match_impl() takes the order
 * from the low byte (flags & 0xff) of its flags argument. */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
/* Bit flag kept above the low byte: also report the offset of every match. */
#define PREG_OFFSET_CAPTURE         (1<<8)

/* Bit flags registered as the PREG_SPLIT_* PHP constants in MINIT. */
#define PREG_SPLIT_NO_EMPTY         (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE    (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1<<2)

/* Stored in a cache entry's preg_options when the 'e' pattern modifier is used. */
#define PREG_REPLACE_EVAL           (1<<0)

/* Bit flag registered as the PREG_GREP_INVERT PHP constant in MINIT. */
#define PREG_GREP_INVERT            (1<<0)

/* Once the regex cache holds this many entries, PCRE_CACHE_SIZE / 8 of them
 * are evicted before a new pattern is stored. */
#define PCRE_CACHE_SIZE 4096

/* Error codes stored in PCRE_G(error_code); registered in MINIT as the
 * PREG_NO_ERROR / PREG_*_ERROR PHP constants. */
enum {
    PHP_PCRE_NO_ERROR = 0,
    PHP_PCRE_INTERNAL_ERROR,
    PHP_PCRE_BACKTRACK_LIMIT_ERROR,
    PHP_PCRE_RECURSION_LIMIT_ERROR,
    PHP_PCRE_BAD_UTF8_ERROR,
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR
};


/* Module globals for this extension; the code in this file reaches them
 * through PCRE_G(). */
ZEND_DECLARE_MODULE_GLOBALS(pcre)
58
59
60static void pcre_handle_exec_error(int pcre_code) /* {{{ */
61{
62    int preg_code = 0;
63
64    switch (pcre_code) {
65        case PCRE_ERROR_MATCHLIMIT:
66            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
67            break;
68
69        case PCRE_ERROR_RECURSIONLIMIT:
70            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
71            break;
72
73        case PCRE_ERROR_BADUTF8:
74            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
75            break;
76
77        case PCRE_ERROR_BADUTF8_OFFSET:
78            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
79            break;
80
81        default:
82            preg_code = PHP_PCRE_INTERNAL_ERROR;
83            break;
84    }
85
86    PCRE_G(error_code) = preg_code;
87}
88/* }}} */
89
90static void php_free_pcre_cache(zval *data) /* {{{ */
91{
92    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
93    if (!pce) return;
94    pcre_free(pce->re);
95    if (pce->extra) {
96        pcre_free_study(pce->extra);
97    }
98#if HAVE_SETLOCALE
99    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
100    if (pce->locale) {
101        zend_string_release(pce->locale);
102    }
103#endif
104    pefree(pce, 1);
105}
106/* }}} */
107
108static PHP_GINIT_FUNCTION(pcre) /* {{{ */
109{
110    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
111    pcre_globals->backtrack_limit = 0;
112    pcre_globals->recursion_limit = 0;
113    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
114}
115/* }}} */
116
/* Module-globals destructor: destroys the compiled-regex cache. The table was
 * created in GINIT with php_free_pcre_cache as its element destructor. */
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
    zend_hash_destroy(&pcre_globals->pcre_cache);
}
121/* }}} */
122
/* INI settings of the extension; each entry is bound to the named field of
 * zend_pcre_globals and may be changed at any level (PHP_INI_ALL). */
PHP_INI_BEGIN()
    /* Default 1000000. Copied into pcre_extra.match_limit before matching. */
    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
    /* Default 100000. Copied into pcre_extra.match_limit_recursion before matching. */
    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef PCRE_STUDY_JIT_COMPILE
    /* Only present when the PCRE headers define PCRE_STUDY_JIT_COMPILE. When
       enabled, every compiled pattern is studied with that option. */
    STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
130
131
132/* {{{ PHP_MINFO_FUNCTION(pcre) */
133static PHP_MINFO_FUNCTION(pcre)
134{
135    int jit_yes = 0;
136
137    php_info_print_table_start();
138    php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
139    php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
140
141    if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
142        php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
143    } else {
144        php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
145    }
146
147    php_info_print_table_end();
148
149    DISPLAY_INI_ENTRIES();
150}
151/* }}} */
152
153/* {{{ PHP_MINIT_FUNCTION(pcre) */
154static PHP_MINIT_FUNCTION(pcre)
155{
156    REGISTER_INI_ENTRIES();
157
158    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
159    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
160    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
161    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
162    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
163    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
164    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
165
166    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
167    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
168    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
169    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
170    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
171    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
172    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
173
174    return SUCCESS;
175}
176/* }}} */
177
178/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
/* Module shutdown: removes the INI entries registered in MINIT.
 * Always returns SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
    UNREGISTER_INI_ENTRIES();

    return SUCCESS;
}
185/* }}} */
186
187/* {{{ static pcre_clean_cache */
188static int pcre_clean_cache(zval *data, void *arg)
189{
190    int *num_clean = (int *)arg;
191
192    if (*num_clean > 0) {
193        (*num_clean)--;
194        return 1;
195    } else {
196        return 0;
197    }
198}
199/* }}} */
200
201/* {{{ static make_subpats_table */
202static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
203{
204    pcre_extra *extra = pce->extra;
205    int name_cnt = pce->name_count, name_size, ni = 0;
206    int rc;
207    char *name_table;
208    unsigned short name_idx;
209    char **subpat_names;
210    int rc1, rc2;
211
212    rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
213    rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
214    rc = rc2 ? rc2 : rc1;
215    if (rc < 0) {
216        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
217        return NULL;
218    }
219
220    subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
221    while (ni++ < name_cnt) {
222        name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
223        subpat_names[name_idx] = name_table + 2;
224        if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
225            php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
226            efree(subpat_names);
227            return NULL;
228        }
229        name_table += name_size;
230    }
231    return subpat_names;
232}
233/* }}} */
234
235/* {{{ pcre_get_compiled_regex_cache
236 */
237PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
238{
239    pcre                *re = NULL;
240    pcre_extra          *extra;
241    int                  coptions = 0;
242    int                  soptions = 0;
243    const char          *error;
244    int                  erroffset;
245    char                 delimiter;
246    char                 start_delimiter;
247    char                 end_delimiter;
248    char                *p, *pp;
249    char                *pattern;
250    int                  do_study = 0;
251    int                  poptions = 0;
252    unsigned const char *tables = NULL;
253    pcre_cache_entry    *pce;
254    pcre_cache_entry     new_entry;
255    int                  rc;
256
257    /* Try to lookup the cached regex entry, and if successful, just pass
258       back the compiled pattern, otherwise go on and compile it. */
259    pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
260    if (pce) {
261#if HAVE_SETLOCALE
262        if (pce->locale == BG(locale_string) ||
263            (pce->locale && BG(locale_string) &&
264             pce->locale->len == BG(locale_string)->len &&
265             !memcmp(pce->locale->val, BG(locale_string)->val, pce->locale->len)) ||
266            (!pce->locale &&
267             BG(locale_string)->len == 1 &&
268             BG(locale_string)->val[0] == 'C') ||
269            (!BG(locale_string) &&
270             pce->locale->len == 1 &&
271             pce->locale->val[0] == 'C')) {
272            return pce;
273        }
274#else
275        return pce;
276#endif
277    }
278
279    p = regex->val;
280
281    /* Parse through the leading whitespace, and display a warning if we
282       get to the end without encountering a delimiter. */
283    while (isspace((int)*(unsigned char *)p)) p++;
284    if (*p == 0) {
285        php_error_docref(NULL, E_WARNING,
286                         p < regex->val + regex->len ? "Null byte in regex" : "Empty regular expression");
287        return NULL;
288    }
289
290    /* Get the delimiter and display a warning if it is alphanumeric
291       or a backslash. */
292    delimiter = *p++;
293    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
294        php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
295        return NULL;
296    }
297
298    start_delimiter = delimiter;
299    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
300        delimiter = pp[5];
301    end_delimiter = delimiter;
302
303    pp = p;
304
305    if (start_delimiter == end_delimiter) {
306        /* We need to iterate through the pattern, searching for the ending delimiter,
307           but skipping the backslashed delimiters.  If the ending delimiter is not
308           found, display a warning. */
309        while (*pp != 0) {
310            if (*pp == '\\' && pp[1] != 0) pp++;
311            else if (*pp == delimiter)
312                break;
313            pp++;
314        }
315    } else {
316        /* We iterate through the pattern, searching for the matching ending
317         * delimiter. For each matching starting delimiter, we increment nesting
318         * level, and decrement it for each matching ending delimiter. If we
319         * reach the end of the pattern without matching, display a warning.
320         */
321        int brackets = 1;   /* brackets nesting level */
322        while (*pp != 0) {
323            if (*pp == '\\' && pp[1] != 0) pp++;
324            else if (*pp == end_delimiter && --brackets <= 0)
325                break;
326            else if (*pp == start_delimiter)
327                brackets++;
328            pp++;
329        }
330    }
331
332    if (*pp == 0) {
333        if (pp < regex->val + regex->len) {
334            php_error_docref(NULL,E_WARNING, "Null byte in regex");
335        } else if (start_delimiter == end_delimiter) {
336            php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
337        } else {
338            php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
339        }
340        return NULL;
341    }
342
343    /* Make a copy of the actual pattern. */
344    pattern = estrndup(p, pp-p);
345
346    /* Move on to the options */
347    pp++;
348
349    /* Parse through the options, setting appropriate flags.  Display
350       a warning if we encounter an unknown modifier. */
351    while (pp < regex->val + regex->len) {
352        switch (*pp++) {
353            /* Perl compatible options */
354            case 'i':   coptions |= PCRE_CASELESS;      break;
355            case 'm':   coptions |= PCRE_MULTILINE;     break;
356            case 's':   coptions |= PCRE_DOTALL;        break;
357            case 'x':   coptions |= PCRE_EXTENDED;      break;
358
359            /* PCRE specific options */
360            case 'A':   coptions |= PCRE_ANCHORED;      break;
361            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
362            case 'S':   do_study  = 1;                  break;
363            case 'U':   coptions |= PCRE_UNGREEDY;      break;
364            case 'X':   coptions |= PCRE_EXTRA;         break;
365            case 'u':   coptions |= PCRE_UTF8;
366    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
367       characters, even in UTF-8 mode. However, this can be changed by setting
368       the PCRE_UCP option. */
369#ifdef PCRE_UCP
370                        coptions |= PCRE_UCP;
371#endif
372                break;
373
374            /* Custom preg options */
375            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;
376
377            case ' ':
378            case '\n':
379                break;
380
381            default:
382                if (pp[-1]) {
383                    php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
384                } else {
385                    php_error_docref(NULL,E_WARNING, "Null byte in regex");
386                }
387                efree(pattern);
388                return NULL;
389        }
390    }
391
392#if HAVE_SETLOCALE
393    if (BG(locale_string) &&
394        (BG(locale_string)->len != 1 || BG(locale_string)->val[0] != 'C')) {
395        tables = pcre_maketables();
396    }
397#endif
398
399    /* Compile pattern and display a warning if compilation failed. */
400    re = pcre_compile(pattern,
401                      coptions,
402                      &error,
403                      &erroffset,
404                      tables);
405
406    if (re == NULL) {
407        php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
408        efree(pattern);
409        if (tables) {
410            pefree((void*)tables, 1);
411        }
412        return NULL;
413    }
414
415#ifdef PCRE_STUDY_JIT_COMPILE
416    if (PCRE_G(jit)) {
417        /* Enable PCRE JIT compiler */
418        do_study = 1;
419        soptions |= PCRE_STUDY_JIT_COMPILE;
420    }
421#endif
422
423    /* If study option was specified, study the pattern and
424       store the result in extra for passing to pcre_exec. */
425    if (do_study) {
426        extra = pcre_study(re, soptions, &error);
427        if (extra) {
428            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
429            extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
430            extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
431        }
432        if (error != NULL) {
433            php_error_docref(NULL, E_WARNING, "Error while studying pattern");
434        }
435    } else {
436        extra = NULL;
437    }
438
439    efree(pattern);
440
441    /*
442     * If we reached cache limit, clean out the items from the head of the list;
443     * these are supposedly the oldest ones (but not necessarily the least used
444     * ones).
445     */
446    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
447        int num_clean = PCRE_CACHE_SIZE / 8;
448        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
449    }
450
451    /* Store the compiled pattern and extra info in the cache. */
452    new_entry.re = re;
453    new_entry.extra = extra;
454    new_entry.preg_options = poptions;
455    new_entry.compile_options = coptions;
456#if HAVE_SETLOCALE
457    new_entry.locale = BG(locale_string) ?
458        ((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
459            zend_string_copy(BG(locale_string)) :
460            zend_string_init(BG(locale_string)->val, BG(locale_string)->len, 1)) :
461        NULL;
462    new_entry.tables = tables;
463#endif
464
465    rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
466    if (rc < 0) {
467        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
468        return NULL;
469    }
470
471    rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
472    if (rc < 0) {
473        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
474        return NULL;
475    }
476
477    /*
478     * Interned strings are not duplicated when stored in HashTable,
479     * but all the interned strings created during HTTP request are removed
480     * at end of request. However PCRE_G(pcre_cache) must be consistent
481     * on the next request as well. So we disable usage of interned strings
482     * as hash keys especually for this table.
483     * See bug #63180
484     */
485    pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache), regex->val, regex->len, &new_entry, sizeof(pcre_cache_entry));
486
487    return pce;
488}
489/* }}} */
490
491/* {{{ pcre_get_compiled_regex
492 */
493PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
494{
495    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
496
497    if (extra) {
498        *extra = pce ? pce->extra : NULL;
499    }
500    if (preg_options) {
501        *preg_options = pce ? pce->preg_options : 0;
502    }
503
504    return pce ? pce->re : NULL;
505}
506/* }}} */
507
508/* {{{ pcre_get_compiled_regex_ex
509 */
510PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
511{
512    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
513
514    if (extra) {
515        *extra = pce ? pce->extra : NULL;
516    }
517    if (preg_options) {
518        *preg_options = pce ? pce->preg_options : 0;
519    }
520    if (compile_options) {
521        *compile_options = pce ? pce->compile_options : 0;
522    }
523
524    return pce ? pce->re : NULL;
525}
526/* }}} */
527
528/* {{{ add_offset_pair */
529static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
530{
531    zval match_pair, tmp;
532
533    array_init_size(&match_pair, 2);
534
535    /* Add (match, offset) to the return value */
536    ZVAL_STRINGL(&tmp, str, len);
537    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
538    ZVAL_LONG(&tmp, offset);
539    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
540
541    if (name) {
542        Z_ADDREF(match_pair);
543        zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
544    }
545    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
546}
547/* }}} */
548
549static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
550{
551    /* parameters */
552    zend_string      *regex;            /* Regular expression */
553    zend_string      *subject;          /* String to match against */
554    pcre_cache_entry *pce;              /* Compiled regular expression */
555    zval             *subpats = NULL;   /* Array for subpatterns */
556    zend_long         flags = 0;        /* Match control flags */
557    zend_long         start_offset = 0; /* Where the new search starts */
558
559#ifndef FAST_ZPP
560    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
561                              &subject, &subpats, &flags, &start_offset) == FAILURE) {
562        RETURN_FALSE;
563    }
564#else
565    ZEND_PARSE_PARAMETERS_START(2, 5)
566        Z_PARAM_STR(regex)
567        Z_PARAM_STR(subject)
568        Z_PARAM_OPTIONAL
569        Z_PARAM_ZVAL_EX(subpats, 0, 1)
570        Z_PARAM_LONG(flags)
571        Z_PARAM_LONG(start_offset)
572    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
573#endif
574
575    /* Compile regex or get it from cache. */
576    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
577        RETURN_FALSE;
578    }
579
580    php_pcre_match_impl(pce, subject->val, (int)subject->len, return_value, subpats,
581        global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
582}
583/* }}} */
584
585/* {{{ php_pcre_match_impl() */
586PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
587    zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
588{
589    zval             result_set,        /* Holds a set of subpatterns after
590                                           a global match */
591                    *match_sets = NULL; /* An array of sets of matches for each
592                                           subpattern after a global match */
593    pcre_extra      *extra = pce->extra;/* Holds results of studying */
594    pcre_extra       extra_data;        /* Used locally for exec options */
595    int              exoptions = 0;     /* Execution options */
596    int              count = 0;         /* Count of matched subpatterns */
597    int             *offsets;           /* Array of subpattern offsets */
598    int              num_subpats;       /* Number of captured subpatterns */
599    int              size_offsets;      /* Size of the offsets array */
600    int              matched;           /* Has anything matched */
601    int              g_notempty = 0;    /* If the match should not be empty */
602    const char     **stringlist;        /* Holds list of subpatterns */
603    char           **subpat_names;      /* Array for named subpatterns */
604    int              i;
605    int              subpats_order;     /* Order of subpattern matches */
606    int              offset_capture;    /* Capture match offsets: yes/no */
607    unsigned char   *mark = NULL;       /* Target for MARK name */
608    zval            marks;              /* Array of marks for PREG_PATTERN_ORDER */
609    ALLOCA_FLAG(use_heap);
610
611    ZVAL_UNDEF(&marks);
612
613    /* Overwrite the passed-in value for subpatterns with an empty array. */
614    if (subpats != NULL) {
615        zval_dtor(subpats);
616        array_init(subpats);
617    }
618
619    subpats_order = global ? PREG_PATTERN_ORDER : 0;
620
621    if (use_flags) {
622        offset_capture = flags & PREG_OFFSET_CAPTURE;
623
624        /*
625         * subpats_order is pre-set to pattern mode so we change it only if
626         * necessary.
627         */
628        if (flags & 0xff) {
629            subpats_order = flags & 0xff;
630        }
631        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
632            (!global && subpats_order != 0)) {
633            php_error_docref(NULL, E_WARNING, "Invalid flags specified");
634            return;
635        }
636    } else {
637        offset_capture = 0;
638    }
639
640    /* Negative offset counts from the end of the string. */
641    if (start_offset < 0) {
642        start_offset = subject_len + start_offset;
643        if (start_offset < 0) {
644            start_offset = 0;
645        }
646    }
647
648    if (extra == NULL) {
649        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
650        extra = &extra_data;
651    }
652    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
653    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
654#ifdef PCRE_EXTRA_MARK
655    extra->mark = &mark;
656    extra->flags |= PCRE_EXTRA_MARK;
657#endif
658
659    /* Calculate the size of the offsets array, and allocate memory for it. */
660    num_subpats = pce->capture_count + 1;
661    size_offsets = num_subpats * 3;
662
663    /*
664     * Build a mapping from subpattern numbers to their names. We will
665     * allocate the table only if there are any named subpatterns.
666     */
667    subpat_names = NULL;
668    if (pce->name_count > 0) {
669        subpat_names = make_subpats_table(num_subpats, pce);
670        if (!subpat_names) {
671            RETURN_FALSE;
672        }
673    }
674
675    if (size_offsets <= 32) {
676        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
677    } else {
678        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
679    }
680    memset(offsets, 0, size_offsets*sizeof(int));
681    /* Allocate match sets array and initialize the values. */
682    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
683        match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
684        for (i=0; i<num_subpats; i++) {
685            array_init(&match_sets[i]);
686        }
687    }
688
689    matched = 0;
690    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
691
692    do {
693        /* Execute the regular expression. */
694        count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
695                          exoptions|g_notempty, offsets, size_offsets);
696
697        /* the string was already proved to be valid UTF-8 */
698        exoptions |= PCRE_NO_UTF8_CHECK;
699
700        /* Check for too many substrings condition. */
701        if (count == 0) {
702            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
703            count = size_offsets/3;
704        }
705
706        /* If something has matched */
707        if (count > 0) {
708            matched++;
709
710            /* If subpatterns array has been passed, fill it in with values. */
711            if (subpats != NULL) {
712                /* Try to get the list of substrings and display a warning if failed. */
713                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
714                    if (subpat_names) {
715                        efree(subpat_names);
716                    }
717                    if (size_offsets <= 32) {
718                        free_alloca(offsets, use_heap);
719                    } else {
720                        efree(offsets);
721                    }
722                    if (match_sets) efree(match_sets);
723                    php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
724                    RETURN_FALSE;
725                }
726
727                if (global) {   /* global pattern matching */
728                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
729                        /* For each subpattern, insert it into the appropriate array. */
730                        if (offset_capture) {
731                            for (i = 0; i < count; i++) {
732                                add_offset_pair(&match_sets[i], (char *)stringlist[i],
733                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
734                            }
735                        } else {
736                            for (i = 0; i < count; i++) {
737                                add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
738                                                       offsets[(i<<1)+1] - offsets[i<<1]);
739                            }
740                        }
741                        /* Add MARK, if available */
742                        if (mark) {
743                            if (Z_TYPE(marks) == IS_UNDEF) {
744                                array_init(&marks);
745                            }
746                            add_index_string(&marks, matched - 1, (char *) mark);
747                        }
748                        /*
749                         * If the number of captured subpatterns on this run is
750                         * less than the total possible number, pad the result
751                         * arrays with empty strings.
752                         */
753                        if (count < num_subpats) {
754                            for (; i < num_subpats; i++) {
755                                add_next_index_string(&match_sets[i], "");
756                            }
757                        }
758                    } else {
759                        /* Allocate the result set array */
760                        array_init_size(&result_set, count + (mark ? 1 : 0));
761
762                        /* Add all the subpatterns to it */
763                        if (subpat_names) {
764                            if (offset_capture) {
765                                for (i = 0; i < count; i++) {
766                                    add_offset_pair(&result_set, (char *)stringlist[i],
767                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
768                                }
769                            } else {
770                                for (i = 0; i < count; i++) {
771                                    if (subpat_names[i]) {
772                                        add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
773                                                               offsets[(i<<1)+1] - offsets[i<<1]);
774                                    }
775                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
776                                                           offsets[(i<<1)+1] - offsets[i<<1]);
777                                }
778                            }
779                        } else {
780                            if (offset_capture) {
781                                for (i = 0; i < count; i++) {
782                                    add_offset_pair(&result_set, (char *)stringlist[i],
783                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
784                                }
785                            } else {
786                                for (i = 0; i < count; i++) {
787                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
788                                                           offsets[(i<<1)+1] - offsets[i<<1]);
789                                }
790                            }
791                        }
792                        /* Add MARK, if available */
793                        if (mark) {
794                            add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
795                        }
796                        /* And add it to the output array */
797                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
798                    }
799                } else {            /* single pattern matching */
800                    /* For each subpattern, insert it into the subpatterns array. */
801                    if (subpat_names) {
802                        if (offset_capture) {
803                            for (i = 0; i < count; i++) {
804                                add_offset_pair(subpats, (char *)stringlist[i],
805                                                offsets[(i<<1)+1] - offsets[i<<1],
806                                                offsets[i<<1], subpat_names[i]);
807                            }
808                        } else {
809                            for (i = 0; i < count; i++) {
810                                if (subpat_names[i]) {
811                                    add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
812                                                      offsets[(i<<1)+1] - offsets[i<<1]);
813                                }
814                                add_next_index_stringl(subpats, (char *)stringlist[i],
815                                                       offsets[(i<<1)+1] - offsets[i<<1]);
816                            }
817                        }
818                    } else {
819                        if (offset_capture) {
820                            for (i = 0; i < count; i++) {
821                                add_offset_pair(subpats, (char *)stringlist[i],
822                                                offsets[(i<<1)+1] - offsets[i<<1],
823                                                offsets[i<<1], NULL);
824                            }
825                        } else {
826                            for (i = 0; i < count; i++) {
827                                add_next_index_stringl(subpats, (char *)stringlist[i],
828                                                       offsets[(i<<1)+1] - offsets[i<<1]);
829                            }
830                        }
831                    }
832                    /* Add MARK, if available */
833                    if (mark) {
834                        add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
835                    }
836                }
837
838                pcre_free((void *) stringlist);
839            }
840        } else if (count == PCRE_ERROR_NOMATCH) {
841            /* If we previously set PCRE_NOTEMPTY after a null match,
842               this is not necessarily the end. We need to advance
843               the start offset, and continue. Fudge the offset values
844               to achieve this, unless we're already at the end of the string. */
845            if (g_notempty != 0 && start_offset < subject_len) {
846                offsets[0] = (int)start_offset;
847                offsets[1] = (int)(start_offset + 1);
848            } else
849                break;
850        } else {
851            pcre_handle_exec_error(count);
852            break;
853        }
854
855        /* If we have matched an empty string, mimic what Perl's /g options does.
856           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
857           the match again at the same point. If this fails (picked up above) we
858           advance to the next character. */
859        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
860
861        /* Advance to the position right after the last full match */
862        start_offset = offsets[1];
863    } while (global);
864
865    /* Add the match sets to the output array and clean up */
866    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
867        if (subpat_names) {
868            for (i = 0; i < num_subpats; i++) {
869                if (subpat_names[i]) {
870                    zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
871                                     strlen(subpat_names[i]), &match_sets[i]);
872                    Z_ADDREF(match_sets[i]);
873                }
874                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
875            }
876        } else {
877            for (i = 0; i < num_subpats; i++) {
878                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
879            }
880        }
881        efree(match_sets);
882
883        if (Z_TYPE(marks) != IS_UNDEF) {
884            add_assoc_zval(subpats, "MARK", &marks);
885        }
886    }
887
888    if (size_offsets <= 32) {
889        free_alloca(offsets, use_heap);
890    } else {
891        efree(offsets);
892    }
893    if (subpat_names) {
894        efree(subpat_names);
895    }
896
897    /* Did we encounter an error? */
898    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
899        RETVAL_LONG(matched);
900    } else {
901        RETVAL_FALSE;
902    }
903}
904/* }}} */
905
906/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
907   Perform a Perl-style regular expression match */
908static PHP_FUNCTION(preg_match)
909{
910    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
911}
912/* }}} */
913
914/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
915   Perform a Perl-style global regular expression match */
916static PHP_FUNCTION(preg_match_all)
917{
918    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
919}
920/* }}} */
921
/* {{{ preg_get_backref
 *
 * Parse a backreference located at *str. Accepted shapes are an introducing
 * character followed by one or two decimal digits, or "${" followed by one
 * or two decimal digits and a closing "}".
 *
 * str     - in/out. Points at the introducing character. This function does
 *           not verify that character itself (apart from recognising "${"),
 *           so the caller must only pass positions that start with a
 *           backslash or a dollar sign. Advanced past the whole reference on
 *           success, left untouched on failure.
 * backref - out. Receives the parsed number on success; never written on
 *           failure, so a rejected reference cannot leave a half-parsed
 *           value behind.
 *
 * Returns 1 when a reference was recognised, 0 otherwise.
 */
static int preg_get_backref(char **str, int *backref)
{
    char *walk = *str;
    int in_brace = 0;
    int number;

    /* The introducer is the last character of the string: nothing follows. */
    if (walk[1] == 0) {
        return 0;
    }

    if (*walk == '$' && walk[1] == '{') {
        in_brace = 1;
        walk++;
    }
    walk++;

    /* At least one digit is mandatory. */
    if (*walk < '0' || *walk > '9') {
        return 0;
    }
    number = *walk++ - '0';

    /* A second digit is optional; no more than two digits are consumed. */
    if (*walk >= '0' && *walk <= '9') {
        number = number * 10 + (*walk++ - '0');
    }

    if (in_brace) {
        if (*walk != '}') {
            return 0;
        }
        walk++;
    }

    *backref = number;
    *str = walk;
    return 1;
}
959/* }}} */
960
961/* {{{ preg_do_repl_func
962 */
963static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
964{
965    zend_string *result_str;
966    zval         retval;            /* Function return value */
967    zval         args[1];           /* Argument to pass to function */
968    int          i;
969
970    array_init_size(&args[0], count + (mark ? 1 : 0));
971    if (subpat_names) {
972        for (i = 0; i < count; i++) {
973            if (subpat_names[i]) {
974                add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
975            }
976            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
977        }
978    } else {
979        for (i = 0; i < count; i++) {
980            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
981        }
982    }
983    if (mark) {
984        add_assoc_string(&args[0], "MARK", (char *) mark);
985    }
986
987    if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
988        result_str = zval_get_string(&retval);
989        zval_ptr_dtor(&retval);
990    } else {
991        if (!EG(exception)) {
992            php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
993        }
994
995        result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
996    }
997
998    zval_ptr_dtor(&args[0]);
999
1000    return result_str;
1001}
1002/* }}} */
1003
1004/* {{{ php_pcre_replace
1005 */
1006PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1007                              zend_string *subject_str,
1008                              char *subject, int subject_len,
1009                              zval *replace_val, int is_callable_replace,
1010                              int limit, int *replace_count)
1011{
1012    pcre_cache_entry    *pce;               /* Compiled regular expression */
1013
1014    /* Compile regex or get it from cache. */
1015    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1016        return NULL;
1017    }
1018
1019    return php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
1020        is_callable_replace, limit, replace_count);
1021}
1022/* }}} */
1023
1024/* {{{ php_pcre_replace_impl() */
1025PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val,
1026    int is_callable_replace, int limit, int *replace_count)
1027{
1028    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1029    pcre_extra       extra_data;        /* Used locally for exec options */
1030    int              exoptions = 0;     /* Execution options */
1031    int              count = 0;         /* Count of matched subpatterns */
1032    int             *offsets;           /* Array of subpattern offsets */
1033    char            **subpat_names;     /* Array for named subpatterns */
1034    int              num_subpats;       /* Number of captured subpatterns */
1035    int              size_offsets;      /* Size of the offsets array */
1036    int              new_len;           /* Length of needed storage */
1037    int              alloc_len;         /* Actual allocated length */
1038    int              match_len;         /* Length of the current match */
1039    int              backref;           /* Backreference number */
1040    int              start_offset;      /* Where the new search starts */
1041    int              g_notempty=0;      /* If the match should not be empty */
1042    int              replace_len=0;     /* Length of replacement string */
1043    char            *replace=NULL,      /* Replacement string */
1044                    *walkbuf,           /* Location of current replacement in the result */
1045                    *walk,              /* Used to walk the replacement string */
1046                    *match,             /* The current match */
1047                    *piece,             /* The current piece of subject */
1048                    *replace_end=NULL,  /* End of replacement string */
1049                     walk_last;         /* Last walked character */
1050    int              result_len;        /* Length of result */
1051    unsigned char   *mark = NULL;       /* Target for MARK name */
1052    zend_string     *result;            /* Result of replacement */
1053    zend_string     *eval_result=NULL;  /* Result of custom function */
1054    ALLOCA_FLAG(use_heap);
1055
1056    if (extra == NULL) {
1057        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1058        extra = &extra_data;
1059    }
1060    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1061    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1062
1063    if (pce->preg_options & PREG_REPLACE_EVAL) {
1064        php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1065        return NULL;
1066    }
1067    if (!is_callable_replace) {
1068        replace = Z_STRVAL_P(replace_val);
1069        replace_len = (int)Z_STRLEN_P(replace_val);
1070        replace_end = replace + replace_len;
1071    }
1072
1073    /* Calculate the size of the offsets array, and allocate memory for it. */
1074    num_subpats = pce->capture_count + 1;
1075    size_offsets = num_subpats * 3;
1076    if (size_offsets <= 32) {
1077        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1078    } else {
1079        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1080    }
1081
1082    /*
1083     * Build a mapping from subpattern numbers to their names. We will
1084     * allocate the table only if there are any named subpatterns.
1085     */
1086    subpat_names = NULL;
1087    if (pce->name_count > 0) {
1088        subpat_names = make_subpats_table(num_subpats, pce);
1089        if (!subpat_names) {
1090            return NULL;
1091        }
1092    }
1093
1094    alloc_len = 0;
1095    result = NULL;
1096
1097    /* Initialize */
1098    match = NULL;
1099    start_offset = 0;
1100    result_len = 0;
1101    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1102
1103    while (1) {
1104#ifdef PCRE_EXTRA_MARK
1105        extra->mark = &mark;
1106        extra->flags |= PCRE_EXTRA_MARK;
1107#endif
1108        /* Execute the regular expression. */
1109        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1110                          exoptions|g_notempty, offsets, size_offsets);
1111
1112        /* the string was already proved to be valid UTF-8 */
1113        exoptions |= PCRE_NO_UTF8_CHECK;
1114
1115        /* Check for too many substrings condition. */
1116        if (count == 0) {
1117            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1118            count = size_offsets/3;
1119        }
1120
1121        piece = subject + start_offset;
1122
1123        if (count > 0 && (limit == -1 || limit > 0)) {
1124            if (replace_count) {
1125                ++*replace_count;
1126            }
1127            /* Set the match location in subject */
1128            match = subject + offsets[0];
1129
1130            new_len = result_len + offsets[0] - start_offset; /* part before the match */
1131
1132            if (is_callable_replace) {
1133                /* Use custom function to get replacement string and its length. */
1134                eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1135                new_len += (int)eval_result->len;
1136            } else { /* do regular substitution */
1137                walk = replace;
1138                walk_last = 0;
1139                while (walk < replace_end) {
1140                    if ('\\' == *walk || '$' == *walk) {
1141                        if (walk_last == '\\') {
1142                            walk++;
1143                            walk_last = 0;
1144                            continue;
1145                        }
1146                        if (preg_get_backref(&walk, &backref)) {
1147                            if (backref < count)
1148                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1149                            continue;
1150                        }
1151                    }
1152                    new_len++;
1153                    walk++;
1154                    walk_last = walk[-1];
1155                }
1156            }
1157
1158            if (new_len >= alloc_len) {
1159                if (alloc_len == 0) {
1160                    alloc_len = 2 * subject_len;
1161                    if (new_len >= alloc_len) {
1162                        alloc_len = alloc_len + 2 * new_len;
1163                    }
1164                    result = zend_string_alloc(alloc_len, 0);
1165                } else {
1166                    alloc_len = alloc_len + 2 * new_len;
1167                    result = zend_string_extend(result, alloc_len, 0);
1168                }
1169            }
1170            /* copy the part of the string before the match */
1171            memcpy(&result->val[result_len], piece, match-piece);
1172            result_len += (int)(match-piece);
1173
1174            /* copy replacement and backrefs */
1175            walkbuf = result->val + result_len;
1176
1177            /* If using custom function, copy result to the buffer and clean up. */
1178            if (is_callable_replace) {
1179                memcpy(walkbuf, eval_result->val, eval_result->len);
1180                result_len += (int)eval_result->len;
1181                if (eval_result) zend_string_release(eval_result);
1182            } else { /* do regular backreference copying */
1183                walk = replace;
1184                walk_last = 0;
1185                while (walk < replace_end) {
1186                    if ('\\' == *walk || '$' == *walk) {
1187                        if (walk_last == '\\') {
1188                            *(walkbuf-1) = *walk++;
1189                            walk_last = 0;
1190                            continue;
1191                        }
1192                        if (preg_get_backref(&walk, &backref)) {
1193                            if (backref < count) {
1194                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1195                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1196                                walkbuf += match_len;
1197                            }
1198                            continue;
1199                        }
1200                    }
1201                    *walkbuf++ = *walk++;
1202                    walk_last = walk[-1];
1203                }
1204                *walkbuf = '\0';
1205                /* increment the result length by how much we've added to the string */
1206                result_len += (int)(walkbuf - (result->val + result_len));
1207            }
1208
1209            if (limit != -1)
1210                limit--;
1211
1212        } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1213            /* If we previously set PCRE_NOTEMPTY after a null match,
1214               this is not necessarily the end. We need to advance
1215               the start offset, and continue. Fudge the offset values
1216               to achieve this, unless we're already at the end of the string. */
1217            if (g_notempty != 0 && start_offset < subject_len) {
1218                offsets[0] = start_offset;
1219                offsets[1] = start_offset + 1;
1220                memcpy(&result->val[result_len], piece, 1);
1221                result_len++;
1222            } else {
1223                if (!result && subject_str) {
1224                    result = zend_string_copy(subject_str);
1225                    break;
1226                }
1227                new_len = result_len + subject_len - start_offset;
1228                if (new_len > alloc_len) {
1229                    alloc_len = new_len; /* now we know exactly how long it is */
1230                    if (NULL != result) {
1231                        result = zend_string_realloc(result, alloc_len, 0);
1232                    } else {
1233                        result = zend_string_alloc(alloc_len, 0);
1234                    }
1235                }
1236                /* stick that last bit of string on our output */
1237                memcpy(&result->val[result_len], piece, subject_len - start_offset);
1238                result_len += subject_len - start_offset;
1239                result->val[result_len] = '\0';
1240                result->len = result_len;
1241                break;
1242            }
1243        } else {
1244            pcre_handle_exec_error(count);
1245            if (result) {
1246                zend_string_free(result);
1247                result = NULL;
1248            }
1249            break;
1250        }
1251
1252        /* If we have matched an empty string, mimic what Perl's /g options does.
1253           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1254           the match again at the same point. If this fails (picked up above) we
1255           advance to the next character. */
1256        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1257
1258        /* Advance to the next piece. */
1259        start_offset = offsets[1];
1260    }
1261
1262    if (size_offsets <= 32) {
1263        free_alloca(offsets, use_heap);
1264    } else {
1265        efree(offsets);
1266    }
1267    if (subpat_names) {
1268        efree(subpat_names);
1269    }
1270
1271    return result;
1272}
1273/* }}} */
1274
1275/* {{{ php_replace_in_subject
1276 */
1277static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1278{
1279    zval        *regex_entry,
1280                *replace_entry = NULL,
1281                *replace_value,
1282                 empty_replace;
1283    zend_string *result;
1284    uint32_t replace_idx;
1285    zend_string *subject_str = zval_get_string(subject);
1286
1287    /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1288    ZVAL_EMPTY_STRING(&empty_replace);
1289
1290    /* If regex is an array */
1291    if (Z_TYPE_P(regex) == IS_ARRAY) {
1292        replace_value = replace;
1293        replace_idx = 0;
1294
1295        /* For each entry in the regex array, get the entry */
1296        ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1297            /* Make sure we're dealing with strings. */
1298            zend_string *regex_str = zval_get_string(regex_entry);
1299
1300            /* If replace is an array and not a callable construct */
1301            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1302                /* Get current entry */
1303                replace_entry = NULL;
1304                while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1305                    if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
1306                        replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
1307                        break;
1308                    }
1309                    replace_idx++;
1310                }
1311                if (replace_entry != NULL) {
1312                    if (!is_callable_replace) {
1313                        convert_to_string_ex(replace_entry);
1314                    }
1315                    replace_value = replace_entry;
1316                    replace_idx++;
1317                } else {
1318                    /* We've run out of replacement strings, so use an empty one */
1319                    replace_value = &empty_replace;
1320                }
1321            }
1322
1323            /* Do the actual replacement and put the result back into subject_str
1324               for further replacements. */
1325            if ((result = php_pcre_replace(regex_str,
1326                                           subject_str,
1327                                           subject_str->val,
1328                                           (int)subject_str->len,
1329                                           replace_value,
1330                                           is_callable_replace,
1331                                           limit,
1332                                           replace_count)) != NULL) {
1333                zend_string_release(subject_str);
1334                subject_str = result;
1335            } else {
1336                zend_string_release(subject_str);
1337                zend_string_release(regex_str);
1338                return NULL;
1339            }
1340
1341            zend_string_release(regex_str);
1342        } ZEND_HASH_FOREACH_END();
1343
1344        return subject_str;
1345    } else {
1346        result = php_pcre_replace(Z_STR_P(regex),
1347                                  subject_str,
1348                                  subject_str->val,
1349                                  (int)subject_str->len,
1350                                  replace,
1351                                  is_callable_replace,
1352                                  limit,
1353                                  replace_count);
1354        zend_string_release(subject_str);
1355        return result;
1356    }
1357}
1358/* }}} */
1359
1360/* {{{ preg_replace_impl
1361 */
1362static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
1363{
1364    zval        *subject_entry;
1365    zend_string *result;
1366    zend_string *string_key;
1367    zend_ulong   num_key;
1368    int          replace_count = 0, old_replace_count;
1369
1370    if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1371        SEPARATE_ZVAL(replace);
1372        convert_to_string_ex(replace);
1373    }
1374
1375    if (Z_TYPE_P(regex) != IS_ARRAY) {
1376        SEPARATE_ZVAL(regex);
1377        convert_to_string_ex(regex);
1378    }
1379
1380    /* if subject is an array */
1381    if (Z_TYPE_P(subject) == IS_ARRAY) {
1382        array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1383
1384        /* For each subject entry, convert it to string, then perform replacement
1385           and add the result to the return_value array. */
1386        ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1387            old_replace_count = replace_count;
1388            if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1389                if (!is_filter || replace_count > old_replace_count) {
1390                    /* Add to return array */
1391                    if (string_key) {
1392                        add_assoc_str_ex(return_value, string_key->val, string_key->len, result);
1393                    } else {
1394                        add_index_str(return_value, num_key, result);
1395                    }
1396                } else {
1397                    zend_string_release(result);
1398                }
1399            }
1400        } ZEND_HASH_FOREACH_END();
1401    } else {
1402        /* if subject is not an array */
1403        old_replace_count = replace_count;
1404        if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1405            if (!is_filter || replace_count > old_replace_count) {
1406                RETVAL_STR(result);
1407            } else {
1408                zend_string_release(result);
1409            }
1410        }
1411    }
1412
1413    return replace_count;
1414}
1415/* }}} */
1416
1417/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1418   Perform Perl-style regular expression replacement. */
1419static PHP_FUNCTION(preg_replace)
1420{
1421    zval *regex, *replace, *subject, *zcount = NULL;
1422    zend_long limit = -1;
1423    int replace_count;
1424
1425#ifndef FAST_ZPP
1426    /* Get function parameters and do error-checking. */
1427    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1428        return;
1429    }
1430#else
1431    ZEND_PARSE_PARAMETERS_START(3, 5)
1432        Z_PARAM_ZVAL(regex)
1433        Z_PARAM_ZVAL(replace)
1434        Z_PARAM_ZVAL(subject)
1435        Z_PARAM_OPTIONAL
1436        Z_PARAM_LONG(limit)
1437        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1438    ZEND_PARSE_PARAMETERS_END();
1439#endif
1440
1441    if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1442        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1443        RETURN_FALSE;
1444    }
1445
1446    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0);
1447    if (zcount) {
1448        zval_dtor(zcount);
1449        ZVAL_LONG(zcount, replace_count);
1450    }
1451}
1452/* }}} */
1453
1454/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1455   Perform Perl-style regular expression replacement using replacement callback. */
1456static PHP_FUNCTION(preg_replace_callback)
1457{
1458    zval *regex, *replace, *subject, *zcount = NULL;
1459    zend_long limit = -1;
1460    zend_string *callback_name;
1461    int replace_count;
1462
1463#ifndef FAST_ZPP
1464    /* Get function parameters and do error-checking. */
1465    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1466        return;
1467    }
1468#else
1469    ZEND_PARSE_PARAMETERS_START(3, 5)
1470        Z_PARAM_ZVAL(regex)
1471        Z_PARAM_ZVAL(replace)
1472        Z_PARAM_ZVAL(subject)
1473        Z_PARAM_OPTIONAL
1474        Z_PARAM_LONG(limit)
1475        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1476    ZEND_PARSE_PARAMETERS_END();
1477#endif
1478
1479    if (!zend_is_callable(replace, 0, &callback_name)) {
1480        php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name->val);
1481        zend_string_release(callback_name);
1482        ZVAL_COPY(return_value, subject);
1483        return;
1484    }
1485    zend_string_release(callback_name);
1486
1487    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0);
1488    if (zcount) {
1489        zval_dtor(zcount);
1490        ZVAL_LONG(zcount, replace_count);
1491    }
1492}
1493/* }}} */
1494
1495/* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
1496   Perform Perl-style regular expression replacement using replacement callback. */
1497static PHP_FUNCTION(preg_replace_callback_array)
1498{
1499    zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
1500    zend_long limit = -1;
1501    zend_ulong num_idx;
1502    zend_string *str_idx;
1503    zend_string *callback_name;
1504    int replace_count = 0;
1505
1506#ifndef FAST_ZPP
1507    /* Get function parameters and do error-checking. */
1508    if (zend_parse_parameters(ZEND_NUM_ARGS(), "az|lz/", &pattern, &subject, &limit, &zcount) == FAILURE) {
1509        return;
1510    }
1511#else
1512    ZEND_PARSE_PARAMETERS_START(2, 4)
1513        Z_PARAM_ARRAY(pattern)
1514        Z_PARAM_ZVAL(subject)
1515        Z_PARAM_OPTIONAL
1516        Z_PARAM_LONG(limit)
1517        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1518    ZEND_PARSE_PARAMETERS_END();
1519#endif
1520
1521    ZVAL_UNDEF(&zv);
1522    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(pattern), num_idx, str_idx, replace) {
1523        if (str_idx) {
1524            ZVAL_STR_COPY(&regex, str_idx);
1525        } else {
1526            php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
1527            zval_ptr_dtor(return_value);
1528            RETURN_NULL();
1529        }
1530
1531        if (!zend_is_callable(replace, 0, &callback_name)) {
1532            php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", callback_name->val);
1533            zend_string_release(callback_name);
1534            zval_ptr_dtor(&regex);
1535            zval_ptr_dtor(return_value);
1536            ZVAL_COPY(return_value, subject);
1537            return;
1538        }
1539        zend_string_release(callback_name);
1540
1541        if (Z_ISNULL_P(return_value)) {
1542            replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0);
1543        } else {
1544            replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0);
1545            zval_ptr_dtor(return_value);
1546        }
1547
1548        zval_ptr_dtor(&regex);
1549
1550        if (Z_ISUNDEF(zv)) {
1551            RETURN_NULL();
1552        }
1553
1554        ZVAL_COPY_VALUE(return_value, &zv);
1555
1556        if (UNEXPECTED(EG(exception))) {
1557            zval_ptr_dtor(return_value);
1558            RETURN_NULL();
1559        }
1560    } ZEND_HASH_FOREACH_END();
1561
1562    if (zcount) {
1563        zval_dtor(zcount);
1564        ZVAL_LONG(zcount, replace_count);
1565    }
1566}
1567/* }}} */
1568
1569/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1570   Perform Perl-style regular expression replacement and only return matches. */
1571static PHP_FUNCTION(preg_filter)
1572{
1573    zval *regex, *replace, *subject, *zcount = NULL;
1574    zend_long limit = -1;
1575    int replace_count;
1576
1577#ifndef FAST_ZPP
1578    /* Get function parameters and do error-checking. */
1579    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1580        return;
1581    }
1582#else
1583    ZEND_PARSE_PARAMETERS_START(3, 5)
1584        Z_PARAM_ZVAL(regex)
1585        Z_PARAM_ZVAL(replace)
1586        Z_PARAM_ZVAL(subject)
1587        Z_PARAM_OPTIONAL
1588        Z_PARAM_LONG(limit)
1589        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1590    ZEND_PARSE_PARAMETERS_END();
1591#endif
1592
1593    if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1594        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1595        RETURN_FALSE;
1596    }
1597
1598    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1);
1599    if (zcount) {
1600        zval_dtor(zcount);
1601        ZVAL_LONG(zcount, replace_count);
1602    }
1603}
1604/* }}} */
1605
1606/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1607   Split string into an array using a perl-style regular expression as a delimiter */
1608static PHP_FUNCTION(preg_split)
1609{
1610    zend_string         *regex;         /* Regular expression */
1611    zend_string         *subject;       /* String to match against */
1612    zend_long            limit_val = -1;/* Integer value of limit */
1613    zend_long            flags = 0;     /* Match control flags */
1614    pcre_cache_entry    *pce;           /* Compiled regular expression */
1615
1616    /* Get function parameters and do error checking */
1617#ifndef FAST_ZPP
1618    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
1619                              &subject, &limit_val, &flags) == FAILURE) {
1620        RETURN_FALSE;
1621    }
1622#else
1623    ZEND_PARSE_PARAMETERS_START(2, 4)
1624        Z_PARAM_STR(regex)
1625        Z_PARAM_STR(subject)
1626        Z_PARAM_OPTIONAL
1627        Z_PARAM_LONG(limit_val)
1628        Z_PARAM_LONG(flags)
1629    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1630#endif
1631
1632    /* Compile regex or get it from cache. */
1633    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1634        RETURN_FALSE;
1635    }
1636
1637    php_pcre_split_impl(pce, subject->val, (int)subject->len, return_value, (int)limit_val, flags);
1638}
1639/* }}} */
1640
1641/* {{{ php_pcre_split
1642 */
1643PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1644    zend_long limit_val, zend_long flags)
1645{
1646    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1647    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1648    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1649    pcre_extra       extra_data;        /* Used locally for exec options */
1650    int             *offsets;           /* Array of subpattern offsets */
1651    int              size_offsets;      /* Size of the offsets array */
1652    int              exoptions = 0;     /* Execution options */
1653    int              count = 0;         /* Count of matched subpatterns */
1654    int              start_offset;      /* Where the new search starts */
1655    int              next_offset;       /* End of the last delimiter match + 1 */
1656    int              g_notempty = 0;    /* If the match should not be empty */
1657    char            *last_match;        /* Location of last match */
1658    int              no_empty;          /* If NO_EMPTY flag is set */
1659    int              delim_capture;     /* If delimiters should be captured */
1660    int              offset_capture;    /* If offsets should be captured */
1661    zval             tmp;
1662    ALLOCA_FLAG(use_heap);
1663
1664    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1665    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1666    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1667
1668    if (limit_val == 0) {
1669        limit_val = -1;
1670    }
1671
1672    if (extra == NULL) {
1673        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1674        extra = &extra_data;
1675    }
1676    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1677    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1678#ifdef PCRE_EXTRA_MARK
1679    extra->flags &= ~PCRE_EXTRA_MARK;
1680#endif
1681
1682    /* Initialize return value */
1683    array_init(return_value);
1684
1685    /* Calculate the size of the offsets array, and allocate memory for it. */
1686    size_offsets = (pce->capture_count + 1) * 3;
1687    if (size_offsets <= 32) {
1688        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1689    } else {
1690        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1691    }
1692
1693    /* Start at the beginning of the string */
1694    start_offset = 0;
1695    next_offset = 0;
1696    last_match = subject;
1697    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1698
1699    /* Get next piece if no limit or limit not yet reached and something matched*/
1700    while ((limit_val == -1 || limit_val > 1)) {
1701        count = pcre_exec(pce->re, extra, subject,
1702                          subject_len, start_offset,
1703                          exoptions|g_notempty, offsets, size_offsets);
1704
1705        /* the string was already proved to be valid UTF-8 */
1706        exoptions |= PCRE_NO_UTF8_CHECK;
1707
1708        /* Check for too many substrings condition. */
1709        if (count == 0) {
1710            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1711            count = size_offsets/3;
1712        }
1713
1714        /* If something matched */
1715        if (count > 0) {
1716            if (!no_empty || &subject[offsets[0]] != last_match) {
1717
1718                if (offset_capture) {
1719                    /* Add (match, offset) pair to the return value */
1720                    add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1721                } else {
1722                    /* Add the piece to the return value */
1723                    ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1724                    zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1725                }
1726
1727                /* One less left to do */
1728                if (limit_val != -1)
1729                    limit_val--;
1730            }
1731
1732            last_match = &subject[offsets[1]];
1733            next_offset = offsets[1];
1734
1735            if (delim_capture) {
1736                int i, match_len;
1737                for (i = 1; i < count; i++) {
1738                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1739                    /* If we have matched a delimiter */
1740                    if (!no_empty || match_len > 0) {
1741                        if (offset_capture) {
1742                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1743                        } else {
1744                            ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1745                            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1746                        }
1747                    }
1748                }
1749            }
1750        } else if (count == PCRE_ERROR_NOMATCH) {
1751            /* If we previously set PCRE_NOTEMPTY after a null match,
1752               this is not necessarily the end. We need to advance
1753               the start offset, and continue. Fudge the offset values
1754               to achieve this, unless we're already at the end of the string. */
1755            if (g_notempty != 0 && start_offset < subject_len) {
1756                if (pce->compile_options & PCRE_UTF8) {
1757                    if (re_bump == NULL) {
1758                        int dummy;
1759                        zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
1760                        re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy);
1761                        zend_string_release(regex);
1762                        if (re_bump == NULL) {
1763                            RETURN_FALSE;
1764                        }
1765                    }
1766                    count = pcre_exec(re_bump, extra_bump, subject,
1767                              subject_len, start_offset,
1768                              exoptions, offsets, size_offsets);
1769                    if (count < 1) {
1770                        php_error_docref(NULL, E_WARNING, "Unknown error");
1771                        RETURN_FALSE;
1772                    }
1773                } else {
1774                    offsets[0] = start_offset;
1775                    offsets[1] = start_offset + 1;
1776                }
1777            } else
1778                break;
1779        } else {
1780            pcre_handle_exec_error(count);
1781            break;
1782        }
1783
1784        /* If we have matched an empty string, mimic what Perl's /g options does.
1785           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1786           the match again at the same point. If this fails (picked up above) we
1787           advance to the next character. */
1788        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1789
1790        /* Advance to the position right after the last full match */
1791        start_offset = offsets[1];
1792    }
1793
1794
1795    start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1796
1797    if (!no_empty || start_offset < subject_len)
1798    {
1799        if (offset_capture) {
1800            /* Add the last (match, offset) pair to the return value */
1801            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1802        } else {
1803            /* Add the last piece to the return value */
1804            ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1805            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1806        }
1807    }
1808
1809
1810    /* Clean up */
1811    if (size_offsets <= 32) {
1812        free_alloca(offsets, use_heap);
1813    } else {
1814        efree(offsets);
1815    }
1816}
1817/* }}} */
1818
1819/* {{{ proto string preg_quote(string str [, string delim_char])
1820   Quote regular expression characters plus an optional character */
1821static PHP_FUNCTION(preg_quote)
1822{
1823    size_t       in_str_len;
1824    char    *in_str;        /* Input string argument */
1825    char    *in_str_end;    /* End of the input string */
1826    size_t       delim_len = 0;
1827    char    *delim = NULL;  /* Additional delimiter argument */
1828    zend_string *out_str;   /* Output string with quoted characters */
1829    char    *p,             /* Iterator for input string */
1830            *q,             /* Iterator for output string */
1831             delim_char=0,  /* Delimiter character to be quoted */
1832             c;             /* Current character */
1833    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1834
1835    /* Get the arguments and check for errors */
1836#ifndef FAST_ZPP
1837    if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
1838                              &delim, &delim_len) == FAILURE) {
1839        return;
1840    }
1841#else
1842    ZEND_PARSE_PARAMETERS_START(1, 2)
1843        Z_PARAM_STRING(in_str, in_str_len)
1844        Z_PARAM_OPTIONAL
1845        Z_PARAM_STRING(delim, delim_len)
1846    ZEND_PARSE_PARAMETERS_END();
1847#endif
1848
1849    in_str_end = in_str + in_str_len;
1850
1851    /* Nothing to do if we got an empty string */
1852    if (in_str == in_str_end) {
1853        RETURN_EMPTY_STRING();
1854    }
1855
1856    if (delim && *delim) {
1857        delim_char = delim[0];
1858        quote_delim = 1;
1859    }
1860
1861    /* Allocate enough memory so that even if each character
1862       is quoted, we won't run out of room */
1863    out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1864
1865    /* Go through the string and quote necessary characters */
1866    for (p = in_str, q = out_str->val; p != in_str_end; p++) {
1867        c = *p;
1868        switch(c) {
1869            case '.':
1870            case '\\':
1871            case '+':
1872            case '*':
1873            case '?':
1874            case '[':
1875            case '^':
1876            case ']':
1877            case '$':
1878            case '(':
1879            case ')':
1880            case '{':
1881            case '}':
1882            case '=':
1883            case '!':
1884            case '>':
1885            case '<':
1886            case '|':
1887            case ':':
1888            case '-':
1889                *q++ = '\\';
1890                *q++ = c;
1891                break;
1892
1893            case '\0':
1894                *q++ = '\\';
1895                *q++ = '0';
1896                *q++ = '0';
1897                *q++ = '0';
1898                break;
1899
1900            default:
1901                if (quote_delim && c == delim_char)
1902                    *q++ = '\\';
1903                *q++ = c;
1904                break;
1905        }
1906    }
1907    *q = '\0';
1908
1909    /* Reallocate string and return it */
1910    out_str = zend_string_truncate(out_str, q - out_str->val, 0);
1911    RETURN_NEW_STR(out_str);
1912}
1913/* }}} */
1914
1915/* {{{ proto array preg_grep(string regex, array input [, int flags])
1916   Searches array and returns entries which match regex */
1917static PHP_FUNCTION(preg_grep)
1918{
1919    zend_string         *regex;         /* Regular expression */
1920    zval                *input;         /* Input array */
1921    zend_long            flags = 0;     /* Match control flags */
1922    pcre_cache_entry    *pce;           /* Compiled regular expression */
1923
1924    /* Get arguments and do error checking */
1925#ifndef FAST_ZPP
1926    if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
1927                              &input, &flags) == FAILURE) {
1928        return;
1929    }
1930#else
1931    ZEND_PARSE_PARAMETERS_START(2, 3)
1932        Z_PARAM_STR(regex)
1933        Z_PARAM_ARRAY(input)
1934        Z_PARAM_OPTIONAL
1935        Z_PARAM_LONG(flags)
1936    ZEND_PARSE_PARAMETERS_END();
1937#endif
1938
1939    /* Compile regex or get it from cache. */
1940    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1941        RETURN_FALSE;
1942    }
1943
1944    php_pcre_grep_impl(pce, input, return_value, flags);
1945}
1946/* }}} */
1947
1948PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
1949{
1950    zval            *entry;             /* An entry in the input array */
1951    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1952    pcre_extra       extra_data;        /* Used locally for exec options */
1953    int             *offsets;           /* Array of subpattern offsets */
1954    int              size_offsets;      /* Size of the offsets array */
1955    int              count = 0;         /* Count of matched subpatterns */
1956    zend_string     *string_key;
1957    zend_ulong       num_key;
1958    zend_bool        invert;            /* Whether to return non-matching
1959                                           entries */
1960    ALLOCA_FLAG(use_heap);
1961
1962    invert = flags & PREG_GREP_INVERT ? 1 : 0;
1963
1964    if (extra == NULL) {
1965        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1966        extra = &extra_data;
1967    }
1968    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1969    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1970#ifdef PCRE_EXTRA_MARK
1971    extra->flags &= ~PCRE_EXTRA_MARK;
1972#endif
1973
1974    /* Calculate the size of the offsets array, and allocate memory for it. */
1975    size_offsets = (pce->capture_count + 1) * 3;
1976    if (size_offsets <= 32) {
1977        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1978    } else {
1979        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1980    }
1981
1982    /* Initialize return array */
1983    array_init(return_value);
1984
1985    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1986
1987    /* Go through the input array */
1988    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
1989        zend_string *subject_str = zval_get_string(entry);
1990
1991        /* Perform the match */
1992        count = pcre_exec(pce->re, extra, subject_str->val,
1993                          (int)subject_str->len, 0,
1994                          0, offsets, size_offsets);
1995
1996        /* Check for too many substrings condition. */
1997        if (count == 0) {
1998            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1999            count = size_offsets/3;
2000        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
2001            pcre_handle_exec_error(count);
2002            zend_string_release(subject_str);
2003            break;
2004        }
2005
2006        /* If the entry fits our requirements */
2007        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
2008            if (Z_REFCOUNTED_P(entry)) {
2009                Z_ADDREF_P(entry);
2010            }
2011
2012            /* Add to return array */
2013            if (string_key) {
2014                zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2015            } else {
2016                zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2017            }
2018        }
2019
2020        zend_string_release(subject_str);
2021    } ZEND_HASH_FOREACH_END();
2022
2023    /* Clean up */
2024    if (size_offsets <= 32) {
2025        free_alloca(offsets, use_heap);
2026    } else {
2027        efree(offsets);
2028    }
2029}
2030/* }}} */
2031
2032/* {{{ proto int preg_last_error()
2033   Returns the error code of the last regexp execution. */
2034static PHP_FUNCTION(preg_last_error)
2035{
2036#ifndef FAST_ZPP
2037    if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
2038        return;
2039    }
2040#else
2041    ZEND_PARSE_PARAMETERS_START(0, 0)
2042    ZEND_PARSE_PARAMETERS_END();
2043#endif
2044
2045    RETURN_LONG(PCRE_G(error_code));
2046}
2047/* }}} */
2048
2049/* {{{ module definition structures */
2050
/* {{{ arginfo */
/* Argument descriptions for the functions registered in pcre_functions.
 * In ZEND_BEGIN_ARG_INFO_EX() the last argument is the number of required
 * parameters; in ZEND_ARG_INFO() the first argument is 1 for parameters
 * that are passed by reference and 0 otherwise. */

/* preg_match(pattern, subject [, &subpatterns [, flags [, offset]]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_match_all(pattern, subject [, &subpatterns [, flags [, offset]]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_replace(regex, replace, subject [, limit [, &count]]);
 * also used for preg_filter in the function table. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback(regex, callback, subject [, limit [, &count]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback_array(pattern, subject [, limit [, &count]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_split(pattern, subject [, limit [, flags]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_quote(str [, delim_char]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

/* preg_grep(regex, input [, flags]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error() declares no parameters */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
/* }}} */
2112
/* Function table of the extension: each PHP_FE() entry pairs a userland
 * function name with its argument description. preg_filter reuses
 * arginfo_preg_replace. PHP_FE_END terminates the table. */
static const zend_function_entry pcre_functions[] = {
    PHP_FE(preg_match,                  arginfo_preg_match)
    PHP_FE(preg_match_all,              arginfo_preg_match_all)
    PHP_FE(preg_replace,                arginfo_preg_replace)
    PHP_FE(preg_replace_callback,       arginfo_preg_replace_callback)
    PHP_FE(preg_replace_callback_array, arginfo_preg_replace_callback_array)
    PHP_FE(preg_filter,                 arginfo_preg_replace)
    PHP_FE(preg_split,                  arginfo_preg_split)
    PHP_FE(preg_quote,                  arginfo_preg_quote)
    PHP_FE(preg_grep,                   arginfo_preg_grep)
    PHP_FE(preg_last_error,             arginfo_preg_last_error)
    PHP_FE_END
};
2126
/* Module descriptor for the "pcre" extension. The per-field notes follow the
 * usual zend_module_entry layout -- NOTE(review): confirm the meaning of the
 * NULL slots against the zend_module_entry definition. */
zend_module_entry pcre_module_entry = {
    STANDARD_MODULE_HEADER,
   "pcre",                      /* extension name */
    pcre_functions,             /* function table defined above */
    PHP_MINIT(pcre),            /* module startup handler */
    PHP_MSHUTDOWN(pcre),        /* module shutdown handler */
    NULL,                       /* presumably: no request startup handler */
    NULL,                       /* presumably: no request shutdown handler */
    PHP_MINFO(pcre),            /* module info handler */
    PHP_PCRE_VERSION,           /* extension version */
    PHP_MODULE_GLOBALS(pcre),   /* module globals */
    PHP_GINIT(pcre),            /* globals constructor */
    PHP_GSHUTDOWN(pcre),        /* globals destructor */
    NULL,                       /* presumably: no post-deactivate handler */
    STANDARD_MODULE_PROPERTIES_EX
};
2143
/* Only emitted when COMPILE_DL_PCRE is defined -- presumably when the
 * extension is built as a dynamically loadable module; confirm against the
 * build configuration. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
2147
2148/* }}} */
2149
2150#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2151
2152/*
2153 * Local variables:
2154 * tab-width: 4
2155 * c-basic-offset: 4
2156 * End:
2157 * vim600: sw=4 ts=4 fdm=marker
2158 * vim<600: sw=4 ts=4
2159 */
2160