1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2015 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include "php.h"
22#include "php_ini.h"
23#include "php_globals.h"
24#include "php_pcre.h"
25#include "ext/standard/info.h"
26#include "ext/standard/basic_functions.h"
27#include "zend_smart_str.h"
28
29#if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31#include "ext/standard/php_string.h"
32
/*
 * Result ordering for global matches. The value travels in the low byte of
 * the user-supplied flags (see the `flags & 0xff` handling in
 * php_pcre_match_impl()).
 */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
/* Flag bit: report each match as a (string, offset) pair. */
#define PREG_OFFSET_CAPTURE         (1<<8)

/* Flag bits presumably consumed by the split implementation, which is not
   visible in this part of the file. */
#define PREG_SPLIT_NO_EMPTY         (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE    (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1<<2)

/* Stored in a cache entry's preg_options when the 'e' modifier is given. */
#define PREG_REPLACE_EVAL           (1<<0)

/* Flag bit presumably consumed by the grep implementation, which is not
   visible in this part of the file. */
#define PREG_GREP_INVERT            (1<<0)

/* Number of compiled patterns kept in the cache; when this count is reached,
   PCRE_CACHE_SIZE / 8 entries are cleaned out before a new one is added. */
#define PCRE_CACHE_SIZE 4096
46
/*
 * Error codes stored in PCRE_G(error_code). They are exported to userland
 * as the PREG_*_ERROR constants in PHP_MINIT_FUNCTION(pcre), and
 * pcre_handle_exec_error() maps PCRE_ERROR_* codes onto them.
 */
enum {
    PHP_PCRE_NO_ERROR = 0,
    PHP_PCRE_INTERNAL_ERROR,
    PHP_PCRE_BACKTRACK_LIMIT_ERROR,
    PHP_PCRE_RECURSION_LIMIT_ERROR,
    PHP_PCRE_BAD_UTF8_ERROR,
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR
};
55
56
57PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
58
59
60static void pcre_handle_exec_error(int pcre_code) /* {{{ */
61{
62    int preg_code = 0;
63
64    switch (pcre_code) {
65        case PCRE_ERROR_MATCHLIMIT:
66            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
67            break;
68
69        case PCRE_ERROR_RECURSIONLIMIT:
70            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
71            break;
72
73        case PCRE_ERROR_BADUTF8:
74            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
75            break;
76
77        case PCRE_ERROR_BADUTF8_OFFSET:
78            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
79            break;
80
81        default:
82            preg_code = PHP_PCRE_INTERNAL_ERROR;
83            break;
84    }
85
86    PCRE_G(error_code) = preg_code;
87}
88/* }}} */
89
90static void php_free_pcre_cache(zval *data) /* {{{ */
91{
92    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
93    if (!pce) return;
94    pcre_free(pce->re);
95    if (pce->extra) {
96        pcre_free_study(pce->extra);
97    }
98#if HAVE_SETLOCALE
99    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
100    if (pce->locale) {
101        zend_string_release(pce->locale);
102    }
103#endif
104    pefree(pce, 1);
105}
106/* }}} */
107
108static PHP_GINIT_FUNCTION(pcre) /* {{{ */
109{
110    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
111    pcre_globals->backtrack_limit = 0;
112    pcre_globals->recursion_limit = 0;
113    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
114}
115/* }}} */
116
/*
 * Module globals destructor: destroys the compiled-regex cache. Each entry
 * is released through the php_free_pcre_cache() destructor registered in
 * PHP_GINIT.
 */
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
    zend_hash_destroy(&pcre_globals->pcre_cache);
}
121/* }}} */
122
/*
 * INI settings, each bound to a field of zend_pcre_globals.
 * backtrack_limit and recursion_limit are copied into the pcre_extra
 * match_limit / match_limit_recursion fields before matching; the jit
 * switch only exists when the PCRE headers define PCRE_STUDY_JIT_COMPILE.
 */
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef PCRE_STUDY_JIT_COMPILE
    STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
130
131
132/* {{{ PHP_MINFO_FUNCTION(pcre) */
133static PHP_MINFO_FUNCTION(pcre)
134{
135    int jit_yes = 0;
136
137    php_info_print_table_start();
138    php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
139    php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
140
141    if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
142        php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
143    } else {
144        php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
145    }
146
147    php_info_print_table_end();
148
149    DISPLAY_INI_ENTRIES();
150}
151/* }}} */
152
153/* {{{ PHP_MINIT_FUNCTION(pcre) */
/*
 * Module startup: registers the INI entries and the userland constants
 * (match/split/grep flags, PREG_*_ERROR codes and the PCRE library version
 * string). Always returns SUCCESS.
 */
static PHP_MINIT_FUNCTION(pcre)
{
    REGISTER_INI_ENTRIES();

    /* Flag constants accepted by the preg_* functions. */
    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);

    /* Error codes; these are the values kept in PCRE_G(error_code). */
    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
    /* Version string reported by the PCRE library in use. */
    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);

    return SUCCESS;
}
176/* }}} */
177
178/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
/*
 * Module shutdown: unregisters the INI entries registered in
 * PHP_MINIT_FUNCTION(pcre). Always returns SUCCESS.
 */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
    UNREGISTER_INI_ENTRIES();

    return SUCCESS;
}
185/* }}} */
186
187/* {{{ static pcre_clean_cache */
188static int pcre_clean_cache(zval *data, void *arg)
189{
190    int *num_clean = (int *)arg;
191
192    if (*num_clean > 0) {
193        (*num_clean)--;
194        return 1;
195    } else {
196        return 0;
197    }
198}
199/* }}} */
200
201/* {{{ static make_subpats_table */
202static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
203{
204    pcre_extra *extra = pce->extra;
205    int name_cnt = pce->name_count, name_size, ni = 0;
206    int rc;
207    char *name_table;
208    unsigned short name_idx;
209    char **subpat_names;
210    int rc1, rc2;
211
212    rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
213    rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
214    rc = rc2 ? rc2 : rc1;
215    if (rc < 0) {
216        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
217        return NULL;
218    }
219
220    subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
221    while (ni++ < name_cnt) {
222        name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
223        subpat_names[name_idx] = name_table + 2;
224        if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
225            php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
226            efree(subpat_names);
227            return NULL;
228        }
229        name_table += name_size;
230    }
231    return subpat_names;
232}
233/* }}} */
234
235/* {{{ pcre_get_compiled_regex_cache
236 */
237PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
238{
239    pcre                *re = NULL;
240    pcre_extra          *extra;
241    int                  coptions = 0;
242    int                  soptions = 0;
243    const char          *error;
244    int                  erroffset;
245    char                 delimiter;
246    char                 start_delimiter;
247    char                 end_delimiter;
248    char                *p, *pp;
249    char                *pattern;
250    int                  do_study = 0;
251    int                  poptions = 0;
252    unsigned const char *tables = NULL;
253    pcre_cache_entry    *pce;
254    pcre_cache_entry     new_entry;
255    int                  rc;
256
257    /* Try to lookup the cached regex entry, and if successful, just pass
258       back the compiled pattern, otherwise go on and compile it. */
259    pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
260    if (pce) {
261#if HAVE_SETLOCALE
262        if (pce->locale == BG(locale_string) ||
263            (pce->locale && BG(locale_string) &&
264             pce->locale->len == BG(locale_string)->len &&
265             !memcmp(pce->locale->val, BG(locale_string)->val, pce->locale->len)) ||
266            (!pce->locale &&
267             BG(locale_string)->len == 1 &&
268             BG(locale_string)->val[0] == 'C') ||
269            (!BG(locale_string) &&
270             pce->locale->len == 1 &&
271             pce->locale->val[0] == 'C')) {
272            return pce;
273        }
274#else
275        return pce;
276#endif
277    }
278
279    p = regex->val;
280
281    /* Parse through the leading whitespace, and display a warning if we
282       get to the end without encountering a delimiter. */
283    while (isspace((int)*(unsigned char *)p)) p++;
284    if (*p == 0) {
285        php_error_docref(NULL, E_WARNING,
286                         p < regex->val + regex->len ? "Null byte in regex" : "Empty regular expression");
287        return NULL;
288    }
289
290    /* Get the delimiter and display a warning if it is alphanumeric
291       or a backslash. */
292    delimiter = *p++;
293    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
294        php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
295        return NULL;
296    }
297
298    start_delimiter = delimiter;
299    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
300        delimiter = pp[5];
301    end_delimiter = delimiter;
302
303    pp = p;
304
305    if (start_delimiter == end_delimiter) {
306        /* We need to iterate through the pattern, searching for the ending delimiter,
307           but skipping the backslashed delimiters.  If the ending delimiter is not
308           found, display a warning. */
309        while (*pp != 0) {
310            if (*pp == '\\' && pp[1] != 0) pp++;
311            else if (*pp == delimiter)
312                break;
313            pp++;
314        }
315    } else {
316        /* We iterate through the pattern, searching for the matching ending
317         * delimiter. For each matching starting delimiter, we increment nesting
318         * level, and decrement it for each matching ending delimiter. If we
319         * reach the end of the pattern without matching, display a warning.
320         */
321        int brackets = 1;   /* brackets nesting level */
322        while (*pp != 0) {
323            if (*pp == '\\' && pp[1] != 0) pp++;
324            else if (*pp == end_delimiter && --brackets <= 0)
325                break;
326            else if (*pp == start_delimiter)
327                brackets++;
328            pp++;
329        }
330    }
331
332    if (*pp == 0) {
333        if (pp < regex->val + regex->len) {
334            php_error_docref(NULL,E_WARNING, "Null byte in regex");
335        } else if (start_delimiter == end_delimiter) {
336            php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
337        } else {
338            php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
339        }
340        return NULL;
341    }
342
343    /* Make a copy of the actual pattern. */
344    pattern = estrndup(p, pp-p);
345
346    /* Move on to the options */
347    pp++;
348
349    /* Parse through the options, setting appropriate flags.  Display
350       a warning if we encounter an unknown modifier. */
351    while (pp < regex->val + regex->len) {
352        switch (*pp++) {
353            /* Perl compatible options */
354            case 'i':   coptions |= PCRE_CASELESS;      break;
355            case 'm':   coptions |= PCRE_MULTILINE;     break;
356            case 's':   coptions |= PCRE_DOTALL;        break;
357            case 'x':   coptions |= PCRE_EXTENDED;      break;
358
359            /* PCRE specific options */
360            case 'A':   coptions |= PCRE_ANCHORED;      break;
361            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
362            case 'S':   do_study  = 1;                  break;
363            case 'U':   coptions |= PCRE_UNGREEDY;      break;
364            case 'X':   coptions |= PCRE_EXTRA;         break;
365            case 'u':   coptions |= PCRE_UTF8;
366    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
367       characters, even in UTF-8 mode. However, this can be changed by setting
368       the PCRE_UCP option. */
369#ifdef PCRE_UCP
370                        coptions |= PCRE_UCP;
371#endif
372                break;
373
374            /* Custom preg options */
375            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;
376
377            case ' ':
378            case '\n':
379                break;
380
381            default:
382                if (pp[-1]) {
383                    php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
384                } else {
385                    php_error_docref(NULL,E_WARNING, "Null byte in regex");
386                }
387                efree(pattern);
388                return NULL;
389        }
390    }
391
392#if HAVE_SETLOCALE
393    if (BG(locale_string) &&
394        (BG(locale_string)->len != 1 || BG(locale_string)->val[0] != 'C')) {
395        tables = pcre_maketables();
396    }
397#endif
398
399    /* Compile pattern and display a warning if compilation failed. */
400    re = pcre_compile(pattern,
401                      coptions,
402                      &error,
403                      &erroffset,
404                      tables);
405
406    if (re == NULL) {
407        php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
408        efree(pattern);
409        if (tables) {
410            pefree((void*)tables, 1);
411        }
412        return NULL;
413    }
414
415#ifdef PCRE_STUDY_JIT_COMPILE
416    if (PCRE_G(jit)) {
417        /* Enable PCRE JIT compiler */
418        do_study = 1;
419        soptions |= PCRE_STUDY_JIT_COMPILE;
420    }
421#endif
422
423    /* If study option was specified, study the pattern and
424       store the result in extra for passing to pcre_exec. */
425    if (do_study) {
426        extra = pcre_study(re, soptions, &error);
427        if (extra) {
428            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
429            extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
430            extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
431        }
432        if (error != NULL) {
433            php_error_docref(NULL, E_WARNING, "Error while studying pattern");
434        }
435    } else {
436        extra = NULL;
437    }
438
439    efree(pattern);
440
441    /*
442     * If we reached cache limit, clean out the items from the head of the list;
443     * these are supposedly the oldest ones (but not necessarily the least used
444     * ones).
445     */
446    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
447        int num_clean = PCRE_CACHE_SIZE / 8;
448        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
449    }
450
451    /* Store the compiled pattern and extra info in the cache. */
452    new_entry.re = re;
453    new_entry.extra = extra;
454    new_entry.preg_options = poptions;
455    new_entry.compile_options = coptions;
456#if HAVE_SETLOCALE
457    new_entry.locale = BG(locale_string) ?
458        ((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
459            zend_string_copy(BG(locale_string)) :
460            zend_string_init(BG(locale_string)->val, BG(locale_string)->len, 1)) :
461        NULL;
462    new_entry.tables = tables;
463#endif
464
465    rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
466    if (rc < 0) {
467        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
468        return NULL;
469    }
470
471    rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
472    if (rc < 0) {
473        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
474        return NULL;
475    }
476
477    /*
478     * Interned strings are not duplicated when stored in HashTable,
479     * but all the interned strings created during HTTP request are removed
480     * at end of request. However PCRE_G(pcre_cache) must be consistent
481     * on the next request as well. So we disable usage of interned strings
482     * as hash keys especually for this table.
483     * See bug #63180
484     */
485    if (!IS_INTERNED(regex) || !(GC_FLAGS(regex) & IS_STR_PERMANENT)) {
486        zend_string *str = zend_string_init(regex->val, regex->len, 1);
487        GC_REFCOUNT(str) = 0; /* will be incremented by zend_hash_update_mem() */
488        str->h = regex->h;
489        regex = str;
490    }
491
492    pce = zend_hash_update_mem(&PCRE_G(pcre_cache), regex, &new_entry, sizeof(pcre_cache_entry));
493
494    return pce;
495}
496/* }}} */
497
498/* {{{ pcre_get_compiled_regex
499 */
500PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
501{
502    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
503
504    if (extra) {
505        *extra = pce ? pce->extra : NULL;
506    }
507    if (preg_options) {
508        *preg_options = pce ? pce->preg_options : 0;
509    }
510
511    return pce ? pce->re : NULL;
512}
513/* }}} */
514
515/* {{{ pcre_get_compiled_regex_ex
516 */
517PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
518{
519    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
520
521    if (extra) {
522        *extra = pce ? pce->extra : NULL;
523    }
524    if (preg_options) {
525        *preg_options = pce ? pce->preg_options : 0;
526    }
527    if (compile_options) {
528        *compile_options = pce ? pce->compile_options : 0;
529    }
530
531    return pce ? pce->re : NULL;
532}
533/* }}} */
534
535/* {{{ add_offset_pair */
536static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
537{
538    zval match_pair, tmp;
539
540    array_init_size(&match_pair, 2);
541
542    /* Add (match, offset) to the return value */
543    ZVAL_STRINGL(&tmp, str, len);
544    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
545    ZVAL_LONG(&tmp, offset);
546    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
547
548    if (name) {
549        Z_ADDREF(match_pair);
550        zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
551    }
552    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
553}
554/* }}} */
555
556static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
557{
558    /* parameters */
559    zend_string      *regex;            /* Regular expression */
560    zend_string      *subject;          /* String to match against */
561    pcre_cache_entry *pce;              /* Compiled regular expression */
562    zval             *subpats = NULL;   /* Array for subpatterns */
563    zend_long         flags = 0;        /* Match control flags */
564    zend_long         start_offset = 0; /* Where the new search starts */
565
566#ifndef FAST_ZPP
567    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
568                              &subject, &subpats, &flags, &start_offset) == FAILURE) {
569        RETURN_FALSE;
570    }
571#else
572    ZEND_PARSE_PARAMETERS_START(2, 5)
573        Z_PARAM_STR(regex)
574        Z_PARAM_STR(subject)
575        Z_PARAM_OPTIONAL
576        Z_PARAM_ZVAL_EX(subpats, 0, 1)
577        Z_PARAM_LONG(flags)
578        Z_PARAM_LONG(start_offset)
579    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
580#endif
581
582    /* Compile regex or get it from cache. */
583    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
584        RETURN_FALSE;
585    }
586
587    php_pcre_match_impl(pce, subject->val, (int)subject->len, return_value, subpats,
588        global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
589}
590/* }}} */
591
592/* {{{ php_pcre_match_impl() */
593PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
594    zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
595{
596    zval             result_set,        /* Holds a set of subpatterns after
597                                           a global match */
598                    *match_sets = NULL; /* An array of sets of matches for each
599                                           subpattern after a global match */
600    pcre_extra      *extra = pce->extra;/* Holds results of studying */
601    pcre_extra       extra_data;        /* Used locally for exec options */
602    int              exoptions = 0;     /* Execution options */
603    int              count = 0;         /* Count of matched subpatterns */
604    int             *offsets;           /* Array of subpattern offsets */
605    int              num_subpats;       /* Number of captured subpatterns */
606    int              size_offsets;      /* Size of the offsets array */
607    int              matched;           /* Has anything matched */
608    int              g_notempty = 0;    /* If the match should not be empty */
609    const char     **stringlist;        /* Holds list of subpatterns */
610    char           **subpat_names;      /* Array for named subpatterns */
611    int              i;
612    int              subpats_order;     /* Order of subpattern matches */
613    int              offset_capture;    /* Capture match offsets: yes/no */
614    unsigned char   *mark = NULL;       /* Target for MARK name */
615    zval            marks;              /* Array of marks for PREG_PATTERN_ORDER */
616    ALLOCA_FLAG(use_heap);
617
618    ZVAL_UNDEF(&marks);
619
620    /* Overwrite the passed-in value for subpatterns with an empty array. */
621    if (subpats != NULL) {
622        zval_dtor(subpats);
623        array_init(subpats);
624    }
625
626    subpats_order = global ? PREG_PATTERN_ORDER : 0;
627
628    if (use_flags) {
629        offset_capture = flags & PREG_OFFSET_CAPTURE;
630
631        /*
632         * subpats_order is pre-set to pattern mode so we change it only if
633         * necessary.
634         */
635        if (flags & 0xff) {
636            subpats_order = flags & 0xff;
637        }
638        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
639            (!global && subpats_order != 0)) {
640            php_error_docref(NULL, E_WARNING, "Invalid flags specified");
641            return;
642        }
643    } else {
644        offset_capture = 0;
645    }
646
647    /* Negative offset counts from the end of the string. */
648    if (start_offset < 0) {
649        start_offset = subject_len + start_offset;
650        if (start_offset < 0) {
651            start_offset = 0;
652        }
653    }
654
655    if (extra == NULL) {
656        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
657        extra = &extra_data;
658    }
659    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
660    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
661#ifdef PCRE_EXTRA_MARK
662    extra->mark = &mark;
663    extra->flags |= PCRE_EXTRA_MARK;
664#endif
665
666    /* Calculate the size of the offsets array, and allocate memory for it. */
667    num_subpats = pce->capture_count + 1;
668    size_offsets = num_subpats * 3;
669
670    /*
671     * Build a mapping from subpattern numbers to their names. We will
672     * allocate the table only if there are any named subpatterns.
673     */
674    subpat_names = NULL;
675    if (pce->name_count > 0) {
676        subpat_names = make_subpats_table(num_subpats, pce);
677        if (!subpat_names) {
678            RETURN_FALSE;
679        }
680    }
681
682    if (size_offsets <= 32) {
683        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
684    } else {
685        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
686    }
687    memset(offsets, 0, size_offsets*sizeof(int));
688    /* Allocate match sets array and initialize the values. */
689    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
690        match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
691        for (i=0; i<num_subpats; i++) {
692            array_init(&match_sets[i]);
693        }
694    }
695
696    matched = 0;
697    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
698
699    do {
700        /* Execute the regular expression. */
701        count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
702                          exoptions|g_notempty, offsets, size_offsets);
703
704        /* the string was already proved to be valid UTF-8 */
705        exoptions |= PCRE_NO_UTF8_CHECK;
706
707        /* Check for too many substrings condition. */
708        if (count == 0) {
709            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
710            count = size_offsets/3;
711        }
712
713        /* If something has matched */
714        if (count > 0) {
715            matched++;
716
717            /* If subpatterns array has been passed, fill it in with values. */
718            if (subpats != NULL) {
719                /* Try to get the list of substrings and display a warning if failed. */
720                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
721                    if (subpat_names) {
722                        efree(subpat_names);
723                    }
724                    if (size_offsets <= 32) {
725                        free_alloca(offsets, use_heap);
726                    } else {
727                        efree(offsets);
728                    }
729                    if (match_sets) efree(match_sets);
730                    php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
731                    RETURN_FALSE;
732                }
733
734                if (global) {   /* global pattern matching */
735                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
736                        /* For each subpattern, insert it into the appropriate array. */
737                        if (offset_capture) {
738                            for (i = 0; i < count; i++) {
739                                add_offset_pair(&match_sets[i], (char *)stringlist[i],
740                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
741                            }
742                        } else {
743                            for (i = 0; i < count; i++) {
744                                add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
745                                                       offsets[(i<<1)+1] - offsets[i<<1]);
746                            }
747                        }
748                        /* Add MARK, if available */
749                        if (mark) {
750                            if (Z_TYPE(marks) == IS_UNDEF) {
751                                array_init(&marks);
752                            }
753                            add_index_string(&marks, matched - 1, (char *) mark);
754                        }
755                        /*
756                         * If the number of captured subpatterns on this run is
757                         * less than the total possible number, pad the result
758                         * arrays with empty strings.
759                         */
760                        if (count < num_subpats) {
761                            for (; i < num_subpats; i++) {
762                                add_next_index_string(&match_sets[i], "");
763                            }
764                        }
765                    } else {
766                        /* Allocate the result set array */
767                        array_init_size(&result_set, count + (mark ? 1 : 0));
768
769                        /* Add all the subpatterns to it */
770                        if (subpat_names) {
771                            if (offset_capture) {
772                                for (i = 0; i < count; i++) {
773                                    add_offset_pair(&result_set, (char *)stringlist[i],
774                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
775                                }
776                            } else {
777                                for (i = 0; i < count; i++) {
778                                    if (subpat_names[i]) {
779                                        add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
780                                                               offsets[(i<<1)+1] - offsets[i<<1]);
781                                    }
782                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
783                                                           offsets[(i<<1)+1] - offsets[i<<1]);
784                                }
785                            }
786                        } else {
787                            if (offset_capture) {
788                                for (i = 0; i < count; i++) {
789                                    add_offset_pair(&result_set, (char *)stringlist[i],
790                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
791                                }
792                            } else {
793                                for (i = 0; i < count; i++) {
794                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
795                                                           offsets[(i<<1)+1] - offsets[i<<1]);
796                                }
797                            }
798                        }
799                        /* Add MARK, if available */
800                        if (mark) {
801                            add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
802                        }
803                        /* And add it to the output array */
804                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
805                    }
806                } else {            /* single pattern matching */
807                    /* For each subpattern, insert it into the subpatterns array. */
808                    if (subpat_names) {
809                        if (offset_capture) {
810                            for (i = 0; i < count; i++) {
811                                add_offset_pair(subpats, (char *)stringlist[i],
812                                                offsets[(i<<1)+1] - offsets[i<<1],
813                                                offsets[i<<1], subpat_names[i]);
814                            }
815                        } else {
816                            for (i = 0; i < count; i++) {
817                                if (subpat_names[i]) {
818                                    add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
819                                                      offsets[(i<<1)+1] - offsets[i<<1]);
820                                }
821                                add_next_index_stringl(subpats, (char *)stringlist[i],
822                                                       offsets[(i<<1)+1] - offsets[i<<1]);
823                            }
824                        }
825                    } else {
826                        if (offset_capture) {
827                            for (i = 0; i < count; i++) {
828                                add_offset_pair(subpats, (char *)stringlist[i],
829                                                offsets[(i<<1)+1] - offsets[i<<1],
830                                                offsets[i<<1], NULL);
831                            }
832                        } else {
833                            for (i = 0; i < count; i++) {
834                                add_next_index_stringl(subpats, (char *)stringlist[i],
835                                                       offsets[(i<<1)+1] - offsets[i<<1]);
836                            }
837                        }
838                    }
839                    /* Add MARK, if available */
840                    if (mark) {
841                        add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
842                    }
843                }
844
845                pcre_free((void *) stringlist);
846            }
847        } else if (count == PCRE_ERROR_NOMATCH) {
848            /* If we previously set PCRE_NOTEMPTY after a null match,
849               this is not necessarily the end. We need to advance
850               the start offset, and continue. Fudge the offset values
851               to achieve this, unless we're already at the end of the string. */
852            if (g_notempty != 0 && start_offset < subject_len) {
853                offsets[0] = (int)start_offset;
854                offsets[1] = (int)(start_offset + 1);
855            } else
856                break;
857        } else {
858            pcre_handle_exec_error(count);
859            break;
860        }
861
862        /* If we have matched an empty string, mimic what Perl's /g options does.
863           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
864           the match again at the same point. If this fails (picked up above) we
865           advance to the next character. */
866        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
867
868        /* Advance to the position right after the last full match */
869        start_offset = offsets[1];
870    } while (global);
871
872    /* Add the match sets to the output array and clean up */
873    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
874        if (subpat_names) {
875            for (i = 0; i < num_subpats; i++) {
876                if (subpat_names[i]) {
877                    zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
878                                     strlen(subpat_names[i]), &match_sets[i]);
879                    Z_ADDREF(match_sets[i]);
880                }
881                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
882            }
883        } else {
884            for (i = 0; i < num_subpats; i++) {
885                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
886            }
887        }
888        efree(match_sets);
889
890        if (Z_TYPE(marks) != IS_UNDEF) {
891            add_assoc_zval(subpats, "MARK", &marks);
892        }
893    }
894
895    if (size_offsets <= 32) {
896        free_alloca(offsets, use_heap);
897    } else {
898        efree(offsets);
899    }
900    if (subpat_names) {
901        efree(subpat_names);
902    }
903
904    /* Did we encounter an error? */
905    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
906        RETVAL_LONG(matched);
907    } else {
908        RETVAL_FALSE;
909    }
910}
911/* }}} */
912
913/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
914   Perform a Perl-style regular expression match */
915static PHP_FUNCTION(preg_match)
916{
917    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
918}
919/* }}} */
920
921/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
922   Perform a Perl-style global regular expression match */
923static PHP_FUNCTION(preg_match_all)
924{
925    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
926}
927/* }}} */
928
/* {{{ preg_get_backref
   Parse a backreference located at *str.

   Accepted shapes are <c>N, <c>NN and ${N} / ${NN}, where <c> is the
   introducer character the caller already recognised (it is not validated
   here) and N is a decimal digit.  Only the '$' introducer may be followed
   by braces.

   On success the number is stored in *backref, *str is moved just past the
   reference and 1 is returned.  Otherwise 0 is returned and *str is left
   untouched; *backref may already have been overwritten if digits were read
   but the closing brace was missing. */
static int preg_get_backref(char **str, int *backref)
{
    char *cursor = *str;
    int   braced;
    int   number;

    /* An introducer at the very end of the string cannot start a reference. */
    if (cursor[1] == '\0') {
        return 0;
    }

    /* Step over the introducer and, for "${", over the brace as well. */
    braced = (cursor[0] == '$' && cursor[1] == '{');
    cursor += braced ? 2 : 1;

    /* The first digit is mandatory. */
    if (*cursor < '0' || *cursor > '9') {
        return 0;
    }
    number = *cursor++ - '0';

    /* A second digit is optional; at most two digits are consumed. */
    if (*cursor >= '0' && *cursor <= '9') {
        number = number * 10 + (*cursor++ - '0');
    }
    *backref = number;

    /* The braced form must be closed right after the digits. */
    if (braced) {
        if (*cursor != '}') {
            return 0;
        }
        cursor++;
    }

    *str = cursor;
    return 1;
}
966/* }}} */
967
968/* {{{ preg_do_repl_func
969 */
970static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
971{
972    zend_string *result_str;
973    zval         retval;            /* Function return value */
974    zval         args[1];           /* Argument to pass to function */
975    int          i;
976
977    array_init_size(&args[0], count + (mark ? 1 : 0));
978    if (subpat_names) {
979        for (i = 0; i < count; i++) {
980            if (subpat_names[i]) {
981                add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
982            }
983            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
984        }
985    } else {
986        for (i = 0; i < count; i++) {
987            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
988        }
989    }
990    if (mark) {
991        add_assoc_string(&args[0], "MARK", (char *) mark);
992    }
993
994    if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
995        result_str = zval_get_string(&retval);
996        zval_ptr_dtor(&retval);
997    } else {
998        if (!EG(exception)) {
999            php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1000        }
1001
1002        result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1003    }
1004
1005    zval_ptr_dtor(&args[0]);
1006
1007    return result_str;
1008}
1009/* }}} */
1010
1011/* {{{ php_pcre_replace
1012 */
1013PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1014                              zend_string *subject_str,
1015                              char *subject, int subject_len,
1016                              zval *replace_val, int is_callable_replace,
1017                              int limit, int *replace_count)
1018{
1019    pcre_cache_entry    *pce;               /* Compiled regular expression */
1020
1021    /* Compile regex or get it from cache. */
1022    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1023        return NULL;
1024    }
1025
1026    return php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
1027        is_callable_replace, limit, replace_count);
1028}
1029/* }}} */
1030
1031/* {{{ php_pcre_replace_impl() */
1032PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val,
1033    int is_callable_replace, int limit, int *replace_count)
1034{
1035    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1036    pcre_extra       extra_data;        /* Used locally for exec options */
1037    int              exoptions = 0;     /* Execution options */
1038    int              count = 0;         /* Count of matched subpatterns */
1039    int             *offsets;           /* Array of subpattern offsets */
1040    char            **subpat_names;     /* Array for named subpatterns */
1041    int              num_subpats;       /* Number of captured subpatterns */
1042    int              size_offsets;      /* Size of the offsets array */
1043    int              new_len;           /* Length of needed storage */
1044    int              alloc_len;         /* Actual allocated length */
1045    int              match_len;         /* Length of the current match */
1046    int              backref;           /* Backreference number */
1047    int              start_offset;      /* Where the new search starts */
1048    int              g_notempty=0;      /* If the match should not be empty */
1049    int              replace_len=0;     /* Length of replacement string */
1050    char            *replace=NULL,      /* Replacement string */
1051                    *walkbuf,           /* Location of current replacement in the result */
1052                    *walk,              /* Used to walk the replacement string */
1053                    *match,             /* The current match */
1054                    *piece,             /* The current piece of subject */
1055                    *replace_end=NULL,  /* End of replacement string */
1056                     walk_last;         /* Last walked character */
1057    int              result_len;        /* Length of result */
1058    unsigned char   *mark = NULL;       /* Target for MARK name */
1059    zend_string     *result;            /* Result of replacement */
1060    zend_string     *eval_result=NULL;  /* Result of custom function */
1061    ALLOCA_FLAG(use_heap);
1062
1063    if (extra == NULL) {
1064        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1065        extra = &extra_data;
1066    }
1067    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1068    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1069
1070    if (pce->preg_options & PREG_REPLACE_EVAL) {
1071        php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1072        return NULL;
1073    }
1074    if (!is_callable_replace) {
1075        replace = Z_STRVAL_P(replace_val);
1076        replace_len = (int)Z_STRLEN_P(replace_val);
1077        replace_end = replace + replace_len;
1078    }
1079
1080    /* Calculate the size of the offsets array, and allocate memory for it. */
1081    num_subpats = pce->capture_count + 1;
1082    size_offsets = num_subpats * 3;
1083    if (size_offsets <= 32) {
1084        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1085    } else {
1086        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1087    }
1088
1089    /*
1090     * Build a mapping from subpattern numbers to their names. We will
1091     * allocate the table only if there are any named subpatterns.
1092     */
1093    subpat_names = NULL;
1094    if (pce->name_count > 0) {
1095        subpat_names = make_subpats_table(num_subpats, pce);
1096        if (!subpat_names) {
1097            return NULL;
1098        }
1099    }
1100
1101    alloc_len = 0;
1102    result = NULL;
1103
1104    /* Initialize */
1105    match = NULL;
1106    start_offset = 0;
1107    result_len = 0;
1108    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1109
1110    while (1) {
1111#ifdef PCRE_EXTRA_MARK
1112        extra->mark = &mark;
1113        extra->flags |= PCRE_EXTRA_MARK;
1114#endif
1115        /* Execute the regular expression. */
1116        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1117                          exoptions|g_notempty, offsets, size_offsets);
1118
1119        /* the string was already proved to be valid UTF-8 */
1120        exoptions |= PCRE_NO_UTF8_CHECK;
1121
1122        /* Check for too many substrings condition. */
1123        if (count == 0) {
1124            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1125            count = size_offsets/3;
1126        }
1127
1128        piece = subject + start_offset;
1129
1130        if (count > 0 && (limit == -1 || limit > 0)) {
1131            if (replace_count) {
1132                ++*replace_count;
1133            }
1134            /* Set the match location in subject */
1135            match = subject + offsets[0];
1136
1137            new_len = result_len + offsets[0] - start_offset; /* part before the match */
1138
1139            if (is_callable_replace) {
1140                /* Use custom function to get replacement string and its length. */
1141                eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1142                new_len += (int)eval_result->len;
1143            } else { /* do regular substitution */
1144                walk = replace;
1145                walk_last = 0;
1146                while (walk < replace_end) {
1147                    if ('\\' == *walk || '$' == *walk) {
1148                        if (walk_last == '\\') {
1149                            walk++;
1150                            walk_last = 0;
1151                            continue;
1152                        }
1153                        if (preg_get_backref(&walk, &backref)) {
1154                            if (backref < count)
1155                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1156                            continue;
1157                        }
1158                    }
1159                    new_len++;
1160                    walk++;
1161                    walk_last = walk[-1];
1162                }
1163            }
1164
1165            if (new_len >= alloc_len) {
1166                if (alloc_len == 0) {
1167                    alloc_len = 2 * subject_len;
1168                    if (new_len >= alloc_len) {
1169                        alloc_len = alloc_len + 2 * new_len;
1170                    }
1171                    result = zend_string_alloc(alloc_len, 0);
1172                } else {
1173                    alloc_len = alloc_len + 2 * new_len;
1174                    result = zend_string_extend(result, alloc_len, 0);
1175                }
1176            }
1177            /* copy the part of the string before the match */
1178            memcpy(&result->val[result_len], piece, match-piece);
1179            result_len += (int)(match-piece);
1180
1181            /* copy replacement and backrefs */
1182            walkbuf = result->val + result_len;
1183
1184            /* If using custom function, copy result to the buffer and clean up. */
1185            if (is_callable_replace) {
1186                memcpy(walkbuf, eval_result->val, eval_result->len);
1187                result_len += (int)eval_result->len;
1188                if (eval_result) zend_string_release(eval_result);
1189            } else { /* do regular backreference copying */
1190                walk = replace;
1191                walk_last = 0;
1192                while (walk < replace_end) {
1193                    if ('\\' == *walk || '$' == *walk) {
1194                        if (walk_last == '\\') {
1195                            *(walkbuf-1) = *walk++;
1196                            walk_last = 0;
1197                            continue;
1198                        }
1199                        if (preg_get_backref(&walk, &backref)) {
1200                            if (backref < count) {
1201                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1202                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1203                                walkbuf += match_len;
1204                            }
1205                            continue;
1206                        }
1207                    }
1208                    *walkbuf++ = *walk++;
1209                    walk_last = walk[-1];
1210                }
1211                *walkbuf = '\0';
1212                /* increment the result length by how much we've added to the string */
1213                result_len += (int)(walkbuf - (result->val + result_len));
1214            }
1215
1216            if (limit != -1)
1217                limit--;
1218
1219        } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1220            /* If we previously set PCRE_NOTEMPTY after a null match,
1221               this is not necessarily the end. We need to advance
1222               the start offset, and continue. Fudge the offset values
1223               to achieve this, unless we're already at the end of the string. */
1224            if (g_notempty != 0 && start_offset < subject_len) {
1225                offsets[0] = start_offset;
1226                offsets[1] = start_offset + 1;
1227                memcpy(&result->val[result_len], piece, 1);
1228                result_len++;
1229            } else {
1230                if (!result && subject_str) {
1231                    result = zend_string_copy(subject_str);
1232                    break;
1233                }
1234                new_len = result_len + subject_len - start_offset;
1235                if (new_len > alloc_len) {
1236                    alloc_len = new_len; /* now we know exactly how long it is */
1237                    if (NULL != result) {
1238                        result = zend_string_realloc(result, alloc_len, 0);
1239                    } else {
1240                        result = zend_string_alloc(alloc_len, 0);
1241                    }
1242                }
1243                /* stick that last bit of string on our output */
1244                memcpy(&result->val[result_len], piece, subject_len - start_offset);
1245                result_len += subject_len - start_offset;
1246                result->val[result_len] = '\0';
1247                result->len = result_len;
1248                break;
1249            }
1250        } else {
1251            pcre_handle_exec_error(count);
1252            if (result) {
1253                zend_string_free(result);
1254                result = NULL;
1255            }
1256            break;
1257        }
1258
1259        /* If we have matched an empty string, mimic what Perl's /g options does.
1260           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1261           the match again at the same point. If this fails (picked up above) we
1262           advance to the next character. */
1263        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1264
1265        /* Advance to the next piece. */
1266        start_offset = offsets[1];
1267    }
1268
1269    if (size_offsets <= 32) {
1270        free_alloca(offsets, use_heap);
1271    } else {
1272        efree(offsets);
1273    }
1274    if (subpat_names) {
1275        efree(subpat_names);
1276    }
1277
1278    return result;
1279}
1280/* }}} */
1281
1282/* {{{ php_replace_in_subject
1283 */
1284static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1285{
1286    zval        *regex_entry,
1287                *replace_entry = NULL,
1288                *replace_value,
1289                 empty_replace;
1290    zend_string *result;
1291    uint32_t replace_idx;
1292    zend_string *subject_str = zval_get_string(subject);
1293
1294    /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1295    ZVAL_EMPTY_STRING(&empty_replace);
1296
1297    /* If regex is an array */
1298    if (Z_TYPE_P(regex) == IS_ARRAY) {
1299        replace_value = replace;
1300        replace_idx = 0;
1301
1302        /* For each entry in the regex array, get the entry */
1303        ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1304            /* Make sure we're dealing with strings. */
1305            zend_string *regex_str = zval_get_string(regex_entry);
1306
1307            /* If replace is an array and not a callable construct */
1308            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1309                /* Get current entry */
1310                replace_entry = NULL;
1311                while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1312                    if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
1313                        replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
1314                        break;
1315                    }
1316                    replace_idx++;
1317                }
1318                if (replace_entry != NULL) {
1319                    if (!is_callable_replace) {
1320                        convert_to_string_ex(replace_entry);
1321                    }
1322                    replace_value = replace_entry;
1323                    replace_idx++;
1324                } else {
1325                    /* We've run out of replacement strings, so use an empty one */
1326                    replace_value = &empty_replace;
1327                }
1328            }
1329
1330            /* Do the actual replacement and put the result back into subject_str
1331               for further replacements. */
1332            if ((result = php_pcre_replace(regex_str,
1333                                           subject_str,
1334                                           subject_str->val,
1335                                           (int)subject_str->len,
1336                                           replace_value,
1337                                           is_callable_replace,
1338                                           limit,
1339                                           replace_count)) != NULL) {
1340                zend_string_release(subject_str);
1341                subject_str = result;
1342            } else {
1343                zend_string_release(subject_str);
1344                zend_string_release(regex_str);
1345                return NULL;
1346            }
1347
1348            zend_string_release(regex_str);
1349        } ZEND_HASH_FOREACH_END();
1350
1351        return subject_str;
1352    } else {
1353        result = php_pcre_replace(Z_STR_P(regex),
1354                                  subject_str,
1355                                  subject_str->val,
1356                                  (int)subject_str->len,
1357                                  replace,
1358                                  is_callable_replace,
1359                                  limit,
1360                                  replace_count);
1361        zend_string_release(subject_str);
1362        return result;
1363    }
1364}
1365/* }}} */
1366
1367/* {{{ preg_replace_impl
1368 */
1369static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
1370{
1371    zval        *subject_entry;
1372    zend_string *result;
1373    zend_string *string_key;
1374    zend_ulong   num_key;
1375    int          replace_count = 0, old_replace_count;
1376
1377    if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1378        SEPARATE_ZVAL(replace);
1379        convert_to_string_ex(replace);
1380    }
1381
1382    if (Z_TYPE_P(regex) != IS_ARRAY) {
1383        SEPARATE_ZVAL(regex);
1384        convert_to_string_ex(regex);
1385    }
1386
1387    /* if subject is an array */
1388    if (Z_TYPE_P(subject) == IS_ARRAY) {
1389        array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1390
1391        /* For each subject entry, convert it to string, then perform replacement
1392           and add the result to the return_value array. */
1393        ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1394            old_replace_count = replace_count;
1395            if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1396                if (!is_filter || replace_count > old_replace_count) {
1397                    /* Add to return array */
1398                    if (string_key) {
1399                        add_assoc_str_ex(return_value, string_key->val, string_key->len, result);
1400                    } else {
1401                        add_index_str(return_value, num_key, result);
1402                    }
1403                } else {
1404                    zend_string_release(result);
1405                }
1406            }
1407        } ZEND_HASH_FOREACH_END();
1408    } else {
1409        /* if subject is not an array */
1410        old_replace_count = replace_count;
1411        if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1412            if (!is_filter || replace_count > old_replace_count) {
1413                RETVAL_STR(result);
1414            } else {
1415                zend_string_release(result);
1416            }
1417        }
1418    }
1419
1420    return replace_count;
1421}
1422/* }}} */
1423
1424/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1425   Perform Perl-style regular expression replacement. */
1426static PHP_FUNCTION(preg_replace)
1427{
1428    zval *regex, *replace, *subject, *zcount = NULL;
1429    zend_long limit = -1;
1430    int replace_count;
1431
1432#ifndef FAST_ZPP
1433    /* Get function parameters and do error-checking. */
1434    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1435        return;
1436    }
1437#else
1438    ZEND_PARSE_PARAMETERS_START(3, 5)
1439        Z_PARAM_ZVAL(regex)
1440        Z_PARAM_ZVAL(replace)
1441        Z_PARAM_ZVAL(subject)
1442        Z_PARAM_OPTIONAL
1443        Z_PARAM_LONG(limit)
1444        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1445    ZEND_PARSE_PARAMETERS_END();
1446#endif
1447
1448    if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1449        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1450        RETURN_FALSE;
1451    }
1452
1453    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0);
1454    if (zcount) {
1455        zval_dtor(zcount);
1456        ZVAL_LONG(zcount, replace_count);
1457    }
1458}
1459/* }}} */
1460
1461/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1462   Perform Perl-style regular expression replacement using replacement callback. */
1463static PHP_FUNCTION(preg_replace_callback)
1464{
1465    zval *regex, *replace, *subject, *zcount = NULL;
1466    zend_long limit = -1;
1467    zend_string *callback_name;
1468    int replace_count;
1469
1470#ifndef FAST_ZPP
1471    /* Get function parameters and do error-checking. */
1472    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1473        return;
1474    }
1475#else
1476    ZEND_PARSE_PARAMETERS_START(3, 5)
1477        Z_PARAM_ZVAL(regex)
1478        Z_PARAM_ZVAL(replace)
1479        Z_PARAM_ZVAL(subject)
1480        Z_PARAM_OPTIONAL
1481        Z_PARAM_LONG(limit)
1482        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1483    ZEND_PARSE_PARAMETERS_END();
1484#endif
1485
1486    if (!zend_is_callable(replace, 0, &callback_name)) {
1487        php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name->val);
1488        zend_string_release(callback_name);
1489        ZVAL_COPY(return_value, subject);
1490        return;
1491    }
1492    zend_string_release(callback_name);
1493
1494    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0);
1495    if (zcount) {
1496        zval_dtor(zcount);
1497        ZVAL_LONG(zcount, replace_count);
1498    }
1499}
1500/* }}} */
1501
1502/* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
1503   Perform Perl-style regular expression replacement using replacement callback. */
1504static PHP_FUNCTION(preg_replace_callback_array)
1505{
1506    zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
1507    zend_long limit = -1;
1508    zend_string *str_idx;
1509    zend_string *callback_name;
1510    int replace_count = 0;
1511
1512#ifndef FAST_ZPP
1513    /* Get function parameters and do error-checking. */
1514    if (zend_parse_parameters(ZEND_NUM_ARGS(), "az|lz/", &pattern, &subject, &limit, &zcount) == FAILURE) {
1515        return;
1516    }
1517#else
1518    ZEND_PARSE_PARAMETERS_START(2, 4)
1519        Z_PARAM_ARRAY(pattern)
1520        Z_PARAM_ZVAL(subject)
1521        Z_PARAM_OPTIONAL
1522        Z_PARAM_LONG(limit)
1523        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1524    ZEND_PARSE_PARAMETERS_END();
1525#endif
1526
1527    ZVAL_UNDEF(&zv);
1528    ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
1529        if (str_idx) {
1530            ZVAL_STR_COPY(&regex, str_idx);
1531        } else {
1532            php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
1533            zval_ptr_dtor(return_value);
1534            RETURN_NULL();
1535        }
1536
1537        if (!zend_is_callable(replace, 0, &callback_name)) {
1538            php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", callback_name->val);
1539            zend_string_release(callback_name);
1540            zval_ptr_dtor(&regex);
1541            zval_ptr_dtor(return_value);
1542            ZVAL_COPY(return_value, subject);
1543            return;
1544        }
1545        zend_string_release(callback_name);
1546
1547        if (Z_ISNULL_P(return_value)) {
1548            replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0);
1549        } else {
1550            replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0);
1551            zval_ptr_dtor(return_value);
1552        }
1553
1554        zval_ptr_dtor(&regex);
1555
1556        if (Z_ISUNDEF(zv)) {
1557            RETURN_NULL();
1558        }
1559
1560        ZVAL_COPY_VALUE(return_value, &zv);
1561
1562        if (UNEXPECTED(EG(exception))) {
1563            zval_ptr_dtor(return_value);
1564            RETURN_NULL();
1565        }
1566    } ZEND_HASH_FOREACH_END();
1567
1568    if (zcount) {
1569        zval_dtor(zcount);
1570        ZVAL_LONG(zcount, replace_count);
1571    }
1572}
1573/* }}} */
1574
1575/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1576   Perform Perl-style regular expression replacement and only return matches. */
1577static PHP_FUNCTION(preg_filter)
1578{
1579    zval *regex, *replace, *subject, *zcount = NULL;
1580    zend_long limit = -1;
1581    int replace_count;
1582
1583#ifndef FAST_ZPP
1584    /* Get function parameters and do error-checking. */
1585    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1586        return;
1587    }
1588#else
1589    ZEND_PARSE_PARAMETERS_START(3, 5)
1590        Z_PARAM_ZVAL(regex)
1591        Z_PARAM_ZVAL(replace)
1592        Z_PARAM_ZVAL(subject)
1593        Z_PARAM_OPTIONAL
1594        Z_PARAM_LONG(limit)
1595        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1596    ZEND_PARSE_PARAMETERS_END();
1597#endif
1598
1599    if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1600        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1601        RETURN_FALSE;
1602    }
1603
1604    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1);
1605    if (zcount) {
1606        zval_dtor(zcount);
1607        ZVAL_LONG(zcount, replace_count);
1608    }
1609}
1610/* }}} */
1611
1612/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1613   Split string into an array using a perl-style regular expression as a delimiter */
1614static PHP_FUNCTION(preg_split)
1615{
1616    zend_string         *regex;         /* Regular expression */
1617    zend_string         *subject;       /* String to match against */
1618    zend_long            limit_val = -1;/* Integer value of limit */
1619    zend_long            flags = 0;     /* Match control flags */
1620    pcre_cache_entry    *pce;           /* Compiled regular expression */
1621
1622    /* Get function parameters and do error checking */
1623#ifndef FAST_ZPP
1624    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
1625                              &subject, &limit_val, &flags) == FAILURE) {
1626        RETURN_FALSE;
1627    }
1628#else
1629    ZEND_PARSE_PARAMETERS_START(2, 4)
1630        Z_PARAM_STR(regex)
1631        Z_PARAM_STR(subject)
1632        Z_PARAM_OPTIONAL
1633        Z_PARAM_LONG(limit_val)
1634        Z_PARAM_LONG(flags)
1635    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1636#endif
1637
1638    /* Compile regex or get it from cache. */
1639    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1640        RETURN_FALSE;
1641    }
1642
1643    php_pcre_split_impl(pce, subject->val, (int)subject->len, return_value, (int)limit_val, flags);
1644}
1645/* }}} */
1646
1647/* {{{ php_pcre_split
1648 */
1649PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1650    zend_long limit_val, zend_long flags)
1651{
1652    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1653    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1654    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1655    pcre_extra       extra_data;        /* Used locally for exec options */
1656    int             *offsets;           /* Array of subpattern offsets */
1657    int              size_offsets;      /* Size of the offsets array */
1658    int              exoptions = 0;     /* Execution options */
1659    int              count = 0;         /* Count of matched subpatterns */
1660    int              start_offset;      /* Where the new search starts */
1661    int              next_offset;       /* End of the last delimiter match + 1 */
1662    int              g_notempty = 0;    /* If the match should not be empty */
1663    char            *last_match;        /* Location of last match */
1664    int              no_empty;          /* If NO_EMPTY flag is set */
1665    int              delim_capture;     /* If delimiters should be captured */
1666    int              offset_capture;    /* If offsets should be captured */
1667    zval             tmp;
1668    ALLOCA_FLAG(use_heap);
1669
1670    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1671    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1672    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1673
1674    if (limit_val == 0) {
1675        limit_val = -1;
1676    }
1677
1678    if (extra == NULL) {
1679        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1680        extra = &extra_data;
1681    }
1682    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1683    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1684#ifdef PCRE_EXTRA_MARK
1685    extra->flags &= ~PCRE_EXTRA_MARK;
1686#endif
1687
1688    /* Initialize return value */
1689    array_init(return_value);
1690
1691    /* Calculate the size of the offsets array, and allocate memory for it. */
1692    size_offsets = (pce->capture_count + 1) * 3;
1693    if (size_offsets <= 32) {
1694        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1695    } else {
1696        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1697    }
1698
1699    /* Start at the beginning of the string */
1700    start_offset = 0;
1701    next_offset = 0;
1702    last_match = subject;
1703    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1704
1705    /* Get next piece if no limit or limit not yet reached and something matched*/
1706    while ((limit_val == -1 || limit_val > 1)) {
1707        count = pcre_exec(pce->re, extra, subject,
1708                          subject_len, start_offset,
1709                          exoptions|g_notempty, offsets, size_offsets);
1710
1711        /* the string was already proved to be valid UTF-8 */
1712        exoptions |= PCRE_NO_UTF8_CHECK;
1713
1714        /* Check for too many substrings condition. */
1715        if (count == 0) {
1716            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1717            count = size_offsets/3;
1718        }
1719
1720        /* If something matched */
1721        if (count > 0) {
1722            if (!no_empty || &subject[offsets[0]] != last_match) {
1723
1724                if (offset_capture) {
1725                    /* Add (match, offset) pair to the return value */
1726                    add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1727                } else {
1728                    /* Add the piece to the return value */
1729                    ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1730                    zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1731                }
1732
1733                /* One less left to do */
1734                if (limit_val != -1)
1735                    limit_val--;
1736            }
1737
1738            last_match = &subject[offsets[1]];
1739            next_offset = offsets[1];
1740
1741            if (delim_capture) {
1742                int i, match_len;
1743                for (i = 1; i < count; i++) {
1744                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1745                    /* If we have matched a delimiter */
1746                    if (!no_empty || match_len > 0) {
1747                        if (offset_capture) {
1748                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1749                        } else {
1750                            ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1751                            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1752                        }
1753                    }
1754                }
1755            }
1756        } else if (count == PCRE_ERROR_NOMATCH) {
1757            /* If we previously set PCRE_NOTEMPTY after a null match,
1758               this is not necessarily the end. We need to advance
1759               the start offset, and continue. Fudge the offset values
1760               to achieve this, unless we're already at the end of the string. */
1761            if (g_notempty != 0 && start_offset < subject_len) {
1762                if (pce->compile_options & PCRE_UTF8) {
1763                    if (re_bump == NULL) {
1764                        int dummy;
1765                        zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
1766                        re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy);
1767                        zend_string_release(regex);
1768                        if (re_bump == NULL) {
1769                            RETURN_FALSE;
1770                        }
1771                    }
1772                    count = pcre_exec(re_bump, extra_bump, subject,
1773                              subject_len, start_offset,
1774                              exoptions, offsets, size_offsets);
1775                    if (count < 1) {
1776                        php_error_docref(NULL, E_WARNING, "Unknown error");
1777                        RETURN_FALSE;
1778                    }
1779                } else {
1780                    offsets[0] = start_offset;
1781                    offsets[1] = start_offset + 1;
1782                }
1783            } else
1784                break;
1785        } else {
1786            pcre_handle_exec_error(count);
1787            break;
1788        }
1789
1790        /* If we have matched an empty string, mimic what Perl's /g options does.
1791           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1792           the match again at the same point. If this fails (picked up above) we
1793           advance to the next character. */
1794        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1795
1796        /* Advance to the position right after the last full match */
1797        start_offset = offsets[1];
1798    }
1799
1800
1801    start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1802
1803    if (!no_empty || start_offset < subject_len)
1804    {
1805        if (offset_capture) {
1806            /* Add the last (match, offset) pair to the return value */
1807            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1808        } else {
1809            /* Add the last piece to the return value */
1810            ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1811            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1812        }
1813    }
1814
1815
1816    /* Clean up */
1817    if (size_offsets <= 32) {
1818        free_alloca(offsets, use_heap);
1819    } else {
1820        efree(offsets);
1821    }
1822}
1823/* }}} */
1824
1825/* {{{ proto string preg_quote(string str [, string delim_char])
1826   Quote regular expression characters plus an optional character */
1827static PHP_FUNCTION(preg_quote)
1828{
1829    size_t       in_str_len;
1830    char    *in_str;        /* Input string argument */
1831    char    *in_str_end;    /* End of the input string */
1832    size_t       delim_len = 0;
1833    char    *delim = NULL;  /* Additional delimiter argument */
1834    zend_string *out_str;   /* Output string with quoted characters */
1835    char    *p,             /* Iterator for input string */
1836            *q,             /* Iterator for output string */
1837             delim_char=0,  /* Delimiter character to be quoted */
1838             c;             /* Current character */
1839    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1840
1841    /* Get the arguments and check for errors */
1842#ifndef FAST_ZPP
1843    if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
1844                              &delim, &delim_len) == FAILURE) {
1845        return;
1846    }
1847#else
1848    ZEND_PARSE_PARAMETERS_START(1, 2)
1849        Z_PARAM_STRING(in_str, in_str_len)
1850        Z_PARAM_OPTIONAL
1851        Z_PARAM_STRING(delim, delim_len)
1852    ZEND_PARSE_PARAMETERS_END();
1853#endif
1854
1855    in_str_end = in_str + in_str_len;
1856
1857    /* Nothing to do if we got an empty string */
1858    if (in_str == in_str_end) {
1859        RETURN_EMPTY_STRING();
1860    }
1861
1862    if (delim && *delim) {
1863        delim_char = delim[0];
1864        quote_delim = 1;
1865    }
1866
1867    /* Allocate enough memory so that even if each character
1868       is quoted, we won't run out of room */
1869    out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1870
1871    /* Go through the string and quote necessary characters */
1872    for (p = in_str, q = out_str->val; p != in_str_end; p++) {
1873        c = *p;
1874        switch(c) {
1875            case '.':
1876            case '\\':
1877            case '+':
1878            case '*':
1879            case '?':
1880            case '[':
1881            case '^':
1882            case ']':
1883            case '$':
1884            case '(':
1885            case ')':
1886            case '{':
1887            case '}':
1888            case '=':
1889            case '!':
1890            case '>':
1891            case '<':
1892            case '|':
1893            case ':':
1894            case '-':
1895                *q++ = '\\';
1896                *q++ = c;
1897                break;
1898
1899            case '\0':
1900                *q++ = '\\';
1901                *q++ = '0';
1902                *q++ = '0';
1903                *q++ = '0';
1904                break;
1905
1906            default:
1907                if (quote_delim && c == delim_char)
1908                    *q++ = '\\';
1909                *q++ = c;
1910                break;
1911        }
1912    }
1913    *q = '\0';
1914
1915    /* Reallocate string and return it */
1916    out_str = zend_string_truncate(out_str, q - out_str->val, 0);
1917    RETURN_NEW_STR(out_str);
1918}
1919/* }}} */
1920
1921/* {{{ proto array preg_grep(string regex, array input [, int flags])
1922   Searches array and returns entries which match regex */
1923static PHP_FUNCTION(preg_grep)
1924{
1925    zend_string         *regex;         /* Regular expression */
1926    zval                *input;         /* Input array */
1927    zend_long            flags = 0;     /* Match control flags */
1928    pcre_cache_entry    *pce;           /* Compiled regular expression */
1929
1930    /* Get arguments and do error checking */
1931#ifndef FAST_ZPP
1932    if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
1933                              &input, &flags) == FAILURE) {
1934        return;
1935    }
1936#else
1937    ZEND_PARSE_PARAMETERS_START(2, 3)
1938        Z_PARAM_STR(regex)
1939        Z_PARAM_ARRAY(input)
1940        Z_PARAM_OPTIONAL
1941        Z_PARAM_LONG(flags)
1942    ZEND_PARSE_PARAMETERS_END();
1943#endif
1944
1945    /* Compile regex or get it from cache. */
1946    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1947        RETURN_FALSE;
1948    }
1949
1950    php_pcre_grep_impl(pce, input, return_value, flags);
1951}
1952/* }}} */
1953
1954PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
1955{
1956    zval            *entry;             /* An entry in the input array */
1957    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1958    pcre_extra       extra_data;        /* Used locally for exec options */
1959    int             *offsets;           /* Array of subpattern offsets */
1960    int              size_offsets;      /* Size of the offsets array */
1961    int              count = 0;         /* Count of matched subpatterns */
1962    zend_string     *string_key;
1963    zend_ulong       num_key;
1964    zend_bool        invert;            /* Whether to return non-matching
1965                                           entries */
1966    ALLOCA_FLAG(use_heap);
1967
1968    invert = flags & PREG_GREP_INVERT ? 1 : 0;
1969
1970    if (extra == NULL) {
1971        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1972        extra = &extra_data;
1973    }
1974    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1975    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1976#ifdef PCRE_EXTRA_MARK
1977    extra->flags &= ~PCRE_EXTRA_MARK;
1978#endif
1979
1980    /* Calculate the size of the offsets array, and allocate memory for it. */
1981    size_offsets = (pce->capture_count + 1) * 3;
1982    if (size_offsets <= 32) {
1983        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1984    } else {
1985        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1986    }
1987
1988    /* Initialize return array */
1989    array_init(return_value);
1990
1991    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1992
1993    /* Go through the input array */
1994    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
1995        zend_string *subject_str = zval_get_string(entry);
1996
1997        /* Perform the match */
1998        count = pcre_exec(pce->re, extra, subject_str->val,
1999                          (int)subject_str->len, 0,
2000                          0, offsets, size_offsets);
2001
2002        /* Check for too many substrings condition. */
2003        if (count == 0) {
2004            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2005            count = size_offsets/3;
2006        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
2007            pcre_handle_exec_error(count);
2008            zend_string_release(subject_str);
2009            break;
2010        }
2011
2012        /* If the entry fits our requirements */
2013        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
2014            if (Z_REFCOUNTED_P(entry)) {
2015                Z_ADDREF_P(entry);
2016            }
2017
2018            /* Add to return array */
2019            if (string_key) {
2020                zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2021            } else {
2022                zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2023            }
2024        }
2025
2026        zend_string_release(subject_str);
2027    } ZEND_HASH_FOREACH_END();
2028
2029    /* Clean up */
2030    if (size_offsets <= 32) {
2031        free_alloca(offsets, use_heap);
2032    } else {
2033        efree(offsets);
2034    }
2035}
2036/* }}} */
2037
2038/* {{{ proto int preg_last_error()
2039   Returns the error code of the last regexp execution. */
2040static PHP_FUNCTION(preg_last_error)
2041{
2042#ifndef FAST_ZPP
2043    if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
2044        return;
2045    }
2046#else
2047    ZEND_PARSE_PARAMETERS_START(0, 0)
2048    ZEND_PARSE_PARAMETERS_END();
2049#endif
2050
2051    RETURN_LONG(PCRE_G(error_code));
2052}
2053/* }}} */
2054
2055/* {{{ module definition structures */
2056
2057/* {{{ arginfo */
/* Argument descriptors for the functions registered in pcre_functions[].
   NOTE(review): per the Zend arginfo macros, the last argument of
   ZEND_BEGIN_ARG_INFO_EX is the number of required arguments and the first
   argument of ZEND_ARG_INFO is 1 for by-reference parameters - confirm
   against zend_API.h. */

/* preg_match: subpatterns is declared with a leading 1 (by reference) */
2058ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
2059    ZEND_ARG_INFO(0, pattern)
2060    ZEND_ARG_INFO(0, subject)
2061    ZEND_ARG_INFO(1, subpatterns) /* array */
2062    ZEND_ARG_INFO(0, flags)
2063    ZEND_ARG_INFO(0, offset)
2064ZEND_END_ARG_INFO()
2065
/* preg_match_all: same argument list as preg_match */
2066ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
2067    ZEND_ARG_INFO(0, pattern)
2068    ZEND_ARG_INFO(0, subject)
2069    ZEND_ARG_INFO(1, subpatterns) /* array */
2070    ZEND_ARG_INFO(0, flags)
2071    ZEND_ARG_INFO(0, offset)
2072ZEND_END_ARG_INFO()
2073
/* preg_replace (also used for preg_filter in pcre_functions[]) */
2074ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
2075    ZEND_ARG_INFO(0, regex)
2076    ZEND_ARG_INFO(0, replace)
2077    ZEND_ARG_INFO(0, subject)
2078    ZEND_ARG_INFO(0, limit)
2079    ZEND_ARG_INFO(1, count)
2080ZEND_END_ARG_INFO()
2081
/* preg_replace_callback: like preg_replace, with a callback as second argument */
2082ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
2083    ZEND_ARG_INFO(0, regex)
2084    ZEND_ARG_INFO(0, callback)
2085    ZEND_ARG_INFO(0, subject)
2086    ZEND_ARG_INFO(0, limit)
2087    ZEND_ARG_INFO(1, count)
2088ZEND_END_ARG_INFO()
2089
/* preg_replace_callback_array: the pattern argument is parsed as an array by the function */
2090ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
2091    ZEND_ARG_INFO(0, pattern)
2092    ZEND_ARG_INFO(0, subject)
2093    ZEND_ARG_INFO(0, limit)
2094    ZEND_ARG_INFO(1, count)
2095ZEND_END_ARG_INFO()
2096
/* preg_split */
2097ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
2098    ZEND_ARG_INFO(0, pattern)
2099    ZEND_ARG_INFO(0, subject)
2100    ZEND_ARG_INFO(0, limit)
2101    ZEND_ARG_INFO(0, flags)
2102ZEND_END_ARG_INFO()
2103
/* preg_quote */
2104ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
2105    ZEND_ARG_INFO(0, str)
2106    ZEND_ARG_INFO(0, delim_char)
2107ZEND_END_ARG_INFO()
2108
/* preg_grep */
2109ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
2110    ZEND_ARG_INFO(0, regex)
2111    ZEND_ARG_INFO(0, input) /* array */
2112    ZEND_ARG_INFO(0, flags)
2113ZEND_END_ARG_INFO()
2114
/* preg_last_error: no arguments */
2115ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
2116ZEND_END_ARG_INFO()
2117/* }}} */
2118
/* Table of the userland functions provided by this extension, pairing each
   PHP_FUNCTION implementation with its argument descriptor. preg_filter has
   no descriptor of its own and reuses arginfo_preg_replace. The table is
   closed by PHP_FE_END. */
2119static const zend_function_entry pcre_functions[] = {
2120    PHP_FE(preg_match,                  arginfo_preg_match)
2121    PHP_FE(preg_match_all,              arginfo_preg_match_all)
2122    PHP_FE(preg_replace,                arginfo_preg_replace)
2123    PHP_FE(preg_replace_callback,       arginfo_preg_replace_callback)
2124    PHP_FE(preg_replace_callback_array, arginfo_preg_replace_callback_array)
2125    PHP_FE(preg_filter,                 arginfo_preg_replace)
2126    PHP_FE(preg_split,                  arginfo_preg_split)
2127    PHP_FE(preg_quote,                  arginfo_preg_quote)
2128    PHP_FE(preg_grep,                   arginfo_preg_grep)
2129    PHP_FE(preg_last_error,             arginfo_preg_last_error)
2130    PHP_FE_END
2131};
2132
/* Module descriptor for the "pcre" extension: it names the extension, points
   at pcre_functions[] and wires up the module init/shutdown, info, and
   globals init/shutdown handlers declared via the PHP_* macros.
   NOTE(review): the two NULL entries after PHP_MSHUTDOWN are presumably the
   per-request startup/shutdown slots - confirm against zend_module_entry. */
2133zend_module_entry pcre_module_entry = {
2134    STANDARD_MODULE_HEADER,
2135   "pcre",
2136    pcre_functions,
2137    PHP_MINIT(pcre),
2138    PHP_MSHUTDOWN(pcre),
2139    NULL,
2140    NULL,
2141    PHP_MINFO(pcre),
2142    PHP_PCRE_VERSION,
2143    PHP_MODULE_GLOBALS(pcre),
2144    PHP_GINIT(pcre),
2145    PHP_GSHUTDOWN(pcre),
2146    NULL,
2147    STANDARD_MODULE_PROPERTIES_EX
2148};
2149
/* Only emitted when COMPILE_DL_PCRE is defined */
2150#ifdef COMPILE_DL_PCRE
2151ZEND_GET_MODULE(pcre)
2152#endif
2153
2154/* }}} */
2155
2156#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2157
2158/*
2159 * Local variables:
2160 * tab-width: 4
2161 * c-basic-offset: 4
2162 * End:
2163 * vim600: sw=4 ts=4 fdm=marker
2164 * vim<600: sw=4 ts=4
2165 */
2166