1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2014 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include "php.h"
22#include "php_ini.h"
23#include "php_globals.h"
24#include "php_pcre.h"
25#include "ext/standard/info.h"
26#include "zend_smart_str.h"
27
28#if HAVE_PCRE || HAVE_BUNDLED_PCRE
29
30#include "ext/standard/php_string.h"
31
/*
 * Result-ordering modes for global matching. They occupy the low byte of
 * the user-supplied flags (php_pcre_match_impl() extracts them with
 * `flags & 0xff`).
 */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
/* Bit 8: report (match, offset) pairs instead of plain match strings. */
#define PREG_OFFSET_CAPTURE         0x100

/* Flag bits exported to userland under the PREG_SPLIT_* names. */
#define PREG_SPLIT_NO_EMPTY         0x01
#define PREG_SPLIT_DELIM_CAPTURE    0x02
#define PREG_SPLIT_OFFSET_CAPTURE   0x04

/* Internal option bit recorded in preg_options when the 'e' modifier is seen. */
#define PREG_REPLACE_EVAL           0x01

/* Flag bit exported to userland as PREG_GREP_INVERT. */
#define PREG_GREP_INVERT            0x01

/*
 * Number of compiled patterns the per-process cache may hold; once this
 * many are stored, an eighth of the entries is evicted before inserting.
 */
#define PCRE_CACHE_SIZE 4096
45
/*
 * Error codes kept in PCRE_G(error_code). MINIT exports each of them to
 * userland under the corresponding PREG_*_ERROR / PREG_NO_ERROR name, so
 * the numeric values are part of the public interface.
 */
enum {
    PHP_PCRE_NO_ERROR              = 0,
    PHP_PCRE_INTERNAL_ERROR        = 1,
    PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
    PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
    PHP_PCRE_BAD_UTF8_ERROR        = 4,
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5
};
54
55
/*
 * Module-global state for this extension. The rest of the file reads and
 * writes it through PCRE_G(...) and through the pcre_globals pointer used
 * by the GINIT/GSHUTDOWN hooks; fields touched in this file include
 * pcre_cache, backtrack_limit, recursion_limit, error_code and jit.
 * NOTE(review): the storage layout (plain static vs. per-thread under ZTS)
 * is decided by the Zend macro and is not visible from here.
 */
ZEND_DECLARE_MODULE_GLOBALS(pcre)
57
58
59static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
60{
61    int preg_code = 0;
62
63    switch (pcre_code) {
64        case PCRE_ERROR_MATCHLIMIT:
65            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
66            break;
67
68        case PCRE_ERROR_RECURSIONLIMIT:
69            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
70            break;
71
72        case PCRE_ERROR_BADUTF8:
73            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
74            break;
75
76        case PCRE_ERROR_BADUTF8_OFFSET:
77            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
78            break;
79
80        default:
81            preg_code = PHP_PCRE_INTERNAL_ERROR;
82            break;
83    }
84
85    PCRE_G(error_code) = preg_code;
86}
87/* }}} */
88
89static void php_free_pcre_cache(zval *data) /* {{{ */
90{
91    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
92    if (!pce) return;
93    pefree(pce->re, 1);
94    if (pce->extra) {
95        pcre_free_study(pce->extra);
96    }
97#if HAVE_SETLOCALE
98    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
99    pefree(pce->locale, 1);
100#endif
101    pefree(pce, 1);
102}
103/* }}} */
104
105static PHP_GINIT_FUNCTION(pcre) /* {{{ */
106{
107    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
108    pcre_globals->backtrack_limit = 0;
109    pcre_globals->recursion_limit = 0;
110    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
111}
112/* }}} */
113
114static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
115{
116    zend_hash_destroy(&pcre_globals->pcre_cache);
117}
118/* }}} */
119
120PHP_INI_BEGIN()
121    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
122    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
123#ifdef PCRE_STUDY_JIT_COMPILE
124    STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
125#endif
126PHP_INI_END()
127
128
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Emit the extension's phpinfo() section: a two-row table (support flag and
 * the version string reported by the linked PCRE library) followed by the
 * registered INI entries.
 */
static PHP_MINFO_FUNCTION(pcre)
{
    php_info_print_table_start();
    php_info_print_table_row(2,
                             "PCRE (Perl Compatible Regular Expressions) Support",
                             "enabled");
    php_info_print_table_row(2,
                             "PCRE Library Version",
                             pcre_version());
    php_info_print_table_end();

    DISPLAY_INI_ENTRIES();
}
/* }}} */
140
141/* {{{ PHP_MINIT_FUNCTION(pcre) */
142static PHP_MINIT_FUNCTION(pcre)
143{
144    REGISTER_INI_ENTRIES();
145
146    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
147    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
148    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
149    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
150    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
151    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
152    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
153
154    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
155    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
156    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
157    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
158    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
159    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
160    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
161
162    return SUCCESS;
163}
164/* }}} */
165
166/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
167static PHP_MSHUTDOWN_FUNCTION(pcre)
168{
169    UNREGISTER_INI_ENTRIES();
170
171    return SUCCESS;
172}
173/* }}} */
174
175/* {{{ static pcre_clean_cache */
176static int pcre_clean_cache(zval *data, void *arg TSRMLS_DC)
177{
178    int *num_clean = (int *)arg;
179
180    if (*num_clean > 0) {
181        (*num_clean)--;
182        return 1;
183    } else {
184        return 0;
185    }
186}
187/* }}} */
188
189/* {{{ static make_subpats_table */
190static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
191{
192    pcre_extra *extra = pce->extra;
193    int name_cnt = pce->name_count, name_size, ni = 0;
194    int rc;
195    char *name_table;
196    unsigned short name_idx;
197    char **subpat_names;
198    int rc1, rc2;
199
200    rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
201    rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
202    rc = rc2 ? rc2 : rc1;
203    if (rc < 0) {
204        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
205        return NULL;
206    }
207
208    subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
209    while (ni++ < name_cnt) {
210        name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
211        subpat_names[name_idx] = name_table + 2;
212        if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
213            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
214            efree(subpat_names);
215            return NULL;
216        }
217        name_table += name_size;
218    }
219    return subpat_names;
220}
221/* }}} */
222
223/* {{{ pcre_get_compiled_regex_cache
224 */
225PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex TSRMLS_DC)
226{
227    pcre                *re = NULL;
228    pcre_extra          *extra;
229    int                  coptions = 0;
230    int                  soptions = 0;
231    const char          *error;
232    int                  erroffset;
233    char                 delimiter;
234    char                 start_delimiter;
235    char                 end_delimiter;
236    char                *p, *pp;
237    char                *pattern;
238    int                  do_study = 0;
239    int                  poptions = 0;
240    unsigned const char *tables = NULL;
241#if HAVE_SETLOCALE
242    char                *locale;
243#endif
244    pcre_cache_entry    *pce;
245    pcre_cache_entry     new_entry;
246    int                  rc;
247
248#if HAVE_SETLOCALE
249# if defined(PHP_WIN32) && defined(ZTS)
250    _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
251# endif
252    locale = setlocale(LC_CTYPE, NULL);
253#endif
254
255    /* Try to lookup the cached regex entry, and if successful, just pass
256       back the compiled pattern, otherwise go on and compile it. */
257    pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
258    if (pce) {
259        /*
260         * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
261         * is, we flush it and compile the pattern from scratch.
262         */
263//???       int count = 0;
264//???
265//???       if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
266//???           zend_hash_clean(&PCRE_G(pcre_cache));
267//???       } else {
268#if HAVE_SETLOCALE
269            if (!strcmp(pce->locale, locale)) {
270#endif
271                return pce;
272#if HAVE_SETLOCALE
273            }
274#endif
275//???       }
276    }
277
278    p = regex->val;
279
280    /* Parse through the leading whitespace, and display a warning if we
281       get to the end without encountering a delimiter. */
282    while (isspace((int)*(unsigned char *)p)) p++;
283    if (*p == 0) {
284        php_error_docref(NULL TSRMLS_CC, E_WARNING,
285                         p < regex->val + regex->len ? "Null byte in regex" : "Empty regular expression");
286        return NULL;
287    }
288
289    /* Get the delimiter and display a warning if it is alphanumeric
290       or a backslash. */
291    delimiter = *p++;
292    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
293        php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
294        return NULL;
295    }
296
297    start_delimiter = delimiter;
298    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
299        delimiter = pp[5];
300    end_delimiter = delimiter;
301
302    pp = p;
303
304    if (start_delimiter == end_delimiter) {
305        /* We need to iterate through the pattern, searching for the ending delimiter,
306           but skipping the backslashed delimiters.  If the ending delimiter is not
307           found, display a warning. */
308        while (*pp != 0) {
309            if (*pp == '\\' && pp[1] != 0) pp++;
310            else if (*pp == delimiter)
311                break;
312            pp++;
313        }
314    } else {
315        /* We iterate through the pattern, searching for the matching ending
316         * delimiter. For each matching starting delimiter, we increment nesting
317         * level, and decrement it for each matching ending delimiter. If we
318         * reach the end of the pattern without matching, display a warning.
319         */
320        int brackets = 1;   /* brackets nesting level */
321        while (*pp != 0) {
322            if (*pp == '\\' && pp[1] != 0) pp++;
323            else if (*pp == end_delimiter && --brackets <= 0)
324                break;
325            else if (*pp == start_delimiter)
326                brackets++;
327            pp++;
328        }
329    }
330
331    if (*pp == 0) {
332        if (pp < regex->val + regex->len) {
333            php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
334        } else if (start_delimiter == end_delimiter) {
335            php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
336        } else {
337            php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
338        }
339        return NULL;
340    }
341
342    /* Make a copy of the actual pattern. */
343    pattern = estrndup(p, pp-p);
344
345    /* Move on to the options */
346    pp++;
347
348    /* Parse through the options, setting appropriate flags.  Display
349       a warning if we encounter an unknown modifier. */
350    while (pp < regex->val + regex->len) {
351        switch (*pp++) {
352            /* Perl compatible options */
353            case 'i':   coptions |= PCRE_CASELESS;      break;
354            case 'm':   coptions |= PCRE_MULTILINE;     break;
355            case 's':   coptions |= PCRE_DOTALL;        break;
356            case 'x':   coptions |= PCRE_EXTENDED;      break;
357
358            /* PCRE specific options */
359            case 'A':   coptions |= PCRE_ANCHORED;      break;
360            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
361            case 'S':   do_study  = 1;                  break;
362            case 'U':   coptions |= PCRE_UNGREEDY;      break;
363            case 'X':   coptions |= PCRE_EXTRA;         break;
364            case 'u':   coptions |= PCRE_UTF8;
365    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
366       characters, even in UTF-8 mode. However, this can be changed by setting
367       the PCRE_UCP option. */
368#ifdef PCRE_UCP
369                        coptions |= PCRE_UCP;
370#endif
371                break;
372
373            /* Custom preg options */
374            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;
375
376            case ' ':
377            case '\n':
378                break;
379
380            default:
381                if (pp[-1]) {
382                    php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
383                } else {
384                    php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
385                }
386                efree(pattern);
387                return NULL;
388        }
389    }
390
391#if HAVE_SETLOCALE
392    if (strcmp(locale, "C"))
393        tables = pcre_maketables();
394#endif
395
396    /* Compile pattern and display a warning if compilation failed. */
397    re = pcre_compile(pattern,
398                      coptions,
399                      &error,
400                      &erroffset,
401                      tables);
402
403    if (re == NULL) {
404        php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
405        efree(pattern);
406        if (tables) {
407            pefree((void*)tables, 1);
408        }
409        return NULL;
410    }
411
412#ifdef PCRE_STUDY_JIT_COMPILE
413    if (PCRE_G(jit)) {
414        /* Enable PCRE JIT compiler */
415        do_study = 1;
416        soptions |= PCRE_STUDY_JIT_COMPILE;
417    }
418#endif
419
420    /* If study option was specified, study the pattern and
421       store the result in extra for passing to pcre_exec. */
422    if (do_study) {
423        extra = pcre_study(re, soptions, &error);
424        if (extra) {
425            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
426            extra->match_limit = PCRE_G(backtrack_limit);
427            extra->match_limit_recursion = PCRE_G(recursion_limit);
428        }
429        if (error != NULL) {
430            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
431        }
432    } else {
433        extra = NULL;
434    }
435
436    efree(pattern);
437
438    /*
439     * If we reached cache limit, clean out the items from the head of the list;
440     * these are supposedly the oldest ones (but not necessarily the least used
441     * ones).
442     */
443    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
444        int num_clean = PCRE_CACHE_SIZE / 8;
445        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
446    }
447
448    /* Store the compiled pattern and extra info in the cache. */
449    new_entry.re = re;
450    new_entry.extra = extra;
451    new_entry.preg_options = poptions;
452    new_entry.compile_options = coptions;
453#if HAVE_SETLOCALE
454    new_entry.locale = pestrdup(locale, 1);
455    new_entry.tables = tables;
456#endif
457
458    rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
459    if (rc < 0) {
460        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
461        return NULL;
462    }
463
464    rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
465    if (rc < 0) {
466        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
467        return NULL;
468    }
469
470    /*
471     * Interned strings are not duplicated when stored in HashTable,
472     * but all the interned strings created during HTTP request are removed
473     * at end of request. However PCRE_G(pcre_cache) must be consistent
474     * on the next request as well. So we disable usage of interned strings
475     * as hash keys especually for this table.
476     * See bug #63180
477     */
478    pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache), regex->val, regex->len, &new_entry, sizeof(pcre_cache_entry));
479
480    return pce;
481}
482/* }}} */
483
484/* {{{ pcre_get_compiled_regex
485 */
486PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
487{
488    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC);
489
490    if (extra) {
491        *extra = pce ? pce->extra : NULL;
492    }
493    if (preg_options) {
494        *preg_options = pce ? pce->preg_options : 0;
495    }
496
497    return pce ? pce->re : NULL;
498}
499/* }}} */
500
501/* {{{ pcre_get_compiled_regex_ex
502 */
503PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
504{
505    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC);
506
507    if (extra) {
508        *extra = pce ? pce->extra : NULL;
509    }
510    if (preg_options) {
511        *preg_options = pce ? pce->preg_options : 0;
512    }
513    if (compile_options) {
514        *compile_options = pce ? pce->compile_options : 0;
515    }
516
517    return pce ? pce->re : NULL;
518}
519/* }}} */
520
521/* {{{ add_offset_pair */
522static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
523{
524    zval match_pair;
525
526    array_init_size(&match_pair, 2);
527
528    /* Add (match, offset) to the return value */
529    add_next_index_stringl(&match_pair, str, len);
530    add_next_index_long(&match_pair, offset);
531
532    if (name) {
533        zval_add_ref(&match_pair);
534        zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
535    }
536    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
537}
538/* }}} */
539
540static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
541{
542    /* parameters */
543    zend_string      *regex;            /* Regular expression */
544    zend_string      *subject;          /* String to match against */
545    pcre_cache_entry *pce;              /* Compiled regular expression */
546    zval             *subpats = NULL;   /* Array for subpatterns */
547    zend_long         flags = 0;        /* Match control flags */
548    zend_long         start_offset = 0; /* Where the new search starts */
549
550#ifndef FAST_ZPP
551    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "SS|z/ll", &regex,
552                              &subject, &subpats, &flags, &start_offset) == FAILURE) {
553        RETURN_FALSE;
554    }
555#else
556    ZEND_PARSE_PARAMETERS_START(2, 5)
557        Z_PARAM_STR(regex)
558        Z_PARAM_STR(subject)
559        Z_PARAM_OPTIONAL
560        Z_PARAM_ZVAL_EX(subpats, 0, 1)
561        Z_PARAM_LONG(flags)
562        Z_PARAM_LONG(start_offset)
563    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
564#endif
565
566    /* Compile regex or get it from cache. */
567    if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) {
568        RETURN_FALSE;
569    }
570
571    php_pcre_match_impl(pce, subject->val, subject->len, return_value, subpats,
572        global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
573}
574/* }}} */
575
576/* {{{ php_pcre_match_impl() */
577PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
578    zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
579{
580    zval             result_set,        /* Holds a set of subpatterns after
581                                           a global match */
582                    *match_sets = NULL; /* An array of sets of matches for each
583                                           subpattern after a global match */
584    pcre_extra      *extra = pce->extra;/* Holds results of studying */
585    pcre_extra       extra_data;        /* Used locally for exec options */
586    int              exoptions = 0;     /* Execution options */
587    int              count = 0;         /* Count of matched subpatterns */
588    int             *offsets;           /* Array of subpattern offsets */
589    int              num_subpats;       /* Number of captured subpatterns */
590    int              size_offsets;      /* Size of the offsets array */
591    int              matched;           /* Has anything matched */
592    int              g_notempty = 0;    /* If the match should not be empty */
593    const char     **stringlist;        /* Holds list of subpatterns */
594    char           **subpat_names;      /* Array for named subpatterns */
595    int              i;
596    int              subpats_order;     /* Order of subpattern matches */
597    int              offset_capture;    /* Capture match offsets: yes/no */
598    unsigned char   *mark = NULL;       /* Target for MARK name */
599    zval            marks;              /* Array of marks for PREG_PATTERN_ORDER */
600    ALLOCA_FLAG(use_heap);
601
602    ZVAL_UNDEF(&marks);
603
604    /* Overwrite the passed-in value for subpatterns with an empty array. */
605    if (subpats != NULL) {
606        zval_dtor(subpats);
607        array_init(subpats);
608    }
609
610    subpats_order = global ? PREG_PATTERN_ORDER : 0;
611
612    if (use_flags) {
613        offset_capture = flags & PREG_OFFSET_CAPTURE;
614
615        /*
616         * subpats_order is pre-set to pattern mode so we change it only if
617         * necessary.
618         */
619        if (flags & 0xff) {
620            subpats_order = flags & 0xff;
621        }
622        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
623            (!global && subpats_order != 0)) {
624            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
625            return;
626        }
627    } else {
628        offset_capture = 0;
629    }
630
631    /* Negative offset counts from the end of the string. */
632    if (start_offset < 0) {
633        start_offset = subject_len + start_offset;
634        if (start_offset < 0) {
635            start_offset = 0;
636        }
637    }
638
639    if (extra == NULL) {
640        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
641        extra = &extra_data;
642    }
643    extra->match_limit = PCRE_G(backtrack_limit);
644    extra->match_limit_recursion = PCRE_G(recursion_limit);
645#ifdef PCRE_EXTRA_MARK
646    extra->mark = &mark;
647    extra->flags |= PCRE_EXTRA_MARK;
648#endif
649
650    /* Calculate the size of the offsets array, and allocate memory for it. */
651    num_subpats = pce->capture_count + 1;
652    size_offsets = num_subpats * 3;
653
654    /*
655     * Build a mapping from subpattern numbers to their names. We will
656     * allocate the table only if there are any named subpatterns.
657     */
658    subpat_names = NULL;
659    if (pce->name_count > 0) {
660        subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
661        if (!subpat_names) {
662            RETURN_FALSE;
663        }
664    }
665
666    if (size_offsets <= 32) {
667        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
668    } else {
669        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
670    }
671
672    /* Allocate match sets array and initialize the values. */
673    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
674        match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
675        for (i=0; i<num_subpats; i++) {
676            array_init(&match_sets[i]);
677        }
678    }
679
680    matched = 0;
681    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
682
683    do {
684        /* Execute the regular expression. */
685        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
686                          exoptions|g_notempty, offsets, size_offsets);
687
688        /* the string was already proved to be valid UTF-8 */
689        exoptions |= PCRE_NO_UTF8_CHECK;
690
691        /* Check for too many substrings condition. */
692        if (count == 0) {
693            php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
694            count = size_offsets/3;
695        }
696
697        /* If something has matched */
698        if (count > 0) {
699            matched++;
700
701            /* If subpatterns array has been passed, fill it in with values. */
702            if (subpats != NULL) {
703                /* Try to get the list of substrings and display a warning if failed. */
704                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
705                    if (subpat_names) {
706                        efree(subpat_names);
707                    }
708                    if (size_offsets <= 32) {
709                        free_alloca(offsets, use_heap);
710                    } else {
711                        efree(offsets);
712                    }
713                    if (match_sets) efree(match_sets);
714                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
715                    RETURN_FALSE;
716                }
717
718                if (global) {   /* global pattern matching */
719                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
720                        /* For each subpattern, insert it into the appropriate array. */
721                        if (offset_capture) {
722                            for (i = 0; i < count; i++) {
723                                add_offset_pair(&match_sets[i], (char *)stringlist[i],
724                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
725                            }
726                        } else {
727                            for (i = 0; i < count; i++) {
728                                add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
729                                                       offsets[(i<<1)+1] - offsets[i<<1]);
730                            }
731                        }
732                        /* Add MARK, if available */
733                        if (mark) {
734                            if (Z_TYPE(marks) == IS_UNDEF) {
735                                array_init(&marks);
736                            }
737                            add_index_string(&marks, matched - 1, (char *) mark);
738                        }
739                        /*
740                         * If the number of captured subpatterns on this run is
741                         * less than the total possible number, pad the result
742                         * arrays with empty strings.
743                         */
744                        if (count < num_subpats) {
745                            for (; i < num_subpats; i++) {
746                                add_next_index_string(&match_sets[i], "");
747                            }
748                        }
749                    } else {
750                        /* Allocate the result set array */
751                        array_init_size(&result_set, count + (mark ? 1 : 0));
752
753                        /* Add all the subpatterns to it */
754                        if (subpat_names) {
755                            if (offset_capture) {
756                                for (i = 0; i < count; i++) {
757                                    add_offset_pair(&result_set, (char *)stringlist[i],
758                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
759                                }
760                            } else {
761                                for (i = 0; i < count; i++) {
762                                    if (subpat_names[i]) {
763                                        add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
764                                                               offsets[(i<<1)+1] - offsets[i<<1]);
765                                    }
766                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
767                                                           offsets[(i<<1)+1] - offsets[i<<1]);
768                                }
769                            }
770                        } else {
771                            if (offset_capture) {
772                                for (i = 0; i < count; i++) {
773                                    add_offset_pair(&result_set, (char *)stringlist[i],
774                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
775                                }
776                            } else {
777                                for (i = 0; i < count; i++) {
778                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
779                                                           offsets[(i<<1)+1] - offsets[i<<1]);
780                                }
781                            }
782                        }
783                        /* Add MARK, if available */
784                        if (mark) {
785                            add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
786                        }
787                        /* And add it to the output array */
788                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
789                    }
790                } else {            /* single pattern matching */
791                    /* For each subpattern, insert it into the subpatterns array. */
792                    if (subpat_names) {
793                        if (offset_capture) {
794                            for (i = 0; i < count; i++) {
795                                add_offset_pair(subpats, (char *)stringlist[i],
796                                                offsets[(i<<1)+1] - offsets[i<<1],
797                                                offsets[i<<1], subpat_names[i]);
798                            }
799                        } else {
800                            for (i = 0; i < count; i++) {
801                                if (subpat_names[i]) {
802                                    add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
803                                                      offsets[(i<<1)+1] - offsets[i<<1]);
804                                }
805                                add_next_index_stringl(subpats, (char *)stringlist[i],
806                                                       offsets[(i<<1)+1] - offsets[i<<1]);
807                            }
808                        }
809                    } else {
810                        if (offset_capture) {
811                            for (i = 0; i < count; i++) {
812                                add_offset_pair(subpats, (char *)stringlist[i],
813                                                offsets[(i<<1)+1] - offsets[i<<1],
814                                                offsets[i<<1], NULL);
815                            }
816                        } else {
817                            for (i = 0; i < count; i++) {
818                                add_next_index_stringl(subpats, (char *)stringlist[i],
819                                                       offsets[(i<<1)+1] - offsets[i<<1]);
820                            }
821                        }
822                    }
823                    /* Add MARK, if available */
824                    if (mark) {
825                        add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
826                    }
827                }
828
829                pcre_free((void *) stringlist);
830            }
831        } else if (count == PCRE_ERROR_NOMATCH) {
832            /* If we previously set PCRE_NOTEMPTY after a null match,
833               this is not necessarily the end. We need to advance
834               the start offset, and continue. Fudge the offset values
835               to achieve this, unless we're already at the end of the string. */
836            if (g_notempty != 0 && start_offset < subject_len) {
837                offsets[0] = start_offset;
838                offsets[1] = start_offset + 1;
839            } else
840                break;
841        } else {
842            pcre_handle_exec_error(count TSRMLS_CC);
843            break;
844        }
845
        /* If we have matched an empty string, mimic what Perl's /g option does.
           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
           the match again at the same point. If this fails (picked up above) we
           advance to the next character. */
850        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
851
852        /* Advance to the position right after the last full match */
853        start_offset = offsets[1];
854    } while (global);
855
856    /* Add the match sets to the output array and clean up */
857    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
858        if (subpat_names) {
859            for (i = 0; i < num_subpats; i++) {
860                if (subpat_names[i]) {
861                    zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
862                                     strlen(subpat_names[i]), &match_sets[i]);
863                    Z_ADDREF(match_sets[i]);
864                }
865                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
866            }
867        } else {
868            for (i = 0; i < num_subpats; i++) {
869                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
870            }
871        }
872        efree(match_sets);
873
874        if (Z_TYPE(marks) != IS_UNDEF) {
875            add_assoc_zval(subpats, "MARK", &marks);
876        }
877    }
878
879    if (size_offsets <= 32) {
880        free_alloca(offsets, use_heap);
881    } else {
882        efree(offsets);
883    }
884    if (subpat_names) {
885        efree(subpat_names);
886    }
887
888    /* Did we encounter an error? */
889    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
890        RETVAL_LONG(matched);
891    } else {
892        RETVAL_FALSE;
893    }
894}
895/* }}} */
896
897/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
898   Perform a Perl-style regular expression match */
899static PHP_FUNCTION(preg_match)
900{
901    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
902}
903/* }}} */
904
905/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
906   Perform a Perl-style global regular expression match */
907static PHP_FUNCTION(preg_match_all)
908{
909    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
910}
911/* }}} */
912
/* {{{ preg_get_backref
   Try to read a backreference of the form <c>N, <c>NN or ${N} / ${NN} that
   starts at *str, where <c> is the introducer character sitting at (*str)[0]
   and N is a decimal digit. The introducer itself is not validated here
   (callers only invoke this on '\\' or '$'); it only matters that it is '$'
   for the braced form.

   On success the reference number is stored in *backref, *str is advanced
   just past the reference and 1 is returned. On failure 0 is returned and
   *str is left untouched (*backref may already have been written when the
   failure is a missing closing brace). */
static int preg_get_backref(char **str, int *backref)
{
    char *cursor = *str;
    int   braced;

    /* An introducer that is the last character cannot start a reference */
    if (cursor[1] == '\0') {
        return 0;
    }

    /* Step over the introducer, and over the opening brace of "${" */
    braced = (cursor[0] == '$' && cursor[1] == '{');
    cursor += braced ? 2 : 1;

    /* The first digit is mandatory */
    if (*cursor < '0' || *cursor > '9') {
        return 0;
    }
    *backref = *cursor - '0';
    cursor++;

    /* The second digit is optional; at most two digits are consumed */
    if (*cursor >= '0' && *cursor <= '9') {
        *backref = *backref * 10 + (*cursor - '0');
        cursor++;
    }

    /* The braced form must be closed right after the digits */
    if (braced) {
        if (*cursor != '}') {
            return 0;
        }
        cursor++;
    }

    *str = cursor;
    return 1;
}
/* }}} */
951
952/* {{{ preg_do_repl_func
953 */
954static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark TSRMLS_DC)
955{
956    zend_string *result_str;
957    zval         retval;            /* Function return value */
958    zval         args[1];           /* Argument to pass to function */
959    int          i;
960
961    array_init_size(&args[0], count + (mark ? 1 : 0));
962    if (subpat_names) {
963        for (i = 0; i < count; i++) {
964            if (subpat_names[i]) {
965                add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
966            }
967            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
968        }
969    } else {
970        for (i = 0; i < count; i++) {
971            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
972        }
973    }
974    if (mark) {
975        add_assoc_string(&args[0], "MARK", (char *) mark);
976    }
977
978    if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
979        result_str = zval_get_string(&retval);
980        zval_ptr_dtor(&retval);
981    } else {
982        if (!EG(exception)) {
983            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
984        }
985
986        result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
987    }
988
989    zval_ptr_dtor(&args[0]);
990
991    return result_str;
992}
993/* }}} */
994
995/* {{{ preg_do_eval
996 */
997static zend_string *preg_do_eval(char *eval_str, int eval_str_len, char *subject,
998                        int *offsets, int count TSRMLS_DC)
999{
1000    zval         retval;            /* Return value from evaluation */
1001    char        *eval_str_end,      /* End of eval string */
1002                *match,             /* Current match for a backref */
1003                *walk,              /* Used to walk the code string */
1004                *segment,           /* Start of segment to append while walking */
1005                 walk_last;         /* Last walked character */
1006    int          match_len;         /* Length of the match */
1007    int          backref;           /* Current backref */
1008    zend_string *esc_match;         /* Quote-escaped match */
1009    zend_string *result_str;
1010    char        *compiled_string_description;
1011    smart_str    code = {0};
1012
1013    eval_str_end = eval_str + eval_str_len;
1014    walk = segment = eval_str;
1015    walk_last = 0;
1016
1017    while (walk < eval_str_end) {
1018        /* If found a backreference.. */
1019        if ('\\' == *walk || '$' == *walk) {
1020            smart_str_appendl(&code, segment, walk - segment);
1021            if (walk_last == '\\') {
1022                code.s->val[code.s->len-1] = *walk++;
1023                segment = walk;
1024                walk_last = 0;
1025                continue;
1026            }
1027            segment = walk;
1028            if (preg_get_backref(&walk, &backref)) {
1029                if (backref < count) {
1030                    /* Find the corresponding string match and substitute it
1031                       in instead of the backref */
1032                    match = subject + offsets[backref<<1];
1033                    match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1034                    if (match_len) {
1035                        esc_match = php_addslashes(match, match_len, 0 TSRMLS_CC);
1036                    } else {
1037                        esc_match = zend_string_init(match, match_len, 0);
1038                    }
1039                } else {
1040                    esc_match = STR_EMPTY_ALLOC();
1041                }
1042                smart_str_appendl(&code, esc_match->val, esc_match->len);
1043
1044                segment = walk;
1045
1046                /* Clean up and reassign */
1047                zend_string_release(esc_match);
1048                continue;
1049            }
1050        }
1051        walk++;
1052        walk_last = walk[-1];
1053    }
1054    smart_str_appendl(&code, segment, walk - segment);
1055    smart_str_0(&code);
1056
1057    compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
1058    /* Run the code */
1059    if (zend_eval_stringl(code.s->val, code.s->len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
1060        efree(compiled_string_description);
1061        php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.s->val);
1062        /* zend_error() does not return in this case */
1063    }
1064    efree(compiled_string_description);
1065
1066    /* Save the return string */
1067    result_str = zval_get_string(&retval);
1068
1069    /* Clean up */
1070    zval_dtor(&retval);
1071    smart_str_free(&code);
1072
1073    return result_str;
1074}
1075/* }}} */
1076
1077/* {{{ php_pcre_replace
1078 */
1079PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1080                              char *subject, int subject_len,
1081                              zval *replace_val, int is_callable_replace,
1082                              int limit, int *replace_count TSRMLS_DC)
1083{
1084    pcre_cache_entry    *pce;               /* Compiled regular expression */
1085
1086    /* Compile regex or get it from cache. */
1087    if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) {
1088        return NULL;
1089    }
1090
1091    return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
1092        is_callable_replace, limit, replace_count TSRMLS_CC);
1093}
1094/* }}} */
1095
1096/* {{{ php_pcre_replace_impl() */
1097PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
1098    int is_callable_replace, int limit, int *replace_count TSRMLS_DC)
1099{
1100    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1101    pcre_extra       extra_data;        /* Used locally for exec options */
1102    int              exoptions = 0;     /* Execution options */
1103    int              count = 0;         /* Count of matched subpatterns */
1104    int             *offsets;           /* Array of subpattern offsets */
1105    char            **subpat_names;     /* Array for named subpatterns */
1106    int              num_subpats;       /* Number of captured subpatterns */
1107    int              size_offsets;      /* Size of the offsets array */
1108    int              new_len;           /* Length of needed storage */
1109    int              alloc_len;         /* Actual allocated length */
1110    int              match_len;         /* Length of the current match */
1111    int              backref;           /* Backreference number */
1112    int              eval;              /* If the replacement string should be eval'ed */
1113    int              start_offset;      /* Where the new search starts */
1114    int              g_notempty=0;      /* If the match should not be empty */
1115    int              replace_len=0;     /* Length of replacement string */
1116    char            *replace=NULL,      /* Replacement string */
1117                    *walkbuf,           /* Location of current replacement in the result */
1118                    *walk,              /* Used to walk the replacement string */
1119                    *match,             /* The current match */
1120                    *piece,             /* The current piece of subject */
1121                    *replace_end=NULL,  /* End of replacement string */
1122                     walk_last;         /* Last walked character */
1123    int              result_len;        /* Length of result */
1124    unsigned char   *mark = NULL;       /* Target for MARK name */
1125    zend_string     *result;            /* Result of replacement */
1126    zend_string     *eval_result=NULL;  /* Result of eval or custom function */
1127    ALLOCA_FLAG(use_heap);
1128
1129    if (extra == NULL) {
1130        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1131        extra = &extra_data;
1132    }
1133    extra->match_limit = PCRE_G(backtrack_limit);
1134    extra->match_limit_recursion = PCRE_G(recursion_limit);
1135
1136    eval = pce->preg_options & PREG_REPLACE_EVAL;
1137    if (is_callable_replace) {
1138        if (eval) {
1139            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1140            return NULL;
1141        }
1142    } else {
1143        replace = Z_STRVAL_P(replace_val);
1144        replace_len = Z_STRLEN_P(replace_val);
1145        replace_end = replace + replace_len;
1146    }
1147
1148    if (eval) {
1149        php_error_docref(NULL TSRMLS_CC, E_DEPRECATED, "The /e modifier is deprecated, use preg_replace_callback instead");
1150    }
1151
1152    /* Calculate the size of the offsets array, and allocate memory for it. */
1153    num_subpats = pce->capture_count + 1;
1154    size_offsets = num_subpats * 3;
1155    if (size_offsets <= 32) {
1156        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1157    } else {
1158        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1159    }
1160
1161    /*
1162     * Build a mapping from subpattern numbers to their names. We will
1163     * allocate the table only if there are any named subpatterns.
1164     */
1165    subpat_names = NULL;
1166    if (pce->name_count > 0) {
1167        subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1168        if (!subpat_names) {
1169            return NULL;
1170        }
1171    }
1172
1173    alloc_len = 2 * subject_len;
1174    result = zend_string_alloc(alloc_len * sizeof(char), 0);
1175
1176    /* Initialize */
1177    match = NULL;
1178    start_offset = 0;
1179    result_len = 0;
1180    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1181
1182    while (1) {
1183#ifdef PCRE_EXTRA_MARK
1184        extra->mark = &mark;
1185        extra->flags |= PCRE_EXTRA_MARK;
1186#endif
1187        /* Execute the regular expression. */
1188        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1189                          exoptions|g_notempty, offsets, size_offsets);
1190
1191        /* the string was already proved to be valid UTF-8 */
1192        exoptions |= PCRE_NO_UTF8_CHECK;
1193
1194        /* Check for too many substrings condition. */
1195        if (count == 0) {
1196            php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1197            count = size_offsets/3;
1198        }
1199
1200        piece = subject + start_offset;
1201
1202        if (count > 0 && (limit == -1 || limit > 0)) {
1203            if (replace_count) {
1204                ++*replace_count;
1205            }
1206            /* Set the match location in subject */
1207            match = subject + offsets[0];
1208
1209            new_len = result_len + offsets[0] - start_offset; /* part before the match */
1210
1211            /* If evaluating, do it and add the return string's length */
1212            if (eval) {
1213                eval_result = preg_do_eval(replace, replace_len, subject,
1214                                               offsets, count TSRMLS_CC);
1215                new_len += eval_result->len;
1216            } else if (is_callable_replace) {
1217                /* Use custom function to get replacement string and its length. */
1218                eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark TSRMLS_CC);
1219                new_len += eval_result->len;
1220            } else { /* do regular substitution */
1221                walk = replace;
1222                walk_last = 0;
1223                while (walk < replace_end) {
1224                    if ('\\' == *walk || '$' == *walk) {
1225                        if (walk_last == '\\') {
1226                            walk++;
1227                            walk_last = 0;
1228                            continue;
1229                        }
1230                        if (preg_get_backref(&walk, &backref)) {
1231                            if (backref < count)
1232                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1233                            continue;
1234                        }
1235                    }
1236                    new_len++;
1237                    walk++;
1238                    walk_last = walk[-1];
1239                }
1240            }
1241
1242            if (new_len > alloc_len) {
1243                alloc_len = alloc_len + 2 * new_len;
1244                result = zend_string_realloc(result, alloc_len, 0);
1245            }
1246            /* copy the part of the string before the match */
1247            memcpy(&result->val[result_len], piece, match-piece);
1248            result_len += match-piece;
1249
1250            /* copy replacement and backrefs */
1251            walkbuf = result->val + result_len;
1252
1253            /* If evaluating or using custom function, copy result to the buffer
1254             * and clean up. */
1255            if (eval || is_callable_replace) {
1256                memcpy(walkbuf, eval_result->val, eval_result->len);
1257                result_len += eval_result->len;
1258                if (eval_result) zend_string_release(eval_result);
1259            } else { /* do regular backreference copying */
1260                walk = replace;
1261                walk_last = 0;
1262                while (walk < replace_end) {
1263                    if ('\\' == *walk || '$' == *walk) {
1264                        if (walk_last == '\\') {
1265                            *(walkbuf-1) = *walk++;
1266                            walk_last = 0;
1267                            continue;
1268                        }
1269                        if (preg_get_backref(&walk, &backref)) {
1270                            if (backref < count) {
1271                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1272                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1273                                walkbuf += match_len;
1274                            }
1275                            continue;
1276                        }
1277                    }
1278                    *walkbuf++ = *walk++;
1279                    walk_last = walk[-1];
1280                }
1281                *walkbuf = '\0';
1282                /* increment the result length by how much we've added to the string */
1283                result_len += walkbuf - (result->val + result_len);
1284            }
1285
1286            if (limit != -1)
1287                limit--;
1288
1289        } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1290            /* If we previously set PCRE_NOTEMPTY after a null match,
1291               this is not necessarily the end. We need to advance
1292               the start offset, and continue. Fudge the offset values
1293               to achieve this, unless we're already at the end of the string. */
1294            if (g_notempty != 0 && start_offset < subject_len) {
1295                offsets[0] = start_offset;
1296                offsets[1] = start_offset + 1;
1297                memcpy(&result->val[result_len], piece, 1);
1298                result_len++;
1299            } else {
1300                new_len = result_len + subject_len - start_offset;
1301                if (new_len > alloc_len) {
1302                    alloc_len = new_len; /* now we know exactly how long it is */
1303                    result = zend_string_realloc(result, alloc_len, 0);
1304                }
1305                /* stick that last bit of string on our output */
1306                memcpy(&result->val[result_len], piece, subject_len - start_offset);
1307                result_len += subject_len - start_offset;
1308                result->val[result_len] = '\0';
1309                break;
1310            }
1311        } else {
1312            pcre_handle_exec_error(count TSRMLS_CC);
1313            zend_string_free(result);
1314            result = NULL;
1315            break;
1316        }
1317
1318        /* If we have matched an empty string, mimic what Perl's /g options does.
1319           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1320           the match again at the same point. If this fails (picked up above) we
1321           advance to the next character. */
1322        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1323
1324        /* Advance to the next piece. */
1325        start_offset = offsets[1];
1326    }
1327
1328    if (result) {
1329        result->len = result_len;
1330    }
1331    if (size_offsets <= 32) {
1332        free_alloca(offsets, use_heap);
1333    } else {
1334        efree(offsets);
1335    }
1336    if (subpat_names) {
1337        efree(subpat_names);
1338    }
1339
1340    return result;
1341}
1342/* }}} */
1343
1344/* {{{ php_replace_in_subject
1345 */
1346static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
1347{
1348    zval        *regex_entry,
1349                *replace_entry = NULL,
1350                *replace_value,
1351                 empty_replace;
1352    zend_string *result;
1353    zend_string *subject_str = zval_get_string(subject);
1354    uint32_t replace_idx;
1355
1356    /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1357    ZVAL_EMPTY_STRING(&empty_replace);
1358
1359    /* If regex is an array */
1360    if (Z_TYPE_P(regex) == IS_ARRAY) {
1361        replace_value = replace;
1362        replace_idx = 0;
1363
1364        /* For each entry in the regex array, get the entry */
1365        ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1366            /* Make sure we're dealing with strings. */
1367            zend_string *regex_str = zval_get_string(regex_entry);
1368
1369            /* If replace is an array and not a callable construct */
1370            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1371                /* Get current entry */
1372                replace_entry = NULL;
1373                while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1374                    if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
1375                        replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
1376                        break;
1377                    }
1378                    replace_idx++;
1379                }
1380                if (replace_entry != NULL) {
1381                    if (!is_callable_replace) {
1382                        convert_to_string_ex(replace_entry);
1383                    }
1384                    replace_value = replace_entry;
1385                    replace_idx++;
1386                } else {
1387                    /* We've run out of replacement strings, so use an empty one */
1388                    replace_value = &empty_replace;
1389                }
1390            }
1391
1392            /* Do the actual replacement and put the result back into subject_str
1393               for further replacements. */
1394            if ((result = php_pcre_replace(regex_str,
1395                                           subject_str->val,
1396                                           subject_str->len,
1397                                           replace_value,
1398                                           is_callable_replace,
1399                                           limit,
1400                                           replace_count TSRMLS_CC)) != NULL) {
1401                zend_string_release(subject_str);
1402                subject_str = result;
1403            } else {
1404                zend_string_release(subject_str);
1405                zend_string_release(regex_str);
1406                return NULL;
1407            }
1408
1409            zend_string_release(regex_str);
1410        } ZEND_HASH_FOREACH_END();
1411
1412        return subject_str;
1413    } else {
1414        result = php_pcre_replace(Z_STR_P(regex),
1415                                  subject_str->val,
1416                                  subject_str->len,
1417                                  replace,
1418                                  is_callable_replace,
1419                                  limit,
1420                                  replace_count TSRMLS_CC);
1421        zend_string_release(subject_str);
1422        return result;
1423    }
1424}
1425/* }}} */
1426
1427/* {{{ preg_replace_impl
1428 */
1429static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1430{
1431    zval            *regex,
1432                    *replace,
1433                    *subject,
1434                    *subject_entry,
1435                    *zcount = NULL;
1436    int              limit_val = -1;
1437    zend_long        limit = -1;
1438    zend_string     *result;
1439    zend_string     *string_key;
1440    zend_ulong       num_key;
1441    zend_string     *callback_name;
1442    int              replace_count=0, old_replace_count;
1443
1444#ifndef FAST_ZPP
1445    /* Get function parameters and do error-checking. */
1446    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1447        return;
1448    }
1449#else
1450    ZEND_PARSE_PARAMETERS_START(3, 5)
1451        Z_PARAM_ZVAL(regex)
1452        Z_PARAM_ZVAL(replace)
1453        Z_PARAM_ZVAL(subject)
1454        Z_PARAM_OPTIONAL
1455        Z_PARAM_LONG(limit)
1456        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1457    ZEND_PARSE_PARAMETERS_END();
1458#endif
1459
1460    if (!is_callable_replace && Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1461        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1462        RETURN_FALSE;
1463    }
1464
1465    if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1466        SEPARATE_ZVAL(replace);
1467        convert_to_string_ex(replace);
1468    }
1469    if (is_callable_replace) {
1470        if (!zend_is_callable(replace, 0, &callback_name TSRMLS_CC)) {
1471            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name->val);
1472            zend_string_release(callback_name);
1473            ZVAL_DUP(return_value, subject);
1474            return;
1475        }
1476        zend_string_release(callback_name);
1477    }
1478
1479    if (ZEND_NUM_ARGS() > 3) {
1480        limit_val = limit;
1481    }
1482
1483    if (Z_TYPE_P(regex) != IS_ARRAY) {
1484        SEPARATE_ZVAL(regex);
1485        convert_to_string_ex(regex);
1486    }
1487
1488    /* if subject is an array */
1489    if (Z_TYPE_P(subject) == IS_ARRAY) {
1490        array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1491
1492        /* For each subject entry, convert it to string, then perform replacement
1493           and add the result to the return_value array. */
1494        ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1495            old_replace_count = replace_count;
1496            if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1497                if (!is_filter || replace_count > old_replace_count) {
1498                    /* Add to return array */
1499                    if (string_key) {
1500                        add_assoc_str_ex(return_value, string_key->val, string_key->len, result);
1501                    } else {
1502                        add_index_str(return_value, num_key, result);
1503                    }
1504                } else {
1505                    zend_string_release(result);
1506                }
1507            }
1508        } ZEND_HASH_FOREACH_END();
1509    } else {    /* if subject is not an array */
1510        old_replace_count = replace_count;
1511        if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1512            if (!is_filter || replace_count > old_replace_count) {
1513                RETVAL_STR(result);
1514            } else {
1515                zend_string_release(result);
1516            }
1517        }
1518    }
1519    if (ZEND_NUM_ARGS() > 4) {
1520        zval_dtor(zcount);
1521        ZVAL_LONG(zcount, replace_count);
1522    }
1523
1524}
1525/* }}} */
1526
1527/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1528   Perform Perl-style regular expression replacement. */
1529static PHP_FUNCTION(preg_replace)
1530{
1531    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1532}
1533/* }}} */
1534
1535/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1536   Perform Perl-style regular expression replacement using replacement callback. */
1537static PHP_FUNCTION(preg_replace_callback)
1538{
1539    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1540}
1541/* }}} */
1542
1543/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1544   Perform Perl-style regular expression replacement and only return matches. */
1545static PHP_FUNCTION(preg_filter)
1546{
1547    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1548}
1549/* }}} */
1550
1551/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1552   Split string into an array using a perl-style regular expression as a delimiter */
1553static PHP_FUNCTION(preg_split)
1554{
1555    zend_string         *regex;         /* Regular expression */
1556    zend_string         *subject;       /* String to match against */
1557    zend_long            limit_val = -1;/* Integer value of limit */
1558    zend_long            flags = 0;     /* Match control flags */
1559    pcre_cache_entry    *pce;           /* Compiled regular expression */
1560
1561    /* Get function parameters and do error checking */
1562#ifndef FAST_ZPP
1563    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "SS|ll", &regex,
1564                              &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1565        RETURN_FALSE;
1566    }
1567#else
1568    ZEND_PARSE_PARAMETERS_START(2, 4)
1569        Z_PARAM_STR(regex)
1570        Z_PARAM_STR(subject)
1571        Z_PARAM_OPTIONAL
1572        Z_PARAM_LONG(limit_val)
1573        Z_PARAM_LONG(flags)
1574    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1575#endif
1576
1577    /* Compile regex or get it from cache. */
1578    if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) {
1579        RETURN_FALSE;
1580    }
1581
1582    php_pcre_split_impl(pce, subject->val, subject->len, return_value, limit_val, flags TSRMLS_CC);
1583}
1584/* }}} */
1585
1586/* {{{ php_pcre_split
1587 */
1588PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1589    long limit_val, long flags TSRMLS_DC)
1590{
1591    pcre_extra      *extra = NULL;      /* Holds results of studying */
1592    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1593    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1594    pcre_extra       extra_data;        /* Used locally for exec options */
1595    int             *offsets;           /* Array of subpattern offsets */
1596    int              size_offsets;      /* Size of the offsets array */
1597    int              exoptions = 0;     /* Execution options */
1598    int              count = 0;         /* Count of matched subpatterns */
1599    int              start_offset;      /* Where the new search starts */
1600    int              next_offset;       /* End of the last delimiter match + 1 */
1601    int              g_notempty = 0;    /* If the match should not be empty */
1602    char            *last_match;        /* Location of last match */
1603    int              no_empty;          /* If NO_EMPTY flag is set */
1604    int              delim_capture;     /* If delimiters should be captured */
1605    int              offset_capture;    /* If offsets should be captured */
1606    ALLOCA_FLAG(use_heap);
1607
1608    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1609    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1610    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1611
1612    if (limit_val == 0) {
1613        limit_val = -1;
1614    }
1615
1616    if (extra == NULL) {
1617        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1618        extra = &extra_data;
1619    }
1620    extra->match_limit = PCRE_G(backtrack_limit);
1621    extra->match_limit_recursion = PCRE_G(recursion_limit);
1622#ifdef PCRE_EXTRA_MARK
1623    extra->flags &= ~PCRE_EXTRA_MARK;
1624#endif
1625
1626    /* Initialize return value */
1627    array_init(return_value);
1628
1629    /* Calculate the size of the offsets array, and allocate memory for it. */
1630    size_offsets = (pce->capture_count + 1) * 3;
1631    if (size_offsets <= 32) {
1632        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1633    } else {
1634        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1635    }
1636
1637    /* Start at the beginning of the string */
1638    start_offset = 0;
1639    next_offset = 0;
1640    last_match = subject;
1641    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1642
1643    /* Get next piece if no limit or limit not yet reached and something matched*/
1644    while ((limit_val == -1 || limit_val > 1)) {
1645        count = pcre_exec(pce->re, extra, subject,
1646                          subject_len, start_offset,
1647                          exoptions|g_notempty, offsets, size_offsets);
1648
1649        /* the string was already proved to be valid UTF-8 */
1650        exoptions |= PCRE_NO_UTF8_CHECK;
1651
1652        /* Check for too many substrings condition. */
1653        if (count == 0) {
1654            php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1655            count = size_offsets/3;
1656        }
1657
1658        /* If something matched */
1659        if (count > 0) {
1660            if (!no_empty || &subject[offsets[0]] != last_match) {
1661
1662                if (offset_capture) {
1663                    /* Add (match, offset) pair to the return value */
1664                    add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1665                } else {
1666                    /* Add the piece to the return value */
1667                    add_next_index_stringl(return_value, last_match,
1668                                       &subject[offsets[0]]-last_match);
1669                }
1670
1671                /* One less left to do */
1672                if (limit_val != -1)
1673                    limit_val--;
1674            }
1675
1676            last_match = &subject[offsets[1]];
1677            next_offset = offsets[1];
1678
1679            if (delim_capture) {
1680                int i, match_len;
1681                for (i = 1; i < count; i++) {
1682                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1683                    /* If we have matched a delimiter */
1684                    if (!no_empty || match_len > 0) {
1685                        if (offset_capture) {
1686                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1687                        } else {
1688                            add_next_index_stringl(return_value,
1689                                                   &subject[offsets[i<<1]],
1690                                                   match_len);
1691                        }
1692                    }
1693                }
1694            }
1695        } else if (count == PCRE_ERROR_NOMATCH) {
1696            /* If we previously set PCRE_NOTEMPTY after a null match,
1697               this is not necessarily the end. We need to advance
1698               the start offset, and continue. Fudge the offset values
1699               to achieve this, unless we're already at the end of the string. */
1700            if (g_notempty != 0 && start_offset < subject_len) {
1701                if (pce->compile_options & PCRE_UTF8) {
1702                    if (re_bump == NULL) {
1703                        int dummy;
1704                        zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
1705                        re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy TSRMLS_CC);
1706                        zend_string_release(regex);
1707                        if (re_bump == NULL) {
1708                            RETURN_FALSE;
1709                        }
1710                    }
1711                    count = pcre_exec(re_bump, extra_bump, subject,
1712                              subject_len, start_offset,
1713                              exoptions, offsets, size_offsets);
1714                    if (count < 1) {
1715                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
1716                        RETURN_FALSE;
1717                    }
1718                } else {
1719                    offsets[0] = start_offset;
1720                    offsets[1] = start_offset + 1;
1721                }
1722            } else
1723                break;
1724        } else {
1725            pcre_handle_exec_error(count TSRMLS_CC);
1726            break;
1727        }
1728
1729        /* If we have matched an empty string, mimic what Perl's /g options does.
1730           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1731           the match again at the same point. If this fails (picked up above) we
1732           advance to the next character. */
1733        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1734
1735        /* Advance to the position right after the last full match */
1736        start_offset = offsets[1];
1737    }
1738
1739
1740    start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1741
1742    if (!no_empty || start_offset < subject_len)
1743    {
1744        if (offset_capture) {
1745            /* Add the last (match, offset) pair to the return value */
1746            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1747        } else {
1748            /* Add the last piece to the return value */
1749            add_next_index_stringl(return_value, last_match, subject + subject_len - last_match);
1750        }
1751    }
1752
1753
1754    /* Clean up */
1755    if (size_offsets <= 32) {
1756        free_alloca(offsets, use_heap);
1757    } else {
1758        efree(offsets);
1759    }
1760}
1761/* }}} */
1762
1763/* {{{ proto string preg_quote(string str [, string delim_char])
1764   Quote regular expression characters plus an optional character */
1765static PHP_FUNCTION(preg_quote)
1766{
1767    size_t       in_str_len;
1768    char    *in_str;        /* Input string argument */
1769    char    *in_str_end;    /* End of the input string */
1770    size_t       delim_len = 0;
1771    char    *delim = NULL;  /* Additional delimiter argument */
1772    zend_string *out_str;   /* Output string with quoted characters */
1773    char    *p,             /* Iterator for input string */
1774            *q,             /* Iterator for output string */
1775             delim_char=0,  /* Delimiter character to be quoted */
1776             c;             /* Current character */
1777    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1778
1779    /* Get the arguments and check for errors */
1780#ifndef FAST_ZPP
1781    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1782                              &delim, &delim_len) == FAILURE) {
1783        return;
1784    }
1785#else
1786    ZEND_PARSE_PARAMETERS_START(1, 2)
1787        Z_PARAM_STRING(in_str, in_str_len)
1788        Z_PARAM_OPTIONAL
1789        Z_PARAM_STRING(delim, delim_len)
1790    ZEND_PARSE_PARAMETERS_END();
1791#endif
1792
1793    in_str_end = in_str + in_str_len;
1794
1795    /* Nothing to do if we got an empty string */
1796    if (in_str == in_str_end) {
1797        RETURN_EMPTY_STRING();
1798    }
1799
1800    if (delim && *delim) {
1801        delim_char = delim[0];
1802        quote_delim = 1;
1803    }
1804
1805    /* Allocate enough memory so that even if each character
1806       is quoted, we won't run out of room */
1807    out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1808
1809    /* Go through the string and quote necessary characters */
1810    for (p = in_str, q = out_str->val; p != in_str_end; p++) {
1811        c = *p;
1812        switch(c) {
1813            case '.':
1814            case '\\':
1815            case '+':
1816            case '*':
1817            case '?':
1818            case '[':
1819            case '^':
1820            case ']':
1821            case '$':
1822            case '(':
1823            case ')':
1824            case '{':
1825            case '}':
1826            case '=':
1827            case '!':
1828            case '>':
1829            case '<':
1830            case '|':
1831            case ':':
1832            case '-':
1833                *q++ = '\\';
1834                *q++ = c;
1835                break;
1836
1837            case '\0':
1838                *q++ = '\\';
1839                *q++ = '0';
1840                *q++ = '0';
1841                *q++ = '0';
1842                break;
1843
1844            default:
1845                if (quote_delim && c == delim_char)
1846                    *q++ = '\\';
1847                *q++ = c;
1848                break;
1849        }
1850    }
1851    *q = '\0';
1852
1853    /* Reallocate string and return it */
1854    out_str = zend_string_realloc(out_str, q - out_str->val, 0);
1855    RETURN_STR(out_str);
1856}
1857/* }}} */
1858
1859/* {{{ proto array preg_grep(string regex, array input [, int flags])
1860   Searches array and returns entries which match regex */
1861static PHP_FUNCTION(preg_grep)
1862{
1863    zend_string         *regex;         /* Regular expression */
1864    zval                *input;         /* Input array */
1865    zend_long            flags = 0;     /* Match control flags */
1866    pcre_cache_entry    *pce;           /* Compiled regular expression */
1867
1868    /* Get arguments and do error checking */
1869#ifndef FAST_ZPP
1870    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Sa|l", &regex,
1871                              &input, &flags) == FAILURE) {
1872        return;
1873    }
1874#else
1875    ZEND_PARSE_PARAMETERS_START(2, 3)
1876        Z_PARAM_STR(regex)
1877        Z_PARAM_ARRAY(input)
1878        Z_PARAM_OPTIONAL
1879        Z_PARAM_LONG(flags)
1880    ZEND_PARSE_PARAMETERS_END();
1881#endif
1882
1883    /* Compile regex or get it from cache. */
1884    if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) {
1885        RETURN_FALSE;
1886    }
1887
1888    php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1889}
1890/* }}} */
1891
1892PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
1893{
1894    zval            *entry;             /* An entry in the input array */
1895    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1896    pcre_extra       extra_data;        /* Used locally for exec options */
1897    int             *offsets;           /* Array of subpattern offsets */
1898    int              size_offsets;      /* Size of the offsets array */
1899    int              count = 0;         /* Count of matched subpatterns */
1900    zend_string     *string_key;
1901    zend_ulong       num_key;
1902    zend_bool        invert;            /* Whether to return non-matching
1903                                           entries */
1904    ALLOCA_FLAG(use_heap);
1905
1906    invert = flags & PREG_GREP_INVERT ? 1 : 0;
1907
1908    if (extra == NULL) {
1909        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1910        extra = &extra_data;
1911    }
1912    extra->match_limit = PCRE_G(backtrack_limit);
1913    extra->match_limit_recursion = PCRE_G(recursion_limit);
1914#ifdef PCRE_EXTRA_MARK
1915    extra->flags &= ~PCRE_EXTRA_MARK;
1916#endif
1917
1918    /* Calculate the size of the offsets array, and allocate memory for it. */
1919    size_offsets = (pce->capture_count + 1) * 3;
1920    if (size_offsets <= 32) {
1921        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1922    } else {
1923        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1924    }
1925
1926    /* Initialize return array */
1927    array_init(return_value);
1928
1929    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1930
1931    /* Go through the input array */
1932    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
1933        zend_string *subject_str = zval_get_string(entry);
1934
1935        /* Perform the match */
1936        count = pcre_exec(pce->re, extra, subject_str->val,
1937                          subject_str->len, 0,
1938                          0, offsets, size_offsets);
1939
1940        /* Check for too many substrings condition. */
1941        if (count == 0) {
1942            php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1943            count = size_offsets/3;
1944        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1945            pcre_handle_exec_error(count TSRMLS_CC);
1946            zend_string_release(subject_str);
1947            break;
1948        }
1949
1950        /* If the entry fits our requirements */
1951        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1952            if (Z_REFCOUNTED_P(entry)) {
1953                Z_ADDREF_P(entry);
1954            }
1955
1956            /* Add to return array */
1957            if (string_key) {
1958                zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
1959            } else {
1960                zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
1961            }
1962        }
1963
1964        zend_string_release(subject_str);
1965    } ZEND_HASH_FOREACH_END();
1966
1967    /* Clean up */
1968    if (size_offsets <= 32) {
1969        free_alloca(offsets, use_heap);
1970    } else {
1971        efree(offsets);
1972    }
1973}
1974/* }}} */
1975
1976/* {{{ proto int preg_last_error()
1977   Returns the error code of the last regexp execution. */
1978static PHP_FUNCTION(preg_last_error)
1979{
1980#ifndef FAST_ZPP
1981    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1982        return;
1983    }
1984#else
1985    ZEND_PARSE_PARAMETERS_START(0, 0)
1986    ZEND_PARSE_PARAMETERS_END();
1987#endif
1988
1989    RETURN_LONG(PCRE_G(error_code));
1990}
1991/* }}} */
1992
1993/* {{{ module definition structures */
1994
1995/* {{{ arginfo */
1996ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
1997    ZEND_ARG_INFO(0, pattern)
1998    ZEND_ARG_INFO(0, subject)
1999    ZEND_ARG_INFO(1, subpatterns) /* array */
2000    ZEND_ARG_INFO(0, flags)
2001    ZEND_ARG_INFO(0, offset)
2002ZEND_END_ARG_INFO()
2003
2004ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
2005    ZEND_ARG_INFO(0, pattern)
2006    ZEND_ARG_INFO(0, subject)
2007    ZEND_ARG_INFO(1, subpatterns) /* array */
2008    ZEND_ARG_INFO(0, flags)
2009    ZEND_ARG_INFO(0, offset)
2010ZEND_END_ARG_INFO()
2011
2012ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
2013    ZEND_ARG_INFO(0, regex)
2014    ZEND_ARG_INFO(0, replace)
2015    ZEND_ARG_INFO(0, subject)
2016    ZEND_ARG_INFO(0, limit)
2017    ZEND_ARG_INFO(1, count)
2018ZEND_END_ARG_INFO()
2019
2020ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
2021    ZEND_ARG_INFO(0, regex)
2022    ZEND_ARG_INFO(0, callback)
2023    ZEND_ARG_INFO(0, subject)
2024    ZEND_ARG_INFO(0, limit)
2025    ZEND_ARG_INFO(1, count)
2026ZEND_END_ARG_INFO()
2027
2028ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
2029    ZEND_ARG_INFO(0, pattern)
2030    ZEND_ARG_INFO(0, subject)
2031    ZEND_ARG_INFO(0, limit)
2032    ZEND_ARG_INFO(0, flags)
2033ZEND_END_ARG_INFO()
2034
2035ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
2036    ZEND_ARG_INFO(0, str)
2037    ZEND_ARG_INFO(0, delim_char)
2038ZEND_END_ARG_INFO()
2039
2040ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
2041    ZEND_ARG_INFO(0, regex)
2042    ZEND_ARG_INFO(0, input) /* array */
2043    ZEND_ARG_INFO(0, flags)
2044ZEND_END_ARG_INFO()
2045
2046ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
2047ZEND_END_ARG_INFO()
2048/* }}} */
2049
2050static const zend_function_entry pcre_functions[] = {
2051    PHP_FE(preg_match,              arginfo_preg_match)
2052    PHP_FE(preg_match_all,          arginfo_preg_match_all)
2053    PHP_FE(preg_replace,            arginfo_preg_replace)
2054    PHP_FE(preg_replace_callback,   arginfo_preg_replace_callback)
2055    PHP_FE(preg_filter,             arginfo_preg_replace)
2056    PHP_FE(preg_split,              arginfo_preg_split)
2057    PHP_FE(preg_quote,              arginfo_preg_quote)
2058    PHP_FE(preg_grep,               arginfo_preg_grep)
2059    PHP_FE(preg_last_error,         arginfo_preg_last_error)
2060    PHP_FE_END
2061};
2062
2063zend_module_entry pcre_module_entry = {
2064    STANDARD_MODULE_HEADER,
2065   "pcre",
2066    pcre_functions,
2067    PHP_MINIT(pcre),
2068    PHP_MSHUTDOWN(pcre),
2069    NULL,
2070    NULL,
2071    PHP_MINFO(pcre),
2072    NO_VERSION_YET,
2073    PHP_MODULE_GLOBALS(pcre),
2074    PHP_GINIT(pcre),
2075    PHP_GSHUTDOWN(pcre),
2076    NULL,
2077    STANDARD_MODULE_PROPERTIES_EX
2078};
2079
2080#ifdef COMPILE_DL_PCRE
2081ZEND_GET_MODULE(pcre)
2082#endif
2083
2084/* }}} */
2085
2086#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2087
2088/*
2089 * Local variables:
2090 * tab-width: 4
2091 * c-basic-offset: 4
2092 * End:
2093 * vim600: sw=4 ts=4 fdm=marker
2094 * vim<600: sw=4 ts=4
2095 */
2096