1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2015 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
#include "php.h"
#include "php_ini.h"
#include "php_globals.h"
#include "php_pcre.h"
#include "ext/standard/info.h"
#include "ext/standard/basic_functions.h"
#include "zend_smart_str.h"

#include <limits.h>
28
29#if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31#include "ext/standard/php_string.h"
32
/*
 * Result ordering for global matches. php_pcre_match_impl() reads the order
 * from the low byte of the flags argument (flags & 0xff) and tests
 * PREG_OFFSET_CAPTURE as a separate bit above that byte.
 */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
#define PREG_OFFSET_CAPTURE         (1<<8)

/* Flag bits exported to userland as the PREG_SPLIT_* constants. */
#define PREG_SPLIT_NO_EMPTY         (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE    (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1<<2)

/* Stored in a cache entry's preg_options when the 'e' modifier is present. */
#define PREG_REPLACE_EVAL           (1<<0)

/* Flag bit exported to userland as PREG_GREP_INVERT. */
#define PREG_GREP_INVERT            (1<<0)

/*
 * Number of compiled patterns kept in the per-process cache; once this many
 * are stored, pcre_get_compiled_regex_cache() evicts up to 1/8 of them.
 */
#define PCRE_CACHE_SIZE 4096
46
/*
 * Error codes stored in PCRE_G(error_code) and exported to userland as the
 * PREG_*_ERROR constants. The numeric values are part of the userland
 * contract, so they are spelled out explicitly.
 */
enum {
    PHP_PCRE_NO_ERROR              = 0, /* no error recorded */
    PHP_PCRE_INTERNAL_ERROR        = 1, /* any PCRE error without a dedicated code */
    PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2, /* PCRE_ERROR_MATCHLIMIT */
    PHP_PCRE_RECURSION_LIMIT_ERROR = 3, /* PCRE_ERROR_RECURSIONLIMIT */
    PHP_PCRE_BAD_UTF8_ERROR        = 4, /* PCRE_ERROR_BADUTF8 */
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5, /* PCRE_ERROR_BADUTF8_OFFSET */
    PHP_PCRE_JIT_STACKLIMIT_ERROR  = 6  /* PCRE_ERROR_JIT_STACKLIMIT */
};
56
57
/* Module globals of the pcre extension; read and written through PCRE_G() in this file. */
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
59
60
61static void pcre_handle_exec_error(int pcre_code) /* {{{ */
62{
63    int preg_code = 0;
64
65    switch (pcre_code) {
66        case PCRE_ERROR_MATCHLIMIT:
67            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
68            break;
69
70        case PCRE_ERROR_RECURSIONLIMIT:
71            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
72            break;
73
74        case PCRE_ERROR_BADUTF8:
75            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
76            break;
77
78        case PCRE_ERROR_BADUTF8_OFFSET:
79            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
80            break;
81
82#ifdef PCRE_STUDY_JIT_COMPILE
83        case PCRE_ERROR_JIT_STACKLIMIT:
84            preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
85            break;
86#endif
87
88        default:
89            preg_code = PHP_PCRE_INTERNAL_ERROR;
90            break;
91    }
92
93    PCRE_G(error_code) = preg_code;
94}
95/* }}} */
96
97static void php_free_pcre_cache(zval *data) /* {{{ */
98{
99    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
100    if (!pce) return;
101    pcre_free(pce->re);
102    if (pce->extra) {
103        pcre_free_study(pce->extra);
104    }
105#if HAVE_SETLOCALE
106    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
107    if (pce->locale) {
108        zend_string_release(pce->locale);
109    }
110#endif
111    pefree(pce, 1);
112}
113/* }}} */
114
115static PHP_GINIT_FUNCTION(pcre) /* {{{ */
116{
117    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
118    pcre_globals->backtrack_limit = 0;
119    pcre_globals->recursion_limit = 0;
120    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
121}
122/* }}} */
123
124static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
125{
126    zend_hash_destroy(&pcre_globals->pcre_cache);
127}
128/* }}} */
129
/*
 * INI settings of the pcre extension. Each entry is bound to the field of
 * zend_pcre_globals named in its fifth argument and is registered with
 * PHP_INI_ALL:
 *   pcre.backtrack_limit  default "1000000"  -> backtrack_limit
 *   pcre.recursion_limit  default "100000"   -> recursion_limit
 *   pcre.jit              default "1"        -> jit (only when the PCRE
 *                                               headers define PCRE_STUDY_JIT_COMPILE)
 */
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef PCRE_STUDY_JIT_COMPILE
    STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
137
138
139/* {{{ PHP_MINFO_FUNCTION(pcre) */
140static PHP_MINFO_FUNCTION(pcre)
141{
142    int jit_yes = 0;
143
144    php_info_print_table_start();
145    php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
146    php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
147
148    if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
149        php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
150    } else {
151        php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
152    }
153
154    php_info_print_table_end();
155
156    DISPLAY_INI_ENTRIES();
157}
158/* }}} */
159
/* {{{ PHP_MINIT_FUNCTION(pcre)
 * Module startup: registers the extension's INI entries and the userland
 * constants (PREG_* flags, PREG_*_ERROR codes and PCRE_VERSION). All
 * constants are registered with CONST_CS | CONST_PERSISTENT. Always returns
 * SUCCESS.
 */
static PHP_MINIT_FUNCTION(pcre)
{
    REGISTER_INI_ENTRIES();

    /* Flags accepted by the matching, splitting and grep functions. */
    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);

    /* Error codes; these are the values stored in PCRE_G(error_code). */
    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
    /* Version string as reported by the PCRE library at startup. */
    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);

    return SUCCESS;
}
/* }}} */
185
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
 * Module shutdown: unregisters the INI entries registered in PHP_MINIT.
 * Always returns SUCCESS.
 */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
    UNREGISTER_INI_ENTRIES();

    return SUCCESS;
}
/* }}} */
194
195/* {{{ static pcre_clean_cache */
196static int pcre_clean_cache(zval *data, void *arg)
197{
198    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
199    int *num_clean = (int *)arg;
200
201    if (*num_clean > 0 && !pce->refcount) {
202        (*num_clean)--;
203        return ZEND_HASH_APPLY_REMOVE;
204    } else {
205        return ZEND_HASH_APPLY_KEEP;
206    }
207}
208/* }}} */
209
210/* {{{ static make_subpats_table */
211static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
212{
213    pcre_extra *extra = pce->extra;
214    int name_cnt = pce->name_count, name_size, ni = 0;
215    int rc;
216    char *name_table;
217    unsigned short name_idx;
218    char **subpat_names;
219    int rc1, rc2;
220
221    rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
222    rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
223    rc = rc2 ? rc2 : rc1;
224    if (rc < 0) {
225        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
226        return NULL;
227    }
228
229    subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
230    while (ni++ < name_cnt) {
231        name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
232        subpat_names[name_idx] = name_table + 2;
233        if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
234            php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
235            efree(subpat_names);
236            return NULL;
237        }
238        name_table += name_size;
239    }
240    return subpat_names;
241}
242/* }}} */
243
244/* {{{ static calculate_unit_length */
245/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
246static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
247{
248    int unit_len;
249
250    if (pce->compile_options & PCRE_UTF8) {
251        char *end = start;
252
253        /* skip continuation bytes */
254        while ((*++end & 0xC0) == 0x80);
255        unit_len = end - start;
256    } else {
257        unit_len = 1;
258    }
259    return unit_len;
260}
261/* }}} */
262
263/* {{{ pcre_get_compiled_regex_cache
264 */
265PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
266{
267    pcre                *re = NULL;
268    pcre_extra          *extra;
269    int                  coptions = 0;
270    int                  soptions = 0;
271    const char          *error;
272    int                  erroffset;
273    char                 delimiter;
274    char                 start_delimiter;
275    char                 end_delimiter;
276    char                *p, *pp;
277    char                *pattern;
278    int                  do_study = 0;
279    int                  poptions = 0;
280    unsigned const char *tables = NULL;
281    pcre_cache_entry    *pce;
282    pcre_cache_entry     new_entry;
283    int                  rc;
284
285    /* Try to lookup the cached regex entry, and if successful, just pass
286       back the compiled pattern, otherwise go on and compile it. */
287    pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
288    if (pce) {
289#if HAVE_SETLOCALE
290        if (pce->locale == BG(locale_string) ||
291            (pce->locale && BG(locale_string) &&
292             ZSTR_LEN(pce->locale) == ZSTR_LEN(BG(locale_string)) &&
293             !memcmp(ZSTR_VAL(pce->locale), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(pce->locale))) ||
294            (!pce->locale &&
295             ZSTR_LEN(BG(locale_string)) == 1 &&
296             ZSTR_VAL(BG(locale_string))[0] == 'C') ||
297            (!BG(locale_string) &&
298             ZSTR_LEN(pce->locale) == 1 &&
299             ZSTR_VAL(pce->locale)[0] == 'C')) {
300            return pce;
301        }
302#else
303        return pce;
304#endif
305    }
306
307    p = ZSTR_VAL(regex);
308
309    /* Parse through the leading whitespace, and display a warning if we
310       get to the end without encountering a delimiter. */
311    while (isspace((int)*(unsigned char *)p)) p++;
312    if (*p == 0) {
313        php_error_docref(NULL, E_WARNING,
314                         p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
315        return NULL;
316    }
317
318    /* Get the delimiter and display a warning if it is alphanumeric
319       or a backslash. */
320    delimiter = *p++;
321    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
322        php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
323        return NULL;
324    }
325
326    start_delimiter = delimiter;
327    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
328        delimiter = pp[5];
329    end_delimiter = delimiter;
330
331    pp = p;
332
333    if (start_delimiter == end_delimiter) {
334        /* We need to iterate through the pattern, searching for the ending delimiter,
335           but skipping the backslashed delimiters.  If the ending delimiter is not
336           found, display a warning. */
337        while (*pp != 0) {
338            if (*pp == '\\' && pp[1] != 0) pp++;
339            else if (*pp == delimiter)
340                break;
341            pp++;
342        }
343    } else {
344        /* We iterate through the pattern, searching for the matching ending
345         * delimiter. For each matching starting delimiter, we increment nesting
346         * level, and decrement it for each matching ending delimiter. If we
347         * reach the end of the pattern without matching, display a warning.
348         */
349        int brackets = 1;   /* brackets nesting level */
350        while (*pp != 0) {
351            if (*pp == '\\' && pp[1] != 0) pp++;
352            else if (*pp == end_delimiter && --brackets <= 0)
353                break;
354            else if (*pp == start_delimiter)
355                brackets++;
356            pp++;
357        }
358    }
359
360    if (*pp == 0) {
361        if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
362            php_error_docref(NULL,E_WARNING, "Null byte in regex");
363        } else if (start_delimiter == end_delimiter) {
364            php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
365        } else {
366            php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
367        }
368        return NULL;
369    }
370
371    /* Make a copy of the actual pattern. */
372    pattern = estrndup(p, pp-p);
373
374    /* Move on to the options */
375    pp++;
376
377    /* Parse through the options, setting appropriate flags.  Display
378       a warning if we encounter an unknown modifier. */
379    while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
380        switch (*pp++) {
381            /* Perl compatible options */
382            case 'i':   coptions |= PCRE_CASELESS;      break;
383            case 'm':   coptions |= PCRE_MULTILINE;     break;
384            case 's':   coptions |= PCRE_DOTALL;        break;
385            case 'x':   coptions |= PCRE_EXTENDED;      break;
386
387            /* PCRE specific options */
388            case 'A':   coptions |= PCRE_ANCHORED;      break;
389            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
390            case 'S':   do_study  = 1;                  break;
391            case 'U':   coptions |= PCRE_UNGREEDY;      break;
392            case 'X':   coptions |= PCRE_EXTRA;         break;
393            case 'u':   coptions |= PCRE_UTF8;
394    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
395       characters, even in UTF-8 mode. However, this can be changed by setting
396       the PCRE_UCP option. */
397#ifdef PCRE_UCP
398                        coptions |= PCRE_UCP;
399#endif
400                break;
401
402            /* Custom preg options */
403            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;
404
405            case ' ':
406            case '\n':
407                break;
408
409            default:
410                if (pp[-1]) {
411                    php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
412                } else {
413                    php_error_docref(NULL,E_WARNING, "Null byte in regex");
414                }
415                efree(pattern);
416                return NULL;
417        }
418    }
419
420#if HAVE_SETLOCALE
421    if (BG(locale_string) &&
422        (ZSTR_LEN(BG(locale_string)) != 1 || ZSTR_VAL(BG(locale_string))[0] != 'C')) {
423        tables = pcre_maketables();
424    }
425#endif
426
427    /* Compile pattern and display a warning if compilation failed. */
428    re = pcre_compile(pattern,
429                      coptions,
430                      &error,
431                      &erroffset,
432                      tables);
433
434    if (re == NULL) {
435        php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
436        efree(pattern);
437        if (tables) {
438            pefree((void*)tables, 1);
439        }
440        return NULL;
441    }
442
443#ifdef PCRE_STUDY_JIT_COMPILE
444    if (PCRE_G(jit)) {
445        /* Enable PCRE JIT compiler */
446        do_study = 1;
447        soptions |= PCRE_STUDY_JIT_COMPILE;
448    }
449#endif
450
451    /* If study option was specified, study the pattern and
452       store the result in extra for passing to pcre_exec. */
453    if (do_study) {
454        extra = pcre_study(re, soptions, &error);
455        if (extra) {
456            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
457            extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
458            extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
459        }
460        if (error != NULL) {
461            php_error_docref(NULL, E_WARNING, "Error while studying pattern");
462        }
463    } else {
464        extra = NULL;
465    }
466
467    efree(pattern);
468
469    /*
470     * If we reached cache limit, clean out the items from the head of the list;
471     * these are supposedly the oldest ones (but not necessarily the least used
472     * ones).
473     */
474    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
475        int num_clean = PCRE_CACHE_SIZE / 8;
476        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
477    }
478
479    /* Store the compiled pattern and extra info in the cache. */
480    new_entry.re = re;
481    new_entry.extra = extra;
482    new_entry.preg_options = poptions;
483    new_entry.compile_options = coptions;
484#if HAVE_SETLOCALE
485    new_entry.locale = BG(locale_string) ?
486        ((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
487            zend_string_copy(BG(locale_string)) :
488            zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1)) :
489        NULL;
490    new_entry.tables = tables;
491#endif
492    new_entry.refcount = 0;
493
494    rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
495    if (rc < 0) {
496        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
497        return NULL;
498    }
499
500    rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
501    if (rc < 0) {
502        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
503        return NULL;
504    }
505
506    /*
507     * Interned strings are not duplicated when stored in HashTable,
508     * but all the interned strings created during HTTP request are removed
509     * at end of request. However PCRE_G(pcre_cache) must be consistent
510     * on the next request as well. So we disable usage of interned strings
511     * as hash keys especually for this table.
512     * See bug #63180
513     */
514    if (!ZSTR_IS_INTERNED(regex) || !(GC_FLAGS(regex) & IS_STR_PERMANENT)) {
515        zend_string *str = zend_string_init(ZSTR_VAL(regex), ZSTR_LEN(regex), 1);
516        GC_REFCOUNT(str) = 0; /* will be incremented by zend_hash_update_mem() */
517        ZSTR_H(str) = ZSTR_H(regex);
518        regex = str;
519    }
520
521    pce = zend_hash_update_mem(&PCRE_G(pcre_cache), regex, &new_entry, sizeof(pcre_cache_entry));
522
523    return pce;
524}
525/* }}} */
526
527/* {{{ pcre_get_compiled_regex
528 */
529PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
530{
531    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
532
533    if (extra) {
534        *extra = pce ? pce->extra : NULL;
535    }
536    if (preg_options) {
537        *preg_options = pce ? pce->preg_options : 0;
538    }
539
540    return pce ? pce->re : NULL;
541}
542/* }}} */
543
544/* {{{ pcre_get_compiled_regex_ex
545 */
546PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
547{
548    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
549
550    if (extra) {
551        *extra = pce ? pce->extra : NULL;
552    }
553    if (preg_options) {
554        *preg_options = pce ? pce->preg_options : 0;
555    }
556    if (compile_options) {
557        *compile_options = pce ? pce->compile_options : 0;
558    }
559
560    return pce ? pce->re : NULL;
561}
562/* }}} */
563
564/* {{{ add_offset_pair */
565static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
566{
567    zval match_pair, tmp;
568
569    array_init_size(&match_pair, 2);
570
571    /* Add (match, offset) to the return value */
572    ZVAL_STRINGL(&tmp, str, len);
573    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
574    ZVAL_LONG(&tmp, offset);
575    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
576
577    if (name) {
578        Z_ADDREF(match_pair);
579        zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
580    }
581    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
582}
583/* }}} */
584
585static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
586{
587    /* parameters */
588    zend_string      *regex;            /* Regular expression */
589    zend_string      *subject;          /* String to match against */
590    pcre_cache_entry *pce;              /* Compiled regular expression */
591    zval             *subpats = NULL;   /* Array for subpatterns */
592    zend_long         flags = 0;        /* Match control flags */
593    zend_long         start_offset = 0; /* Where the new search starts */
594
595#ifndef FAST_ZPP
596    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
597                              &subject, &subpats, &flags, &start_offset) == FAILURE) {
598        RETURN_FALSE;
599    }
600#else
601    ZEND_PARSE_PARAMETERS_START(2, 5)
602        Z_PARAM_STR(regex)
603        Z_PARAM_STR(subject)
604        Z_PARAM_OPTIONAL
605        Z_PARAM_ZVAL_EX(subpats, 0, 1)
606        Z_PARAM_LONG(flags)
607        Z_PARAM_LONG(start_offset)
608    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
609#endif
610
611    /* Compile regex or get it from cache. */
612    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
613        RETURN_FALSE;
614    }
615
616    pce->refcount++;
617    php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
618        global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
619    pce->refcount--;
620}
621/* }}} */
622
623/* {{{ php_pcre_match_impl() */
624PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
625    zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
626{
627    zval             result_set,        /* Holds a set of subpatterns after
628                                           a global match */
629                    *match_sets = NULL; /* An array of sets of matches for each
630                                           subpattern after a global match */
631    pcre_extra      *extra = pce->extra;/* Holds results of studying */
632    pcre_extra       extra_data;        /* Used locally for exec options */
633    int              exoptions = 0;     /* Execution options */
634    int              count = 0;         /* Count of matched subpatterns */
635    int             *offsets;           /* Array of subpattern offsets */
636    int              num_subpats;       /* Number of captured subpatterns */
637    int              size_offsets;      /* Size of the offsets array */
638    int              matched;           /* Has anything matched */
639    int              g_notempty = 0;    /* If the match should not be empty */
640    const char     **stringlist;        /* Holds list of subpatterns */
641    char           **subpat_names;      /* Array for named subpatterns */
642    int              i;
643    int              subpats_order;     /* Order of subpattern matches */
644    int              offset_capture;    /* Capture match offsets: yes/no */
645    unsigned char   *mark = NULL;       /* Target for MARK name */
646    zval            marks;              /* Array of marks for PREG_PATTERN_ORDER */
647    ALLOCA_FLAG(use_heap);
648
649    ZVAL_UNDEF(&marks);
650
651    /* Overwrite the passed-in value for subpatterns with an empty array. */
652    if (subpats != NULL) {
653        zval_dtor(subpats);
654        array_init(subpats);
655    }
656
657    subpats_order = global ? PREG_PATTERN_ORDER : 0;
658
659    if (use_flags) {
660        offset_capture = flags & PREG_OFFSET_CAPTURE;
661
662        /*
663         * subpats_order is pre-set to pattern mode so we change it only if
664         * necessary.
665         */
666        if (flags & 0xff) {
667            subpats_order = flags & 0xff;
668        }
669        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
670            (!global && subpats_order != 0)) {
671            php_error_docref(NULL, E_WARNING, "Invalid flags specified");
672            return;
673        }
674    } else {
675        offset_capture = 0;
676    }
677
678    /* Negative offset counts from the end of the string. */
679    if (start_offset < 0) {
680        start_offset = subject_len + start_offset;
681        if (start_offset < 0) {
682            start_offset = 0;
683        }
684    }
685
686    if (extra == NULL) {
687        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
688        extra = &extra_data;
689    }
690    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
691    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
692#ifdef PCRE_EXTRA_MARK
693    extra->mark = &mark;
694    extra->flags |= PCRE_EXTRA_MARK;
695#endif
696
697    /* Calculate the size of the offsets array, and allocate memory for it. */
698    num_subpats = pce->capture_count + 1;
699    size_offsets = num_subpats * 3;
700
701    /*
702     * Build a mapping from subpattern numbers to their names. We will
703     * allocate the table only if there are any named subpatterns.
704     */
705    subpat_names = NULL;
706    if (pce->name_count > 0) {
707        subpat_names = make_subpats_table(num_subpats, pce);
708        if (!subpat_names) {
709            RETURN_FALSE;
710        }
711    }
712
713    if (size_offsets <= 32) {
714        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
715    } else {
716        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
717    }
718    memset(offsets, 0, size_offsets*sizeof(int));
719    /* Allocate match sets array and initialize the values. */
720    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
721        match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
722        for (i=0; i<num_subpats; i++) {
723            array_init(&match_sets[i]);
724        }
725    }
726
727    matched = 0;
728    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
729
730    do {
731        /* Execute the regular expression. */
732        count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
733                          exoptions|g_notempty, offsets, size_offsets);
734
735        /* the string was already proved to be valid UTF-8 */
736        exoptions |= PCRE_NO_UTF8_CHECK;
737
738        /* Check for too many substrings condition. */
739        if (count == 0) {
740            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
741            count = size_offsets/3;
742        }
743
744        /* If something has matched */
745        if (count > 0) {
746            matched++;
747
748            /* If subpatterns array has been passed, fill it in with values. */
749            if (subpats != NULL) {
750                /* Try to get the list of substrings and display a warning if failed. */
751                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
752                    if (subpat_names) {
753                        efree(subpat_names);
754                    }
755                    if (size_offsets <= 32) {
756                        free_alloca(offsets, use_heap);
757                    } else {
758                        efree(offsets);
759                    }
760                    if (match_sets) efree(match_sets);
761                    php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
762                    RETURN_FALSE;
763                }
764
765                if (global) {   /* global pattern matching */
766                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
767                        /* For each subpattern, insert it into the appropriate array. */
768                        if (offset_capture) {
769                            for (i = 0; i < count; i++) {
770                                add_offset_pair(&match_sets[i], (char *)stringlist[i],
771                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
772                            }
773                        } else {
774                            for (i = 0; i < count; i++) {
775                                add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
776                                                       offsets[(i<<1)+1] - offsets[i<<1]);
777                            }
778                        }
779                        /* Add MARK, if available */
780                        if (mark) {
781                            if (Z_TYPE(marks) == IS_UNDEF) {
782                                array_init(&marks);
783                            }
784                            add_index_string(&marks, matched - 1, (char *) mark);
785                        }
786                        /*
787                         * If the number of captured subpatterns on this run is
788                         * less than the total possible number, pad the result
789                         * arrays with empty strings.
790                         */
791                        if (count < num_subpats) {
792                            for (; i < num_subpats; i++) {
793                                add_next_index_string(&match_sets[i], "");
794                            }
795                        }
796                    } else {
797                        /* Allocate the result set array */
798                        array_init_size(&result_set, count + (mark ? 1 : 0));
799
800                        /* Add all the subpatterns to it */
801                        if (subpat_names) {
802                            if (offset_capture) {
803                                for (i = 0; i < count; i++) {
804                                    add_offset_pair(&result_set, (char *)stringlist[i],
805                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
806                                }
807                            } else {
808                                for (i = 0; i < count; i++) {
809                                    if (subpat_names[i]) {
810                                        add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
811                                                               offsets[(i<<1)+1] - offsets[i<<1]);
812                                    }
813                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
814                                                           offsets[(i<<1)+1] - offsets[i<<1]);
815                                }
816                            }
817                        } else {
818                            if (offset_capture) {
819                                for (i = 0; i < count; i++) {
820                                    add_offset_pair(&result_set, (char *)stringlist[i],
821                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
822                                }
823                            } else {
824                                for (i = 0; i < count; i++) {
825                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
826                                                           offsets[(i<<1)+1] - offsets[i<<1]);
827                                }
828                            }
829                        }
830                        /* Add MARK, if available */
831                        if (mark) {
832                            add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
833                        }
834                        /* And add it to the output array */
835                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
836                    }
837                } else {            /* single pattern matching */
838                    /* For each subpattern, insert it into the subpatterns array. */
839                    if (subpat_names) {
840                        if (offset_capture) {
841                            for (i = 0; i < count; i++) {
842                                add_offset_pair(subpats, (char *)stringlist[i],
843                                                offsets[(i<<1)+1] - offsets[i<<1],
844                                                offsets[i<<1], subpat_names[i]);
845                            }
846                        } else {
847                            for (i = 0; i < count; i++) {
848                                if (subpat_names[i]) {
849                                    add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
850                                                      offsets[(i<<1)+1] - offsets[i<<1]);
851                                }
852                                add_next_index_stringl(subpats, (char *)stringlist[i],
853                                                       offsets[(i<<1)+1] - offsets[i<<1]);
854                            }
855                        }
856                    } else {
857                        if (offset_capture) {
858                            for (i = 0; i < count; i++) {
859                                add_offset_pair(subpats, (char *)stringlist[i],
860                                                offsets[(i<<1)+1] - offsets[i<<1],
861                                                offsets[i<<1], NULL);
862                            }
863                        } else {
864                            for (i = 0; i < count; i++) {
865                                add_next_index_stringl(subpats, (char *)stringlist[i],
866                                                       offsets[(i<<1)+1] - offsets[i<<1]);
867                            }
868                        }
869                    }
870                    /* Add MARK, if available */
871                    if (mark) {
872                        add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
873                    }
874                }
875
876                pcre_free((void *) stringlist);
877            }
878        } else if (count == PCRE_ERROR_NOMATCH) {
879            /* If we previously set PCRE_NOTEMPTY after a null match,
880               this is not necessarily the end. We need to advance
881               the start offset, and continue. Fudge the offset values
882               to achieve this, unless we're already at the end of the string. */
883            if (g_notempty != 0 && start_offset < subject_len) {
884                int unit_len = calculate_unit_length(pce, subject + start_offset);
885
886                offsets[0] = (int)start_offset;
887                offsets[1] = (int)(start_offset + unit_len);
888            } else
889                break;
890        } else {
891            pcre_handle_exec_error(count);
892            break;
893        }
894
895        /* If we have matched an empty string, mimic what Perl's /g options does.
896           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
897           the match again at the same point. If this fails (picked up above) we
898           advance to the next character. */
899        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
900
901        /* Advance to the position right after the last full match */
902        start_offset = offsets[1];
903    } while (global);
904
905    /* Add the match sets to the output array and clean up */
906    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
907        if (subpat_names) {
908            for (i = 0; i < num_subpats; i++) {
909                if (subpat_names[i]) {
910                    zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
911                                     strlen(subpat_names[i]), &match_sets[i]);
912                    Z_ADDREF(match_sets[i]);
913                }
914                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
915            }
916        } else {
917            for (i = 0; i < num_subpats; i++) {
918                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
919            }
920        }
921        efree(match_sets);
922
923        if (Z_TYPE(marks) != IS_UNDEF) {
924            add_assoc_zval(subpats, "MARK", &marks);
925        }
926    }
927
928    if (size_offsets <= 32) {
929        free_alloca(offsets, use_heap);
930    } else {
931        efree(offsets);
932    }
933    if (subpat_names) {
934        efree(subpat_names);
935    }
936
937    /* Did we encounter an error? */
938    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
939        RETVAL_LONG(matched);
940    } else {
941        RETVAL_FALSE;
942    }
943}
944/* }}} */
945
946/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
947   Perform a Perl-style regular expression match */
948static PHP_FUNCTION(preg_match)
949{
950    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
951}
952/* }}} */
953
954/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
955   Perform a Perl-style global regular expression match */
956static PHP_FUNCTION(preg_match_all)
957{
958    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
959}
960/* }}} */
961
962/* {{{ preg_get_backref
963 */
/*
 * Parse a back-reference at *str. Accepted shapes are an introducer
 * character followed by one or two decimal digits ("\1", "$12"), or the
 * braced form "${N}" / "${NN}" (braces are only recognised after '$').
 *
 * On success the reference number is stored in *backref, *str is moved
 * past the reference and 1 is returned. On failure 0 is returned and *str
 * is left alone; *backref may already hold a partially parsed number.
 */
static int preg_get_backref(char **str, int *backref)
{
    char *cursor = *str;
    int   braced;

    /* An introducer that is the last character cannot start a reference. */
    if (cursor[1] == '\0') {
        return 0;
    }

    /* Skip the introducer, and the opening brace of "${" when present. */
    braced = (cursor[0] == '$' && cursor[1] == '{');
    cursor += braced ? 2 : 1;

    /* The first digit is mandatory. */
    if (*cursor < '0' || *cursor > '9') {
        return 0;
    }
    *backref = *cursor - '0';
    cursor++;

    /* The second digit is optional. */
    if (*cursor >= '0' && *cursor <= '9') {
        *backref = *backref * 10 + *cursor - '0';
        cursor++;
    }

    /* A braced reference must be closed right after the digits. */
    if (braced) {
        if (*cursor != '}') {
            return 0;
        }
        cursor++;
    }

    *str = cursor;
    return 1;
}
999/* }}} */
1000
1001/* {{{ preg_do_repl_func
1002 */
1003static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
1004{
1005    zend_string *result_str;
1006    zval         retval;            /* Function return value */
1007    zval         args[1];           /* Argument to pass to function */
1008    int          i;
1009
1010    array_init_size(&args[0], count + (mark ? 1 : 0));
1011    if (subpat_names) {
1012        for (i = 0; i < count; i++) {
1013            if (subpat_names[i]) {
1014                add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
1015            }
1016            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1017        }
1018    } else {
1019        for (i = 0; i < count; i++) {
1020            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1021        }
1022    }
1023    if (mark) {
1024        add_assoc_string(&args[0], "MARK", (char *) mark);
1025    }
1026
1027    if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1028        result_str = zval_get_string(&retval);
1029        zval_ptr_dtor(&retval);
1030    } else {
1031        if (!EG(exception)) {
1032            php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1033        }
1034
1035        result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1036    }
1037
1038    zval_ptr_dtor(&args[0]);
1039
1040    return result_str;
1041}
1042/* }}} */
1043
1044/* {{{ php_pcre_replace
1045 */
1046PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1047                              zend_string *subject_str,
1048                              char *subject, int subject_len,
1049                              zval *replace_val, int is_callable_replace,
1050                              int limit, int *replace_count)
1051{
1052    pcre_cache_entry    *pce;               /* Compiled regular expression */
1053    zend_string         *result;            /* Function result */
1054
1055    /* Compile regex or get it from cache. */
1056    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1057        return NULL;
1058    }
1059    pce->refcount++;
1060    result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
1061        is_callable_replace, limit, replace_count);
1062    pce->refcount--;
1063
1064    return result;
1065}
1066/* }}} */
1067
1068/* {{{ php_pcre_replace_impl() */
1069PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count)
1070{
1071    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1072    pcre_extra       extra_data;        /* Used locally for exec options */
1073    int              exoptions = 0;     /* Execution options */
1074    int              count = 0;         /* Count of matched subpatterns */
1075    int             *offsets;           /* Array of subpattern offsets */
1076    char            **subpat_names;     /* Array for named subpatterns */
1077    int              num_subpats;       /* Number of captured subpatterns */
1078    int              size_offsets;      /* Size of the offsets array */
1079    int              new_len;           /* Length of needed storage */
1080    int              alloc_len;         /* Actual allocated length */
1081    int              match_len;         /* Length of the current match */
1082    int              backref;           /* Backreference number */
1083    int              start_offset;      /* Where the new search starts */
1084    int              g_notempty=0;      /* If the match should not be empty */
1085    int              replace_len=0;     /* Length of replacement string */
1086    char            *replace=NULL,      /* Replacement string */
1087                    *walkbuf,           /* Location of current replacement in the result */
1088                    *walk,              /* Used to walk the replacement string */
1089                    *match,             /* The current match */
1090                    *piece,             /* The current piece of subject */
1091                    *replace_end=NULL,  /* End of replacement string */
1092                     walk_last;         /* Last walked character */
1093    int              result_len;        /* Length of result */
1094    unsigned char   *mark = NULL;       /* Target for MARK name */
1095    zend_string     *result;            /* Result of replacement */
1096    zend_string     *eval_result=NULL;  /* Result of custom function */
1097
1098    ALLOCA_FLAG(use_heap);
1099
1100    if (extra == NULL) {
1101        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1102        extra = &extra_data;
1103    }
1104
1105    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1106    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1107
1108    if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
1109        php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1110        return NULL;
1111    }
1112
1113    if (!is_callable_replace) {
1114        replace = Z_STRVAL_P(replace_val);
1115        replace_len = (int)Z_STRLEN_P(replace_val);
1116        replace_end = replace + replace_len;
1117    }
1118
1119    /* Calculate the size of the offsets array, and allocate memory for it. */
1120    num_subpats = pce->capture_count + 1;
1121    size_offsets = num_subpats * 3;
1122    if (size_offsets <= 32) {
1123        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1124    } else {
1125        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1126    }
1127
1128    /*
1129     * Build a mapping from subpattern numbers to their names. We will
1130     * allocate the table only if there are any named subpatterns.
1131     */
1132    subpat_names = NULL;
1133    if (UNEXPECTED(pce->name_count > 0)) {
1134        subpat_names = make_subpats_table(num_subpats, pce);
1135        if (!subpat_names) {
1136            return NULL;
1137        }
1138    }
1139
1140    alloc_len = 0;
1141    result = NULL;
1142
1143    /* Initialize */
1144    match = NULL;
1145    start_offset = 0;
1146    result_len = 0;
1147    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1148
1149    while (1) {
1150#ifdef PCRE_EXTRA_MARK
1151        extra->mark = &mark;
1152        extra->flags |= PCRE_EXTRA_MARK;
1153#endif
1154        /* Execute the regular expression. */
1155        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1156                          exoptions|g_notempty, offsets, size_offsets);
1157
1158        /* the string was already proved to be valid UTF-8 */
1159        exoptions |= PCRE_NO_UTF8_CHECK;
1160
1161        /* Check for too many substrings condition. */
1162        if (UNEXPECTED(count == 0)) {
1163            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1164            count = size_offsets / 3;
1165        }
1166
1167        piece = subject + start_offset;
1168
1169        /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
1170        if (EXPECTED(count > 0 && limit)) {
1171            if (UNEXPECTED(replace_count)) {
1172                ++*replace_count;
1173            }
1174
1175            /* Set the match location in subject */
1176            match = subject + offsets[0];
1177
1178            new_len = result_len + offsets[0] - start_offset; /* part before the match */
1179
1180            /* if (!is_callable_replace) */
1181            if (EXPECTED(replace)) {
1182                /* do regular substitution */
1183                walk = replace;
1184                walk_last = 0;
1185
1186                while (walk < replace_end) {
1187                    if ('\\' == *walk || '$' == *walk) {
1188                        if (walk_last == '\\') {
1189                            walk++;
1190                            walk_last = 0;
1191                            continue;
1192                        }
1193                        if (preg_get_backref(&walk, &backref)) {
1194                            if (backref < count)
1195                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1196                            continue;
1197                        }
1198                    }
1199                    new_len++;
1200                    walk++;
1201                    walk_last = walk[-1];
1202                }
1203
1204                if (new_len >= alloc_len) {
1205                    alloc_len = alloc_len + 2 * new_len;
1206                    if (result == NULL) {
1207                        result = zend_string_alloc(alloc_len, 0);
1208                    } else {
1209                        result = zend_string_extend(result, alloc_len, 0);
1210                    }
1211                }
1212
1213                /* copy the part of the string before the match */
1214                memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1215                result_len += (int)(match-piece);
1216
1217                /* copy replacement and backrefs */
1218                walkbuf = ZSTR_VAL(result) + result_len;
1219
1220                walk = replace;
1221                walk_last = 0;
1222                while (walk < replace_end) {
1223                    if ('\\' == *walk || '$' == *walk) {
1224                        if (walk_last == '\\') {
1225                            *(walkbuf-1) = *walk++;
1226                            walk_last = 0;
1227                            continue;
1228                        }
1229                        if (preg_get_backref(&walk, &backref)) {
1230                            if (backref < count) {
1231                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1232                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1233                                walkbuf += match_len;
1234                            }
1235                            continue;
1236                        }
1237                    }
1238                    *walkbuf++ = *walk++;
1239                    walk_last = walk[-1];
1240                }
1241                *walkbuf = '\0';
1242                /* increment the result length by how much we've added to the string */
1243                result_len += (int)(walkbuf - (ZSTR_VAL(result) + result_len));
1244            } else {
1245                /* Use custom function to get replacement string and its length. */
1246                eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1247                ZEND_ASSERT(eval_result);
1248                new_len += (int)ZSTR_LEN(eval_result);
1249                if (new_len >= alloc_len) {
1250                    alloc_len = alloc_len + 2 * new_len;
1251                    if (result == NULL) {
1252                        result = zend_string_alloc(alloc_len, 0);
1253                    } else {
1254                        result = zend_string_extend(result, alloc_len, 0);
1255                    }
1256                }
1257                /* copy the part of the string before the match */
1258                memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1259                result_len += (int)(match-piece);
1260
1261                /* copy replacement and backrefs */
1262                walkbuf = ZSTR_VAL(result) + result_len;
1263
1264                /* If using custom function, copy result to the buffer and clean up. */
1265                memcpy(walkbuf, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1266                result_len += (int)ZSTR_LEN(eval_result);
1267                zend_string_release(eval_result);
1268            }
1269
1270            if (EXPECTED(limit)) {
1271                limit--;
1272            }
1273        } else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) {
1274            /* If we previously set PCRE_NOTEMPTY after a null match,
1275               this is not necessarily the end. We need to advance
1276               the start offset, and continue. Fudge the offset values
1277               to achieve this, unless we're already at the end of the string. */
1278            if (g_notempty != 0 && start_offset < subject_len) {
1279                int unit_len = calculate_unit_length(pce, piece);
1280
1281                offsets[0] = start_offset;
1282                offsets[1] = start_offset + unit_len;
1283                memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
1284                result_len += unit_len;
1285            } else {
1286                if (!result && subject_str) {
1287                    result = zend_string_copy(subject_str);
1288                    break;
1289                }
1290                new_len = result_len + subject_len - start_offset;
1291                if (new_len > alloc_len) {
1292                    alloc_len = new_len; /* now we know exactly how long it is */
1293                    if (NULL != result) {
1294                        result = zend_string_realloc(result, alloc_len, 0);
1295                    } else {
1296                        result = zend_string_alloc(alloc_len, 0);
1297                    }
1298                }
1299                /* stick that last bit of string on our output */
1300                memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
1301                result_len += subject_len - start_offset;
1302                ZSTR_VAL(result)[result_len] = '\0';
1303                ZSTR_LEN(result) = result_len;
1304                break;
1305            }
1306        } else {
1307            pcre_handle_exec_error(count);
1308            if (result) {
1309                zend_string_free(result);
1310                result = NULL;
1311            }
1312            break;
1313        }
1314
1315        /* If we have matched an empty string, mimic what Perl's /g options does.
1316           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1317           the match again at the same point. If this fails (picked up above) we
1318           advance to the next character. */
1319        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1320
1321        /* Advance to the next piece. */
1322        start_offset = offsets[1];
1323    }
1324
1325    if (size_offsets <= 32) {
1326        free_alloca(offsets, use_heap);
1327    } else {
1328        efree(offsets);
1329    }
1330    if (UNEXPECTED(subpat_names)) {
1331        efree(subpat_names);
1332    }
1333
1334    return result;
1335}
1336/* }}} */
1337
1338/* {{{ php_replace_in_subject
1339 */
1340static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1341{
1342    zval        *regex_entry,
1343                *replace_entry = NULL,
1344                *replace_value,
1345                 empty_replace;
1346    zend_string *result;
1347    uint32_t replace_idx;
1348    zend_string *subject_str = zval_get_string(subject);
1349
1350    /* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1351    ZVAL_EMPTY_STRING(&empty_replace);
1352
1353    /* If regex is an array */
1354    if (Z_TYPE_P(regex) == IS_ARRAY) {
1355        replace_value = replace;
1356        replace_idx = 0;
1357
1358        /* For each entry in the regex array, get the entry */
1359        ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1360            /* Make sure we're dealing with strings. */
1361            zend_string *regex_str = zval_get_string(regex_entry);
1362
1363            /* If replace is an array and not a callable construct */
1364            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1365                /* Get current entry */
1366                replace_entry = NULL;
1367                while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1368                    if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
1369                        replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
1370                        break;
1371                    }
1372                    replace_idx++;
1373                }
1374                if (replace_entry != NULL) {
1375                    if (!is_callable_replace) {
1376                        convert_to_string_ex(replace_entry);
1377                    }
1378                    replace_value = replace_entry;
1379                    replace_idx++;
1380                } else {
1381                    /* We've run out of replacement strings, so use an empty one */
1382                    replace_value = &empty_replace;
1383                }
1384            }
1385
1386            /* Do the actual replacement and put the result back into subject_str
1387               for further replacements. */
1388            if ((result = php_pcre_replace(regex_str,
1389                                           subject_str,
1390                                           ZSTR_VAL(subject_str),
1391                                           (int)ZSTR_LEN(subject_str),
1392                                           replace_value,
1393                                           is_callable_replace,
1394                                           limit,
1395                                           replace_count)) != NULL) {
1396                zend_string_release(subject_str);
1397                subject_str = result;
1398            } else {
1399                zend_string_release(subject_str);
1400                zend_string_release(regex_str);
1401                return NULL;
1402            }
1403
1404            zend_string_release(regex_str);
1405        } ZEND_HASH_FOREACH_END();
1406
1407        return subject_str;
1408    } else {
1409        result = php_pcre_replace(Z_STR_P(regex),
1410                                  subject_str,
1411                                  ZSTR_VAL(subject_str),
1412                                  (int)ZSTR_LEN(subject_str),
1413                                  replace,
1414                                  is_callable_replace,
1415                                  limit,
1416                                  replace_count);
1417        zend_string_release(subject_str);
1418        return result;
1419    }
1420}
1421/* }}} */
1422
1423/* {{{ preg_replace_impl
1424 */
1425static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
1426{
1427    zval        *subject_entry;
1428    zend_string *result;
1429    zend_string *string_key;
1430    zend_ulong   num_key;
1431    int          replace_count = 0, old_replace_count;
1432
1433    if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1434        SEPARATE_ZVAL(replace);
1435        convert_to_string_ex(replace);
1436    }
1437
1438    if (Z_TYPE_P(regex) != IS_ARRAY) {
1439        SEPARATE_ZVAL(regex);
1440        convert_to_string_ex(regex);
1441    }
1442
1443    /* if subject is an array */
1444    if (Z_TYPE_P(subject) == IS_ARRAY) {
1445        array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1446
1447        /* For each subject entry, convert it to string, then perform replacement
1448           and add the result to the return_value array. */
1449        ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1450            old_replace_count = replace_count;
1451            if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1452                if (!is_filter || replace_count > old_replace_count) {
1453                    /* Add to return array */
1454                    zval zv;
1455
1456                    ZVAL_STR(&zv, result);
1457                    if (string_key) {
1458                        zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
1459                    } else {
1460                        zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
1461                    }
1462                } else {
1463                    zend_string_release(result);
1464                }
1465            }
1466        } ZEND_HASH_FOREACH_END();
1467    } else {
1468        /* if subject is not an array */
1469        old_replace_count = replace_count;
1470        if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1471            if (!is_filter || replace_count > old_replace_count) {
1472                RETVAL_STR(result);
1473            } else {
1474                zend_string_release(result);
1475            }
1476        }
1477    }
1478
1479    return replace_count;
1480}
1481/* }}} */
1482
1483/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1484   Perform Perl-style regular expression replacement. */
1485static PHP_FUNCTION(preg_replace)
1486{
1487    zval *regex, *replace, *subject, *zcount = NULL;
1488    zend_long limit = -1;
1489    int replace_count;
1490
1491#ifndef FAST_ZPP
1492    /* Get function parameters and do error-checking. */
1493    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1494        return;
1495    }
1496#else
1497    ZEND_PARSE_PARAMETERS_START(3, 5)
1498        Z_PARAM_ZVAL(regex)
1499        Z_PARAM_ZVAL(replace)
1500        Z_PARAM_ZVAL(subject)
1501        Z_PARAM_OPTIONAL
1502        Z_PARAM_LONG(limit)
1503        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1504    ZEND_PARSE_PARAMETERS_END();
1505#endif
1506
1507    if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1508        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1509        RETURN_FALSE;
1510    }
1511
1512    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0);
1513    if (zcount) {
1514        zval_dtor(zcount);
1515        ZVAL_LONG(zcount, replace_count);
1516    }
1517}
1518/* }}} */
1519
1520/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1521   Perform Perl-style regular expression replacement using replacement callback. */
1522static PHP_FUNCTION(preg_replace_callback)
1523{
1524    zval *regex, *replace, *subject, *zcount = NULL;
1525    zend_long limit = -1;
1526    zend_string *callback_name;
1527    int replace_count;
1528
1529#ifndef FAST_ZPP
1530    /* Get function parameters and do error-checking. */
1531    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1532        return;
1533    }
1534#else
1535    ZEND_PARSE_PARAMETERS_START(3, 5)
1536        Z_PARAM_ZVAL(regex)
1537        Z_PARAM_ZVAL(replace)
1538        Z_PARAM_ZVAL(subject)
1539        Z_PARAM_OPTIONAL
1540        Z_PARAM_LONG(limit)
1541        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1542    ZEND_PARSE_PARAMETERS_END();
1543#endif
1544
1545    if (!zend_is_callable(replace, 0, &callback_name)) {
1546        php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
1547        zend_string_release(callback_name);
1548        ZVAL_COPY(return_value, subject);
1549        return;
1550    }
1551    zend_string_release(callback_name);
1552
1553    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0);
1554    if (zcount) {
1555        zval_dtor(zcount);
1556        ZVAL_LONG(zcount, replace_count);
1557    }
1558}
1559/* }}} */
1560
1561/* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
1562   Perform Perl-style regular expression replacement using replacement callback. */
1563static PHP_FUNCTION(preg_replace_callback_array)
1564{
1565    zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
1566    zend_long limit = -1;
1567    zend_string *str_idx;
1568    zend_string *callback_name;
1569    int replace_count = 0;
1570
1571#ifndef FAST_ZPP
1572    /* Get function parameters and do error-checking. */
1573    if (zend_parse_parameters(ZEND_NUM_ARGS(), "az|lz/", &pattern, &subject, &limit, &zcount) == FAILURE) {
1574        return;
1575    }
1576#else
1577    ZEND_PARSE_PARAMETERS_START(2, 4)
1578        Z_PARAM_ARRAY(pattern)
1579        Z_PARAM_ZVAL(subject)
1580        Z_PARAM_OPTIONAL
1581        Z_PARAM_LONG(limit)
1582        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1583    ZEND_PARSE_PARAMETERS_END();
1584#endif
1585
1586    ZVAL_UNDEF(&zv);
1587    ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
1588        if (str_idx) {
1589            ZVAL_STR_COPY(&regex, str_idx);
1590        } else {
1591            php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
1592            zval_ptr_dtor(return_value);
1593            RETURN_NULL();
1594        }
1595
1596        if (!zend_is_callable(replace, 0, &callback_name)) {
1597            php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
1598            zend_string_release(callback_name);
1599            zval_ptr_dtor(&regex);
1600            zval_ptr_dtor(return_value);
1601            ZVAL_COPY(return_value, subject);
1602            return;
1603        }
1604        zend_string_release(callback_name);
1605
1606        if (Z_ISNULL_P(return_value)) {
1607            replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0);
1608        } else {
1609            replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0);
1610            zval_ptr_dtor(return_value);
1611        }
1612
1613        zval_ptr_dtor(&regex);
1614
1615        if (Z_ISUNDEF(zv)) {
1616            RETURN_NULL();
1617        }
1618
1619        ZVAL_COPY_VALUE(return_value, &zv);
1620
1621        if (UNEXPECTED(EG(exception))) {
1622            zval_ptr_dtor(return_value);
1623            RETURN_NULL();
1624        }
1625    } ZEND_HASH_FOREACH_END();
1626
1627    if (zcount) {
1628        zval_dtor(zcount);
1629        ZVAL_LONG(zcount, replace_count);
1630    }
1631}
1632/* }}} */
1633
1634/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1635   Perform Perl-style regular expression replacement and only return matches. */
1636static PHP_FUNCTION(preg_filter)
1637{
1638    zval *regex, *replace, *subject, *zcount = NULL;
1639    zend_long limit = -1;
1640    int replace_count;
1641
1642#ifndef FAST_ZPP
1643    /* Get function parameters and do error-checking. */
1644    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1645        return;
1646    }
1647#else
1648    ZEND_PARSE_PARAMETERS_START(3, 5)
1649        Z_PARAM_ZVAL(regex)
1650        Z_PARAM_ZVAL(replace)
1651        Z_PARAM_ZVAL(subject)
1652        Z_PARAM_OPTIONAL
1653        Z_PARAM_LONG(limit)
1654        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1655    ZEND_PARSE_PARAMETERS_END();
1656#endif
1657
1658    if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1659        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1660        RETURN_FALSE;
1661    }
1662
1663    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1);
1664    if (zcount) {
1665        zval_dtor(zcount);
1666        ZVAL_LONG(zcount, replace_count);
1667    }
1668}
1669/* }}} */
1670
1671/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1672   Split string into an array using a perl-style regular expression as a delimiter */
1673static PHP_FUNCTION(preg_split)
1674{
1675    zend_string         *regex;         /* Regular expression */
1676    zend_string         *subject;       /* String to match against */
1677    zend_long            limit_val = -1;/* Integer value of limit */
1678    zend_long            flags = 0;     /* Match control flags */
1679    pcre_cache_entry    *pce;           /* Compiled regular expression */
1680
1681    /* Get function parameters and do error checking */
1682#ifndef FAST_ZPP
1683    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
1684                              &subject, &limit_val, &flags) == FAILURE) {
1685        RETURN_FALSE;
1686    }
1687#else
1688    ZEND_PARSE_PARAMETERS_START(2, 4)
1689        Z_PARAM_STR(regex)
1690        Z_PARAM_STR(subject)
1691        Z_PARAM_OPTIONAL
1692        Z_PARAM_LONG(limit_val)
1693        Z_PARAM_LONG(flags)
1694    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1695#endif
1696
1697    /* Compile regex or get it from cache. */
1698    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1699        RETURN_FALSE;
1700    }
1701
1702    pce->refcount++;
1703    php_pcre_split_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, (int)limit_val, flags);
1704    pce->refcount--;
1705}
1706/* }}} */
1707
1708/* {{{ php_pcre_split
1709 */
1710PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1711    zend_long limit_val, zend_long flags)
1712{
1713    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1714    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1715    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1716    pcre_extra       extra_data;        /* Used locally for exec options */
1717    int             *offsets;           /* Array of subpattern offsets */
1718    int              size_offsets;      /* Size of the offsets array */
1719    int              exoptions = 0;     /* Execution options */
1720    int              count = 0;         /* Count of matched subpatterns */
1721    int              start_offset;      /* Where the new search starts */
1722    int              next_offset;       /* End of the last delimiter match + 1 */
1723    int              g_notempty = 0;    /* If the match should not be empty */
1724    char            *last_match;        /* Location of last match */
1725    int              no_empty;          /* If NO_EMPTY flag is set */
1726    int              delim_capture;     /* If delimiters should be captured */
1727    int              offset_capture;    /* If offsets should be captured */
1728    zval             tmp;
1729    ALLOCA_FLAG(use_heap);
1730
1731    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1732    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1733    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1734
1735    if (limit_val == 0) {
1736        limit_val = -1;
1737    }
1738
1739    if (extra == NULL) {
1740        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1741        extra = &extra_data;
1742    }
1743    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1744    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1745#ifdef PCRE_EXTRA_MARK
1746    extra->flags &= ~PCRE_EXTRA_MARK;
1747#endif
1748
1749    /* Initialize return value */
1750    array_init(return_value);
1751
1752    /* Calculate the size of the offsets array, and allocate memory for it. */
1753    size_offsets = (pce->capture_count + 1) * 3;
1754    if (size_offsets <= 32) {
1755        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1756    } else {
1757        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1758    }
1759
1760    /* Start at the beginning of the string */
1761    start_offset = 0;
1762    next_offset = 0;
1763    last_match = subject;
1764    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1765
1766    /* Get next piece if no limit or limit not yet reached and something matched*/
1767    while ((limit_val == -1 || limit_val > 1)) {
1768        count = pcre_exec(pce->re, extra, subject,
1769                          subject_len, start_offset,
1770                          exoptions|g_notempty, offsets, size_offsets);
1771
1772        /* the string was already proved to be valid UTF-8 */
1773        exoptions |= PCRE_NO_UTF8_CHECK;
1774
1775        /* Check for too many substrings condition. */
1776        if (count == 0) {
1777            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1778            count = size_offsets/3;
1779        }
1780
1781        /* If something matched */
1782        if (count > 0) {
1783            if (!no_empty || &subject[offsets[0]] != last_match) {
1784
1785                if (offset_capture) {
1786                    /* Add (match, offset) pair to the return value */
1787                    add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1788                } else {
1789                    /* Add the piece to the return value */
1790                    ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1791                    zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1792                }
1793
1794                /* One less left to do */
1795                if (limit_val != -1)
1796                    limit_val--;
1797            }
1798
1799            last_match = &subject[offsets[1]];
1800            next_offset = offsets[1];
1801
1802            if (delim_capture) {
1803                int i, match_len;
1804                for (i = 1; i < count; i++) {
1805                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1806                    /* If we have matched a delimiter */
1807                    if (!no_empty || match_len > 0) {
1808                        if (offset_capture) {
1809                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1810                        } else {
1811                            ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1812                            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1813                        }
1814                    }
1815                }
1816            }
1817        } else if (count == PCRE_ERROR_NOMATCH) {
1818            /* If we previously set PCRE_NOTEMPTY after a null match,
1819               this is not necessarily the end. We need to advance
1820               the start offset, and continue. Fudge the offset values
1821               to achieve this, unless we're already at the end of the string. */
1822            if (g_notempty != 0 && start_offset < subject_len) {
1823                if (pce->compile_options & PCRE_UTF8) {
1824                    if (re_bump == NULL) {
1825                        int dummy;
1826                        zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
1827                        re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy);
1828                        zend_string_release(regex);
1829                        if (re_bump == NULL) {
1830                            RETURN_FALSE;
1831                        }
1832                    }
1833                    count = pcre_exec(re_bump, extra_bump, subject,
1834                              subject_len, start_offset,
1835                              exoptions, offsets, size_offsets);
1836                    if (count < 1) {
1837                        php_error_docref(NULL, E_WARNING, "Unknown error");
1838                        RETURN_FALSE;
1839                    }
1840                } else {
1841                    offsets[0] = start_offset;
1842                    offsets[1] = start_offset + 1;
1843                }
1844            } else
1845                break;
1846        } else {
1847            pcre_handle_exec_error(count);
1848            break;
1849        }
1850
1851        /* If we have matched an empty string, mimic what Perl's /g options does.
1852           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1853           the match again at the same point. If this fails (picked up above) we
1854           advance to the next character. */
1855        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1856
1857        /* Advance to the position right after the last full match */
1858        start_offset = offsets[1];
1859    }
1860
1861
1862    start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1863
1864    if (!no_empty || start_offset < subject_len)
1865    {
1866        if (offset_capture) {
1867            /* Add the last (match, offset) pair to the return value */
1868            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1869        } else {
1870            /* Add the last piece to the return value */
1871            ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1872            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1873        }
1874    }
1875
1876
1877    /* Clean up */
1878    if (size_offsets <= 32) {
1879        free_alloca(offsets, use_heap);
1880    } else {
1881        efree(offsets);
1882    }
1883}
1884/* }}} */
1885
1886/* {{{ proto string preg_quote(string str [, string delim_char])
1887   Quote regular expression characters plus an optional character */
1888static PHP_FUNCTION(preg_quote)
1889{
1890    size_t       in_str_len;
1891    char    *in_str;        /* Input string argument */
1892    char    *in_str_end;    /* End of the input string */
1893    size_t       delim_len = 0;
1894    char    *delim = NULL;  /* Additional delimiter argument */
1895    zend_string *out_str;   /* Output string with quoted characters */
1896    char    *p,             /* Iterator for input string */
1897            *q,             /* Iterator for output string */
1898             delim_char=0,  /* Delimiter character to be quoted */
1899             c;             /* Current character */
1900    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1901
1902    /* Get the arguments and check for errors */
1903#ifndef FAST_ZPP
1904    if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
1905                              &delim, &delim_len) == FAILURE) {
1906        return;
1907    }
1908#else
1909    ZEND_PARSE_PARAMETERS_START(1, 2)
1910        Z_PARAM_STRING(in_str, in_str_len)
1911        Z_PARAM_OPTIONAL
1912        Z_PARAM_STRING(delim, delim_len)
1913    ZEND_PARSE_PARAMETERS_END();
1914#endif
1915
1916    in_str_end = in_str + in_str_len;
1917
1918    /* Nothing to do if we got an empty string */
1919    if (in_str == in_str_end) {
1920        RETURN_EMPTY_STRING();
1921    }
1922
1923    if (delim && *delim) {
1924        delim_char = delim[0];
1925        quote_delim = 1;
1926    }
1927
1928    /* Allocate enough memory so that even if each character
1929       is quoted, we won't run out of room */
1930    out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1931
1932    /* Go through the string and quote necessary characters */
1933    for (p = in_str, q = ZSTR_VAL(out_str); p != in_str_end; p++) {
1934        c = *p;
1935        switch(c) {
1936            case '.':
1937            case '\\':
1938            case '+':
1939            case '*':
1940            case '?':
1941            case '[':
1942            case '^':
1943            case ']':
1944            case '$':
1945            case '(':
1946            case ')':
1947            case '{':
1948            case '}':
1949            case '=':
1950            case '!':
1951            case '>':
1952            case '<':
1953            case '|':
1954            case ':':
1955            case '-':
1956                *q++ = '\\';
1957                *q++ = c;
1958                break;
1959
1960            case '\0':
1961                *q++ = '\\';
1962                *q++ = '0';
1963                *q++ = '0';
1964                *q++ = '0';
1965                break;
1966
1967            default:
1968                if (quote_delim && c == delim_char)
1969                    *q++ = '\\';
1970                *q++ = c;
1971                break;
1972        }
1973    }
1974    *q = '\0';
1975
1976    /* Reallocate string and return it */
1977    out_str = zend_string_truncate(out_str, q - ZSTR_VAL(out_str), 0);
1978    RETURN_NEW_STR(out_str);
1979}
1980/* }}} */
1981
1982/* {{{ proto array preg_grep(string regex, array input [, int flags])
1983   Searches array and returns entries which match regex */
1984static PHP_FUNCTION(preg_grep)
1985{
1986    zend_string         *regex;         /* Regular expression */
1987    zval                *input;         /* Input array */
1988    zend_long            flags = 0;     /* Match control flags */
1989    pcre_cache_entry    *pce;           /* Compiled regular expression */
1990
1991    /* Get arguments and do error checking */
1992#ifndef FAST_ZPP
1993    if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
1994                              &input, &flags) == FAILURE) {
1995        return;
1996    }
1997#else
1998    ZEND_PARSE_PARAMETERS_START(2, 3)
1999        Z_PARAM_STR(regex)
2000        Z_PARAM_ARRAY(input)
2001        Z_PARAM_OPTIONAL
2002        Z_PARAM_LONG(flags)
2003    ZEND_PARSE_PARAMETERS_END();
2004#endif
2005
2006    /* Compile regex or get it from cache. */
2007    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2008        RETURN_FALSE;
2009    }
2010
2011    pce->refcount++;
2012    php_pcre_grep_impl(pce, input, return_value, flags);
2013    pce->refcount--;
2014}
2015/* }}} */
2016
2017PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2018{
2019    zval            *entry;             /* An entry in the input array */
2020    pcre_extra      *extra = pce->extra;/* Holds results of studying */
2021    pcre_extra       extra_data;        /* Used locally for exec options */
2022    int             *offsets;           /* Array of subpattern offsets */
2023    int              size_offsets;      /* Size of the offsets array */
2024    int              count = 0;         /* Count of matched subpatterns */
2025    zend_string     *string_key;
2026    zend_ulong       num_key;
2027    zend_bool        invert;            /* Whether to return non-matching
2028                                           entries */
2029    ALLOCA_FLAG(use_heap);
2030
2031    invert = flags & PREG_GREP_INVERT ? 1 : 0;
2032
2033    if (extra == NULL) {
2034        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2035        extra = &extra_data;
2036    }
2037    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
2038    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
2039#ifdef PCRE_EXTRA_MARK
2040    extra->flags &= ~PCRE_EXTRA_MARK;
2041#endif
2042
2043    /* Calculate the size of the offsets array, and allocate memory for it. */
2044    size_offsets = (pce->capture_count + 1) * 3;
2045    if (size_offsets <= 32) {
2046        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
2047    } else {
2048        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
2049    }
2050
2051    /* Initialize return array */
2052    array_init(return_value);
2053
2054    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2055
2056    /* Go through the input array */
2057    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2058        zend_string *subject_str = zval_get_string(entry);
2059
2060        /* Perform the match */
2061        count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
2062                          (int)ZSTR_LEN(subject_str), 0,
2063                          0, offsets, size_offsets);
2064
2065        /* Check for too many substrings condition. */
2066        if (count == 0) {
2067            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2068            count = size_offsets/3;
2069        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
2070            pcre_handle_exec_error(count);
2071            zend_string_release(subject_str);
2072            break;
2073        }
2074
2075        /* If the entry fits our requirements */
2076        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
2077            if (Z_REFCOUNTED_P(entry)) {
2078                Z_ADDREF_P(entry);
2079            }
2080
2081            /* Add to return array */
2082            if (string_key) {
2083                zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2084            } else {
2085                zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2086            }
2087        }
2088
2089        zend_string_release(subject_str);
2090    } ZEND_HASH_FOREACH_END();
2091
2092    /* Clean up */
2093    if (size_offsets <= 32) {
2094        free_alloca(offsets, use_heap);
2095    } else {
2096        efree(offsets);
2097    }
2098}
2099/* }}} */
2100
2101/* {{{ proto int preg_last_error()
2102   Returns the error code of the last regexp execution. */
2103static PHP_FUNCTION(preg_last_error)
2104{
2105#ifndef FAST_ZPP
2106    if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
2107        return;
2108    }
2109#else
2110    ZEND_PARSE_PARAMETERS_START(0, 0)
2111    ZEND_PARSE_PARAMETERS_END();
2112#endif
2113
2114    RETURN_LONG(PCRE_G(error_code));
2115}
2116/* }}} */
2117
2118/* {{{ module definition structures */
2119
2120/* {{{ arginfo */
2121ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
2122    ZEND_ARG_INFO(0, pattern)
2123    ZEND_ARG_INFO(0, subject)
2124    ZEND_ARG_INFO(1, subpatterns) /* array */
2125    ZEND_ARG_INFO(0, flags)
2126    ZEND_ARG_INFO(0, offset)
2127ZEND_END_ARG_INFO()
2128
2129ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
2130    ZEND_ARG_INFO(0, pattern)
2131    ZEND_ARG_INFO(0, subject)
2132    ZEND_ARG_INFO(1, subpatterns) /* array */
2133    ZEND_ARG_INFO(0, flags)
2134    ZEND_ARG_INFO(0, offset)
2135ZEND_END_ARG_INFO()
2136
2137ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
2138    ZEND_ARG_INFO(0, regex)
2139    ZEND_ARG_INFO(0, replace)
2140    ZEND_ARG_INFO(0, subject)
2141    ZEND_ARG_INFO(0, limit)
2142    ZEND_ARG_INFO(1, count)
2143ZEND_END_ARG_INFO()
2144
2145ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
2146    ZEND_ARG_INFO(0, regex)
2147    ZEND_ARG_INFO(0, callback)
2148    ZEND_ARG_INFO(0, subject)
2149    ZEND_ARG_INFO(0, limit)
2150    ZEND_ARG_INFO(1, count)
2151ZEND_END_ARG_INFO()
2152
2153ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
2154    ZEND_ARG_INFO(0, pattern)
2155    ZEND_ARG_INFO(0, subject)
2156    ZEND_ARG_INFO(0, limit)
2157    ZEND_ARG_INFO(1, count)
2158ZEND_END_ARG_INFO()
2159
2160ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
2161    ZEND_ARG_INFO(0, pattern)
2162    ZEND_ARG_INFO(0, subject)
2163    ZEND_ARG_INFO(0, limit)
2164    ZEND_ARG_INFO(0, flags)
2165ZEND_END_ARG_INFO()
2166
2167ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
2168    ZEND_ARG_INFO(0, str)
2169    ZEND_ARG_INFO(0, delim_char)
2170ZEND_END_ARG_INFO()
2171
2172ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
2173    ZEND_ARG_INFO(0, regex)
2174    ZEND_ARG_INFO(0, input) /* array */
2175    ZEND_ARG_INFO(0, flags)
2176ZEND_END_ARG_INFO()
2177
2178ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
2179ZEND_END_ARG_INFO()
2180/* }}} */
2181
2182static const zend_function_entry pcre_functions[] = {
2183    PHP_FE(preg_match,                  arginfo_preg_match)
2184    PHP_FE(preg_match_all,              arginfo_preg_match_all)
2185    PHP_FE(preg_replace,                arginfo_preg_replace)
2186    PHP_FE(preg_replace_callback,       arginfo_preg_replace_callback)
2187    PHP_FE(preg_replace_callback_array, arginfo_preg_replace_callback_array)
2188    PHP_FE(preg_filter,                 arginfo_preg_replace)
2189    PHP_FE(preg_split,                  arginfo_preg_split)
2190    PHP_FE(preg_quote,                  arginfo_preg_quote)
2191    PHP_FE(preg_grep,                   arginfo_preg_grep)
2192    PHP_FE(preg_last_error,             arginfo_preg_last_error)
2193    PHP_FE_END
2194};
2195
2196zend_module_entry pcre_module_entry = {
2197    STANDARD_MODULE_HEADER,
2198   "pcre",
2199    pcre_functions,
2200    PHP_MINIT(pcre),
2201    PHP_MSHUTDOWN(pcre),
2202    NULL,
2203    NULL,
2204    PHP_MINFO(pcre),
2205    PHP_PCRE_VERSION,
2206    PHP_MODULE_GLOBALS(pcre),
2207    PHP_GINIT(pcre),
2208    PHP_GSHUTDOWN(pcre),
2209    NULL,
2210    STANDARD_MODULE_PROPERTIES_EX
2211};
2212
2213#ifdef COMPILE_DL_PCRE
2214ZEND_GET_MODULE(pcre)
2215#endif
2216
2217/* }}} */
2218
2219#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2220
2221/*
2222 * Local variables:
2223 * tab-width: 4
2224 * c-basic-offset: 4
2225 * End:
2226 * vim600: sw=4 ts=4 fdm=marker
2227 * vim<600: sw=4 ts=4
2228 */
2229