1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2015 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include "php.h"
22#include "php_ini.h"
23#include "php_globals.h"
24#include "php_pcre.h"
25#include "ext/standard/info.h"
26#include "ext/standard/basic_functions.h"
27#include "zend_smart_str.h"
28
29#if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31#include "ext/standard/php_string.h"
32
/* Result-ordering modes for global matching; registered as userland constants in MINIT. */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
/* Kept outside the low byte: the match code reads the ordering mode from (flags & 0xff). */
#define PREG_OFFSET_CAPTURE         (1<<8)

/* Split flags; registered as userland constants in MINIT. */
#define PREG_SPLIT_NO_EMPTY         (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE    (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1<<2)

/* Internal preg option bit set when the pattern carries the 'e' modifier. */
#define PREG_REPLACE_EVAL           (1<<0)

/* Grep flag; registered as a userland constant in MINIT. */
#define PREG_GREP_INVERT            (1<<0)

/* Number of cached compiled patterns at which an eviction pass (1/8 of this size) is triggered. */
#define PCRE_CACHE_SIZE 4096

/* not fully functional workaround for libpcre < 8.0, see bug #70232 */
#ifndef PCRE_NOTEMPTY_ATSTART
# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
#endif
51
/*
 * Error codes stored in PCRE_G(error_code). The numeric values are exported
 * to userland as the PREG_*_ERROR constants in MINIT, so the order of the
 * enumerators must not change.
 */
enum {
    PHP_PCRE_NO_ERROR = 0,
    PHP_PCRE_INTERNAL_ERROR,          /* any PCRE error without a dedicated code */
    PHP_PCRE_BACKTRACK_LIMIT_ERROR,   /* mapped from PCRE_ERROR_MATCHLIMIT */
    PHP_PCRE_RECURSION_LIMIT_ERROR,   /* mapped from PCRE_ERROR_RECURSIONLIMIT */
    PHP_PCRE_BAD_UTF8_ERROR,          /* mapped from PCRE_ERROR_BADUTF8 */
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR,   /* mapped from PCRE_ERROR_BADUTF8_OFFSET */
    PHP_PCRE_JIT_STACKLIMIT_ERROR     /* mapped from PCRE_ERROR_JIT_STACKLIMIT (JIT builds only) */
};
61
62
/* Module globals for ext/pcre; presumably the storage behind PCRE_G() -- confirm against php_pcre.h. */
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
64
65
66static void pcre_handle_exec_error(int pcre_code) /* {{{ */
67{
68    int preg_code = 0;
69
70    switch (pcre_code) {
71        case PCRE_ERROR_MATCHLIMIT:
72            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
73            break;
74
75        case PCRE_ERROR_RECURSIONLIMIT:
76            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
77            break;
78
79        case PCRE_ERROR_BADUTF8:
80            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
81            break;
82
83        case PCRE_ERROR_BADUTF8_OFFSET:
84            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
85            break;
86
87#ifdef PCRE_STUDY_JIT_COMPILE
88        case PCRE_ERROR_JIT_STACKLIMIT:
89            preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
90            break;
91#endif
92
93        default:
94            preg_code = PHP_PCRE_INTERNAL_ERROR;
95            break;
96    }
97
98    PCRE_G(error_code) = preg_code;
99}
100/* }}} */
101
102static void php_free_pcre_cache(zval *data) /* {{{ */
103{
104    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
105    if (!pce) return;
106    pcre_free(pce->re);
107    if (pce->extra) {
108        pcre_free_study(pce->extra);
109    }
110#if HAVE_SETLOCALE
111    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
112    if (pce->locale) {
113        zend_string_release(pce->locale);
114    }
115#endif
116    pefree(pce, 1);
117}
118/* }}} */
119
120static PHP_GINIT_FUNCTION(pcre) /* {{{ */
121{
122    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
123    pcre_globals->backtrack_limit = 0;
124    pcre_globals->recursion_limit = 0;
125    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
126}
127/* }}} */
128
/* Tears down the module globals: destroying the cache runs php_free_pcre_cache() on every entry. */
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
    zend_hash_destroy(&pcre_globals->pcre_cache);
}
/* }}} */
134
/*
 * INI settings of the extension. The two limits are kept in the module
 * globals and copied into pcre_extra (match_limit / match_limit_recursion)
 * when a pattern is studied and before it is executed. pcre.jit exists only
 * when the PCRE headers provide PCRE_STUDY_JIT_COMPILE.
 */
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef PCRE_STUDY_JIT_COMPILE
    STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
142
143
144/* {{{ PHP_MINFO_FUNCTION(pcre) */
145static PHP_MINFO_FUNCTION(pcre)
146{
147    int jit_yes = 0;
148
149    php_info_print_table_start();
150    php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
151    php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
152
153    if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
154        php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
155    } else {
156        php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
157    }
158
159    php_info_print_table_end();
160
161    DISPLAY_INI_ENTRIES();
162}
163/* }}} */
164
/* {{{ PHP_MINIT_FUNCTION(pcre)
 * Module startup: registers the INI entries and exports the PREG_* flag and
 * error constants plus PCRE_VERSION to userland. Always returns SUCCESS. */
static PHP_MINIT_FUNCTION(pcre)
{
    REGISTER_INI_ENTRIES();

    /* Flags accepted by the matching, splitting and grep functions. */
    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);

    /* Error codes as stored in PCRE_G(error_code). */
    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
    /* Version string reported by the PCRE library in use. */
    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);

    return SUCCESS;
}
/* }}} */
190
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
 * Module shutdown: unregisters the INI entries added in MINIT. Always returns SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
    UNREGISTER_INI_ENTRIES();

    return SUCCESS;
}
/* }}} */
199
200/* {{{ static pcre_clean_cache */
201static int pcre_clean_cache(zval *data, void *arg)
202{
203    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
204    int *num_clean = (int *)arg;
205
206    if (*num_clean > 0 && !pce->refcount) {
207        (*num_clean)--;
208        return ZEND_HASH_APPLY_REMOVE;
209    } else {
210        return ZEND_HASH_APPLY_KEEP;
211    }
212}
213/* }}} */
214
215/* {{{ static make_subpats_table */
216static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
217{
218    pcre_extra *extra = pce->extra;
219    int name_cnt = pce->name_count, name_size, ni = 0;
220    int rc;
221    char *name_table;
222    unsigned short name_idx;
223    char **subpat_names;
224    int rc1, rc2;
225
226    rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
227    rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
228    rc = rc2 ? rc2 : rc1;
229    if (rc < 0) {
230        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
231        return NULL;
232    }
233
234    subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
235    while (ni++ < name_cnt) {
236        name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
237        subpat_names[name_idx] = name_table + 2;
238        if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
239            php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
240            efree(subpat_names);
241            return NULL;
242        }
243        name_table += name_size;
244    }
245    return subpat_names;
246}
247/* }}} */
248
249/* {{{ static calculate_unit_length */
250/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
251static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
252{
253    int unit_len;
254
255    if (pce->compile_options & PCRE_UTF8) {
256        char *end = start;
257
258        /* skip continuation bytes */
259        while ((*++end & 0xC0) == 0x80);
260        unit_len = end - start;
261    } else {
262        unit_len = 1;
263    }
264    return unit_len;
265}
266/* }}} */
267
268/* {{{ pcre_get_compiled_regex_cache
269 */
270PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
271{
272    pcre                *re = NULL;
273    pcre_extra          *extra;
274    int                  coptions = 0;
275    int                  soptions = 0;
276    const char          *error;
277    int                  erroffset;
278    char                 delimiter;
279    char                 start_delimiter;
280    char                 end_delimiter;
281    char                *p, *pp;
282    char                *pattern;
283    int                  do_study = 0;
284    int                  poptions = 0;
285    unsigned const char *tables = NULL;
286    pcre_cache_entry    *pce;
287    pcre_cache_entry     new_entry;
288    int                  rc;
289
290    /* Try to lookup the cached regex entry, and if successful, just pass
291       back the compiled pattern, otherwise go on and compile it. */
292    pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
293    if (pce) {
294#if HAVE_SETLOCALE
295        if (pce->locale == BG(locale_string) ||
296            (pce->locale && BG(locale_string) &&
297             ZSTR_LEN(pce->locale) == ZSTR_LEN(BG(locale_string)) &&
298             !memcmp(ZSTR_VAL(pce->locale), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(pce->locale))) ||
299            (!pce->locale &&
300             ZSTR_LEN(BG(locale_string)) == 1 &&
301             ZSTR_VAL(BG(locale_string))[0] == 'C') ||
302            (!BG(locale_string) &&
303             ZSTR_LEN(pce->locale) == 1 &&
304             ZSTR_VAL(pce->locale)[0] == 'C')) {
305            return pce;
306        }
307#else
308        return pce;
309#endif
310    }
311
312    p = ZSTR_VAL(regex);
313
314    /* Parse through the leading whitespace, and display a warning if we
315       get to the end without encountering a delimiter. */
316    while (isspace((int)*(unsigned char *)p)) p++;
317    if (*p == 0) {
318        php_error_docref(NULL, E_WARNING,
319                         p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
320        return NULL;
321    }
322
323    /* Get the delimiter and display a warning if it is alphanumeric
324       or a backslash. */
325    delimiter = *p++;
326    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
327        php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
328        return NULL;
329    }
330
331    start_delimiter = delimiter;
332    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
333        delimiter = pp[5];
334    end_delimiter = delimiter;
335
336    pp = p;
337
338    if (start_delimiter == end_delimiter) {
339        /* We need to iterate through the pattern, searching for the ending delimiter,
340           but skipping the backslashed delimiters.  If the ending delimiter is not
341           found, display a warning. */
342        while (*pp != 0) {
343            if (*pp == '\\' && pp[1] != 0) pp++;
344            else if (*pp == delimiter)
345                break;
346            pp++;
347        }
348    } else {
349        /* We iterate through the pattern, searching for the matching ending
350         * delimiter. For each matching starting delimiter, we increment nesting
351         * level, and decrement it for each matching ending delimiter. If we
352         * reach the end of the pattern without matching, display a warning.
353         */
354        int brackets = 1;   /* brackets nesting level */
355        while (*pp != 0) {
356            if (*pp == '\\' && pp[1] != 0) pp++;
357            else if (*pp == end_delimiter && --brackets <= 0)
358                break;
359            else if (*pp == start_delimiter)
360                brackets++;
361            pp++;
362        }
363    }
364
365    if (*pp == 0) {
366        if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
367            php_error_docref(NULL,E_WARNING, "Null byte in regex");
368        } else if (start_delimiter == end_delimiter) {
369            php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
370        } else {
371            php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
372        }
373        return NULL;
374    }
375
376    /* Make a copy of the actual pattern. */
377    pattern = estrndup(p, pp-p);
378
379    /* Move on to the options */
380    pp++;
381
382    /* Parse through the options, setting appropriate flags.  Display
383       a warning if we encounter an unknown modifier. */
384    while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
385        switch (*pp++) {
386            /* Perl compatible options */
387            case 'i':   coptions |= PCRE_CASELESS;      break;
388            case 'm':   coptions |= PCRE_MULTILINE;     break;
389            case 's':   coptions |= PCRE_DOTALL;        break;
390            case 'x':   coptions |= PCRE_EXTENDED;      break;
391
392            /* PCRE specific options */
393            case 'A':   coptions |= PCRE_ANCHORED;      break;
394            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
395            case 'S':   do_study  = 1;                  break;
396            case 'U':   coptions |= PCRE_UNGREEDY;      break;
397            case 'X':   coptions |= PCRE_EXTRA;         break;
398            case 'u':   coptions |= PCRE_UTF8;
399    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
400       characters, even in UTF-8 mode. However, this can be changed by setting
401       the PCRE_UCP option. */
402#ifdef PCRE_UCP
403                        coptions |= PCRE_UCP;
404#endif
405                break;
406
407            /* Custom preg options */
408            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;
409
410            case ' ':
411            case '\n':
412                break;
413
414            default:
415                if (pp[-1]) {
416                    php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
417                } else {
418                    php_error_docref(NULL,E_WARNING, "Null byte in regex");
419                }
420                efree(pattern);
421                return NULL;
422        }
423    }
424
425#if HAVE_SETLOCALE
426    if (BG(locale_string) &&
427        (ZSTR_LEN(BG(locale_string)) != 1 || ZSTR_VAL(BG(locale_string))[0] != 'C')) {
428        tables = pcre_maketables();
429    }
430#endif
431
432    /* Compile pattern and display a warning if compilation failed. */
433    re = pcre_compile(pattern,
434                      coptions,
435                      &error,
436                      &erroffset,
437                      tables);
438
439    if (re == NULL) {
440        php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
441        efree(pattern);
442        if (tables) {
443            pefree((void*)tables, 1);
444        }
445        return NULL;
446    }
447
448#ifdef PCRE_STUDY_JIT_COMPILE
449    if (PCRE_G(jit)) {
450        /* Enable PCRE JIT compiler */
451        do_study = 1;
452        soptions |= PCRE_STUDY_JIT_COMPILE;
453    }
454#endif
455
456    /* If study option was specified, study the pattern and
457       store the result in extra for passing to pcre_exec. */
458    if (do_study) {
459        extra = pcre_study(re, soptions, &error);
460        if (extra) {
461            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
462            extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
463            extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
464        }
465        if (error != NULL) {
466            php_error_docref(NULL, E_WARNING, "Error while studying pattern");
467        }
468    } else {
469        extra = NULL;
470    }
471
472    efree(pattern);
473
474    /*
475     * If we reached cache limit, clean out the items from the head of the list;
476     * these are supposedly the oldest ones (but not necessarily the least used
477     * ones).
478     */
479    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
480        int num_clean = PCRE_CACHE_SIZE / 8;
481        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
482    }
483
484    /* Store the compiled pattern and extra info in the cache. */
485    new_entry.re = re;
486    new_entry.extra = extra;
487    new_entry.preg_options = poptions;
488    new_entry.compile_options = coptions;
489#if HAVE_SETLOCALE
490    new_entry.locale = BG(locale_string) ?
491        ((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
492            zend_string_copy(BG(locale_string)) :
493            zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1)) :
494        NULL;
495    new_entry.tables = tables;
496#endif
497    new_entry.refcount = 0;
498
499    rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
500    if (rc < 0) {
501        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
502        return NULL;
503    }
504
505    rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
506    if (rc < 0) {
507        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
508        return NULL;
509    }
510
511    /*
512     * Interned strings are not duplicated when stored in HashTable,
513     * but all the interned strings created during HTTP request are removed
514     * at end of request. However PCRE_G(pcre_cache) must be consistent
515     * on the next request as well. So we disable usage of interned strings
516     * as hash keys especually for this table.
517     * See bug #63180
518     */
519    if (!ZSTR_IS_INTERNED(regex) || !(GC_FLAGS(regex) & IS_STR_PERMANENT)) {
520        zend_string *str = zend_string_init(ZSTR_VAL(regex), ZSTR_LEN(regex), 1);
521        GC_REFCOUNT(str) = 0; /* will be incremented by zend_hash_update_mem() */
522        ZSTR_H(str) = ZSTR_H(regex);
523        regex = str;
524    }
525
526    pce = zend_hash_update_mem(&PCRE_G(pcre_cache), regex, &new_entry, sizeof(pcre_cache_entry));
527
528    return pce;
529}
530/* }}} */
531
532/* {{{ pcre_get_compiled_regex
533 */
534PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
535{
536    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
537
538    if (extra) {
539        *extra = pce ? pce->extra : NULL;
540    }
541    if (preg_options) {
542        *preg_options = pce ? pce->preg_options : 0;
543    }
544
545    return pce ? pce->re : NULL;
546}
547/* }}} */
548
549/* {{{ pcre_get_compiled_regex_ex
550 */
551PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
552{
553    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
554
555    if (extra) {
556        *extra = pce ? pce->extra : NULL;
557    }
558    if (preg_options) {
559        *preg_options = pce ? pce->preg_options : 0;
560    }
561    if (compile_options) {
562        *compile_options = pce ? pce->compile_options : 0;
563    }
564
565    return pce ? pce->re : NULL;
566}
567/* }}} */
568
569/* {{{ add_offset_pair */
570static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
571{
572    zval match_pair, tmp;
573
574    array_init_size(&match_pair, 2);
575
576    /* Add (match, offset) to the return value */
577    ZVAL_STRINGL(&tmp, str, len);
578    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
579    ZVAL_LONG(&tmp, offset);
580    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
581
582    if (name) {
583        Z_ADDREF(match_pair);
584        zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
585    }
586    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
587}
588/* }}} */
589
590static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
591{
592    /* parameters */
593    zend_string      *regex;            /* Regular expression */
594    zend_string      *subject;          /* String to match against */
595    pcre_cache_entry *pce;              /* Compiled regular expression */
596    zval             *subpats = NULL;   /* Array for subpatterns */
597    zend_long         flags = 0;        /* Match control flags */
598    zend_long         start_offset = 0; /* Where the new search starts */
599
600#ifndef FAST_ZPP
601    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
602                              &subject, &subpats, &flags, &start_offset) == FAILURE) {
603        RETURN_FALSE;
604    }
605#else
606    ZEND_PARSE_PARAMETERS_START(2, 5)
607        Z_PARAM_STR(regex)
608        Z_PARAM_STR(subject)
609        Z_PARAM_OPTIONAL
610        Z_PARAM_ZVAL_EX(subpats, 0, 1)
611        Z_PARAM_LONG(flags)
612        Z_PARAM_LONG(start_offset)
613    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
614#endif
615
616    if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
617            php_error_docref(NULL, E_WARNING, "Subject is too long");
618            RETURN_FALSE;
619    }
620
621    /* Compile regex or get it from cache. */
622    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
623        RETURN_FALSE;
624    }
625
626    pce->refcount++;
627    php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
628        global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
629    pce->refcount--;
630}
631/* }}} */
632
633/* {{{ php_pcre_match_impl() */
634PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
635    zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
636{
637    zval             result_set,        /* Holds a set of subpatterns after
638                                           a global match */
639                    *match_sets = NULL; /* An array of sets of matches for each
640                                           subpattern after a global match */
641    pcre_extra      *extra = pce->extra;/* Holds results of studying */
642    pcre_extra       extra_data;        /* Used locally for exec options */
643    int              exoptions = 0;     /* Execution options */
644    int              count = 0;         /* Count of matched subpatterns */
645    int             *offsets;           /* Array of subpattern offsets */
646    int              num_subpats;       /* Number of captured subpatterns */
647    int              size_offsets;      /* Size of the offsets array */
648    int              matched;           /* Has anything matched */
649    int              g_notempty = 0;    /* If the match should not be empty */
650    const char     **stringlist;        /* Holds list of subpatterns */
651    char           **subpat_names;      /* Array for named subpatterns */
652    int              i;
653    int              subpats_order;     /* Order of subpattern matches */
654    int              offset_capture;    /* Capture match offsets: yes/no */
655    unsigned char   *mark = NULL;       /* Target for MARK name */
656    zval            marks;              /* Array of marks for PREG_PATTERN_ORDER */
657    ALLOCA_FLAG(use_heap);
658
659    ZVAL_UNDEF(&marks);
660
661    /* Overwrite the passed-in value for subpatterns with an empty array. */
662    if (subpats != NULL) {
663        zval_dtor(subpats);
664        array_init(subpats);
665    }
666
667    subpats_order = global ? PREG_PATTERN_ORDER : 0;
668
669    if (use_flags) {
670        offset_capture = flags & PREG_OFFSET_CAPTURE;
671
672        /*
673         * subpats_order is pre-set to pattern mode so we change it only if
674         * necessary.
675         */
676        if (flags & 0xff) {
677            subpats_order = flags & 0xff;
678        }
679        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
680            (!global && subpats_order != 0)) {
681            php_error_docref(NULL, E_WARNING, "Invalid flags specified");
682            return;
683        }
684    } else {
685        offset_capture = 0;
686    }
687
688    /* Negative offset counts from the end of the string. */
689    if (start_offset < 0) {
690        start_offset = subject_len + start_offset;
691        if (start_offset < 0) {
692            start_offset = 0;
693        }
694    }
695
696    if (extra == NULL) {
697        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
698        extra = &extra_data;
699    }
700    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
701    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
702#ifdef PCRE_EXTRA_MARK
703    extra->mark = &mark;
704    extra->flags |= PCRE_EXTRA_MARK;
705#endif
706
707    /* Calculate the size of the offsets array, and allocate memory for it. */
708    num_subpats = pce->capture_count + 1;
709    size_offsets = num_subpats * 3;
710
711    /*
712     * Build a mapping from subpattern numbers to their names. We will
713     * allocate the table only if there are any named subpatterns.
714     */
715    subpat_names = NULL;
716    if (pce->name_count > 0) {
717        subpat_names = make_subpats_table(num_subpats, pce);
718        if (!subpat_names) {
719            RETURN_FALSE;
720        }
721    }
722
723    if (size_offsets <= 32) {
724        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
725    } else {
726        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
727    }
728    memset(offsets, 0, size_offsets*sizeof(int));
729    /* Allocate match sets array and initialize the values. */
730    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
731        match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
732        for (i=0; i<num_subpats; i++) {
733            array_init(&match_sets[i]);
734        }
735    }
736
737    matched = 0;
738    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
739
740    do {
741        /* Execute the regular expression. */
742        count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
743                          exoptions|g_notempty, offsets, size_offsets);
744
745        /* the string was already proved to be valid UTF-8 */
746        exoptions |= PCRE_NO_UTF8_CHECK;
747
748        /* Check for too many substrings condition. */
749        if (count == 0) {
750            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
751            count = size_offsets/3;
752        }
753
754        /* If something has matched */
755        if (count > 0) {
756            matched++;
757
758            /* If subpatterns array has been passed, fill it in with values. */
759            if (subpats != NULL) {
760                /* Try to get the list of substrings and display a warning if failed. */
761                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
762                    if (subpat_names) {
763                        efree(subpat_names);
764                    }
765                    if (size_offsets <= 32) {
766                        free_alloca(offsets, use_heap);
767                    } else {
768                        efree(offsets);
769                    }
770                    if (match_sets) efree(match_sets);
771                    php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
772                    RETURN_FALSE;
773                }
774
775                if (global) {   /* global pattern matching */
776                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
777                        /* For each subpattern, insert it into the appropriate array. */
778                        if (offset_capture) {
779                            for (i = 0; i < count; i++) {
780                                add_offset_pair(&match_sets[i], (char *)stringlist[i],
781                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
782                            }
783                        } else {
784                            for (i = 0; i < count; i++) {
785                                add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
786                                                       offsets[(i<<1)+1] - offsets[i<<1]);
787                            }
788                        }
789                        /* Add MARK, if available */
790                        if (mark) {
791                            if (Z_TYPE(marks) == IS_UNDEF) {
792                                array_init(&marks);
793                            }
794                            add_index_string(&marks, matched - 1, (char *) mark);
795                        }
796                        /*
797                         * If the number of captured subpatterns on this run is
798                         * less than the total possible number, pad the result
799                         * arrays with empty strings.
800                         */
801                        if (count < num_subpats) {
802                            for (; i < num_subpats; i++) {
803                                add_next_index_string(&match_sets[i], "");
804                            }
805                        }
806                    } else {
807                        /* Allocate the result set array */
808                        array_init_size(&result_set, count + (mark ? 1 : 0));
809
810                        /* Add all the subpatterns to it */
811                        if (subpat_names) {
812                            if (offset_capture) {
813                                for (i = 0; i < count; i++) {
814                                    add_offset_pair(&result_set, (char *)stringlist[i],
815                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
816                                }
817                            } else {
818                                for (i = 0; i < count; i++) {
819                                    if (subpat_names[i]) {
820                                        add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
821                                                               offsets[(i<<1)+1] - offsets[i<<1]);
822                                    }
823                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
824                                                           offsets[(i<<1)+1] - offsets[i<<1]);
825                                }
826                            }
827                        } else {
828                            if (offset_capture) {
829                                for (i = 0; i < count; i++) {
830                                    add_offset_pair(&result_set, (char *)stringlist[i],
831                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
832                                }
833                            } else {
834                                for (i = 0; i < count; i++) {
835                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
836                                                           offsets[(i<<1)+1] - offsets[i<<1]);
837                                }
838                            }
839                        }
840                        /* Add MARK, if available */
841                        if (mark) {
842                            add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
843                        }
844                        /* And add it to the output array */
845                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
846                    }
847                } else {            /* single pattern matching */
848                    /* For each subpattern, insert it into the subpatterns array. */
849                    if (subpat_names) {
850                        if (offset_capture) {
851                            for (i = 0; i < count; i++) {
852                                add_offset_pair(subpats, (char *)stringlist[i],
853                                                offsets[(i<<1)+1] - offsets[i<<1],
854                                                offsets[i<<1], subpat_names[i]);
855                            }
856                        } else {
857                            for (i = 0; i < count; i++) {
858                                if (subpat_names[i]) {
859                                    add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
860                                                      offsets[(i<<1)+1] - offsets[i<<1]);
861                                }
862                                add_next_index_stringl(subpats, (char *)stringlist[i],
863                                                       offsets[(i<<1)+1] - offsets[i<<1]);
864                            }
865                        }
866                    } else {
867                        if (offset_capture) {
868                            for (i = 0; i < count; i++) {
869                                add_offset_pair(subpats, (char *)stringlist[i],
870                                                offsets[(i<<1)+1] - offsets[i<<1],
871                                                offsets[i<<1], NULL);
872                            }
873                        } else {
874                            for (i = 0; i < count; i++) {
875                                add_next_index_stringl(subpats, (char *)stringlist[i],
876                                                       offsets[(i<<1)+1] - offsets[i<<1]);
877                            }
878                        }
879                    }
880                    /* Add MARK, if available */
881                    if (mark) {
882                        add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
883                    }
884                }
885
886                pcre_free((void *) stringlist);
887            }
888        } else if (count == PCRE_ERROR_NOMATCH) {
889            /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
890               this is not necessarily the end. We need to advance
891               the start offset, and continue. Fudge the offset values
892               to achieve this, unless we're already at the end of the string. */
893            if (g_notempty != 0 && start_offset < subject_len) {
894                int unit_len = calculate_unit_length(pce, subject + start_offset);
895
896                offsets[0] = (int)start_offset;
897                offsets[1] = (int)(start_offset + unit_len);
898            } else
899                break;
900        } else {
901            pcre_handle_exec_error(count);
902            break;
903        }
904
905        /* If we have matched an empty string, mimic what Perl's /g options does.
906           This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
907           the match again at the same point. If this fails (picked up above) we
908           advance to the next character. */
909        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
910
911        /* Advance to the position right after the last full match */
912        start_offset = offsets[1];
913    } while (global);
914
915    /* Add the match sets to the output array and clean up */
916    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
917        if (subpat_names) {
918            for (i = 0; i < num_subpats; i++) {
919                if (subpat_names[i]) {
920                    zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
921                                     strlen(subpat_names[i]), &match_sets[i]);
922                    Z_ADDREF(match_sets[i]);
923                }
924                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
925            }
926        } else {
927            for (i = 0; i < num_subpats; i++) {
928                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
929            }
930        }
931        efree(match_sets);
932
933        if (Z_TYPE(marks) != IS_UNDEF) {
934            add_assoc_zval(subpats, "MARK", &marks);
935        }
936    }
937
938    if (size_offsets <= 32) {
939        free_alloca(offsets, use_heap);
940    } else {
941        efree(offsets);
942    }
943    if (subpat_names) {
944        efree(subpat_names);
945    }
946
947    /* Did we encounter an error? */
948    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
949        RETVAL_LONG(matched);
950    } else {
951        RETVAL_FALSE;
952    }
953}
954/* }}} */
955
956/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
957   Perform a Perl-style regular expression match */
958static PHP_FUNCTION(preg_match)
959{
960    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
961}
962/* }}} */
963
964/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
965   Perform a Perl-style global regular expression match */
966static PHP_FUNCTION(preg_match_all)
967{
968    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
969}
970/* }}} */
971
/* {{{ preg_get_backref
 *
 * Try to parse a back-reference at *str. *str must point at the introducer
 * character ('\\' or '$'). Accepted forms are <introducer>N, <introducer>NN
 * and ${N} / ${NN}, where N is a decimal digit.
 *
 * On success the reference number is stored in *backref, *str is advanced
 * past the whole reference and 1 is returned. On failure 0 is returned and
 * *str is left untouched (*backref may already have been written).
 */
static int preg_get_backref(char **str, int *backref)
{
    char *cursor = *str;
    int   braced = 0;

    /* An introducer that is the last character cannot start a reference. */
    if (cursor[1] == '\0') {
        return 0;
    }

    /* Only '$' may be followed by a braced number. */
    if (cursor[0] == '$' && cursor[1] == '{') {
        braced = 1;
        cursor += 2;
    } else {
        cursor += 1;
    }

    /* At least one digit is mandatory. */
    if (*cursor < '0' || *cursor > '9') {
        return 0;
    }
    *backref = *cursor - '0';
    cursor++;

    /* An optional second digit; a NUL byte simply fails the range test. */
    if (*cursor >= '0' && *cursor <= '9') {
        *backref = *backref * 10 + *cursor - '0';
        cursor++;
    }

    /* A braced reference must be closed right after the digits. */
    if (braced) {
        if (*cursor != '}') {
            return 0;
        }
        cursor++;
    }

    *str = cursor;
    return 1;
}
/* }}} */
1010
1011/* {{{ preg_do_repl_func
1012 */
1013static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
1014{
1015    zend_string *result_str;
1016    zval         retval;            /* Function return value */
1017    zval         args[1];           /* Argument to pass to function */
1018    int          i;
1019
1020    array_init_size(&args[0], count + (mark ? 1 : 0));
1021    if (subpat_names) {
1022        for (i = 0; i < count; i++) {
1023            if (subpat_names[i]) {
1024                add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
1025            }
1026            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1027        }
1028    } else {
1029        for (i = 0; i < count; i++) {
1030            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1031        }
1032    }
1033    if (mark) {
1034        add_assoc_string(&args[0], "MARK", (char *) mark);
1035    }
1036
1037    if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1038        result_str = zval_get_string(&retval);
1039        zval_ptr_dtor(&retval);
1040    } else {
1041        if (!EG(exception)) {
1042            php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1043        }
1044
1045        result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1046    }
1047
1048    zval_ptr_dtor(&args[0]);
1049
1050    return result_str;
1051}
1052/* }}} */
1053
1054/* {{{ php_pcre_replace
1055 */
1056PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1057                              zend_string *subject_str,
1058                              char *subject, int subject_len,
1059                              zval *replace_val, int is_callable_replace,
1060                              int limit, int *replace_count)
1061{
1062    pcre_cache_entry    *pce;               /* Compiled regular expression */
1063    zend_string         *result;            /* Function result */
1064
1065    /* Compile regex or get it from cache. */
1066    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1067        return NULL;
1068    }
1069    pce->refcount++;
1070    result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
1071        is_callable_replace, limit, replace_count);
1072    pce->refcount--;
1073
1074    return result;
1075}
1076/* }}} */
1077
1078/* {{{ php_pcre_replace_impl() */
1079PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count)
1080{
1081    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1082    pcre_extra       extra_data;        /* Used locally for exec options */
1083    int              exoptions = 0;     /* Execution options */
1084    int              count = 0;         /* Count of matched subpatterns */
1085    int             *offsets;           /* Array of subpattern offsets */
1086    char            **subpat_names;     /* Array for named subpatterns */
1087    int              num_subpats;       /* Number of captured subpatterns */
1088    int              size_offsets;      /* Size of the offsets array */
1089    int              new_len;           /* Length of needed storage */
1090    int              alloc_len;         /* Actual allocated length */
1091    int              match_len;         /* Length of the current match */
1092    int              backref;           /* Backreference number */
1093    int              start_offset;      /* Where the new search starts */
1094    int              g_notempty=0;      /* If the match should not be empty */
1095    int              replace_len=0;     /* Length of replacement string */
1096    char            *replace=NULL,      /* Replacement string */
1097                    *walkbuf,           /* Location of current replacement in the result */
1098                    *walk,              /* Used to walk the replacement string */
1099                    *match,             /* The current match */
1100                    *piece,             /* The current piece of subject */
1101                    *replace_end=NULL,  /* End of replacement string */
1102                     walk_last;         /* Last walked character */
1103    int              result_len;        /* Length of result */
1104    unsigned char   *mark = NULL;       /* Target for MARK name */
1105    zend_string     *result;            /* Result of replacement */
1106    zend_string     *eval_result=NULL;  /* Result of custom function */
1107
1108    ALLOCA_FLAG(use_heap);
1109
1110    if (extra == NULL) {
1111        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1112        extra = &extra_data;
1113    }
1114
1115    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1116    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1117
1118    if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
1119        php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1120        return NULL;
1121    }
1122
1123    if (!is_callable_replace) {
1124        replace = Z_STRVAL_P(replace_val);
1125        replace_len = (int)Z_STRLEN_P(replace_val);
1126        replace_end = replace + replace_len;
1127    }
1128
1129    /* Calculate the size of the offsets array, and allocate memory for it. */
1130    num_subpats = pce->capture_count + 1;
1131    size_offsets = num_subpats * 3;
1132    if (size_offsets <= 32) {
1133        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1134    } else {
1135        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1136    }
1137
1138    /*
1139     * Build a mapping from subpattern numbers to their names. We will
1140     * allocate the table only if there are any named subpatterns.
1141     */
1142    subpat_names = NULL;
1143    if (UNEXPECTED(pce->name_count > 0)) {
1144        subpat_names = make_subpats_table(num_subpats, pce);
1145        if (!subpat_names) {
1146            return NULL;
1147        }
1148    }
1149
1150    alloc_len = 0;
1151    result = NULL;
1152
1153    /* Initialize */
1154    match = NULL;
1155    start_offset = 0;
1156    result_len = 0;
1157    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1158
1159    while (1) {
1160#ifdef PCRE_EXTRA_MARK
1161        extra->mark = &mark;
1162        extra->flags |= PCRE_EXTRA_MARK;
1163#endif
1164        /* Execute the regular expression. */
1165        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1166                          exoptions|g_notempty, offsets, size_offsets);
1167
1168        /* the string was already proved to be valid UTF-8 */
1169        exoptions |= PCRE_NO_UTF8_CHECK;
1170
1171        /* Check for too many substrings condition. */
1172        if (UNEXPECTED(count == 0)) {
1173            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1174            count = size_offsets / 3;
1175        }
1176
1177        piece = subject + start_offset;
1178
1179        /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
1180        if (EXPECTED(count > 0 && limit)) {
1181            if (UNEXPECTED(replace_count)) {
1182                ++*replace_count;
1183            }
1184
1185            /* Set the match location in subject */
1186            match = subject + offsets[0];
1187
1188            new_len = result_len + offsets[0] - start_offset; /* part before the match */
1189
1190            /* if (!is_callable_replace) */
1191            if (EXPECTED(replace)) {
1192                /* do regular substitution */
1193                walk = replace;
1194                walk_last = 0;
1195
1196                while (walk < replace_end) {
1197                    if ('\\' == *walk || '$' == *walk) {
1198                        if (walk_last == '\\') {
1199                            walk++;
1200                            walk_last = 0;
1201                            continue;
1202                        }
1203                        if (preg_get_backref(&walk, &backref)) {
1204                            if (backref < count)
1205                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1206                            continue;
1207                        }
1208                    }
1209                    new_len++;
1210                    walk++;
1211                    walk_last = walk[-1];
1212                }
1213
1214                if (new_len >= alloc_len) {
1215                    alloc_len = alloc_len + 2 * new_len;
1216                    if (result == NULL) {
1217                        result = zend_string_alloc(alloc_len, 0);
1218                    } else {
1219                        result = zend_string_extend(result, alloc_len, 0);
1220                    }
1221                }
1222
1223                /* copy the part of the string before the match */
1224                memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1225                result_len += (int)(match-piece);
1226
1227                /* copy replacement and backrefs */
1228                walkbuf = ZSTR_VAL(result) + result_len;
1229
1230                walk = replace;
1231                walk_last = 0;
1232                while (walk < replace_end) {
1233                    if ('\\' == *walk || '$' == *walk) {
1234                        if (walk_last == '\\') {
1235                            *(walkbuf-1) = *walk++;
1236                            walk_last = 0;
1237                            continue;
1238                        }
1239                        if (preg_get_backref(&walk, &backref)) {
1240                            if (backref < count) {
1241                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1242                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1243                                walkbuf += match_len;
1244                            }
1245                            continue;
1246                        }
1247                    }
1248                    *walkbuf++ = *walk++;
1249                    walk_last = walk[-1];
1250                }
1251                *walkbuf = '\0';
1252                /* increment the result length by how much we've added to the string */
1253                result_len += (int)(walkbuf - (ZSTR_VAL(result) + result_len));
1254            } else {
1255                /* Use custom function to get replacement string and its length. */
1256                eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1257                ZEND_ASSERT(eval_result);
1258                new_len += (int)ZSTR_LEN(eval_result);
1259                if (new_len >= alloc_len) {
1260                    alloc_len = alloc_len + 2 * new_len;
1261                    if (result == NULL) {
1262                        result = zend_string_alloc(alloc_len, 0);
1263                    } else {
1264                        result = zend_string_extend(result, alloc_len, 0);
1265                    }
1266                }
1267                /* copy the part of the string before the match */
1268                memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1269                result_len += (int)(match-piece);
1270
1271                /* copy replacement and backrefs */
1272                walkbuf = ZSTR_VAL(result) + result_len;
1273
1274                /* If using custom function, copy result to the buffer and clean up. */
1275                memcpy(walkbuf, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1276                result_len += (int)ZSTR_LEN(eval_result);
1277                zend_string_release(eval_result);
1278            }
1279
1280            if (EXPECTED(limit)) {
1281                limit--;
1282            }
1283        } else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) {
1284            /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1285               this is not necessarily the end. We need to advance
1286               the start offset, and continue. Fudge the offset values
1287               to achieve this, unless we're already at the end of the string. */
1288            if (g_notempty != 0 && start_offset < subject_len) {
1289                int unit_len = calculate_unit_length(pce, piece);
1290
1291                offsets[0] = start_offset;
1292                offsets[1] = start_offset + unit_len;
1293                memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
1294                result_len += unit_len;
1295            } else {
1296                if (!result && subject_str) {
1297                    result = zend_string_copy(subject_str);
1298                    break;
1299                }
1300                new_len = result_len + subject_len - start_offset;
1301                if (new_len > alloc_len) {
1302                    alloc_len = new_len; /* now we know exactly how long it is */
1303                    if (NULL != result) {
1304                        result = zend_string_realloc(result, alloc_len, 0);
1305                    } else {
1306                        result = zend_string_alloc(alloc_len, 0);
1307                    }
1308                }
1309                /* stick that last bit of string on our output */
1310                memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
1311                result_len += subject_len - start_offset;
1312                ZSTR_VAL(result)[result_len] = '\0';
1313                ZSTR_LEN(result) = result_len;
1314                break;
1315            }
1316        } else {
1317            pcre_handle_exec_error(count);
1318            if (result) {
1319                zend_string_free(result);
1320                result = NULL;
1321            }
1322            break;
1323        }
1324
1325        /* If we have matched an empty string, mimic what Perl's /g options does.
1326           This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1327           the match again at the same point. If this fails (picked up above) we
1328           advance to the next character. */
1329        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1330
1331        /* Advance to the next piece. */
1332        start_offset = offsets[1];
1333    }
1334
1335    if (size_offsets <= 32) {
1336        free_alloca(offsets, use_heap);
1337    } else {
1338        efree(offsets);
1339    }
1340    if (UNEXPECTED(subpat_names)) {
1341        efree(subpat_names);
1342    }
1343
1344    return result;
1345}
1346/* }}} */
1347
1348/* {{{ php_replace_in_subject
1349 */
1350static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1351{
1352    zval        *regex_entry,
1353                *replace_entry = NULL,
1354                *replace_value,
1355                 empty_replace;
1356    zend_string *result;
1357    uint32_t replace_idx;
1358    zend_string *subject_str = zval_get_string(subject);
1359
1360    /* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1361    ZVAL_EMPTY_STRING(&empty_replace);
1362
1363    if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str))) {
1364            php_error_docref(NULL, E_WARNING, "Subject is too long");
1365            return NULL;
1366    }
1367
1368    /* If regex is an array */
1369    if (Z_TYPE_P(regex) == IS_ARRAY) {
1370        replace_value = replace;
1371        replace_idx = 0;
1372
1373        /* For each entry in the regex array, get the entry */
1374        ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1375            /* Make sure we're dealing with strings. */
1376            zend_string *regex_str = zval_get_string(regex_entry);
1377
1378            /* If replace is an array and not a callable construct */
1379            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1380                /* Get current entry */
1381                replace_entry = NULL;
1382                while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1383                    if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
1384                        replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
1385                        break;
1386                    }
1387                    replace_idx++;
1388                }
1389                if (replace_entry != NULL) {
1390                    if (!is_callable_replace) {
1391                        convert_to_string_ex(replace_entry);
1392                    }
1393                    replace_value = replace_entry;
1394                    replace_idx++;
1395                } else {
1396                    /* We've run out of replacement strings, so use an empty one */
1397                    replace_value = &empty_replace;
1398                }
1399            }
1400
1401            /* Do the actual replacement and put the result back into subject_str
1402               for further replacements. */
1403            if ((result = php_pcre_replace(regex_str,
1404                                           subject_str,
1405                                           ZSTR_VAL(subject_str),
1406                                           (int)ZSTR_LEN(subject_str),
1407                                           replace_value,
1408                                           is_callable_replace,
1409                                           limit,
1410                                           replace_count)) != NULL) {
1411                zend_string_release(subject_str);
1412                subject_str = result;
1413            } else {
1414                zend_string_release(subject_str);
1415                zend_string_release(regex_str);
1416                return NULL;
1417            }
1418
1419            zend_string_release(regex_str);
1420        } ZEND_HASH_FOREACH_END();
1421
1422        return subject_str;
1423    } else {
1424        result = php_pcre_replace(Z_STR_P(regex),
1425                                  subject_str,
1426                                  ZSTR_VAL(subject_str),
1427                                  (int)ZSTR_LEN(subject_str),
1428                                  replace,
1429                                  is_callable_replace,
1430                                  limit,
1431                                  replace_count);
1432        zend_string_release(subject_str);
1433        return result;
1434    }
1435}
1436/* }}} */
1437
1438/* {{{ preg_replace_impl
1439 */
1440static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
1441{
1442    zval        *subject_entry;
1443    zend_string *result;
1444    zend_string *string_key;
1445    zend_ulong   num_key;
1446    int          replace_count = 0, old_replace_count;
1447
1448    if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1449        SEPARATE_ZVAL(replace);
1450        convert_to_string_ex(replace);
1451    }
1452
1453    if (Z_TYPE_P(regex) != IS_ARRAY) {
1454        SEPARATE_ZVAL(regex);
1455        convert_to_string_ex(regex);
1456    }
1457
1458    /* if subject is an array */
1459    if (Z_TYPE_P(subject) == IS_ARRAY) {
1460        array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1461
1462        /* For each subject entry, convert it to string, then perform replacement
1463           and add the result to the return_value array. */
1464        ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1465            old_replace_count = replace_count;
1466            if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1467                if (!is_filter || replace_count > old_replace_count) {
1468                    /* Add to return array */
1469                    zval zv;
1470
1471                    ZVAL_STR(&zv, result);
1472                    if (string_key) {
1473                        zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
1474                    } else {
1475                        zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
1476                    }
1477                } else {
1478                    zend_string_release(result);
1479                }
1480            }
1481        } ZEND_HASH_FOREACH_END();
1482    } else {
1483        /* if subject is not an array */
1484        old_replace_count = replace_count;
1485        if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1486            if (!is_filter || replace_count > old_replace_count) {
1487                RETVAL_STR(result);
1488            } else {
1489                zend_string_release(result);
1490            }
1491        }
1492    }
1493
1494    return replace_count;
1495}
1496/* }}} */
1497
1498/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1499   Perform Perl-style regular expression replacement. */
1500static PHP_FUNCTION(preg_replace)
1501{
1502    zval *regex, *replace, *subject, *zcount = NULL;
1503    zend_long limit = -1;
1504    int replace_count;
1505
1506#ifndef FAST_ZPP
1507    /* Get function parameters and do error-checking. */
1508    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1509        return;
1510    }
1511#else
1512    ZEND_PARSE_PARAMETERS_START(3, 5)
1513        Z_PARAM_ZVAL(regex)
1514        Z_PARAM_ZVAL(replace)
1515        Z_PARAM_ZVAL(subject)
1516        Z_PARAM_OPTIONAL
1517        Z_PARAM_LONG(limit)
1518        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1519    ZEND_PARSE_PARAMETERS_END();
1520#endif
1521
1522    if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1523        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1524        RETURN_FALSE;
1525    }
1526
1527    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0);
1528    if (zcount) {
1529        zval_dtor(zcount);
1530        ZVAL_LONG(zcount, replace_count);
1531    }
1532}
1533/* }}} */
1534
1535/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1536   Perform Perl-style regular expression replacement using replacement callback. */
1537static PHP_FUNCTION(preg_replace_callback)
1538{
1539    zval *regex, *replace, *subject, *zcount = NULL;
1540    zend_long limit = -1;
1541    zend_string *callback_name;
1542    int replace_count;
1543
1544#ifndef FAST_ZPP
1545    /* Get function parameters and do error-checking. */
1546    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1547        return;
1548    }
1549#else
1550    ZEND_PARSE_PARAMETERS_START(3, 5)
1551        Z_PARAM_ZVAL(regex)
1552        Z_PARAM_ZVAL(replace)
1553        Z_PARAM_ZVAL(subject)
1554        Z_PARAM_OPTIONAL
1555        Z_PARAM_LONG(limit)
1556        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1557    ZEND_PARSE_PARAMETERS_END();
1558#endif
1559
1560    if (!zend_is_callable(replace, 0, &callback_name)) {
1561        php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
1562        zend_string_release(callback_name);
1563        ZVAL_COPY(return_value, subject);
1564        return;
1565    }
1566    zend_string_release(callback_name);
1567
1568    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0);
1569    if (zcount) {
1570        zval_dtor(zcount);
1571        ZVAL_LONG(zcount, replace_count);
1572    }
1573}
1574/* }}} */
1575
1576/* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
1577   Perform Perl-style regular expression replacement using replacement callback. */
1578static PHP_FUNCTION(preg_replace_callback_array)
1579{
1580    zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
1581    zend_long limit = -1;
1582    zend_string *str_idx;
1583    zend_string *callback_name;
1584    int replace_count = 0;
1585
1586#ifndef FAST_ZPP
1587    /* Get function parameters and do error-checking. */
1588    if (zend_parse_parameters(ZEND_NUM_ARGS(), "az|lz/", &pattern, &subject, &limit, &zcount) == FAILURE) {
1589        return;
1590    }
1591#else
1592    ZEND_PARSE_PARAMETERS_START(2, 4)
1593        Z_PARAM_ARRAY(pattern)
1594        Z_PARAM_ZVAL(subject)
1595        Z_PARAM_OPTIONAL
1596        Z_PARAM_LONG(limit)
1597        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1598    ZEND_PARSE_PARAMETERS_END();
1599#endif
1600
1601    ZVAL_UNDEF(&zv);
1602    ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
1603        if (str_idx) {
1604            ZVAL_STR_COPY(&regex, str_idx);
1605        } else {
1606            php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
1607            zval_ptr_dtor(return_value);
1608            RETURN_NULL();
1609        }
1610
1611        if (!zend_is_callable(replace, 0, &callback_name)) {
1612            php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
1613            zend_string_release(callback_name);
1614            zval_ptr_dtor(&regex);
1615            zval_ptr_dtor(return_value);
1616            ZVAL_COPY(return_value, subject);
1617            return;
1618        }
1619        zend_string_release(callback_name);
1620
1621        if (Z_ISNULL_P(return_value)) {
1622            replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0);
1623        } else {
1624            replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0);
1625            zval_ptr_dtor(return_value);
1626        }
1627
1628        zval_ptr_dtor(&regex);
1629
1630        if (Z_ISUNDEF(zv)) {
1631            RETURN_NULL();
1632        }
1633
1634        ZVAL_COPY_VALUE(return_value, &zv);
1635
1636        if (UNEXPECTED(EG(exception))) {
1637            zval_ptr_dtor(return_value);
1638            RETURN_NULL();
1639        }
1640    } ZEND_HASH_FOREACH_END();
1641
1642    if (zcount) {
1643        zval_dtor(zcount);
1644        ZVAL_LONG(zcount, replace_count);
1645    }
1646}
1647/* }}} */
1648
1649/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1650   Perform Perl-style regular expression replacement and only return matches. */
1651static PHP_FUNCTION(preg_filter)
1652{
1653    zval *regex, *replace, *subject, *zcount = NULL;
1654    zend_long limit = -1;
1655    int replace_count;
1656
1657#ifndef FAST_ZPP
1658    /* Get function parameters and do error-checking. */
1659    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1660        return;
1661    }
1662#else
1663    ZEND_PARSE_PARAMETERS_START(3, 5)
1664        Z_PARAM_ZVAL(regex)
1665        Z_PARAM_ZVAL(replace)
1666        Z_PARAM_ZVAL(subject)
1667        Z_PARAM_OPTIONAL
1668        Z_PARAM_LONG(limit)
1669        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1670    ZEND_PARSE_PARAMETERS_END();
1671#endif
1672
1673    if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1674        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1675        RETURN_FALSE;
1676    }
1677
1678    replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1);
1679    if (zcount) {
1680        zval_dtor(zcount);
1681        ZVAL_LONG(zcount, replace_count);
1682    }
1683}
1684/* }}} */
1685
1686/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1687   Split string into an array using a perl-style regular expression as a delimiter */
1688static PHP_FUNCTION(preg_split)
1689{
1690    zend_string         *regex;         /* Regular expression */
1691    zend_string         *subject;       /* String to match against */
1692    zend_long            limit_val = -1;/* Integer value of limit */
1693    zend_long            flags = 0;     /* Match control flags */
1694    pcre_cache_entry    *pce;           /* Compiled regular expression */
1695
1696    /* Get function parameters and do error checking */
1697#ifndef FAST_ZPP
1698    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
1699                              &subject, &limit_val, &flags) == FAILURE) {
1700        RETURN_FALSE;
1701    }
1702#else
1703    ZEND_PARSE_PARAMETERS_START(2, 4)
1704        Z_PARAM_STR(regex)
1705        Z_PARAM_STR(subject)
1706        Z_PARAM_OPTIONAL
1707        Z_PARAM_LONG(limit_val)
1708        Z_PARAM_LONG(flags)
1709    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1710#endif
1711
1712    if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
1713            php_error_docref(NULL, E_WARNING, "Subject is too long");
1714            RETURN_FALSE;
1715    }
1716
1717    /* Compile regex or get it from cache. */
1718    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1719        RETURN_FALSE;
1720    }
1721
1722    pce->refcount++;
1723    php_pcre_split_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, (int)limit_val, flags);
1724    pce->refcount--;
1725}
1726/* }}} */
1727
1728/* {{{ php_pcre_split
1729 */
1730PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1731    zend_long limit_val, zend_long flags)
1732{
1733    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1734    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1735    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1736    pcre_extra       extra_data;        /* Used locally for exec options */
1737    int             *offsets;           /* Array of subpattern offsets */
1738    int              size_offsets;      /* Size of the offsets array */
1739    int              exoptions = 0;     /* Execution options */
1740    int              count = 0;         /* Count of matched subpatterns */
1741    int              start_offset;      /* Where the new search starts */
1742    int              next_offset;       /* End of the last delimiter match + 1 */
1743    int              g_notempty = 0;    /* If the match should not be empty */
1744    char            *last_match;        /* Location of last match */
1745    int              no_empty;          /* If NO_EMPTY flag is set */
1746    int              delim_capture;     /* If delimiters should be captured */
1747    int              offset_capture;    /* If offsets should be captured */
1748    zval             tmp;
1749    ALLOCA_FLAG(use_heap);
1750
1751    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1752    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1753    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1754
1755    if (limit_val == 0) {
1756        limit_val = -1;
1757    }
1758
1759    if (extra == NULL) {
1760        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1761        extra = &extra_data;
1762    }
1763    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1764    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1765#ifdef PCRE_EXTRA_MARK
1766    extra->flags &= ~PCRE_EXTRA_MARK;
1767#endif
1768
1769    /* Initialize return value */
1770    array_init(return_value);
1771
1772    /* Calculate the size of the offsets array, and allocate memory for it. */
1773    size_offsets = (pce->capture_count + 1) * 3;
1774    if (size_offsets <= 32) {
1775        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1776    } else {
1777        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1778    }
1779
1780    /* Start at the beginning of the string */
1781    start_offset = 0;
1782    next_offset = 0;
1783    last_match = subject;
1784    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1785
1786    /* Get next piece if no limit or limit not yet reached and something matched*/
1787    while ((limit_val == -1 || limit_val > 1)) {
1788        count = pcre_exec(pce->re, extra, subject,
1789                          subject_len, start_offset,
1790                          exoptions|g_notempty, offsets, size_offsets);
1791
1792        /* the string was already proved to be valid UTF-8 */
1793        exoptions |= PCRE_NO_UTF8_CHECK;
1794
1795        /* Check for too many substrings condition. */
1796        if (count == 0) {
1797            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1798            count = size_offsets/3;
1799        }
1800
1801        /* If something matched */
1802        if (count > 0) {
1803            if (!no_empty || &subject[offsets[0]] != last_match) {
1804
1805                if (offset_capture) {
1806                    /* Add (match, offset) pair to the return value */
1807                    add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1808                } else {
1809                    /* Add the piece to the return value */
1810                    ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1811                    zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1812                }
1813
1814                /* One less left to do */
1815                if (limit_val != -1)
1816                    limit_val--;
1817            }
1818
1819            last_match = &subject[offsets[1]];
1820            next_offset = offsets[1];
1821
1822            if (delim_capture) {
1823                int i, match_len;
1824                for (i = 1; i < count; i++) {
1825                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1826                    /* If we have matched a delimiter */
1827                    if (!no_empty || match_len > 0) {
1828                        if (offset_capture) {
1829                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1830                        } else {
1831                            ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1832                            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1833                        }
1834                    }
1835                }
1836            }
1837        } else if (count == PCRE_ERROR_NOMATCH) {
1838            /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1839               this is not necessarily the end. We need to advance
1840               the start offset, and continue. Fudge the offset values
1841               to achieve this, unless we're already at the end of the string. */
1842            if (g_notempty != 0 && start_offset < subject_len) {
1843                if (pce->compile_options & PCRE_UTF8) {
1844                    if (re_bump == NULL) {
1845                        int dummy;
1846                        zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
1847                        re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy);
1848                        zend_string_release(regex);
1849                        if (re_bump == NULL) {
1850                            RETURN_FALSE;
1851                        }
1852                    }
1853                    count = pcre_exec(re_bump, extra_bump, subject,
1854                              subject_len, start_offset,
1855                              exoptions, offsets, size_offsets);
1856                    if (count < 1) {
1857                        php_error_docref(NULL, E_WARNING, "Unknown error");
1858                        RETURN_FALSE;
1859                    }
1860                } else {
1861                    offsets[0] = start_offset;
1862                    offsets[1] = start_offset + 1;
1863                }
1864            } else
1865                break;
1866        } else {
1867            pcre_handle_exec_error(count);
1868            break;
1869        }
1870
1871        /* If we have matched an empty string, mimic what Perl's /g options does.
1872           This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1873           the match again at the same point. If this fails (picked up above) we
1874           advance to the next character. */
1875        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1876
1877        /* Advance to the position right after the last full match */
1878        start_offset = offsets[1];
1879    }
1880
1881
1882    start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1883
1884    if (!no_empty || start_offset < subject_len)
1885    {
1886        if (offset_capture) {
1887            /* Add the last (match, offset) pair to the return value */
1888            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1889        } else {
1890            /* Add the last piece to the return value */
1891            ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1892            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1893        }
1894    }
1895
1896
1897    /* Clean up */
1898    if (size_offsets <= 32) {
1899        free_alloca(offsets, use_heap);
1900    } else {
1901        efree(offsets);
1902    }
1903}
1904/* }}} */
1905
1906/* {{{ proto string preg_quote(string str [, string delim_char])
1907   Quote regular expression characters plus an optional character */
1908static PHP_FUNCTION(preg_quote)
1909{
1910    size_t       in_str_len;
1911    char    *in_str;        /* Input string argument */
1912    char    *in_str_end;    /* End of the input string */
1913    size_t       delim_len = 0;
1914    char    *delim = NULL;  /* Additional delimiter argument */
1915    zend_string *out_str;   /* Output string with quoted characters */
1916    char    *p,             /* Iterator for input string */
1917            *q,             /* Iterator for output string */
1918             delim_char=0,  /* Delimiter character to be quoted */
1919             c;             /* Current character */
1920    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1921
1922    /* Get the arguments and check for errors */
1923#ifndef FAST_ZPP
1924    if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
1925                              &delim, &delim_len) == FAILURE) {
1926        return;
1927    }
1928#else
1929    ZEND_PARSE_PARAMETERS_START(1, 2)
1930        Z_PARAM_STRING(in_str, in_str_len)
1931        Z_PARAM_OPTIONAL
1932        Z_PARAM_STRING(delim, delim_len)
1933    ZEND_PARSE_PARAMETERS_END();
1934#endif
1935
1936    in_str_end = in_str + in_str_len;
1937
1938    /* Nothing to do if we got an empty string */
1939    if (in_str == in_str_end) {
1940        RETURN_EMPTY_STRING();
1941    }
1942
1943    if (delim && *delim) {
1944        delim_char = delim[0];
1945        quote_delim = 1;
1946    }
1947
1948    /* Allocate enough memory so that even if each character
1949       is quoted, we won't run out of room */
1950    out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1951
1952    /* Go through the string and quote necessary characters */
1953    for (p = in_str, q = ZSTR_VAL(out_str); p != in_str_end; p++) {
1954        c = *p;
1955        switch(c) {
1956            case '.':
1957            case '\\':
1958            case '+':
1959            case '*':
1960            case '?':
1961            case '[':
1962            case '^':
1963            case ']':
1964            case '$':
1965            case '(':
1966            case ')':
1967            case '{':
1968            case '}':
1969            case '=':
1970            case '!':
1971            case '>':
1972            case '<':
1973            case '|':
1974            case ':':
1975            case '-':
1976                *q++ = '\\';
1977                *q++ = c;
1978                break;
1979
1980            case '\0':
1981                *q++ = '\\';
1982                *q++ = '0';
1983                *q++ = '0';
1984                *q++ = '0';
1985                break;
1986
1987            default:
1988                if (quote_delim && c == delim_char)
1989                    *q++ = '\\';
1990                *q++ = c;
1991                break;
1992        }
1993    }
1994    *q = '\0';
1995
1996    /* Reallocate string and return it */
1997    out_str = zend_string_truncate(out_str, q - ZSTR_VAL(out_str), 0);
1998    RETURN_NEW_STR(out_str);
1999}
2000/* }}} */
2001
2002/* {{{ proto array preg_grep(string regex, array input [, int flags])
2003   Searches array and returns entries which match regex */
2004static PHP_FUNCTION(preg_grep)
2005{
2006    zend_string         *regex;         /* Regular expression */
2007    zval                *input;         /* Input array */
2008    zend_long            flags = 0;     /* Match control flags */
2009    pcre_cache_entry    *pce;           /* Compiled regular expression */
2010
2011    /* Get arguments and do error checking */
2012#ifndef FAST_ZPP
2013    if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
2014                              &input, &flags) == FAILURE) {
2015        return;
2016    }
2017#else
2018    ZEND_PARSE_PARAMETERS_START(2, 3)
2019        Z_PARAM_STR(regex)
2020        Z_PARAM_ARRAY(input)
2021        Z_PARAM_OPTIONAL
2022        Z_PARAM_LONG(flags)
2023    ZEND_PARSE_PARAMETERS_END();
2024#endif
2025
2026    /* Compile regex or get it from cache. */
2027    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2028        RETURN_FALSE;
2029    }
2030
2031    pce->refcount++;
2032    php_pcre_grep_impl(pce, input, return_value, flags);
2033    pce->refcount--;
2034}
2035/* }}} */
2036
2037PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2038{
2039    zval            *entry;             /* An entry in the input array */
2040    pcre_extra      *extra = pce->extra;/* Holds results of studying */
2041    pcre_extra       extra_data;        /* Used locally for exec options */
2042    int             *offsets;           /* Array of subpattern offsets */
2043    int              size_offsets;      /* Size of the offsets array */
2044    int              count = 0;         /* Count of matched subpatterns */
2045    zend_string     *string_key;
2046    zend_ulong       num_key;
2047    zend_bool        invert;            /* Whether to return non-matching
2048                                           entries */
2049    ALLOCA_FLAG(use_heap);
2050
2051    invert = flags & PREG_GREP_INVERT ? 1 : 0;
2052
2053    if (extra == NULL) {
2054        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2055        extra = &extra_data;
2056    }
2057    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
2058    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
2059#ifdef PCRE_EXTRA_MARK
2060    extra->flags &= ~PCRE_EXTRA_MARK;
2061#endif
2062
2063    /* Calculate the size of the offsets array, and allocate memory for it. */
2064    size_offsets = (pce->capture_count + 1) * 3;
2065    if (size_offsets <= 32) {
2066        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
2067    } else {
2068        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
2069    }
2070
2071    /* Initialize return array */
2072    array_init(return_value);
2073
2074    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2075
2076    /* Go through the input array */
2077    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2078        zend_string *subject_str = zval_get_string(entry);
2079
2080        /* Perform the match */
2081        count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
2082                          (int)ZSTR_LEN(subject_str), 0,
2083                          0, offsets, size_offsets);
2084
2085        /* Check for too many substrings condition. */
2086        if (count == 0) {
2087            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2088            count = size_offsets/3;
2089        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
2090            pcre_handle_exec_error(count);
2091            zend_string_release(subject_str);
2092            break;
2093        }
2094
2095        /* If the entry fits our requirements */
2096        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
2097            if (Z_REFCOUNTED_P(entry)) {
2098                Z_ADDREF_P(entry);
2099            }
2100
2101            /* Add to return array */
2102            if (string_key) {
2103                zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2104            } else {
2105                zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2106            }
2107        }
2108
2109        zend_string_release(subject_str);
2110    } ZEND_HASH_FOREACH_END();
2111
2112    /* Clean up */
2113    if (size_offsets <= 32) {
2114        free_alloca(offsets, use_heap);
2115    } else {
2116        efree(offsets);
2117    }
2118}
2119/* }}} */
2120
2121/* {{{ proto int preg_last_error()
2122   Returns the error code of the last regexp execution. */
2123static PHP_FUNCTION(preg_last_error)
2124{
2125#ifndef FAST_ZPP
2126    if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
2127        return;
2128    }
2129#else
2130    ZEND_PARSE_PARAMETERS_START(0, 0)
2131    ZEND_PARSE_PARAMETERS_END();
2132#endif
2133
2134    RETURN_LONG(PCRE_G(error_code));
2135}
2136/* }}} */
2137
2138/* {{{ module definition structures */
2139
/* {{{ arginfo
 * Argument descriptors referenced by the pcre_functions table below.
 * NOTE(review): the last argument of ZEND_BEGIN_ARG_INFO_EX appears to be the
 * number of required parameters, and the first argument of ZEND_ARG_INFO the
 * pass-by-reference flag (1 for the `subpatterns` and `count` outputs) --
 * confirm against the Zend API headers. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* Shared by preg_replace and preg_filter (see pcre_functions). */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* Here `pattern` is the array mapping each regex to its callback. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error takes no arguments. */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
/* }}} */
2201
/* Table of the PHP functions this extension exports, each paired with its
   argument descriptor. The table is referenced from pcre_module_entry and is
   terminated by PHP_FE_END. */
static const zend_function_entry pcre_functions[] = {
    PHP_FE(preg_match,                  arginfo_preg_match)
    PHP_FE(preg_match_all,              arginfo_preg_match_all)
    PHP_FE(preg_replace,                arginfo_preg_replace)
    PHP_FE(preg_replace_callback,       arginfo_preg_replace_callback)
    PHP_FE(preg_replace_callback_array, arginfo_preg_replace_callback_array)
    PHP_FE(preg_filter,                 arginfo_preg_replace) /* same parameter list as preg_replace */
    PHP_FE(preg_split,                  arginfo_preg_split)
    PHP_FE(preg_quote,                  arginfo_preg_quote)
    PHP_FE(preg_grep,                   arginfo_preg_grep)
    PHP_FE(preg_last_error,             arginfo_preg_last_error)
    PHP_FE_END
};
2215
/* Module descriptor for the "pcre" extension: it names the module, points at
   the function table and wires up the module and globals lifecycle hooks.
   NOTE(review): the meaning of the three NULL slots is taken from the usual
   zend_module_entry field order -- confirm against zend_modules.h. */
zend_module_entry pcre_module_entry = {
    STANDARD_MODULE_HEADER,
   "pcre",                      /* module name */
    pcre_functions,             /* exported functions */
    PHP_MINIT(pcre),            /* module startup */
    PHP_MSHUTDOWN(pcre),        /* module shutdown */
    NULL,                       /* presumably request startup: unused */
    NULL,                       /* presumably request shutdown: unused */
    PHP_MINFO(pcre),            /* module info */
    PHP_PCRE_VERSION,           /* version string */
    PHP_MODULE_GLOBALS(pcre),   /* module globals */
    PHP_GINIT(pcre),            /* globals constructor */
    PHP_GSHUTDOWN(pcre),        /* globals destructor */
    NULL,                       /* presumably post-deactivate hook: unused */
    STANDARD_MODULE_PROPERTIES_EX
};
2232
/* Emit the module lookup entry point only when COMPILE_DL_PCRE is defined.
   NOTE(review): presumably set when pcre is built as a loadable extension --
   confirm against the build configuration. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
2236
2237/* }}} */
2238
2239#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2240
2241/*
2242 * Local variables:
2243 * tab-width: 4
2244 * c-basic-offset: 4
2245 * End:
2246 * vim600: sw=4 ts=4 fdm=marker
2247 * vim<600: sw=4 ts=4
2248 */
2249