1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2015 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include "php.h"
22#include "php_ini.h"
23#include "php_globals.h"
24#include "php_pcre.h"
25#include "ext/standard/info.h"
26#include "ext/standard/basic_functions.h"
27#include "zend_smart_str.h"
28
29#if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31#include "ext/standard/php_string.h"
32
/* Result-ordering modes for global matching. The matcher reads the mode
   from the low byte of the flags argument; PREG_OFFSET_CAPTURE is a
   separate bit above that byte. */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
#define PREG_OFFSET_CAPTURE         (1<<8)

/* Flag bits exported to userland as the PREG_SPLIT_* constants. */
#define PREG_SPLIT_NO_EMPTY         (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE    (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1<<2)

/* Internal option bit recorded in preg_options when the 'e' pattern
   modifier is seen. */
#define PREG_REPLACE_EVAL           (1<<0)

/* Flag bit exported to userland as PREG_GREP_INVERT. */
#define PREG_GREP_INVERT            (1<<0)

/* Number of compiled patterns the cache may hold before entries are
   evicted to make room. */
#define PCRE_CACHE_SIZE 4096
46
/* Error codes stored in PCRE_G(error_code) and exported to userland as
   the PREG_*_ERROR constants. The numeric values are spelled out because
   they are visible to scripts. */
enum {
    PHP_PCRE_NO_ERROR              = 0,
    PHP_PCRE_INTERNAL_ERROR        = 1,
    PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
    PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
    PHP_PCRE_BAD_UTF8_ERROR        = 4,
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5
};
55
56
/* Declare the module globals: the compiled-pattern cache, the backtrack and
   recursion limits, and the last error code (see PHP_GINIT_FUNCTION). */
ZEND_DECLARE_MODULE_GLOBALS(pcre)
58
59
60static void pcre_handle_exec_error(int pcre_code) /* {{{ */
61{
62    int preg_code = 0;
63
64    switch (pcre_code) {
65        case PCRE_ERROR_MATCHLIMIT:
66            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
67            break;
68
69        case PCRE_ERROR_RECURSIONLIMIT:
70            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
71            break;
72
73        case PCRE_ERROR_BADUTF8:
74            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
75            break;
76
77        case PCRE_ERROR_BADUTF8_OFFSET:
78            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
79            break;
80
81        default:
82            preg_code = PHP_PCRE_INTERNAL_ERROR;
83            break;
84    }
85
86    PCRE_G(error_code) = preg_code;
87}
88/* }}} */
89
90static void php_free_pcre_cache(zval *data) /* {{{ */
91{
92    pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
93    if (!pce) return;
94    pefree(pce->re, 1);
95    if (pce->extra) {
96        pcre_free_study(pce->extra);
97    }
98#if HAVE_SETLOCALE
99    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
100    if (pce->locale) {
101        zend_string_release(pce->locale);
102    }
103#endif
104    pefree(pce, 1);
105}
106/* }}} */
107
108static PHP_GINIT_FUNCTION(pcre) /* {{{ */
109{
110    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
111    pcre_globals->backtrack_limit = 0;
112    pcre_globals->recursion_limit = 0;
113    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
114}
115/* }}} */
116
117static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
118{
119    zend_hash_destroy(&pcre_globals->pcre_cache);
120}
121/* }}} */
122
/* INI settings, each bound to the same-named field of the module globals.
   pcre.jit is only registered when the PCRE headers define
   PCRE_STUDY_JIT_COMPILE. */
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef PCRE_STUDY_JIT_COMPILE
    STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
130
131
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Emits the module information table (support status and PCRE library
 * version) followed by the module's INI entries. */
static PHP_MINFO_FUNCTION(pcre)
{
    const char *library_version = pcre_version();

    php_info_print_table_start();
    php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");
    php_info_print_table_row(2, "PCRE Library Version", library_version);
    php_info_print_table_end();

    DISPLAY_INI_ENTRIES();
}
/* }}} */
143
144/* {{{ PHP_MINIT_FUNCTION(pcre) */
145static PHP_MINIT_FUNCTION(pcre)
146{
147    REGISTER_INI_ENTRIES();
148
149    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
150    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
151    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
152    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
153    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
154    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
155    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
156
157    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
158    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
159    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
160    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
161    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
162    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
163    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
164
165    return SUCCESS;
166}
167/* }}} */
168
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
 * Module shutdown: unregisters the INI entries registered at startup.
 * Always returns SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
    UNREGISTER_INI_ENTRIES();

    return SUCCESS;
}
/* }}} */
177
178/* {{{ static pcre_clean_cache */
179static int pcre_clean_cache(zval *data, void *arg)
180{
181    int *num_clean = (int *)arg;
182
183    if (*num_clean > 0) {
184        (*num_clean)--;
185        return 1;
186    } else {
187        return 0;
188    }
189}
190/* }}} */
191
192/* {{{ static make_subpats_table */
193static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
194{
195    pcre_extra *extra = pce->extra;
196    int name_cnt = pce->name_count, name_size, ni = 0;
197    int rc;
198    char *name_table;
199    unsigned short name_idx;
200    char **subpat_names;
201    int rc1, rc2;
202
203    rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
204    rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
205    rc = rc2 ? rc2 : rc1;
206    if (rc < 0) {
207        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
208        return NULL;
209    }
210
211    subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
212    while (ni++ < name_cnt) {
213        name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
214        subpat_names[name_idx] = name_table + 2;
215        if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
216            php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
217            efree(subpat_names);
218            return NULL;
219        }
220        name_table += name_size;
221    }
222    return subpat_names;
223}
224/* }}} */
225
226/* {{{ pcre_get_compiled_regex_cache
227 */
228PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
229{
230    pcre                *re = NULL;
231    pcre_extra          *extra;
232    int                  coptions = 0;
233    int                  soptions = 0;
234    const char          *error;
235    int                  erroffset;
236    char                 delimiter;
237    char                 start_delimiter;
238    char                 end_delimiter;
239    char                *p, *pp;
240    char                *pattern;
241    int                  do_study = 0;
242    int                  poptions = 0;
243    unsigned const char *tables = NULL;
244    pcre_cache_entry    *pce;
245    pcre_cache_entry     new_entry;
246    int                  rc;
247
248    /* Try to lookup the cached regex entry, and if successful, just pass
249       back the compiled pattern, otherwise go on and compile it. */
250    pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
251    if (pce) {
252#if HAVE_SETLOCALE
253        if (pce->locale == BG(locale_string) ||
254            (pce->locale && BG(locale_string) &&
255             pce->locale->len == BG(locale_string)->len &&
256             !memcmp(pce->locale->val, BG(locale_string)->val, pce->locale->len))) {
257            return pce;
258        }
259#else
260        return pce;
261#endif
262    }
263
264    p = regex->val;
265
266    /* Parse through the leading whitespace, and display a warning if we
267       get to the end without encountering a delimiter. */
268    while (isspace((int)*(unsigned char *)p)) p++;
269    if (*p == 0) {
270        php_error_docref(NULL, E_WARNING,
271                         p < regex->val + regex->len ? "Null byte in regex" : "Empty regular expression");
272        return NULL;
273    }
274
275    /* Get the delimiter and display a warning if it is alphanumeric
276       or a backslash. */
277    delimiter = *p++;
278    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
279        php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
280        return NULL;
281    }
282
283    start_delimiter = delimiter;
284    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
285        delimiter = pp[5];
286    end_delimiter = delimiter;
287
288    pp = p;
289
290    if (start_delimiter == end_delimiter) {
291        /* We need to iterate through the pattern, searching for the ending delimiter,
292           but skipping the backslashed delimiters.  If the ending delimiter is not
293           found, display a warning. */
294        while (*pp != 0) {
295            if (*pp == '\\' && pp[1] != 0) pp++;
296            else if (*pp == delimiter)
297                break;
298            pp++;
299        }
300    } else {
301        /* We iterate through the pattern, searching for the matching ending
302         * delimiter. For each matching starting delimiter, we increment nesting
303         * level, and decrement it for each matching ending delimiter. If we
304         * reach the end of the pattern without matching, display a warning.
305         */
306        int brackets = 1;   /* brackets nesting level */
307        while (*pp != 0) {
308            if (*pp == '\\' && pp[1] != 0) pp++;
309            else if (*pp == end_delimiter && --brackets <= 0)
310                break;
311            else if (*pp == start_delimiter)
312                brackets++;
313            pp++;
314        }
315    }
316
317    if (*pp == 0) {
318        if (pp < regex->val + regex->len) {
319            php_error_docref(NULL,E_WARNING, "Null byte in regex");
320        } else if (start_delimiter == end_delimiter) {
321            php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
322        } else {
323            php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
324        }
325        return NULL;
326    }
327
328    /* Make a copy of the actual pattern. */
329    pattern = estrndup(p, pp-p);
330
331    /* Move on to the options */
332    pp++;
333
334    /* Parse through the options, setting appropriate flags.  Display
335       a warning if we encounter an unknown modifier. */
336    while (pp < regex->val + regex->len) {
337        switch (*pp++) {
338            /* Perl compatible options */
339            case 'i':   coptions |= PCRE_CASELESS;      break;
340            case 'm':   coptions |= PCRE_MULTILINE;     break;
341            case 's':   coptions |= PCRE_DOTALL;        break;
342            case 'x':   coptions |= PCRE_EXTENDED;      break;
343
344            /* PCRE specific options */
345            case 'A':   coptions |= PCRE_ANCHORED;      break;
346            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
347            case 'S':   do_study  = 1;                  break;
348            case 'U':   coptions |= PCRE_UNGREEDY;      break;
349            case 'X':   coptions |= PCRE_EXTRA;         break;
350            case 'u':   coptions |= PCRE_UTF8;
351    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
352       characters, even in UTF-8 mode. However, this can be changed by setting
353       the PCRE_UCP option. */
354#ifdef PCRE_UCP
355                        coptions |= PCRE_UCP;
356#endif
357                break;
358
359            /* Custom preg options */
360            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;
361
362            case ' ':
363            case '\n':
364                break;
365
366            default:
367                if (pp[-1]) {
368                    php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
369                } else {
370                    php_error_docref(NULL,E_WARNING, "Null byte in regex");
371                }
372                efree(pattern);
373                return NULL;
374        }
375    }
376
377#if HAVE_SETLOCALE
378    if (BG(locale_string) &&
379        (BG(locale_string)->len != 1 || BG(locale_string)->val[0] != 'C')) {
380        tables = pcre_maketables();
381    }
382#endif
383
384    /* Compile pattern and display a warning if compilation failed. */
385    re = pcre_compile(pattern,
386                      coptions,
387                      &error,
388                      &erroffset,
389                      tables);
390
391    if (re == NULL) {
392        php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
393        efree(pattern);
394        if (tables) {
395            pefree((void*)tables, 1);
396        }
397        return NULL;
398    }
399
400#ifdef PCRE_STUDY_JIT_COMPILE
401    if (PCRE_G(jit)) {
402        /* Enable PCRE JIT compiler */
403        do_study = 1;
404        soptions |= PCRE_STUDY_JIT_COMPILE;
405    }
406#endif
407
408    /* If study option was specified, study the pattern and
409       store the result in extra for passing to pcre_exec. */
410    if (do_study) {
411        extra = pcre_study(re, soptions, &error);
412        if (extra) {
413            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
414            extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
415            extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
416        }
417        if (error != NULL) {
418            php_error_docref(NULL, E_WARNING, "Error while studying pattern");
419        }
420    } else {
421        extra = NULL;
422    }
423
424    efree(pattern);
425
426    /*
427     * If we reached cache limit, clean out the items from the head of the list;
428     * these are supposedly the oldest ones (but not necessarily the least used
429     * ones).
430     */
431    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
432        int num_clean = PCRE_CACHE_SIZE / 8;
433        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
434    }
435
436    /* Store the compiled pattern and extra info in the cache. */
437    new_entry.re = re;
438    new_entry.extra = extra;
439    new_entry.preg_options = poptions;
440    new_entry.compile_options = coptions;
441#if HAVE_SETLOCALE
442    new_entry.locale = BG(locale_string) ?
443        ((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
444            zend_string_copy(BG(locale_string)) :
445            zend_string_init(BG(locale_string)->val, BG(locale_string)->len, 1)) :
446        NULL;
447    new_entry.tables = tables;
448#endif
449
450    rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
451    if (rc < 0) {
452        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
453        return NULL;
454    }
455
456    rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
457    if (rc < 0) {
458        php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
459        return NULL;
460    }
461
462    /*
463     * Interned strings are not duplicated when stored in HashTable,
464     * but all the interned strings created during HTTP request are removed
465     * at end of request. However PCRE_G(pcre_cache) must be consistent
466     * on the next request as well. So we disable usage of interned strings
467     * as hash keys especually for this table.
468     * See bug #63180
469     */
470    pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache), regex->val, regex->len, &new_entry, sizeof(pcre_cache_entry));
471
472    return pce;
473}
474/* }}} */
475
476/* {{{ pcre_get_compiled_regex
477 */
478PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
479{
480    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
481
482    if (extra) {
483        *extra = pce ? pce->extra : NULL;
484    }
485    if (preg_options) {
486        *preg_options = pce ? pce->preg_options : 0;
487    }
488
489    return pce ? pce->re : NULL;
490}
491/* }}} */
492
493/* {{{ pcre_get_compiled_regex_ex
494 */
495PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
496{
497    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
498
499    if (extra) {
500        *extra = pce ? pce->extra : NULL;
501    }
502    if (preg_options) {
503        *preg_options = pce ? pce->preg_options : 0;
504    }
505    if (compile_options) {
506        *compile_options = pce ? pce->compile_options : 0;
507    }
508
509    return pce ? pce->re : NULL;
510}
511/* }}} */
512
513/* {{{ add_offset_pair */
514static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
515{
516    zval match_pair, tmp;
517
518    array_init_size(&match_pair, 2);
519
520    /* Add (match, offset) to the return value */
521    ZVAL_STRINGL(&tmp, str, len);
522    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
523    ZVAL_LONG(&tmp, offset);
524    zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
525
526    if (name) {
527        Z_ADDREF(match_pair);
528        zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
529    }
530    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
531}
532/* }}} */
533
534static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
535{
536    /* parameters */
537    zend_string      *regex;            /* Regular expression */
538    zend_string      *subject;          /* String to match against */
539    pcre_cache_entry *pce;              /* Compiled regular expression */
540    zval             *subpats = NULL;   /* Array for subpatterns */
541    zend_long         flags = 0;        /* Match control flags */
542    zend_long         start_offset = 0; /* Where the new search starts */
543
544#ifndef FAST_ZPP
545    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
546                              &subject, &subpats, &flags, &start_offset) == FAILURE) {
547        RETURN_FALSE;
548    }
549#else
550    ZEND_PARSE_PARAMETERS_START(2, 5)
551        Z_PARAM_STR(regex)
552        Z_PARAM_STR(subject)
553        Z_PARAM_OPTIONAL
554        Z_PARAM_ZVAL_EX(subpats, 0, 1)
555        Z_PARAM_LONG(flags)
556        Z_PARAM_LONG(start_offset)
557    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
558#endif
559
560    /* Compile regex or get it from cache. */
561    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
562        RETURN_FALSE;
563    }
564
565    php_pcre_match_impl(pce, subject->val, (int)subject->len, return_value, subpats,
566        global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
567}
568/* }}} */
569
570/* {{{ php_pcre_match_impl() */
571PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
572    zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
573{
574    zval             result_set,        /* Holds a set of subpatterns after
575                                           a global match */
576                    *match_sets = NULL; /* An array of sets of matches for each
577                                           subpattern after a global match */
578    pcre_extra      *extra = pce->extra;/* Holds results of studying */
579    pcre_extra       extra_data;        /* Used locally for exec options */
580    int              exoptions = 0;     /* Execution options */
581    int              count = 0;         /* Count of matched subpatterns */
582    int             *offsets;           /* Array of subpattern offsets */
583    int              num_subpats;       /* Number of captured subpatterns */
584    int              size_offsets;      /* Size of the offsets array */
585    int              matched;           /* Has anything matched */
586    int              g_notempty = 0;    /* If the match should not be empty */
587    const char     **stringlist;        /* Holds list of subpatterns */
588    char           **subpat_names;      /* Array for named subpatterns */
589    int              i;
590    int              subpats_order;     /* Order of subpattern matches */
591    int              offset_capture;    /* Capture match offsets: yes/no */
592    unsigned char   *mark = NULL;       /* Target for MARK name */
593    zval            marks;              /* Array of marks for PREG_PATTERN_ORDER */
594    ALLOCA_FLAG(use_heap);
595
596    ZVAL_UNDEF(&marks);
597
598    /* Overwrite the passed-in value for subpatterns with an empty array. */
599    if (subpats != NULL) {
600        zval_dtor(subpats);
601        array_init(subpats);
602    }
603
604    subpats_order = global ? PREG_PATTERN_ORDER : 0;
605
606    if (use_flags) {
607        offset_capture = flags & PREG_OFFSET_CAPTURE;
608
609        /*
610         * subpats_order is pre-set to pattern mode so we change it only if
611         * necessary.
612         */
613        if (flags & 0xff) {
614            subpats_order = flags & 0xff;
615        }
616        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
617            (!global && subpats_order != 0)) {
618            php_error_docref(NULL, E_WARNING, "Invalid flags specified");
619            return;
620        }
621    } else {
622        offset_capture = 0;
623    }
624
625    /* Negative offset counts from the end of the string. */
626    if (start_offset < 0) {
627        start_offset = subject_len + start_offset;
628        if (start_offset < 0) {
629            start_offset = 0;
630        }
631    }
632
633    if (extra == NULL) {
634        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
635        extra = &extra_data;
636    }
637    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
638    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
639#ifdef PCRE_EXTRA_MARK
640    extra->mark = &mark;
641    extra->flags |= PCRE_EXTRA_MARK;
642#endif
643
644    /* Calculate the size of the offsets array, and allocate memory for it. */
645    num_subpats = pce->capture_count + 1;
646    size_offsets = num_subpats * 3;
647
648    /*
649     * Build a mapping from subpattern numbers to their names. We will
650     * allocate the table only if there are any named subpatterns.
651     */
652    subpat_names = NULL;
653    if (pce->name_count > 0) {
654        subpat_names = make_subpats_table(num_subpats, pce);
655        if (!subpat_names) {
656            RETURN_FALSE;
657        }
658    }
659
660    if (size_offsets <= 32) {
661        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
662    } else {
663        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
664    }
665    memset(offsets, 0, size_offsets*sizeof(int));
666    /* Allocate match sets array and initialize the values. */
667    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
668        match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
669        for (i=0; i<num_subpats; i++) {
670            array_init(&match_sets[i]);
671        }
672    }
673
674    matched = 0;
675    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
676
677    do {
678        /* Execute the regular expression. */
679        count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
680                          exoptions|g_notempty, offsets, size_offsets);
681
682        /* the string was already proved to be valid UTF-8 */
683        exoptions |= PCRE_NO_UTF8_CHECK;
684
685        /* Check for too many substrings condition. */
686        if (count == 0) {
687            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
688            count = size_offsets/3;
689        }
690
691        /* If something has matched */
692        if (count > 0) {
693            matched++;
694
695            /* If subpatterns array has been passed, fill it in with values. */
696            if (subpats != NULL) {
697                /* Try to get the list of substrings and display a warning if failed. */
698                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
699                    if (subpat_names) {
700                        efree(subpat_names);
701                    }
702                    if (size_offsets <= 32) {
703                        free_alloca(offsets, use_heap);
704                    } else {
705                        efree(offsets);
706                    }
707                    if (match_sets) efree(match_sets);
708                    php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
709                    RETURN_FALSE;
710                }
711
712                if (global) {   /* global pattern matching */
713                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
714                        /* For each subpattern, insert it into the appropriate array. */
715                        if (offset_capture) {
716                            for (i = 0; i < count; i++) {
717                                add_offset_pair(&match_sets[i], (char *)stringlist[i],
718                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
719                            }
720                        } else {
721                            for (i = 0; i < count; i++) {
722                                add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
723                                                       offsets[(i<<1)+1] - offsets[i<<1]);
724                            }
725                        }
726                        /* Add MARK, if available */
727                        if (mark) {
728                            if (Z_TYPE(marks) == IS_UNDEF) {
729                                array_init(&marks);
730                            }
731                            add_index_string(&marks, matched - 1, (char *) mark);
732                        }
733                        /*
734                         * If the number of captured subpatterns on this run is
735                         * less than the total possible number, pad the result
736                         * arrays with empty strings.
737                         */
738                        if (count < num_subpats) {
739                            for (; i < num_subpats; i++) {
740                                add_next_index_string(&match_sets[i], "");
741                            }
742                        }
743                    } else {
744                        /* Allocate the result set array */
745                        array_init_size(&result_set, count + (mark ? 1 : 0));
746
747                        /* Add all the subpatterns to it */
748                        if (subpat_names) {
749                            if (offset_capture) {
750                                for (i = 0; i < count; i++) {
751                                    add_offset_pair(&result_set, (char *)stringlist[i],
752                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
753                                }
754                            } else {
755                                for (i = 0; i < count; i++) {
756                                    if (subpat_names[i]) {
757                                        add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
758                                                               offsets[(i<<1)+1] - offsets[i<<1]);
759                                    }
760                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
761                                                           offsets[(i<<1)+1] - offsets[i<<1]);
762                                }
763                            }
764                        } else {
765                            if (offset_capture) {
766                                for (i = 0; i < count; i++) {
767                                    add_offset_pair(&result_set, (char *)stringlist[i],
768                                                    offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
769                                }
770                            } else {
771                                for (i = 0; i < count; i++) {
772                                    add_next_index_stringl(&result_set, (char *)stringlist[i],
773                                                           offsets[(i<<1)+1] - offsets[i<<1]);
774                                }
775                            }
776                        }
777                        /* Add MARK, if available */
778                        if (mark) {
779                            add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
780                        }
781                        /* And add it to the output array */
782                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
783                    }
784                } else {            /* single pattern matching */
785                    /* For each subpattern, insert it into the subpatterns array. */
786                    if (subpat_names) {
787                        if (offset_capture) {
788                            for (i = 0; i < count; i++) {
789                                add_offset_pair(subpats, (char *)stringlist[i],
790                                                offsets[(i<<1)+1] - offsets[i<<1],
791                                                offsets[i<<1], subpat_names[i]);
792                            }
793                        } else {
794                            for (i = 0; i < count; i++) {
795                                if (subpat_names[i]) {
796                                    add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
797                                                      offsets[(i<<1)+1] - offsets[i<<1]);
798                                }
799                                add_next_index_stringl(subpats, (char *)stringlist[i],
800                                                       offsets[(i<<1)+1] - offsets[i<<1]);
801                            }
802                        }
803                    } else {
804                        if (offset_capture) {
805                            for (i = 0; i < count; i++) {
806                                add_offset_pair(subpats, (char *)stringlist[i],
807                                                offsets[(i<<1)+1] - offsets[i<<1],
808                                                offsets[i<<1], NULL);
809                            }
810                        } else {
811                            for (i = 0; i < count; i++) {
812                                add_next_index_stringl(subpats, (char *)stringlist[i],
813                                                       offsets[(i<<1)+1] - offsets[i<<1]);
814                            }
815                        }
816                    }
817                    /* Add MARK, if available */
818                    if (mark) {
819                        add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
820                    }
821                }
822
823                pcre_free((void *) stringlist);
824            }
825        } else if (count == PCRE_ERROR_NOMATCH) {
826            /* If we previously set PCRE_NOTEMPTY after a null match,
827               this is not necessarily the end. We need to advance
828               the start offset, and continue. Fudge the offset values
829               to achieve this, unless we're already at the end of the string. */
830            if (g_notempty != 0 && start_offset < subject_len) {
831                offsets[0] = (int)start_offset;
832                offsets[1] = (int)(start_offset + 1);
833            } else
834                break;
835        } else {
836            pcre_handle_exec_error(count);
837            break;
838        }
839
        /* If we have matched an empty string, mimic what Perl's /g option does.
841           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
842           the match again at the same point. If this fails (picked up above) we
843           advance to the next character. */
844        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
845
846        /* Advance to the position right after the last full match */
847        start_offset = offsets[1];
848    } while (global);
849
850    /* Add the match sets to the output array and clean up */
851    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
852        if (subpat_names) {
853            for (i = 0; i < num_subpats; i++) {
854                if (subpat_names[i]) {
855                    zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
856                                     strlen(subpat_names[i]), &match_sets[i]);
857                    Z_ADDREF(match_sets[i]);
858                }
859                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
860            }
861        } else {
862            for (i = 0; i < num_subpats; i++) {
863                zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
864            }
865        }
866        efree(match_sets);
867
868        if (Z_TYPE(marks) != IS_UNDEF) {
869            add_assoc_zval(subpats, "MARK", &marks);
870        }
871    }
872
873    if (size_offsets <= 32) {
874        free_alloca(offsets, use_heap);
875    } else {
876        efree(offsets);
877    }
878    if (subpat_names) {
879        efree(subpat_names);
880    }
881
882    /* Did we encounter an error? */
883    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
884        RETVAL_LONG(matched);
885    } else {
886        RETVAL_FALSE;
887    }
888}
889/* }}} */
890
891/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
892   Perform a Perl-style regular expression match */
893static PHP_FUNCTION(preg_match)
894{
895    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
896}
897/* }}} */
898
899/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
900   Perform a Perl-style global regular expression match */
901static PHP_FUNCTION(preg_match_all)
902{
903    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
904}
905/* }}} */
906
/* {{{ preg_get_backref
 * Try to parse a backreference at *str, which is expected to point at the
 * introducing '\' or '$'. Accepted forms are \N, $N and ${N}, where N is one
 * or two decimal digits (the braced form is only recognised after '$').
 *
 * On success returns 1, stores N in *backref and advances *str past the
 * reference. On failure returns 0 and leaves *str untouched; note that
 * *backref may already have been overwritten with the digits read so far
 * (e.g. for an unterminated "${12").
 */
static int preg_get_backref(char **str, int *backref)
{
    char *cursor = *str;
    int   braced;

    /* An introducer that is the last character cannot start a reference. */
    if (cursor[1] == '\0') {
        return 0;
    }

    /* Step over the introducer, and over the '{' of the "${" form. */
    braced = (cursor[0] == '$' && cursor[1] == '{');
    cursor += braced ? 2 : 1;

    /* First digit is mandatory. */
    if (*cursor < '0' || *cursor > '9') {
        return 0;
    }
    *backref = *cursor++ - '0';

    /* Second digit is optional. */
    if (*cursor >= '0' && *cursor <= '9') {
        *backref = *backref * 10 + *cursor - '0';
        cursor++;
    }

    /* The braced form must be closed right after the digits. */
    if (braced) {
        if (*cursor != '}') {
            return 0;
        }
        cursor++;
    }

    *str = cursor;
    return 1;
}
944/* }}} */
945
946/* {{{ preg_do_repl_func
947 */
948static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
949{
950    zend_string *result_str;
951    zval         retval;            /* Function return value */
952    zval         args[1];           /* Argument to pass to function */
953    int          i;
954
955    array_init_size(&args[0], count + (mark ? 1 : 0));
956    if (subpat_names) {
957        for (i = 0; i < count; i++) {
958            if (subpat_names[i]) {
959                add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
960            }
961            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
962        }
963    } else {
964        for (i = 0; i < count; i++) {
965            add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
966        }
967    }
968    if (mark) {
969        add_assoc_string(&args[0], "MARK", (char *) mark);
970    }
971
972    if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
973        result_str = zval_get_string(&retval);
974        zval_ptr_dtor(&retval);
975    } else {
976        if (!EG(exception)) {
977            php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
978        }
979
980        result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
981    }
982
983    zval_ptr_dtor(&args[0]);
984
985    return result_str;
986}
987/* }}} */
988
989/* {{{ php_pcre_replace
990 */
991PHPAPI zend_string *php_pcre_replace(zend_string *regex,
992                              char *subject, int subject_len,
993                              zval *replace_val, int is_callable_replace,
994                              int limit, int *replace_count)
995{
996    pcre_cache_entry    *pce;               /* Compiled regular expression */
997
998    /* Compile regex or get it from cache. */
999    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1000        return NULL;
1001    }
1002
1003    return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
1004        is_callable_replace, limit, replace_count);
1005}
1006/* }}} */
1007
1008/* {{{ php_pcre_replace_impl() */
1009PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
1010    int is_callable_replace, int limit, int *replace_count)
1011{
1012    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1013    pcre_extra       extra_data;        /* Used locally for exec options */
1014    int              exoptions = 0;     /* Execution options */
1015    int              count = 0;         /* Count of matched subpatterns */
1016    int             *offsets;           /* Array of subpattern offsets */
1017    char            **subpat_names;     /* Array for named subpatterns */
1018    int              num_subpats;       /* Number of captured subpatterns */
1019    int              size_offsets;      /* Size of the offsets array */
1020    int              new_len;           /* Length of needed storage */
1021    int              alloc_len;         /* Actual allocated length */
1022    int              match_len;         /* Length of the current match */
1023    int              backref;           /* Backreference number */
1024    int              start_offset;      /* Where the new search starts */
1025    int              g_notempty=0;      /* If the match should not be empty */
1026    int              replace_len=0;     /* Length of replacement string */
1027    char            *replace=NULL,      /* Replacement string */
1028                    *walkbuf,           /* Location of current replacement in the result */
1029                    *walk,              /* Used to walk the replacement string */
1030                    *match,             /* The current match */
1031                    *piece,             /* The current piece of subject */
1032                    *replace_end=NULL,  /* End of replacement string */
1033                     walk_last;         /* Last walked character */
1034    int              result_len;        /* Length of result */
1035    unsigned char   *mark = NULL;       /* Target for MARK name */
1036    zend_string     *result;            /* Result of replacement */
1037    zend_string     *eval_result=NULL;  /* Result of custom function */
1038    ALLOCA_FLAG(use_heap);
1039
1040    if (extra == NULL) {
1041        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1042        extra = &extra_data;
1043    }
1044    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1045    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1046
1047    if (pce->preg_options & PREG_REPLACE_EVAL) {
1048        php_error_docref(NULL TSRMLS_CC, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1049        return NULL;
1050    }
1051    if (!is_callable_replace) {
1052        replace = Z_STRVAL_P(replace_val);
1053        replace_len = (int)Z_STRLEN_P(replace_val);
1054        replace_end = replace + replace_len;
1055    }
1056
1057    /* Calculate the size of the offsets array, and allocate memory for it. */
1058    num_subpats = pce->capture_count + 1;
1059    size_offsets = num_subpats * 3;
1060    if (size_offsets <= 32) {
1061        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1062    } else {
1063        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1064    }
1065
1066    /*
1067     * Build a mapping from subpattern numbers to their names. We will
1068     * allocate the table only if there are any named subpatterns.
1069     */
1070    subpat_names = NULL;
1071    if (pce->name_count > 0) {
1072        subpat_names = make_subpats_table(num_subpats, pce);
1073        if (!subpat_names) {
1074            return NULL;
1075        }
1076    }
1077
1078    alloc_len = 2 * subject_len;
1079    result = zend_string_alloc(alloc_len * sizeof(char), 0);
1080
1081    /* Initialize */
1082    match = NULL;
1083    start_offset = 0;
1084    result_len = 0;
1085    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1086
1087    while (1) {
1088#ifdef PCRE_EXTRA_MARK
1089        extra->mark = &mark;
1090        extra->flags |= PCRE_EXTRA_MARK;
1091#endif
1092        /* Execute the regular expression. */
1093        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1094                          exoptions|g_notempty, offsets, size_offsets);
1095
1096        /* the string was already proved to be valid UTF-8 */
1097        exoptions |= PCRE_NO_UTF8_CHECK;
1098
1099        /* Check for too many substrings condition. */
1100        if (count == 0) {
1101            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1102            count = size_offsets/3;
1103        }
1104
1105        piece = subject + start_offset;
1106
1107        if (count > 0 && (limit == -1 || limit > 0)) {
1108            if (replace_count) {
1109                ++*replace_count;
1110            }
1111            /* Set the match location in subject */
1112            match = subject + offsets[0];
1113
1114            new_len = result_len + offsets[0] - start_offset; /* part before the match */
1115
1116            if (is_callable_replace) {
1117                /* Use custom function to get replacement string and its length. */
1118                eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1119                new_len += (int)eval_result->len;
1120            } else { /* do regular substitution */
1121                walk = replace;
1122                walk_last = 0;
1123                while (walk < replace_end) {
1124                    if ('\\' == *walk || '$' == *walk) {
1125                        if (walk_last == '\\') {
1126                            walk++;
1127                            walk_last = 0;
1128                            continue;
1129                        }
1130                        if (preg_get_backref(&walk, &backref)) {
1131                            if (backref < count)
1132                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1133                            continue;
1134                        }
1135                    }
1136                    new_len++;
1137                    walk++;
1138                    walk_last = walk[-1];
1139                }
1140            }
1141
1142            if (new_len > alloc_len) {
1143                alloc_len = alloc_len + 2 * new_len;
1144                result = zend_string_realloc(result, alloc_len, 0);
1145            }
1146            /* copy the part of the string before the match */
1147            memcpy(&result->val[result_len], piece, match-piece);
1148            result_len += (int)(match-piece);
1149
1150            /* copy replacement and backrefs */
1151            walkbuf = result->val + result_len;
1152
1153            /* If using custom function, copy result to the buffer and clean up. */
1154            if (is_callable_replace) {
1155                memcpy(walkbuf, eval_result->val, eval_result->len);
1156                result_len += (int)eval_result->len;
1157                if (eval_result) zend_string_release(eval_result);
1158            } else { /* do regular backreference copying */
1159                walk = replace;
1160                walk_last = 0;
1161                while (walk < replace_end) {
1162                    if ('\\' == *walk || '$' == *walk) {
1163                        if (walk_last == '\\') {
1164                            *(walkbuf-1) = *walk++;
1165                            walk_last = 0;
1166                            continue;
1167                        }
1168                        if (preg_get_backref(&walk, &backref)) {
1169                            if (backref < count) {
1170                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1171                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1172                                walkbuf += match_len;
1173                            }
1174                            continue;
1175                        }
1176                    }
1177                    *walkbuf++ = *walk++;
1178                    walk_last = walk[-1];
1179                }
1180                *walkbuf = '\0';
1181                /* increment the result length by how much we've added to the string */
1182                result_len += (int)(walkbuf - (result->val + result_len));
1183            }
1184
1185            if (limit != -1)
1186                limit--;
1187
1188        } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1189            /* If we previously set PCRE_NOTEMPTY after a null match,
1190               this is not necessarily the end. We need to advance
1191               the start offset, and continue. Fudge the offset values
1192               to achieve this, unless we're already at the end of the string. */
1193            if (g_notempty != 0 && start_offset < subject_len) {
1194                offsets[0] = start_offset;
1195                offsets[1] = start_offset + 1;
1196                memcpy(&result->val[result_len], piece, 1);
1197                result_len++;
1198            } else {
1199                new_len = result_len + subject_len - start_offset;
1200                if (new_len > alloc_len) {
1201                    alloc_len = new_len; /* now we know exactly how long it is */
1202                    result = zend_string_realloc(result, alloc_len, 0);
1203                }
1204                /* stick that last bit of string on our output */
1205                memcpy(&result->val[result_len], piece, subject_len - start_offset);
1206                result_len += subject_len - start_offset;
1207                result->val[result_len] = '\0';
1208                break;
1209            }
1210        } else {
1211            pcre_handle_exec_error(count);
1212            zend_string_free(result);
1213            result = NULL;
1214            break;
1215        }
1216
1217        /* If we have matched an empty string, mimic what Perl's /g options does.
1218           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1219           the match again at the same point. If this fails (picked up above) we
1220           advance to the next character. */
1221        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1222
1223        /* Advance to the next piece. */
1224        start_offset = offsets[1];
1225    }
1226
1227    if (result) {
1228        result->len = result_len;
1229    }
1230    if (size_offsets <= 32) {
1231        free_alloca(offsets, use_heap);
1232    } else {
1233        efree(offsets);
1234    }
1235    if (subpat_names) {
1236        efree(subpat_names);
1237    }
1238
1239    return result;
1240}
1241/* }}} */
1242
1243/* {{{ php_replace_in_subject
1244 */
1245static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1246{
1247    zval        *regex_entry,
1248                *replace_entry = NULL,
1249                *replace_value,
1250                 empty_replace;
1251    zend_string *result;
1252    zend_string *subject_str = zval_get_string(subject);
1253    uint32_t replace_idx;
1254
1255    /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1256    ZVAL_EMPTY_STRING(&empty_replace);
1257
1258    /* If regex is an array */
1259    if (Z_TYPE_P(regex) == IS_ARRAY) {
1260        replace_value = replace;
1261        replace_idx = 0;
1262
1263        /* For each entry in the regex array, get the entry */
1264        ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1265            /* Make sure we're dealing with strings. */
1266            zend_string *regex_str = zval_get_string(regex_entry);
1267
1268            /* If replace is an array and not a callable construct */
1269            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1270                /* Get current entry */
1271                replace_entry = NULL;
1272                while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1273                    if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
1274                        replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
1275                        break;
1276                    }
1277                    replace_idx++;
1278                }
1279                if (replace_entry != NULL) {
1280                    if (!is_callable_replace) {
1281                        convert_to_string_ex(replace_entry);
1282                    }
1283                    replace_value = replace_entry;
1284                    replace_idx++;
1285                } else {
1286                    /* We've run out of replacement strings, so use an empty one */
1287                    replace_value = &empty_replace;
1288                }
1289            }
1290
1291            /* Do the actual replacement and put the result back into subject_str
1292               for further replacements. */
1293            if ((result = php_pcre_replace(regex_str,
1294                                           subject_str->val,
1295                                           (int)subject_str->len,
1296                                           replace_value,
1297                                           is_callable_replace,
1298                                           limit,
1299                                           replace_count)) != NULL) {
1300                zend_string_release(subject_str);
1301                subject_str = result;
1302            } else {
1303                zend_string_release(subject_str);
1304                zend_string_release(regex_str);
1305                return NULL;
1306            }
1307
1308            zend_string_release(regex_str);
1309        } ZEND_HASH_FOREACH_END();
1310
1311        return subject_str;
1312    } else {
1313        result = php_pcre_replace(Z_STR_P(regex),
1314                                  subject_str->val,
1315                                  (int)subject_str->len,
1316                                  replace,
1317                                  is_callable_replace,
1318                                  limit,
1319                                  replace_count);
1320        zend_string_release(subject_str);
1321        return result;
1322    }
1323}
1324/* }}} */
1325
1326/* {{{ preg_replace_impl
1327 */
1328static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1329{
1330    zval            *regex,
1331                    *replace,
1332                    *subject,
1333                    *subject_entry,
1334                    *zcount = NULL;
1335    int              limit_val = -1;
1336    zend_long        limit = -1;
1337    zend_string     *result;
1338    zend_string     *string_key;
1339    zend_ulong       num_key;
1340    zend_string     *callback_name;
1341    int              replace_count=0, old_replace_count;
1342
1343#ifndef FAST_ZPP
1344    /* Get function parameters and do error-checking. */
1345    if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1346        return;
1347    }
1348#else
1349    ZEND_PARSE_PARAMETERS_START(3, 5)
1350        Z_PARAM_ZVAL(regex)
1351        Z_PARAM_ZVAL(replace)
1352        Z_PARAM_ZVAL(subject)
1353        Z_PARAM_OPTIONAL
1354        Z_PARAM_LONG(limit)
1355        Z_PARAM_ZVAL_EX(zcount, 0, 1)
1356    ZEND_PARSE_PARAMETERS_END();
1357#endif
1358
1359    if (!is_callable_replace && Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1360        php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1361        RETURN_FALSE;
1362    }
1363
1364    if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1365        SEPARATE_ZVAL(replace);
1366        convert_to_string_ex(replace);
1367    }
1368    if (is_callable_replace) {
1369        if (!zend_is_callable(replace, 0, &callback_name)) {
1370            php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name->val);
1371            zend_string_release(callback_name);
1372            ZVAL_DUP(return_value, subject);
1373            return;
1374        }
1375        zend_string_release(callback_name);
1376    }
1377
1378    if (ZEND_NUM_ARGS() > 3) {
1379        limit_val = (int)limit;
1380    }
1381
1382    if (Z_TYPE_P(regex) != IS_ARRAY) {
1383        SEPARATE_ZVAL(regex);
1384        convert_to_string_ex(regex);
1385    }
1386
1387    /* if subject is an array */
1388    if (Z_TYPE_P(subject) == IS_ARRAY) {
1389        array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1390
1391        /* For each subject entry, convert it to string, then perform replacement
1392           and add the result to the return_value array. */
1393        ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1394            old_replace_count = replace_count;
1395            if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1396                if (!is_filter || replace_count > old_replace_count) {
1397                    /* Add to return array */
1398                    if (string_key) {
1399                        add_assoc_str_ex(return_value, string_key->val, string_key->len, result);
1400                    } else {
1401                        add_index_str(return_value, num_key, result);
1402                    }
1403                } else {
1404                    zend_string_release(result);
1405                }
1406            }
1407        } ZEND_HASH_FOREACH_END();
1408    } else {    /* if subject is not an array */
1409        old_replace_count = replace_count;
1410        if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1411            if (!is_filter || replace_count > old_replace_count) {
1412                RETVAL_STR(result);
1413            } else {
1414                zend_string_release(result);
1415            }
1416        }
1417    }
1418    if (ZEND_NUM_ARGS() > 4) {
1419        zval_dtor(zcount);
1420        ZVAL_LONG(zcount, replace_count);
1421    }
1422
1423}
1424/* }}} */
1425
1426/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1427   Perform Perl-style regular expression replacement. */
1428static PHP_FUNCTION(preg_replace)
1429{
1430    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1431}
1432/* }}} */
1433
1434/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1435   Perform Perl-style regular expression replacement using replacement callback. */
1436static PHP_FUNCTION(preg_replace_callback)
1437{
1438    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1439}
1440/* }}} */
1441
1442/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1443   Perform Perl-style regular expression replacement and only return matches. */
1444static PHP_FUNCTION(preg_filter)
1445{
1446    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1447}
1448/* }}} */
1449
1450/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1451   Split string into an array using a perl-style regular expression as a delimiter */
1452static PHP_FUNCTION(preg_split)
1453{
1454    zend_string         *regex;         /* Regular expression */
1455    zend_string         *subject;       /* String to match against */
1456    zend_long            limit_val = -1;/* Integer value of limit */
1457    zend_long            flags = 0;     /* Match control flags */
1458    pcre_cache_entry    *pce;           /* Compiled regular expression */
1459
1460    /* Get function parameters and do error checking */
1461#ifndef FAST_ZPP
1462    if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
1463                              &subject, &limit_val, &flags) == FAILURE) {
1464        RETURN_FALSE;
1465    }
1466#else
1467    ZEND_PARSE_PARAMETERS_START(2, 4)
1468        Z_PARAM_STR(regex)
1469        Z_PARAM_STR(subject)
1470        Z_PARAM_OPTIONAL
1471        Z_PARAM_LONG(limit_val)
1472        Z_PARAM_LONG(flags)
1473    ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1474#endif
1475
1476    /* Compile regex or get it from cache. */
1477    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1478        RETURN_FALSE;
1479    }
1480
1481    php_pcre_split_impl(pce, subject->val, (int)subject->len, return_value, (int)limit_val, flags);
1482}
1483/* }}} */
1484
/* {{{ php_pcre_split_impl
 */
1487PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1488    zend_long limit_val, zend_long flags)
1489{
1490    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1491    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1492    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1493    pcre_extra       extra_data;        /* Used locally for exec options */
1494    int             *offsets;           /* Array of subpattern offsets */
1495    int              size_offsets;      /* Size of the offsets array */
1496    int              exoptions = 0;     /* Execution options */
1497    int              count = 0;         /* Count of matched subpatterns */
1498    int              start_offset;      /* Where the new search starts */
1499    int              next_offset;       /* End of the last delimiter match + 1 */
1500    int              g_notempty = 0;    /* If the match should not be empty */
1501    char            *last_match;        /* Location of last match */
1502    int              no_empty;          /* If NO_EMPTY flag is set */
1503    int              delim_capture;     /* If delimiters should be captured */
1504    int              offset_capture;    /* If offsets should be captured */
1505    zval             tmp;
1506    ALLOCA_FLAG(use_heap);
1507
1508    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1509    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1510    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1511
1512    if (limit_val == 0) {
1513        limit_val = -1;
1514    }
1515
1516    if (extra == NULL) {
1517        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1518        extra = &extra_data;
1519    }
1520    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1521    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1522#ifdef PCRE_EXTRA_MARK
1523    extra->flags &= ~PCRE_EXTRA_MARK;
1524#endif
1525
1526    /* Initialize return value */
1527    array_init(return_value);
1528
1529    /* Calculate the size of the offsets array, and allocate memory for it. */
1530    size_offsets = (pce->capture_count + 1) * 3;
1531    if (size_offsets <= 32) {
1532        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1533    } else {
1534        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1535    }
1536
1537    /* Start at the beginning of the string */
1538    start_offset = 0;
1539    next_offset = 0;
1540    last_match = subject;
1541    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1542
1543    /* Get next piece if no limit or limit not yet reached and something matched*/
1544    while ((limit_val == -1 || limit_val > 1)) {
1545        count = pcre_exec(pce->re, extra, subject,
1546                          subject_len, start_offset,
1547                          exoptions|g_notempty, offsets, size_offsets);
1548
1549        /* the string was already proved to be valid UTF-8 */
1550        exoptions |= PCRE_NO_UTF8_CHECK;
1551
1552        /* Check for too many substrings condition. */
1553        if (count == 0) {
1554            php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1555            count = size_offsets/3;
1556        }
1557
1558        /* If something matched */
1559        if (count > 0) {
1560            if (!no_empty || &subject[offsets[0]] != last_match) {
1561
1562                if (offset_capture) {
1563                    /* Add (match, offset) pair to the return value */
1564                    add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1565                } else {
1566                    /* Add the piece to the return value */
1567                    ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1568                    zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1569                }
1570
1571                /* One less left to do */
1572                if (limit_val != -1)
1573                    limit_val--;
1574            }
1575
1576            last_match = &subject[offsets[1]];
1577            next_offset = offsets[1];
1578
1579            if (delim_capture) {
1580                int i, match_len;
1581                for (i = 1; i < count; i++) {
1582                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1583                    /* If we have matched a delimiter */
1584                    if (!no_empty || match_len > 0) {
1585                        if (offset_capture) {
1586                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1587                        } else {
1588                            ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1589                            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1590                        }
1591                    }
1592                }
1593            }
1594        } else if (count == PCRE_ERROR_NOMATCH) {
1595            /* If we previously set PCRE_NOTEMPTY after a null match,
1596               this is not necessarily the end. We need to advance
1597               the start offset, and continue. Fudge the offset values
1598               to achieve this, unless we're already at the end of the string. */
1599            if (g_notempty != 0 && start_offset < subject_len) {
1600                if (pce->compile_options & PCRE_UTF8) {
1601                    if (re_bump == NULL) {
1602                        int dummy;
1603                        zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
1604                        re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy);
1605                        zend_string_release(regex);
1606                        if (re_bump == NULL) {
1607                            RETURN_FALSE;
1608                        }
1609                    }
1610                    count = pcre_exec(re_bump, extra_bump, subject,
1611                              subject_len, start_offset,
1612                              exoptions, offsets, size_offsets);
1613                    if (count < 1) {
1614                        php_error_docref(NULL, E_WARNING, "Unknown error");
1615                        RETURN_FALSE;
1616                    }
1617                } else {
1618                    offsets[0] = start_offset;
1619                    offsets[1] = start_offset + 1;
1620                }
1621            } else
1622                break;
1623        } else {
1624            pcre_handle_exec_error(count);
1625            break;
1626        }
1627
        /* If we have matched an empty string, mimic what Perl's /g option does.
           This turns out to be rather cunning. First we set PCRE_NOTEMPTY (together
           with PCRE_ANCHORED) and try the match again at the same point. If this
           fails (picked up above) we advance to the next character. */
1632        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1633
1634        /* Advance to the position right after the last full match */
1635        start_offset = offsets[1];
1636    }
1637
1638
1639    start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1640
1641    if (!no_empty || start_offset < subject_len)
1642    {
1643        if (offset_capture) {
1644            /* Add the last (match, offset) pair to the return value */
1645            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1646        } else {
1647            /* Add the last piece to the return value */
1648            ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1649            zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1650        }
1651    }
1652
1653
1654    /* Clean up */
1655    if (size_offsets <= 32) {
1656        free_alloca(offsets, use_heap);
1657    } else {
1658        efree(offsets);
1659    }
1660}
1661/* }}} */
1662
1663/* {{{ proto string preg_quote(string str [, string delim_char])
1664   Quote regular expression characters plus an optional character */
1665static PHP_FUNCTION(preg_quote)
1666{
1667    size_t       in_str_len;
1668    char    *in_str;        /* Input string argument */
1669    char    *in_str_end;    /* End of the input string */
1670    size_t       delim_len = 0;
1671    char    *delim = NULL;  /* Additional delimiter argument */
1672    zend_string *out_str;   /* Output string with quoted characters */
1673    char    *p,             /* Iterator for input string */
1674            *q,             /* Iterator for output string */
1675             delim_char=0,  /* Delimiter character to be quoted */
1676             c;             /* Current character */
1677    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1678
1679    /* Get the arguments and check for errors */
1680#ifndef FAST_ZPP
1681    if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
1682                              &delim, &delim_len) == FAILURE) {
1683        return;
1684    }
1685#else
1686    ZEND_PARSE_PARAMETERS_START(1, 2)
1687        Z_PARAM_STRING(in_str, in_str_len)
1688        Z_PARAM_OPTIONAL
1689        Z_PARAM_STRING(delim, delim_len)
1690    ZEND_PARSE_PARAMETERS_END();
1691#endif
1692
1693    in_str_end = in_str + in_str_len;
1694
1695    /* Nothing to do if we got an empty string */
1696    if (in_str == in_str_end) {
1697        RETURN_EMPTY_STRING();
1698    }
1699
1700    if (delim && *delim) {
1701        delim_char = delim[0];
1702        quote_delim = 1;
1703    }
1704
1705    /* Allocate enough memory so that even if each character
1706       is quoted, we won't run out of room */
1707    out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1708
1709    /* Go through the string and quote necessary characters */
1710    for (p = in_str, q = out_str->val; p != in_str_end; p++) {
1711        c = *p;
1712        switch(c) {
1713            case '.':
1714            case '\\':
1715            case '+':
1716            case '*':
1717            case '?':
1718            case '[':
1719            case '^':
1720            case ']':
1721            case '$':
1722            case '(':
1723            case ')':
1724            case '{':
1725            case '}':
1726            case '=':
1727            case '!':
1728            case '>':
1729            case '<':
1730            case '|':
1731            case ':':
1732            case '-':
1733                *q++ = '\\';
1734                *q++ = c;
1735                break;
1736
1737            case '\0':
1738                *q++ = '\\';
1739                *q++ = '0';
1740                *q++ = '0';
1741                *q++ = '0';
1742                break;
1743
1744            default:
1745                if (quote_delim && c == delim_char)
1746                    *q++ = '\\';
1747                *q++ = c;
1748                break;
1749        }
1750    }
1751    *q = '\0';
1752
1753    /* Reallocate string and return it */
1754    out_str = zend_string_realloc(out_str, q - out_str->val, 0);
1755    RETURN_STR(out_str);
1756}
1757/* }}} */
1758
1759/* {{{ proto array preg_grep(string regex, array input [, int flags])
1760   Searches array and returns entries which match regex */
1761static PHP_FUNCTION(preg_grep)
1762{
1763    zend_string         *regex;         /* Regular expression */
1764    zval                *input;         /* Input array */
1765    zend_long            flags = 0;     /* Match control flags */
1766    pcre_cache_entry    *pce;           /* Compiled regular expression */
1767
1768    /* Get arguments and do error checking */
1769#ifndef FAST_ZPP
1770    if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
1771                              &input, &flags) == FAILURE) {
1772        return;
1773    }
1774#else
1775    ZEND_PARSE_PARAMETERS_START(2, 3)
1776        Z_PARAM_STR(regex)
1777        Z_PARAM_ARRAY(input)
1778        Z_PARAM_OPTIONAL
1779        Z_PARAM_LONG(flags)
1780    ZEND_PARSE_PARAMETERS_END();
1781#endif
1782
1783    /* Compile regex or get it from cache. */
1784    if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1785        RETURN_FALSE;
1786    }
1787
1788    php_pcre_grep_impl(pce, input, return_value, flags);
1789}
1790/* }}} */
1791
1792PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
1793{
1794    zval            *entry;             /* An entry in the input array */
1795    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1796    pcre_extra       extra_data;        /* Used locally for exec options */
1797    int             *offsets;           /* Array of subpattern offsets */
1798    int              size_offsets;      /* Size of the offsets array */
1799    int              count = 0;         /* Count of matched subpatterns */
1800    zend_string     *string_key;
1801    zend_ulong       num_key;
1802    zend_bool        invert;            /* Whether to return non-matching
1803                                           entries */
1804    ALLOCA_FLAG(use_heap);
1805
1806    invert = flags & PREG_GREP_INVERT ? 1 : 0;
1807
1808    if (extra == NULL) {
1809        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1810        extra = &extra_data;
1811    }
1812    extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1813    extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1814#ifdef PCRE_EXTRA_MARK
1815    extra->flags &= ~PCRE_EXTRA_MARK;
1816#endif
1817
1818    /* Calculate the size of the offsets array, and allocate memory for it. */
1819    size_offsets = (pce->capture_count + 1) * 3;
1820    if (size_offsets <= 32) {
1821        offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1822    } else {
1823        offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1824    }
1825
1826    /* Initialize return array */
1827    array_init(return_value);
1828
1829    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1830
1831    /* Go through the input array */
1832    ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
1833        zend_string *subject_str = zval_get_string(entry);
1834
1835        /* Perform the match */
1836        count = pcre_exec(pce->re, extra, subject_str->val,
1837                          (int)subject_str->len, 0,
1838                          0, offsets, size_offsets);
1839
1840        /* Check for too many substrings condition. */
1841        if (count == 0) {
1842            php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1843            count = size_offsets/3;
1844        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1845            pcre_handle_exec_error(count);
1846            zend_string_release(subject_str);
1847            break;
1848        }
1849
1850        /* If the entry fits our requirements */
1851        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1852            if (Z_REFCOUNTED_P(entry)) {
1853                Z_ADDREF_P(entry);
1854            }
1855
1856            /* Add to return array */
1857            if (string_key) {
1858                zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
1859            } else {
1860                zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
1861            }
1862        }
1863
1864        zend_string_release(subject_str);
1865    } ZEND_HASH_FOREACH_END();
1866
1867    /* Clean up */
1868    if (size_offsets <= 32) {
1869        free_alloca(offsets, use_heap);
1870    } else {
1871        efree(offsets);
1872    }
1873}
1874/* }}} */
1875
1876/* {{{ proto int preg_last_error()
1877   Returns the error code of the last regexp execution. */
1878static PHP_FUNCTION(preg_last_error)
1879{
1880#ifndef FAST_ZPP
1881    if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
1882        return;
1883    }
1884#else
1885    ZEND_PARSE_PARAMETERS_START(0, 0)
1886    ZEND_PARSE_PARAMETERS_END();
1887#endif
1888
1889    RETURN_LONG(PCRE_G(error_code));
1890}
1891/* }}} */
1892
1893/* {{{ module definition structures */
1894
1895/* {{{ arginfo */
1896ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
1897    ZEND_ARG_INFO(0, pattern)
1898    ZEND_ARG_INFO(0, subject)
1899    ZEND_ARG_INFO(1, subpatterns) /* array */
1900    ZEND_ARG_INFO(0, flags)
1901    ZEND_ARG_INFO(0, offset)
1902ZEND_END_ARG_INFO()
1903
1904ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
1905    ZEND_ARG_INFO(0, pattern)
1906    ZEND_ARG_INFO(0, subject)
1907    ZEND_ARG_INFO(1, subpatterns) /* array */
1908    ZEND_ARG_INFO(0, flags)
1909    ZEND_ARG_INFO(0, offset)
1910ZEND_END_ARG_INFO()
1911
1912ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
1913    ZEND_ARG_INFO(0, regex)
1914    ZEND_ARG_INFO(0, replace)
1915    ZEND_ARG_INFO(0, subject)
1916    ZEND_ARG_INFO(0, limit)
1917    ZEND_ARG_INFO(1, count)
1918ZEND_END_ARG_INFO()
1919
1920ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
1921    ZEND_ARG_INFO(0, regex)
1922    ZEND_ARG_INFO(0, callback)
1923    ZEND_ARG_INFO(0, subject)
1924    ZEND_ARG_INFO(0, limit)
1925    ZEND_ARG_INFO(1, count)
1926ZEND_END_ARG_INFO()
1927
1928ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
1929    ZEND_ARG_INFO(0, pattern)
1930    ZEND_ARG_INFO(0, subject)
1931    ZEND_ARG_INFO(0, limit)
1932    ZEND_ARG_INFO(0, flags)
1933ZEND_END_ARG_INFO()
1934
1935ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
1936    ZEND_ARG_INFO(0, str)
1937    ZEND_ARG_INFO(0, delim_char)
1938ZEND_END_ARG_INFO()
1939
1940ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
1941    ZEND_ARG_INFO(0, regex)
1942    ZEND_ARG_INFO(0, input) /* array */
1943    ZEND_ARG_INFO(0, flags)
1944ZEND_END_ARG_INFO()
1945
1946ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
1947ZEND_END_ARG_INFO()
1948/* }}} */
1949
1950static const zend_function_entry pcre_functions[] = {
1951    PHP_FE(preg_match,              arginfo_preg_match)
1952    PHP_FE(preg_match_all,          arginfo_preg_match_all)
1953    PHP_FE(preg_replace,            arginfo_preg_replace)
1954    PHP_FE(preg_replace_callback,   arginfo_preg_replace_callback)
1955    PHP_FE(preg_filter,             arginfo_preg_replace)
1956    PHP_FE(preg_split,              arginfo_preg_split)
1957    PHP_FE(preg_quote,              arginfo_preg_quote)
1958    PHP_FE(preg_grep,               arginfo_preg_grep)
1959    PHP_FE(preg_last_error,         arginfo_preg_last_error)
1960    PHP_FE_END
1961};
1962
1963zend_module_entry pcre_module_entry = {
1964    STANDARD_MODULE_HEADER,
1965   "pcre",
1966    pcre_functions,
1967    PHP_MINIT(pcre),
1968    PHP_MSHUTDOWN(pcre),
1969    NULL,
1970    NULL,
1971    PHP_MINFO(pcre),
1972    NO_VERSION_YET,
1973    PHP_MODULE_GLOBALS(pcre),
1974    PHP_GINIT(pcre),
1975    PHP_GSHUTDOWN(pcre),
1976    NULL,
1977    STANDARD_MODULE_PROPERTIES_EX
1978};
1979
/* Expand ZEND_GET_MODULE for pcre only when COMPILE_DL_PCRE is defined */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
#endif
1983
1984/* }}} */
1985
1986#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
1987
1988/*
1989 * Local variables:
1990 * tab-width: 4
1991 * c-basic-offset: 4
1992 * End:
1993 * vim600: sw=4 ts=4 fdm=marker
1994 * vim<600: sw=4 ts=4
1995 */
1996