1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2013 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include "php.h"
22#include "php_ini.h"
23#include "php_globals.h"
24#include "php_pcre.h"
25#include "ext/standard/info.h"
26#include "ext/standard/php_smart_str.h"
27
28#if HAVE_PCRE || HAVE_BUNDLED_PCRE
29
30#include "ext/standard/php_string.h"
31
/*
 * Match flags. The low byte of the flags argument selects the subpattern
 * ordering (PREG_PATTERN_ORDER or PREG_SET_ORDER); PREG_OFFSET_CAPTURE is a
 * separate bit outside that byte so both can be combined.
 */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
#define PREG_OFFSET_CAPTURE         (1<<8)

/* Flag bits exported to userland as the PREG_SPLIT_* constants. */
#define PREG_SPLIT_NO_EMPTY         (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE    (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1<<2)

/* Stored in a cache entry's preg_options when the 'e' modifier is given. */
#define PREG_REPLACE_EVAL           (1<<0)

/* Flag bit exported to userland as PREG_GREP_INVERT. */
#define PREG_GREP_INVERT            (1<<0)

/* Number of compiled patterns held in the cache before old entries are evicted. */
#define PCRE_CACHE_SIZE 4096
45
/*
 * Error codes stored in PCRE_G(error_code) and exported to userland as the
 * PREG_*_ERROR constants. pcre_handle_exec_error() maps pcre_exec() failures
 * onto these values.
 */
enum {
    PHP_PCRE_NO_ERROR = 0,              /* no error recorded */
    PHP_PCRE_INTERNAL_ERROR,            /* any pcre_exec() error without a dedicated code */
    PHP_PCRE_BACKTRACK_LIMIT_ERROR,     /* PCRE_ERROR_MATCHLIMIT */
    PHP_PCRE_RECURSION_LIMIT_ERROR,     /* PCRE_ERROR_RECURSIONLIMIT */
    PHP_PCRE_BAD_UTF8_ERROR,            /* PCRE_ERROR_BADUTF8 */
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR      /* PCRE_ERROR_BADUTF8_OFFSET */
};
54
55
56ZEND_DECLARE_MODULE_GLOBALS(pcre)
57
58
59static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
60{
61    int preg_code = 0;
62
63    switch (pcre_code) {
64        case PCRE_ERROR_MATCHLIMIT:
65            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
66            break;
67
68        case PCRE_ERROR_RECURSIONLIMIT:
69            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
70            break;
71
72        case PCRE_ERROR_BADUTF8:
73            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
74            break;
75
76        case PCRE_ERROR_BADUTF8_OFFSET:
77            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
78            break;
79
80        default:
81            preg_code = PHP_PCRE_INTERNAL_ERROR;
82            break;
83    }
84
85    PCRE_G(error_code) = preg_code;
86}
87/* }}} */
88
89static void php_free_pcre_cache(void *data) /* {{{ */
90{
91    pcre_cache_entry *pce = (pcre_cache_entry *) data;
92    if (!pce) return;
93    pefree(pce->re, 1);
94    if (pce->extra) pefree(pce->extra, 1);
95#if HAVE_SETLOCALE
96    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
97    pefree(pce->locale, 1);
98#endif
99}
100/* }}} */
101
102static PHP_GINIT_FUNCTION(pcre) /* {{{ */
103{
104    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
105    pcre_globals->backtrack_limit = 0;
106    pcre_globals->recursion_limit = 0;
107    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
108}
109/* }}} */
110
111static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
112{
113    zend_hash_destroy(&pcre_globals->pcre_cache);
114}
115/* }}} */
116
/*
 * INI settings. Both values are stored as longs in zend_pcre_globals and are
 * copied into the pcre_extra match limits before every pcre_exec() call in
 * php_pcre_match_impl().
 */
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
PHP_INI_END()
121
122
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Emits the extension's info table (support status and PCRE library
 * version) followed by its INI entries. */
static PHP_MINFO_FUNCTION(pcre)
{
    const char *library_version;

    php_info_print_table_start();
    php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");

    library_version = pcre_version();
    php_info_print_table_row(2, "PCRE Library Version", library_version);

    php_info_print_table_end();

    DISPLAY_INI_ENTRIES();
}
/* }}} */
134
135/* {{{ PHP_MINIT_FUNCTION(pcre) */
136static PHP_MINIT_FUNCTION(pcre)
137{
138    REGISTER_INI_ENTRIES();
139
140    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
141    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
142    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
143    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
144    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
145    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
146    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
147
148    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
149    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
150    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
151    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
152    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
153    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
154    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
155
156    return SUCCESS;
157}
158/* }}} */
159
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
 * Module shutdown: removes the INI entries registered in MINIT.
 * Always reports SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
    UNREGISTER_INI_ENTRIES();

    return SUCCESS;
}
/* }}} */
168
169/* {{{ static pcre_clean_cache */
170static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
171{
172    int *num_clean = (int *)arg;
173
174    if (*num_clean > 0) {
175        (*num_clean)--;
176        return 1;
177    } else {
178        return 0;
179    }
180}
181/* }}} */
182
183/* {{{ static make_subpats_table */
184static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
185{
186    pcre_extra *extra = pce->extra;
187    int name_cnt = 0, name_size, ni = 0;
188    int rc;
189    char *name_table;
190    unsigned short name_idx;
191    char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
192
193    rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
194    if (rc < 0) {
195        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
196        efree(subpat_names);
197        return NULL;
198    }
199    if (name_cnt > 0) {
200        int rc1, rc2;
201
202        rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
203        rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
204        rc = rc2 ? rc2 : rc1;
205        if (rc < 0) {
206            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
207            efree(subpat_names);
208            return NULL;
209        }
210
211        while (ni++ < name_cnt) {
212            name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
213            subpat_names[name_idx] = name_table + 2;
214            if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
215                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
216                efree(subpat_names);
217                return NULL;
218            }
219            name_table += name_size;
220        }
221    }
222
223    return subpat_names;
224}
225/* }}} */
226
/* {{{ pcre_get_compiled_regex_cache
 *
 * Returns the cache entry for a PHP-style regex string (delimiter, pattern,
 * closing delimiter, modifiers), compiling and caching it when necessary.
 *
 * regex      the complete regex string, including delimiters and modifiers
 * regex_len  its length; a NUL byte found before this length is reported as
 *            "Null byte in regex"
 *
 * Returns a pointer to the entry stored in PCRE_G(pcre_cache), or NULL after
 * raising an E_WARNING when the regex is empty, has an invalid or missing
 * delimiter, carries an unknown modifier, or fails to compile.
 */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
{
    pcre                *re = NULL;
    pcre_extra          *extra;
    int                  coptions = 0;
    int                  soptions = 0;
    const char          *error;
    int                  erroffset;
    char                 delimiter;
    char                 start_delimiter;
    char                 end_delimiter;
    char                *p, *pp;
    char                *pattern;
    int                  do_study = 0;
    int                  poptions = 0;
    int             count = 0;
    unsigned const char *tables = NULL;
#if HAVE_SETLOCALE
    char                *locale;
#endif
    pcre_cache_entry    *pce;
    pcre_cache_entry     new_entry;
    char                *tmp = NULL;

#if HAVE_SETLOCALE
# if defined(PHP_WIN32) && defined(ZTS)
    _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
# endif
    /* Query (not change) the current LC_CTYPE locale. */
    locale = setlocale(LC_CTYPE, NULL);
#endif

    /* Try to lookup the cached regex entry, and if successful, just pass
       back the compiled pattern, otherwise go on and compile it. */
    if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
        /*
         * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
         * is, we flush it and compile the pattern from scratch.
         */
        if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
            zend_hash_clean(&PCRE_G(pcre_cache));
        } else {
#if HAVE_SETLOCALE
            /* A cached entry is only reused when it was compiled under the
               locale that is active now; otherwise it is recompiled below. */
            if (!strcmp(pce->locale, locale)) {
#endif
                return pce;
#if HAVE_SETLOCALE
            }
#endif
        }
    }

    p = regex;

    /* Parse through the leading whitespace, and display a warning if we
       get to the end without encountering a delimiter. */
    while (isspace((int)*(unsigned char *)p)) p++;
    if (*p == 0) {
        php_error_docref(NULL TSRMLS_CC, E_WARNING,
                         p < regex + regex_len ? "Null byte in regex" : "Empty regular expression");
        return NULL;
    }

    /* Get the delimiter and display a warning if it is alphanumeric
       or a backslash. */
    delimiter = *p++;
    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
        php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
        return NULL;
    }

    /* Lookup string trick: a bracket found at index i has its closing
       counterpart at index i+5, and a closing bracket maps to itself. Any
       other delimiter is left unchanged. */
    start_delimiter = delimiter;
    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
        delimiter = pp[5];
    end_delimiter = delimiter;

    pp = p;

    if (start_delimiter == end_delimiter) {
        /* We need to iterate through the pattern, searching for the ending delimiter,
           but skipping the backslashed delimiters.  If the ending delimiter is not
           found, display a warning. */
        while (*pp != 0) {
            if (*pp == '\\' && pp[1] != 0) pp++;
            else if (*pp == delimiter)
                break;
            pp++;
        }
    } else {
        /* We iterate through the pattern, searching for the matching ending
         * delimiter. For each matching starting delimiter, we increment nesting
         * level, and decrement it for each matching ending delimiter. If we
         * reach the end of the pattern without matching, display a warning.
         */
        int brackets = 1;   /* brackets nesting level */
        while (*pp != 0) {
            if (*pp == '\\' && pp[1] != 0) pp++;
            else if (*pp == end_delimiter && --brackets <= 0)
                break;
            else if (*pp == start_delimiter)
                brackets++;
            pp++;
        }
    }

    /* The scan stopped on a NUL: either an embedded NUL byte or a genuinely
       missing closing delimiter. */
    if (*pp == 0) {
        if (pp < regex + regex_len) {
            php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
        } else if (start_delimiter == end_delimiter) {
            php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
        } else {
            php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
        }
        return NULL;
    }

    /* Make a copy of the actual pattern. */
    pattern = estrndup(p, pp-p);

    /* Move on to the options */
    pp++;

    /* Parse through the options, setting appropriate flags.  Display
       a warning if we encounter an unknown modifier. */
    while (pp < regex + regex_len) {
        switch (*pp++) {
            /* Perl compatible options */
            case 'i':   coptions |= PCRE_CASELESS;      break;
            case 'm':   coptions |= PCRE_MULTILINE;     break;
            case 's':   coptions |= PCRE_DOTALL;        break;
            case 'x':   coptions |= PCRE_EXTENDED;      break;

            /* PCRE specific options */
            case 'A':   coptions |= PCRE_ANCHORED;      break;
            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
            case 'S':   do_study  = 1;                  break;
            case 'U':   coptions |= PCRE_UNGREEDY;      break;
            case 'X':   coptions |= PCRE_EXTRA;         break;
            case 'u':   coptions |= PCRE_UTF8;
    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
       characters, even in UTF-8 mode. However, this can be changed by setting
       the PCRE_UCP option. */
#ifdef PCRE_UCP
                        coptions |= PCRE_UCP;
#endif
                break;

            /* Custom preg options */
            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;

            /* Spaces and newlines between/after modifiers are ignored. */
            case ' ':
            case '\n':
                break;

            default:
                if (pp[-1]) {
                    php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
                } else {
                    php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
                }
                efree(pattern);
                return NULL;
        }
    }

#if HAVE_SETLOCALE
    /* Locale-specific character tables are only built for non-"C" locales;
       otherwise tables stays NULL and is passed as such to pcre_compile(). */
    if (strcmp(locale, "C"))
        tables = pcre_maketables();
#endif

    /* Compile pattern and display a warning if compilation failed. */
    re = pcre_compile(pattern,
                      coptions,
                      &error,
                      &erroffset,
                      tables);

    if (re == NULL) {
        php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
        efree(pattern);
        if (tables) {
            pefree((void*)tables, 1);
        }
        return NULL;
    }

    /* If study option was specified, study the pattern and
       store the result in extra for passing to pcre_exec. */
    if (do_study) {
        extra = pcre_study(re, soptions, &error);
        if (extra) {
            /* Mark the limit fields as in use; their values are filled in
               right before each pcre_exec() call. */
            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
        }
        if (error != NULL) {
            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
        }
    } else {
        extra = NULL;
    }

    efree(pattern);

    /*
     * If we reached cache limit, clean out the items from the head of the list;
     * these are supposedly the oldest ones (but not necessarily the least used
     * ones).
     */
    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
        int num_clean = PCRE_CACHE_SIZE / 8;
        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
    }

    /* Store the compiled pattern and extra info in the cache. */
    new_entry.re = re;
    new_entry.extra = extra;
    new_entry.preg_options = poptions;
    new_entry.compile_options = coptions;
#if HAVE_SETLOCALE
    new_entry.locale = pestrdup(locale, 1);
    new_entry.tables = tables;
#endif

    /*
     * Interned strings are not duplicated when stored in HashTable,
     * but all the interned strings created during HTTP request are removed
     * at end of request. However PCRE_G(pcre_cache) must be consistent
     * on the next request as well. So we disable usage of interned strings
     * as hash keys especially for this table.
     * See bug #63180
     */
    if (IS_INTERNED(regex)) {
        regex = tmp = estrndup(regex, regex_len);
    }

    /* The table stores its own copy of new_entry; pce receives the address
       of that stored copy, which is what gets returned to the caller. */
    zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
                        sizeof(pcre_cache_entry), (void**)&pce);

    if (tmp) {
        efree(tmp);
    }

    return pce;
}
/* }}} */
472
473/* {{{ pcre_get_compiled_regex
474 */
475PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
476{
477    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
478
479    if (extra) {
480        *extra = pce ? pce->extra : NULL;
481    }
482    if (preg_options) {
483        *preg_options = pce ? pce->preg_options : 0;
484    }
485
486    return pce ? pce->re : NULL;
487}
488/* }}} */
489
490/* {{{ pcre_get_compiled_regex_ex
491 */
492PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
493{
494    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
495
496    if (extra) {
497        *extra = pce ? pce->extra : NULL;
498    }
499    if (preg_options) {
500        *preg_options = pce ? pce->preg_options : 0;
501    }
502    if (compile_options) {
503        *compile_options = pce ? pce->compile_options : 0;
504    }
505
506    return pce ? pce->re : NULL;
507}
508/* }}} */
509
510/* {{{ add_offset_pair */
511static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
512{
513    zval *match_pair;
514
515    ALLOC_ZVAL(match_pair);
516    array_init(match_pair);
517    INIT_PZVAL(match_pair);
518
519    /* Add (match, offset) to the return value */
520    add_next_index_stringl(match_pair, str, len, 1);
521    add_next_index_long(match_pair, offset);
522
523    if (name) {
524        zval_add_ref(&match_pair);
525        zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
526    }
527    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
528}
529/* }}} */
530
531static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
532{
533    /* parameters */
534    char             *regex;            /* Regular expression */
535    char             *subject;          /* String to match against */
536    int               regex_len;
537    int               subject_len;
538    pcre_cache_entry *pce;              /* Compiled regular expression */
539    zval             *subpats = NULL;   /* Array for subpatterns */
540    long              flags = 0;        /* Match control flags */
541    long              start_offset = 0; /* Where the new search starts */
542
543    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", &regex, &regex_len,
544                              &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
545        RETURN_FALSE;
546    }
547
548    /* Compile regex or get it from cache. */
549    if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
550        RETURN_FALSE;
551    }
552
553    php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
554        global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
555}
556/* }}} */
557
558/* {{{ php_pcre_match_impl() */
559PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
560    zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
561{
562    zval            *result_set,        /* Holds a set of subpatterns after
563                                           a global match */
564                   **match_sets = NULL; /* An array of sets of matches for each
565                                           subpattern after a global match */
566    pcre_extra      *extra = pce->extra;/* Holds results of studying */
567    pcre_extra       extra_data;        /* Used locally for exec options */
568    int              exoptions = 0;     /* Execution options */
569    int              count = 0;         /* Count of matched subpatterns */
570    int             *offsets;           /* Array of subpattern offsets */
571    int              num_subpats;       /* Number of captured subpatterns */
572    int              size_offsets;      /* Size of the offsets array */
573    int              matched;           /* Has anything matched */
574    int              g_notempty = 0;    /* If the match should not be empty */
575    const char     **stringlist;        /* Holds list of subpatterns */
576    char           **subpat_names;      /* Array for named subpatterns */
577    int              i, rc;
578    int              subpats_order;     /* Order of subpattern matches */
579    int              offset_capture;    /* Capture match offsets: yes/no */
580
581    /* Overwrite the passed-in value for subpatterns with an empty array. */
582    if (subpats != NULL) {
583        zval_dtor(subpats);
584        array_init(subpats);
585    }
586
587    subpats_order = global ? PREG_PATTERN_ORDER : 0;
588
589    if (use_flags) {
590        offset_capture = flags & PREG_OFFSET_CAPTURE;
591
592        /*
593         * subpats_order is pre-set to pattern mode so we change it only if
594         * necessary.
595         */
596        if (flags & 0xff) {
597            subpats_order = flags & 0xff;
598        }
599        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
600            (!global && subpats_order != 0)) {
601            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
602            return;
603        }
604    } else {
605        offset_capture = 0;
606    }
607
608    /* Negative offset counts from the end of the string. */
609    if (start_offset < 0) {
610        start_offset = subject_len + start_offset;
611        if (start_offset < 0) {
612            start_offset = 0;
613        }
614    }
615
616    if (extra == NULL) {
617        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
618        extra = &extra_data;
619    }
620    extra->match_limit = PCRE_G(backtrack_limit);
621    extra->match_limit_recursion = PCRE_G(recursion_limit);
622
623    /* Calculate the size of the offsets array, and allocate memory for it. */
624    rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
625    if (rc < 0) {
626        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
627        RETURN_FALSE;
628    }
629    num_subpats++;
630    size_offsets = num_subpats * 3;
631
632    /*
633     * Build a mapping from subpattern numbers to their names. We will always
634     * allocate the table, even though there may be no named subpatterns. This
635     * avoids somewhat more complicated logic in the inner loops.
636     */
637    subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
638    if (!subpat_names) {
639        RETURN_FALSE;
640    }
641
642    offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
643
644    /* Allocate match sets array and initialize the values. */
645    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
646        match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
647        for (i=0; i<num_subpats; i++) {
648            ALLOC_ZVAL(match_sets[i]);
649            array_init(match_sets[i]);
650            INIT_PZVAL(match_sets[i]);
651        }
652    }
653
654    matched = 0;
655    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
656
657    do {
658        /* Execute the regular expression. */
659        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
660                          exoptions|g_notempty, offsets, size_offsets);
661
662        /* the string was already proved to be valid UTF-8 */
663        exoptions |= PCRE_NO_UTF8_CHECK;
664
665        /* Check for too many substrings condition. */
666        if (count == 0) {
667            php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
668            count = size_offsets/3;
669        }
670
671        /* If something has matched */
672        if (count > 0) {
673            matched++;
674
675            /* If subpatterns array has been passed, fill it in with values. */
676            if (subpats != NULL) {
677                /* Try to get the list of substrings and display a warning if failed. */
678                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
679                    efree(subpat_names);
680                    efree(offsets);
681                    if (match_sets) efree(match_sets);
682                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
683                    RETURN_FALSE;
684                }
685
686                if (global) {   /* global pattern matching */
687                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
688                        /* For each subpattern, insert it into the appropriate array. */
689                        for (i = 0; i < count; i++) {
690                            if (offset_capture) {
691                                add_offset_pair(match_sets[i], (char *)stringlist[i],
692                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
693                            } else {
694                                add_next_index_stringl(match_sets[i], (char *)stringlist[i],
695                                                       offsets[(i<<1)+1] - offsets[i<<1], 1);
696                            }
697                        }
698                        /*
699                         * If the number of captured subpatterns on this run is
700                         * less than the total possible number, pad the result
701                         * arrays with empty strings.
702                         */
703                        if (count < num_subpats) {
704                            for (; i < num_subpats; i++) {
705                                add_next_index_string(match_sets[i], "", 1);
706                            }
707                        }
708                    } else {
709                        /* Allocate the result set array */
710                        ALLOC_ZVAL(result_set);
711                        array_init(result_set);
712                        INIT_PZVAL(result_set);
713
714                        /* Add all the subpatterns to it */
715                        for (i = 0; i < count; i++) {
716                            if (offset_capture) {
717                                add_offset_pair(result_set, (char *)stringlist[i],
718                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
719                            } else {
720                                if (subpat_names[i]) {
721                                    add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
722                                                           offsets[(i<<1)+1] - offsets[i<<1], 1);
723                                }
724                                add_next_index_stringl(result_set, (char *)stringlist[i],
725                                                       offsets[(i<<1)+1] - offsets[i<<1], 1);
726                            }
727                        }
728                        /* And add it to the output array */
729                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
730                    }
731                } else {            /* single pattern matching */
732                    /* For each subpattern, insert it into the subpatterns array. */
733                    for (i = 0; i < count; i++) {
734                        if (offset_capture) {
735                            add_offset_pair(subpats, (char *)stringlist[i],
736                                            offsets[(i<<1)+1] - offsets[i<<1],
737                                            offsets[i<<1], subpat_names[i]);
738                        } else {
739                            if (subpat_names[i]) {
740                                add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
741                                                  offsets[(i<<1)+1] - offsets[i<<1], 1);
742                            }
743                            add_next_index_stringl(subpats, (char *)stringlist[i],
744                                                   offsets[(i<<1)+1] - offsets[i<<1], 1);
745                        }
746                    }
747                }
748
749                pcre_free((void *) stringlist);
750            }
751        } else if (count == PCRE_ERROR_NOMATCH) {
752            /* If we previously set PCRE_NOTEMPTY after a null match,
753               this is not necessarily the end. We need to advance
754               the start offset, and continue. Fudge the offset values
755               to achieve this, unless we're already at the end of the string. */
756            if (g_notempty != 0 && start_offset < subject_len) {
757                offsets[0] = start_offset;
758                offsets[1] = start_offset + 1;
759            } else
760                break;
761        } else {
762            pcre_handle_exec_error(count TSRMLS_CC);
763            break;
764        }
765
766        /* If we have matched an empty string, mimic what Perl's /g options does.
767           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
768           the match again at the same point. If this fails (picked up above) we
769           advance to the next character. */
770        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
771
772        /* Advance to the position right after the last full match */
773        start_offset = offsets[1];
774    } while (global);
775
776    /* Add the match sets to the output array and clean up */
777    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
778        for (i = 0; i < num_subpats; i++) {
779            if (subpat_names[i]) {
780                zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
781                                 strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
782                Z_ADDREF_P(match_sets[i]);
783            }
784            zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
785        }
786        efree(match_sets);
787    }
788
789    efree(offsets);
790    efree(subpat_names);
791
792    /* Did we encounter an error? */
793    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
794        RETVAL_LONG(matched);
795    } else {
796        RETVAL_FALSE;
797    }
798}
799/* }}} */
800
801/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
802   Perform a Perl-style regular expression match */
803static PHP_FUNCTION(preg_match)
804{
805    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
806}
807/* }}} */
808
809/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
810   Perform a Perl-style global regular expression match */
811static PHP_FUNCTION(preg_match_all)
812{
813    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
814}
815/* }}} */
816
/* {{{ preg_get_backref
 * Parse a numeric backreference located at *str.
 *
 * Accepted forms are an introducer character followed by one or two decimal
 * digits (e.g. "\1", "$12"), or "${" followed by one or two digits and a
 * closing "}" (e.g. "${1}", "${12}").  The first character at *str is skipped
 * without being inspected, except to detect the "${" form; the callers in this
 * file only invoke the function when that character is '\\' or '$'.
 *
 * str      in/out: on success advanced to the first byte after the reference;
 *          left untouched on failure.
 * backref  out: receives the group number (0-99) on success.  It may already
 *          have been written when a "${N" reference turns out to be
 *          unterminated, so its value is meaningful only when 1 is returned.
 *
 * Returns 1 if a well-formed reference was consumed, 0 otherwise.
 * The string must be NUL-terminated: the terminator is what stops the scan.
 */
static int preg_get_backref(char **str, int *backref)
{
    char *walk = *str;
    int   in_brace = 0;

    /* An introducer that is the last character cannot start a reference. */
    if (walk[1] == 0) {
        return 0;
    }

    /* Only the '$' introducer may wrap the number in braces. */
    if (*walk == '$' && walk[1] == '{') {
        in_brace = 1;
        walk++;
    }
    walk++;

    /* At least one digit is mandatory... */
    if (*walk < '0' || *walk > '9') {
        return 0;
    }
    *backref = *walk - '0';
    walk++;

    /* ...and a second one is optional.  A NUL terminator is not a digit, so
       no separate end-of-string test is needed. */
    if (*walk >= '0' && *walk <= '9') {
        *backref = *backref * 10 + (*walk - '0');
        walk++;
    }

    /* The braced form must be closed right after the digits. */
    if (in_brace) {
        if (*walk != '}') {
            return 0;
        }
        walk++;
    }

    *str = walk;
    return 1;
}
854/* }}} */
855
856/* {{{ preg_do_repl_func
857 */
858static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
859{
860    zval        *retval_ptr;        /* Function return value */
861    zval       **args[1];           /* Argument to pass to function */
862    zval        *subpats;           /* Captured subpatterns */
863    int          result_len;        /* Return value length */
864    int          i;
865
866    MAKE_STD_ZVAL(subpats);
867    array_init(subpats);
868    for (i = 0; i < count; i++) {
869        if (subpat_names[i]) {
870            add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
871        }
872        add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
873    }
874    args[0] = &subpats;
875
876    if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
877        convert_to_string_ex(&retval_ptr);
878        *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
879        result_len = Z_STRLEN_P(retval_ptr);
880        zval_ptr_dtor(&retval_ptr);
881    } else {
882        if (!EG(exception)) {
883            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
884        }
885        result_len = offsets[1] - offsets[0];
886        *result = estrndup(&subject[offsets[0]], result_len);
887    }
888
889    zval_ptr_dtor(&subpats);
890
891    return result_len;
892}
893/* }}} */
894
895/* {{{ preg_do_eval
896 */
897static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
898                        int *offsets, int count, char **result TSRMLS_DC)
899{
900    zval         retval;            /* Return value from evaluation */
901    char        *eval_str_end,      /* End of eval string */
902                *match,             /* Current match for a backref */
903                *esc_match,         /* Quote-escaped match */
904                *walk,              /* Used to walk the code string */
905                *segment,           /* Start of segment to append while walking */
906                 walk_last;         /* Last walked character */
907    int          match_len;         /* Length of the match */
908    int          esc_match_len;     /* Length of the quote-escaped match */
909    int          result_len;        /* Length of the result of the evaluation */
910    int          backref;           /* Current backref */
911    char        *compiled_string_description;
912    smart_str    code = {0};
913
914    eval_str_end = eval_str + eval_str_len;
915    walk = segment = eval_str;
916    walk_last = 0;
917
918    while (walk < eval_str_end) {
919        /* If found a backreference.. */
920        if ('\\' == *walk || '$' == *walk) {
921            smart_str_appendl(&code, segment, walk - segment);
922            if (walk_last == '\\') {
923                code.c[code.len-1] = *walk++;
924                segment = walk;
925                walk_last = 0;
926                continue;
927            }
928            segment = walk;
929            if (preg_get_backref(&walk, &backref)) {
930                if (backref < count) {
931                    /* Find the corresponding string match and substitute it
932                       in instead of the backref */
933                    match = subject + offsets[backref<<1];
934                    match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
935                    if (match_len) {
936                        esc_match = php_addslashes(match, match_len, &esc_match_len, 0 TSRMLS_CC);
937                    } else {
938                        esc_match = match;
939                        esc_match_len = 0;
940                    }
941                } else {
942                    esc_match = "";
943                    esc_match_len = 0;
944                }
945                smart_str_appendl(&code, esc_match, esc_match_len);
946
947                segment = walk;
948
949                /* Clean up and reassign */
950                if (esc_match_len)
951                    efree(esc_match);
952                continue;
953            }
954        }
955        walk++;
956        walk_last = walk[-1];
957    }
958    smart_str_appendl(&code, segment, walk - segment);
959    smart_str_0(&code);
960
961    compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
962    /* Run the code */
963    if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
964        efree(compiled_string_description);
965        php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
966        /* zend_error() does not return in this case */
967    }
968    efree(compiled_string_description);
969    convert_to_string(&retval);
970
971    /* Save the return value and its length */
972    *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
973    result_len = Z_STRLEN(retval);
974
975    /* Clean up */
976    zval_dtor(&retval);
977    smart_str_free(&code);
978
979    return result_len;
980}
981/* }}} */
982
983/* {{{ php_pcre_replace
984 */
985PHPAPI char *php_pcre_replace(char *regex,   int regex_len,
986                              char *subject, int subject_len,
987                              zval *replace_val, int is_callable_replace,
988                              int *result_len, int limit, int *replace_count TSRMLS_DC)
989{
990    pcre_cache_entry    *pce;               /* Compiled regular expression */
991
992    /* Compile regex or get it from cache. */
993    if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
994        return NULL;
995    }
996
997    return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
998        is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
999}
1000/* }}} */
1001
1002/* {{{ php_pcre_replace_impl() */
1003PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
1004    int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
1005{
1006    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1007    pcre_extra       extra_data;        /* Used locally for exec options */
1008    int              exoptions = 0;     /* Execution options */
1009    int              count = 0;         /* Count of matched subpatterns */
1010    int             *offsets;           /* Array of subpattern offsets */
1011    char            **subpat_names;     /* Array for named subpatterns */
1012    int              num_subpats;       /* Number of captured subpatterns */
1013    int              size_offsets;      /* Size of the offsets array */
1014    int              new_len;           /* Length of needed storage */
1015    int              alloc_len;         /* Actual allocated length */
1016    int              eval_result_len=0; /* Length of the eval'ed or
1017                                           function-returned string */
1018    int              match_len;         /* Length of the current match */
1019    int              backref;           /* Backreference number */
1020    int              eval;              /* If the replacement string should be eval'ed */
1021    int              start_offset;      /* Where the new search starts */
1022    int              g_notempty=0;      /* If the match should not be empty */
1023    int              replace_len=0;     /* Length of replacement string */
1024    char            *result,            /* Result of replacement */
1025                    *replace=NULL,      /* Replacement string */
1026                    *new_buf,           /* Temporary buffer for re-allocation */
1027                    *walkbuf,           /* Location of current replacement in the result */
1028                    *walk,              /* Used to walk the replacement string */
1029                    *match,             /* The current match */
1030                    *piece,             /* The current piece of subject */
1031                    *replace_end=NULL,  /* End of replacement string */
1032                    *eval_result,       /* Result of eval or custom function */
1033                     walk_last;         /* Last walked character */
1034    int              rc;
1035
1036    if (extra == NULL) {
1037        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1038        extra = &extra_data;
1039    }
1040    extra->match_limit = PCRE_G(backtrack_limit);
1041    extra->match_limit_recursion = PCRE_G(recursion_limit);
1042
1043    eval = pce->preg_options & PREG_REPLACE_EVAL;
1044    if (is_callable_replace) {
1045        if (eval) {
1046            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1047            return NULL;
1048        }
1049    } else {
1050        replace = Z_STRVAL_P(replace_val);
1051        replace_len = Z_STRLEN_P(replace_val);
1052        replace_end = replace + replace_len;
1053    }
1054
1055    if (eval) {
1056        php_error_docref(NULL TSRMLS_CC, E_DEPRECATED, "The /e modifier is deprecated, use preg_replace_callback instead");
1057    }
1058
1059    /* Calculate the size of the offsets array, and allocate memory for it. */
1060    rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
1061    if (rc < 0) {
1062        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1063        return NULL;
1064    }
1065    num_subpats++;
1066    size_offsets = num_subpats * 3;
1067
1068    /*
1069     * Build a mapping from subpattern numbers to their names. We will always
1070     * allocate the table, even though there may be no named subpatterns. This
1071     * avoids somewhat more complicated logic in the inner loops.
1072     */
1073    subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1074    if (!subpat_names) {
1075        return NULL;
1076    }
1077
1078    offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1079
1080    alloc_len = 2 * subject_len + 1;
1081    result = safe_emalloc(alloc_len, sizeof(char), 0);
1082
1083    /* Initialize */
1084    match = NULL;
1085    *result_len = 0;
1086    start_offset = 0;
1087    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1088
1089    while (1) {
1090        /* Execute the regular expression. */
1091        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1092                          exoptions|g_notempty, offsets, size_offsets);
1093
1094        /* the string was already proved to be valid UTF-8 */
1095        exoptions |= PCRE_NO_UTF8_CHECK;
1096
1097        /* Check for too many substrings condition. */
1098        if (count == 0) {
1099            php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1100            count = size_offsets/3;
1101        }
1102
1103        piece = subject + start_offset;
1104
1105        if (count > 0 && (limit == -1 || limit > 0)) {
1106            if (replace_count) {
1107                ++*replace_count;
1108            }
1109            /* Set the match location in subject */
1110            match = subject + offsets[0];
1111
1112            new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1113
1114            /* If evaluating, do it and add the return string's length */
1115            if (eval) {
1116                eval_result_len = preg_do_eval(replace, replace_len, subject,
1117                                               offsets, count, &eval_result TSRMLS_CC);
1118                new_len += eval_result_len;
1119            } else if (is_callable_replace) {
1120                /* Use custom function to get replacement string and its length. */
1121                eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
1122                new_len += eval_result_len;
1123            } else { /* do regular substitution */
1124                walk = replace;
1125                walk_last = 0;
1126                while (walk < replace_end) {
1127                    if ('\\' == *walk || '$' == *walk) {
1128                        if (walk_last == '\\') {
1129                            walk++;
1130                            walk_last = 0;
1131                            continue;
1132                        }
1133                        if (preg_get_backref(&walk, &backref)) {
1134                            if (backref < count)
1135                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1136                            continue;
1137                        }
1138                    }
1139                    new_len++;
1140                    walk++;
1141                    walk_last = walk[-1];
1142                }
1143            }
1144
1145            if (new_len + 1 > alloc_len) {
1146                alloc_len = 1 + alloc_len + 2 * new_len;
1147                new_buf = emalloc(alloc_len);
1148                memcpy(new_buf, result, *result_len);
1149                efree(result);
1150                result = new_buf;
1151            }
1152            /* copy the part of the string before the match */
1153            memcpy(&result[*result_len], piece, match-piece);
1154            *result_len += match-piece;
1155
1156            /* copy replacement and backrefs */
1157            walkbuf = result + *result_len;
1158
1159            /* If evaluating or using custom function, copy result to the buffer
1160             * and clean up. */
1161            if (eval || is_callable_replace) {
1162                memcpy(walkbuf, eval_result, eval_result_len);
1163                *result_len += eval_result_len;
1164                STR_FREE(eval_result);
1165            } else { /* do regular backreference copying */
1166                walk = replace;
1167                walk_last = 0;
1168                while (walk < replace_end) {
1169                    if ('\\' == *walk || '$' == *walk) {
1170                        if (walk_last == '\\') {
1171                            *(walkbuf-1) = *walk++;
1172                            walk_last = 0;
1173                            continue;
1174                        }
1175                        if (preg_get_backref(&walk, &backref)) {
1176                            if (backref < count) {
1177                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1178                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1179                                walkbuf += match_len;
1180                            }
1181                            continue;
1182                        }
1183                    }
1184                    *walkbuf++ = *walk++;
1185                    walk_last = walk[-1];
1186                }
1187                *walkbuf = '\0';
1188                /* increment the result length by how much we've added to the string */
1189                *result_len += walkbuf - (result + *result_len);
1190            }
1191
1192            if (limit != -1)
1193                limit--;
1194
1195        } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1196            /* If we previously set PCRE_NOTEMPTY after a null match,
1197               this is not necessarily the end. We need to advance
1198               the start offset, and continue. Fudge the offset values
1199               to achieve this, unless we're already at the end of the string. */
1200            if (g_notempty != 0 && start_offset < subject_len) {
1201                offsets[0] = start_offset;
1202                offsets[1] = start_offset + 1;
1203                memcpy(&result[*result_len], piece, 1);
1204                (*result_len)++;
1205            } else {
1206                new_len = *result_len + subject_len - start_offset;
1207                if (new_len + 1 > alloc_len) {
1208                    alloc_len = new_len + 1; /* now we know exactly how long it is */
1209                    new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1210                    memcpy(new_buf, result, *result_len);
1211                    efree(result);
1212                    result = new_buf;
1213                }
1214                /* stick that last bit of string on our output */
1215                memcpy(&result[*result_len], piece, subject_len - start_offset);
1216                *result_len += subject_len - start_offset;
1217                result[*result_len] = '\0';
1218                break;
1219            }
1220        } else {
1221            pcre_handle_exec_error(count TSRMLS_CC);
1222            efree(result);
1223            result = NULL;
1224            break;
1225        }
1226
1227        /* If we have matched an empty string, mimic what Perl's /g options does.
1228           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1229           the match again at the same point. If this fails (picked up above) we
1230           advance to the next character. */
1231        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1232
1233        /* Advance to the next piece. */
1234        start_offset = offsets[1];
1235    }
1236
1237    efree(offsets);
1238    efree(subpat_names);
1239
1240    return result;
1241}
1242/* }}} */
1243
1244/* {{{ php_replace_in_subject
1245 */
1246static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
1247{
1248    zval        **regex_entry,
1249                **replace_entry = NULL,
1250                 *replace_value,
1251                  empty_replace;
1252    char        *subject_value,
1253                *result;
1254    int          subject_len;
1255
1256    /* Make sure we're dealing with strings. */
1257    convert_to_string_ex(subject);
1258    /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1259    ZVAL_STRINGL(&empty_replace, "", 0, 0);
1260
1261    /* If regex is an array */
1262    if (Z_TYPE_P(regex) == IS_ARRAY) {
1263        /* Duplicate subject string for repeated replacement */
1264        subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
1265        subject_len = Z_STRLEN_PP(subject);
1266        *result_len = subject_len;
1267
1268        zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
1269
1270        replace_value = replace;
1271        if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
1272            zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
1273
1274        /* For each entry in the regex array, get the entry */
1275        while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)&regex_entry) == SUCCESS) {
1276            /* Make sure we're dealing with strings. */
1277            convert_to_string_ex(regex_entry);
1278
1279            /* If replace is an array and not a callable construct */
1280            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1281                /* Get current entry */
1282                if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
1283                    if (!is_callable_replace) {
1284                        convert_to_string_ex(replace_entry);
1285                    }
1286                    replace_value = *replace_entry;
1287                    zend_hash_move_forward(Z_ARRVAL_P(replace));
1288                } else {
1289                    /* We've run out of replacement strings, so use an empty one */
1290                    replace_value = &empty_replace;
1291                }
1292            }
1293
1294            /* Do the actual replacement and put the result back into subject_value
1295               for further replacements. */
1296            if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
1297                                           Z_STRLEN_PP(regex_entry),
1298                                           subject_value,
1299                                           subject_len,
1300                                           replace_value,
1301                                           is_callable_replace,
1302                                           result_len,
1303                                           limit,
1304                                           replace_count TSRMLS_CC)) != NULL) {
1305                efree(subject_value);
1306                subject_value = result;
1307                subject_len = *result_len;
1308            } else {
1309                efree(subject_value);
1310                return NULL;
1311            }
1312
1313            zend_hash_move_forward(Z_ARRVAL_P(regex));
1314        }
1315
1316        return subject_value;
1317    } else {
1318        result = php_pcre_replace(Z_STRVAL_P(regex),
1319                                  Z_STRLEN_P(regex),
1320                                  Z_STRVAL_PP(subject),
1321                                  Z_STRLEN_PP(subject),
1322                                  replace,
1323                                  is_callable_replace,
1324                                  result_len,
1325                                  limit,
1326                                  replace_count TSRMLS_CC);
1327        return result;
1328    }
1329}
1330/* }}} */
1331
1332/* {{{ preg_replace_impl
1333 */
1334static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1335{
1336    zval           **regex,
1337                   **replace,
1338                   **subject,
1339                   **subject_entry,
1340                   **zcount = NULL;
1341    char            *result;
1342    int              result_len;
1343    int              limit_val = -1;
1344    long            limit = -1;
1345    char            *string_key;
1346    ulong            num_key;
1347    char            *callback_name;
1348    int              replace_count=0, old_replace_count;
1349
1350    /* Get function parameters and do error-checking. */
1351    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1352        return;
1353    }
1354
1355    if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
1356        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1357        RETURN_FALSE;
1358    }
1359
1360    SEPARATE_ZVAL(replace);
1361    if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
1362        convert_to_string_ex(replace);
1363    }
1364    if (is_callable_replace) {
1365        if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
1366            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
1367            efree(callback_name);
1368            MAKE_COPY_ZVAL(subject, return_value);
1369            return;
1370        }
1371        efree(callback_name);
1372    }
1373
1374    SEPARATE_ZVAL(regex);
1375    SEPARATE_ZVAL(subject);
1376
1377    if (ZEND_NUM_ARGS() > 3) {
1378        limit_val = limit;
1379    }
1380
1381    if (Z_TYPE_PP(regex) != IS_ARRAY)
1382        convert_to_string_ex(regex);
1383
1384    /* if subject is an array */
1385    if (Z_TYPE_PP(subject) == IS_ARRAY) {
1386        array_init(return_value);
1387        zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
1388
1389        /* For each subject entry, convert it to string, then perform replacement
1390           and add the result to the return_value array. */
1391        while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
1392            SEPARATE_ZVAL(subject_entry);
1393            old_replace_count = replace_count;
1394            if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1395                if (!is_filter || replace_count > old_replace_count) {
1396                    /* Add to return array */
1397                    switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
1398                    {
1399                    case HASH_KEY_IS_STRING:
1400                        add_assoc_stringl(return_value, string_key, result, result_len, 0);
1401                        break;
1402
1403                    case HASH_KEY_IS_LONG:
1404                        add_index_stringl(return_value, num_key, result, result_len, 0);
1405                        break;
1406                    }
1407                } else {
1408                    efree(result);
1409                }
1410            }
1411
1412            zend_hash_move_forward(Z_ARRVAL_PP(subject));
1413        }
1414    } else {    /* if subject is not an array */
1415        old_replace_count = replace_count;
1416        if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1417            if (!is_filter || replace_count > old_replace_count) {
1418                RETVAL_STRINGL(result, result_len, 0);
1419            } else {
1420                efree(result);
1421            }
1422        }
1423    }
1424    if (ZEND_NUM_ARGS() > 4) {
1425        zval_dtor(*zcount);
1426        ZVAL_LONG(*zcount, replace_count);
1427    }
1428
1429}
1430/* }}} */
1431
1432/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1433   Perform Perl-style regular expression replacement. */
1434static PHP_FUNCTION(preg_replace)
1435{
1436    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1437}
1438/* }}} */
1439
1440/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1441   Perform Perl-style regular expression replacement using replacement callback. */
1442static PHP_FUNCTION(preg_replace_callback)
1443{
1444    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1445}
1446/* }}} */
1447
1448/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1449   Perform Perl-style regular expression replacement and only return matches. */
1450static PHP_FUNCTION(preg_filter)
1451{
1452    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1453}
1454/* }}} */
1455
1456/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1457   Split string into an array using a perl-style regular expression as a delimiter */
1458static PHP_FUNCTION(preg_split)
1459{
1460    char                *regex;         /* Regular expression */
1461    char                *subject;       /* String to match against */
1462    int                  regex_len;
1463    int                  subject_len;
1464    long                 limit_val = -1;/* Integer value of limit */
1465    long                 flags = 0;     /* Match control flags */
1466    pcre_cache_entry    *pce;           /* Compiled regular expression */
1467
1468    /* Get function parameters and do error checking */
1469    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", &regex, &regex_len,
1470                              &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1471        RETURN_FALSE;
1472    }
1473
1474    /* Compile regex or get it from cache. */
1475    if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1476        RETURN_FALSE;
1477    }
1478
1479    php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
1480}
1481/* }}} */
1482
/* {{{ php_pcre_split_impl
 */
1485PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1486    long limit_val, long flags TSRMLS_DC)
1487{
1488    pcre_extra      *extra = NULL;      /* Holds results of studying */
1489    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1490    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1491    pcre_extra       extra_data;        /* Used locally for exec options */
1492    int             *offsets;           /* Array of subpattern offsets */
1493    int              size_offsets;      /* Size of the offsets array */
1494    int              exoptions = 0;     /* Execution options */
1495    int              count = 0;         /* Count of matched subpatterns */
1496    int              start_offset;      /* Where the new search starts */
1497    int              next_offset;       /* End of the last delimiter match + 1 */
1498    int              g_notempty = 0;    /* If the match should not be empty */
1499    char            *last_match;        /* Location of last match */
1500    int              rc;
1501    int              no_empty;          /* If NO_EMPTY flag is set */
1502    int              delim_capture;     /* If delimiters should be captured */
1503    int              offset_capture;    /* If offsets should be captured */
1504
1505    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1506    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1507    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1508
1509    if (limit_val == 0) {
1510        limit_val = -1;
1511    }
1512
1513    if (extra == NULL) {
1514        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1515        extra = &extra_data;
1516    }
1517    extra->match_limit = PCRE_G(backtrack_limit);
1518    extra->match_limit_recursion = PCRE_G(recursion_limit);
1519
1520    /* Initialize return value */
1521    array_init(return_value);
1522
1523    /* Calculate the size of the offsets array, and allocate memory for it. */
1524    rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1525    if (rc < 0) {
1526        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1527        RETURN_FALSE;
1528    }
1529    size_offsets = (size_offsets + 1) * 3;
1530    offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1531
1532    /* Start at the beginning of the string */
1533    start_offset = 0;
1534    next_offset = 0;
1535    last_match = subject;
1536    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1537
1538    /* Get next piece if no limit or limit not yet reached and something matched*/
1539    while ((limit_val == -1 || limit_val > 1)) {
1540        count = pcre_exec(pce->re, extra, subject,
1541                          subject_len, start_offset,
1542                          exoptions|g_notempty, offsets, size_offsets);
1543
1544        /* the string was already proved to be valid UTF-8 */
1545        exoptions |= PCRE_NO_UTF8_CHECK;
1546
1547        /* Check for too many substrings condition. */
1548        if (count == 0) {
1549            php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1550            count = size_offsets/3;
1551        }
1552
1553        /* If something matched */
1554        if (count > 0) {
1555            if (!no_empty || &subject[offsets[0]] != last_match) {
1556
1557                if (offset_capture) {
1558                    /* Add (match, offset) pair to the return value */
1559                    add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1560                } else {
1561                    /* Add the piece to the return value */
1562                    add_next_index_stringl(return_value, last_match,
1563                                       &subject[offsets[0]]-last_match, 1);
1564                }
1565
1566                /* One less left to do */
1567                if (limit_val != -1)
1568                    limit_val--;
1569            }
1570
1571            last_match = &subject[offsets[1]];
1572            next_offset = offsets[1];
1573
1574            if (delim_capture) {
1575                int i, match_len;
1576                for (i = 1; i < count; i++) {
1577                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1578                    /* If we have matched a delimiter */
1579                    if (!no_empty || match_len > 0) {
1580                        if (offset_capture) {
1581                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1582                        } else {
1583                            add_next_index_stringl(return_value,
1584                                                   &subject[offsets[i<<1]],
1585                                                   match_len, 1);
1586                        }
1587                    }
1588                }
1589            }
1590        } else if (count == PCRE_ERROR_NOMATCH) {
1591            /* If we previously set PCRE_NOTEMPTY after a null match,
1592               this is not necessarily the end. We need to advance
1593               the start offset, and continue. Fudge the offset values
1594               to achieve this, unless we're already at the end of the string. */
1595            if (g_notempty != 0 && start_offset < subject_len) {
1596                if (pce->compile_options & PCRE_UTF8) {
1597                    if (re_bump == NULL) {
1598                        int dummy;
1599
1600                        if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
1601                            RETURN_FALSE;
1602                        }
1603                    }
1604                    count = pcre_exec(re_bump, extra_bump, subject,
1605                              subject_len, start_offset,
1606                              exoptions, offsets, size_offsets);
1607                    if (count < 1) {
1608                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
1609                        RETURN_FALSE;
1610                    }
1611                } else {
1612                    offsets[0] = start_offset;
1613                    offsets[1] = start_offset + 1;
1614                }
1615            } else
1616                break;
1617        } else {
1618            pcre_handle_exec_error(count TSRMLS_CC);
1619            break;
1620        }
1621
1622        /* If we have matched an empty string, mimic what Perl's /g options does.
1623           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1624           the match again at the same point. If this fails (picked up above) we
1625           advance to the next character. */
1626        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1627
1628        /* Advance to the position right after the last full match */
1629        start_offset = offsets[1];
1630    }
1631
1632
1633    start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1634
1635    if (!no_empty || start_offset < subject_len)
1636    {
1637        if (offset_capture) {
1638            /* Add the last (match, offset) pair to the return value */
1639            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1640        } else {
1641            /* Add the last piece to the return value */
1642            add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1643        }
1644    }
1645
1646
1647    /* Clean up */
1648    efree(offsets);
1649}
1650/* }}} */
1651
1652/* {{{ proto string preg_quote(string str [, string delim_char])
1653   Quote regular expression characters plus an optional character */
1654static PHP_FUNCTION(preg_quote)
1655{
1656    int      in_str_len;
1657    char    *in_str;        /* Input string argument */
1658    char    *in_str_end;    /* End of the input string */
1659    int      delim_len = 0;
1660    char    *delim = NULL;  /* Additional delimiter argument */
1661    char    *out_str,       /* Output string with quoted characters */
1662            *p,             /* Iterator for input string */
1663            *q,             /* Iterator for output string */
1664             delim_char=0,  /* Delimiter character to be quoted */
1665             c;             /* Current character */
1666    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1667
1668    /* Get the arguments and check for errors */
1669    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1670                              &delim, &delim_len) == FAILURE) {
1671        return;
1672    }
1673
1674    in_str_end = in_str + in_str_len;
1675
1676    /* Nothing to do if we got an empty string */
1677    if (in_str == in_str_end) {
1678        RETURN_EMPTY_STRING();
1679    }
1680
1681    if (delim && *delim) {
1682        delim_char = delim[0];
1683        quote_delim = 1;
1684    }
1685
1686    /* Allocate enough memory so that even if each character
1687       is quoted, we won't run out of room */
1688    out_str = safe_emalloc(4, in_str_len, 1);
1689
1690    /* Go through the string and quote necessary characters */
1691    for(p = in_str, q = out_str; p != in_str_end; p++) {
1692        c = *p;
1693        switch(c) {
1694            case '.':
1695            case '\\':
1696            case '+':
1697            case '*':
1698            case '?':
1699            case '[':
1700            case '^':
1701            case ']':
1702            case '$':
1703            case '(':
1704            case ')':
1705            case '{':
1706            case '}':
1707            case '=':
1708            case '!':
1709            case '>':
1710            case '<':
1711            case '|':
1712            case ':':
1713            case '-':
1714                *q++ = '\\';
1715                *q++ = c;
1716                break;
1717
1718            case '\0':
1719                *q++ = '\\';
1720                *q++ = '0';
1721                *q++ = '0';
1722                *q++ = '0';
1723                break;
1724
1725            default:
1726                if (quote_delim && c == delim_char)
1727                    *q++ = '\\';
1728                *q++ = c;
1729                break;
1730        }
1731    }
1732    *q = '\0';
1733
1734    /* Reallocate string and return it */
1735    RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
1736}
1737/* }}} */
1738
1739/* {{{ proto array preg_grep(string regex, array input [, int flags])
1740   Searches array and returns entries which match regex */
1741static PHP_FUNCTION(preg_grep)
1742{
1743    char                *regex;         /* Regular expression */
1744    int                  regex_len;
1745    zval                *input;         /* Input array */
1746    long                 flags = 0;     /* Match control flags */
1747    pcre_cache_entry    *pce;           /* Compiled regular expression */
1748
1749    /* Get arguments and do error checking */
1750    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", &regex, &regex_len,
1751                              &input, &flags) == FAILURE) {
1752        return;
1753    }
1754
1755    /* Compile regex or get it from cache. */
1756    if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1757        RETURN_FALSE;
1758    }
1759
1760    php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1761}
1762/* }}} */
1763
1764PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
1765{
1766    zval           **entry;             /* An entry in the input array */
1767    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1768    pcre_extra       extra_data;        /* Used locally for exec options */
1769    int             *offsets;           /* Array of subpattern offsets */
1770    int              size_offsets;      /* Size of the offsets array */
1771    int              count = 0;         /* Count of matched subpatterns */
1772    char            *string_key;
1773    ulong            num_key;
1774    zend_bool        invert;            /* Whether to return non-matching
1775                                           entries */
1776    int              rc;
1777
1778    invert = flags & PREG_GREP_INVERT ? 1 : 0;
1779
1780    if (extra == NULL) {
1781        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1782        extra = &extra_data;
1783    }
1784    extra->match_limit = PCRE_G(backtrack_limit);
1785    extra->match_limit_recursion = PCRE_G(recursion_limit);
1786
1787    /* Calculate the size of the offsets array, and allocate memory for it. */
1788    rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1789    if (rc < 0) {
1790        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1791        RETURN_FALSE;
1792    }
1793    size_offsets = (size_offsets + 1) * 3;
1794    offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1795
1796    /* Initialize return array */
1797    array_init(return_value);
1798
1799    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1800
1801    /* Go through the input array */
1802    zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1803    while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1804        zval subject = **entry;
1805
1806        if (Z_TYPE_PP(entry) != IS_STRING) {
1807            zval_copy_ctor(&subject);
1808            convert_to_string(&subject);
1809        }
1810
1811        /* Perform the match */
1812        count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
1813                          Z_STRLEN(subject), 0,
1814                          0, offsets, size_offsets);
1815
1816        /* Check for too many substrings condition. */
1817        if (count == 0) {
1818            php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1819            count = size_offsets/3;
1820        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1821            pcre_handle_exec_error(count TSRMLS_CC);
1822            break;
1823        }
1824
1825        /* If the entry fits our requirements */
1826        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1827
1828            Z_ADDREF_PP(entry);
1829
1830            /* Add to return array */
1831            switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
1832            {
1833                case HASH_KEY_IS_STRING:
1834                    zend_hash_update(Z_ARRVAL_P(return_value), string_key,
1835                                     strlen(string_key)+1, entry, sizeof(zval *), NULL);
1836                    break;
1837
1838                case HASH_KEY_IS_LONG:
1839                    zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
1840                                           sizeof(zval *), NULL);
1841                    break;
1842            }
1843        }
1844
1845        if (Z_TYPE_PP(entry) != IS_STRING) {
1846            zval_dtor(&subject);
1847        }
1848
1849        zend_hash_move_forward(Z_ARRVAL_P(input));
1850    }
1851    zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1852    /* Clean up */
1853    efree(offsets);
1854}
1855/* }}} */
1856
1857/* {{{ proto int preg_last_error()
1858   Returns the error code of the last regexp execution. */
1859static PHP_FUNCTION(preg_last_error)
1860{
1861    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1862        return;
1863    }
1864
1865    RETURN_LONG(PCRE_G(error_code));
1866}
1867/* }}} */
1868
1869/* {{{ module definition structures */
1870
/* {{{ arginfo */
/* Argument descriptions for the functions registered in pcre_functions.
   NOTE(review): per the Zend arginfo macro conventions, the first argument of
   ZEND_ARG_INFO is 1 for a by-reference parameter and 0 otherwise, and the
   last argument of ZEND_BEGIN_ARG_INFO_EX is the number of required
   parameters - confirm against zend_API.h. */

/* preg_match: pattern and subject, then subpatterns (by reference), flags, offset */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_match_all: same parameter list as preg_match */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_replace (also used for preg_filter in pcre_functions):
   regex, replace, subject, then limit and count (by reference) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback: like preg_replace, with a callback in place of
   the replacement */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_split: pattern and subject, then limit and flags */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_quote: str, then delim_char */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

/* preg_grep: regex and input, then flags */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error: no parameters */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
/* }}} */
1925
/* Function table of the extension: one PHP_FE entry per function, pairing
   it with its argument description. preg_filter has no arginfo of its own
   and reuses arginfo_preg_replace. PHP_FE_END terminates the table. */
static const zend_function_entry pcre_functions[] = {
    PHP_FE(preg_match,              arginfo_preg_match)
    PHP_FE(preg_match_all,          arginfo_preg_match_all)
    PHP_FE(preg_replace,            arginfo_preg_replace)
    PHP_FE(preg_replace_callback,   arginfo_preg_replace_callback)
    PHP_FE(preg_filter,             arginfo_preg_replace)
    PHP_FE(preg_split,              arginfo_preg_split)
    PHP_FE(preg_quote,              arginfo_preg_quote)
    PHP_FE(preg_grep,               arginfo_preg_grep)
    PHP_FE(preg_last_error,         arginfo_preg_last_error)
    PHP_FE_END
};
1938
/* Module descriptor for the "pcre" extension. It ties together the function
   table, the module startup/shutdown and info callbacks, and the module
   globals with their init/shutdown callbacks.
   NOTE(review): the two NULL slots after PHP_MSHUTDOWN are presumably the
   per-request startup/shutdown hooks and the NULL after PHP_GSHUTDOWN the
   post-deactivate hook - confirm against the zend_module_entry layout. */
zend_module_entry pcre_module_entry = {
    STANDARD_MODULE_HEADER,
   "pcre",
    pcre_functions,
    PHP_MINIT(pcre),
    PHP_MSHUTDOWN(pcre),
    NULL,
    NULL,
    PHP_MINFO(pcre),
    NO_VERSION_YET,
    PHP_MODULE_GLOBALS(pcre),
    PHP_GINIT(pcre),
    PHP_GSHUTDOWN(pcre),
    NULL,
    STANDARD_MODULE_PROPERTIES_EX
};
1955
/* ZEND_GET_MODULE(pcre) is emitted only when COMPILE_DL_PCRE is defined.
   NOTE(review): presumably that macro is set when the extension is built as
   a dynamically loadable module - confirm against the build configuration. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
1959
1960/* }}} */
1961
1962#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
1963
1964/*
1965 * Local variables:
1966 * tab-width: 4
1967 * c-basic-offset: 4
1968 * End:
1969 * vim600: sw=4 ts=4 fdm=marker
1970 * vim<600: sw=4 ts=4
1971 */
1972