1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2014 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include "php.h"
22#include "php_ini.h"
23#include "php_globals.h"
24#include "php_pcre.h"
25#include "ext/standard/info.h"
26#include "ext/standard/php_smart_str.h"
27
28#if HAVE_PCRE || HAVE_BUNDLED_PCRE
29
30#include "ext/standard/php_string.h"
31
/* Result-ordering modes for global matching. They live in the low byte of
   the flags argument (php_pcre_match_impl() extracts them with & 0xff). */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
/* Match flag: report every capture as a (string, offset) pair. */
#define PREG_OFFSET_CAPTURE         (1<<8)

/* Flag bits exported to userland as the PREG_SPLIT_* constants. */
#define PREG_SPLIT_NO_EMPTY         (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE    (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1<<2)

/* Stored in a cache entry's preg_options when the pattern uses the 'e' modifier. */
#define PREG_REPLACE_EVAL           (1<<0)

/* Flag bit exported to userland as PREG_GREP_INVERT. */
#define PREG_GREP_INVERT            (1<<0)

/* Entry count at which the compiled-regex cache gets trimmed before an insert. */
#define PCRE_CACHE_SIZE 4096

/* Values kept in PCRE_G(error_code); MINIT exports them as the PREG_*_ERROR
   constants. pcre_handle_exec_error() maps pcre_exec() codes onto them. */
enum {
    PHP_PCRE_NO_ERROR = 0,
    PHP_PCRE_INTERNAL_ERROR,
    PHP_PCRE_BACKTRACK_LIMIT_ERROR,
    PHP_PCRE_RECURSION_LIMIT_ERROR,
    PHP_PCRE_BAD_UTF8_ERROR,
    PHP_PCRE_BAD_UTF8_OFFSET_ERROR
};


/* Module globals: the regex cache, the two match limits and the last error code. */
ZEND_DECLARE_MODULE_GLOBALS(pcre)
57
58
59static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
60{
61    int preg_code = 0;
62
63    switch (pcre_code) {
64        case PCRE_ERROR_MATCHLIMIT:
65            preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
66            break;
67
68        case PCRE_ERROR_RECURSIONLIMIT:
69            preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
70            break;
71
72        case PCRE_ERROR_BADUTF8:
73            preg_code = PHP_PCRE_BAD_UTF8_ERROR;
74            break;
75
76        case PCRE_ERROR_BADUTF8_OFFSET:
77            preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
78            break;
79
80        default:
81            preg_code = PHP_PCRE_INTERNAL_ERROR;
82            break;
83    }
84
85    PCRE_G(error_code) = preg_code;
86}
87/* }}} */
88
89static void php_free_pcre_cache(void *data) /* {{{ */
90{
91    pcre_cache_entry *pce = (pcre_cache_entry *) data;
92    if (!pce) return;
93    pefree(pce->re, 1);
94    if (pce->extra) pefree(pce->extra, 1);
95#if HAVE_SETLOCALE
96    if ((void*)pce->tables) pefree((void*)pce->tables, 1);
97    pefree(pce->locale, 1);
98#endif
99}
100/* }}} */
101
102static PHP_GINIT_FUNCTION(pcre) /* {{{ */
103{
104    zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
105    pcre_globals->backtrack_limit = 0;
106    pcre_globals->recursion_limit = 0;
107    pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
108}
109/* }}} */
110
/* Globals destructor: tears down the regex cache created in GINIT. Each
   remaining entry goes through the destructor registered there
   (php_free_pcre_cache). */
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
    zend_hash_destroy(&pcre_globals->pcre_cache);
}
115/* }}} */
116
/* INI settings of the extension. Both are changeable everywhere
   (PHP_INI_ALL) and are stored as longs in the module globals:
     pcre.backtrack_limit -> backtrack_limit (default "1000000")
     pcre.recursion_limit -> recursion_limit (default "100000")
   php_pcre_match_impl() copies them into the pcre_extra match limits. */
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
    STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
PHP_INI_END()
121
122
/* {{{ PHP_MINFO_FUNCTION(pcre)
   Emit the extension's info table: a fixed "enabled" row, the version
   string reported by pcre_version(), then the INI entries. */
static PHP_MINFO_FUNCTION(pcre)
{
    php_info_print_table_start();
    php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
    php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
    php_info_print_table_end();

    DISPLAY_INI_ENTRIES();
}
133/* }}} */
134
/* {{{ PHP_MINIT_FUNCTION(pcre)
   Module startup: registers the INI entries and exports the PREG_* flag
   and error constants plus PCRE_VERSION. All constants are case-sensitive
   and persistent. Always returns SUCCESS. */
static PHP_MINIT_FUNCTION(pcre)
{
    REGISTER_INI_ENTRIES();

    /* Flags accepted by the matching, splitting and grep functions */
    REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);

    /* Error codes, i.e. the possible values of PCRE_G(error_code) */
    REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
    /* Version string of the PCRE library in use, as returned by pcre_version() */
    REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);

    return SUCCESS;
}
158/* }}} */
159
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
   Module shutdown: removes the INI entries registered in MINIT.
   Always returns SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
    UNREGISTER_INI_ENTRIES();

    return SUCCESS;
}
167/* }}} */
168
169/* {{{ static pcre_clean_cache */
170static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
171{
172    int *num_clean = (int *)arg;
173
174    if (*num_clean > 0) {
175        (*num_clean)--;
176        return 1;
177    } else {
178        return 0;
179    }
180}
181/* }}} */
182
183/* {{{ static make_subpats_table */
184static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
185{
186    pcre_extra *extra = pce->extra;
187    int name_cnt = 0, name_size, ni = 0;
188    int rc;
189    char *name_table;
190    unsigned short name_idx;
191    char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
192
193    rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
194    if (rc < 0) {
195        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
196        efree(subpat_names);
197        return NULL;
198    }
199    if (name_cnt > 0) {
200        int rc1, rc2;
201
202        rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
203        rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
204        rc = rc2 ? rc2 : rc1;
205        if (rc < 0) {
206            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
207            efree(subpat_names);
208            return NULL;
209        }
210
211        while (ni++ < name_cnt) {
212            name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
213            subpat_names[name_idx] = name_table + 2;
214            if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
215                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
216                efree(subpat_names);
217                return NULL;
218            }
219            name_table += name_size;
220        }
221    }
222
223    return subpat_names;
224}
225/* }}} */
226
/* {{{ pcre_get_compiled_regex_cache
 * Return the cache entry for a delimited regex such as "/foo/i", compiling
 * the pattern and adding it to PCRE_G(pcre_cache) when no usable entry
 * exists yet.
 *
 * regex      Delimited pattern followed by optional modifiers. It is scanned
 *            up to a NUL byte, so it must be NUL-terminated.
 * regex_len  Length of regex in bytes, excluding the terminating NUL.
 *
 * Returns the entry stored in the cache, or NULL after an E_WARNING when the
 * pattern is empty, contains a NUL byte, has a bad or missing delimiter, uses
 * an unknown modifier or fails to compile.
 */
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
{
    pcre                *re = NULL;
    pcre_extra          *extra;
    int                  coptions = 0;
    int                  soptions = 0;
    const char          *error;
    int                  erroffset;
    char                 delimiter;
    char                 start_delimiter;
    char                 end_delimiter;
    char                *p, *pp;
    char                *pattern;
    int                  do_study = 0;
    int                  poptions = 0;
    int             count = 0;
    unsigned const char *tables = NULL;
#if HAVE_SETLOCALE
    char                *locale;
#endif
    pcre_cache_entry    *pce;
    pcre_cache_entry     new_entry;
    char                *tmp = NULL;

#if HAVE_SETLOCALE
# if defined(PHP_WIN32) && defined(ZTS)
    _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
# endif
    /* Query (not change) the current LC_CTYPE locale. */
    locale = setlocale(LC_CTYPE, NULL);
#endif

    /* Try to lookup the cached regex entry, and if successful, just pass
       back the compiled pattern, otherwise go on and compile it. */
    if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
        /*
         * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
         * is, we flush it and compile the pattern from scratch.
         */
        if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
            zend_hash_clean(&PCRE_G(pcre_cache));
        } else {
#if HAVE_SETLOCALE
            /* An entry compiled under a different locale is not reused;
               we fall through and recompile, replacing it below. */
            if (!strcmp(pce->locale, locale)) {
#endif
                return pce;
#if HAVE_SETLOCALE
            }
#endif
        }
    }

    p = regex;

    /* Parse through the leading whitespace, and display a warning if we
       get to the end without encountering a delimiter. */
    while (isspace((int)*(unsigned char *)p)) p++;
    if (*p == 0) {
        /* A NUL before regex_len bytes were consumed is an embedded NUL,
           otherwise the string simply ended. */
        php_error_docref(NULL TSRMLS_CC, E_WARNING,
                         p < regex + regex_len ? "Null byte in regex" : "Empty regular expression");
        return NULL;
    }

    /* Get the delimiter and display a warning if it is alphanumeric
       or a backslash. */
    delimiter = *p++;
    if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
        php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
        return NULL;
    }

    start_delimiter = delimiter;
    /* Lookup trick: in the string below every opening bracket has its closing
       counterpart five positions further on, while a closing bracket or a
       space found first maps onto itself. Other delimiters are unchanged. */
    if ((pp = strchr("([{< )]}> )]}>", delimiter)))
        delimiter = pp[5];
    end_delimiter = delimiter;

    pp = p;

    if (start_delimiter == end_delimiter) {
        /* We need to iterate through the pattern, searching for the ending delimiter,
           but skipping the backslashed delimiters.  If the ending delimiter is not
           found, display a warning. */
        while (*pp != 0) {
            if (*pp == '\\' && pp[1] != 0) pp++;
            else if (*pp == delimiter)
                break;
            pp++;
        }
    } else {
        /* We iterate through the pattern, searching for the matching ending
         * delimiter. For each matching starting delimiter, we increment nesting
         * level, and decrement it for each matching ending delimiter. If we
         * reach the end of the pattern without matching, display a warning.
         */
        int brackets = 1;   /* brackets nesting level */
        while (*pp != 0) {
            if (*pp == '\\' && pp[1] != 0) pp++;
            else if (*pp == end_delimiter && --brackets <= 0)
                break;
            else if (*pp == start_delimiter)
                brackets++;
            pp++;
        }
    }

    /* The scan stopped on a NUL: either an embedded NUL byte or the end of
       the string was reached without finding the closing delimiter. */
    if (*pp == 0) {
        if (pp < regex + regex_len) {
            php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
        } else if (start_delimiter == end_delimiter) {
            php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
        } else {
            php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
        }
        return NULL;
    }

    /* Make a copy of the actual pattern. */
    pattern = estrndup(p, pp-p);

    /* Move on to the options */
    pp++;

    /* Parse through the options, setting appropriate flags.  Display
       a warning if we encounter an unknown modifier. */
    while (pp < regex + regex_len) {
        switch (*pp++) {
            /* Perl compatible options */
            case 'i':   coptions |= PCRE_CASELESS;      break;
            case 'm':   coptions |= PCRE_MULTILINE;     break;
            case 's':   coptions |= PCRE_DOTALL;        break;
            case 'x':   coptions |= PCRE_EXTENDED;      break;

            /* PCRE specific options */
            case 'A':   coptions |= PCRE_ANCHORED;      break;
            case 'D':   coptions |= PCRE_DOLLAR_ENDONLY;break;
            case 'S':   do_study  = 1;                  break;
            case 'U':   coptions |= PCRE_UNGREEDY;      break;
            case 'X':   coptions |= PCRE_EXTRA;         break;
            case 'u':   coptions |= PCRE_UTF8;
    /* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
       characters, even in UTF-8 mode. However, this can be changed by setting
       the PCRE_UCP option. */
#ifdef PCRE_UCP
                        coptions |= PCRE_UCP;
#endif
                break;

            /* Custom preg options */
            case 'e':   poptions |= PREG_REPLACE_EVAL;  break;

            /* Spaces and newlines between modifiers are ignored. */
            case ' ':
            case '\n':
                break;

            default:
                /* pp was already advanced, so pp[-1] is the offending byte. */
                if (pp[-1]) {
                    php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
                } else {
                    php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
                }
                efree(pattern);
                return NULL;
        }
    }

#if HAVE_SETLOCALE
    /* Build character tables for the current locale unless it is "C";
       with tables == NULL pcre_compile() is given no custom tables. */
    if (strcmp(locale, "C"))
        tables = pcre_maketables();
#endif

    /* Compile pattern and display a warning if compilation failed. */
    re = pcre_compile(pattern,
                      coptions,
                      &error,
                      &erroffset,
                      tables);

    if (re == NULL) {
        php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
        efree(pattern);
        if (tables) {
            pefree((void*)tables, 1);
        }
        return NULL;
    }

    /* If study option was specified, study the pattern and
       store the result in extra for passing to pcre_exec. */
    if (do_study) {
        extra = pcre_study(re, soptions, &error);
        if (extra) {
            extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
        }
        /* A study failure only produces a warning; the pattern is still
           cached and returned. */
        if (error != NULL) {
            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
        }
    } else {
        extra = NULL;
    }

    efree(pattern);

    /*
     * If we reached cache limit, clean out the items from the head of the list;
     * these are supposedly the oldest ones (but not necessarily the least used
     * ones).
     */
    if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
        /* pcre_clean_cache() selects the first num_clean entries visited. */
        int num_clean = PCRE_CACHE_SIZE / 8;
        zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
    }

    /* Store the compiled pattern and extra info in the cache. */
    new_entry.re = re;
    new_entry.extra = extra;
    new_entry.preg_options = poptions;
    new_entry.compile_options = coptions;
#if HAVE_SETLOCALE
    new_entry.locale = pestrdup(locale, 1);
    new_entry.tables = tables;
#endif

    /*
     * Interned strings are not duplicated when stored in HashTable,
     * but all the interned strings created during HTTP request are removed
     * at end of request. However PCRE_G(pcre_cache) must be consistent
     * on the next request as well. So we disable usage of interned strings
     * as hash keys especially for this table.
     * See bug #63180
     */
    if (IS_INTERNED(regex)) {
        regex = tmp = estrndup(regex, regex_len);
    }

    /* new_entry is stored by value (sizeof(pcre_cache_entry)); pce receives
       the location of the stored entry through the last argument. */
    zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
                        sizeof(pcre_cache_entry), (void**)&pce);

    if (tmp) {
        efree(tmp);
    }

    return pce;
}
471/* }}} */
472
473/* {{{ pcre_get_compiled_regex
474 */
475PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
476{
477    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
478
479    if (extra) {
480        *extra = pce ? pce->extra : NULL;
481    }
482    if (preg_options) {
483        *preg_options = pce ? pce->preg_options : 0;
484    }
485
486    return pce ? pce->re : NULL;
487}
488/* }}} */
489
490/* {{{ pcre_get_compiled_regex_ex
491 */
492PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
493{
494    pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
495
496    if (extra) {
497        *extra = pce ? pce->extra : NULL;
498    }
499    if (preg_options) {
500        *preg_options = pce ? pce->preg_options : 0;
501    }
502    if (compile_options) {
503        *compile_options = pce ? pce->compile_options : 0;
504    }
505
506    return pce ? pce->re : NULL;
507}
508/* }}} */
509
510/* {{{ add_offset_pair */
511static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
512{
513    zval *match_pair;
514
515    ALLOC_ZVAL(match_pair);
516    array_init(match_pair);
517    INIT_PZVAL(match_pair);
518
519    /* Add (match, offset) to the return value */
520    add_next_index_stringl(match_pair, str, len, 1);
521    add_next_index_long(match_pair, offset);
522
523    if (name) {
524        zval_add_ref(&match_pair);
525        zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
526    }
527    zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
528}
529/* }}} */
530
531static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
532{
533    /* parameters */
534    char             *regex;            /* Regular expression */
535    char             *subject;          /* String to match against */
536    int               regex_len;
537    int               subject_len;
538    pcre_cache_entry *pce;              /* Compiled regular expression */
539    zval             *subpats = NULL;   /* Array for subpatterns */
540    long              flags = 0;        /* Match control flags */
541    long              start_offset = 0; /* Where the new search starts */
542
543    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", &regex, &regex_len,
544                              &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
545        RETURN_FALSE;
546    }
547
548    /* Compile regex or get it from cache. */
549    if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
550        RETURN_FALSE;
551    }
552
553    php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
554        global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
555}
556/* }}} */
557
/* {{{ php_pcre_match_impl()
 * Run a compiled pattern against a subject and fill in the results.
 *
 * pce           Cache entry holding the compiled pattern (and study data).
 * subject       String to match against; subject_len is its length.
 * return_value  Receives the number of matches (long), or FALSE on error.
 * subpats       Optional array zval; it is reset to an empty array and then
 *               filled with the captured subpatterns. May be NULL.
 * global        Non-zero: keep matching until the subject is exhausted
 *               (preg_match_all); zero: stop after the first match.
 * use_flags     Non-zero when flags should be honoured.
 * flags         PREG_OFFSET_CAPTURE bit plus, in the low byte, the result
 *               order (PREG_PATTERN_ORDER / PREG_SET_ORDER) for global mode.
 * start_offset  Where to start; a negative value counts from the end.
 *
 * Also resets PCRE_G(error_code) and sets it when pcre_exec() fails.
 */
PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
    zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
{
    zval            *result_set,        /* Holds a set of subpatterns after
                                           a global match */
                   **match_sets = NULL; /* An array of sets of matches for each
                                           subpattern after a global match */
    pcre_extra      *extra = pce->extra;/* Holds results of studying */
    pcre_extra       extra_data;        /* Used locally for exec options */
    int              exoptions = 0;     /* Execution options */
    int              count = 0;         /* Count of matched subpatterns */
    int             *offsets;           /* Array of subpattern offsets */
    int              num_subpats;       /* Number of captured subpatterns */
    int              size_offsets;      /* Size of the offsets array */
    int              matched;           /* Has anything matched */
    int              g_notempty = 0;    /* If the match should not be empty */
    const char     **stringlist;        /* Holds list of subpatterns */
    char           **subpat_names;      /* Array for named subpatterns */
    int              i, rc;
    int              subpats_order;     /* Order of subpattern matches */
    int              offset_capture;    /* Capture match offsets: yes/no */
    unsigned char   *mark = NULL;       /* Target for MARK name */
    zval            *marks = NULL;      /* Array of marks for PREG_PATTERN_ORDER */

    /* Overwrite the passed-in value for subpatterns with an empty array. */
    if (subpats != NULL) {
        zval_dtor(subpats);
        array_init(subpats);
    }

    subpats_order = global ? PREG_PATTERN_ORDER : 0;

    if (use_flags) {
        offset_capture = flags & PREG_OFFSET_CAPTURE;

        /*
         * subpats_order is pre-set to pattern mode so we change it only if
         * necessary.
         */
        if (flags & 0xff) {
            subpats_order = flags & 0xff;
        }
        /* Global mode accepts only the two ordering values; non-global mode
           accepts no ordering value at all. */
        if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
            (!global && subpats_order != 0)) {
            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
            /* NOTE(review): return_value is not assigned on this path. */
            return;
        }
    } else {
        offset_capture = 0;
    }

    /* Negative offset counts from the end of the string. */
    if (start_offset < 0) {
        start_offset = subject_len + start_offset;
        if (start_offset < 0) {
            start_offset = 0;
        }
    }

    /* Without study data use a stack-local pcre_extra; only the fields
       announced through its flags are filled in below. */
    if (extra == NULL) {
        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
        extra = &extra_data;
    }
    /* NOTE(review): when extra is pce->extra these writes modify the cached
       study data, including storing the address of the local 'mark'. */
    extra->match_limit = PCRE_G(backtrack_limit);
    extra->match_limit_recursion = PCRE_G(recursion_limit);
#ifdef PCRE_EXTRA_MARK
    extra->mark = &mark;
    extra->flags |= PCRE_EXTRA_MARK;
#endif

    /* Calculate the size of the offsets array, and allocate memory for it. */
    rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
    if (rc < 0) {
        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
        RETURN_FALSE;
    }
    /* One extra slot for the whole match (group 0); three ints per group. */
    num_subpats++;
    size_offsets = num_subpats * 3;

    /*
     * Build a mapping from subpattern numbers to their names. We will always
     * allocate the table, even though there may be no named subpatterns. This
     * avoids somewhat more complicated logic in the inner loops.
     */
    subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
    if (!subpat_names) {
        RETURN_FALSE;
    }

    offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);

    /* Allocate match sets array and initialize the values. */
    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
        match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
        for (i=0; i<num_subpats; i++) {
            ALLOC_ZVAL(match_sets[i]);
            array_init(match_sets[i]);
            INIT_PZVAL(match_sets[i]);
        }
    }

    matched = 0;
    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

    do {
        /* Execute the regular expression. */
        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
                          exoptions|g_notempty, offsets, size_offsets);

        /* the string was already proved to be valid UTF-8 */
        exoptions |= PCRE_NO_UTF8_CHECK;

        /* Check for too many substrings condition. */
        if (count == 0) {
            php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
            count = size_offsets/3;
        }

        /* If something has matched */
        if (count > 0) {
            matched++;

            /* If subpatterns array has been passed, fill it in with values. */
            if (subpats != NULL) {
                /* Try to get the list of substrings and display a warning if failed. */
                if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
                    efree(subpat_names);
                    efree(offsets);
                    /* NOTE(review): only the pointer array is released here;
                       the zvals in match_sets[] and 'marks' look leaked. */
                    if (match_sets) efree(match_sets);
                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
                    RETURN_FALSE;
                }

                /* For group i, offsets[2*i] is the start and offsets[2*i+1]
                   the end of the captured text within subject. */
                if (global) {   /* global pattern matching */
                    if (subpats && subpats_order == PREG_PATTERN_ORDER) {
                        /* For each subpattern, insert it into the appropriate array. */
                        for (i = 0; i < count; i++) {
                            if (offset_capture) {
                                add_offset_pair(match_sets[i], (char *)stringlist[i],
                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
                            } else {
                                add_next_index_stringl(match_sets[i], (char *)stringlist[i],
                                                       offsets[(i<<1)+1] - offsets[i<<1], 1);
                            }
                        }
                        /* Add MARK, if available */
                        if (mark) {
                            if (!marks) {
                                MAKE_STD_ZVAL(marks);
                                array_init(marks);
                            }
                            /* Keyed by match number so marks line up with the match sets. */
                            add_index_string(marks, matched - 1, (char *) mark, 1);
                        }
                        /*
                         * If the number of captured subpatterns on this run is
                         * less than the total possible number, pad the result
                         * arrays with empty strings.
                         */
                        if (count < num_subpats) {
                            for (; i < num_subpats; i++) {
                                add_next_index_string(match_sets[i], "", 1);
                            }
                        }
                    } else {
                        /* Allocate the result set array */
                        ALLOC_ZVAL(result_set);
                        array_init(result_set);
                        INIT_PZVAL(result_set);

                        /* Add all the subpatterns to it */
                        for (i = 0; i < count; i++) {
                            if (offset_capture) {
                                add_offset_pair(result_set, (char *)stringlist[i],
                                                offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
                            } else {
                                /* Named groups are stored twice: by name, then by index. */
                                if (subpat_names[i]) {
                                    add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
                                                           offsets[(i<<1)+1] - offsets[i<<1], 1);
                                }
                                add_next_index_stringl(result_set, (char *)stringlist[i],
                                                       offsets[(i<<1)+1] - offsets[i<<1], 1);
                            }
                        }
                        /* Add MARK, if available */
                        if (mark) {
                            add_assoc_string(result_set, "MARK", (char *) mark, 1);
                        }
                        /* And add it to the output array */
                        zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
                    }
                } else {            /* single pattern matching */
                    /* For each subpattern, insert it into the subpatterns array. */
                    for (i = 0; i < count; i++) {
                        if (offset_capture) {
                            add_offset_pair(subpats, (char *)stringlist[i],
                                            offsets[(i<<1)+1] - offsets[i<<1],
                                            offsets[i<<1], subpat_names[i]);
                        } else {
                            if (subpat_names[i]) {
                                add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
                                                  offsets[(i<<1)+1] - offsets[i<<1], 1);
                            }
                            add_next_index_stringl(subpats, (char *)stringlist[i],
                                                   offsets[(i<<1)+1] - offsets[i<<1], 1);
                        }
                    }
                    /* Add MARK, if available */
                    if (mark) {
                        add_assoc_string(subpats, "MARK", (char *) mark, 1);
                    }
                }

                pcre_free((void *) stringlist);
            }
        } else if (count == PCRE_ERROR_NOMATCH) {
            /* If we previously set PCRE_NOTEMPTY after a null match,
               this is not necessarily the end. We need to advance
               the start offset, and continue. Fudge the offset values
               to achieve this, unless we're already at the end of the string. */
            if (g_notempty != 0 && start_offset < subject_len) {
                offsets[0] = start_offset;
                offsets[1] = start_offset + 1;
            } else
                break;
        } else {
            /* Any other negative result is an error: record it and stop. */
            pcre_handle_exec_error(count TSRMLS_CC);
            break;
        }

        /* If we have matched an empty string, mimic what Perl's /g options does.
           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
           the match again at the same point. If this fails (picked up above) we
           advance to the next character. */
        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;

        /* Advance to the position right after the last full match */
        start_offset = offsets[1];
    } while (global);

    /* Add the match sets to the output array and clean up */
    if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
        for (i = 0; i < num_subpats; i++) {
            /* A named group's set is shared between its name key and its
               numeric key, hence the additional reference. */
            if (subpat_names[i]) {
                zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
                                 strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
                Z_ADDREF_P(match_sets[i]);
            }
            zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
        }
        efree(match_sets);

        if (marks) {
            add_assoc_zval(subpats, "MARK", marks);
        }
    }

    efree(offsets);
    efree(subpat_names);

    /* Did we encounter an error? */
    if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
        RETVAL_LONG(matched);
    } else {
        RETVAL_FALSE;
    }
}
825/* }}} */
826
827/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
828   Perform a Perl-style regular expression match */
829static PHP_FUNCTION(preg_match)
830{
831    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
832}
833/* }}} */
834
835/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
836   Perform a Perl-style global regular expression match */
837static PHP_FUNCTION(preg_match_all)
838{
839    php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
840}
841/* }}} */
842
843/* {{{ preg_get_backref
844 */
static int preg_get_backref(char **str, int *backref)
{
    /*
     * Try to parse a one- or two-digit backreference at *str. The caller
     * positions *str on the introducing character; accepted forms are
     * <intro>N, <intro>NN and, only when the introducer is '$', ${N} / ${NN}.
     *
     * On success the number is stored in *backref, *str is moved past the
     * reference and 1 is returned. On failure 0 is returned and *str is left
     * untouched (*backref may already hold a partially parsed number).
     */
    char *cursor = *str;
    int   braced = 0;

    /* An introducer directly followed by NUL cannot start a reference. */
    if (cursor[1] == '\0') {
        return 0;
    }

    /* Only "${" opens the braced form. */
    if (cursor[0] == '$' && cursor[1] == '{') {
        braced = 1;
        cursor += 2;
    } else {
        cursor += 1;
    }

    /* The first digit is mandatory. */
    if (*cursor < '0' || *cursor > '9') {
        return 0;
    }
    *backref = *cursor - '0';
    cursor++;

    /* The second digit is optional. */
    if (*cursor >= '0' && *cursor <= '9') {
        *backref = *backref * 10 + *cursor - '0';
        cursor++;
    }

    /* The braced form must be closed right after the digits. */
    if (braced) {
        if (*cursor != '}') {
            return 0;
        }
        cursor++;
    }

    *str = cursor;
    return 1;
}
880/* }}} */
881
882/* {{{ preg_do_repl_func
883 */
884static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark, char **result TSRMLS_DC)
885{
886    zval        *retval_ptr;        /* Function return value */
887    zval       **args[1];           /* Argument to pass to function */
888    zval        *subpats;           /* Captured subpatterns */
889    int          result_len;        /* Return value length */
890    int          i;
891
892    MAKE_STD_ZVAL(subpats);
893    array_init(subpats);
894    for (i = 0; i < count; i++) {
895        if (subpat_names[i]) {
896            add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
897        }
898        add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
899    }
900    if (mark) {
901        add_assoc_string(subpats, "MARK", (char *) mark, 1);
902    }
903    args[0] = &subpats;
904
905    if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
906        convert_to_string_ex(&retval_ptr);
907        *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
908        result_len = Z_STRLEN_P(retval_ptr);
909        zval_ptr_dtor(&retval_ptr);
910    } else {
911        if (!EG(exception)) {
912            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
913        }
914        result_len = offsets[1] - offsets[0];
915        *result = estrndup(&subject[offsets[0]], result_len);
916    }
917
918    zval_ptr_dtor(&subpats);
919
920    return result_len;
921}
922/* }}} */
923
924/* {{{ preg_do_eval
925 */
926static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
927                        int *offsets, int count, char **result TSRMLS_DC)
928{
929    zval         retval;            /* Return value from evaluation */
930    char        *eval_str_end,      /* End of eval string */
931                *match,             /* Current match for a backref */
932                *esc_match,         /* Quote-escaped match */
933                *walk,              /* Used to walk the code string */
934                *segment,           /* Start of segment to append while walking */
935                 walk_last;         /* Last walked character */
936    int          match_len;         /* Length of the match */
937    int          esc_match_len;     /* Length of the quote-escaped match */
938    int          result_len;        /* Length of the result of the evaluation */
939    int          backref;           /* Current backref */
940    char        *compiled_string_description;
941    smart_str    code = {0};
942
943    eval_str_end = eval_str + eval_str_len;
944    walk = segment = eval_str;
945    walk_last = 0;
946
947    while (walk < eval_str_end) {
948        /* If found a backreference.. */
949        if ('\\' == *walk || '$' == *walk) {
950            smart_str_appendl(&code, segment, walk - segment);
951            if (walk_last == '\\') {
952                code.c[code.len-1] = *walk++;
953                segment = walk;
954                walk_last = 0;
955                continue;
956            }
957            segment = walk;
958            if (preg_get_backref(&walk, &backref)) {
959                if (backref < count) {
960                    /* Find the corresponding string match and substitute it
961                       in instead of the backref */
962                    match = subject + offsets[backref<<1];
963                    match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
964                    if (match_len) {
965                        esc_match = php_addslashes(match, match_len, &esc_match_len, 0 TSRMLS_CC);
966                    } else {
967                        esc_match = match;
968                        esc_match_len = 0;
969                    }
970                } else {
971                    esc_match = "";
972                    esc_match_len = 0;
973                }
974                smart_str_appendl(&code, esc_match, esc_match_len);
975
976                segment = walk;
977
978                /* Clean up and reassign */
979                if (esc_match_len)
980                    efree(esc_match);
981                continue;
982            }
983        }
984        walk++;
985        walk_last = walk[-1];
986    }
987    smart_str_appendl(&code, segment, walk - segment);
988    smart_str_0(&code);
989
990    compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
991    /* Run the code */
992    if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
993        efree(compiled_string_description);
994        php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
995        /* zend_error() does not return in this case */
996    }
997    efree(compiled_string_description);
998    convert_to_string(&retval);
999
1000    /* Save the return value and its length */
1001    *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
1002    result_len = Z_STRLEN(retval);
1003
1004    /* Clean up */
1005    zval_dtor(&retval);
1006    smart_str_free(&code);
1007
1008    return result_len;
1009}
1010/* }}} */
1011
1012/* {{{ php_pcre_replace
1013 */
1014PHPAPI char *php_pcre_replace(char *regex,   int regex_len,
1015                              char *subject, int subject_len,
1016                              zval *replace_val, int is_callable_replace,
1017                              int *result_len, int limit, int *replace_count TSRMLS_DC)
1018{
1019    pcre_cache_entry    *pce;               /* Compiled regular expression */
1020
1021    /* Compile regex or get it from cache. */
1022    if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1023        return NULL;
1024    }
1025
1026    return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
1027        is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
1028}
1029/* }}} */
1030
1031/* {{{ php_pcre_replace_impl() */
1032PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
1033    int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
1034{
1035    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1036    pcre_extra       extra_data;        /* Used locally for exec options */
1037    int              exoptions = 0;     /* Execution options */
1038    int              count = 0;         /* Count of matched subpatterns */
1039    int             *offsets;           /* Array of subpattern offsets */
1040    char            **subpat_names;     /* Array for named subpatterns */
1041    int              num_subpats;       /* Number of captured subpatterns */
1042    int              size_offsets;      /* Size of the offsets array */
1043    int              new_len;           /* Length of needed storage */
1044    int              alloc_len;         /* Actual allocated length */
1045    int              eval_result_len=0; /* Length of the eval'ed or
1046                                           function-returned string */
1047    int              match_len;         /* Length of the current match */
1048    int              backref;           /* Backreference number */
1049    int              eval;              /* If the replacement string should be eval'ed */
1050    int              start_offset;      /* Where the new search starts */
1051    int              g_notempty=0;      /* If the match should not be empty */
1052    int              replace_len=0;     /* Length of replacement string */
1053    char            *result,            /* Result of replacement */
1054                    *replace=NULL,      /* Replacement string */
1055                    *new_buf,           /* Temporary buffer for re-allocation */
1056                    *walkbuf,           /* Location of current replacement in the result */
1057                    *walk,              /* Used to walk the replacement string */
1058                    *match,             /* The current match */
1059                    *piece,             /* The current piece of subject */
1060                    *replace_end=NULL,  /* End of replacement string */
1061                    *eval_result,       /* Result of eval or custom function */
1062                     walk_last;         /* Last walked character */
1063    int              rc;
1064    unsigned char   *mark = NULL;       /* Target for MARK name */
1065
1066    if (extra == NULL) {
1067        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1068        extra = &extra_data;
1069    }
1070    extra->match_limit = PCRE_G(backtrack_limit);
1071    extra->match_limit_recursion = PCRE_G(recursion_limit);
1072#ifdef PCRE_EXTRA_MARK
1073    extra->mark = &mark;
1074    extra->flags |= PCRE_EXTRA_MARK;
1075#endif
1076
1077    eval = pce->preg_options & PREG_REPLACE_EVAL;
1078    if (is_callable_replace) {
1079        if (eval) {
1080            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1081            return NULL;
1082        }
1083    } else {
1084        replace = Z_STRVAL_P(replace_val);
1085        replace_len = Z_STRLEN_P(replace_val);
1086        replace_end = replace + replace_len;
1087    }
1088
1089    if (eval) {
1090        php_error_docref(NULL TSRMLS_CC, E_DEPRECATED, "The /e modifier is deprecated, use preg_replace_callback instead");
1091    }
1092
1093    /* Calculate the size of the offsets array, and allocate memory for it. */
1094    rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
1095    if (rc < 0) {
1096        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1097        return NULL;
1098    }
1099    num_subpats++;
1100    size_offsets = num_subpats * 3;
1101
1102    /*
1103     * Build a mapping from subpattern numbers to their names. We will always
1104     * allocate the table, even though there may be no named subpatterns. This
1105     * avoids somewhat more complicated logic in the inner loops.
1106     */
1107    subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1108    if (!subpat_names) {
1109        return NULL;
1110    }
1111
1112    offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1113
1114    alloc_len = 2 * subject_len + 1;
1115    result = safe_emalloc(alloc_len, sizeof(char), 0);
1116
1117    /* Initialize */
1118    match = NULL;
1119    *result_len = 0;
1120    start_offset = 0;
1121    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1122
1123    while (1) {
1124        /* Execute the regular expression. */
1125        count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1126                          exoptions|g_notempty, offsets, size_offsets);
1127
1128        /* the string was already proved to be valid UTF-8 */
1129        exoptions |= PCRE_NO_UTF8_CHECK;
1130
1131        /* Check for too many substrings condition. */
1132        if (count == 0) {
1133            php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1134            count = size_offsets/3;
1135        }
1136
1137        piece = subject + start_offset;
1138
1139        if (count > 0 && (limit == -1 || limit > 0)) {
1140            if (replace_count) {
1141                ++*replace_count;
1142            }
1143            /* Set the match location in subject */
1144            match = subject + offsets[0];
1145
1146            new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1147
1148            /* If evaluating, do it and add the return string's length */
1149            if (eval) {
1150                eval_result_len = preg_do_eval(replace, replace_len, subject,
1151                                               offsets, count, &eval_result TSRMLS_CC);
1152                new_len += eval_result_len;
1153            } else if (is_callable_replace) {
1154                /* Use custom function to get replacement string and its length. */
1155                eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark, &eval_result TSRMLS_CC);
1156                new_len += eval_result_len;
1157            } else { /* do regular substitution */
1158                walk = replace;
1159                walk_last = 0;
1160                while (walk < replace_end) {
1161                    if ('\\' == *walk || '$' == *walk) {
1162                        if (walk_last == '\\') {
1163                            walk++;
1164                            walk_last = 0;
1165                            continue;
1166                        }
1167                        if (preg_get_backref(&walk, &backref)) {
1168                            if (backref < count)
1169                                new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1170                            continue;
1171                        }
1172                    }
1173                    new_len++;
1174                    walk++;
1175                    walk_last = walk[-1];
1176                }
1177            }
1178
1179            if (new_len + 1 > alloc_len) {
1180                alloc_len = 1 + alloc_len + 2 * new_len;
1181                new_buf = emalloc(alloc_len);
1182                memcpy(new_buf, result, *result_len);
1183                efree(result);
1184                result = new_buf;
1185            }
1186            /* copy the part of the string before the match */
1187            memcpy(&result[*result_len], piece, match-piece);
1188            *result_len += match-piece;
1189
1190            /* copy replacement and backrefs */
1191            walkbuf = result + *result_len;
1192
1193            /* If evaluating or using custom function, copy result to the buffer
1194             * and clean up. */
1195            if (eval || is_callable_replace) {
1196                memcpy(walkbuf, eval_result, eval_result_len);
1197                *result_len += eval_result_len;
1198                STR_FREE(eval_result);
1199            } else { /* do regular backreference copying */
1200                walk = replace;
1201                walk_last = 0;
1202                while (walk < replace_end) {
1203                    if ('\\' == *walk || '$' == *walk) {
1204                        if (walk_last == '\\') {
1205                            *(walkbuf-1) = *walk++;
1206                            walk_last = 0;
1207                            continue;
1208                        }
1209                        if (preg_get_backref(&walk, &backref)) {
1210                            if (backref < count) {
1211                                match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1212                                memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1213                                walkbuf += match_len;
1214                            }
1215                            continue;
1216                        }
1217                    }
1218                    *walkbuf++ = *walk++;
1219                    walk_last = walk[-1];
1220                }
1221                *walkbuf = '\0';
1222                /* increment the result length by how much we've added to the string */
1223                *result_len += walkbuf - (result + *result_len);
1224            }
1225
1226            if (limit != -1)
1227                limit--;
1228
1229        } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1230            /* If we previously set PCRE_NOTEMPTY after a null match,
1231               this is not necessarily the end. We need to advance
1232               the start offset, and continue. Fudge the offset values
1233               to achieve this, unless we're already at the end of the string. */
1234            if (g_notempty != 0 && start_offset < subject_len) {
1235                offsets[0] = start_offset;
1236                offsets[1] = start_offset + 1;
1237                memcpy(&result[*result_len], piece, 1);
1238                (*result_len)++;
1239            } else {
1240                new_len = *result_len + subject_len - start_offset;
1241                if (new_len + 1 > alloc_len) {
1242                    alloc_len = new_len + 1; /* now we know exactly how long it is */
1243                    new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1244                    memcpy(new_buf, result, *result_len);
1245                    efree(result);
1246                    result = new_buf;
1247                }
1248                /* stick that last bit of string on our output */
1249                memcpy(&result[*result_len], piece, subject_len - start_offset);
1250                *result_len += subject_len - start_offset;
1251                result[*result_len] = '\0';
1252                break;
1253            }
1254        } else {
1255            pcre_handle_exec_error(count TSRMLS_CC);
1256            efree(result);
1257            result = NULL;
1258            break;
1259        }
1260
1261        /* If we have matched an empty string, mimic what Perl's /g options does.
1262           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1263           the match again at the same point. If this fails (picked up above) we
1264           advance to the next character. */
1265        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1266
1267        /* Advance to the next piece. */
1268        start_offset = offsets[1];
1269    }
1270
1271    efree(offsets);
1272    efree(subpat_names);
1273
1274    return result;
1275}
1276/* }}} */
1277
1278/* {{{ php_replace_in_subject
1279 */
1280static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
1281{
1282    zval        **regex_entry,
1283                **replace_entry = NULL,
1284                 *replace_value,
1285                  empty_replace;
1286    char        *subject_value,
1287                *result;
1288    int          subject_len;
1289
1290    /* Make sure we're dealing with strings. */
1291    convert_to_string_ex(subject);
1292    /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1293    ZVAL_STRINGL(&empty_replace, "", 0, 0);
1294
1295    /* If regex is an array */
1296    if (Z_TYPE_P(regex) == IS_ARRAY) {
1297        /* Duplicate subject string for repeated replacement */
1298        subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
1299        subject_len = Z_STRLEN_PP(subject);
1300        *result_len = subject_len;
1301
1302        zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
1303
1304        replace_value = replace;
1305        if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
1306            zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
1307
1308        /* For each entry in the regex array, get the entry */
1309        while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)&regex_entry) == SUCCESS) {
1310            /* Make sure we're dealing with strings. */
1311            convert_to_string_ex(regex_entry);
1312
1313            /* If replace is an array and not a callable construct */
1314            if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1315                /* Get current entry */
1316                if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
1317                    if (!is_callable_replace) {
1318                        convert_to_string_ex(replace_entry);
1319                    }
1320                    replace_value = *replace_entry;
1321                    zend_hash_move_forward(Z_ARRVAL_P(replace));
1322                } else {
1323                    /* We've run out of replacement strings, so use an empty one */
1324                    replace_value = &empty_replace;
1325                }
1326            }
1327
1328            /* Do the actual replacement and put the result back into subject_value
1329               for further replacements. */
1330            if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
1331                                           Z_STRLEN_PP(regex_entry),
1332                                           subject_value,
1333                                           subject_len,
1334                                           replace_value,
1335                                           is_callable_replace,
1336                                           result_len,
1337                                           limit,
1338                                           replace_count TSRMLS_CC)) != NULL) {
1339                efree(subject_value);
1340                subject_value = result;
1341                subject_len = *result_len;
1342            } else {
1343                efree(subject_value);
1344                return NULL;
1345            }
1346
1347            zend_hash_move_forward(Z_ARRVAL_P(regex));
1348        }
1349
1350        return subject_value;
1351    } else {
1352        result = php_pcre_replace(Z_STRVAL_P(regex),
1353                                  Z_STRLEN_P(regex),
1354                                  Z_STRVAL_PP(subject),
1355                                  Z_STRLEN_PP(subject),
1356                                  replace,
1357                                  is_callable_replace,
1358                                  result_len,
1359                                  limit,
1360                                  replace_count TSRMLS_CC);
1361        return result;
1362    }
1363}
1364/* }}} */
1365
1366/* {{{ preg_replace_impl
1367 */
1368static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1369{
1370    zval           **regex,
1371                   **replace,
1372                   **subject,
1373                   **subject_entry,
1374                   **zcount = NULL;
1375    char            *result;
1376    int              result_len;
1377    int              limit_val = -1;
1378    long            limit = -1;
1379    char            *string_key;
1380    uint             string_key_len;
1381    ulong            num_key;
1382    char            *callback_name;
1383    int              replace_count=0, old_replace_count;
1384
1385    /* Get function parameters and do error-checking. */
1386    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1387        return;
1388    }
1389
1390    if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
1391        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1392        RETURN_FALSE;
1393    }
1394
1395    SEPARATE_ZVAL(replace);
1396    if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
1397        convert_to_string_ex(replace);
1398    }
1399    if (is_callable_replace) {
1400        if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
1401            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
1402            efree(callback_name);
1403            MAKE_COPY_ZVAL(subject, return_value);
1404            return;
1405        }
1406        efree(callback_name);
1407    }
1408
1409    SEPARATE_ZVAL(regex);
1410    SEPARATE_ZVAL(subject);
1411
1412    if (ZEND_NUM_ARGS() > 3) {
1413        limit_val = limit;
1414    }
1415
1416    if (Z_TYPE_PP(regex) != IS_ARRAY)
1417        convert_to_string_ex(regex);
1418
1419    /* if subject is an array */
1420    if (Z_TYPE_PP(subject) == IS_ARRAY) {
1421        array_init(return_value);
1422        zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
1423
1424        /* For each subject entry, convert it to string, then perform replacement
1425           and add the result to the return_value array. */
1426        while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
1427            SEPARATE_ZVAL(subject_entry);
1428            old_replace_count = replace_count;
1429            if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1430                if (!is_filter || replace_count > old_replace_count) {
1431                    /* Add to return array */
1432                    switch(zend_hash_get_current_key_ex(Z_ARRVAL_PP(subject), &string_key, &string_key_len, &num_key, 0, NULL))
1433                    {
1434                    case HASH_KEY_IS_STRING:
1435                        add_assoc_stringl_ex(return_value, string_key, string_key_len, result, result_len, 0);
1436                        break;
1437
1438                    case HASH_KEY_IS_LONG:
1439                        add_index_stringl(return_value, num_key, result, result_len, 0);
1440                        break;
1441                    }
1442                } else {
1443                    efree(result);
1444                }
1445            }
1446
1447            zend_hash_move_forward(Z_ARRVAL_PP(subject));
1448        }
1449    } else {    /* if subject is not an array */
1450        old_replace_count = replace_count;
1451        if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1452            if (!is_filter || replace_count > old_replace_count) {
1453                RETVAL_STRINGL(result, result_len, 0);
1454            } else {
1455                efree(result);
1456            }
1457        }
1458    }
1459    if (ZEND_NUM_ARGS() > 4) {
1460        zval_dtor(*zcount);
1461        ZVAL_LONG(*zcount, replace_count);
1462    }
1463
1464}
1465/* }}} */
1466
1467/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1468   Perform Perl-style regular expression replacement. */
1469static PHP_FUNCTION(preg_replace)
1470{
1471    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1472}
1473/* }}} */
1474
1475/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1476   Perform Perl-style regular expression replacement using replacement callback. */
1477static PHP_FUNCTION(preg_replace_callback)
1478{
1479    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
1480}
1481/* }}} */
1482
1483/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1484   Perform Perl-style regular expression replacement and only return matches. */
1485static PHP_FUNCTION(preg_filter)
1486{
1487    preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1488}
1489/* }}} */
1490
1491/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1492   Split string into an array using a perl-style regular expression as a delimiter */
1493static PHP_FUNCTION(preg_split)
1494{
1495    char                *regex;         /* Regular expression */
1496    char                *subject;       /* String to match against */
1497    int                  regex_len;
1498    int                  subject_len;
1499    long                 limit_val = -1;/* Integer value of limit */
1500    long                 flags = 0;     /* Match control flags */
1501    pcre_cache_entry    *pce;           /* Compiled regular expression */
1502
1503    /* Get function parameters and do error checking */
1504    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", &regex, &regex_len,
1505                              &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1506        RETURN_FALSE;
1507    }
1508
1509    /* Compile regex or get it from cache. */
1510    if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1511        RETURN_FALSE;
1512    }
1513
1514    php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
1515}
1516/* }}} */
1517
/* {{{ php_pcre_split_impl
 */
1520PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1521    long limit_val, long flags TSRMLS_DC)
1522{
1523    pcre_extra      *extra = NULL;      /* Holds results of studying */
1524    pcre            *re_bump = NULL;    /* Regex instance for empty matches */
1525    pcre_extra      *extra_bump = NULL; /* Almost dummy */
1526    pcre_extra       extra_data;        /* Used locally for exec options */
1527    int             *offsets;           /* Array of subpattern offsets */
1528    int              size_offsets;      /* Size of the offsets array */
1529    int              exoptions = 0;     /* Execution options */
1530    int              count = 0;         /* Count of matched subpatterns */
1531    int              start_offset;      /* Where the new search starts */
1532    int              next_offset;       /* End of the last delimiter match + 1 */
1533    int              g_notempty = 0;    /* If the match should not be empty */
1534    char            *last_match;        /* Location of last match */
1535    int              rc;
1536    int              no_empty;          /* If NO_EMPTY flag is set */
1537    int              delim_capture;     /* If delimiters should be captured */
1538    int              offset_capture;    /* If offsets should be captured */
1539
1540    no_empty = flags & PREG_SPLIT_NO_EMPTY;
1541    delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1542    offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1543
1544    if (limit_val == 0) {
1545        limit_val = -1;
1546    }
1547
1548    if (extra == NULL) {
1549        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1550        extra = &extra_data;
1551    }
1552    extra->match_limit = PCRE_G(backtrack_limit);
1553    extra->match_limit_recursion = PCRE_G(recursion_limit);
1554#ifdef PCRE_EXTRA_MARK
1555    extra->flags &= ~PCRE_EXTRA_MARK;
1556#endif
1557
1558    /* Initialize return value */
1559    array_init(return_value);
1560
1561    /* Calculate the size of the offsets array, and allocate memory for it. */
1562    rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1563    if (rc < 0) {
1564        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1565        RETURN_FALSE;
1566    }
1567    size_offsets = (size_offsets + 1) * 3;
1568    offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1569
1570    /* Start at the beginning of the string */
1571    start_offset = 0;
1572    next_offset = 0;
1573    last_match = subject;
1574    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1575
1576    /* Get next piece if no limit or limit not yet reached and something matched*/
1577    while ((limit_val == -1 || limit_val > 1)) {
1578        count = pcre_exec(pce->re, extra, subject,
1579                          subject_len, start_offset,
1580                          exoptions|g_notempty, offsets, size_offsets);
1581
1582        /* the string was already proved to be valid UTF-8 */
1583        exoptions |= PCRE_NO_UTF8_CHECK;
1584
1585        /* Check for too many substrings condition. */
1586        if (count == 0) {
1587            php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1588            count = size_offsets/3;
1589        }
1590
1591        /* If something matched */
1592        if (count > 0) {
1593            if (!no_empty || &subject[offsets[0]] != last_match) {
1594
1595                if (offset_capture) {
1596                    /* Add (match, offset) pair to the return value */
1597                    add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1598                } else {
1599                    /* Add the piece to the return value */
1600                    add_next_index_stringl(return_value, last_match,
1601                                       &subject[offsets[0]]-last_match, 1);
1602                }
1603
1604                /* One less left to do */
1605                if (limit_val != -1)
1606                    limit_val--;
1607            }
1608
1609            last_match = &subject[offsets[1]];
1610            next_offset = offsets[1];
1611
1612            if (delim_capture) {
1613                int i, match_len;
1614                for (i = 1; i < count; i++) {
1615                    match_len = offsets[(i<<1)+1] - offsets[i<<1];
1616                    /* If we have matched a delimiter */
1617                    if (!no_empty || match_len > 0) {
1618                        if (offset_capture) {
1619                            add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1620                        } else {
1621                            add_next_index_stringl(return_value,
1622                                                   &subject[offsets[i<<1]],
1623                                                   match_len, 1);
1624                        }
1625                    }
1626                }
1627            }
1628        } else if (count == PCRE_ERROR_NOMATCH) {
1629            /* If we previously set PCRE_NOTEMPTY after a null match,
1630               this is not necessarily the end. We need to advance
1631               the start offset, and continue. Fudge the offset values
1632               to achieve this, unless we're already at the end of the string. */
1633            if (g_notempty != 0 && start_offset < subject_len) {
1634                if (pce->compile_options & PCRE_UTF8) {
1635                    if (re_bump == NULL) {
1636                        int dummy;
1637
1638                        if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
1639                            RETURN_FALSE;
1640                        }
1641                    }
1642                    count = pcre_exec(re_bump, extra_bump, subject,
1643                              subject_len, start_offset,
1644                              exoptions, offsets, size_offsets);
1645                    if (count < 1) {
1646                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
1647                        RETURN_FALSE;
1648                    }
1649                } else {
1650                    offsets[0] = start_offset;
1651                    offsets[1] = start_offset + 1;
1652                }
1653            } else
1654                break;
1655        } else {
1656            pcre_handle_exec_error(count TSRMLS_CC);
1657            break;
1658        }
1659
1660        /* If we have matched an empty string, mimic what Perl's /g options does.
1661           This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1662           the match again at the same point. If this fails (picked up above) we
1663           advance to the next character. */
1664        g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1665
1666        /* Advance to the position right after the last full match */
1667        start_offset = offsets[1];
1668    }
1669
1670
1671    start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1672
1673    if (!no_empty || start_offset < subject_len)
1674    {
1675        if (offset_capture) {
1676            /* Add the last (match, offset) pair to the return value */
1677            add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1678        } else {
1679            /* Add the last piece to the return value */
1680            add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1681        }
1682    }
1683
1684
1685    /* Clean up */
1686    efree(offsets);
1687}
1688/* }}} */
1689
1690/* {{{ proto string preg_quote(string str [, string delim_char])
1691   Quote regular expression characters plus an optional character */
1692static PHP_FUNCTION(preg_quote)
1693{
1694    int      in_str_len;
1695    char    *in_str;        /* Input string argument */
1696    char    *in_str_end;    /* End of the input string */
1697    int      delim_len = 0;
1698    char    *delim = NULL;  /* Additional delimiter argument */
1699    char    *out_str,       /* Output string with quoted characters */
1700            *p,             /* Iterator for input string */
1701            *q,             /* Iterator for output string */
1702             delim_char=0,  /* Delimiter character to be quoted */
1703             c;             /* Current character */
1704    zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1705
1706    /* Get the arguments and check for errors */
1707    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1708                              &delim, &delim_len) == FAILURE) {
1709        return;
1710    }
1711
1712    in_str_end = in_str + in_str_len;
1713
1714    /* Nothing to do if we got an empty string */
1715    if (in_str == in_str_end) {
1716        RETURN_EMPTY_STRING();
1717    }
1718
1719    if (delim && *delim) {
1720        delim_char = delim[0];
1721        quote_delim = 1;
1722    }
1723
1724    /* Allocate enough memory so that even if each character
1725       is quoted, we won't run out of room */
1726    out_str = safe_emalloc(4, in_str_len, 1);
1727
1728    /* Go through the string and quote necessary characters */
1729    for(p = in_str, q = out_str; p != in_str_end; p++) {
1730        c = *p;
1731        switch(c) {
1732            case '.':
1733            case '\\':
1734            case '+':
1735            case '*':
1736            case '?':
1737            case '[':
1738            case '^':
1739            case ']':
1740            case '$':
1741            case '(':
1742            case ')':
1743            case '{':
1744            case '}':
1745            case '=':
1746            case '!':
1747            case '>':
1748            case '<':
1749            case '|':
1750            case ':':
1751            case '-':
1752                *q++ = '\\';
1753                *q++ = c;
1754                break;
1755
1756            case '\0':
1757                *q++ = '\\';
1758                *q++ = '0';
1759                *q++ = '0';
1760                *q++ = '0';
1761                break;
1762
1763            default:
1764                if (quote_delim && c == delim_char)
1765                    *q++ = '\\';
1766                *q++ = c;
1767                break;
1768        }
1769    }
1770    *q = '\0';
1771
1772    /* Reallocate string and return it */
1773    RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
1774}
1775/* }}} */
1776
1777/* {{{ proto array preg_grep(string regex, array input [, int flags])
1778   Searches array and returns entries which match regex */
1779static PHP_FUNCTION(preg_grep)
1780{
1781    char                *regex;         /* Regular expression */
1782    int                  regex_len;
1783    zval                *input;         /* Input array */
1784    long                 flags = 0;     /* Match control flags */
1785    pcre_cache_entry    *pce;           /* Compiled regular expression */
1786
1787    /* Get arguments and do error checking */
1788    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", &regex, &regex_len,
1789                              &input, &flags) == FAILURE) {
1790        return;
1791    }
1792
1793    /* Compile regex or get it from cache. */
1794    if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1795        RETURN_FALSE;
1796    }
1797
1798    php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1799}
1800/* }}} */
1801
1802PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
1803{
1804    zval           **entry;             /* An entry in the input array */
1805    pcre_extra      *extra = pce->extra;/* Holds results of studying */
1806    pcre_extra       extra_data;        /* Used locally for exec options */
1807    int             *offsets;           /* Array of subpattern offsets */
1808    int              size_offsets;      /* Size of the offsets array */
1809    int              count = 0;         /* Count of matched subpatterns */
1810    char            *string_key;
1811    uint             string_key_len;
1812    ulong            num_key;
1813    zend_bool        invert;            /* Whether to return non-matching
1814                                           entries */
1815    int              rc;
1816
1817    invert = flags & PREG_GREP_INVERT ? 1 : 0;
1818
1819    if (extra == NULL) {
1820        extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1821        extra = &extra_data;
1822    }
1823    extra->match_limit = PCRE_G(backtrack_limit);
1824    extra->match_limit_recursion = PCRE_G(recursion_limit);
1825#ifdef PCRE_EXTRA_MARK
1826    extra->flags &= ~PCRE_EXTRA_MARK;
1827#endif
1828
1829    /* Calculate the size of the offsets array, and allocate memory for it. */
1830    rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1831    if (rc < 0) {
1832        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1833        RETURN_FALSE;
1834    }
1835    size_offsets = (size_offsets + 1) * 3;
1836    offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1837
1838    /* Initialize return array */
1839    array_init(return_value);
1840
1841    PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1842
1843    /* Go through the input array */
1844    zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1845    while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1846        zval subject = **entry;
1847
1848        if (Z_TYPE_PP(entry) != IS_STRING) {
1849            zval_copy_ctor(&subject);
1850            convert_to_string(&subject);
1851        }
1852
1853        /* Perform the match */
1854        count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
1855                          Z_STRLEN(subject), 0,
1856                          0, offsets, size_offsets);
1857
1858        /* Check for too many substrings condition. */
1859        if (count == 0) {
1860            php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1861            count = size_offsets/3;
1862        } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1863            pcre_handle_exec_error(count TSRMLS_CC);
1864            break;
1865        }
1866
1867        /* If the entry fits our requirements */
1868        if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1869
1870            Z_ADDREF_PP(entry);
1871
1872            /* Add to return array */
1873            switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL))
1874            {
1875                case HASH_KEY_IS_STRING:
1876                    zend_hash_update(Z_ARRVAL_P(return_value), string_key,
1877                                     string_key_len, entry, sizeof(zval *), NULL);
1878                    break;
1879
1880                case HASH_KEY_IS_LONG:
1881                    zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
1882                                           sizeof(zval *), NULL);
1883                    break;
1884            }
1885        }
1886
1887        if (Z_TYPE_PP(entry) != IS_STRING) {
1888            zval_dtor(&subject);
1889        }
1890
1891        zend_hash_move_forward(Z_ARRVAL_P(input));
1892    }
1893    zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1894    /* Clean up */
1895    efree(offsets);
1896}
1897/* }}} */
1898
1899/* {{{ proto int preg_last_error()
1900   Returns the error code of the last regexp execution. */
1901static PHP_FUNCTION(preg_last_error)
1902{
1903    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1904        return;
1905    }
1906
1907    RETURN_LONG(PCRE_G(error_code));
1908}
1909/* }}} */
1910
1911/* {{{ module definition structures */
1912
1913/* {{{ arginfo */
1914ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
1915    ZEND_ARG_INFO(0, pattern)
1916    ZEND_ARG_INFO(0, subject)
1917    ZEND_ARG_INFO(1, subpatterns) /* array */
1918    ZEND_ARG_INFO(0, flags)
1919    ZEND_ARG_INFO(0, offset)
1920ZEND_END_ARG_INFO()
1921
1922ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
1923    ZEND_ARG_INFO(0, pattern)
1924    ZEND_ARG_INFO(0, subject)
1925    ZEND_ARG_INFO(1, subpatterns) /* array */
1926    ZEND_ARG_INFO(0, flags)
1927    ZEND_ARG_INFO(0, offset)
1928ZEND_END_ARG_INFO()
1929
1930ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
1931    ZEND_ARG_INFO(0, regex)
1932    ZEND_ARG_INFO(0, replace)
1933    ZEND_ARG_INFO(0, subject)
1934    ZEND_ARG_INFO(0, limit)
1935    ZEND_ARG_INFO(1, count)
1936ZEND_END_ARG_INFO()
1937
1938ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
1939    ZEND_ARG_INFO(0, regex)
1940    ZEND_ARG_INFO(0, callback)
1941    ZEND_ARG_INFO(0, subject)
1942    ZEND_ARG_INFO(0, limit)
1943    ZEND_ARG_INFO(1, count)
1944ZEND_END_ARG_INFO()
1945
1946ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
1947    ZEND_ARG_INFO(0, pattern)
1948    ZEND_ARG_INFO(0, subject)
1949    ZEND_ARG_INFO(0, limit)
1950    ZEND_ARG_INFO(0, flags)
1951ZEND_END_ARG_INFO()
1952
1953ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
1954    ZEND_ARG_INFO(0, str)
1955    ZEND_ARG_INFO(0, delim_char)
1956ZEND_END_ARG_INFO()
1957
1958ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
1959    ZEND_ARG_INFO(0, regex)
1960    ZEND_ARG_INFO(0, input) /* array */
1961    ZEND_ARG_INFO(0, flags)
1962ZEND_END_ARG_INFO()
1963
1964ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
1965ZEND_END_ARG_INFO()
1966/* }}} */
1967
1968static const zend_function_entry pcre_functions[] = {
1969    PHP_FE(preg_match,              arginfo_preg_match)
1970    PHP_FE(preg_match_all,          arginfo_preg_match_all)
1971    PHP_FE(preg_replace,            arginfo_preg_replace)
1972    PHP_FE(preg_replace_callback,   arginfo_preg_replace_callback)
1973    PHP_FE(preg_filter,             arginfo_preg_replace)
1974    PHP_FE(preg_split,              arginfo_preg_split)
1975    PHP_FE(preg_quote,              arginfo_preg_quote)
1976    PHP_FE(preg_grep,               arginfo_preg_grep)
1977    PHP_FE(preg_last_error,         arginfo_preg_last_error)
1978    PHP_FE_END
1979};
1980
1981zend_module_entry pcre_module_entry = {
1982    STANDARD_MODULE_HEADER,
1983   "pcre",
1984    pcre_functions,
1985    PHP_MINIT(pcre),
1986    PHP_MSHUTDOWN(pcre),
1987    NULL,
1988    NULL,
1989    PHP_MINFO(pcre),
1990    NO_VERSION_YET,
1991    PHP_MODULE_GLOBALS(pcre),
1992    PHP_GINIT(pcre),
1993    PHP_GSHUTDOWN(pcre),
1994    NULL,
1995    STANDARD_MODULE_PROPERTIES_EX
1996};
1997
1998#ifdef COMPILE_DL_PCRE
1999ZEND_GET_MODULE(pcre)
2000#endif
2001
2002/* }}} */
2003
2004#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2005
2006/*
2007 * Local variables:
2008 * tab-width: 4
2009 * c-basic-offset: 4
2010 * End:
2011 * vim600: sw=4 ts=4 fdm=marker
2012 * vim<600: sw=4 ts=4
2013 */
2014