1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2016 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include "php.h"
22#include "php_ini.h"
23#include "php_globals.h"
24#include "php_pcre.h"
25#include "ext/standard/info.h"
26#include "ext/standard/basic_functions.h"
27#include "zend_smart_str.h"
28
29#if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31#include "ext/standard/php_string.h"
32
/*
 * Result ordering for global matching. php_pcre_match_impl() reads this
 * value from the low byte of the flags argument (flags & 0xff).
 */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
/* Flag bit above the low byte: report every match together with its offset. */
#define PREG_OFFSET_CAPTURE         0x100

/* Flag bits exported to userland as the PREG_SPLIT_* constants. */
#define PREG_SPLIT_NO_EMPTY         0x01
#define PREG_SPLIT_DELIM_CAPTURE    0x02
#define PREG_SPLIT_OFFSET_CAPTURE   0x04

/* Internal option bit set by the 'e' pattern modifier. */
#define PREG_REPLACE_EVAL           0x01

/* Flag bit exported to userland as PREG_GREP_INVERT. */
#define PREG_GREP_INVERT            0x01

/* Number of cached patterns at which the compile cache evicts unused entries. */
#define PCRE_CACHE_SIZE             4096

/* not fully functional workaround for libpcre < 8.0, see bug #70232 */
#if !defined(PCRE_NOTEMPTY_ATSTART)
# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
#endif
51
/*
 * Error codes stored in PCRE_G(error_code) and exported to userland as the
 * PREG_*_ERROR constants. The numeric values are part of the public
 * interface, so they are spelled out explicitly.
 */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5,
	PHP_PCRE_JIT_STACKLIMIT_ERROR  = 6
};
61
62
/* Declares the module globals of the pcre extension; the code in this file
   reads and writes their fields through PCRE_G(). */
63PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
64
65
66static void pcre_handle_exec_error(int pcre_code) /* {{{ */
67{
68	int preg_code = 0;
69
70	switch (pcre_code) {
71		case PCRE_ERROR_MATCHLIMIT:
72			preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
73			break;
74
75		case PCRE_ERROR_RECURSIONLIMIT:
76			preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
77			break;
78
79		case PCRE_ERROR_BADUTF8:
80			preg_code = PHP_PCRE_BAD_UTF8_ERROR;
81			break;
82
83		case PCRE_ERROR_BADUTF8_OFFSET:
84			preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
85			break;
86
87#ifdef PCRE_STUDY_JIT_COMPILE
88		case PCRE_ERROR_JIT_STACKLIMIT:
89			preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
90			break;
91#endif
92
93		default:
94			preg_code = PHP_PCRE_INTERNAL_ERROR;
95			break;
96	}
97
98	PCRE_G(error_code) = preg_code;
99}
100/* }}} */
101
102static void php_free_pcre_cache(zval *data) /* {{{ */
103{
104	pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
105	if (!pce) return;
106	pcre_free(pce->re);
107	if (pce->extra) {
108		pcre_free_study(pce->extra);
109	}
110#if HAVE_SETLOCALE
111	if ((void*)pce->tables) pefree((void*)pce->tables, 1);
112	if (pce->locale) {
113		zend_string_release(pce->locale);
114	}
115#endif
116	pefree(pce, 1);
117}
118/* }}} */
119
120static PHP_GINIT_FUNCTION(pcre) /* {{{ */
121{
122	zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
123	pcre_globals->backtrack_limit = 0;
124	pcre_globals->recursion_limit = 0;
125	pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
126}
127/* }}} */
128
129static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
130{
131	zend_hash_destroy(&pcre_globals->pcre_cache);
132}
133/* }}} */
134
/*
 * INI directives of the extension. Each entry lists the directive name, its
 * default value, the contexts in which it may be changed (PHP_INI_ALL), the
 * update handler, and the zend_pcre_globals field it is bound to.
 * pcre.jit is only available when the PCRE headers define
 * PCRE_STUDY_JIT_COMPILE.
 */
135PHP_INI_BEGIN()
136	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
137	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
138#ifdef PCRE_STUDY_JIT_COMPILE
139	STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
140#endif
141PHP_INI_END()
142
143
144/* {{{ PHP_MINFO_FUNCTION(pcre) */
145static PHP_MINFO_FUNCTION(pcre)
146{
147	int jit_yes = 0;
148
149	php_info_print_table_start();
150	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
151	php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
152
153	if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
154		php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
155	} else {
156		php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
157	}
158
159	php_info_print_table_end();
160
161	DISPLAY_INI_ENTRIES();
162}
163/* }}} */
164
165/* {{{ PHP_MINIT_FUNCTION(pcre) */
166static PHP_MINIT_FUNCTION(pcre)
167{
168	REGISTER_INI_ENTRIES();
169
170	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
171	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
172	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
173	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
174	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
175	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
176	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
177
178	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
179	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
180	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
181	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
182	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
183	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
184	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
185	REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
186
187	return SUCCESS;
188}
189/* }}} */
190
191/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
 * Module shutdown: removes the INI entries registered in MINIT and reports
 * SUCCESS.
 */
192static PHP_MSHUTDOWN_FUNCTION(pcre)
193{
194	UNREGISTER_INI_ENTRIES();
195
196	return SUCCESS;
197}
198/* }}} */
199
200/* {{{ static pcre_clean_cache */
201static int pcre_clean_cache(zval *data, void *arg)
202{
203	pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
204	int *num_clean = (int *)arg;
205
206	if (*num_clean > 0 && !pce->refcount) {
207		(*num_clean)--;
208		return ZEND_HASH_APPLY_REMOVE;
209	} else {
210		return ZEND_HASH_APPLY_KEEP;
211	}
212}
213/* }}} */
214
215/* {{{ static make_subpats_table */
216static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
217{
218	pcre_extra *extra = pce->extra;
219	int name_cnt = pce->name_count, name_size, ni = 0;
220	int rc;
221	char *name_table;
222	unsigned short name_idx;
223	char **subpat_names;
224	int rc1, rc2;
225
226	rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
227	rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
228	rc = rc2 ? rc2 : rc1;
229	if (rc < 0) {
230		php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
231		return NULL;
232	}
233
234	subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
235	while (ni++ < name_cnt) {
236		name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
237		subpat_names[name_idx] = name_table + 2;
238		if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
239			php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
240			efree(subpat_names);
241			return NULL;
242		}
243		name_table += name_size;
244	}
245	return subpat_names;
246}
247/* }}} */
248
249/* {{{ static calculate_unit_length */
250/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
251static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
252{
253	int unit_len;
254
255	if (pce->compile_options & PCRE_UTF8) {
256		char *end = start;
257
258		/* skip continuation bytes */
259		while ((*++end & 0xC0) == 0x80);
260		unit_len = end - start;
261	} else {
262		unit_len = 1;
263	}
264	return unit_len;
265}
266/* }}} */
267
268/* {{{ pcre_get_compiled_regex_cache
269 */
270PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
271{
272	pcre				*re = NULL;
273	pcre_extra			*extra;
274	int					 coptions = 0;
275	int					 soptions = 0;
276	const char			*error;
277	int					 erroffset;
278	char				 delimiter;
279	char				 start_delimiter;
280	char				 end_delimiter;
281	char				*p, *pp;
282	char				*pattern;
283	int					 do_study = 0;
284	int					 poptions = 0;
285	unsigned const char *tables = NULL;
286	pcre_cache_entry	*pce;
287	pcre_cache_entry	 new_entry;
288	int					 rc;
289
290	/* Try to lookup the cached regex entry, and if successful, just pass
291	   back the compiled pattern, otherwise go on and compile it. */
292	pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
293	if (pce) {
294#if HAVE_SETLOCALE
295		if (pce->locale == BG(locale_string) ||
296		    (pce->locale && BG(locale_string) &&
297		     ZSTR_LEN(pce->locale) == ZSTR_LEN(BG(locale_string)) &&
298		     !memcmp(ZSTR_VAL(pce->locale), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(pce->locale))) ||
299		    (!pce->locale &&
300		     ZSTR_LEN(BG(locale_string)) == 1 &&
301		     ZSTR_VAL(BG(locale_string))[0] == 'C') ||
302		    (!BG(locale_string) &&
303		     ZSTR_LEN(pce->locale) == 1 &&
304		     ZSTR_VAL(pce->locale)[0] == 'C')) {
305			return pce;
306		}
307#else
308		return pce;
309#endif
310	}
311
312	p = ZSTR_VAL(regex);
313
314	/* Parse through the leading whitespace, and display a warning if we
315	   get to the end without encountering a delimiter. */
316	while (isspace((int)*(unsigned char *)p)) p++;
317	if (*p == 0) {
318		php_error_docref(NULL, E_WARNING,
319						 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
320		return NULL;
321	}
322
323	/* Get the delimiter and display a warning if it is alphanumeric
324	   or a backslash. */
325	delimiter = *p++;
326	if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
327		php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
328		return NULL;
329	}
330
331	start_delimiter = delimiter;
332	if ((pp = strchr("([{< )]}> )]}>", delimiter)))
333		delimiter = pp[5];
334	end_delimiter = delimiter;
335
336	pp = p;
337
338	if (start_delimiter == end_delimiter) {
339		/* We need to iterate through the pattern, searching for the ending delimiter,
340		   but skipping the backslashed delimiters.  If the ending delimiter is not
341		   found, display a warning. */
342		while (*pp != 0) {
343			if (*pp == '\\' && pp[1] != 0) pp++;
344			else if (*pp == delimiter)
345				break;
346			pp++;
347		}
348	} else {
349		/* We iterate through the pattern, searching for the matching ending
350		 * delimiter. For each matching starting delimiter, we increment nesting
351		 * level, and decrement it for each matching ending delimiter. If we
352		 * reach the end of the pattern without matching, display a warning.
353		 */
354		int brackets = 1; 	/* brackets nesting level */
355		while (*pp != 0) {
356			if (*pp == '\\' && pp[1] != 0) pp++;
357			else if (*pp == end_delimiter && --brackets <= 0)
358				break;
359			else if (*pp == start_delimiter)
360				brackets++;
361			pp++;
362		}
363	}
364
365	if (*pp == 0) {
366		if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
367			php_error_docref(NULL,E_WARNING, "Null byte in regex");
368		} else if (start_delimiter == end_delimiter) {
369			php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
370		} else {
371			php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
372		}
373		return NULL;
374	}
375
376	/* Make a copy of the actual pattern. */
377	pattern = estrndup(p, pp-p);
378
379	/* Move on to the options */
380	pp++;
381
382	/* Parse through the options, setting appropriate flags.  Display
383	   a warning if we encounter an unknown modifier. */
384	while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
385		switch (*pp++) {
386			/* Perl compatible options */
387			case 'i':	coptions |= PCRE_CASELESS;		break;
388			case 'm':	coptions |= PCRE_MULTILINE;		break;
389			case 's':	coptions |= PCRE_DOTALL;		break;
390			case 'x':	coptions |= PCRE_EXTENDED;		break;
391
392			/* PCRE specific options */
393			case 'A':	coptions |= PCRE_ANCHORED;		break;
394			case 'D':	coptions |= PCRE_DOLLAR_ENDONLY;break;
395			case 'S':	do_study  = 1;					break;
396			case 'U':	coptions |= PCRE_UNGREEDY;		break;
397			case 'X':	coptions |= PCRE_EXTRA;			break;
398			case 'u':	coptions |= PCRE_UTF8;
399	/* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
400       characters, even in UTF-8 mode. However, this can be changed by setting
401       the PCRE_UCP option. */
402#ifdef PCRE_UCP
403						coptions |= PCRE_UCP;
404#endif
405				break;
406
407			/* Custom preg options */
408			case 'e':	poptions |= PREG_REPLACE_EVAL;	break;
409
410			case ' ':
411			case '\n':
412				break;
413
414			default:
415				if (pp[-1]) {
416					php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
417				} else {
418					php_error_docref(NULL,E_WARNING, "Null byte in regex");
419				}
420				efree(pattern);
421				return NULL;
422		}
423	}
424
425#if HAVE_SETLOCALE
426	if (BG(locale_string) &&
427	    (ZSTR_LEN(BG(locale_string)) != 1 || ZSTR_VAL(BG(locale_string))[0] != 'C')) {
428		tables = pcre_maketables();
429	}
430#endif
431
432	/* Compile pattern and display a warning if compilation failed. */
433	re = pcre_compile(pattern,
434					  coptions,
435					  &error,
436					  &erroffset,
437					  tables);
438
439	if (re == NULL) {
440		php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
441		efree(pattern);
442		if (tables) {
443			pefree((void*)tables, 1);
444		}
445		return NULL;
446	}
447
448#ifdef PCRE_STUDY_JIT_COMPILE
449	if (PCRE_G(jit)) {
450		/* Enable PCRE JIT compiler */
451		do_study = 1;
452		soptions |= PCRE_STUDY_JIT_COMPILE;
453	}
454#endif
455
456	/* If study option was specified, study the pattern and
457	   store the result in extra for passing to pcre_exec. */
458	if (do_study) {
459		extra = pcre_study(re, soptions, &error);
460		if (extra) {
461			extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
462			extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
463			extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
464		}
465		if (error != NULL) {
466			php_error_docref(NULL, E_WARNING, "Error while studying pattern");
467		}
468	} else {
469		extra = NULL;
470	}
471
472	efree(pattern);
473
474	/*
475	 * If we reached cache limit, clean out the items from the head of the list;
476	 * these are supposedly the oldest ones (but not necessarily the least used
477	 * ones).
478	 */
479	if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
480		int num_clean = PCRE_CACHE_SIZE / 8;
481		zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
482	}
483
484	/* Store the compiled pattern and extra info in the cache. */
485	new_entry.re = re;
486	new_entry.extra = extra;
487	new_entry.preg_options = poptions;
488	new_entry.compile_options = coptions;
489#if HAVE_SETLOCALE
490	new_entry.locale = BG(locale_string) ?
491		((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
492			zend_string_copy(BG(locale_string)) :
493			zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1)) :
494		NULL;
495	new_entry.tables = tables;
496#endif
497	new_entry.refcount = 0;
498
499	rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
500	if (rc < 0) {
501		php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
502		return NULL;
503	}
504
505	rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
506	if (rc < 0) {
507		php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
508		return NULL;
509	}
510
511	/*
512	 * Interned strings are not duplicated when stored in HashTable,
513	 * but all the interned strings created during HTTP request are removed
514	 * at end of request. However PCRE_G(pcre_cache) must be consistent
515	 * on the next request as well. So we disable usage of interned strings
516	 * as hash keys especually for this table.
517	 * See bug #63180
518	 */
519	if (!ZSTR_IS_INTERNED(regex) || !(GC_FLAGS(regex) & IS_STR_PERMANENT)) {
520		zend_string *str = zend_string_init(ZSTR_VAL(regex), ZSTR_LEN(regex), 1);
521		GC_REFCOUNT(str) = 0; /* will be incremented by zend_hash_update_mem() */
522		ZSTR_H(str) = ZSTR_H(regex);
523		regex = str;
524	}
525
526	pce = zend_hash_update_mem(&PCRE_G(pcre_cache), regex, &new_entry, sizeof(pcre_cache_entry));
527
528	return pce;
529}
530/* }}} */
531
532/* {{{ pcre_get_compiled_regex
533 */
534PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
535{
536	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
537
538	if (extra) {
539		*extra = pce ? pce->extra : NULL;
540	}
541	if (preg_options) {
542		*preg_options = pce ? pce->preg_options : 0;
543	}
544
545	return pce ? pce->re : NULL;
546}
547/* }}} */
548
549/* {{{ pcre_get_compiled_regex_ex
550 */
551PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
552{
553	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
554
555	if (extra) {
556		*extra = pce ? pce->extra : NULL;
557	}
558	if (preg_options) {
559		*preg_options = pce ? pce->preg_options : 0;
560	}
561	if (compile_options) {
562		*compile_options = pce ? pce->compile_options : 0;
563	}
564
565	return pce ? pce->re : NULL;
566}
567/* }}} */
568
569/* {{{ add_offset_pair */
570static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
571{
572	zval match_pair, tmp;
573
574	array_init_size(&match_pair, 2);
575
576	/* Add (match, offset) to the return value */
577	ZVAL_STRINGL(&tmp, str, len);
578	zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
579	ZVAL_LONG(&tmp, offset);
580	zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
581
582	if (name) {
583		Z_ADDREF(match_pair);
584		zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
585	}
586	zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
587}
588/* }}} */
589
590static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
591{
592	/* parameters */
593	zend_string		 *regex;			/* Regular expression */
594	zend_string		 *subject;			/* String to match against */
595	pcre_cache_entry *pce;				/* Compiled regular expression */
596	zval			 *subpats = NULL;	/* Array for subpatterns */
597	zend_long		  flags = 0;		/* Match control flags */
598	zend_long		  start_offset = 0;	/* Where the new search starts */
599
600#ifndef FAST_ZPP
601	if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
602							  &subject, &subpats, &flags, &start_offset) == FAILURE) {
603		RETURN_FALSE;
604	}
605#else
606	ZEND_PARSE_PARAMETERS_START(2, 5)
607		Z_PARAM_STR(regex)
608		Z_PARAM_STR(subject)
609		Z_PARAM_OPTIONAL
610		Z_PARAM_ZVAL_EX(subpats, 0, 1)
611		Z_PARAM_LONG(flags)
612		Z_PARAM_LONG(start_offset)
613	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
614#endif
615
616	if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
617			php_error_docref(NULL, E_WARNING, "Subject is too long");
618			RETURN_FALSE;
619	}
620
621	/* Compile regex or get it from cache. */
622	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
623		RETURN_FALSE;
624	}
625
626	pce->refcount++;
627	php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
628		global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
629	pce->refcount--;
630}
631/* }}} */
632
633/* {{{ php_pcre_match_impl() */
634PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
635	zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
636{
637	zval			 result_set,		/* Holds a set of subpatterns after
638										   a global match */
639				    *match_sets = NULL;	/* An array of sets of matches for each
640										   subpattern after a global match */
641	pcre_extra		*extra = pce->extra;/* Holds results of studying */
642	pcre_extra		 extra_data;		/* Used locally for exec options */
643	int				 exoptions = 0;		/* Execution options */
644	int				 count = 0;			/* Count of matched subpatterns */
645	int				*offsets;			/* Array of subpattern offsets */
646	int				 num_subpats;		/* Number of captured subpatterns */
647	int				 size_offsets;		/* Size of the offsets array */
648	int				 matched;			/* Has anything matched */
649	int				 g_notempty = 0;	/* If the match should not be empty */
650	const char	   **stringlist;		/* Holds list of subpatterns */
651	char 		   **subpat_names;		/* Array for named subpatterns */
652	int				 i;
653	int				 subpats_order;		/* Order of subpattern matches */
654	int				 offset_capture;    /* Capture match offsets: yes/no */
655	unsigned char   *mark = NULL;       /* Target for MARK name */
656	zval            marks;      		/* Array of marks for PREG_PATTERN_ORDER */
657	ALLOCA_FLAG(use_heap);
658
659	ZVAL_UNDEF(&marks);
660
661	/* Overwrite the passed-in value for subpatterns with an empty array. */
662	if (subpats != NULL) {
663		zval_dtor(subpats);
664		array_init(subpats);
665	}
666
667	subpats_order = global ? PREG_PATTERN_ORDER : 0;
668
669	if (use_flags) {
670		offset_capture = flags & PREG_OFFSET_CAPTURE;
671
672		/*
673		 * subpats_order is pre-set to pattern mode so we change it only if
674		 * necessary.
675		 */
676		if (flags & 0xff) {
677			subpats_order = flags & 0xff;
678		}
679		if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
680			(!global && subpats_order != 0)) {
681			php_error_docref(NULL, E_WARNING, "Invalid flags specified");
682			return;
683		}
684	} else {
685		offset_capture = 0;
686	}
687
688	/* Negative offset counts from the end of the string. */
689	if (start_offset < 0) {
690		start_offset = subject_len + start_offset;
691		if (start_offset < 0) {
692			start_offset = 0;
693		}
694	}
695
696	if (extra == NULL) {
697		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
698		extra = &extra_data;
699	}
700	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
701	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
702#ifdef PCRE_EXTRA_MARK
703	extra->mark = &mark;
704	extra->flags |= PCRE_EXTRA_MARK;
705#endif
706
707	/* Calculate the size of the offsets array, and allocate memory for it. */
708	num_subpats = pce->capture_count + 1;
709	size_offsets = num_subpats * 3;
710
711	/*
712	 * Build a mapping from subpattern numbers to their names. We will
713	 * allocate the table only if there are any named subpatterns.
714	 */
715	subpat_names = NULL;
716	if (pce->name_count > 0) {
717		subpat_names = make_subpats_table(num_subpats, pce);
718		if (!subpat_names) {
719			RETURN_FALSE;
720		}
721	}
722
723	if (size_offsets <= 32) {
724		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
725	} else {
726		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
727	}
728	memset(offsets, 0, size_offsets*sizeof(int));
729	/* Allocate match sets array and initialize the values. */
730	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
731		match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
732		for (i=0; i<num_subpats; i++) {
733			array_init(&match_sets[i]);
734		}
735	}
736
737	matched = 0;
738	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
739
740	do {
741		/* Execute the regular expression. */
742		count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
743						  exoptions|g_notempty, offsets, size_offsets);
744
745		/* the string was already proved to be valid UTF-8 */
746		exoptions |= PCRE_NO_UTF8_CHECK;
747
748		/* Check for too many substrings condition. */
749		if (count == 0) {
750			php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
751			count = size_offsets/3;
752		}
753
754		/* If something has matched */
755		if (count > 0) {
756			matched++;
757
758			/* If subpatterns array has been passed, fill it in with values. */
759			if (subpats != NULL) {
760				/* Try to get the list of substrings and display a warning if failed. */
761				if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
762					if (subpat_names) {
763						efree(subpat_names);
764					}
765					if (size_offsets <= 32) {
766						free_alloca(offsets, use_heap);
767					} else {
768						efree(offsets);
769					}
770					if (match_sets) efree(match_sets);
771					php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
772					RETURN_FALSE;
773				}
774
775				if (global) {	/* global pattern matching */
776					if (subpats && subpats_order == PREG_PATTERN_ORDER) {
777						/* For each subpattern, insert it into the appropriate array. */
778						if (offset_capture) {
779							for (i = 0; i < count; i++) {
780								add_offset_pair(&match_sets[i], (char *)stringlist[i],
781												offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
782							}
783						} else {
784							for (i = 0; i < count; i++) {
785								add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
786													   offsets[(i<<1)+1] - offsets[i<<1]);
787							}
788						}
789						/* Add MARK, if available */
790						if (mark) {
791							if (Z_TYPE(marks) == IS_UNDEF) {
792								array_init(&marks);
793							}
794							add_index_string(&marks, matched - 1, (char *) mark);
795						}
796						/*
797						 * If the number of captured subpatterns on this run is
798						 * less than the total possible number, pad the result
799						 * arrays with empty strings.
800						 */
801						if (count < num_subpats) {
802							for (; i < num_subpats; i++) {
803								add_next_index_string(&match_sets[i], "");
804							}
805						}
806					} else {
807						/* Allocate the result set array */
808						array_init_size(&result_set, count + (mark ? 1 : 0));
809
810						/* Add all the subpatterns to it */
811						if (subpat_names) {
812							if (offset_capture) {
813								for (i = 0; i < count; i++) {
814									add_offset_pair(&result_set, (char *)stringlist[i],
815													offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
816								}
817							} else {
818								for (i = 0; i < count; i++) {
819									if (subpat_names[i]) {
820										add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
821															   offsets[(i<<1)+1] - offsets[i<<1]);
822									}
823									add_next_index_stringl(&result_set, (char *)stringlist[i],
824														   offsets[(i<<1)+1] - offsets[i<<1]);
825								}
826							}
827						} else {
828							if (offset_capture) {
829								for (i = 0; i < count; i++) {
830									add_offset_pair(&result_set, (char *)stringlist[i],
831													offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
832								}
833							} else {
834								for (i = 0; i < count; i++) {
835									add_next_index_stringl(&result_set, (char *)stringlist[i],
836														   offsets[(i<<1)+1] - offsets[i<<1]);
837								}
838							}
839						}
840						/* Add MARK, if available */
841						if (mark) {
842							add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
843						}
844						/* And add it to the output array */
845						zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
846					}
847				} else {			/* single pattern matching */
848					/* For each subpattern, insert it into the subpatterns array. */
849					if (subpat_names) {
850						if (offset_capture) {
851							for (i = 0; i < count; i++) {
852								add_offset_pair(subpats, (char *)stringlist[i],
853												offsets[(i<<1)+1] - offsets[i<<1],
854												offsets[i<<1], subpat_names[i]);
855							}
856						} else {
857							for (i = 0; i < count; i++) {
858								if (subpat_names[i]) {
859									add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
860													  offsets[(i<<1)+1] - offsets[i<<1]);
861								}
862								add_next_index_stringl(subpats, (char *)stringlist[i],
863													   offsets[(i<<1)+1] - offsets[i<<1]);
864							}
865						}
866					} else {
867						if (offset_capture) {
868							for (i = 0; i < count; i++) {
869								add_offset_pair(subpats, (char *)stringlist[i],
870												offsets[(i<<1)+1] - offsets[i<<1],
871												offsets[i<<1], NULL);
872							}
873						} else {
874							for (i = 0; i < count; i++) {
875								add_next_index_stringl(subpats, (char *)stringlist[i],
876													   offsets[(i<<1)+1] - offsets[i<<1]);
877							}
878						}
879					}
880					/* Add MARK, if available */
881					if (mark) {
882						add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
883					}
884				}
885
886				pcre_free((void *) stringlist);
887			}
888		} else if (count == PCRE_ERROR_NOMATCH) {
889			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
890			   this is not necessarily the end. We need to advance
891			   the start offset, and continue. Fudge the offset values
892			   to achieve this, unless we're already at the end of the string. */
893			if (g_notempty != 0 && start_offset < subject_len) {
894				int unit_len = calculate_unit_length(pce, subject + start_offset);
895
896				offsets[0] = (int)start_offset;
897				offsets[1] = (int)(start_offset + unit_len);
898			} else
899				break;
900		} else {
901			pcre_handle_exec_error(count);
902			break;
903		}
904
905		/* If we have matched an empty string, mimic what Perl's /g options does.
906		   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
907		   the match again at the same point. If this fails (picked up above) we
908		   advance to the next character. */
909		g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
910
911		/* Advance to the position right after the last full match */
912		start_offset = offsets[1];
913	} while (global);
914
915	/* Add the match sets to the output array and clean up */
916	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
917		if (subpat_names) {
918			for (i = 0; i < num_subpats; i++) {
919				if (subpat_names[i]) {
920					zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
921									 strlen(subpat_names[i]), &match_sets[i]);
922					Z_ADDREF(match_sets[i]);
923				}
924				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
925			}
926		} else {
927			for (i = 0; i < num_subpats; i++) {
928				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
929			}
930		}
931		efree(match_sets);
932
933		if (Z_TYPE(marks) != IS_UNDEF) {
934			add_assoc_zval(subpats, "MARK", &marks);
935		}
936	}
937
938	if (size_offsets <= 32) {
939		free_alloca(offsets, use_heap);
940	} else {
941		efree(offsets);
942	}
943	if (subpat_names) {
944		efree(subpat_names);
945	}
946
947	/* Did we encounter an error? */
948	if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
949		RETVAL_LONG(matched);
950	} else {
951		RETVAL_FALSE;
952	}
953}
954/* }}} */
955
956/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
957   Perform a Perl-style regular expression match */
958static PHP_FUNCTION(preg_match)
959{
960	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
961}
962/* }}} */
963
964/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
965   Perform a Perl-style global regular expression match */
966static PHP_FUNCTION(preg_match_all)
967{
968	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
969}
970/* }}} */
971
/* {{{ preg_get_backref
 * Try to read a back-reference at *str, which must point at the introducing
 * '\' or '$' inside a NUL-terminated replacement string.
 *
 * Accepted forms: "\n", "\nn", "$n", "$nn", "${n}" and "${nn}" (one or two
 * decimal digits; braces are only recognised after '$').
 *
 * On success returns 1, stores the number in *backref and advances *str past
 * the reference. On failure returns 0 and leaves *str untouched; *backref may
 * already have been written if at least one digit was read.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* The introducer is the last character: nothing can follow it. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* Step over the introducer and, for the "${" form, the opening brace. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* The first digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* The second digit is optional. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	/* The braced form has to be closed right after the digits. */
	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
1009/* }}} */
1010
1011/* {{{ preg_do_repl_func
1012 */
1013static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
1014{
1015	zend_string *result_str;
1016	zval		 retval;			/* Function return value */
1017	zval	     args[1];			/* Argument to pass to function */
1018	int			 i;
1019
1020	array_init_size(&args[0], count + (mark ? 1 : 0));
1021	if (subpat_names) {
1022		for (i = 0; i < count; i++) {
1023			if (subpat_names[i]) {
1024				add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
1025			}
1026			add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1027		}
1028	} else {
1029		for (i = 0; i < count; i++) {
1030			add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1031		}
1032	}
1033	if (mark) {
1034		add_assoc_string(&args[0], "MARK", (char *) mark);
1035	}
1036
1037	if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1038		result_str = zval_get_string(&retval);
1039		zval_ptr_dtor(&retval);
1040	} else {
1041		if (!EG(exception)) {
1042			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1043		}
1044
1045		result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1046	}
1047
1048	zval_ptr_dtor(&args[0]);
1049
1050	return result_str;
1051}
1052/* }}} */
1053
1054/* {{{ php_pcre_replace
1055 */
1056PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1057							  zend_string *subject_str,
1058							  char *subject, int subject_len,
1059							  zval *replace_val, int is_callable_replace,
1060							  int limit, int *replace_count)
1061{
1062	pcre_cache_entry	*pce;			    /* Compiled regular expression */
1063	zend_string	 		*result;			/* Function result */
1064
1065	/* Compile regex or get it from cache. */
1066	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1067		return NULL;
1068	}
1069	pce->refcount++;
1070	result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
1071		is_callable_replace, limit, replace_count);
1072	pce->refcount--;
1073
1074	return result;
1075}
1076/* }}} */
1077
1078/* {{{ php_pcre_replace_impl() */
1079PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count)
1080{
1081	pcre_extra		*extra = pce->extra;/* Holds results of studying */
1082	pcre_extra		 extra_data;		/* Used locally for exec options */
1083	int				 exoptions = 0;		/* Execution options */
1084	int				 count = 0;			/* Count of matched subpatterns */
1085	int				*offsets;			/* Array of subpattern offsets */
1086	char 			**subpat_names;		/* Array for named subpatterns */
1087	int				 num_subpats;		/* Number of captured subpatterns */
1088	int				 size_offsets;		/* Size of the offsets array */
1089	int				 new_len;			/* Length of needed storage */
1090	int				 alloc_len;			/* Actual allocated length */
1091	int				 match_len;			/* Length of the current match */
1092	int				 backref;			/* Backreference number */
1093	int				 start_offset;		/* Where the new search starts */
1094	int				 g_notempty=0;		/* If the match should not be empty */
1095	int				 replace_len=0;		/* Length of replacement string */
1096	char			*replace=NULL,		/* Replacement string */
1097					*walkbuf,			/* Location of current replacement in the result */
1098					*walk,				/* Used to walk the replacement string */
1099					*match,				/* The current match */
1100					*piece,				/* The current piece of subject */
1101					*replace_end=NULL,	/* End of replacement string */
1102					 walk_last;			/* Last walked character */
1103	int				 result_len; 		/* Length of result */
1104	unsigned char   *mark = NULL;       /* Target for MARK name */
1105	zend_string		*result;			/* Result of replacement */
1106	zend_string     *eval_result=NULL;  /* Result of custom function */
1107
1108	ALLOCA_FLAG(use_heap);
1109
1110	if (extra == NULL) {
1111		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1112		extra = &extra_data;
1113	}
1114
1115	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1116	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1117
1118	if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
1119		php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1120		return NULL;
1121	}
1122
1123	if (!is_callable_replace) {
1124		replace = Z_STRVAL_P(replace_val);
1125		replace_len = (int)Z_STRLEN_P(replace_val);
1126		replace_end = replace + replace_len;
1127	}
1128
1129	/* Calculate the size of the offsets array, and allocate memory for it. */
1130	num_subpats = pce->capture_count + 1;
1131	size_offsets = num_subpats * 3;
1132	if (size_offsets <= 32) {
1133		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1134	} else {
1135		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1136	}
1137
1138	/*
1139	 * Build a mapping from subpattern numbers to their names. We will
1140	 * allocate the table only if there are any named subpatterns.
1141	 */
1142	subpat_names = NULL;
1143	if (UNEXPECTED(pce->name_count > 0)) {
1144		subpat_names = make_subpats_table(num_subpats, pce);
1145		if (!subpat_names) {
1146			return NULL;
1147		}
1148	}
1149
1150	alloc_len = 0;
1151	result = NULL;
1152
1153	/* Initialize */
1154	match = NULL;
1155	start_offset = 0;
1156	result_len = 0;
1157	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1158
1159	while (1) {
1160#ifdef PCRE_EXTRA_MARK
1161		extra->mark = &mark;
1162		extra->flags |= PCRE_EXTRA_MARK;
1163#endif
1164		/* Execute the regular expression. */
1165		count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1166						  exoptions|g_notempty, offsets, size_offsets);
1167
1168		/* the string was already proved to be valid UTF-8 */
1169		exoptions |= PCRE_NO_UTF8_CHECK;
1170
1171		/* Check for too many substrings condition. */
1172		if (UNEXPECTED(count == 0)) {
1173			php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1174			count = size_offsets / 3;
1175		}
1176
1177		piece = subject + start_offset;
1178
1179		/* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
1180		if (EXPECTED(count > 0 && (offsets[1] - offsets[0] >= 0) && limit)) {
1181			if (UNEXPECTED(replace_count)) {
1182				++*replace_count;
1183			}
1184
1185			/* Set the match location in subject */
1186			match = subject + offsets[0];
1187
1188			new_len = result_len + offsets[0] - start_offset; /* part before the match */
1189
1190			/* if (!is_callable_replace) */
1191			if (EXPECTED(replace)) {
1192				/* do regular substitution */
1193				walk = replace;
1194				walk_last = 0;
1195
1196				while (walk < replace_end) {
1197					if ('\\' == *walk || '$' == *walk) {
1198						if (walk_last == '\\') {
1199							walk++;
1200							walk_last = 0;
1201							continue;
1202						}
1203						if (preg_get_backref(&walk, &backref)) {
1204							if (backref < count)
1205								new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1206							continue;
1207						}
1208					}
1209					new_len++;
1210					walk++;
1211					walk_last = walk[-1];
1212				}
1213
1214				if (new_len >= alloc_len) {
1215					alloc_len = alloc_len + 2 * new_len;
1216					if (result == NULL) {
1217						result = zend_string_alloc(alloc_len, 0);
1218					} else {
1219						result = zend_string_extend(result, alloc_len, 0);
1220					}
1221				}
1222
1223				/* copy the part of the string before the match */
1224				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1225				result_len += (int)(match-piece);
1226
1227				/* copy replacement and backrefs */
1228				walkbuf = ZSTR_VAL(result) + result_len;
1229
1230				walk = replace;
1231				walk_last = 0;
1232				while (walk < replace_end) {
1233					if ('\\' == *walk || '$' == *walk) {
1234						if (walk_last == '\\') {
1235							*(walkbuf-1) = *walk++;
1236							walk_last = 0;
1237							continue;
1238						}
1239						if (preg_get_backref(&walk, &backref)) {
1240							if (backref < count) {
1241								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1242								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1243								walkbuf += match_len;
1244							}
1245							continue;
1246						}
1247					}
1248					*walkbuf++ = *walk++;
1249					walk_last = walk[-1];
1250				}
1251				*walkbuf = '\0';
1252				/* increment the result length by how much we've added to the string */
1253				result_len += (int)(walkbuf - (ZSTR_VAL(result) + result_len));
1254			} else {
1255				/* Use custom function to get replacement string and its length. */
1256				eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1257				ZEND_ASSERT(eval_result);
1258				new_len += (int)ZSTR_LEN(eval_result);
1259				if (new_len >= alloc_len) {
1260					alloc_len = alloc_len + 2 * new_len;
1261					if (result == NULL) {
1262						result = zend_string_alloc(alloc_len, 0);
1263					} else {
1264						result = zend_string_extend(result, alloc_len, 0);
1265					}
1266				}
1267				/* copy the part of the string before the match */
1268				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1269				result_len += (int)(match-piece);
1270
1271				/* copy replacement and backrefs */
1272				walkbuf = ZSTR_VAL(result) + result_len;
1273
1274				/* If using custom function, copy result to the buffer and clean up. */
1275				memcpy(walkbuf, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1276				result_len += (int)ZSTR_LEN(eval_result);
1277				zend_string_release(eval_result);
1278			}
1279
1280			if (EXPECTED(limit)) {
1281				limit--;
1282			}
1283		} else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) {
1284			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1285			   this is not necessarily the end. We need to advance
1286			   the start offset, and continue. Fudge the offset values
1287			   to achieve this, unless we're already at the end of the string. */
1288			if (g_notempty != 0 && start_offset < subject_len) {
1289				int unit_len = calculate_unit_length(pce, piece);
1290
1291				offsets[0] = start_offset;
1292				offsets[1] = start_offset + unit_len;
1293				memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
1294				result_len += unit_len;
1295			} else {
1296				if (!result && subject_str) {
1297					result = zend_string_copy(subject_str);
1298					break;
1299				}
1300				new_len = result_len + subject_len - start_offset;
1301				if (new_len > alloc_len) {
1302					alloc_len = new_len; /* now we know exactly how long it is */
1303					if (NULL != result) {
1304						result = zend_string_realloc(result, alloc_len, 0);
1305					} else {
1306						result = zend_string_alloc(alloc_len, 0);
1307					}
1308				}
1309				/* stick that last bit of string on our output */
1310				memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
1311				result_len += subject_len - start_offset;
1312				ZSTR_VAL(result)[result_len] = '\0';
1313				ZSTR_LEN(result) = result_len;
1314				break;
1315			}
1316		} else {
1317			pcre_handle_exec_error(count);
1318			if (result) {
1319				zend_string_free(result);
1320				result = NULL;
1321			}
1322			break;
1323		}
1324
1325		/* If we have matched an empty string, mimic what Perl's /g options does.
1326		   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1327		   the match again at the same point. If this fails (picked up above) we
1328		   advance to the next character. */
1329		g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1330
1331		/* Advance to the next piece. */
1332		start_offset = offsets[1];
1333	}
1334
1335	if (size_offsets <= 32) {
1336		free_alloca(offsets, use_heap);
1337	} else {
1338		efree(offsets);
1339	}
1340	if (UNEXPECTED(subpat_names)) {
1341		efree(subpat_names);
1342	}
1343
1344	return result;
1345}
1346/* }}} */
1347
1348/* {{{ php_replace_in_subject
1349 */
1350static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1351{
1352	zval		*regex_entry,
1353				*replace_value,
1354				 empty_replace;
1355	zend_string *result;
1356	uint32_t replace_idx;
1357	zend_string	*subject_str = zval_get_string(subject);
1358
1359	/* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1360	ZVAL_EMPTY_STRING(&empty_replace);
1361
1362	if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str))) {
1363			php_error_docref(NULL, E_WARNING, "Subject is too long");
1364			return NULL;
1365	}
1366
1367	/* If regex is an array */
1368	if (Z_TYPE_P(regex) == IS_ARRAY) {
1369		replace_value = replace;
1370		replace_idx = 0;
1371
1372		/* For each entry in the regex array, get the entry */
1373		ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1374			zval replace_str;
1375			/* Make sure we're dealing with strings. */
1376			zend_string *regex_str = zval_get_string(regex_entry);
1377
1378			ZVAL_UNDEF(&replace_str);
1379			/* If replace is an array and not a callable construct */
1380			if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1381				/* Get current entry */
1382				while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1383					if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNDEF) {
1384						ZVAL_COPY(&replace_str, &Z_ARRVAL_P(replace)->arData[replace_idx].val);
1385						break;
1386					}
1387					replace_idx++;
1388				}
1389				if (!Z_ISUNDEF(replace_str)) {
1390					if (!is_callable_replace) {
1391						convert_to_string(&replace_str);
1392					}
1393					replace_value = &replace_str;
1394					replace_idx++;
1395				} else {
1396					/* We've run out of replacement strings, so use an empty one */
1397					replace_value = &empty_replace;
1398				}
1399			}
1400
1401			/* Do the actual replacement and put the result back into subject_str
1402			   for further replacements. */
1403			if ((result = php_pcre_replace(regex_str,
1404										   subject_str,
1405										   ZSTR_VAL(subject_str),
1406										   (int)ZSTR_LEN(subject_str),
1407										   replace_value,
1408										   is_callable_replace,
1409										   limit,
1410										   replace_count)) != NULL) {
1411				zend_string_release(subject_str);
1412				subject_str = result;
1413			} else {
1414				zend_string_release(subject_str);
1415				zend_string_release(regex_str);
1416				zval_dtor(&replace_str);
1417				return NULL;
1418			}
1419
1420			zend_string_release(regex_str);
1421			zval_dtor(&replace_str);
1422		} ZEND_HASH_FOREACH_END();
1423
1424		return subject_str;
1425	} else {
1426		result = php_pcre_replace(Z_STR_P(regex),
1427								  subject_str,
1428								  ZSTR_VAL(subject_str),
1429								  (int)ZSTR_LEN(subject_str),
1430								  replace,
1431								  is_callable_replace,
1432								  limit,
1433								  replace_count);
1434		zend_string_release(subject_str);
1435		return result;
1436	}
1437}
1438/* }}} */
1439
1440/* {{{ preg_replace_impl
1441 */
1442static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
1443{
1444	zval		*subject_entry;
1445	zend_string	*result;
1446	zend_string	*string_key;
1447	zend_ulong	 num_key;
1448	int			 replace_count = 0, old_replace_count;
1449
1450	if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1451		convert_to_string_ex(replace);
1452	}
1453
1454	if (Z_TYPE_P(regex) != IS_ARRAY) {
1455		convert_to_string_ex(regex);
1456	}
1457
1458	/* if subject is an array */
1459	if (Z_TYPE_P(subject) == IS_ARRAY) {
1460		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1461
1462		/* For each subject entry, convert it to string, then perform replacement
1463		   and add the result to the return_value array. */
1464		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1465			old_replace_count = replace_count;
1466			if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1467				if (!is_filter || replace_count > old_replace_count) {
1468					/* Add to return array */
1469					zval zv;
1470
1471					ZVAL_STR(&zv, result);
1472					if (string_key) {
1473						zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
1474					} else {
1475						zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
1476					}
1477				} else {
1478					zend_string_release(result);
1479				}
1480			}
1481		} ZEND_HASH_FOREACH_END();
1482	} else {
1483		/* if subject is not an array */
1484		old_replace_count = replace_count;
1485		if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1486			if (!is_filter || replace_count > old_replace_count) {
1487				RETVAL_STR(result);
1488			} else {
1489				zend_string_release(result);
1490			}
1491		}
1492	}
1493
1494	return replace_count;
1495}
1496/* }}} */
1497
1498/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1499   Perform Perl-style regular expression replacement. */
1500static PHP_FUNCTION(preg_replace)
1501{
1502	zval *regex, *replace, *subject, *zcount = NULL;
1503	zend_long limit = -1;
1504	int replace_count;
1505
1506#ifndef FAST_ZPP
1507	/* Get function parameters and do error-checking. */
1508	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1509		return;
1510	}
1511#else
1512	ZEND_PARSE_PARAMETERS_START(3, 5)
1513		Z_PARAM_ZVAL(regex)
1514		Z_PARAM_ZVAL(replace)
1515		Z_PARAM_ZVAL(subject)
1516		Z_PARAM_OPTIONAL
1517		Z_PARAM_LONG(limit)
1518		Z_PARAM_ZVAL_EX(zcount, 0, 1)
1519	ZEND_PARSE_PARAMETERS_END();
1520#endif
1521
1522	if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1523		php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1524		RETURN_FALSE;
1525	}
1526
1527	replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0);
1528	if (zcount) {
1529		zval_dtor(zcount);
1530		ZVAL_LONG(zcount, replace_count);
1531	}
1532}
1533/* }}} */
1534
1535/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1536   Perform Perl-style regular expression replacement using replacement callback. */
1537static PHP_FUNCTION(preg_replace_callback)
1538{
1539	zval *regex, *replace, *subject, *zcount = NULL;
1540	zend_long limit = -1;
1541	zend_string	*callback_name;
1542	int replace_count;
1543
1544#ifndef FAST_ZPP
1545	/* Get function parameters and do error-checking. */
1546	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1547		return;
1548	}
1549#else
1550	ZEND_PARSE_PARAMETERS_START(3, 5)
1551		Z_PARAM_ZVAL(regex)
1552		Z_PARAM_ZVAL(replace)
1553		Z_PARAM_ZVAL(subject)
1554		Z_PARAM_OPTIONAL
1555		Z_PARAM_LONG(limit)
1556		Z_PARAM_ZVAL_EX(zcount, 0, 1)
1557	ZEND_PARSE_PARAMETERS_END();
1558#endif
1559
1560	if (!zend_is_callable(replace, 0, &callback_name)) {
1561		php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
1562		zend_string_release(callback_name);
1563		ZVAL_COPY(return_value, subject);
1564		return;
1565	}
1566	zend_string_release(callback_name);
1567
1568	replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0);
1569	if (zcount) {
1570		zval_dtor(zcount);
1571		ZVAL_LONG(zcount, replace_count);
1572	}
1573}
1574/* }}} */
1575
1576/* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
1577   Perform Perl-style regular expression replacement using replacement callback. */
1578static PHP_FUNCTION(preg_replace_callback_array)
1579{
1580	zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
1581	zend_long limit = -1;
1582	zend_string *str_idx;
1583	zend_string *callback_name;
1584	int replace_count = 0;
1585
1586#ifndef FAST_ZPP
1587	/* Get function parameters and do error-checking. */
1588	if (zend_parse_parameters(ZEND_NUM_ARGS(), "az|lz/", &pattern, &subject, &limit, &zcount) == FAILURE) {
1589		return;
1590	}
1591#else
1592	ZEND_PARSE_PARAMETERS_START(2, 4)
1593		Z_PARAM_ARRAY(pattern)
1594		Z_PARAM_ZVAL(subject)
1595		Z_PARAM_OPTIONAL
1596		Z_PARAM_LONG(limit)
1597		Z_PARAM_ZVAL_EX(zcount, 0, 1)
1598	ZEND_PARSE_PARAMETERS_END();
1599#endif
1600
1601	ZVAL_UNDEF(&zv);
1602	ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
1603		if (str_idx) {
1604			ZVAL_STR_COPY(&regex, str_idx);
1605		} else {
1606			php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
1607			zval_ptr_dtor(return_value);
1608			RETURN_NULL();
1609		}
1610
1611		if (!zend_is_callable(replace, 0, &callback_name)) {
1612			php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
1613			zend_string_release(callback_name);
1614			zval_ptr_dtor(&regex);
1615			zval_ptr_dtor(return_value);
1616			ZVAL_COPY(return_value, subject);
1617			return;
1618		}
1619		zend_string_release(callback_name);
1620
1621		if (Z_ISNULL_P(return_value)) {
1622			replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0);
1623		} else {
1624			replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0);
1625			zval_ptr_dtor(return_value);
1626		}
1627
1628		zval_ptr_dtor(&regex);
1629
1630		if (Z_ISUNDEF(zv)) {
1631			RETURN_NULL();
1632		}
1633
1634		ZVAL_COPY_VALUE(return_value, &zv);
1635
1636		if (UNEXPECTED(EG(exception))) {
1637			zval_ptr_dtor(return_value);
1638			RETURN_NULL();
1639		}
1640	} ZEND_HASH_FOREACH_END();
1641
1642	if (zcount) {
1643		zval_dtor(zcount);
1644		ZVAL_LONG(zcount, replace_count);
1645	}
1646}
1647/* }}} */
1648
1649/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1650   Perform Perl-style regular expression replacement and only return matches. */
1651static PHP_FUNCTION(preg_filter)
1652{
1653	zval *regex, *replace, *subject, *zcount = NULL;
1654	zend_long limit = -1;
1655	int replace_count;
1656
1657#ifndef FAST_ZPP
1658	/* Get function parameters and do error-checking. */
1659	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1660		return;
1661	}
1662#else
1663	ZEND_PARSE_PARAMETERS_START(3, 5)
1664		Z_PARAM_ZVAL(regex)
1665		Z_PARAM_ZVAL(replace)
1666		Z_PARAM_ZVAL(subject)
1667		Z_PARAM_OPTIONAL
1668		Z_PARAM_LONG(limit)
1669		Z_PARAM_ZVAL_EX(zcount, 0, 1)
1670	ZEND_PARSE_PARAMETERS_END();
1671#endif
1672
1673	if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1674		php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1675		RETURN_FALSE;
1676	}
1677
1678	replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1);
1679	if (zcount) {
1680		zval_dtor(zcount);
1681		ZVAL_LONG(zcount, replace_count);
1682	}
1683}
1684/* }}} */
1685
1686/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1687   Split string into an array using a perl-style regular expression as a delimiter */
1688static PHP_FUNCTION(preg_split)
1689{
1690	zend_string			*regex;			/* Regular expression */
1691	zend_string			*subject;		/* String to match against */
1692	zend_long			 limit_val = -1;/* Integer value of limit */
1693	zend_long			 flags = 0;		/* Match control flags */
1694	pcre_cache_entry	*pce;			/* Compiled regular expression */
1695
1696	/* Get function parameters and do error checking */
1697#ifndef FAST_ZPP
1698	if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
1699							  &subject, &limit_val, &flags) == FAILURE) {
1700		RETURN_FALSE;
1701	}
1702#else
1703	ZEND_PARSE_PARAMETERS_START(2, 4)
1704		Z_PARAM_STR(regex)
1705		Z_PARAM_STR(subject)
1706		Z_PARAM_OPTIONAL
1707		Z_PARAM_LONG(limit_val)
1708		Z_PARAM_LONG(flags)
1709	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1710#endif
1711
1712	if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
1713			php_error_docref(NULL, E_WARNING, "Subject is too long");
1714			RETURN_FALSE;
1715	}
1716
1717	/* Compile regex or get it from cache. */
1718	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1719		RETURN_FALSE;
1720	}
1721
1722	pce->refcount++;
1723	php_pcre_split_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, (int)limit_val, flags);
1724	pce->refcount--;
1725}
1726/* }}} */
1727
1728/* {{{ php_pcre_split
1729 */
1730PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1731	zend_long limit_val, zend_long flags)
1732{
1733	pcre_extra		*extra = pce->extra;/* Holds results of studying */
1734	pcre			*re_bump = NULL;	/* Regex instance for empty matches */
1735	pcre_extra		*extra_bump = NULL;	/* Almost dummy */
1736	pcre_extra		 extra_data;		/* Used locally for exec options */
1737	int				*offsets;			/* Array of subpattern offsets */
1738	int				 size_offsets;		/* Size of the offsets array */
1739	int				 exoptions = 0;		/* Execution options */
1740	int				 count = 0;			/* Count of matched subpatterns */
1741	int				 start_offset;		/* Where the new search starts */
1742	int				 next_offset;		/* End of the last delimiter match + 1 */
1743	int				 g_notempty = 0;	/* If the match should not be empty */
1744	char			*last_match;		/* Location of last match */
1745	int				 no_empty;			/* If NO_EMPTY flag is set */
1746	int				 delim_capture; 	/* If delimiters should be captured */
1747	int				 offset_capture;	/* If offsets should be captured */
1748	zval			 tmp;
1749	ALLOCA_FLAG(use_heap);
1750
1751	no_empty = flags & PREG_SPLIT_NO_EMPTY;
1752	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1753	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1754
1755	if (limit_val == 0) {
1756		limit_val = -1;
1757	}
1758
1759	if (extra == NULL) {
1760		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1761		extra = &extra_data;
1762	}
1763	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1764	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1765#ifdef PCRE_EXTRA_MARK
1766	extra->flags &= ~PCRE_EXTRA_MARK;
1767#endif
1768
1769	/* Initialize return value */
1770	array_init(return_value);
1771
1772	/* Calculate the size of the offsets array, and allocate memory for it. */
1773	size_offsets = (pce->capture_count + 1) * 3;
1774	if (size_offsets <= 32) {
1775		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1776	} else {
1777		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1778	}
1779
1780	/* Start at the beginning of the string */
1781	start_offset = 0;
1782	next_offset = 0;
1783	last_match = subject;
1784	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1785
1786	/* Get next piece if no limit or limit not yet reached and something matched*/
1787	while ((limit_val == -1 || limit_val > 1)) {
1788		count = pcre_exec(pce->re, extra, subject,
1789						  subject_len, start_offset,
1790						  exoptions|g_notempty, offsets, size_offsets);
1791
1792		/* the string was already proved to be valid UTF-8 */
1793		exoptions |= PCRE_NO_UTF8_CHECK;
1794
1795		/* Check for too many substrings condition. */
1796		if (count == 0) {
1797			php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1798			count = size_offsets/3;
1799		}
1800
1801		/* If something matched */
1802		if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
1803			if (!no_empty || &subject[offsets[0]] != last_match) {
1804
1805				if (offset_capture) {
1806					/* Add (match, offset) pair to the return value */
1807					add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1808				} else {
1809					/* Add the piece to the return value */
1810					ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1811					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1812				}
1813
1814				/* One less left to do */
1815				if (limit_val != -1)
1816					limit_val--;
1817			}
1818
1819			last_match = &subject[offsets[1]];
1820			next_offset = offsets[1];
1821
1822			if (delim_capture) {
1823				int i, match_len;
1824				for (i = 1; i < count; i++) {
1825					match_len = offsets[(i<<1)+1] - offsets[i<<1];
1826					/* If we have matched a delimiter */
1827					if (!no_empty || match_len > 0) {
1828						if (offset_capture) {
1829							add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1830						} else {
1831							ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1832							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1833						}
1834					}
1835				}
1836			}
1837		} else if (count == PCRE_ERROR_NOMATCH) {
1838			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1839			   this is not necessarily the end. We need to advance
1840			   the start offset, and continue. Fudge the offset values
1841			   to achieve this, unless we're already at the end of the string. */
1842			if (g_notempty != 0 && start_offset < subject_len) {
1843				if (pce->compile_options & PCRE_UTF8) {
1844					if (re_bump == NULL) {
1845						int dummy;
1846						zend_string *regex = zend_string_init("/./us", sizeof("/./us")-1, 0);
1847						re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy);
1848						zend_string_release(regex);
1849						if (re_bump == NULL) {
1850							RETURN_FALSE;
1851						}
1852					}
1853					count = pcre_exec(re_bump, extra_bump, subject,
1854							  subject_len, start_offset,
1855							  exoptions, offsets, size_offsets);
1856					if (count < 1) {
1857						php_error_docref(NULL, E_WARNING, "Unknown error");
1858						RETURN_FALSE;
1859					}
1860				} else {
1861					offsets[0] = start_offset;
1862					offsets[1] = start_offset + 1;
1863				}
1864			} else
1865				break;
1866		} else {
1867			pcre_handle_exec_error(count);
1868			break;
1869		}
1870
1871		/* If we have matched an empty string, mimic what Perl's /g options does.
1872		   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1873		   the match again at the same point. If this fails (picked up above) we
1874		   advance to the next character. */
1875		g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1876
1877		/* Advance to the position right after the last full match */
1878		start_offset = offsets[1];
1879	}
1880
1881
1882	start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1883
1884	if (!no_empty || start_offset < subject_len)
1885	{
1886		if (offset_capture) {
1887			/* Add the last (match, offset) pair to the return value */
1888			add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1889		} else {
1890			/* Add the last piece to the return value */
1891			ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1892			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1893		}
1894	}
1895
1896
1897	/* Clean up */
1898	if (size_offsets <= 32) {
1899		free_alloca(offsets, use_heap);
1900	} else {
1901		efree(offsets);
1902	}
1903}
1904/* }}} */
1905
1906/* {{{ proto string preg_quote(string str [, string delim_char])
1907   Quote regular expression characters plus an optional character */
1908static PHP_FUNCTION(preg_quote)
1909{
1910	size_t		 in_str_len;
1911	char	*in_str;		/* Input string argument */
1912	char	*in_str_end;    /* End of the input string */
1913	size_t		 delim_len = 0;
1914	char	*delim = NULL;	/* Additional delimiter argument */
1915	zend_string	*out_str;	/* Output string with quoted characters */
1916	char 	*p,				/* Iterator for input string */
1917			*q,				/* Iterator for output string */
1918			 delim_char=0,	/* Delimiter character to be quoted */
1919			 c;				/* Current character */
1920	zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1921
1922	/* Get the arguments and check for errors */
1923#ifndef FAST_ZPP
1924	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
1925							  &delim, &delim_len) == FAILURE) {
1926		return;
1927	}
1928#else
1929	ZEND_PARSE_PARAMETERS_START(1, 2)
1930		Z_PARAM_STRING(in_str, in_str_len)
1931		Z_PARAM_OPTIONAL
1932		Z_PARAM_STRING(delim, delim_len)
1933	ZEND_PARSE_PARAMETERS_END();
1934#endif
1935
1936	in_str_end = in_str + in_str_len;
1937
1938	/* Nothing to do if we got an empty string */
1939	if (in_str == in_str_end) {
1940		RETURN_EMPTY_STRING();
1941	}
1942
1943	if (delim && *delim) {
1944		delim_char = delim[0];
1945		quote_delim = 1;
1946	}
1947
1948	/* Allocate enough memory so that even if each character
1949	   is quoted, we won't run out of room */
1950	out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1951
1952	/* Go through the string and quote necessary characters */
1953	for (p = in_str, q = ZSTR_VAL(out_str); p != in_str_end; p++) {
1954		c = *p;
1955		switch(c) {
1956			case '.':
1957			case '\\':
1958			case '+':
1959			case '*':
1960			case '?':
1961			case '[':
1962			case '^':
1963			case ']':
1964			case '$':
1965			case '(':
1966			case ')':
1967			case '{':
1968			case '}':
1969			case '=':
1970			case '!':
1971			case '>':
1972			case '<':
1973			case '|':
1974			case ':':
1975			case '-':
1976				*q++ = '\\';
1977				*q++ = c;
1978				break;
1979
1980			case '\0':
1981				*q++ = '\\';
1982				*q++ = '0';
1983				*q++ = '0';
1984				*q++ = '0';
1985				break;
1986
1987			default:
1988				if (quote_delim && c == delim_char)
1989					*q++ = '\\';
1990				*q++ = c;
1991				break;
1992		}
1993	}
1994	*q = '\0';
1995
1996	/* Reallocate string and return it */
1997	out_str = zend_string_truncate(out_str, q - ZSTR_VAL(out_str), 0);
1998	RETURN_NEW_STR(out_str);
1999}
2000/* }}} */
2001
2002/* {{{ proto array preg_grep(string regex, array input [, int flags])
2003   Searches array and returns entries which match regex */
2004static PHP_FUNCTION(preg_grep)
2005{
2006	zend_string			*regex;			/* Regular expression */
2007	zval				*input;			/* Input array */
2008	zend_long			 flags = 0;		/* Match control flags */
2009	pcre_cache_entry	*pce;			/* Compiled regular expression */
2010
2011	/* Get arguments and do error checking */
2012#ifndef FAST_ZPP
2013	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
2014							  &input, &flags) == FAILURE) {
2015		return;
2016	}
2017#else
2018	ZEND_PARSE_PARAMETERS_START(2, 3)
2019		Z_PARAM_STR(regex)
2020		Z_PARAM_ARRAY(input)
2021		Z_PARAM_OPTIONAL
2022		Z_PARAM_LONG(flags)
2023	ZEND_PARSE_PARAMETERS_END();
2024#endif
2025
2026	/* Compile regex or get it from cache. */
2027	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2028		RETURN_FALSE;
2029	}
2030
2031	pce->refcount++;
2032	php_pcre_grep_impl(pce, input, return_value, flags);
2033	pce->refcount--;
2034}
2035/* }}} */
2036
2037PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2038{
2039	zval		    *entry;				/* An entry in the input array */
2040	pcre_extra		*extra = pce->extra;/* Holds results of studying */
2041	pcre_extra		 extra_data;		/* Used locally for exec options */
2042	int				*offsets;			/* Array of subpattern offsets */
2043	int				 size_offsets;		/* Size of the offsets array */
2044	int				 count = 0;			/* Count of matched subpatterns */
2045	zend_string		*string_key;
2046	zend_ulong		 num_key;
2047	zend_bool		 invert;			/* Whether to return non-matching
2048										   entries */
2049	ALLOCA_FLAG(use_heap);
2050
2051	invert = flags & PREG_GREP_INVERT ? 1 : 0;
2052
2053	if (extra == NULL) {
2054		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2055		extra = &extra_data;
2056	}
2057	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
2058	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
2059#ifdef PCRE_EXTRA_MARK
2060	extra->flags &= ~PCRE_EXTRA_MARK;
2061#endif
2062
2063	/* Calculate the size of the offsets array, and allocate memory for it. */
2064	size_offsets = (pce->capture_count + 1) * 3;
2065	if (size_offsets <= 32) {
2066		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
2067	} else {
2068		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
2069	}
2070
2071	/* Initialize return array */
2072	array_init(return_value);
2073
2074	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2075
2076	/* Go through the input array */
2077	ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2078		zend_string *subject_str = zval_get_string(entry);
2079
2080		/* Perform the match */
2081		count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
2082						  (int)ZSTR_LEN(subject_str), 0,
2083						  0, offsets, size_offsets);
2084
2085		/* Check for too many substrings condition. */
2086		if (count == 0) {
2087			php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2088			count = size_offsets/3;
2089		} else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
2090			pcre_handle_exec_error(count);
2091			zend_string_release(subject_str);
2092			break;
2093		}
2094
2095		/* If the entry fits our requirements */
2096		if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
2097			if (Z_REFCOUNTED_P(entry)) {
2098			   	Z_ADDREF_P(entry);
2099			}
2100
2101			/* Add to return array */
2102			if (string_key) {
2103				zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2104			} else {
2105				zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2106			}
2107		}
2108
2109		zend_string_release(subject_str);
2110	} ZEND_HASH_FOREACH_END();
2111
2112	/* Clean up */
2113	if (size_offsets <= 32) {
2114		free_alloca(offsets, use_heap);
2115	} else {
2116		efree(offsets);
2117	}
2118}
2119/* }}} */
2120
2121/* {{{ proto int preg_last_error()
2122   Returns the error code of the last regexp execution. */
2123static PHP_FUNCTION(preg_last_error)
2124{
2125#ifndef FAST_ZPP
2126	if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
2127		return;
2128	}
2129#else
2130	ZEND_PARSE_PARAMETERS_START(0, 0)
2131	ZEND_PARSE_PARAMETERS_END();
2132#endif
2133
2134	RETURN_LONG(PCRE_G(error_code));
2135}
2136/* }}} */
2137
2138/* {{{ module definition structures */
2139
/* {{{ arginfo */
/*
 * Argument descriptions for the functions exported by this extension.
 *
 * In ZEND_BEGIN_ARG_INFO_EX(name, _, return_reference, required_num_args)
 * the last value is the number of mandatory arguments; it agrees with the
 * parameter parsing of the functions visible in this file (e.g. preg_quote
 * requires 1, preg_grep requires 2).  In ZEND_ARG_INFO(pass_by_ref, name) a
 * first value of 1 marks the argument as passed by reference.
 */

/* preg_match(pattern, subject [, &subpatterns [, flags [, offset]]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_match_all(): same argument list as preg_match() */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_replace(regex, replace, subject [, limit [, &count]]);
   the function table registers preg_filter with this same description */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback(regex, callback, subject [, limit [, &count]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback_array(pattern, subject [, limit [, &count]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_split(pattern, subject [, limit [, flags]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_quote(str [, delim_char]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

/* preg_grep(regex, input [, flags]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error(): takes no arguments */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
/* }}} */
2201
/* Table of the userland functions provided by this extension, each paired
   with its argument description.  preg_filter has no arginfo of its own and
   is registered with the one declared for preg_replace.  PHP_FE_END
   terminates the list. */
static const zend_function_entry pcre_functions[] = {
	PHP_FE(preg_match,					arginfo_preg_match)
	PHP_FE(preg_match_all,				arginfo_preg_match_all)
	PHP_FE(preg_replace,				arginfo_preg_replace)
	PHP_FE(preg_replace_callback,		arginfo_preg_replace_callback)
	PHP_FE(preg_replace_callback_array,	arginfo_preg_replace_callback_array)
	PHP_FE(preg_filter,					arginfo_preg_replace)
	PHP_FE(preg_split,					arginfo_preg_split)
	PHP_FE(preg_quote,					arginfo_preg_quote)
	PHP_FE(preg_grep,					arginfo_preg_grep)
	PHP_FE(preg_last_error,				arginfo_preg_last_error)
	PHP_FE_END
};
2215
/* Module descriptor for the "pcre" extension.  The initializer is
   positional; the slot names in the trailing comments follow the
   zend_module_entry layout of the Zend API (the NULL slots are handlers this
   extension does not provide). */
zend_module_entry pcre_module_entry = {
	STANDARD_MODULE_HEADER,
   "pcre",						/* extension name */
	pcre_functions,				/* exported function table */
	PHP_MINIT(pcre),			/* module startup */
	PHP_MSHUTDOWN(pcre),		/* module shutdown */
	NULL,						/* request startup: none */
	NULL,						/* request shutdown: none */
	PHP_MINFO(pcre),			/* phpinfo() section */
	PHP_PCRE_VERSION,			/* extension version */
	PHP_MODULE_GLOBALS(pcre),	/* module globals */
	PHP_GINIT(pcre),			/* globals constructor */
	PHP_GSHUTDOWN(pcre),		/* globals destructor */
	NULL,						/* post-deactivate handler: none */
	STANDARD_MODULE_PROPERTIES_EX
};
2232
/* Only when COMPILE_DL_PCRE is defined (presumably the shared/loadable build
   of the extension): have ZEND_GET_MODULE emit the module lookup entry point
   for pcre. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
2236
2237/* }}} */
2238
2239#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2240
2241/*
2242 * Local variables:
2243 * tab-width: 4
2244 * c-basic-offset: 4
2245 * End:
2246 * vim600: sw=4 ts=4 fdm=marker
2247 * vim<600: sw=4 ts=4
2248 */
2249