1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2016 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Andrei Zmievski <andrei@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include "php.h"
22#include "php_ini.h"
23#include "php_globals.h"
24#include "php_pcre.h"
25#include "ext/standard/info.h"
26#include "ext/standard/basic_functions.h"
27#include "zend_smart_str.h"
28
29#if HAVE_PCRE || HAVE_BUNDLED_PCRE
30
31#include "ext/standard/php_string.h"
32
/* Result ordering modes for global matching; carried in the low byte of the
   match flags. */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
/* Match flag bit: report every captured string together with its offset. */
#define PREG_OFFSET_CAPTURE			(1<<8)

/* Flag bits exported to userland as the PREG_SPLIT_* constants. */
#define	PREG_SPLIT_NO_EMPTY			(1<<0)
#define PREG_SPLIT_DELIM_CAPTURE	(1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1<<2)

/* Internal pattern option recorded when the 'e' modifier is present. */
#define PREG_REPLACE_EVAL			(1<<0)

/* Flag bit exported to userland as PREG_GREP_INVERT. */
#define PREG_GREP_INVERT			(1<<0)

/* Number of cached patterns at which part of the cache gets purged. */
#define PCRE_CACHE_SIZE 4096

/* not fully functional workaround for libpcre < 8.0, see bug #70232 */
#ifndef PCRE_NOTEMPTY_ATSTART
# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
#endif
51
/* Values stored in PCRE_G(error_code); registered for userland as the
   PREG_*_ERROR constants during module startup. */
enum {
	PHP_PCRE_NO_ERROR = 0,
	PHP_PCRE_INTERNAL_ERROR,			/* any pcre_exec() error without its own code */
	PHP_PCRE_BACKTRACK_LIMIT_ERROR,		/* PCRE_ERROR_MATCHLIMIT */
	PHP_PCRE_RECURSION_LIMIT_ERROR,		/* PCRE_ERROR_RECURSIONLIMIT */
	PHP_PCRE_BAD_UTF8_ERROR,			/* PCRE_ERROR_BADUTF8 */
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR,		/* PCRE_ERROR_BADUTF8_OFFSET */
	PHP_PCRE_JIT_STACKLIMIT_ERROR		/* PCRE_ERROR_JIT_STACKLIMIT */
};
61
62
/* Storage for the extension's globals (accessed through PCRE_G()). */
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)

#ifdef PCRE_STUDY_JIT_COMPILE
/* Size arguments handed to pcre_jit_stack_alloc(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (64 * 1024)
/* JIT stack assigned to studied patterns; created at request startup and
   released at globals shutdown. */
ZEND_TLS pcre_jit_stack *jit_stack = NULL;
#endif
70
71static void pcre_handle_exec_error(int pcre_code) /* {{{ */
72{
73	int preg_code = 0;
74
75	switch (pcre_code) {
76		case PCRE_ERROR_MATCHLIMIT:
77			preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
78			break;
79
80		case PCRE_ERROR_RECURSIONLIMIT:
81			preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
82			break;
83
84		case PCRE_ERROR_BADUTF8:
85			preg_code = PHP_PCRE_BAD_UTF8_ERROR;
86			break;
87
88		case PCRE_ERROR_BADUTF8_OFFSET:
89			preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
90			break;
91
92#ifdef PCRE_STUDY_JIT_COMPILE
93		case PCRE_ERROR_JIT_STACKLIMIT:
94			preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
95			break;
96#endif
97
98		default:
99			preg_code = PHP_PCRE_INTERNAL_ERROR;
100			break;
101	}
102
103	PCRE_G(error_code) = preg_code;
104}
105/* }}} */
106
107static void php_free_pcre_cache(zval *data) /* {{{ */
108{
109	pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
110	if (!pce) return;
111	pcre_free(pce->re);
112	if (pce->extra) {
113		pcre_free_study(pce->extra);
114	}
115#if HAVE_SETLOCALE
116	if ((void*)pce->tables) pefree((void*)pce->tables, 1);
117	if (pce->locale) {
118		zend_string_release(pce->locale);
119	}
120#endif
121	pefree(pce, 1);
122}
123/* }}} */
124
125static PHP_GINIT_FUNCTION(pcre) /* {{{ */
126{
127	zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
128	pcre_globals->backtrack_limit = 0;
129	pcre_globals->recursion_limit = 0;
130	pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
131}
132/* }}} */
133
134static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
135{
136	zend_hash_destroy(&pcre_globals->pcre_cache);
137
138#ifdef PCRE_STUDY_JIT_COMPILE
139	/* Stack may only be destroyed when no cached patterns
140	 	possibly associated with it do exist. */
141	if (jit_stack) {
142		pcre_jit_stack_free(jit_stack);
143		jit_stack = NULL;
144	}
145#endif
146
147}
148/* }}} */
149
/* INI settings of the extension. Each entry is registered with PHP_INI_ALL
   and bound to the field of the same name in zend_pcre_globals. */
PHP_INI_BEGIN()
	/* Copied into pcre_extra.match_limit before matching */
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
	/* Copied into pcre_extra.match_limit_recursion before matching */
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef PCRE_STUDY_JIT_COMPILE
	/* Enables studying patterns with PCRE_STUDY_JIT_COMPILE */
	STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateBool, jit,             zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
157
158
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Prints the extension's info table (support status, library version, JIT
 * availability) followed by its INI entries.
 */
static PHP_MINFO_FUNCTION(pcre)
{
#ifdef PCRE_STUDY_JIT_COMPILE
	int jit_yes = 0;
#endif

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
	php_info_print_table_row(2, "PCRE Library Version", pcre_version() );

#ifdef PCRE_STUDY_JIT_COMPILE
	/* pcre_config() returns 0 on success and stores the answer in jit_yes. */
	if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
		php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
	} else {
		php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
	}
#else
	/* PCRE_CONFIG_JIT does not exist in libpcre headers without JIT support;
	   guard it the same way as every other JIT use in this file. */
	php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
#endif

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
178/* }}} */
179
/* {{{ PHP_MINIT_FUNCTION(pcre)
 * Module startup: registers the INI entries and the userland constants.
 * Always returns SUCCESS.
 */
static PHP_MINIT_FUNCTION(pcre)
{
	REGISTER_INI_ENTRIES();

	/* Flag constants accepted by the preg_* functions */
	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);

	/* Error codes, i.e. the possible values of PCRE_G(error_code) */
	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	/* Version string as reported by the PCRE library itself */
	REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);

	return SUCCESS;
}
204/* }}} */
205
/* {{{ PHP_MSHUTDOWN_FUNCTION(pcre)
 * Module shutdown: unregisters the INI entries. Always returns SUCCESS.
 */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
	UNREGISTER_INI_ENTRIES();

	return SUCCESS;
}
213/* }}} */
214
215#ifdef PCRE_STUDY_JIT_COMPILE
216/* {{{ PHP_RINIT_FUNCTION(pcre) */
217static PHP_RINIT_FUNCTION(pcre)
218{
219	if (PCRE_G(jit)) {
220		jit_stack = pcre_jit_stack_alloc(PCRE_JIT_STACK_MIN_SIZE,PCRE_JIT_STACK_MAX_SIZE);
221	}
222
223	return SUCCESS;
224}
225/* }}} */
226#endif
227
228/* {{{ static pcre_clean_cache */
229static int pcre_clean_cache(zval *data, void *arg)
230{
231	pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
232	int *num_clean = (int *)arg;
233
234	if (*num_clean > 0 && !pce->refcount) {
235		(*num_clean)--;
236		return ZEND_HASH_APPLY_REMOVE;
237	} else {
238		return ZEND_HASH_APPLY_KEEP;
239	}
240}
241/* }}} */
242
243/* {{{ static make_subpats_table */
244static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
245{
246	pcre_extra *extra = pce->extra;
247	int name_cnt = pce->name_count, name_size, ni = 0;
248	int rc;
249	char *name_table;
250	unsigned short name_idx;
251	char **subpat_names;
252	int rc1, rc2;
253
254	rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
255	rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
256	rc = rc2 ? rc2 : rc1;
257	if (rc < 0) {
258		php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
259		return NULL;
260	}
261
262	subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
263	while (ni++ < name_cnt) {
264		name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
265		subpat_names[name_idx] = name_table + 2;
266		if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
267			php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
268			efree(subpat_names);
269			return NULL;
270		}
271		name_table += name_size;
272	}
273	return subpat_names;
274}
275/* }}} */
276
277/* {{{ static calculate_unit_length */
278/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
279static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
280{
281	int unit_len;
282
283	if (pce->compile_options & PCRE_UTF8) {
284		char *end = start;
285
286		/* skip continuation bytes */
287		while ((*++end & 0xC0) == 0x80);
288		unit_len = end - start;
289	} else {
290		unit_len = 1;
291	}
292	return unit_len;
293}
294/* }}} */
295
296/* {{{ pcre_get_compiled_regex_cache
297 */
298PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
299{
300	pcre				*re = NULL;
301	pcre_extra			*extra;
302	int					 coptions = 0;
303	int					 soptions = 0;
304	const char			*error;
305	int					 erroffset;
306	char				 delimiter;
307	char				 start_delimiter;
308	char				 end_delimiter;
309	char				*p, *pp;
310	char				*pattern;
311	int					 do_study = 0;
312	int					 poptions = 0;
313	unsigned const char *tables = NULL;
314	pcre_cache_entry	*pce;
315	pcre_cache_entry	 new_entry;
316	int					 rc;
317
318	/* Try to lookup the cached regex entry, and if successful, just pass
319	   back the compiled pattern, otherwise go on and compile it. */
320	pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex);
321	if (pce) {
322#if HAVE_SETLOCALE
323		if (pce->locale == BG(locale_string) ||
324		    (pce->locale && BG(locale_string) &&
325		     ZSTR_LEN(pce->locale) == ZSTR_LEN(BG(locale_string)) &&
326		     !memcmp(ZSTR_VAL(pce->locale), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(pce->locale))) ||
327		    (!pce->locale &&
328		     ZSTR_LEN(BG(locale_string)) == 1 &&
329		     ZSTR_VAL(BG(locale_string))[0] == 'C') ||
330		    (!BG(locale_string) &&
331		     ZSTR_LEN(pce->locale) == 1 &&
332		     ZSTR_VAL(pce->locale)[0] == 'C')) {
333			return pce;
334		}
335#else
336		return pce;
337#endif
338	}
339
340	p = ZSTR_VAL(regex);
341
342	/* Parse through the leading whitespace, and display a warning if we
343	   get to the end without encountering a delimiter. */
344	while (isspace((int)*(unsigned char *)p)) p++;
345	if (*p == 0) {
346		php_error_docref(NULL, E_WARNING,
347						 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
348		return NULL;
349	}
350
351	/* Get the delimiter and display a warning if it is alphanumeric
352	   or a backslash. */
353	delimiter = *p++;
354	if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
355		php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
356		return NULL;
357	}
358
359	start_delimiter = delimiter;
360	if ((pp = strchr("([{< )]}> )]}>", delimiter)))
361		delimiter = pp[5];
362	end_delimiter = delimiter;
363
364	pp = p;
365
366	if (start_delimiter == end_delimiter) {
367		/* We need to iterate through the pattern, searching for the ending delimiter,
368		   but skipping the backslashed delimiters.  If the ending delimiter is not
369		   found, display a warning. */
370		while (*pp != 0) {
371			if (*pp == '\\' && pp[1] != 0) pp++;
372			else if (*pp == delimiter)
373				break;
374			pp++;
375		}
376	} else {
377		/* We iterate through the pattern, searching for the matching ending
378		 * delimiter. For each matching starting delimiter, we increment nesting
379		 * level, and decrement it for each matching ending delimiter. If we
380		 * reach the end of the pattern without matching, display a warning.
381		 */
382		int brackets = 1; 	/* brackets nesting level */
383		while (*pp != 0) {
384			if (*pp == '\\' && pp[1] != 0) pp++;
385			else if (*pp == end_delimiter && --brackets <= 0)
386				break;
387			else if (*pp == start_delimiter)
388				brackets++;
389			pp++;
390		}
391	}
392
393	if (*pp == 0) {
394		if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
395			php_error_docref(NULL,E_WARNING, "Null byte in regex");
396		} else if (start_delimiter == end_delimiter) {
397			php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
398		} else {
399			php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
400		}
401		return NULL;
402	}
403
404	/* Make a copy of the actual pattern. */
405	pattern = estrndup(p, pp-p);
406
407	/* Move on to the options */
408	pp++;
409
410	/* Parse through the options, setting appropriate flags.  Display
411	   a warning if we encounter an unknown modifier. */
412	while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
413		switch (*pp++) {
414			/* Perl compatible options */
415			case 'i':	coptions |= PCRE_CASELESS;		break;
416			case 'm':	coptions |= PCRE_MULTILINE;		break;
417			case 's':	coptions |= PCRE_DOTALL;		break;
418			case 'x':	coptions |= PCRE_EXTENDED;		break;
419
420			/* PCRE specific options */
421			case 'A':	coptions |= PCRE_ANCHORED;		break;
422			case 'D':	coptions |= PCRE_DOLLAR_ENDONLY;break;
423			case 'S':	do_study  = 1;					break;
424			case 'U':	coptions |= PCRE_UNGREEDY;		break;
425			case 'X':	coptions |= PCRE_EXTRA;			break;
426			case 'u':	coptions |= PCRE_UTF8;
427	/* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
428       characters, even in UTF-8 mode. However, this can be changed by setting
429       the PCRE_UCP option. */
430#ifdef PCRE_UCP
431						coptions |= PCRE_UCP;
432#endif
433				break;
434
435			/* Custom preg options */
436			case 'e':	poptions |= PREG_REPLACE_EVAL;	break;
437
438			case ' ':
439			case '\n':
440				break;
441
442			default:
443				if (pp[-1]) {
444					php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
445				} else {
446					php_error_docref(NULL,E_WARNING, "Null byte in regex");
447				}
448				efree(pattern);
449				return NULL;
450		}
451	}
452
453#if HAVE_SETLOCALE
454	if (BG(locale_string) &&
455	    (ZSTR_LEN(BG(locale_string)) != 1 || ZSTR_VAL(BG(locale_string))[0] != 'C')) {
456		tables = pcre_maketables();
457	}
458#endif
459
460	/* Compile pattern and display a warning if compilation failed. */
461	re = pcre_compile(pattern,
462					  coptions,
463					  &error,
464					  &erroffset,
465					  tables);
466
467	if (re == NULL) {
468		php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
469		efree(pattern);
470		if (tables) {
471			pefree((void*)tables, 1);
472		}
473		return NULL;
474	}
475
476#ifdef PCRE_STUDY_JIT_COMPILE
477	if (PCRE_G(jit)) {
478		/* Enable PCRE JIT compiler */
479		do_study = 1;
480		soptions |= PCRE_STUDY_JIT_COMPILE;
481	}
482#endif
483
484	/* If study option was specified, study the pattern and
485	   store the result in extra for passing to pcre_exec. */
486	if (do_study) {
487		extra = pcre_study(re, soptions, &error);
488		if (extra) {
489			extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
490			extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
491			extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
492#ifdef PCRE_STUDY_JIT_COMPILE
493			if (PCRE_G(jit) && jit_stack) {
494				pcre_assign_jit_stack(extra, NULL, jit_stack);
495			}
496#endif
497		}
498		if (error != NULL) {
499			php_error_docref(NULL, E_WARNING, "Error while studying pattern");
500		}
501	} else {
502		extra = NULL;
503	}
504
505	efree(pattern);
506
507	/*
508	 * If we reached cache limit, clean out the items from the head of the list;
509	 * these are supposedly the oldest ones (but not necessarily the least used
510	 * ones).
511	 */
512	if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
513		int num_clean = PCRE_CACHE_SIZE / 8;
514		zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
515	}
516
517	/* Store the compiled pattern and extra info in the cache. */
518	new_entry.re = re;
519	new_entry.extra = extra;
520	new_entry.preg_options = poptions;
521	new_entry.compile_options = coptions;
522#if HAVE_SETLOCALE
523	new_entry.locale = BG(locale_string) ?
524		((GC_FLAGS(BG(locale_string)) & IS_STR_PERSISTENT) ?
525			zend_string_copy(BG(locale_string)) :
526			zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1)) :
527		NULL;
528	new_entry.tables = tables;
529#endif
530	new_entry.refcount = 0;
531
532	rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
533	if (rc < 0) {
534		php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
535		return NULL;
536	}
537
538	rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
539	if (rc < 0) {
540		php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
541		return NULL;
542	}
543
544	/*
545	 * Interned strings are not duplicated when stored in HashTable,
546	 * but all the interned strings created during HTTP request are removed
547	 * at end of request. However PCRE_G(pcre_cache) must be consistent
548	 * on the next request as well. So we disable usage of interned strings
549	 * as hash keys especually for this table.
550	 * See bug #63180
551	 */
552	if (!ZSTR_IS_INTERNED(regex) || !(GC_FLAGS(regex) & IS_STR_PERMANENT)) {
553		zend_string *str = zend_string_init(ZSTR_VAL(regex), ZSTR_LEN(regex), 1);
554		GC_REFCOUNT(str) = 0; /* will be incremented by zend_hash_update_mem() */
555		ZSTR_H(str) = ZSTR_H(regex);
556		regex = str;
557	}
558
559	pce = zend_hash_update_mem(&PCRE_G(pcre_cache), regex, &new_entry, sizeof(pcre_cache_entry));
560
561	return pce;
562}
563/* }}} */
564
565/* {{{ pcre_get_compiled_regex
566 */
567PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
568{
569	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
570
571	if (extra) {
572		*extra = pce ? pce->extra : NULL;
573	}
574	if (preg_options) {
575		*preg_options = pce ? pce->preg_options : 0;
576	}
577
578	return pce ? pce->re : NULL;
579}
580/* }}} */
581
582/* {{{ pcre_get_compiled_regex_ex
583 */
584PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
585{
586	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
587
588	if (extra) {
589		*extra = pce ? pce->extra : NULL;
590	}
591	if (preg_options) {
592		*preg_options = pce ? pce->preg_options : 0;
593	}
594	if (compile_options) {
595		*compile_options = pce ? pce->compile_options : 0;
596	}
597
598	return pce ? pce->re : NULL;
599}
600/* }}} */
601
602/* {{{ add_offset_pair */
603static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
604{
605	zval match_pair, tmp;
606
607	array_init_size(&match_pair, 2);
608
609	/* Add (match, offset) to the return value */
610	ZVAL_STRINGL(&tmp, str, len);
611	zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
612	ZVAL_LONG(&tmp, offset);
613	zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
614
615	if (name) {
616		Z_ADDREF(match_pair);
617		zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
618	}
619	zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
620}
621/* }}} */
622
623static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
624{
625	/* parameters */
626	zend_string		 *regex;			/* Regular expression */
627	zend_string		 *subject;			/* String to match against */
628	pcre_cache_entry *pce;				/* Compiled regular expression */
629	zval			 *subpats = NULL;	/* Array for subpatterns */
630	zend_long		  flags = 0;		/* Match control flags */
631	zend_long		  start_offset = 0;	/* Where the new search starts */
632
633#ifndef FAST_ZPP
634	if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|z/ll", &regex,
635							  &subject, &subpats, &flags, &start_offset) == FAILURE) {
636		RETURN_FALSE;
637	}
638#else
639	ZEND_PARSE_PARAMETERS_START(2, 5)
640		Z_PARAM_STR(regex)
641		Z_PARAM_STR(subject)
642		Z_PARAM_OPTIONAL
643		Z_PARAM_ZVAL_EX(subpats, 0, 1)
644		Z_PARAM_LONG(flags)
645		Z_PARAM_LONG(start_offset)
646	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
647#endif
648
649	if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
650			php_error_docref(NULL, E_WARNING, "Subject is too long");
651			RETURN_FALSE;
652	}
653
654	/* Compile regex or get it from cache. */
655	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
656		RETURN_FALSE;
657	}
658
659	pce->refcount++;
660	php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
661		global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
662	pce->refcount--;
663}
664/* }}} */
665
666/* {{{ php_pcre_match_impl() */
667PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
668	zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
669{
670	zval			 result_set,		/* Holds a set of subpatterns after
671										   a global match */
672				    *match_sets = NULL;	/* An array of sets of matches for each
673										   subpattern after a global match */
674	pcre_extra		*extra = pce->extra;/* Holds results of studying */
675	pcre_extra		 extra_data;		/* Used locally for exec options */
676	int				 exoptions = 0;		/* Execution options */
677	int				 count = 0;			/* Count of matched subpatterns */
678	int				*offsets;			/* Array of subpattern offsets */
679	int				 num_subpats;		/* Number of captured subpatterns */
680	int				 size_offsets;		/* Size of the offsets array */
681	int				 matched;			/* Has anything matched */
682	int				 g_notempty = 0;	/* If the match should not be empty */
683	const char	   **stringlist;		/* Holds list of subpatterns */
684	char 		   **subpat_names;		/* Array for named subpatterns */
685	int				 i;
686	int				 subpats_order;		/* Order of subpattern matches */
687	int				 offset_capture;    /* Capture match offsets: yes/no */
688	unsigned char   *mark = NULL;       /* Target for MARK name */
689	zval            marks;      		/* Array of marks for PREG_PATTERN_ORDER */
690	ALLOCA_FLAG(use_heap);
691
692	ZVAL_UNDEF(&marks);
693
694	/* Overwrite the passed-in value for subpatterns with an empty array. */
695	if (subpats != NULL) {
696		zval_dtor(subpats);
697		array_init(subpats);
698	}
699
700	subpats_order = global ? PREG_PATTERN_ORDER : 0;
701
702	if (use_flags) {
703		offset_capture = flags & PREG_OFFSET_CAPTURE;
704
705		/*
706		 * subpats_order is pre-set to pattern mode so we change it only if
707		 * necessary.
708		 */
709		if (flags & 0xff) {
710			subpats_order = flags & 0xff;
711		}
712		if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
713			(!global && subpats_order != 0)) {
714			php_error_docref(NULL, E_WARNING, "Invalid flags specified");
715			return;
716		}
717	} else {
718		offset_capture = 0;
719	}
720
721	/* Negative offset counts from the end of the string. */
722	if (start_offset < 0) {
723		start_offset = subject_len + start_offset;
724		if (start_offset < 0) {
725			start_offset = 0;
726		}
727	}
728
729	if (extra == NULL) {
730		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
731		extra = &extra_data;
732	}
733	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
734	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
735#ifdef PCRE_EXTRA_MARK
736	extra->mark = &mark;
737	extra->flags |= PCRE_EXTRA_MARK;
738#endif
739
740	/* Calculate the size of the offsets array, and allocate memory for it. */
741	num_subpats = pce->capture_count + 1;
742	size_offsets = num_subpats * 3;
743
744	/*
745	 * Build a mapping from subpattern numbers to their names. We will
746	 * allocate the table only if there are any named subpatterns.
747	 */
748	subpat_names = NULL;
749	if (pce->name_count > 0) {
750		subpat_names = make_subpats_table(num_subpats, pce);
751		if (!subpat_names) {
752			RETURN_FALSE;
753		}
754	}
755
756	if (size_offsets <= 32) {
757		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
758	} else {
759		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
760	}
761	memset(offsets, 0, size_offsets*sizeof(int));
762	/* Allocate match sets array and initialize the values. */
763	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
764		match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
765		for (i=0; i<num_subpats; i++) {
766			array_init(&match_sets[i]);
767		}
768	}
769
770	matched = 0;
771	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
772
773	do {
774		/* Execute the regular expression. */
775		count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
776						  exoptions|g_notempty, offsets, size_offsets);
777
778		/* the string was already proved to be valid UTF-8 */
779		exoptions |= PCRE_NO_UTF8_CHECK;
780
781		/* Check for too many substrings condition. */
782		if (count == 0) {
783			php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
784			count = size_offsets/3;
785		}
786
787		/* If something has matched */
788		if (count > 0) {
789			matched++;
790
791			/* If subpatterns array has been passed, fill it in with values. */
792			if (subpats != NULL) {
793				/* Try to get the list of substrings and display a warning if failed. */
794				if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
795					if (subpat_names) {
796						efree(subpat_names);
797					}
798					if (size_offsets <= 32) {
799						free_alloca(offsets, use_heap);
800					} else {
801						efree(offsets);
802					}
803					if (match_sets) efree(match_sets);
804					php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
805					RETURN_FALSE;
806				}
807
808				if (global) {	/* global pattern matching */
809					if (subpats && subpats_order == PREG_PATTERN_ORDER) {
810						/* For each subpattern, insert it into the appropriate array. */
811						if (offset_capture) {
812							for (i = 0; i < count; i++) {
813								add_offset_pair(&match_sets[i], (char *)stringlist[i],
814												offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
815							}
816						} else {
817							for (i = 0; i < count; i++) {
818								add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
819													   offsets[(i<<1)+1] - offsets[i<<1]);
820							}
821						}
822						/* Add MARK, if available */
823						if (mark) {
824							if (Z_TYPE(marks) == IS_UNDEF) {
825								array_init(&marks);
826							}
827							add_index_string(&marks, matched - 1, (char *) mark);
828						}
829						/*
830						 * If the number of captured subpatterns on this run is
831						 * less than the total possible number, pad the result
832						 * arrays with empty strings.
833						 */
834						if (count < num_subpats) {
835							for (; i < num_subpats; i++) {
836								add_next_index_string(&match_sets[i], "");
837							}
838						}
839					} else {
840						/* Allocate the result set array */
841						array_init_size(&result_set, count + (mark ? 1 : 0));
842
843						/* Add all the subpatterns to it */
844						if (subpat_names) {
845							if (offset_capture) {
846								for (i = 0; i < count; i++) {
847									add_offset_pair(&result_set, (char *)stringlist[i],
848													offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
849								}
850							} else {
851								for (i = 0; i < count; i++) {
852									if (subpat_names[i]) {
853										add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
854															   offsets[(i<<1)+1] - offsets[i<<1]);
855									}
856									add_next_index_stringl(&result_set, (char *)stringlist[i],
857														   offsets[(i<<1)+1] - offsets[i<<1]);
858								}
859							}
860						} else {
861							if (offset_capture) {
862								for (i = 0; i < count; i++) {
863									add_offset_pair(&result_set, (char *)stringlist[i],
864													offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
865								}
866							} else {
867								for (i = 0; i < count; i++) {
868									add_next_index_stringl(&result_set, (char *)stringlist[i],
869														   offsets[(i<<1)+1] - offsets[i<<1]);
870								}
871							}
872						}
873						/* Add MARK, if available */
874						if (mark) {
875							add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
876						}
877						/* And add it to the output array */
878						zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
879					}
880				} else {			/* single pattern matching */
881					/* For each subpattern, insert it into the subpatterns array. */
882					if (subpat_names) {
883						if (offset_capture) {
884							for (i = 0; i < count; i++) {
885								add_offset_pair(subpats, (char *)stringlist[i],
886												offsets[(i<<1)+1] - offsets[i<<1],
887												offsets[i<<1], subpat_names[i]);
888							}
889						} else {
890							for (i = 0; i < count; i++) {
891								if (subpat_names[i]) {
892									add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
893													  offsets[(i<<1)+1] - offsets[i<<1]);
894								}
895								add_next_index_stringl(subpats, (char *)stringlist[i],
896													   offsets[(i<<1)+1] - offsets[i<<1]);
897							}
898						}
899					} else {
900						if (offset_capture) {
901							for (i = 0; i < count; i++) {
902								add_offset_pair(subpats, (char *)stringlist[i],
903												offsets[(i<<1)+1] - offsets[i<<1],
904												offsets[i<<1], NULL);
905							}
906						} else {
907							for (i = 0; i < count; i++) {
908								add_next_index_stringl(subpats, (char *)stringlist[i],
909													   offsets[(i<<1)+1] - offsets[i<<1]);
910							}
911						}
912					}
913					/* Add MARK, if available */
914					if (mark) {
915						add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
916					}
917				}
918
919				pcre_free((void *) stringlist);
920			}
921		} else if (count == PCRE_ERROR_NOMATCH) {
922			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
923			   this is not necessarily the end. We need to advance
924			   the start offset, and continue. Fudge the offset values
925			   to achieve this, unless we're already at the end of the string. */
926			if (g_notempty != 0 && start_offset < subject_len) {
927				int unit_len = calculate_unit_length(pce, subject + start_offset);
928
929				offsets[0] = (int)start_offset;
930				offsets[1] = (int)(start_offset + unit_len);
931			} else
932				break;
933		} else {
934			pcre_handle_exec_error(count);
935			break;
936		}
937
938		/* If we have matched an empty string, mimic what Perl's /g options does.
939		   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
940		   the match again at the same point. If this fails (picked up above) we
941		   advance to the next character. */
942		g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
943
944		/* Advance to the position right after the last full match */
945		start_offset = offsets[1];
946	} while (global);
947
948	/* Add the match sets to the output array and clean up */
949	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
950		if (subpat_names) {
951			for (i = 0; i < num_subpats; i++) {
952				if (subpat_names[i]) {
953					zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
954									 strlen(subpat_names[i]), &match_sets[i]);
955					Z_ADDREF(match_sets[i]);
956				}
957				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
958			}
959		} else {
960			for (i = 0; i < num_subpats; i++) {
961				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
962			}
963		}
964		efree(match_sets);
965
966		if (Z_TYPE(marks) != IS_UNDEF) {
967			add_assoc_zval(subpats, "MARK", &marks);
968		}
969	}
970
971	if (size_offsets <= 32) {
972		free_alloca(offsets, use_heap);
973	} else {
974		efree(offsets);
975	}
976	if (subpat_names) {
977		efree(subpat_names);
978	}
979
980	/* Did we encounter an error? */
981	if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
982		RETVAL_LONG(matched);
983	} else {
984		RETVAL_FALSE;
985	}
986}
987/* }}} */
988
989/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
990   Perform a Perl-style regular expression match */
991static PHP_FUNCTION(preg_match)
992{
993	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
994}
995/* }}} */
996
997/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
998   Perform a Perl-style global regular expression match */
999static PHP_FUNCTION(preg_match_all)
1000{
1001	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1002}
1003/* }}} */
1004
/* {{{ preg_get_backref
 * Tries to parse a back-reference at *str.
 *
 * *str must point at the introducer character (the callers in this file
 * pass a position holding '\\' or '$'). Accepted forms are the introducer
 * followed by one or two decimal digits, or "${" + one or two digits + "}".
 *
 * On success the reference number is stored in *backref, *str is advanced
 * past the whole reference and 1 is returned. On failure 0 is returned and
 * *str is left untouched (*backref may already have been written when only
 * the closing brace is missing).
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* An introducer at the very end of the string cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	/* "${" opens the braced form; any other introducer is a single character. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* At least one digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* An optional second digit extends the number to the range 0..99. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + (*cursor - '0');
		cursor++;
	}

	/* The braced form has to be closed explicitly. */
	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1043
1044/* {{{ preg_do_repl_func
1045 */
1046static zend_string *preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
1047{
1048	zend_string *result_str;
1049	zval		 retval;			/* Function return value */
1050	zval	     args[1];			/* Argument to pass to function */
1051	int			 i;
1052
1053	array_init_size(&args[0], count + (mark ? 1 : 0));
1054	if (subpat_names) {
1055		for (i = 0; i < count; i++) {
1056			if (subpat_names[i]) {
1057				add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
1058			}
1059			add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1060		}
1061	} else {
1062		for (i = 0; i < count; i++) {
1063			add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
1064		}
1065	}
1066	if (mark) {
1067		add_assoc_string(&args[0], "MARK", (char *) mark);
1068	}
1069
1070	if (call_user_function_ex(EG(function_table), NULL, function, &retval, 1, args, 0, NULL) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1071		result_str = zval_get_string(&retval);
1072		zval_ptr_dtor(&retval);
1073	} else {
1074		if (!EG(exception)) {
1075			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1076		}
1077
1078		result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1079	}
1080
1081	zval_ptr_dtor(&args[0]);
1082
1083	return result_str;
1084}
1085/* }}} */
1086
1087/* {{{ php_pcre_replace
1088 */
1089PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1090							  zend_string *subject_str,
1091							  char *subject, int subject_len,
1092							  zval *replace_val, int is_callable_replace,
1093							  int limit, int *replace_count)
1094{
1095	pcre_cache_entry	*pce;			    /* Compiled regular expression */
1096	zend_string	 		*result;			/* Function result */
1097
1098	/* Compile regex or get it from cache. */
1099	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1100		return NULL;
1101	}
1102	pce->refcount++;
1103	result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_val,
1104		is_callable_replace, limit, replace_count);
1105	pce->refcount--;
1106
1107	return result;
1108}
1109/* }}} */
1110
1111/* {{{ php_pcre_replace_impl() */
1112PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zval *replace_val, int is_callable_replace, int limit, int *replace_count)
1113{
1114	pcre_extra		*extra = pce->extra;/* Holds results of studying */
1115	pcre_extra		 extra_data;		/* Used locally for exec options */
1116	int				 exoptions = 0;		/* Execution options */
1117	int				 count = 0;			/* Count of matched subpatterns */
1118	int				*offsets;			/* Array of subpattern offsets */
1119	char 			**subpat_names;		/* Array for named subpatterns */
1120	int				 num_subpats;		/* Number of captured subpatterns */
1121	int				 size_offsets;		/* Size of the offsets array */
1122	int				 new_len;			/* Length of needed storage */
1123	int				 alloc_len;			/* Actual allocated length */
1124	int				 match_len;			/* Length of the current match */
1125	int				 backref;			/* Backreference number */
1126	int				 start_offset;		/* Where the new search starts */
1127	int				 g_notempty=0;		/* If the match should not be empty */
1128	int				 replace_len=0;		/* Length of replacement string */
1129	char			*replace=NULL,		/* Replacement string */
1130					*walkbuf,			/* Location of current replacement in the result */
1131					*walk,				/* Used to walk the replacement string */
1132					*match,				/* The current match */
1133					*piece,				/* The current piece of subject */
1134					*replace_end=NULL,	/* End of replacement string */
1135					 walk_last;			/* Last walked character */
1136	int				 result_len; 		/* Length of result */
1137	unsigned char   *mark = NULL;       /* Target for MARK name */
1138	zend_string		*result;			/* Result of replacement */
1139	zend_string     *eval_result=NULL;  /* Result of custom function */
1140
1141	ALLOCA_FLAG(use_heap);
1142
1143	if (extra == NULL) {
1144		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1145		extra = &extra_data;
1146	}
1147
1148	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1149	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1150
1151	if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
1152		php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
1153		return NULL;
1154	}
1155
1156	if (!is_callable_replace) {
1157		replace = Z_STRVAL_P(replace_val);
1158		replace_len = (int)Z_STRLEN_P(replace_val);
1159		replace_end = replace + replace_len;
1160	}
1161
1162	/* Calculate the size of the offsets array, and allocate memory for it. */
1163	num_subpats = pce->capture_count + 1;
1164	size_offsets = num_subpats * 3;
1165	if (size_offsets <= 32) {
1166		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1167	} else {
1168		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1169	}
1170
1171	/*
1172	 * Build a mapping from subpattern numbers to their names. We will
1173	 * allocate the table only if there are any named subpatterns.
1174	 */
1175	subpat_names = NULL;
1176	if (UNEXPECTED(pce->name_count > 0)) {
1177		subpat_names = make_subpats_table(num_subpats, pce);
1178		if (!subpat_names) {
1179			return NULL;
1180		}
1181	}
1182
1183	alloc_len = 0;
1184	result = NULL;
1185
1186	/* Initialize */
1187	match = NULL;
1188	start_offset = 0;
1189	result_len = 0;
1190	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1191
1192	while (1) {
1193#ifdef PCRE_EXTRA_MARK
1194		extra->mark = &mark;
1195		extra->flags |= PCRE_EXTRA_MARK;
1196#endif
1197		/* Execute the regular expression. */
1198		count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1199						  exoptions|g_notempty, offsets, size_offsets);
1200
1201		/* the string was already proved to be valid UTF-8 */
1202		exoptions |= PCRE_NO_UTF8_CHECK;
1203
1204		/* Check for too many substrings condition. */
1205		if (UNEXPECTED(count == 0)) {
1206			php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1207			count = size_offsets / 3;
1208		}
1209
1210		piece = subject + start_offset;
1211
1212		/* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
1213		if (EXPECTED(count > 0 && (offsets[1] - offsets[0] >= 0) && limit)) {
1214			if (UNEXPECTED(replace_count)) {
1215				++*replace_count;
1216			}
1217
1218			/* Set the match location in subject */
1219			match = subject + offsets[0];
1220
1221			new_len = result_len + offsets[0] - start_offset; /* part before the match */
1222
1223			/* if (!is_callable_replace) */
1224			if (EXPECTED(replace)) {
1225				/* do regular substitution */
1226				walk = replace;
1227				walk_last = 0;
1228
1229				while (walk < replace_end) {
1230					if ('\\' == *walk || '$' == *walk) {
1231						if (walk_last == '\\') {
1232							walk++;
1233							walk_last = 0;
1234							continue;
1235						}
1236						if (preg_get_backref(&walk, &backref)) {
1237							if (backref < count)
1238								new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1239							continue;
1240						}
1241					}
1242					new_len++;
1243					walk++;
1244					walk_last = walk[-1];
1245				}
1246
1247				if (new_len >= alloc_len) {
1248					alloc_len = alloc_len + 2 * new_len;
1249					if (result == NULL) {
1250						result = zend_string_alloc(alloc_len, 0);
1251					} else {
1252						result = zend_string_extend(result, alloc_len, 0);
1253					}
1254				}
1255
1256				/* copy the part of the string before the match */
1257				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
1258				result_len += (int)(match-piece);
1259
1260				/* copy replacement and backrefs */
1261				walkbuf = ZSTR_VAL(result) + result_len;
1262
1263				walk = replace;
1264				walk_last = 0;
1265				while (walk < replace_end) {
1266					if ('\\' == *walk || '$' == *walk) {
1267						if (walk_last == '\\') {
1268							*(walkbuf-1) = *walk++;
1269							walk_last = 0;
1270							continue;
1271						}
1272						if (preg_get_backref(&walk, &backref)) {
1273							if (backref < count) {
1274								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1275								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1276								walkbuf += match_len;
1277							}
1278							continue;
1279						}
1280					}
1281					*walkbuf++ = *walk++;
1282					walk_last = walk[-1];
1283				}
1284				*walkbuf = '\0';
1285				/* increment the result length by how much we've added to the string */
1286				result_len += (int)(walkbuf - (ZSTR_VAL(result) + result_len));
1287			} else {
1288				/* Use custom function to get replacement string and its length. */
1289				eval_result = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark);
1290				ZEND_ASSERT(eval_result);
1291				new_len += (int)ZSTR_LEN(eval_result);
1292				if (new_len >= alloc_len) {
1293					alloc_len = alloc_len + 2 * new_len;
1294					if (result == NULL) {
1295						result = zend_string_alloc(alloc_len, 0);
1296					} else {
1297						result = zend_string_extend(result, alloc_len, 0);
1298					}
1299				}
1300				/* copy the part of the string before the match */
1301				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1302				result_len += (int)(match-piece);
1303
1304				/* copy replacement and backrefs */
1305				walkbuf = ZSTR_VAL(result) + result_len;
1306
1307				/* If using custom function, copy result to the buffer and clean up. */
1308				memcpy(walkbuf, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1309				result_len += (int)ZSTR_LEN(eval_result);
1310				zend_string_release(eval_result);
1311			}
1312
1313			if (EXPECTED(limit)) {
1314				limit--;
1315			}
1316		} else if (count == PCRE_ERROR_NOMATCH || UNEXPECTED(limit == 0)) {
1317			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1318			   this is not necessarily the end. We need to advance
1319			   the start offset, and continue. Fudge the offset values
1320			   to achieve this, unless we're already at the end of the string. */
1321			if (g_notempty != 0 && start_offset < subject_len) {
1322				int unit_len = calculate_unit_length(pce, piece);
1323
1324				offsets[0] = start_offset;
1325				offsets[1] = start_offset + unit_len;
1326				memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
1327				result_len += unit_len;
1328			} else {
1329				if (!result && subject_str) {
1330					result = zend_string_copy(subject_str);
1331					break;
1332				}
1333				new_len = result_len + subject_len - start_offset;
1334				if (new_len > alloc_len) {
1335					alloc_len = new_len; /* now we know exactly how long it is */
1336					if (NULL != result) {
1337						result = zend_string_realloc(result, alloc_len, 0);
1338					} else {
1339						result = zend_string_alloc(alloc_len, 0);
1340					}
1341				}
1342				/* stick that last bit of string on our output */
1343				memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
1344				result_len += subject_len - start_offset;
1345				ZSTR_VAL(result)[result_len] = '\0';
1346				ZSTR_LEN(result) = result_len;
1347				break;
1348			}
1349		} else {
1350			pcre_handle_exec_error(count);
1351			if (result) {
1352				zend_string_free(result);
1353				result = NULL;
1354			}
1355			break;
1356		}
1357
1358		/* If we have matched an empty string, mimic what Perl's /g options does.
1359		   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1360		   the match again at the same point. If this fails (picked up above) we
1361		   advance to the next character. */
1362		g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1363
1364		/* Advance to the next piece. */
1365		start_offset = offsets[1];
1366	}
1367
1368	if (size_offsets <= 32) {
1369		free_alloca(offsets, use_heap);
1370	} else {
1371		efree(offsets);
1372	}
1373	if (UNEXPECTED(subpat_names)) {
1374		efree(subpat_names);
1375	}
1376
1377	return result;
1378}
1379/* }}} */
1380
1381/* {{{ php_replace_in_subject
1382 */
1383static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int is_callable_replace, int *replace_count)
1384{
1385	zval		*regex_entry,
1386				*replace_value,
1387				 empty_replace;
1388	zend_string *result;
1389	uint32_t replace_idx;
1390	zend_string	*subject_str = zval_get_string(subject);
1391
1392	/* FIXME: This might need to be changed to ZSTR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1393	ZVAL_EMPTY_STRING(&empty_replace);
1394
1395	if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str))) {
1396			php_error_docref(NULL, E_WARNING, "Subject is too long");
1397			return NULL;
1398	}
1399
1400	/* If regex is an array */
1401	if (Z_TYPE_P(regex) == IS_ARRAY) {
1402		replace_value = replace;
1403		replace_idx = 0;
1404
1405		/* For each entry in the regex array, get the entry */
1406		ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
1407			zval replace_str;
1408			/* Make sure we're dealing with strings. */
1409			zend_string *regex_str = zval_get_string(regex_entry);
1410
1411			ZVAL_UNDEF(&replace_str);
1412			/* If replace is an array and not a callable construct */
1413			if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1414				/* Get current entry */
1415				while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
1416					if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNDEF) {
1417						ZVAL_COPY(&replace_str, &Z_ARRVAL_P(replace)->arData[replace_idx].val);
1418						break;
1419					}
1420					replace_idx++;
1421				}
1422				if (!Z_ISUNDEF(replace_str)) {
1423					if (!is_callable_replace) {
1424						convert_to_string(&replace_str);
1425					}
1426					replace_value = &replace_str;
1427					replace_idx++;
1428				} else {
1429					/* We've run out of replacement strings, so use an empty one */
1430					replace_value = &empty_replace;
1431				}
1432			}
1433
1434			/* Do the actual replacement and put the result back into subject_str
1435			   for further replacements. */
1436			if ((result = php_pcre_replace(regex_str,
1437										   subject_str,
1438										   ZSTR_VAL(subject_str),
1439										   (int)ZSTR_LEN(subject_str),
1440										   replace_value,
1441										   is_callable_replace,
1442										   limit,
1443										   replace_count)) != NULL) {
1444				zend_string_release(subject_str);
1445				subject_str = result;
1446			} else {
1447				zend_string_release(subject_str);
1448				zend_string_release(regex_str);
1449				zval_dtor(&replace_str);
1450				return NULL;
1451			}
1452
1453			zend_string_release(regex_str);
1454			zval_dtor(&replace_str);
1455		} ZEND_HASH_FOREACH_END();
1456
1457		return subject_str;
1458	} else {
1459		result = php_pcre_replace(Z_STR_P(regex),
1460								  subject_str,
1461								  ZSTR_VAL(subject_str),
1462								  (int)ZSTR_LEN(subject_str),
1463								  replace,
1464								  is_callable_replace,
1465								  limit,
1466								  replace_count);
1467		zend_string_release(subject_str);
1468		return result;
1469	}
1470}
1471/* }}} */
1472
1473/* {{{ preg_replace_impl
1474 */
1475static int preg_replace_impl(zval *return_value, zval *regex, zval *replace, zval *subject, zend_long limit_val, int is_callable_replace, int is_filter)
1476{
1477	zval		*subject_entry;
1478	zend_string	*result;
1479	zend_string	*string_key;
1480	zend_ulong	 num_key;
1481	int			 replace_count = 0, old_replace_count;
1482
1483	if (Z_TYPE_P(replace) != IS_ARRAY && (Z_TYPE_P(replace) != IS_OBJECT || !is_callable_replace)) {
1484		convert_to_string_ex(replace);
1485	}
1486
1487	if (Z_TYPE_P(regex) != IS_ARRAY) {
1488		convert_to_string_ex(regex);
1489	}
1490
1491	/* if subject is an array */
1492	if (Z_TYPE_P(subject) == IS_ARRAY) {
1493		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
1494
1495		/* For each subject entry, convert it to string, then perform replacement
1496		   and add the result to the return_value array. */
1497		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
1498			old_replace_count = replace_count;
1499			if ((result = php_replace_in_subject(regex, replace, subject_entry, limit_val, is_callable_replace, &replace_count)) != NULL) {
1500				if (!is_filter || replace_count > old_replace_count) {
1501					/* Add to return array */
1502					zval zv;
1503
1504					ZVAL_STR(&zv, result);
1505					if (string_key) {
1506						zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
1507					} else {
1508						zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
1509					}
1510				} else {
1511					zend_string_release(result);
1512				}
1513			}
1514		} ZEND_HASH_FOREACH_END();
1515	} else {
1516		/* if subject is not an array */
1517		old_replace_count = replace_count;
1518		if ((result = php_replace_in_subject(regex, replace, subject, limit_val, is_callable_replace, &replace_count)) != NULL) {
1519			if (!is_filter || replace_count > old_replace_count) {
1520				RETVAL_STR(result);
1521			} else {
1522				zend_string_release(result);
1523			}
1524		}
1525	}
1526
1527	return replace_count;
1528}
1529/* }}} */
1530
1531/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1532   Perform Perl-style regular expression replacement. */
1533static PHP_FUNCTION(preg_replace)
1534{
1535	zval *regex, *replace, *subject, *zcount = NULL;
1536	zend_long limit = -1;
1537	int replace_count;
1538
1539#ifndef FAST_ZPP
1540	/* Get function parameters and do error-checking. */
1541	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1542		return;
1543	}
1544#else
1545	ZEND_PARSE_PARAMETERS_START(3, 5)
1546		Z_PARAM_ZVAL(regex)
1547		Z_PARAM_ZVAL(replace)
1548		Z_PARAM_ZVAL(subject)
1549		Z_PARAM_OPTIONAL
1550		Z_PARAM_LONG(limit)
1551		Z_PARAM_ZVAL_EX(zcount, 0, 1)
1552	ZEND_PARSE_PARAMETERS_END();
1553#endif
1554
1555	if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1556		php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1557		RETURN_FALSE;
1558	}
1559
1560	replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 0);
1561	if (zcount) {
1562		zval_dtor(zcount);
1563		ZVAL_LONG(zcount, replace_count);
1564	}
1565}
1566/* }}} */
1567
1568/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
1569   Perform Perl-style regular expression replacement using replacement callback. */
1570static PHP_FUNCTION(preg_replace_callback)
1571{
1572	zval *regex, *replace, *subject, *zcount = NULL;
1573	zend_long limit = -1;
1574	zend_string	*callback_name;
1575	int replace_count;
1576
1577#ifndef FAST_ZPP
1578	/* Get function parameters and do error-checking. */
1579	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1580		return;
1581	}
1582#else
1583	ZEND_PARSE_PARAMETERS_START(3, 5)
1584		Z_PARAM_ZVAL(regex)
1585		Z_PARAM_ZVAL(replace)
1586		Z_PARAM_ZVAL(subject)
1587		Z_PARAM_OPTIONAL
1588		Z_PARAM_LONG(limit)
1589		Z_PARAM_ZVAL_EX(zcount, 0, 1)
1590	ZEND_PARSE_PARAMETERS_END();
1591#endif
1592
1593	if (!zend_is_callable(replace, 0, &callback_name)) {
1594		php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
1595		zend_string_release(callback_name);
1596		ZVAL_COPY(return_value, subject);
1597		return;
1598	}
1599	zend_string_release(callback_name);
1600
1601	replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 1, 0);
1602	if (zcount) {
1603		zval_dtor(zcount);
1604		ZVAL_LONG(zcount, replace_count);
1605	}
1606}
1607/* }}} */
1608
1609/* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
1610   Perform Perl-style regular expression replacement using replacement callback. */
1611static PHP_FUNCTION(preg_replace_callback_array)
1612{
1613	zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
1614	zend_long limit = -1;
1615	zend_string *str_idx;
1616	zend_string *callback_name;
1617	int replace_count = 0;
1618
1619#ifndef FAST_ZPP
1620	/* Get function parameters and do error-checking. */
1621	if (zend_parse_parameters(ZEND_NUM_ARGS(), "az|lz/", &pattern, &subject, &limit, &zcount) == FAILURE) {
1622		return;
1623	}
1624#else
1625	ZEND_PARSE_PARAMETERS_START(2, 4)
1626		Z_PARAM_ARRAY(pattern)
1627		Z_PARAM_ZVAL(subject)
1628		Z_PARAM_OPTIONAL
1629		Z_PARAM_LONG(limit)
1630		Z_PARAM_ZVAL_EX(zcount, 0, 1)
1631	ZEND_PARSE_PARAMETERS_END();
1632#endif
1633
1634	ZVAL_UNDEF(&zv);
1635	ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
1636		if (str_idx) {
1637			ZVAL_STR_COPY(&regex, str_idx);
1638		} else {
1639			php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
1640			zval_ptr_dtor(return_value);
1641			RETURN_NULL();
1642		}
1643
1644		if (!zend_is_callable(replace, 0, &callback_name)) {
1645			php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
1646			zend_string_release(callback_name);
1647			zval_ptr_dtor(&regex);
1648			zval_ptr_dtor(return_value);
1649			ZVAL_COPY(return_value, subject);
1650			return;
1651		}
1652		zend_string_release(callback_name);
1653
1654		if (Z_ISNULL_P(return_value)) {
1655			replace_count += preg_replace_impl(&zv, &regex, replace, subject, limit, 1, 0);
1656		} else {
1657			replace_count += preg_replace_impl(&zv, &regex, replace, return_value, limit, 1, 0);
1658			zval_ptr_dtor(return_value);
1659		}
1660
1661		zval_ptr_dtor(&regex);
1662
1663		if (Z_ISUNDEF(zv)) {
1664			RETURN_NULL();
1665		}
1666
1667		ZVAL_COPY_VALUE(return_value, &zv);
1668
1669		if (UNEXPECTED(EG(exception))) {
1670			zval_ptr_dtor(return_value);
1671			RETURN_NULL();
1672		}
1673	} ZEND_HASH_FOREACH_END();
1674
1675	if (zcount) {
1676		zval_dtor(zcount);
1677		ZVAL_LONG(zcount, replace_count);
1678	}
1679}
1680/* }}} */
1681
1682/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
1683   Perform Perl-style regular expression replacement and only return matches. */
1684static PHP_FUNCTION(preg_filter)
1685{
1686	zval *regex, *replace, *subject, *zcount = NULL;
1687	zend_long limit = -1;
1688	int replace_count;
1689
1690#ifndef FAST_ZPP
1691	/* Get function parameters and do error-checking. */
1692	if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzz|lz/", &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
1693		return;
1694	}
1695#else
1696	ZEND_PARSE_PARAMETERS_START(3, 5)
1697		Z_PARAM_ZVAL(regex)
1698		Z_PARAM_ZVAL(replace)
1699		Z_PARAM_ZVAL(subject)
1700		Z_PARAM_OPTIONAL
1701		Z_PARAM_LONG(limit)
1702		Z_PARAM_ZVAL_EX(zcount, 0, 1)
1703	ZEND_PARSE_PARAMETERS_END();
1704#endif
1705
1706	if (Z_TYPE_P(replace) == IS_ARRAY && Z_TYPE_P(regex) != IS_ARRAY) {
1707		php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1708		RETURN_FALSE;
1709	}
1710
1711	replace_count = preg_replace_impl(return_value, regex, replace, subject, limit, 0, 1);
1712	if (zcount) {
1713		zval_dtor(zcount);
1714		ZVAL_LONG(zcount, replace_count);
1715	}
1716}
1717/* }}} */
1718
1719/* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1720   Split string into an array using a perl-style regular expression as a delimiter */
1721static PHP_FUNCTION(preg_split)
1722{
1723	zend_string			*regex;			/* Regular expression */
1724	zend_string			*subject;		/* String to match against */
1725	zend_long			 limit_val = -1;/* Integer value of limit */
1726	zend_long			 flags = 0;		/* Match control flags */
1727	pcre_cache_entry	*pce;			/* Compiled regular expression */
1728
1729	/* Get function parameters and do error checking */
1730#ifndef FAST_ZPP
1731	if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|ll", &regex,
1732							  &subject, &limit_val, &flags) == FAILURE) {
1733		RETURN_FALSE;
1734	}
1735#else
1736	ZEND_PARSE_PARAMETERS_START(2, 4)
1737		Z_PARAM_STR(regex)
1738		Z_PARAM_STR(subject)
1739		Z_PARAM_OPTIONAL
1740		Z_PARAM_LONG(limit_val)
1741		Z_PARAM_LONG(flags)
1742	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
1743#endif
1744
1745	if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
1746			php_error_docref(NULL, E_WARNING, "Subject is too long");
1747			RETURN_FALSE;
1748	}
1749
1750	/* Compile regex or get it from cache. */
1751	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1752		RETURN_FALSE;
1753	}
1754
1755	pce->refcount++;
1756	php_pcre_split_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, (int)limit_val, flags);
1757	pce->refcount--;
1758}
1759/* }}} */
1760
1761/* {{{ php_pcre_split
1762 */
1763PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1764	zend_long limit_val, zend_long flags)
1765{
1766	pcre_extra		*extra = pce->extra;/* Holds results of studying */
1767	pcre_extra		 extra_data;		/* Used locally for exec options */
1768	int				*offsets;			/* Array of subpattern offsets */
1769	int				 size_offsets;		/* Size of the offsets array */
1770	int				 exoptions = 0;		/* Execution options */
1771	int				 count = 0;			/* Count of matched subpatterns */
1772	int				 start_offset;		/* Where the new search starts */
1773	int				 next_offset;		/* End of the last delimiter match + 1 */
1774	int				 g_notempty = 0;	/* If the match should not be empty */
1775	char			*last_match;		/* Location of last match */
1776	int				 no_empty;			/* If NO_EMPTY flag is set */
1777	int				 delim_capture; 	/* If delimiters should be captured */
1778	int				 offset_capture;	/* If offsets should be captured */
1779	zval			 tmp;
1780	ALLOCA_FLAG(use_heap);
1781
1782	no_empty = flags & PREG_SPLIT_NO_EMPTY;
1783	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1784	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1785
1786	if (limit_val == 0) {
1787		limit_val = -1;
1788	}
1789
1790	if (extra == NULL) {
1791		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1792		extra = &extra_data;
1793	}
1794	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
1795	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
1796#ifdef PCRE_EXTRA_MARK
1797	extra->flags &= ~PCRE_EXTRA_MARK;
1798#endif
1799
1800	/* Initialize return value */
1801	array_init(return_value);
1802
1803	/* Calculate the size of the offsets array, and allocate memory for it. */
1804	size_offsets = (pce->capture_count + 1) * 3;
1805	if (size_offsets <= 32) {
1806		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
1807	} else {
1808		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1809	}
1810
1811	/* Start at the beginning of the string */
1812	start_offset = 0;
1813	next_offset = 0;
1814	last_match = subject;
1815	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1816
1817	/* Get next piece if no limit or limit not yet reached and something matched*/
1818	while ((limit_val == -1 || limit_val > 1)) {
1819		count = pcre_exec(pce->re, extra, subject,
1820						  subject_len, start_offset,
1821						  exoptions|g_notempty, offsets, size_offsets);
1822
1823		/* the string was already proved to be valid UTF-8 */
1824		exoptions |= PCRE_NO_UTF8_CHECK;
1825
1826		/* Check for too many substrings condition. */
1827		if (count == 0) {
1828			php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1829			count = size_offsets/3;
1830		}
1831
1832		/* If something matched */
1833		if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
1834			if (!no_empty || &subject[offsets[0]] != last_match) {
1835
1836				if (offset_capture) {
1837					/* Add (match, offset) pair to the return value */
1838					add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
1839				} else {
1840					/* Add the piece to the return value */
1841					ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
1842					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1843				}
1844
1845				/* One less left to do */
1846				if (limit_val != -1)
1847					limit_val--;
1848			}
1849
1850			last_match = &subject[offsets[1]];
1851			next_offset = offsets[1];
1852
1853			if (delim_capture) {
1854				int i, match_len;
1855				for (i = 1; i < count; i++) {
1856					match_len = offsets[(i<<1)+1] - offsets[i<<1];
1857					/* If we have matched a delimiter */
1858					if (!no_empty || match_len > 0) {
1859						if (offset_capture) {
1860							add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1861						} else {
1862							ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
1863							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1864						}
1865					}
1866				}
1867			}
1868		} else if (count == PCRE_ERROR_NOMATCH) {
1869			/* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1870			   this is not necessarily the end. We need to advance
1871			   the start offset, and continue. Fudge the offset values
1872			   to achieve this, unless we're already at the end of the string. */
1873			if (g_notempty != 0 && start_offset < subject_len) {
1874				offsets[0] = start_offset;
1875				offsets[1] = start_offset + calculate_unit_length(pce, subject + start_offset);
1876			} else {
1877				break;
1878			}
1879		} else {
1880			pcre_handle_exec_error(count);
1881			break;
1882		}
1883
		/* If we have matched an empty string, mimic what Perl's /g option does.
1885		   This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1886		   the match again at the same point. If this fails (picked up above) we
1887		   advance to the next character. */
1888		g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1889
1890		/* Advance to the position right after the last full match */
1891		start_offset = offsets[1];
1892	}
1893
1894
1895	start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
1896
1897	if (!no_empty || start_offset < subject_len)
1898	{
1899		if (offset_capture) {
1900			/* Add the last (match, offset) pair to the return value */
1901			add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1902		} else {
1903			/* Add the last piece to the return value */
1904			ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);
1905			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
1906		}
1907	}
1908
1909
1910	/* Clean up */
1911	if (size_offsets <= 32) {
1912		free_alloca(offsets, use_heap);
1913	} else {
1914		efree(offsets);
1915	}
1916}
1917/* }}} */
1918
1919/* {{{ proto string preg_quote(string str [, string delim_char])
1920   Quote regular expression characters plus an optional character */
1921static PHP_FUNCTION(preg_quote)
1922{
1923	size_t		 in_str_len;
1924	char	*in_str;		/* Input string argument */
1925	char	*in_str_end;    /* End of the input string */
1926	size_t		 delim_len = 0;
1927	char	*delim = NULL;	/* Additional delimiter argument */
1928	zend_string	*out_str;	/* Output string with quoted characters */
1929	char 	*p,				/* Iterator for input string */
1930			*q,				/* Iterator for output string */
1931			 delim_char=0,	/* Delimiter character to be quoted */
1932			 c;				/* Current character */
1933	zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1934
1935	/* Get the arguments and check for errors */
1936#ifndef FAST_ZPP
1937	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &in_str, &in_str_len,
1938							  &delim, &delim_len) == FAILURE) {
1939		return;
1940	}
1941#else
1942	ZEND_PARSE_PARAMETERS_START(1, 2)
1943		Z_PARAM_STRING(in_str, in_str_len)
1944		Z_PARAM_OPTIONAL
1945		Z_PARAM_STRING(delim, delim_len)
1946	ZEND_PARSE_PARAMETERS_END();
1947#endif
1948
1949	in_str_end = in_str + in_str_len;
1950
1951	/* Nothing to do if we got an empty string */
1952	if (in_str == in_str_end) {
1953		RETURN_EMPTY_STRING();
1954	}
1955
1956	if (delim && *delim) {
1957		delim_char = delim[0];
1958		quote_delim = 1;
1959	}
1960
1961	/* Allocate enough memory so that even if each character
1962	   is quoted, we won't run out of room */
1963	out_str = zend_string_safe_alloc(4, in_str_len, 0, 0);
1964
1965	/* Go through the string and quote necessary characters */
1966	for (p = in_str, q = ZSTR_VAL(out_str); p != in_str_end; p++) {
1967		c = *p;
1968		switch(c) {
1969			case '.':
1970			case '\\':
1971			case '+':
1972			case '*':
1973			case '?':
1974			case '[':
1975			case '^':
1976			case ']':
1977			case '$':
1978			case '(':
1979			case ')':
1980			case '{':
1981			case '}':
1982			case '=':
1983			case '!':
1984			case '>':
1985			case '<':
1986			case '|':
1987			case ':':
1988			case '-':
1989				*q++ = '\\';
1990				*q++ = c;
1991				break;
1992
1993			case '\0':
1994				*q++ = '\\';
1995				*q++ = '0';
1996				*q++ = '0';
1997				*q++ = '0';
1998				break;
1999
2000			default:
2001				if (quote_delim && c == delim_char)
2002					*q++ = '\\';
2003				*q++ = c;
2004				break;
2005		}
2006	}
2007	*q = '\0';
2008
2009	/* Reallocate string and return it */
2010	out_str = zend_string_truncate(out_str, q - ZSTR_VAL(out_str), 0);
2011	RETURN_NEW_STR(out_str);
2012}
2013/* }}} */
2014
2015/* {{{ proto array preg_grep(string regex, array input [, int flags])
2016   Searches array and returns entries which match regex */
2017static PHP_FUNCTION(preg_grep)
2018{
2019	zend_string			*regex;			/* Regular expression */
2020	zval				*input;			/* Input array */
2021	zend_long			 flags = 0;		/* Match control flags */
2022	pcre_cache_entry	*pce;			/* Compiled regular expression */
2023
2024	/* Get arguments and do error checking */
2025#ifndef FAST_ZPP
2026	if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sa|l", &regex,
2027							  &input, &flags) == FAILURE) {
2028		return;
2029	}
2030#else
2031	ZEND_PARSE_PARAMETERS_START(2, 3)
2032		Z_PARAM_STR(regex)
2033		Z_PARAM_ARRAY(input)
2034		Z_PARAM_OPTIONAL
2035		Z_PARAM_LONG(flags)
2036	ZEND_PARSE_PARAMETERS_END();
2037#endif
2038
2039	/* Compile regex or get it from cache. */
2040	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2041		RETURN_FALSE;
2042	}
2043
2044	pce->refcount++;
2045	php_pcre_grep_impl(pce, input, return_value, flags);
2046	pce->refcount--;
2047}
2048/* }}} */
2049
2050PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2051{
2052	zval		    *entry;				/* An entry in the input array */
2053	pcre_extra		*extra = pce->extra;/* Holds results of studying */
2054	pcre_extra		 extra_data;		/* Used locally for exec options */
2055	int				*offsets;			/* Array of subpattern offsets */
2056	int				 size_offsets;		/* Size of the offsets array */
2057	int				 count = 0;			/* Count of matched subpatterns */
2058	zend_string		*string_key;
2059	zend_ulong		 num_key;
2060	zend_bool		 invert;			/* Whether to return non-matching
2061										   entries */
2062	ALLOCA_FLAG(use_heap);
2063
2064	invert = flags & PREG_GREP_INVERT ? 1 : 0;
2065
2066	if (extra == NULL) {
2067		extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2068		extra = &extra_data;
2069	}
2070	extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
2071	extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
2072#ifdef PCRE_EXTRA_MARK
2073	extra->flags &= ~PCRE_EXTRA_MARK;
2074#endif
2075
2076	/* Calculate the size of the offsets array, and allocate memory for it. */
2077	size_offsets = (pce->capture_count + 1) * 3;
2078	if (size_offsets <= 32) {
2079		offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
2080	} else {
2081		offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
2082	}
2083
2084	/* Initialize return array */
2085	array_init(return_value);
2086
2087	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2088
2089	/* Go through the input array */
2090	ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2091		zend_string *subject_str = zval_get_string(entry);
2092
2093		/* Perform the match */
2094		count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
2095						  (int)ZSTR_LEN(subject_str), 0,
2096						  0, offsets, size_offsets);
2097
2098		/* Check for too many substrings condition. */
2099		if (count == 0) {
2100			php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2101			count = size_offsets/3;
2102		} else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
2103			pcre_handle_exec_error(count);
2104			zend_string_release(subject_str);
2105			break;
2106		}
2107
2108		/* If the entry fits our requirements */
2109		if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
2110			if (Z_REFCOUNTED_P(entry)) {
2111			   	Z_ADDREF_P(entry);
2112			}
2113
2114			/* Add to return array */
2115			if (string_key) {
2116				zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2117			} else {
2118				zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2119			}
2120		}
2121
2122		zend_string_release(subject_str);
2123	} ZEND_HASH_FOREACH_END();
2124
2125	/* Clean up */
2126	if (size_offsets <= 32) {
2127		free_alloca(offsets, use_heap);
2128	} else {
2129		efree(offsets);
2130	}
2131}
2132/* }}} */
2133
2134/* {{{ proto int preg_last_error()
2135   Returns the error code of the last regexp execution. */
2136static PHP_FUNCTION(preg_last_error)
2137{
2138#ifndef FAST_ZPP
2139	if (zend_parse_parameters(ZEND_NUM_ARGS(), "") == FAILURE) {
2140		return;
2141	}
2142#else
2143	ZEND_PARSE_PARAMETERS_START(0, 0)
2144	ZEND_PARSE_PARAMETERS_END();
2145#endif
2146
2147	RETURN_LONG(PCRE_G(error_code));
2148}
2149/* }}} */
2150
2151/* {{{ module definition structures */
2152
/* {{{ arginfo */
/* Argument descriptors for the functions registered in pcre_functions.
 * Per the Zend arginfo macros, the last argument of ZEND_BEGIN_ARG_INFO_EX
 * is the number of required parameters, and the first argument of
 * ZEND_ARG_INFO is the pass-by-reference flag (1 = by reference). */

/* preg_match: pattern and subject required; subpatterns is by reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_match_all: same parameter list as preg_match */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_replace (also used for preg_filter): three required parameters;
 * count is by reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback: like preg_replace, with a callback in place of
 * the replacement */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback_array: two required parameters; count is by
 * reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_split: pattern and subject required; limit and flags optional */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_quote: str required; delim_char optional */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

/* preg_grep: regex and input required; flags optional */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error: takes no parameters */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
/* }}} */
2214
/* Table of the userland functions this extension exports, each paired with
 * its argument descriptor. It is referenced from pcre_module_entry and is
 * terminated by PHP_FE_END. */
static const zend_function_entry pcre_functions[] = {
	PHP_FE(preg_match,					arginfo_preg_match)
	PHP_FE(preg_match_all,				arginfo_preg_match_all)
	PHP_FE(preg_replace,				arginfo_preg_replace)
	PHP_FE(preg_replace_callback,		arginfo_preg_replace_callback)
	PHP_FE(preg_replace_callback_array,	arginfo_preg_replace_callback_array)
	/* preg_filter reuses the preg_replace argument descriptor */
	PHP_FE(preg_filter,					arginfo_preg_replace)
	PHP_FE(preg_split,					arginfo_preg_split)
	PHP_FE(preg_quote,					arginfo_preg_quote)
	PHP_FE(preg_grep,					arginfo_preg_grep)
	PHP_FE(preg_last_error,				arginfo_preg_last_error)
	PHP_FE_END
};
2228
2229zend_module_entry pcre_module_entry = {
2230	STANDARD_MODULE_HEADER,
2231   "pcre",
2232	pcre_functions,
2233	PHP_MINIT(pcre),
2234	PHP_MSHUTDOWN(pcre),
2235#ifdef PCRE_STUDY_JIT_COMPILE
2236	PHP_RINIT(pcre),
2237#else
2238	NULL
2239#endif
2240	NULL,
2241	PHP_MINFO(pcre),
2242	PHP_PCRE_VERSION,
2243	PHP_MODULE_GLOBALS(pcre),
2244	PHP_GINIT(pcre),
2245	PHP_GSHUTDOWN(pcre),
2246	NULL,
2247	STANDARD_MODULE_PROPERTIES_EX
2248};
2249
/* Only when COMPILE_DL_PCRE is defined (presumably a shared/loadable build
 * of the extension): emit the module lookup glue for pcre. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
2253
2254/* }}} */
2255
2256#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2257
2258/*
2259 * Local variables:
2260 * tab-width: 4
2261 * c-basic-offset: 4
2262 * End:
2263 * vim600: sw=4 ts=4 fdm=marker
2264 * vim<600: sw=4 ts=4
2265 */
2266