1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 7                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2016 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Authors: Derick Rethans <derick@php.net>                             |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php_filter.h"
22#include "filter_private.h"
23#include "zend_smart_str.h"
24
25/* {{{ STRUCTS */
/* Lookup table with one slot per possible byte value (0-255). A non-zero slot
 * marks that byte as allowed; see filter_map_update() and filter_map_apply(). */
typedef unsigned long filter_map[256];
27/* }}} */
28
29/* {{{ HELPER FUNCTIONS */
30static void php_filter_encode_html(zval *value, const unsigned char *chars)
31{
32	smart_str str = {0};
33	size_t len = Z_STRLEN_P(value);
34	unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
35	unsigned char *e = s + len;
36
37	if (Z_STRLEN_P(value) == 0) {
38		return;
39	}
40
41	while (s < e) {
42		if (chars[*s]) {
43			smart_str_appendl(&str, "&#", 2);
44			smart_str_append_unsigned(&str, (zend_ulong)*s);
45			smart_str_appendc(&str, ';');
46		} else {
47			/* XXX: this needs to be optimized to work with blocks of 'safe' chars */
48			smart_str_appendc(&str, *s);
49		}
50		s++;
51	}
52
53	smart_str_0(&str);
54	zval_ptr_dtor(value);
55	ZVAL_NEW_STR(value, str.s);
56}
57
/* Upper-case hexadecimal digits, indexed by nibble value when emitting %XX
 * escapes in php_filter_encode_url(). */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Character classes that are concatenated to build the allowed-character
 * lists used by the filters in this file. */
#define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT       "0123456789"

/* Bytes that php_filter_encoded() passes through unescaped; every other byte
 * is percent-encoded. */
#define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
65
66static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
67{
68	unsigned char *p;
69	unsigned char tmp[256];
70	unsigned char *s = (unsigned char *)chars;
71	unsigned char *e = s + char_len;
72	zend_string *str;
73
74	memset(tmp, 1, sizeof(tmp)-1);
75
76	while (s < e) {
77		tmp[*s++] = '\0';
78	}
79/* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
80	if (encode_nul) {
81		tmp[0] = 1;
82	}
83	if (high) {
84		memset(tmp + 127, 1, sizeof(tmp) - 127);
85	}
86	if (low) {
87		memset(tmp, 1, 32);
88	}
89*/
90	str = zend_string_alloc(3 * Z_STRLEN_P(value), 0);
91	p = (unsigned char *) ZSTR_VAL(str);
92	s = (unsigned char *) Z_STRVAL_P(value);
93	e = s + Z_STRLEN_P(value);
94
95	while (s < e) {
96		if (tmp[*s]) {
97			*p++ = '%';
98			*p++ = hexchars[(unsigned char) *s >> 4];
99			*p++ = hexchars[(unsigned char) *s & 15];
100		} else {
101			*p++ = *s;
102		}
103		s++;
104	}
105	*p = '\0';
106	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
107	zval_ptr_dtor(value);
108	ZVAL_NEW_STR(value, str);
109}
110
111static void php_filter_strip(zval *value, zend_long flags)
112{
113	unsigned char *str;
114	int   i, c;
115	zend_string *buf;
116
117	/* Optimization for if no strip flags are set */
118	if (!(flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK))) {
119		return;
120	}
121
122	str = (unsigned char *)Z_STRVAL_P(value);
123	buf = zend_string_alloc(Z_STRLEN_P(value) + 1, 0);
124	c = 0;
125	for (i = 0; i < Z_STRLEN_P(value); i++) {
126		if ((str[i] >= 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
127		} else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
128		} else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
129		} else {
130			ZSTR_VAL(buf)[c] = str[i];
131			++c;
132		}
133	}
134	/* update zval string data */
135	ZSTR_VAL(buf)[c] = '\0';
136	ZSTR_LEN(buf) = c;
137	zval_ptr_dtor(value);
138	ZVAL_NEW_STR(value, buf);
139}
140/* }}} */
141
142/* {{{ FILTER MAP HELPERS */
143static void filter_map_init(filter_map *map)
144{
145	memset(map, 0, sizeof(filter_map));
146}
147
148static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
149{
150	size_t l, i;
151
152	l = strlen((const char*)allowed_list);
153	for (i = 0; i < l; ++i) {
154		(*map)[allowed_list[i]] = flag;
155	}
156}
157
158static void filter_map_apply(zval *value, filter_map *map)
159{
160	unsigned char *str;
161	int   i, c;
162	zend_string *buf;
163
164	str = (unsigned char *)Z_STRVAL_P(value);
165	buf = zend_string_alloc(Z_STRLEN_P(value) + 1, 0);
166	c = 0;
167	for (i = 0; i < Z_STRLEN_P(value); i++) {
168		if ((*map)[str[i]]) {
169			ZSTR_VAL(buf)[c] = str[i];
170			++c;
171		}
172	}
173	/* update zval string data */
174	ZSTR_VAL(buf)[c] = '\0';
175	ZSTR_LEN(buf) = c;
176	zval_ptr_dtor(value);
177	ZVAL_NEW_STR(value, buf);
178}
179/* }}} */
180
181/* {{{ php_filter_string */
182void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
183{
184	size_t new_len;
185	unsigned char enc[256] = {0};
186
187	if (!Z_REFCOUNTED_P(value)) {
188		ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
189	}
190
191	/* strip high/strip low ( see flags )*/
192	php_filter_strip(value, flags);
193
194	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
195		enc['\''] = enc['"'] = 1;
196	}
197	if (flags & FILTER_FLAG_ENCODE_AMP) {
198		enc['&'] = 1;
199	}
200	if (flags & FILTER_FLAG_ENCODE_LOW) {
201		memset(enc, 1, 32);
202	}
203	if (flags & FILTER_FLAG_ENCODE_HIGH) {
204		memset(enc + 127, 1, sizeof(enc) - 127);
205	}
206
207	php_filter_encode_html(value, enc);
208
209	/* strip tags, implicitly also removes \0 chars */
210	new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
211	Z_STRLEN_P(value) = new_len;
212
213	if (new_len == 0) {
214		zval_dtor(value);
215		if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
216			ZVAL_NULL(value);
217		} else {
218			ZVAL_EMPTY_STRING(value);
219		}
220		return;
221	}
222}
223/* }}} */
224
225/* {{{ php_filter_encoded */
226void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
227{
228	/* apply strip_high and strip_low filters */
229	php_filter_strip(value, flags);
230	/* urlencode */
231	php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
232}
233/* }}} */
234
235/* {{{ php_filter_special_chars */
236void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
237{
238	unsigned char enc[256] = {0};
239
240	php_filter_strip(value, flags);
241
242	/* encodes ' " < > & \0 to numerical entities */
243	enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
244
245	/* if strip low is not set, then we encode them as &#xx; */
246	memset(enc, 1, 32);
247
248	if (flags & FILTER_FLAG_ENCODE_HIGH) {
249		memset(enc + 127, 1, sizeof(enc) - 127);
250	}
251
252	php_filter_encode_html(value, enc);
253}
254/* }}} */
255
256/* {{{ php_filter_full_special_chars */
257void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
258{
259	zend_string *buf;
260	int quotes;
261
262	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
263		quotes = ENT_QUOTES;
264	} else {
265		quotes = ENT_NOQUOTES;
266	}
267	buf = php_escape_html_entities_ex((unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0);
268	zval_ptr_dtor(value);
269	ZVAL_STR(value, buf);
270}
271/* }}} */
272
273/* {{{ php_filter_unsafe_raw */
274void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
275{
276	/* Only if no flags are set (optimization) */
277	if (flags != 0 && Z_STRLEN_P(value) > 0) {
278		unsigned char enc[256] = {0};
279
280		php_filter_strip(value, flags);
281
282		if (flags & FILTER_FLAG_ENCODE_AMP) {
283			enc['&'] = 1;
284		}
285		if (flags & FILTER_FLAG_ENCODE_LOW) {
286			memset(enc, 1, 32);
287		}
288		if (flags & FILTER_FLAG_ENCODE_HIGH) {
289			memset(enc + 127, 1, sizeof(enc) - 127);
290		}
291
292		php_filter_encode_html(value, enc);
293	} else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
294		zval_dtor(value);
295		ZVAL_NULL(value);
296	}
297}
298/* }}} */
299
300/* {{{ php_filter_email */
/* Character groups that, together with letters and digits, make up the
 * allowed list in php_filter_url(). */
#define SAFE        "$-_.+"
#define EXTRA       "!*'(),"
#define NATIONAL    "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED    ";/?:@&="
306
307void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
308{
309	/* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
310	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
311	filter_map     map;
312
313	filter_map_init(&map);
314	filter_map_update(&map, 1, allowed_list);
315	filter_map_apply(value, &map);
316}
317/* }}} */
318
319/* {{{ php_filter_url */
320void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
321{
322	/* Strip all chars not part of section 5 of
323	 * http://www.faqs.org/rfcs/rfc1738.html */
324	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
325	filter_map     map;
326
327	filter_map_init(&map);
328	filter_map_update(&map, 1, allowed_list);
329	filter_map_apply(value, &map);
330}
331/* }}} */
332
333/* {{{ php_filter_number_int */
334void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
335{
336	/* strip everything [^0-9+-] */
337	const unsigned char allowed_list[] = "+-" DIGIT;
338	filter_map     map;
339
340	filter_map_init(&map);
341	filter_map_update(&map, 1, allowed_list);
342	filter_map_apply(value, &map);
343}
344/* }}} */
345
346/* {{{ php_filter_number_float */
347void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
348{
349	/* strip everything [^0-9+-] */
350	const unsigned char allowed_list[] = "+-" DIGIT;
351	filter_map     map;
352
353	filter_map_init(&map);
354	filter_map_update(&map, 1, allowed_list);
355
356	/* depending on flags, strip '.', 'e', ",", "'" */
357	if (flags & FILTER_FLAG_ALLOW_FRACTION) {
358		filter_map_update(&map, 2, (const unsigned char *) ".");
359	}
360	if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
361		filter_map_update(&map, 3,  (const unsigned char *) ",");
362	}
363	if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
364		filter_map_update(&map, 4,  (const unsigned char *) "eE");
365	}
366	filter_map_apply(value, &map);
367}
368/* }}} */
369
370/* {{{ php_filter_magic_quotes */
371void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
372{
373	zend_string *buf;
374
375	/* just call php_addslashes quotes */
376	buf = php_addslashes(Z_STR_P(value), 0);
377
378	zval_ptr_dtor(value);
379	ZVAL_STR(value, buf);
380}
381/* }}} */
382
383/*
384 * Local variables:
385 * tab-width: 4
386 * c-basic-offset: 4
387 * End:
388 * vim600: noet sw=4 ts=4 fdm=marker
389 * vim<600: noet sw=4 ts=4
390 */
391