1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 7                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2014 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Authors: Derick Rethans <derick@php.net>                             |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php_filter.h"
22#include "filter_private.h"
23#include "zend_smart_str.h"
24
25/* {{{ STRUCTS */
/* Lookup table with one slot per possible byte value (0-255).
 * filter_map_update stores a flag in the slots of allowed bytes and
 * filter_map_apply keeps only bytes whose slot is non-zero. */
typedef unsigned long filter_map[256];
27/* }}} */
28
29/* {{{ HELPER FUNCTIONS */
30static void php_filter_encode_html(zval *value, const unsigned char *chars)
31{
32    smart_str str = {0};
33    int len = Z_STRLEN_P(value);
34    unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
35    unsigned char *e = s + len;
36
37    if (Z_STRLEN_P(value) == 0) {
38        return;
39    }
40
41    while (s < e) {
42        if (chars[*s]) {
43            smart_str_appendl(&str, "&#", 2);
44            smart_str_append_unsigned(&str, (zend_ulong)*s);
45            smart_str_appendc(&str, ';');
46        } else {
47            /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
48            smart_str_appendc(&str, *s);
49        }
50        s++;
51    }
52
53    smart_str_0(&str);
54    zval_ptr_dtor(value);
55    ZVAL_NEW_STR(value, str.s);
56}
57
/* Upper-case hexadecimal digits, indexed by nibble value (0-15). */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Basic character classes; adjacent string literals are concatenated to
 * build the allow-lists used by the filters in this file. */
#define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT       "0123456789"

/* Bytes that php_filter_encoded passes through without percent-encoding. */
#define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
65
66static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
67{
68    unsigned char *p;
69    unsigned char tmp[256];
70    unsigned char *s = (unsigned char *)chars;
71    unsigned char *e = s + char_len;
72    zend_string *str;
73
74    memset(tmp, 1, sizeof(tmp)-1);
75
76    while (s < e) {
77        tmp[*s++] = '\0';
78    }
79/* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
80    if (encode_nul) {
81        tmp[0] = 1;
82    }
83    if (high) {
84        memset(tmp + 127, 1, sizeof(tmp) - 127);
85    }
86    if (low) {
87        memset(tmp, 1, 32);
88    }
89*/
90    str = zend_string_alloc(3 * Z_STRLEN_P(value), 0);
91    p = str->val;
92    s = Z_STRVAL_P(value);
93    e = s + Z_STRLEN_P(value);
94
95    while (s < e) {
96        if (tmp[*s]) {
97            *p++ = '%';
98            *p++ = hexchars[(unsigned char) *s >> 4];
99            *p++ = hexchars[(unsigned char) *s & 15];
100        } else {
101            *p++ = *s;
102        }
103        s++;
104    }
105    *p = '\0';
106    str->len = p - (unsigned char *)str->val;
107    zval_ptr_dtor(value);
108    ZVAL_NEW_STR(value, str);
109}
110
111static void php_filter_strip(zval *value, zend_long flags)
112{
113    unsigned char *str;
114    int   i, c;
115    zend_string *buf;
116
117    /* Optimization for if no strip flags are set */
118    if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) {
119        return;
120    }
121
122    str = (unsigned char *)Z_STRVAL_P(value);
123    buf = zend_string_alloc(Z_STRLEN_P(value) + 1, 0);
124    c = 0;
125    for (i = 0; i < Z_STRLEN_P(value); i++) {
126        if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
127        } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
128        } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
129        } else {
130            buf->val[c] = str[i];
131            ++c;
132        }
133    }
134    /* update zval string data */
135    buf->val[c] = '\0';
136    buf->len = c;
137    zval_ptr_dtor(value);
138    ZVAL_NEW_STR(value, buf);
139}
140/* }}} */
141
142/* {{{ FILTER MAP HELPERS */
143static void filter_map_init(filter_map *map)
144{
145    memset(map, 0, sizeof(filter_map));
146}
147
148static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
149{
150    int l, i;
151
152    l = strlen((const char*)allowed_list);
153    for (i = 0; i < l; ++i) {
154        (*map)[allowed_list[i]] = flag;
155    }
156}
157
158static void filter_map_apply(zval *value, filter_map *map)
159{
160    unsigned char *str;
161    int   i, c;
162    zend_string *buf;
163
164    str = (unsigned char *)Z_STRVAL_P(value);
165    buf = zend_string_alloc(Z_STRLEN_P(value) + 1, 0);
166    c = 0;
167    for (i = 0; i < Z_STRLEN_P(value); i++) {
168        if ((*map)[str[i]]) {
169            buf->val[c] = str[i];
170            ++c;
171        }
172    }
173    /* update zval string data */
174    buf->val[c] = '\0';
175    buf->len = c;
176    zval_ptr_dtor(value);
177    ZVAL_NEW_STR(value, buf);
178}
179/* }}} */
180
181/* {{{ php_filter_string */
182void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
183{
184    size_t new_len;
185    unsigned char enc[256] = {0};
186
187    if (!Z_REFCOUNTED_P(value)) {
188        ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
189    }
190
191    /* strip high/strip low ( see flags )*/
192    php_filter_strip(value, flags);
193
194    if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
195        enc['\''] = enc['"'] = 1;
196    }
197    if (flags & FILTER_FLAG_ENCODE_AMP) {
198        enc['&'] = 1;
199    }
200    if (flags & FILTER_FLAG_ENCODE_LOW) {
201        memset(enc, 1, 32);
202    }
203    if (flags & FILTER_FLAG_ENCODE_HIGH) {
204        memset(enc + 127, 1, sizeof(enc) - 127);
205    }
206
207    php_filter_encode_html(value, enc);
208
209    /* strip tags, implicitly also removes \0 chars */
210    new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
211    Z_STRLEN_P(value) = new_len;
212
213    if (new_len == 0) {
214        zval_dtor(value);
215        if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
216            ZVAL_NULL(value);
217        } else {
218            ZVAL_EMPTY_STRING(value);
219        }
220        return;
221    }
222}
223/* }}} */
224
225/* {{{ php_filter_encoded */
226void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
227{
228    /* apply strip_high and strip_low filters */
229    php_filter_strip(value, flags);
230    /* urlencode */
231    php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
232}
233/* }}} */
234
235/* {{{ php_filter_special_chars */
236void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
237{
238    unsigned char enc[256] = {0};
239
240    php_filter_strip(value, flags);
241
242    /* encodes ' " < > & \0 to numerical entities */
243    enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
244
245    /* if strip low is not set, then we encode them as &#xx; */
246    memset(enc, 1, 32);
247
248    if (flags & FILTER_FLAG_ENCODE_HIGH) {
249        memset(enc + 127, 1, sizeof(enc) - 127);
250    }
251
252    php_filter_encode_html(value, enc);
253}
254/* }}} */
255
256/* {{{ php_filter_full_special_chars */
257void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
258{
259    zend_string *buf;
260    int quotes;
261
262    if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
263        quotes = ENT_QUOTES;
264    } else {
265        quotes = ENT_NOQUOTES;
266    }
267    buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0 TSRMLS_CC);
268    zval_ptr_dtor(value);
269    ZVAL_STR(value, buf);
270}
271/* }}} */
272
273/* {{{ php_filter_unsafe_raw */
274void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
275{
276    /* Only if no flags are set (optimization) */
277    if (flags != 0 && Z_STRLEN_P(value) > 0) {
278        unsigned char enc[256] = {0};
279
280        php_filter_strip(value, flags);
281
282        if (flags & FILTER_FLAG_ENCODE_AMP) {
283            enc['&'] = 1;
284        }
285        if (flags & FILTER_FLAG_ENCODE_LOW) {
286            memset(enc, 1, 32);
287        }
288        if (flags & FILTER_FLAG_ENCODE_HIGH) {
289            memset(enc + 127, 1, sizeof(enc) - 127);
290        }
291
292        php_filter_encode_html(value, enc);
293    } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
294        zval_dtor(value);
295        ZVAL_NULL(value);
296    }
297}
298/* }}} */
299
300/* {{{ php_filter_email */
/* Character classes combined into the allow-list of php_filter_url. */
#define SAFE        "$-_.+"
#define EXTRA       "!*'(),"
#define NATIONAL    "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED    ";/?:@&="
306
307void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
308{
309    /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
310    const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
311    filter_map     map;
312
313    filter_map_init(&map);
314    filter_map_update(&map, 1, allowed_list);
315    filter_map_apply(value, &map);
316}
317/* }}} */
318
319/* {{{ php_filter_url */
320void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
321{
322    /* Strip all chars not part of section 5 of
323     * http://www.faqs.org/rfcs/rfc1738.html */
324    const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
325    filter_map     map;
326
327    filter_map_init(&map);
328    filter_map_update(&map, 1, allowed_list);
329    filter_map_apply(value, &map);
330}
331/* }}} */
332
333/* {{{ php_filter_number_int */
334void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
335{
336    /* strip everything [^0-9+-] */
337    const unsigned char allowed_list[] = "+-" DIGIT;
338    filter_map     map;
339
340    filter_map_init(&map);
341    filter_map_update(&map, 1, allowed_list);
342    filter_map_apply(value, &map);
343}
344/* }}} */
345
346/* {{{ php_filter_number_float */
347void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
348{
349    /* strip everything [^0-9+-] */
350    const unsigned char allowed_list[] = "+-" DIGIT;
351    filter_map     map;
352
353    filter_map_init(&map);
354    filter_map_update(&map, 1, allowed_list);
355
356    /* depending on flags, strip '.', 'e', ",", "'" */
357    if (flags & FILTER_FLAG_ALLOW_FRACTION) {
358        filter_map_update(&map, 2, (const unsigned char *) ".");
359    }
360    if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
361        filter_map_update(&map, 3,  (const unsigned char *) ",");
362    }
363    if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
364        filter_map_update(&map, 4,  (const unsigned char *) "eE");
365    }
366    filter_map_apply(value, &map);
367}
368/* }}} */
369
370/* {{{ php_filter_magic_quotes */
371void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
372{
373    zend_string *buf;
374
375    /* just call php_addslashes quotes */
376    buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), 0 TSRMLS_CC);
377
378    zval_ptr_dtor(value);
379    ZVAL_STR(value, buf);
380}
381/* }}} */
382
383/*
384 * Local variables:
385 * tab-width: 4
386 * c-basic-offset: 4
387 * End:
388 * vim600: noet sw=4 ts=4 fdm=marker
389 * vim<600: noet sw=4 ts=4
390 */
391