1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 5                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2014 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Authors: Derick Rethans <derick@php.net>                             |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php_filter.h"
22#include "filter_private.h"
23#include "ext/standard/php_smart_str.h"
24
25/* {{{ STRUCTS */
/* Lookup table with one slot per possible byte value (0-255).
 * filter_map_update() stores a flag in the slots of the listed bytes and
 * filter_map_apply() keeps only the bytes whose slot is non-zero. */
typedef unsigned long filter_map[256];
27/* }}} */
28
29/* {{{ HELPER FUNCTIONS */
30static void php_filter_encode_html(zval *value, const unsigned char *chars)
31{
32    smart_str str = {0};
33    int len = Z_STRLEN_P(value);
34    unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
35    unsigned char *e = s + len;
36
37    if (Z_STRLEN_P(value) == 0) {
38        return;
39    }
40
41    while (s < e) {
42        if (chars[*s]) {
43            smart_str_appendl(&str, "&#", 2);
44            smart_str_append_unsigned(&str, (unsigned long)*s);
45            smart_str_appendc(&str, ';');
46        } else {
47            /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
48            smart_str_appendc(&str, *s);
49        }
50        s++;
51    }
52
53    smart_str_0(&str);
54    str_efree(Z_STRVAL_P(value));
55    Z_STRVAL_P(value) = str.c;
56    Z_STRLEN_P(value) = str.len;
57}
58
/* Upper-case hexadecimal digits indexed by nibble value (0-15); used by
 * php_filter_encode_url() to emit the two digits of a %XX escape. */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Character classes that are concatenated (as adjacent string literals) to
 * build the allow-lists used by the sanitizers in this file. */
#define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT       "0123456789"

/* Bytes that php_filter_encoded() passes through without percent-encoding. */
#define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
66
67static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
68{
69    unsigned char *str, *p;
70    unsigned char tmp[256];
71    unsigned char *s = (unsigned char *)chars;
72    unsigned char *e = s + char_len;
73
74    memset(tmp, 1, sizeof(tmp)-1);
75
76    while (s < e) {
77        tmp[*s++] = 0;
78    }
79/* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
80    if (encode_nul) {
81        tmp[0] = 1;
82    }
83    if (high) {
84        memset(tmp + 127, 1, sizeof(tmp) - 127);
85    }
86    if (low) {
87        memset(tmp, 1, 32);
88    }
89*/
90    p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
91    s = (unsigned char *)Z_STRVAL_P(value);
92    e = s + Z_STRLEN_P(value);
93
94    while (s < e) {
95        if (tmp[*s]) {
96            *p++ = '%';
97            *p++ = hexchars[(unsigned char) *s >> 4];
98            *p++ = hexchars[(unsigned char) *s & 15];
99        } else {
100            *p++ = *s;
101        }
102        s++;
103    }
104    *p = '\0';
105    str_efree(Z_STRVAL_P(value));
106    Z_STRVAL_P(value) = (char *)str;
107    Z_STRLEN_P(value) = p - str;
108}
109
110static void php_filter_strip(zval *value, long flags)
111{
112    unsigned char *buf, *str;
113    int   i, c;
114
115    /* Optimization for if no strip flags are set */
116    if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) {
117        return;
118    }
119
120    str = (unsigned char *)Z_STRVAL_P(value);
121    buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
122    c = 0;
123    for (i = 0; i < Z_STRLEN_P(value); i++) {
124        if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
125        } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
126        } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
127        } else {
128            buf[c] = str[i];
129            ++c;
130        }
131    }
132    /* update zval string data */
133    buf[c] = '\0';
134    str_efree(Z_STRVAL_P(value));
135    Z_STRVAL_P(value) = (char *)buf;
136    Z_STRLEN_P(value) = c;
137}
138/* }}} */
139
140/* {{{ FILTER MAP HELPERS */
141static void filter_map_init(filter_map *map)
142{
143    memset(map, 0, sizeof(filter_map));
144}
145
146static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
147{
148    int l, i;
149
150    l = strlen((const char*)allowed_list);
151    for (i = 0; i < l; ++i) {
152        (*map)[allowed_list[i]] = flag;
153    }
154}
155
156static void filter_map_apply(zval *value, filter_map *map)
157{
158    unsigned char *buf, *str;
159    int   i, c;
160
161    str = (unsigned char *)Z_STRVAL_P(value);
162    buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
163    c = 0;
164    for (i = 0; i < Z_STRLEN_P(value); i++) {
165        if ((*map)[str[i]]) {
166            buf[c] = str[i];
167            ++c;
168        }
169    }
170    /* update zval string data */
171    buf[c] = '\0';
172    str_efree(Z_STRVAL_P(value));
173    Z_STRVAL_P(value) = (char *)buf;
174    Z_STRLEN_P(value) = c;
175}
176/* }}} */
177
178/* {{{ php_filter_string */
179void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
180{
181    size_t new_len;
182    unsigned char enc[256] = {0};
183
184    /* strip high/strip low ( see flags )*/
185    php_filter_strip(value, flags);
186
187    if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
188        enc['\''] = enc['"'] = 1;
189    }
190    if (flags & FILTER_FLAG_ENCODE_AMP) {
191        enc['&'] = 1;
192    }
193    if (flags & FILTER_FLAG_ENCODE_LOW) {
194        memset(enc, 1, 32);
195    }
196    if (flags & FILTER_FLAG_ENCODE_HIGH) {
197        memset(enc + 127, 1, sizeof(enc) - 127);
198    }
199
200    php_filter_encode_html(value, enc);
201
202    /* strip tags, implicitly also removes \0 chars */
203    new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
204    Z_STRLEN_P(value) = new_len;
205
206    if (new_len == 0) {
207        zval_dtor(value);
208        if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
209            ZVAL_NULL(value);
210        } else {
211            ZVAL_EMPTY_STRING(value);
212        }
213        return;
214    }
215}
216/* }}} */
217
218/* {{{ php_filter_encoded */
219void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
220{
221    /* apply strip_high and strip_low filters */
222    php_filter_strip(value, flags);
223    /* urlencode */
224    php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
225}
226/* }}} */
227
228/* {{{ php_filter_special_chars */
229void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
230{
231    unsigned char enc[256] = {0};
232
233    php_filter_strip(value, flags);
234
235    /* encodes ' " < > & \0 to numerical entities */
236    enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
237
238    /* if strip low is not set, then we encode them as &#xx; */
239    memset(enc, 1, 32);
240
241    if (flags & FILTER_FLAG_ENCODE_HIGH) {
242        memset(enc + 127, 1, sizeof(enc) - 127);
243    }
244
245    php_filter_encode_html(value, enc);
246}
247/* }}} */
248
249/* {{{ php_filter_full_special_chars */
250void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
251{
252    char *buf;
253    size_t len;
254    int quotes;
255
256    if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
257        quotes = ENT_QUOTES;
258    } else {
259        quotes = ENT_NOQUOTES;
260    }
261    buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC);
262    str_efree(Z_STRVAL_P(value));
263    Z_STRVAL_P(value) = buf;
264    Z_STRLEN_P(value) = len;
265}
266/* }}} */
267
268/* {{{ php_filter_unsafe_raw */
269void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
270{
271    /* Only if no flags are set (optimization) */
272    if (flags != 0 && Z_STRLEN_P(value) > 0) {
273        unsigned char enc[256] = {0};
274
275        php_filter_strip(value, flags);
276
277        if (flags & FILTER_FLAG_ENCODE_AMP) {
278            enc['&'] = 1;
279        }
280        if (flags & FILTER_FLAG_ENCODE_LOW) {
281            memset(enc, 1, 32);
282        }
283        if (flags & FILTER_FLAG_ENCODE_HIGH) {
284            memset(enc + 127, 1, sizeof(enc) - 127);
285        }
286
287        php_filter_encode_html(value, enc);
288    } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
289        zval_dtor(value);
290        ZVAL_NULL(value);
291    }
292}
293/* }}} */
294
295
296
297/* {{{ php_filter_email */
/* Character classes that php_filter_url() concatenates (together with
 * LOWALPHA, HIALPHA and DIGIT) into its allow-list. The names presumably
 * follow the grammar in section 5 of RFC 1738 - confirm against the RFC. */
#define SAFE        "$-_.+"
#define EXTRA       "!*'(),"
#define NATIONAL    "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED    ";/?:@&="
303
304void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
305{
306    /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
307    const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
308    filter_map     map;
309
310    filter_map_init(&map);
311    filter_map_update(&map, 1, allowed_list);
312    filter_map_apply(value, &map);
313}
314/* }}} */
315
316/* {{{ php_filter_url */
317void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
318{
319    /* Strip all chars not part of section 5 of
320     * http://www.faqs.org/rfcs/rfc1738.html */
321    const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
322    filter_map     map;
323
324    filter_map_init(&map);
325    filter_map_update(&map, 1, allowed_list);
326    filter_map_apply(value, &map);
327}
328/* }}} */
329
330/* {{{ php_filter_number_int */
331void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
332{
333    /* strip everything [^0-9+-] */
334    const unsigned char allowed_list[] = "+-" DIGIT;
335    filter_map     map;
336
337    filter_map_init(&map);
338    filter_map_update(&map, 1, allowed_list);
339    filter_map_apply(value, &map);
340}
341/* }}} */
342
343/* {{{ php_filter_number_float */
344void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
345{
346    /* strip everything [^0-9+-] */
347    const unsigned char allowed_list[] = "+-" DIGIT;
348    filter_map     map;
349
350    filter_map_init(&map);
351    filter_map_update(&map, 1, allowed_list);
352
353    /* depending on flags, strip '.', 'e', ",", "'" */
354    if (flags & FILTER_FLAG_ALLOW_FRACTION) {
355        filter_map_update(&map, 2, (const unsigned char *) ".");
356    }
357    if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
358        filter_map_update(&map, 3,  (const unsigned char *) ",");
359    }
360    if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
361        filter_map_update(&map, 4,  (const unsigned char *) "eE");
362    }
363    filter_map_apply(value, &map);
364}
365/* }}} */
366
367/* {{{ php_filter_magic_quotes */
368void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
369{
370    char *buf;
371    int   len;
372
373    /* just call php_addslashes quotes */
374    buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
375
376    str_efree(Z_STRVAL_P(value));
377    Z_STRVAL_P(value) = buf;
378    Z_STRLEN_P(value) = len;
379}
380/* }}} */
381
382/*
383 * Local variables:
384 * tab-width: 4
385 * c-basic-offset: 4
386 * End:
387 * vim600: noet sw=4 ts=4 fdm=marker
388 * vim<600: noet sw=4 ts=4
389 */
390