1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 5 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1997-2013 The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Authors: Derick Rethans <derick@php.net> | 16 +----------------------------------------------------------------------+ 17*/ 18 19/* $Id$ */ 20 21#include "php_filter.h" 22#include "filter_private.h" 23#include "ext/standard/php_smart_str.h" 24 25/* {{{ STRUCTS */ 26typedef unsigned long filter_map[256]; 27/* }}} */ 28 29/* {{{ HELPER FUNCTIONS */ 30static void php_filter_encode_html(zval *value, const unsigned char *chars) 31{ 32 smart_str str = {0}; 33 int len = Z_STRLEN_P(value); 34 unsigned char *s = (unsigned char *)Z_STRVAL_P(value); 35 unsigned char *e = s + len; 36 37 if (Z_STRLEN_P(value) == 0) { 38 return; 39 } 40 41 while (s < e) { 42 if (chars[*s]) { 43 smart_str_appendl(&str, "&#", 2); 44 smart_str_append_unsigned(&str, (unsigned long)*s); 45 smart_str_appendc(&str, ';'); 46 } else { 47 /* XXX: this needs to be optimized to work with blocks of 'safe' chars */ 48 smart_str_appendc(&str, *s); 49 } 50 s++; 51 } 52 53 smart_str_0(&str); 54 efree(Z_STRVAL_P(value)); 55 Z_STRVAL_P(value) = str.c; 56 Z_STRLEN_P(value) = str.len; 57} 58 59static const unsigned char hexchars[] = "0123456789ABCDEF"; 60 61#define LOWALPHA "abcdefghijklmnopqrstuvwxyz" 62#define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 63#define DIGIT "0123456789" 64 65#define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._" 66 67static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul) 68{ 69 unsigned char *str, *p; 70 unsigned char tmp[256]; 71 unsigned char *s = (unsigned char *)chars; 72 unsigned char *e = s + char_len; 73 74 memset(tmp, 1, sizeof(tmp)-1); 75 76 while (s < e) { 77 tmp[*s++] = 0; 78 } 79/* XXX: This is not needed since these chars in the allowed list never include the high/low/null value 80 if (encode_nul) { 81 tmp[0] = 1; 82 } 83 if (high) { 84 memset(tmp + 127, 1, sizeof(tmp) - 127); 85 } 86 if (low) { 87 memset(tmp, 1, 32); 88 } 89*/ 90 p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1); 91 s = (unsigned char *)Z_STRVAL_P(value); 92 e = s + Z_STRLEN_P(value); 93 94 while (s < e) { 95 if (tmp[*s]) { 96 *p++ = '%'; 97 *p++ = hexchars[(unsigned char) *s >> 4]; 98 *p++ = hexchars[(unsigned char) *s & 15]; 99 } else { 100 *p++ = *s; 101 } 102 s++; 103 } 104 *p = '\0'; 105 efree(Z_STRVAL_P(value)); 106 Z_STRVAL_P(value) = (char *)str; 107 Z_STRLEN_P(value) = p - str; 108} 109 110static void php_filter_strip(zval *value, long flags) 111{ 112 unsigned char *buf, *str; 113 int i, c; 114 115 /* Optimization for if no strip flags are set */ 116 if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) { 117 return; 118 } 119 120 str = (unsigned char *)Z_STRVAL_P(value); 121 buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1); 122 c = 0; 123 for (i = 0; i < Z_STRLEN_P(value); i++) { 124 if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) { 125 } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) { 126 } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) { 127 } else { 128 buf[c] = str[i]; 129 ++c; 130 } 131 } 132 /* update zval string data */ 133 buf[c] = '\0'; 134 efree(Z_STRVAL_P(value)); 135 Z_STRVAL_P(value) = (char *)buf; 136 Z_STRLEN_P(value) = c; 137} 138/* }}} */ 139 140/* {{{ FILTER MAP HELPERS */ 141static void filter_map_init(filter_map *map) 142{ 143 memset(map, 0, sizeof(filter_map)); 144} 145 146static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list) 147{ 148 int l, i; 149 150 l = strlen((const char*)allowed_list); 151 for (i = 0; i < l; ++i) { 152 (*map)[allowed_list[i]] = flag; 153 } 154} 155 156static void filter_map_apply(zval *value, filter_map *map) 157{ 158 unsigned char *buf, *str; 159 int i, c; 160 161 str = (unsigned char *)Z_STRVAL_P(value); 162 buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1); 163 c = 0; 164 for (i = 0; i < Z_STRLEN_P(value); i++) { 165 if ((*map)[str[i]]) { 166 buf[c] = str[i]; 167 ++c; 168 } 169 } 170 /* update zval string data */ 171 buf[c] = '\0'; 172 efree(Z_STRVAL_P(value)); 173 Z_STRVAL_P(value) = (char *)buf; 174 Z_STRLEN_P(value) = c; 175} 176/* }}} */ 177 178/* {{{ php_filter_string */ 179void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL) 180{ 181 size_t new_len; 182 unsigned char enc[256] = {0}; 183 184 /* strip high/strip low ( see flags )*/ 185 php_filter_strip(value, flags); 186 187 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) { 188 enc['\''] = enc['"'] = 1; 189 } 190 if (flags & FILTER_FLAG_ENCODE_AMP) { 191 enc['&'] = 1; 192 } 193 if (flags & FILTER_FLAG_ENCODE_LOW) { 194 memset(enc, 1, 32); 195 } 196 if (flags & FILTER_FLAG_ENCODE_HIGH) { 197 memset(enc + 127, 1, sizeof(enc) - 127); 198 } 199 200 php_filter_encode_html(value, enc); 201 202 /* strip tags, implicitly also removes \0 chars */ 203 new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1); 204 Z_STRLEN_P(value) = new_len; 205 206 if (new_len == 0) { 207 zval_dtor(value); 208 if (flags & FILTER_FLAG_EMPTY_STRING_NULL) { 209 ZVAL_NULL(value); 210 } else { 211 ZVAL_EMPTY_STRING(value); 212 } 213 return; 214 } 215} 216/* }}} */ 217 218/* {{{ php_filter_encoded */ 219void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL) 220{ 221 /* apply strip_high and strip_low filters */ 222 php_filter_strip(value, flags); 223 /* urlencode */ 224 php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1); 225} 226/* }}} */ 227 228/* {{{ php_filter_special_chars */ 229void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL) 230{ 231 unsigned char enc[256] = {0}; 232 233 php_filter_strip(value, flags); 234 235 /* encodes ' " < > & \0 to numerical entities */ 236 enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1; 237 238 /* if strip low is not set, then we encode them as &#xx; */ 239 memset(enc, 1, 32); 240 241 if (flags & FILTER_FLAG_ENCODE_HIGH) { 242 memset(enc + 127, 1, sizeof(enc) - 127); 243 } 244 245 php_filter_encode_html(value, enc); 246} 247/* }}} */ 248 249/* {{{ php_filter_full_special_chars */ 250void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL) 251{ 252 char *buf; 253 int len, quotes; 254 255 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) { 256 quotes = ENT_QUOTES; 257 } else { 258 quotes = ENT_NOQUOTES; 259 } 260 buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC); 261 efree(Z_STRVAL_P(value)); 262 Z_STRVAL_P(value) = buf; 263 Z_STRLEN_P(value) = len; 264} 265/* }}} */ 266 267/* {{{ php_filter_unsafe_raw */ 268void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL) 269{ 270 /* Only if no flags are set (optimization) */ 271 if (flags != 0 && Z_STRLEN_P(value) > 0) { 272 unsigned char enc[256] = {0}; 273 274 php_filter_strip(value, flags); 275 276 if (flags & FILTER_FLAG_ENCODE_AMP) { 277 enc['&'] = 1; 278 } 279 if (flags & FILTER_FLAG_ENCODE_LOW) { 280 memset(enc, 1, 32); 281 } 282 if (flags & FILTER_FLAG_ENCODE_HIGH) { 283 memset(enc + 127, 1, sizeof(enc) - 127); 284 } 285 286 php_filter_encode_html(value, enc); 287 } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) { 288 zval_dtor(value); 289 ZVAL_NULL(value); 290 } 291} 292/* }}} */ 293 294 295 296/* {{{ php_filter_email */ 297#define SAFE "$-_.+" 298#define EXTRA "!*'()," 299#define NATIONAL "{}|\\^~[]`" 300#define PUNCTUATION "<>#%\"" 301#define RESERVED ";/?:@&=" 302 303void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL) 304{ 305 /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */ 306 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]"; 307 filter_map map; 308 309 filter_map_init(&map); 310 filter_map_update(&map, 1, allowed_list); 311 filter_map_apply(value, &map); 312} 313/* }}} */ 314 315/* {{{ php_filter_url */ 316void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL) 317{ 318 /* Strip all chars not part of section 5 of 319 * http://www.faqs.org/rfcs/rfc1738.html */ 320 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED; 321 filter_map map; 322 323 filter_map_init(&map); 324 filter_map_update(&map, 1, allowed_list); 325 filter_map_apply(value, &map); 326} 327/* }}} */ 328 329/* {{{ php_filter_number_int */ 330void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL) 331{ 332 /* strip everything [^0-9+-] */ 333 const unsigned char allowed_list[] = "+-" DIGIT; 334 filter_map map; 335 336 filter_map_init(&map); 337 filter_map_update(&map, 1, allowed_list); 338 filter_map_apply(value, &map); 339} 340/* }}} */ 341 342/* {{{ php_filter_number_float */ 343void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL) 344{ 345 /* strip everything [^0-9+-] */ 346 const unsigned char allowed_list[] = "+-" DIGIT; 347 filter_map map; 348 349 filter_map_init(&map); 350 filter_map_update(&map, 1, allowed_list); 351 352 /* depending on flags, strip '.', 'e', ",", "'" */ 353 if (flags & FILTER_FLAG_ALLOW_FRACTION) { 354 filter_map_update(&map, 2, (const unsigned char *) "."); 355 } 356 if (flags & FILTER_FLAG_ALLOW_THOUSAND) { 357 filter_map_update(&map, 3, (const unsigned char *) ","); 358 } 359 if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) { 360 filter_map_update(&map, 4, (const unsigned char *) "eE"); 361 } 362 filter_map_apply(value, &map); 363} 364/* }}} */ 365 366/* {{{ php_filter_magic_quotes */ 367void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL) 368{ 369 char *buf; 370 int len; 371 372 /* just call php_addslashes quotes */ 373 buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC); 374 375 efree(Z_STRVAL_P(value)); 376 Z_STRVAL_P(value) = buf; 377 Z_STRLEN_P(value) = len; 378} 379/* }}} */ 380 381/* 382 * Local variables: 383 * tab-width: 4 384 * c-basic-offset: 4 385 * End: 386 * vim600: noet sw=4 ts=4 fdm=marker 387 * vim<600: noet sw=4 ts=4 388 */ 389