1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 5 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1997-2013 The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Authors: Derick Rethans <derick@php.net> | 16 +----------------------------------------------------------------------+ 17*/ 18 19/* $Id$ */ 20 21#include "php_filter.h" 22#include "filter_private.h" 23#include "ext/standard/php_smart_str.h" 24 25/* {{{ STRUCTS */ 26typedef unsigned long filter_map[256]; 27/* }}} */ 28 29/* {{{ HELPER FUNCTIONS */ 30static void php_filter_encode_html(zval *value, const unsigned char *chars) 31{ 32 smart_str str = {0}; 33 int len = Z_STRLEN_P(value); 34 unsigned char *s = (unsigned char *)Z_STRVAL_P(value); 35 unsigned char *e = s + len; 36 37 if (Z_STRLEN_P(value) == 0) { 38 return; 39 } 40 41 while (s < e) { 42 if (chars[*s]) { 43 smart_str_appendl(&str, "&#", 2); 44 smart_str_append_unsigned(&str, (unsigned long)*s); 45 smart_str_appendc(&str, ';'); 46 } else { 47 /* XXX: this needs to be optimized to work with blocks of 'safe' chars */ 48 smart_str_appendc(&str, *s); 49 } 50 s++; 51 } 52 53 smart_str_0(&str); 54 str_efree(Z_STRVAL_P(value)); 55 Z_STRVAL_P(value) = str.c; 56 Z_STRLEN_P(value) = str.len; 57} 58 59static const unsigned char hexchars[] = "0123456789ABCDEF"; 60 61#define LOWALPHA "abcdefghijklmnopqrstuvwxyz" 62#define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 63#define DIGIT "0123456789" 64 65#define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._" 66 67static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul) 68{ 69 unsigned char *str, *p; 70 unsigned char tmp[256]; 71 unsigned char *s = (unsigned char *)chars; 72 unsigned char *e = s + char_len; 73 74 memset(tmp, 1, sizeof(tmp)-1); 75 76 while (s < e) { 77 tmp[*s++] = 0; 78 } 79/* XXX: This is not needed since these chars in the allowed list never include the high/low/null value 80 if (encode_nul) { 81 tmp[0] = 1; 82 } 83 if (high) { 84 memset(tmp + 127, 1, sizeof(tmp) - 127); 85 } 86 if (low) { 87 memset(tmp, 1, 32); 88 } 89*/ 90 p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1); 91 s = (unsigned char *)Z_STRVAL_P(value); 92 e = s + Z_STRLEN_P(value); 93 94 while (s < e) { 95 if (tmp[*s]) { 96 *p++ = '%'; 97 *p++ = hexchars[(unsigned char) *s >> 4]; 98 *p++ = hexchars[(unsigned char) *s & 15]; 99 } else { 100 *p++ = *s; 101 } 102 s++; 103 } 104 *p = '\0'; 105 str_efree(Z_STRVAL_P(value)); 106 Z_STRVAL_P(value) = (char *)str; 107 Z_STRLEN_P(value) = p - str; 108} 109 110static void php_filter_strip(zval *value, long flags) 111{ 112 unsigned char *buf, *str; 113 int i, c; 114 115 /* Optimization for if no strip flags are set */ 116 if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) { 117 return; 118 } 119 120 str = (unsigned char *)Z_STRVAL_P(value); 121 buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1); 122 c = 0; 123 for (i = 0; i < Z_STRLEN_P(value); i++) { 124 if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) { 125 } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) { 126 } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) { 127 } else { 128 buf[c] = str[i]; 129 ++c; 130 } 131 } 132 /* update zval string data */ 133 buf[c] = '\0'; 134 str_efree(Z_STRVAL_P(value)); 135 Z_STRVAL_P(value) = (char *)buf; 136 Z_STRLEN_P(value) = c; 137} 138/* }}} */ 139 140/* {{{ FILTER MAP HELPERS */ 141static void filter_map_init(filter_map *map) 142{ 143 memset(map, 0, sizeof(filter_map)); 144} 145 146static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list) 147{ 148 int l, i; 149 150 l = strlen((const char*)allowed_list); 151 for (i = 0; i < l; ++i) { 152 (*map)[allowed_list[i]] = flag; 153 } 154} 155 156static void filter_map_apply(zval *value, filter_map *map) 157{ 158 unsigned char *buf, *str; 159 int i, c; 160 161 str = (unsigned char *)Z_STRVAL_P(value); 162 buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1); 163 c = 0; 164 for (i = 0; i < Z_STRLEN_P(value); i++) { 165 if ((*map)[str[i]]) { 166 buf[c] = str[i]; 167 ++c; 168 } 169 } 170 /* update zval string data */ 171 buf[c] = '\0'; 172 str_efree(Z_STRVAL_P(value)); 173 Z_STRVAL_P(value) = (char *)buf; 174 Z_STRLEN_P(value) = c; 175} 176/* }}} */ 177 178/* {{{ php_filter_string */ 179void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL) 180{ 181 size_t new_len; 182 unsigned char enc[256] = {0}; 183 184 /* strip high/strip low ( see flags )*/ 185 php_filter_strip(value, flags); 186 187 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) { 188 enc['\''] = enc['"'] = 1; 189 } 190 if (flags & FILTER_FLAG_ENCODE_AMP) { 191 enc['&'] = 1; 192 } 193 if (flags & FILTER_FLAG_ENCODE_LOW) { 194 memset(enc, 1, 32); 195 } 196 if (flags & FILTER_FLAG_ENCODE_HIGH) { 197 memset(enc + 127, 1, sizeof(enc) - 127); 198 } 199 200 php_filter_encode_html(value, enc); 201 202 /* strip tags, implicitly also removes \0 chars */ 203 new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1); 204 Z_STRLEN_P(value) = new_len; 205 206 if (new_len == 0) { 207 zval_dtor(value); 208 if (flags & FILTER_FLAG_EMPTY_STRING_NULL) { 209 ZVAL_NULL(value); 210 } else { 211 ZVAL_EMPTY_STRING(value); 212 } 213 return; 214 } 215} 216/* }}} */ 217 218/* {{{ php_filter_encoded */ 219void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL) 220{ 221 /* apply strip_high and strip_low filters */ 222 php_filter_strip(value, flags); 223 /* urlencode */ 224 php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1); 225} 226/* }}} */ 227 228/* {{{ php_filter_special_chars */ 229void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL) 230{ 231 unsigned char enc[256] = {0}; 232 233 php_filter_strip(value, flags); 234 235 /* encodes ' " < > & \0 to numerical entities */ 236 enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1; 237 238 /* if strip low is not set, then we encode them as &#xx; */ 239 memset(enc, 1, 32); 240 241 if (flags & FILTER_FLAG_ENCODE_HIGH) { 242 memset(enc + 127, 1, sizeof(enc) - 127); 243 } 244 245 php_filter_encode_html(value, enc); 246} 247/* }}} */ 248 249/* {{{ php_filter_full_special_chars */ 250void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL) 251{ 252 char *buf; 253 size_t len; 254 int quotes; 255 256 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) { 257 quotes = ENT_QUOTES; 258 } else { 259 quotes = ENT_NOQUOTES; 260 } 261 buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC); 262 str_efree(Z_STRVAL_P(value)); 263 Z_STRVAL_P(value) = buf; 264 Z_STRLEN_P(value) = len; 265} 266/* }}} */ 267 268/* {{{ php_filter_unsafe_raw */ 269void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL) 270{ 271 /* Only if no flags are set (optimization) */ 272 if (flags != 0 && Z_STRLEN_P(value) > 0) { 273 unsigned char enc[256] = {0}; 274 275 php_filter_strip(value, flags); 276 277 if (flags & FILTER_FLAG_ENCODE_AMP) { 278 enc['&'] = 1; 279 } 280 if (flags & FILTER_FLAG_ENCODE_LOW) { 281 memset(enc, 1, 32); 282 } 283 if (flags & FILTER_FLAG_ENCODE_HIGH) { 284 memset(enc + 127, 1, sizeof(enc) - 127); 285 } 286 287 php_filter_encode_html(value, enc); 288 } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) { 289 zval_dtor(value); 290 ZVAL_NULL(value); 291 } 292} 293/* }}} */ 294 295 296 297/* {{{ php_filter_email */ 298#define SAFE "$-_.+" 299#define EXTRA "!*'()," 300#define NATIONAL "{}|\\^~[]`" 301#define PUNCTUATION "<>#%\"" 302#define RESERVED ";/?:@&=" 303 304void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL) 305{ 306 /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */ 307 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]"; 308 filter_map map; 309 310 filter_map_init(&map); 311 filter_map_update(&map, 1, allowed_list); 312 filter_map_apply(value, &map); 313} 314/* }}} */ 315 316/* {{{ php_filter_url */ 317void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL) 318{ 319 /* Strip all chars not part of section 5 of 320 * http://www.faqs.org/rfcs/rfc1738.html */ 321 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED; 322 filter_map map; 323 324 filter_map_init(&map); 325 filter_map_update(&map, 1, allowed_list); 326 filter_map_apply(value, &map); 327} 328/* }}} */ 329 330/* {{{ php_filter_number_int */ 331void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL) 332{ 333 /* strip everything [^0-9+-] */ 334 const unsigned char allowed_list[] = "+-" DIGIT; 335 filter_map map; 336 337 filter_map_init(&map); 338 filter_map_update(&map, 1, allowed_list); 339 filter_map_apply(value, &map); 340} 341/* }}} */ 342 343/* {{{ php_filter_number_float */ 344void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL) 345{ 346 /* strip everything [^0-9+-] */ 347 const unsigned char allowed_list[] = "+-" DIGIT; 348 filter_map map; 349 350 filter_map_init(&map); 351 filter_map_update(&map, 1, allowed_list); 352 353 /* depending on flags, strip '.', 'e', ",", "'" */ 354 if (flags & FILTER_FLAG_ALLOW_FRACTION) { 355 filter_map_update(&map, 2, (const unsigned char *) "."); 356 } 357 if (flags & FILTER_FLAG_ALLOW_THOUSAND) { 358 filter_map_update(&map, 3, (const unsigned char *) ","); 359 } 360 if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) { 361 filter_map_update(&map, 4, (const unsigned char *) "eE"); 362 } 363 filter_map_apply(value, &map); 364} 365/* }}} */ 366 367/* {{{ php_filter_magic_quotes */ 368void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL) 369{ 370 char *buf; 371 int len; 372 373 /* just call php_addslashes quotes */ 374 buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC); 375 376 str_efree(Z_STRVAL_P(value)); 377 Z_STRVAL_P(value) = buf; 378 Z_STRLEN_P(value) = len; 379} 380/* }}} */ 381 382/* 383 * Local variables: 384 * tab-width: 4 385 * c-basic-offset: 4 386 * End: 387 * vim600: noet sw=4 ts=4 fdm=marker 388 * vim<600: noet sw=4 ts=4 389 */ 390