root/ext/filter/sanitizing_filters.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. php_filter_encode_html
  2. php_filter_encode_url
  3. php_filter_strip
  4. filter_map_init
  5. filter_map_update
  6. filter_map_apply
  7. php_filter_string
  8. php_filter_encoded
  9. php_filter_special_chars
  10. php_filter_full_special_chars
  11. php_filter_unsafe_raw
  12. php_filter_email
  13. php_filter_url
  14. php_filter_number_int
  15. php_filter_number_float
  16. php_filter_magic_quotes

   1 /*
   2   +----------------------------------------------------------------------+
   3   | PHP Version 7                                                        |
   4   +----------------------------------------------------------------------+
   5   | Copyright (c) 1997-2016 The PHP Group                                |
   6   +----------------------------------------------------------------------+
   7   | This source file is subject to version 3.01 of the PHP license,      |
   8   | that is bundled with this package in the file LICENSE, and is        |
   9   | available through the world-wide-web at the following url:           |
  10   | http://www.php.net/license/3_01.txt                                  |
  11   | If you did not receive a copy of the PHP license and are unable to   |
  12   | obtain it through the world-wide-web, please send a note to          |
  13   | license@php.net so we can mail you a copy immediately.               |
  14   +----------------------------------------------------------------------+
  15   | Authors: Derick Rethans <derick@php.net>                             |
  16   +----------------------------------------------------------------------+
  17 */
  18 
  19 /* $Id$ */
  20 
  21 #include "php_filter.h"
  22 #include "filter_private.h"
  23 #include "zend_smart_str.h"
  24 
   25 /* {{{ STRUCTS */
   26 typedef unsigned long filter_map[256]; /* lookup table with one slot per possible byte value; filter_map_apply() keeps a byte only when its slot is non-zero */
   27 /* }}} */
  28 
  29 /* {{{ HELPER FUNCTIONS */
  30 static void php_filter_encode_html(zval *value, const unsigned char *chars)
  31 {
  32         smart_str str = {0};
  33         size_t len = Z_STRLEN_P(value);
  34         unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
  35         unsigned char *e = s + len;
  36 
  37         if (Z_STRLEN_P(value) == 0) {
  38                 return;
  39         }
  40 
  41         while (s < e) {
  42                 if (chars[*s]) {
  43                         smart_str_appendl(&str, "&#", 2);
  44                         smart_str_append_unsigned(&str, (zend_ulong)*s);
  45                         smart_str_appendc(&str, ';');
  46                 } else {
  47                         /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
  48                         smart_str_appendc(&str, *s);
  49                 }
  50                 s++;
  51         }
  52 
  53         smart_str_0(&str);
  54         zval_ptr_dtor(value);
  55         ZVAL_NEW_STR(value, str.s);
  56 }
  57 
   58 static const unsigned char hexchars[] = "0123456789ABCDEF"; /* indexed with a 4-bit value to obtain its uppercase hex digit */
   59 
   60 #define LOWALPHA    "abcdefghijklmnopqrstuvwxyz" /* lowercase ASCII letters */
   61 #define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* uppercase ASCII letters */
   62 #define DIGIT       "0123456789" /* ASCII decimal digits */
   63 
   64 #define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._" /* bytes that php_filter_encoded() hands to php_filter_encode_url() as the list to leave unescaped */
  65 
  66 static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
  67 {
  68         unsigned char *p;
  69         unsigned char tmp[256];
  70         unsigned char *s = (unsigned char *)chars;
  71         unsigned char *e = s + char_len;
  72         zend_string *str;
  73 
  74         memset(tmp, 1, sizeof(tmp)-1);
  75 
  76         while (s < e) {
  77                 tmp[*s++] = '\0';
  78         }
  79 /* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
  80         if (encode_nul) {
  81                 tmp[0] = 1;
  82         }
  83         if (high) {
  84                 memset(tmp + 127, 1, sizeof(tmp) - 127);
  85         }
  86         if (low) {
  87                 memset(tmp, 1, 32);
  88         }
  89 */
  90         str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
  91         p = (unsigned char *) ZSTR_VAL(str);
  92         s = (unsigned char *) Z_STRVAL_P(value);
  93         e = s + Z_STRLEN_P(value);
  94 
  95         while (s < e) {
  96                 if (tmp[*s]) {
  97                         *p++ = '%';
  98                         *p++ = hexchars[(unsigned char) *s >> 4];
  99                         *p++ = hexchars[(unsigned char) *s & 15];
 100                 } else {
 101                         *p++ = *s;
 102                 }
 103                 s++;
 104         }
 105         *p = '\0';
 106         ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
 107         zval_ptr_dtor(value);
 108         ZVAL_NEW_STR(value, str);
 109 }
 110 
 111 static void php_filter_strip(zval *value, zend_long flags)
 112 {
 113         unsigned char *str;
 114         int   i, c;
 115         zend_string *buf;
 116 
 117         /* Optimization for if no strip flags are set */
 118         if (!(flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK))) {
 119                 return;
 120         }
 121 
 122         str = (unsigned char *)Z_STRVAL_P(value);
 123         buf = zend_string_alloc(Z_STRLEN_P(value) + 1, 0);
 124         c = 0;
 125         for (i = 0; i < Z_STRLEN_P(value); i++) {
 126                 if ((str[i] >= 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
 127                 } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
 128                 } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
 129                 } else {
 130                         ZSTR_VAL(buf)[c] = str[i];
 131                         ++c;
 132                 }
 133         }
 134         /* update zval string data */
 135         ZSTR_VAL(buf)[c] = '\0';
 136         ZSTR_LEN(buf) = c;
 137         zval_ptr_dtor(value);
 138         ZVAL_NEW_STR(value, buf);
 139 }
 140 /* }}} */
 141 
 142 /* {{{ FILTER MAP HELPERS */
 143 static void filter_map_init(filter_map *map)
 144 {
 145         memset(map, 0, sizeof(filter_map));
 146 }
 147 
 148 static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
 149 {
 150         size_t l, i;
 151 
 152         l = strlen((const char*)allowed_list);
 153         for (i = 0; i < l; ++i) {
 154                 (*map)[allowed_list[i]] = flag;
 155         }
 156 }
 157 
 158 static void filter_map_apply(zval *value, filter_map *map)
 159 {
 160         unsigned char *str;
 161         int   i, c;
 162         zend_string *buf;
 163 
 164         str = (unsigned char *)Z_STRVAL_P(value);
 165         buf = zend_string_alloc(Z_STRLEN_P(value) + 1, 0);
 166         c = 0;
 167         for (i = 0; i < Z_STRLEN_P(value); i++) {
 168                 if ((*map)[str[i]]) {
 169                         ZSTR_VAL(buf)[c] = str[i];
 170                         ++c;
 171                 }
 172         }
 173         /* update zval string data */
 174         ZSTR_VAL(buf)[c] = '\0';
 175         ZSTR_LEN(buf) = c;
 176         zval_ptr_dtor(value);
 177         ZVAL_NEW_STR(value, buf);
 178 }
 179 /* }}} */
 180 
 181 /* {{{ php_filter_string */
 182 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
 183 {
 184         size_t new_len;
 185         unsigned char enc[256] = {0};
 186 
 187         if (!Z_REFCOUNTED_P(value)) {
 188                 ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
 189         }
 190 
 191         /* strip high/strip low ( see flags )*/
 192         php_filter_strip(value, flags);
 193 
 194         if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
 195                 enc['\''] = enc['"'] = 1;
 196         }
 197         if (flags & FILTER_FLAG_ENCODE_AMP) {
 198                 enc['&'] = 1;
 199         }
 200         if (flags & FILTER_FLAG_ENCODE_LOW) {
 201                 memset(enc, 1, 32);
 202         }
 203         if (flags & FILTER_FLAG_ENCODE_HIGH) {
 204                 memset(enc + 127, 1, sizeof(enc) - 127);
 205         }
 206 
 207         php_filter_encode_html(value, enc);
 208 
 209         /* strip tags, implicitly also removes \0 chars */
 210         new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
 211         Z_STRLEN_P(value) = new_len;
 212 
 213         if (new_len == 0) {
 214                 zval_dtor(value);
 215                 if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
 216                         ZVAL_NULL(value);
 217                 } else {
 218                         ZVAL_EMPTY_STRING(value);
 219                 }
 220                 return;
 221         }
 222 }
 223 /* }}} */
 224 
 225 /* {{{ php_filter_encoded */
 226 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
 227 {
 228         /* apply strip_high and strip_low filters */
 229         php_filter_strip(value, flags);
 230         /* urlencode */
 231         php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
 232 }
 233 /* }}} */
 234 
 235 /* {{{ php_filter_special_chars */
 236 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
 237 {
 238         unsigned char enc[256] = {0};
 239 
 240         php_filter_strip(value, flags);
 241 
 242         /* encodes ' " < > & \0 to numerical entities */
 243         enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
 244 
 245         /* if strip low is not set, then we encode them as &#xx; */
 246         memset(enc, 1, 32);
 247 
 248         if (flags & FILTER_FLAG_ENCODE_HIGH) {
 249                 memset(enc + 127, 1, sizeof(enc) - 127);
 250         }
 251 
 252         php_filter_encode_html(value, enc);
 253 }
 254 /* }}} */
 255 
 256 /* {{{ php_filter_full_special_chars */
 257 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
 258 {
 259         zend_string *buf;
 260         int quotes;
 261 
 262         if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
 263                 quotes = ENT_QUOTES;
 264         } else {
 265                 quotes = ENT_NOQUOTES;
 266         }
 267         buf = php_escape_html_entities_ex((unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0);
 268         zval_ptr_dtor(value);
 269         ZVAL_STR(value, buf);
 270 }
 271 /* }}} */
 272 
 273 /* {{{ php_filter_unsafe_raw */
 274 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
 275 {
 276         /* Only if no flags are set (optimization) */
 277         if (flags != 0 && Z_STRLEN_P(value) > 0) {
 278                 unsigned char enc[256] = {0};
 279 
 280                 php_filter_strip(value, flags);
 281 
 282                 if (flags & FILTER_FLAG_ENCODE_AMP) {
 283                         enc['&'] = 1;
 284                 }
 285                 if (flags & FILTER_FLAG_ENCODE_LOW) {
 286                         memset(enc, 1, 32);
 287                 }
 288                 if (flags & FILTER_FLAG_ENCODE_HIGH) {
 289                         memset(enc + 127, 1, sizeof(enc) - 127);
 290                 }
 291 
 292                 php_filter_encode_html(value, enc);
 293         } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
 294                 zval_dtor(value);
 295                 ZVAL_NULL(value);
 296         }
 297 }
 298 /* }}} */
 299 
 300 /* {{{ php_filter_email */
  301 #define SAFE        "$-_.+" /* these five sets are concatenated into php_filter_url()'s allowed list; the names presumably follow the character classes of RFC 1738 section 5 - confirm */
  302 #define EXTRA       "!*'(),"
  303 #define NATIONAL    "{}|\\^~[]`" /* the doubled backslash is a single '\' in the resulting string */
  304 #define PUNCTUATION "<>#%\""
  305 #define RESERVED    ";/?:@&="
 306 
 307 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
 308 {
 309         /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
 310         const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
 311         filter_map     map;
 312 
 313         filter_map_init(&map);
 314         filter_map_update(&map, 1, allowed_list);
 315         filter_map_apply(value, &map);
 316 }
 317 /* }}} */
 318 
 319 /* {{{ php_filter_url */
 320 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
 321 {
 322         /* Strip all chars not part of section 5 of
 323          * http://www.faqs.org/rfcs/rfc1738.html */
 324         const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
 325         filter_map     map;
 326 
 327         filter_map_init(&map);
 328         filter_map_update(&map, 1, allowed_list);
 329         filter_map_apply(value, &map);
 330 }
 331 /* }}} */
 332 
 333 /* {{{ php_filter_number_int */
 334 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
 335 {
 336         /* strip everything [^0-9+-] */
 337         const unsigned char allowed_list[] = "+-" DIGIT;
 338         filter_map     map;
 339 
 340         filter_map_init(&map);
 341         filter_map_update(&map, 1, allowed_list);
 342         filter_map_apply(value, &map);
 343 }
 344 /* }}} */
 345 
 346 /* {{{ php_filter_number_float */
 347 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
 348 {
 349         /* strip everything [^0-9+-] */
 350         const unsigned char allowed_list[] = "+-" DIGIT;
 351         filter_map     map;
 352 
 353         filter_map_init(&map);
 354         filter_map_update(&map, 1, allowed_list);
 355 
 356         /* depending on flags, strip '.', 'e', ",", "'" */
 357         if (flags & FILTER_FLAG_ALLOW_FRACTION) {
 358                 filter_map_update(&map, 2, (const unsigned char *) ".");
 359         }
 360         if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
 361                 filter_map_update(&map, 3,  (const unsigned char *) ",");
 362         }
 363         if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
 364                 filter_map_update(&map, 4,  (const unsigned char *) "eE");
 365         }
 366         filter_map_apply(value, &map);
 367 }
 368 /* }}} */
 369 
 370 /* {{{ php_filter_magic_quotes */
 371 void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
 372 {
 373         zend_string *buf;
 374 
 375         /* just call php_addslashes quotes */
 376         buf = php_addslashes(Z_STR_P(value), 0);
 377 
 378         zval_ptr_dtor(value);
 379         ZVAL_STR(value, buf);
 380 }
 381 /* }}} */
 382 
 383 /*
 384  * Local variables:
 385  * tab-width: 4
 386  * c-basic-offset: 4
 387  * End:
 388  * vim600: noet sw=4 ts=4 fdm=marker
 389  * vim<600: noet sw=4 ts=4
 390  */

/* [<][>][^][v][top][bottom][index][help] */