root/ext/tokenizer/tokenizer.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. tokenizer_token_get_all_register_constants
  2. ZEND_GET_MODULE
  3. PHP_MINFO_FUNCTION
  4. tokenize
  5. on_event
  6. tokenize_parse
  7. PHP_FUNCTION
  8. PHP_FUNCTION

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 7                                                        |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 1997-2016 The PHP Group                                |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15    | Author: Andrei Zmievski <andrei@php.net>                             |
  16    +----------------------------------------------------------------------+
  17 */
  18 
  19 /* $Id$ */
  20 
  21 #ifdef HAVE_CONFIG_H
  22 #include "config.h"
  23 #endif
  24 
  25 #include "php.h"
  26 #include "php_ini.h"
  27 #include "ext/standard/info.h"
  28 #include "php_tokenizer.h"
  29 
  30 #include "zend.h"
  31 #include "zend_exceptions.h"
  32 #include "zend_language_scanner.h"
  33 #include "zend_language_scanner_defs.h"
  34 #include <zend_language_parser.h>
  35 
  36 #define zendtext   LANG_SCNG(yy_text)
  37 #define zendleng   LANG_SCNG(yy_leng)
  38 #define zendcursor LANG_SCNG(yy_cursor)
  39 #define zendlimit  LANG_SCNG(yy_limit)
  40 
  41 #define TOKEN_PARSE                             1
  42 
  43 void tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS) {
  44         REGISTER_LONG_CONSTANT("TOKEN_PARSE", TOKEN_PARSE, CONST_CS|CONST_PERSISTENT);
  45 }
  46 
  47 /* {{{ arginfo */
  48 ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1)
  49         ZEND_ARG_INFO(0, source)
  50         ZEND_ARG_INFO(0, flags)
  51 ZEND_END_ARG_INFO()
  52 
  53 ZEND_BEGIN_ARG_INFO_EX(arginfo_token_name, 0, 0, 1)
  54         ZEND_ARG_INFO(0, token)
  55 ZEND_END_ARG_INFO()
  56 /* }}} */
  57 
  58 /* {{{ tokenizer_functions[]
  59  *
  60  * Every user visible function must have an entry in tokenizer_functions[].
  61  */
  62 const zend_function_entry tokenizer_functions[] = {
  63         PHP_FE(token_get_all,   arginfo_token_get_all)
  64         PHP_FE(token_name,              arginfo_token_name)
  65         PHP_FE_END
  66 };
  67 /* }}} */
  68 
  69 /* {{{ tokenizer_module_entry
  70  */
  71 zend_module_entry tokenizer_module_entry = {
  72         STANDARD_MODULE_HEADER,
  73         "tokenizer",
  74         tokenizer_functions,
  75         PHP_MINIT(tokenizer),
  76         NULL,
  77         NULL,
  78         NULL,
  79         PHP_MINFO(tokenizer),
  80         PHP_TOKENIZER_VERSION,
  81         STANDARD_MODULE_PROPERTIES
  82 };
  83 /* }}} */
  84 
  85 #ifdef COMPILE_DL_TOKENIZER
  86 ZEND_GET_MODULE(tokenizer)
  87 #endif
  88 
  89 /* {{{ PHP_MINIT_FUNCTION
  90  */
  91 PHP_MINIT_FUNCTION(tokenizer)
  92 {
  93         tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
  94         tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS_PASSTHRU);
  95         return SUCCESS;
  96 }
  97 /* }}} */
  98 
  99 /* {{{ PHP_MINFO_FUNCTION
 100  */
 101 PHP_MINFO_FUNCTION(tokenizer)
 102 {
 103         php_info_print_table_start();
 104         php_info_print_table_row(2, "Tokenizer Support", "enabled");
 105         php_info_print_table_end();
 106 }
 107 /* }}} */
 108 
 109 static zend_bool tokenize(zval *return_value, zend_string *source)
 110 {
 111         zval source_zval;
 112         zend_lex_state original_lex_state;
 113         zval token;
 114         zval keyword;
 115         int token_type;
 116         int token_line = 1;
 117         int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
 118 
 119         ZVAL_STR_COPY(&source_zval, source);
 120         zend_save_lexical_state(&original_lex_state);
 121 
 122         if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) {
 123                 zend_restore_lexical_state(&original_lex_state);
 124                 return 0;
 125         }
 126 
 127         LANG_SCNG(yy_state) = yycINITIAL;
 128         array_init(return_value);
 129 
 130         ZVAL_UNDEF(&token);
 131         while ((token_type = lex_scan(&token))) {
 132                 if (token_type == T_CLOSE_TAG && zendtext[zendleng - 1] != '>') {
 133                         CG(zend_lineno)++;
 134                 }
 135 
 136                 if (token_type >= 256) {
 137                         array_init(&keyword);
 138                         add_next_index_long(&keyword, token_type);
 139                         if (token_type == T_END_HEREDOC) {
 140                                 if (CG(increment_lineno)) {
 141                                         token_line = ++CG(zend_lineno);
 142                                         CG(increment_lineno) = 0;
 143                                 }
 144                         }
 145                         add_next_index_stringl(&keyword, (char *)zendtext, zendleng);
 146                         add_next_index_long(&keyword, token_line);
 147                         add_next_index_zval(return_value, &keyword);
 148                 } else {
 149                         add_next_index_stringl(return_value, (char *)zendtext, zendleng);
 150                 }
 151 
 152                 if (Z_TYPE(token) != IS_UNDEF) {
 153                         zval_dtor(&token);
 154                         ZVAL_UNDEF(&token);
 155                 }
 156 
 157                 /* after T_HALT_COMPILER collect the next three non-dropped tokens */
 158                 if (need_tokens != -1) {
 159                         if (token_type != T_WHITESPACE && token_type != T_OPEN_TAG
 160                                 && token_type != T_COMMENT && token_type != T_DOC_COMMENT
 161                                 && --need_tokens == 0
 162                         ) {
 163                                 /* fetch the rest into a T_INLINE_HTML */
 164                                 if (zendcursor != zendlimit) {
 165                                         array_init(&keyword);
 166                                         add_next_index_long(&keyword, T_INLINE_HTML);
 167                                         add_next_index_stringl(&keyword, (char *)zendcursor, zendlimit - zendcursor);
 168                                         add_next_index_long(&keyword, token_line);
 169                                         add_next_index_zval(return_value, &keyword);
 170                                 }
 171                                 break;
 172                         }
 173                 } else if (token_type == T_HALT_COMPILER) {
 174                         need_tokens = 3;
 175                 }
 176 
 177                 token_line = CG(zend_lineno);
 178         }
 179 
 180         zval_dtor(&source_zval);
 181         zend_restore_lexical_state(&original_lex_state);
 182 
 183         return 1;
 184 }
 185 
 186 zval token_stream;
 187 
 188 void on_event(zend_php_scanner_event event, int token, int line)
 189 {
 190         zval keyword;
 191         HashTable *tokens_ht;
 192         zval *token_zv;
 193 
 194         switch (event) {
 195                 case ON_TOKEN:
 196                         if (token == END) break;
 197                         if (token >= 256) {
 198                                 array_init(&keyword);
 199                                 add_next_index_long(&keyword, token);
 200                                 add_next_index_stringl(&keyword, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
 201                                 add_next_index_long(&keyword, line);
 202                                 add_next_index_zval(&token_stream, &keyword);
 203                         } else {
 204                                 add_next_index_stringl(&token_stream, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
 205                         }
 206                         break;
 207                 case ON_FEEDBACK:
 208                         tokens_ht = Z_ARRVAL(token_stream);
 209                         token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1);
 210                         if (token_zv && Z_TYPE_P(token_zv) == IS_ARRAY) {
 211                                 ZVAL_LONG(zend_hash_index_find(Z_ARRVAL_P(token_zv), 0), token);
 212                         }
 213                         break;
 214                 case ON_STOP:
 215                         if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) {
 216                                 array_init(&keyword);
 217                                 add_next_index_long(&keyword, T_INLINE_HTML);
 218                                 add_next_index_stringl(&keyword,
 219                                         (char *)LANG_SCNG(yy_cursor), LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor));
 220                                 add_next_index_long(&keyword, CG(zend_lineno));
 221                                 add_next_index_zval(&token_stream, &keyword);
 222                         }
 223                         break;
 224         }
 225 }
 226 
 227 static zend_bool tokenize_parse(zval *return_value, zend_string *source)
 228 {
 229         zval source_zval;
 230         zend_lex_state original_lex_state;
 231         zend_bool original_in_compilation;
 232         zend_bool success;
 233 
 234         ZVAL_STR_COPY(&source_zval, source);
 235 
 236         original_in_compilation = CG(in_compilation);
 237         CG(in_compilation) = 1;
 238         zend_save_lexical_state(&original_lex_state);
 239 
 240         if ((success = (zend_prepare_string_for_scanning(&source_zval, "") == SUCCESS))) {
 241                 CG(ast) = NULL;
 242                 CG(ast_arena) = zend_arena_create(1024 * 32);
 243                 LANG_SCNG(yy_state) = yycINITIAL;
 244                 LANG_SCNG(on_event) = on_event;
 245 
 246                 array_init(&token_stream);
 247                 if((success = (zendparse() == SUCCESS))) {
 248                         ZVAL_COPY_VALUE(return_value, &token_stream);
 249                 } else {
 250                         zval_ptr_dtor(&token_stream);
 251                 }
 252 
 253                 zend_ast_destroy(CG(ast));
 254                 zend_arena_destroy(CG(ast_arena));
 255         }
 256 
 257         /* restore compiler and scanner global states */
 258         zend_restore_lexical_state(&original_lex_state);
 259         CG(in_compilation) = original_in_compilation;
 260 
 261         zval_dtor(&source_zval);
 262 
 263         return success;
 264 }
 265 
 266 /* }}} */
 267 
 268 /* {{{ proto array token_get_all(string source [, int flags])
 269  */
 270 PHP_FUNCTION(token_get_all)
 271 {
 272         zend_string *source;
 273         zend_long flags = 0;
 274         zend_bool success;
 275 
 276         if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|l", &source, &flags) == FAILURE) {
 277                 return;
 278         }
 279 
 280         if (flags & TOKEN_PARSE) {
 281                 success = tokenize_parse(return_value, source);
 282         } else {
 283                 success = tokenize(return_value, source);
 284                 /* Normal token_get_all() should not throw. */
 285                 zend_clear_exception();
 286         }
 287 
 288         if (!success) RETURN_FALSE;
 289 }
 290 /* }}} */
 291 
 292 /* {{{ proto string token_name(int type)
 293  */
 294 PHP_FUNCTION(token_name)
 295 {
 296         zend_long type;
 297 
 298         if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &type) == FAILURE) {
 299                 return;
 300         }
 301 
 302         RETVAL_STRING(get_token_type_name(type));
 303 }
 304 /* }}} */
 305 
 306 /*
 307  * Local variables:
 308  * tab-width: 4
 309  * c-basic-offset: 4
 310  * End:
 311  * vim600: noet sw=4 ts=4 fdm=marker
 312  * vim<600: noet sw=4 ts=4
 313  */

/* [<][>][^][v][top][bottom][index][help] */