root/ext/standard/scanf.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. BuildCharSet
  2. CharInSet
  3. ReleaseCharSet
  4. ValidateFormat
  5. php_sscanf_internal
  6. scan_set_error_return

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 7                                                        |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 1997-2016 The PHP Group                                |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15    | Author: Clayton Collie <clcollie@mindspring.com>                     |
  16    +----------------------------------------------------------------------+
  17 */
  18 
  19 /* $Id$ */
  20 
  21 /*
  22         scanf.c --
  23 
  24         This file contains the base code which implements sscanf and by extension
  25         fscanf. Original code is from TCL8.3.0 and bears the following copyright:
  26 
  27         This software is copyrighted by the Regents of the University of
  28         California, Sun Microsystems, Inc., Scriptics Corporation,
  29         and other parties.  The following terms apply to all files associated
  30         with the software unless explicitly disclaimed in individual files.
  31 
  32         The authors hereby grant permission to use, copy, modify, distribute,
  33         and license this software and its documentation for any purpose, provided
  34         that existing copyright notices are retained in all copies and that this
  35         notice is included verbatim in any distributions. No written agreement,
  36         license, or royalty fee is required for any of the authorized uses.
  37         Modifications to this software may be copyrighted by their authors
  38         and need not follow the licensing terms described here, provided that
  39         the new terms are clearly indicated on the first page of each file where
  40         they apply.
  41 
  42         IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
  43         FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  44         ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
  45         DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
  46         POSSIBILITY OF SUCH DAMAGE.
  47 
  48         THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
  49         INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
  50         FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
  51         IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
  52         NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
  53         MODIFICATIONS.
  54 
  55         GOVERNMENT USE: If you are acquiring this software on behalf of the
  56         U.S. government, the Government shall have only "Restricted Rights"
  57         in the software and related documentation as defined in the Federal
  58         Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
  59         are acquiring the software on behalf of the Department of Defense, the
  60         software shall be classified as "Commercial Computer Software" and the
  61         Government shall have only "Restricted Rights" as defined in Clause
  62         252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
  63         authors grant the U.S. Government and others acting in its behalf
  64         permission to use and distribute the software in accordance with the
  65         terms specified in this license.
  66 */
  67 
  68 #include <stdio.h>
  69 #include <limits.h>
  70 #include <ctype.h>
  71 #include "php.h"
  72 #include "php_variables.h"
  73 #ifdef HAVE_LOCALE_H
  74 #include <locale.h>
  75 #endif
  76 #include "zend_execute.h"
  77 #include "zend_operators.h"
  78 #include "zend_strtod.h"
  79 #include "php_globals.h"
  80 #include "basic_functions.h"
  81 #include "scanf.h"
  82 
  83 /*
  84  * Flag values used internally by [f|s]canf.
  85  */
  86 #define SCAN_NOSKIP     0x1       /* Don't skip blanks. */
  87 #define SCAN_SUPPRESS   0x2       /* Suppress assignment. */
  88 #define SCAN_UNSIGNED   0x4       /* Read an unsigned value. */
  89 #define SCAN_WIDTH      0x8       /* A width value was supplied. */
  90 
  91 #define SCAN_SIGNOK     0x10      /* A +/- character is allowed. */
  92 #define SCAN_NODIGITS   0x20      /* No digits have been scanned. */
  93 #define SCAN_NOZERO     0x40      /* No zero digits have been scanned. */
  94 #define SCAN_XOK        0x80      /* An 'x' is allowed. */
  95 #define SCAN_PTOK       0x100     /* Decimal point is allowed. */
  96 #define SCAN_EXPOK      0x200     /* An exponent is allowed. */
  97 
  98 #define UCHAR(x)                (zend_uchar)(x)
  99 
 100 /*
 101  * The following structure contains the information associated with
 102  * a character set.
 103  */
 104 typedef struct CharSet {
 105         int exclude;            /* 1 if this is an exclusion set. */
 106         int nchars;
 107         char *chars;
 108         int nranges;
 109         struct Range {
 110                 char start;
 111                 char end;
 112         } *ranges;
 113 } CharSet;
 114 
 115 /*
 116  * Declarations for functions used only in this file.
 117  */
 118 static char *BuildCharSet(CharSet *cset, char *format);
 119 static int      CharInSet(CharSet *cset, int ch);
 120 static void     ReleaseCharSet(CharSet *cset);
 121 static inline void scan_set_error_return(int numVars, zval *return_value);
 122 
 123 
 124 /* {{{ BuildCharSet
 125  *----------------------------------------------------------------------
 126  *
 127  * BuildCharSet --
 128  *
 129  *      This function examines a character set format specification
 130  *      and builds a CharSet containing the individual characters and
 131  *      character ranges specified.
 132  *
 133  * Results:
 134  *      Returns the next format position.
 135  *
 136  * Side effects:
 137  *      Initializes the charset.
 138  *
 139  *----------------------------------------------------------------------
 140  */
 141 static char * BuildCharSet(CharSet *cset, char *format)
 142 {
 143         char *ch, start;
 144         int  nranges;
 145         char *end;
 146 
 147         memset(cset, 0, sizeof(CharSet));
 148 
 149         ch = format;
 150         if (*ch == '^') {
 151                 cset->exclude = 1;
 152                 ch = ++format;
 153         }
 154         end = format + 1;       /* verify this - cc */
 155 
 156         /*
 157          * Find the close bracket so we can overallocate the set.
 158          */
 159         if (*ch == ']') {
 160                 ch = end++;
 161         }
 162         nranges = 0;
 163         while (*ch != ']') {
 164                 if (*ch == '-') {
 165                         nranges++;
 166                 }
 167                 ch = end++;
 168         }
 169 
 170         cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
 171         if (nranges > 0) {
 172                 cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
 173         } else {
 174                 cset->ranges = NULL;
 175         }
 176 
 177         /*
 178          * Now build the character set.
 179          */
 180         cset->nchars = cset->nranges = 0;
 181         ch    = format++;
 182         start = *ch;
 183         if (*ch == ']' || *ch == '-') {
 184                 cset->chars[cset->nchars++] = *ch;
 185                 ch = format++;
 186         }
 187         while (*ch != ']') {
 188                 if (*format == '-') {
 189                         /*
 190                          * This may be the first character of a range, so don't add
 191                          * it yet.
 192                          */
 193                         start = *ch;
 194                 } else if (*ch == '-') {
 195                         /*
 196                          * Check to see if this is the last character in the set, in which
 197                          * case it is not a range and we should add the previous character
 198                          * as well as the dash.
 199                          */
 200                         if (*format == ']') {
 201                                 cset->chars[cset->nchars++] = start;
 202                                 cset->chars[cset->nchars++] = *ch;
 203                         } else {
 204                                 ch = format++;
 205 
 206                                 /*
 207                                  * Check to see if the range is in reverse order.
 208                                  */
 209                                 if (start < *ch) {
 210                                         cset->ranges[cset->nranges].start = start;
 211                                         cset->ranges[cset->nranges].end = *ch;
 212                                 } else {
 213                                         cset->ranges[cset->nranges].start = *ch;
 214                                         cset->ranges[cset->nranges].end = start;
 215                                 }
 216                                 cset->nranges++;
 217                         }
 218                 } else {
 219                         cset->chars[cset->nchars++] = *ch;
 220                 }
 221                 ch = format++;
 222         }
 223         return format;
 224 }
 225 /* }}} */
 226 
 227 /* {{{ CharInSet
 228  *----------------------------------------------------------------------
 229  *
 230  * CharInSet --
 231  *
 232  *      Check to see if a character matches the given set.
 233  *
 234  * Results:
 235  *      Returns non-zero if the character matches the given set.
 236  *
 237  * Side effects:
 238  *      None.
 239  *
 240  *----------------------------------------------------------------------
 241  */
 242 static int CharInSet(CharSet *cset, int c)
 243 {
 244         char ch = (char) c;
 245         int i, match = 0;
 246 
 247         for (i = 0; i < cset->nchars; i++) {
 248                 if (cset->chars[i] == ch) {
 249                         match = 1;
 250                         break;
 251                 }
 252         }
 253         if (!match) {
 254                 for (i = 0; i < cset->nranges; i++) {
 255                         if ((cset->ranges[i].start <= ch)
 256                                 && (ch <= cset->ranges[i].end)) {
 257                                 match = 1;
 258                                 break;
 259                         }
 260                 }
 261         }
 262         return (cset->exclude ? !match : match);
 263 }
 264 /* }}} */
 265 
 266 /* {{{ ReleaseCharSet
 267  *----------------------------------------------------------------------
 268  *
 269  * ReleaseCharSet --
 270  *
 271  *      Free the storage associated with a character set.
 272  *
 273  * Results:
 274  *      None.
 275  *
 276  * Side effects:
 277  *      None.
 278  *
 279  *----------------------------------------------------------------------
 280  */
 281 static void ReleaseCharSet(CharSet *cset)
 282 {
 283         efree((char *)cset->chars);
 284         if (cset->ranges) {
 285                 efree((char *)cset->ranges);
 286         }
 287 }
 288 /* }}} */
 289 
 290 /* {{{ ValidateFormat
 291  *----------------------------------------------------------------------
 292  *
 293  * ValidateFormat --
 294  *
 295  *      Parse the format string and verify that it is properly formed
 296  *      and that there are exactly enough variables on the command line.
 297  *
 298  * Results:
 299  *    FAILURE or SUCCESS.
 300  *
 301  * Side effects:
 302  *     May set php_error based on abnormal conditions.
 303  *
 304  * Parameters :
 305  *     format     The format string.
 306  *     numVars    The number of variables passed to the scan command.
 307  *     totalSubs  The number of variables that will be required.
 308  *
 309  *----------------------------------------------------------------------
 310 */
 311 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
 312 {
 313 #define STATIC_LIST_SIZE 16
 314         int gotXpg, gotSequential, value, i, flags;
 315         char *end, *ch = NULL;
 316         int staticAssign[STATIC_LIST_SIZE];
 317         int *nassign = staticAssign;
 318         int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
 319 
 320         /*
 321          * Initialize an array that records the number of times a variable
 322          * is assigned to by the format string.  We use this to detect if
 323          * a variable is multiply assigned or left unassigned.
 324          */
 325         if (numVars > nspace) {
 326                 nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
 327                 nspace = numVars;
 328         }
 329         for (i = 0; i < nspace; i++) {
 330                 nassign[i] = 0;
 331         }
 332 
 333         xpgSize = objIndex = gotXpg = gotSequential = 0;
 334 
 335         while (*format != '\0') {
 336                 ch = format++;
 337                 flags = 0;
 338 
 339                 if (*ch != '%') {
 340                         continue;
 341                 }
 342                 ch = format++;
 343                 if (*ch == '%') {
 344                         continue;
 345                 }
 346                 if (*ch == '*') {
 347                         flags |= SCAN_SUPPRESS;
 348                         ch = format++;
 349                         goto xpgCheckDone;
 350                 }
 351 
 352                 if ( isdigit( (int)*ch ) ) {
 353                         /*
 354                          * Check for an XPG3-style %n$ specification.  Note: there
 355                          * must not be a mixture of XPG3 specs and non-XPG3 specs
 356                          * in the same format string.
 357                          */
 358                         value = ZEND_STRTOUL(format-1, &end, 10);
 359                         if (*end != '$') {
 360                                 goto notXpg;
 361                         }
 362                         format = end+1;
 363                         ch     = format++;
 364                         gotXpg = 1;
 365                         if (gotSequential) {
 366                                 goto mixedXPG;
 367                         }
 368                         objIndex = value - 1;
 369                         if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
 370                                 goto badIndex;
 371                         } else if (numVars == 0) {
 372                                 /*
 373                                  * In the case where no vars are specified, the user can
 374                                  * specify %9999$ legally, so we have to consider special
 375                                  * rules for growing the assign array.  'value' is
 376                                  * guaranteed to be > 0.
 377                                  */
 378 
 379                                 /* set a lower artificial limit on this
 380                                  * in the interest of security and resource friendliness
 381                                  * 255 arguments should be more than enough. - cc
 382                                  */
 383                                 if (value > SCAN_MAX_ARGS) {
 384                                         goto badIndex;
 385                                 }
 386 
 387                                 xpgSize = (xpgSize > value) ? xpgSize : value;
 388                         }
 389                         goto xpgCheckDone;
 390                 }
 391 
 392 notXpg:
 393                 gotSequential = 1;
 394                 if (gotXpg) {
 395 mixedXPG:
 396                         php_error_docref(NULL, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
 397                         goto error;
 398                 }
 399 
 400 xpgCheckDone:
 401                 /*
 402                  * Parse any width specifier.
 403                  */
 404                 if (isdigit(UCHAR(*ch))) {
 405                         value = ZEND_STRTOUL(format-1, &format, 10);
 406                         flags |= SCAN_WIDTH;
 407                         ch = format++;
 408                 }
 409 
 410                 /*
 411                  * Ignore size specifier.
 412                  */
 413                 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
 414                         ch = format++;
 415                 }
 416 
 417                 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
 418                         goto badIndex;
 419                 }
 420 
 421                 /*
 422                  * Handle the various field types.
 423                  */
 424                 switch (*ch) {
 425                         case 'n':
 426                         case 'd':
 427                         case 'D':
 428                         case 'i':
 429                         case 'o':
 430                         case 'x':
 431                         case 'X':
 432                         case 'u':
 433                         case 'f':
 434                         case 'e':
 435                         case 'E':
 436                         case 'g':
 437                         case 's':
 438                                 break;
 439 
 440                         case 'c':
 441                                 /* we differ here with the TCL implementation in allowing for */
 442                                 /* a character width specification, to be more consistent with */
 443                                 /* ANSI. since Zend auto allocates space for vars, this is no */
 444                                 /* problem - cc                                               */
 445                                 /*
 446                                 if (flags & SCAN_WIDTH) {
 447                                         php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
 448                                         goto error;
 449                                 }
 450                                 */
 451                                 break;
 452 
 453                         case '[':
 454                                 if (*format == '\0') {
 455                                         goto badSet;
 456                                 }
 457                                 ch = format++;
 458                                 if (*ch == '^') {
 459                                         if (*format == '\0') {
 460                                                 goto badSet;
 461                                         }
 462                                         ch = format++;
 463                                 }
 464                                 if (*ch == ']') {
 465                                         if (*format == '\0') {
 466                                                 goto badSet;
 467                                         }
 468                                         ch = format++;
 469                                 }
 470                                 while (*ch != ']') {
 471                                         if (*format == '\0') {
 472                                                 goto badSet;
 473                                         }
 474                                         ch = format++;
 475                                 }
 476                                 break;
 477 badSet:
 478                                 php_error_docref(NULL, E_WARNING, "Unmatched [ in format string");
 479                                 goto error;
 480 
 481                         default: {
 482                                 php_error_docref(NULL, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
 483                                 goto error;
 484                         }
 485                 }
 486 
 487                 if (!(flags & SCAN_SUPPRESS)) {
 488                         if (objIndex >= nspace) {
 489                                 /*
 490                                  * Expand the nassign buffer.  If we are using XPG specifiers,
 491                                  * make sure that we grow to a large enough size.  xpgSize is
 492                                  * guaranteed to be at least one larger than objIndex.
 493                                  */
 494                                 value = nspace;
 495                                 if (xpgSize) {
 496                                         nspace = xpgSize;
 497                                 } else {
 498                                         nspace += STATIC_LIST_SIZE;
 499                                 }
 500                                 if (nassign == staticAssign) {
 501                                         nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
 502                                         for (i = 0; i < STATIC_LIST_SIZE; ++i) {
 503                                                 nassign[i] = staticAssign[i];
 504                                         }
 505                                 } else {
 506                                         nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
 507                                 }
 508                                 for (i = value; i < nspace; i++) {
 509                                         nassign[i] = 0;
 510                                 }
 511                         }
 512                         nassign[objIndex]++;
 513                         objIndex++;
 514                 }
 515         } /* while (*format != '\0') */
 516 
 517         /*
 518          * Verify that all of the variable were assigned exactly once.
 519          */
 520         if (numVars == 0) {
 521                 if (xpgSize) {
 522                         numVars = xpgSize;
 523                 } else {
 524                         numVars = objIndex;
 525                 }
 526         }
 527         if (totalSubs) {
 528                 *totalSubs = numVars;
 529         }
 530         for (i = 0; i < numVars; i++) {
 531                 if (nassign[i] > 1) {
 532                         php_error_docref(NULL, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
 533                         goto error;
 534                 } else if (!xpgSize && (nassign[i] == 0)) {
 535                         /*
 536                          * If the space is empty, and xpgSize is 0 (means XPG wasn't
 537                          * used, and/or numVars != 0), then too many vars were given
 538                          */
 539                         php_error_docref(NULL, E_WARNING, "Variable is not assigned by any conversion specifiers");
 540                         goto error;
 541                 }
 542         }
 543 
 544         if (nassign != staticAssign) {
 545                 efree((char *)nassign);
 546         }
 547         return SCAN_SUCCESS;
 548 
 549 badIndex:
 550         if (gotXpg) {
 551                 php_error_docref(NULL, E_WARNING, "%s", "\"%n$\" argument index out of range");
 552         } else {
 553                 php_error_docref(NULL, E_WARNING, "Different numbers of variable names and field specifiers");
 554         }
 555 
 556 error:
 557         if (nassign != staticAssign) {
 558                 efree((char *)nassign);
 559         }
 560         return SCAN_ERROR_INVALID_FORMAT;
 561 #undef STATIC_LIST_SIZE
 562 }
 563 /* }}} */
 564 
 565 /* {{{ php_sscanf_internal
 566  * This is the internal function which does processing on behalf of
 567  * both sscanf() and fscanf()
 568  *
 569  * parameters :
 570  *              string          literal string to be processed
 571  *              format          format string
 572  *              argCount        total number of elements in the args array
 573  *              args            arguments passed in from user function (f|s)scanf
 574  *              varStart        offset (in args) of 1st variable passed in to (f|s)scanf
 575  *              return_value set with the results of the scan
 576  */
 577 
 578 PHPAPI int php_sscanf_internal( char *string, char *format,
 579                                 int argCount, zval *args,
 580                                 int varStart, zval *return_value)
 581 {
 582         int  numVars, nconversions, totalVars = -1;
 583         int  i, result;
 584         zend_long value;
 585         int  objIndex;
 586         char *end, *baseString;
 587         zval *current;
 588         char op   = 0;
 589         int  base = 0;
 590         int  underflow = 0;
 591         size_t width;
 592         zend_long (*fn)() = NULL;
 593         char *ch, sch;
 594         int  flags;
 595         char buf[64];   /* Temporary buffer to hold scanned number
 596                                          * strings before they are passed to strtoul() */
 597 
 598         /* do some sanity checking */
 599         if ((varStart > argCount) || (varStart < 0)){
 600                 varStart = SCAN_MAX_ARGS + 1;
 601         }
 602         numVars = argCount - varStart;
 603         if (numVars < 0) {
 604                 numVars = 0;
 605         }
 606 
 607 #if 0
 608         zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
 609                                         string, format, numVars, varStart);
 610 #endif
 611         /*
 612          * Check for errors in the format string.
 613          */
 614         if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
 615                 scan_set_error_return( numVars, return_value );
 616                 return SCAN_ERROR_INVALID_FORMAT;
 617         }
 618 
 619         objIndex = numVars ? varStart : 0;
 620 
 621         /*
 622          * If any variables are passed, make sure they are all passed by reference
 623          */
 624         if (numVars) {
 625                 for (i = varStart;i < argCount;i++){
 626                         if ( ! Z_ISREF(args[ i ] ) ) {
 627                                 php_error_docref(NULL, E_WARNING, "Parameter %d must be passed by reference", i);
 628                                 scan_set_error_return(numVars, return_value);
 629                                 return SCAN_ERROR_VAR_PASSED_BYVAL;
 630                         }
 631                 }
 632         }
 633 
 634         /*
 635          * Allocate space for the result objects. Only happens when no variables
 636          * are specified
 637          */
 638         if (!numVars) {
 639                 zval tmp;
 640 
 641                 /* allocate an array for return */
 642                 array_init(return_value);
 643 
 644                 for (i = 0; i < totalVars; i++) {
 645                         ZVAL_NULL(&tmp);
 646                         if (add_next_index_zval(return_value, &tmp) == FAILURE) {
 647                                 scan_set_error_return(0, return_value);
 648                                 return FAILURE;
 649                         }
 650                 }
 651                 varStart = 0; /* Array index starts from 0 */
 652         }
 653 
 654         baseString = string;
 655 
 656         /*
 657          * Iterate over the format string filling in the result objects until
 658          * we reach the end of input, the end of the format string, or there
 659          * is a mismatch.
 660          */
 661         nconversions = 0;
 662         /* note ! - we need to limit the loop for objIndex to keep it in bounds */
 663 
 664         while (*format != '\0') {
 665                 ch    = format++;
 666                 flags = 0;
 667 
 668                 /*
 669                  * If we see whitespace in the format, skip whitespace in the string.
 670                  */
 671                 if ( isspace( (int)*ch ) ) {
 672                         sch = *string;
 673                         while ( isspace( (int)sch ) ) {
 674                                 if (*string == '\0') {
 675                                         goto done;
 676                                 }
 677                                 string++;
 678                                 sch = *string;
 679                         }
 680                         continue;
 681                 }
 682 
 683                 if (*ch != '%') {
 684 literal:
 685                         if (*string == '\0') {
 686                                 underflow = 1;
 687                                 goto done;
 688                         }
 689                         sch = *string;
 690                         string++;
 691                         if (*ch != sch) {
 692                                 goto done;
 693                         }
 694                         continue;
 695                 }
 696 
 697                 ch = format++;
 698                 if (*ch == '%') {
 699                         goto literal;
 700                 }
 701 
 702                 /*
 703                  * Check for assignment suppression ('*') or an XPG3-style
 704                  * assignment ('%n$').
 705                  */
 706                 if (*ch == '*') {
 707                         flags |= SCAN_SUPPRESS;
 708                         ch = format++;
 709                 } else if ( isdigit(UCHAR(*ch))) {
 710                         value = ZEND_STRTOUL(format-1, &end, 10);
 711                         if (*end == '$') {
 712                                 format = end+1;
 713                                 ch = format++;
 714                                 objIndex = varStart + value - 1;
 715                         }
 716                 }
 717 
 718                 /*
 719                  * Parse any width specifier.
 720                  */
 721                 if ( isdigit(UCHAR(*ch))) {
 722                         width = ZEND_STRTOUL(format-1, &format, 10);
 723                         ch = format++;
 724                 } else {
 725                         width = 0;
 726                 }
 727 
 728                 /*
 729                  * Ignore size specifier.
 730                  */
 731                 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
 732                         ch = format++;
 733                 }
 734 
 735                 /*
 736                  * Handle the various field types.
 737                  */
 738                 switch (*ch) {
 739                         case 'n':
 740                                 if (!(flags & SCAN_SUPPRESS)) {
 741                                         if (numVars && objIndex >= argCount) {
 742                                                 break;
 743                                         } else if (numVars) {
 744                                                 current = Z_REFVAL(args[objIndex++]);
 745                                                 zval_ptr_dtor(current);
 746                                                 ZVAL_LONG(current, (zend_long)(string - baseString) );
 747                                         } else {
 748                                                 add_index_long(return_value, objIndex++, string - baseString);
 749                                         }
 750                                 }
 751                                 nconversions++;
 752                                 continue;
 753 
 754                         case 'd':
 755                         case 'D':
 756                                 op = 'i';
 757                                 base = 10;
 758                                 fn = (zend_long (*)())ZEND_STRTOL_PTR;
 759                                 break;
 760                         case 'i':
 761                                 op = 'i';
 762                                 base = 0;
 763                                 fn = (zend_long (*)())ZEND_STRTOL_PTR;
 764                                 break;
 765                         case 'o':
 766                                 op = 'i';
 767                                 base = 8;
 768                                 fn = (zend_long (*)())ZEND_STRTOL_PTR;
 769                                 break;
 770                         case 'x':
 771                         case 'X':
 772                                 op = 'i';
 773                                 base = 16;
 774                                 fn = (zend_long (*)())ZEND_STRTOL_PTR;
 775                                 break;
 776                         case 'u':
 777                                 op = 'i';
 778                                 base = 10;
 779                                 flags |= SCAN_UNSIGNED;
 780                                 fn = (zend_long (*)())ZEND_STRTOUL_PTR;
 781                                 break;
 782 
 783                         case 'f':
 784                         case 'e':
 785                         case 'E':
 786                         case 'g':
 787                                 op = 'f';
 788                                 break;
 789 
 790                         case 's':
 791                                 op = 's';
 792                                 break;
 793 
 794                         case 'c':
 795                                 op = 's';
 796                                 flags |= SCAN_NOSKIP;
 797                                 /*-cc-*/
 798                                 if (0 == width) {
 799                                         width = 1;
 800                                 }
 801                                 /*-cc-*/
 802                                 break;
 803                         case '[':
 804                                 op = '[';
 805                                 flags |= SCAN_NOSKIP;
 806                                 break;
 807                 }   /* switch */
 808 
 809                 /*
 810                  * At this point, we will need additional characters from the
 811                  * string to proceed.
 812                  */
 813                 if (*string == '\0') {
 814                         underflow = 1;
 815                         goto done;
 816                 }
 817 
 818                 /*
 819                  * Skip any leading whitespace at the beginning of a field unless
 820                  * the format suppresses this behavior.
 821                  */
 822                 if (!(flags & SCAN_NOSKIP)) {
 823                         while (*string != '\0') {
 824                                 sch = *string;
 825                                 if (! isspace((int)sch) ) {
 826                                         break;
 827                                 }
 828                                 string++;
 829                         }
 830                         if (*string == '\0') {
 831                                 underflow = 1;
 832                                 goto done;
 833                         }
 834                 }
 835 
 836                 /*
 837                  * Perform the requested scanning operation.
 838                  */
 839                 switch (op) {
 840                         case 'c':
 841                         case 's':
 842                                 /*
 843                                  * Scan a string up to width characters or whitespace.
 844                                  */
 845                                 if (width == 0) {
 846                                         width = (size_t) ~0;
 847                                 }
 848                                 end = string;
 849                                 while (*end != '\0') {
 850                                         sch = *end;
 851                                         if ( isspace( (int)sch ) ) {
 852                                                 break;
 853                                         }
 854                                         end++;
 855                                         if (--width == 0) {
 856                                            break;
 857                                         }
 858                                 }
 859                                 if (!(flags & SCAN_SUPPRESS)) {
 860                                         if (numVars && objIndex >= argCount) {
 861                                                 break;
 862                                         } else if (numVars) {
 863                                                 current = Z_REFVAL(args[objIndex++]);
 864                                                 zval_ptr_dtor(current);
 865                                                 ZVAL_STRINGL(current, string, end-string);
 866                                         } else {
 867                                                 add_index_stringl(return_value, objIndex++, string, end-string);
 868                                         }
 869                                 }
 870                                 string = end;
 871                                 break;
 872 
 873                         case '[': {
 874                                 CharSet cset;
 875 
 876                                 if (width == 0) {
 877                                         width = (size_t) ~0;
 878                                 }
 879                                 end = string;
 880 
 881                                 format = BuildCharSet(&cset, format);
 882                                 while (*end != '\0') {
 883                                         sch = *end;
 884                                         if (!CharInSet(&cset, (int)sch)) {
 885                                                 break;
 886                                         }
 887                                         end++;
 888                                         if (--width == 0) {
 889                                                 break;
 890                                         }
 891                                 }
 892                                 ReleaseCharSet(&cset);
 893 
 894                                 if (string == end) {
 895                                         /*
 896                                          * Nothing matched the range, stop processing
 897                                          */
 898                                         goto done;
 899                                 }
 900                                 if (!(flags & SCAN_SUPPRESS)) {
 901                                         if (numVars && objIndex >= argCount) {
 902                                                 break;
 903                                         } else if (numVars) {
 904                                                 current = Z_REFVAL(args[objIndex++]);
 905                                                 zval_ptr_dtor(current);
 906                                                 ZVAL_STRINGL(current, string, end-string);
 907                                         } else {
 908                                                 add_index_stringl(return_value, objIndex++, string, end-string);
 909                                         }
 910                                 }
 911                                 string = end;
 912                                 break;
 913                         }
 914 /*
 915                         case 'c':
 916                            / Scan a single character./
 917 
 918                                 sch = *string;
 919                                 string++;
 920                                 if (!(flags & SCAN_SUPPRESS)) {
 921                                         if (numVars) {
 922                                                 char __buf[2];
 923                                                 __buf[0] = sch;
 924                                                 __buf[1] = '\0';;
 925                                                 current = args[objIndex++];
 926                                                 zval_dtor(*current);
 927                                                 ZVAL_STRINGL( *current, __buf, 1);
 928                                         } else {
 929                                                 add_index_stringl(return_value, objIndex++, &sch, 1);
 930                                         }
 931                                 }
 932                                 break;
 933 */
 934                         case 'i':
 935                                 /*
 936                                  * Scan an unsigned or signed integer.
 937                                  */
 938                                 /*-cc-*/
 939                                 buf[0] = '\0';
 940                                 /*-cc-*/
 941                                 if ((width == 0) || (width > sizeof(buf) - 1)) {
 942                                         width = sizeof(buf) - 1;
 943                                 }
 944 
 945                                 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
 946                                 for (end = buf; width > 0; width--) {
 947                                         switch (*string) {
 948                                                 /*
 949                                                  * The 0 digit has special meaning at the beginning of
 950                                                  * a number.  If we are unsure of the base, it
 951                                                  * indicates that we are in base 8 or base 16 (if it is
 952                                                  * followed by an 'x').
 953                                                  */
 954                                                 case '0':
 955                                                         /*-cc-*/
 956                                                         if (base == 16) {
 957                                                                 flags |= SCAN_XOK;
 958                                                         }
 959                                                         /*-cc-*/
 960                                                         if (base == 0) {
 961                                                                 base = 8;
 962                                                                 flags |= SCAN_XOK;
 963                                                         }
 964                                                         if (flags & SCAN_NOZERO) {
 965                                                                 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
 966                                                         } else {
 967                                                                 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
 968                                                         }
 969                                                         goto addToInt;
 970 
 971                                                 case '1': case '2': case '3': case '4':
 972                                                 case '5': case '6': case '7':
 973                                                         if (base == 0) {
 974                                                                 base = 10;
 975                                                         }
 976                                                         flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
 977                                                         goto addToInt;
 978 
 979                                                 case '8': case '9':
 980                                                         if (base == 0) {
 981                                                                 base = 10;
 982                                                         }
 983                                                         if (base <= 8) {
 984                                                            break;
 985                                                         }
 986                                                         flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
 987                                                         goto addToInt;
 988 
 989                                                 case 'A': case 'B': case 'C':
 990                                                 case 'D': case 'E': case 'F':
 991                                                 case 'a': case 'b': case 'c':
 992                                                 case 'd': case 'e': case 'f':
 993                                                         if (base <= 10) {
 994                                                                 break;
 995                                                         }
 996                                                         flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
 997                                                         goto addToInt;
 998 
 999                                                 case '+': case '-':
1000                                                         if (flags & SCAN_SIGNOK) {
1001                                                                 flags &= ~SCAN_SIGNOK;
1002                                                                 goto addToInt;
1003                                                         }
1004                                                         break;
1005 
1006                                                 case 'x': case 'X':
1007                                                         if ((flags & SCAN_XOK) && (end == buf+1)) {
1008                                                                 base = 16;
1009                                                                 flags &= ~SCAN_XOK;
1010                                                                 goto addToInt;
1011                                                         }
1012                                                         break;
1013                                         }
1014 
1015                                         /*
1016                                          * We got an illegal character so we are done accumulating.
1017                                          */
1018                                         break;
1019 
1020 addToInt:
1021                                         /*
1022                                          * Add the character to the temporary buffer.
1023                                          */
1024                                         *end++ = *string++;
1025                                         if (*string == '\0') {
1026                                                 break;
1027                                         }
1028                                 }
1029 
1030                                 /*
1031                                  * Check to see if we need to back up because we only got a
1032                                  * sign or a trailing x after a 0.
1033                                  */
1034                                 if (flags & SCAN_NODIGITS) {
1035                                         if (*string == '\0') {
1036                                                 underflow = 1;
1037                                         }
1038                                         goto done;
1039                                 } else if (end[-1] == 'x' || end[-1] == 'X') {
1040                                         end--;
1041                                         string--;
1042                                 }
1043 
1044                                 /*
1045                                  * Scan the value from the temporary buffer.  If we are
1046                                  * returning a large unsigned value, we have to convert it back
1047                                  * to a string since PHP only supports signed values.
1048                                  */
1049                                 if (!(flags & SCAN_SUPPRESS)) {
1050                                         *end = '\0';
1051                                         value = (zend_long) (*fn)(buf, NULL, base);
1052                                         if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1053                                                 snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1054                                                 if (numVars && objIndex >= argCount) {
1055                                                         break;
1056                                                 } else if (numVars) {
1057                                                   /* change passed value type to string */
1058                                                         current = Z_REFVAL(args[objIndex++]);
1059                                                         zval_ptr_dtor(current);
1060                                                         ZVAL_STRING(current, buf);
1061                                                 } else {
1062                                                         add_index_string(return_value, objIndex++, buf);
1063                                                 }
1064                                         } else {
1065                                                 if (numVars && objIndex >= argCount) {
1066                                                         break;
1067                                                 } else if (numVars) {
1068                                                         current = Z_REFVAL(args[objIndex++]);
1069                                                         zval_ptr_dtor(current);
1070                                                         ZVAL_LONG(current, value);
1071                                                 } else {
1072                                                         add_index_long(return_value, objIndex++, value);
1073                                                 }
1074                                         }
1075                                 }
1076                                 break;
1077 
1078                         case 'f':
1079                                 /*
1080                                  * Scan a floating point number
1081                                  */
1082                                 buf[0] = '\0';     /* call me pedantic */
1083                                 if ((width == 0) || (width > sizeof(buf) - 1)) {
1084                                         width = sizeof(buf) - 1;
1085                                 }
1086                                 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1087                                 for (end = buf; width > 0; width--) {
1088                                         switch (*string) {
1089                                                 case '0': case '1': case '2': case '3':
1090                                                 case '4': case '5': case '6': case '7':
1091                                                 case '8': case '9':
1092                                                         flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1093                                                         goto addToFloat;
1094                                                 case '+':
1095                                                 case '-':
1096                                                         if (flags & SCAN_SIGNOK) {
1097                                                                 flags &= ~SCAN_SIGNOK;
1098                                                                 goto addToFloat;
1099                                                         }
1100                                                         break;
1101                                                 case '.':
1102                                                         if (flags & SCAN_PTOK) {
1103                                                                 flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1104                                                                 goto addToFloat;
1105                                                         }
1106                                                         break;
1107                                                 case 'e':
1108                                                 case 'E':
1109                                                         /*
1110                                                          * An exponent is not allowed until there has
1111                                                          * been at least one digit.
1112                                                          */
1113                                                         if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1114                                                                 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1115                                                                         | SCAN_SIGNOK | SCAN_NODIGITS;
1116                                                                 goto addToFloat;
1117                                                         }
1118                                                         break;
1119                                         }
1120 
1121                                         /*
1122                                          * We got an illegal character so we are done accumulating.
1123                                          */
1124                                         break;
1125 
1126 addToFloat:
1127                                         /*
1128                                          * Add the character to the temporary buffer.
1129                                          */
1130                                         *end++ = *string++;
1131                                         if (*string == '\0') {
1132                                                 break;
1133                                         }
1134                                 }
1135 
1136                                 /*
1137                                  * Check to see if we need to back up because we saw a
1138                                  * trailing 'e' or sign.
1139                                  */
1140                                 if (flags & SCAN_NODIGITS) {
1141                                         if (flags & SCAN_EXPOK) {
1142                                                 /*
1143                                                  * There were no digits at all so scanning has
1144                                                  * failed and we are done.
1145                                                  */
1146                                                 if (*string == '\0') {
1147                                                         underflow = 1;
1148                                                 }
1149                                                 goto done;
1150                                         }
1151 
1152                                         /*
1153                                          * We got a bad exponent ('e' and maybe a sign).
1154                                          */
1155                                         end--;
1156                                         string--;
1157                                         if (*end != 'e' && *end != 'E') {
1158                                                 end--;
1159                                                 string--;
1160                                         }
1161                                 }
1162 
1163                                 /*
1164                                  * Scan the value from the temporary buffer.
1165                                  */
1166                                 if (!(flags & SCAN_SUPPRESS)) {
1167                                         double dvalue;
1168                                         *end = '\0';
1169                                         dvalue = zend_strtod(buf, NULL);
1170                                         if (numVars && objIndex >= argCount) {
1171                                                 break;
1172                                         } else if (numVars) {
1173                                                 current = Z_REFVAL(args[objIndex++]);
1174                                                 zval_ptr_dtor(current);
1175                                                 ZVAL_DOUBLE(current, dvalue);
1176                                         } else {
1177                                                 add_index_double(return_value, objIndex++, dvalue );
1178                                         }
1179                                 }
1180                                 break;
1181                 } /* switch (op) */
1182                 nconversions++;
1183         } /*  while (*format != '\0') */
1184 
1185 done:
1186         result = SCAN_SUCCESS;
1187 
1188         if (underflow && (0==nconversions)) {
1189                 scan_set_error_return( numVars, return_value );
1190                 result = SCAN_ERROR_EOF;
1191         } else if (numVars) {
1192                 convert_to_long(return_value );
1193                 Z_LVAL_P(return_value) = nconversions;
1194         } else if (nconversions < totalVars) {
1195                 /* TODO: not all elements converted. we need to prune the list - cc */
1196         }
1197         return result;
1198 }
1199 /* }}} */
1200 
1201 /* the compiler choked when i tried to make this a macro    */
1202 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1203 {
1204         if (numVars) {
1205                 ZVAL_LONG(return_value, SCAN_ERROR_EOF);  /* EOF marker */
1206         } else {
1207                 /* convert_to_null calls destructor */
1208                 convert_to_null(return_value);
1209         }
1210 }
1211 /* }}} */
1212 
1213 /*
1214  * Local variables:
1215  * tab-width: 4
1216  * c-basic-offset: 4
1217  * End:
1218  * vim600: sw=4 ts=4 fdm=marker
1219  * vim<600: sw=4 ts=4
1220  */

/* [<][>][^][v][top][bottom][index][help] */