This source file includes the following definitions:
- php_url_free
- php_replace_controlchars_ex
- php_replace_controlchars
- php_url_parse
- php_url_parse_ex
- PHP_FUNCTION(parse_url)
- php_htoi
- php_url_encode
- PHP_FUNCTION(urlencode)
- PHP_FUNCTION(urldecode)
- php_url_decode
- php_raw_url_encode
- PHP_FUNCTION(rawurlencode)
- PHP_FUNCTION(rawurldecode)
- php_raw_url_decode
- PHP_FUNCTION(get_headers)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 #include <stdlib.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include <sys/types.h>
24
25 #include "php.h"
26
27 #include "url.h"
28 #include "file.h"
29 #ifdef _OSD_POSIX
30 #ifndef APACHE
31 #error On this EBCDIC platform, PHP is only supported as an Apache module.
32 #else
33 #ifndef CHARSET_EBCDIC
34 #define CHARSET_EBCDIC
35 #endif
36 #include "ebcdic.h"
37 #endif
38 #endif
39
40
41
42 PHPAPI void php_url_free(php_url *theurl)
43 {
44 if (theurl->scheme)
45 efree(theurl->scheme);
46 if (theurl->user)
47 efree(theurl->user);
48 if (theurl->pass)
49 efree(theurl->pass);
50 if (theurl->host)
51 efree(theurl->host);
52 if (theurl->path)
53 efree(theurl->path);
54 if (theurl->query)
55 efree(theurl->query);
56 if (theurl->fragment)
57 efree(theurl->fragment);
58 efree(theurl);
59 }
60
61
62
63
/*
 * Overwrite every control character (as classified by iscntrl()) in the
 * first `len` bytes of `str` with '_'.
 *
 * The buffer is modified in place. Returns `str`, or NULL when `str` is NULL.
 */
PHPAPI char *php_replace_controlchars_ex(char *str, size_t len)
{
	unsigned char *bytes;
	size_t i;

	if (str == NULL) {
		return NULL;
	}

	/* Work on unsigned bytes so iscntrl() never receives a negative value. */
	bytes = (unsigned char *)str;
	for (i = 0; i < len; i++) {
		if (iscntrl(bytes[i])) {
			bytes[i] = '_';
		}
	}

	return str;
}
83
84
85 PHPAPI char *php_replace_controlchars(char *str)
86 {
87 return php_replace_controlchars_ex(str, strlen(str));
88 }
89
90 PHPAPI php_url *php_url_parse(char const *str)
91 {
92 return php_url_parse_ex(str, strlen(str));
93 }
94
95
96
97 PHPAPI php_url *php_url_parse_ex(char const *str, size_t length)
98 {
99 char port_buf[6];
100 php_url *ret = ecalloc(1, sizeof(php_url));
101 char const *s, *e, *p, *pp, *ue;
102
103 s = str;
104 ue = s + length;
105
106
107 if ((e = memchr(s, ':', length)) && (e - s)) {
108
109 p = s;
110 while (p < e) {
111
112 if (!isalpha(*p) && !isdigit(*p) && *p != '+' && *p != '.' && *p != '-') {
113 if (e + 1 < ue && e < s + strcspn(s, "?#")) {
114 goto parse_port;
115 } else {
116 goto just_path;
117 }
118 }
119 p++;
120 }
121
122 if (*(e + 1) == '\0') {
123 ret->scheme = estrndup(s, (e - s));
124 php_replace_controlchars_ex(ret->scheme, (e - s));
125 goto end;
126 }
127
128
129
130
131
132 if (*(e+1) != '/') {
133
134
135
136 p = e + 1;
137 while (isdigit(*p)) {
138 p++;
139 }
140
141 if ((*p == '\0' || *p == '/') && (p - e) < 7) {
142 goto parse_port;
143 }
144
145 ret->scheme = estrndup(s, (e-s));
146 php_replace_controlchars_ex(ret->scheme, (e - s));
147
148 length -= ++e - s;
149 s = e;
150 goto just_path;
151 } else {
152 ret->scheme = estrndup(s, (e-s));
153 php_replace_controlchars_ex(ret->scheme, (e - s));
154
155 if (*(e+2) == '/') {
156 s = e + 3;
157 if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
158 if (*(e + 3) == '/') {
159
160
161
162 if (*(e + 5) == ':') {
163 s = e + 4;
164 }
165 goto nohost;
166 }
167 }
168 } else {
169 if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
170 s = e + 1;
171 goto nohost;
172 } else {
173 length -= ++e - s;
174 s = e;
175 goto just_path;
176 }
177 }
178 }
179 } else if (e) {
180 parse_port:
181 p = e + 1;
182 pp = p;
183
184 while (pp-p < 6 && isdigit(*pp)) {
185 pp++;
186 }
187
188 if (pp - p > 0 && pp - p < 6 && (*pp == '/' || *pp == '\0')) {
189 zend_long port;
190 memcpy(port_buf, p, (pp - p));
191 port_buf[pp - p] = '\0';
192 port = ZEND_STRTOL(port_buf, NULL, 10);
193 if (port > 0 && port <= 65535) {
194 ret->port = (unsigned short) port;
195 if (*s == '/' && *(s + 1) == '/') {
196 s += 2;
197 }
198 } else {
199 if (ret->scheme) efree(ret->scheme);
200 efree(ret);
201 return NULL;
202 }
203 } else if (p == pp && *pp == '\0') {
204 if (ret->scheme) efree(ret->scheme);
205 efree(ret);
206 return NULL;
207 } else if (*s == '/' && *(s + 1) == '/') {
208 s += 2;
209 } else {
210 goto just_path;
211 }
212 } else if (*s == '/' && *(s + 1) == '/') {
213 s += 2;
214 } else {
215 just_path:
216 ue = s + length;
217 goto nohost;
218 }
219
220 e = ue;
221
222 if (!(p = memchr(s, '/', (ue - s)))) {
223 char *query, *fragment;
224
225 query = memchr(s, '?', (ue - s));
226 fragment = memchr(s, '#', (ue - s));
227
228 if (query && fragment) {
229 if (query > fragment) {
230 e = fragment;
231 } else {
232 e = query;
233 }
234 } else if (query) {
235 e = query;
236 } else if (fragment) {
237 e = fragment;
238 }
239 } else {
240 e = p;
241 }
242
243
244 if ((p = zend_memrchr(s, '@', (e-s)))) {
245 if ((pp = memchr(s, ':', (p-s)))) {
246 ret->user = estrndup(s, (pp-s));
247 php_replace_controlchars_ex(ret->user, (pp - s));
248
249 pp++;
250 ret->pass = estrndup(pp, (p-pp));
251 php_replace_controlchars_ex(ret->pass, (p-pp));
252 } else {
253 ret->user = estrndup(s, (p-s));
254 php_replace_controlchars_ex(ret->user, (p-s));
255 }
256
257 s = p + 1;
258 }
259
260
261 if (*s == '[' && *(e-1) == ']') {
262
263
264
265 p = s;
266 } else {
267
268
269 for(p = e; p >= s && *p != ':'; p--);
270 }
271
272 if (p >= s && *p == ':') {
273 if (!ret->port) {
274 p++;
275 if (e-p > 5) {
276 if (ret->scheme) efree(ret->scheme);
277 if (ret->user) efree(ret->user);
278 if (ret->pass) efree(ret->pass);
279 efree(ret);
280 return NULL;
281 } else if (e - p > 0) {
282 zend_long port;
283 memcpy(port_buf, p, (e - p));
284 port_buf[e - p] = '\0';
285 port = ZEND_STRTOL(port_buf, NULL, 10);
286 if (port > 0 && port <= 65535) {
287 ret->port = (unsigned short)port;
288 } else {
289 if (ret->scheme) efree(ret->scheme);
290 if (ret->user) efree(ret->user);
291 if (ret->pass) efree(ret->pass);
292 efree(ret);
293 return NULL;
294 }
295 }
296 p--;
297 }
298 } else {
299 p = e;
300 }
301
302
303 if ((p-s) < 1) {
304 if (ret->scheme) efree(ret->scheme);
305 if (ret->user) efree(ret->user);
306 if (ret->pass) efree(ret->pass);
307 efree(ret);
308 return NULL;
309 }
310
311 ret->host = estrndup(s, (p-s));
312 php_replace_controlchars_ex(ret->host, (p - s));
313
314 if (e == ue) {
315 return ret;
316 }
317
318 s = e;
319
320 nohost:
321
322 if ((p = memchr(s, '?', (ue - s)))) {
323 pp = memchr(s, '#', (ue - s));
324
325 if (pp && pp < p) {
326 if (pp - s) {
327 ret->path = estrndup(s, (pp-s));
328 php_replace_controlchars_ex(ret->path, (pp - s));
329 }
330 p = pp;
331 goto label_parse;
332 }
333
334 if (p - s) {
335 ret->path = estrndup(s, (p-s));
336 php_replace_controlchars_ex(ret->path, (p - s));
337 }
338
339 if (pp) {
340 if (pp - ++p) {
341 ret->query = estrndup(p, (pp-p));
342 php_replace_controlchars_ex(ret->query, (pp - p));
343 }
344 p = pp;
345 goto label_parse;
346 } else if (++p - ue) {
347 ret->query = estrndup(p, (ue-p));
348 php_replace_controlchars_ex(ret->query, (ue - p));
349 }
350 } else if ((p = memchr(s, '#', (ue - s)))) {
351 if (p - s) {
352 ret->path = estrndup(s, (p-s));
353 php_replace_controlchars_ex(ret->path, (p - s));
354 }
355
356 label_parse:
357 p++;
358
359 if (ue - p) {
360 ret->fragment = estrndup(p, (ue-p));
361 php_replace_controlchars_ex(ret->fragment, (ue - p));
362 }
363 } else {
364 ret->path = estrndup(s, (ue-s));
365 php_replace_controlchars_ex(ret->path, (ue - s));
366 }
367 end:
368 return ret;
369 }
370
371
372
373
374 PHP_FUNCTION(parse_url)
375 {
376 char *str;
377 size_t str_len;
378 php_url *resource;
379 zend_long key = -1;
380
381 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &str, &str_len, &key) == FAILURE) {
382 return;
383 }
384
385 resource = php_url_parse_ex(str, str_len);
386 if (resource == NULL) {
387
388 RETURN_FALSE;
389 }
390
391 if (key > -1) {
392 switch (key) {
393 case PHP_URL_SCHEME:
394 if (resource->scheme != NULL) RETVAL_STRING(resource->scheme);
395 break;
396 case PHP_URL_HOST:
397 if (resource->host != NULL) RETVAL_STRING(resource->host);
398 break;
399 case PHP_URL_PORT:
400 if (resource->port != 0) RETVAL_LONG(resource->port);
401 break;
402 case PHP_URL_USER:
403 if (resource->user != NULL) RETVAL_STRING(resource->user);
404 break;
405 case PHP_URL_PASS:
406 if (resource->pass != NULL) RETVAL_STRING(resource->pass);
407 break;
408 case PHP_URL_PATH:
409 if (resource->path != NULL) RETVAL_STRING(resource->path);
410 break;
411 case PHP_URL_QUERY:
412 if (resource->query != NULL) RETVAL_STRING(resource->query);
413 break;
414 case PHP_URL_FRAGMENT:
415 if (resource->fragment != NULL) RETVAL_STRING(resource->fragment);
416 break;
417 default:
418 php_error_docref(NULL, E_WARNING, "Invalid URL component identifier " ZEND_LONG_FMT, key);
419 RETVAL_FALSE;
420 }
421 goto done;
422 }
423
424
425 array_init(return_value);
426
427
428 if (resource->scheme != NULL)
429 add_assoc_string(return_value, "scheme", resource->scheme);
430 if (resource->host != NULL)
431 add_assoc_string(return_value, "host", resource->host);
432 if (resource->port != 0)
433 add_assoc_long(return_value, "port", resource->port);
434 if (resource->user != NULL)
435 add_assoc_string(return_value, "user", resource->user);
436 if (resource->pass != NULL)
437 add_assoc_string(return_value, "pass", resource->pass);
438 if (resource->path != NULL)
439 add_assoc_string(return_value, "path", resource->path);
440 if (resource->query != NULL)
441 add_assoc_string(return_value, "query", resource->query);
442 if (resource->fragment != NULL)
443 add_assoc_string(return_value, "fragment", resource->fragment);
444 done:
445 php_url_free(resource);
446 }
447
448
449
450
/*
 * Convert the two hexadecimal digits at s[0] and s[1] to their integer
 * value (0..255 for valid digits). Upper-case letters are folded to lower
 * case first. No validation is performed here.
 */
static int php_htoi(char *s)
{
	int result = 0;
	int i;

	for (i = 0; i < 2; i++) {
		int digit = ((unsigned char *)s)[i];

		if (isupper(digit)) {
			digit = tolower(digit);
		}

		if (digit >= '0' && digit <= '9') {
			digit -= '0';
		} else {
			digit = digit - 'a' + 10;
		}

		/* The first digit ends up as the high nibble. */
		result = result * 16 + digit;
	}

	return result;
}
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
/* Upper-case hexadecimal digits, indexed by nibble value (0..15); read-only. */
static const unsigned char hexchars[] = "0123456789ABCDEF";
484
485
486
487 PHPAPI zend_string *php_url_encode(char const *s, size_t len)
488 {
489 register unsigned char c;
490 unsigned char *to;
491 unsigned char const *from, *end;
492 zend_string *start;
493
494 from = (unsigned char *)s;
495 end = (unsigned char *)s + len;
496 start = zend_string_alloc(3 * len, 0);
497 to = (unsigned char*)ZSTR_VAL(start);
498
499 while (from < end) {
500 c = *from++;
501
502 if (c == ' ') {
503 *to++ = '+';
504 #ifndef CHARSET_EBCDIC
505 } else if ((c < '0' && c != '-' && c != '.') ||
506 (c < 'A' && c > '9') ||
507 (c > 'Z' && c < 'a' && c != '_') ||
508 (c > 'z')) {
509 to[0] = '%';
510 to[1] = hexchars[c >> 4];
511 to[2] = hexchars[c & 15];
512 to += 3;
513 #else
514 } else if (!isalnum(c) && strchr("_-.", c) == NULL) {
515
516 to[0] = '%';
517 to[1] = hexchars[os_toascii[c] >> 4];
518 to[2] = hexchars[os_toascii[c] & 15];
519 to += 3;
520 #endif
521 } else {
522 *to++ = c;
523 }
524 }
525 *to = '\0';
526
527 start = zend_string_truncate(start, to - (unsigned char*)ZSTR_VAL(start), 0);
528
529 return start;
530 }
531
532
533
534
535 PHP_FUNCTION(urlencode)
536 {
537 zend_string *in_str;
538
539 #ifndef FAST_ZPP
540 if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &in_str) == FAILURE) {
541 return;
542 }
543 #else
544 ZEND_PARSE_PARAMETERS_START(1, 1)
545 Z_PARAM_STR(in_str)
546 ZEND_PARSE_PARAMETERS_END();
547 #endif
548
549 RETURN_STR(php_url_encode(ZSTR_VAL(in_str), ZSTR_LEN(in_str)));
550 }
551
552
553
554
555 PHP_FUNCTION(urldecode)
556 {
557 zend_string *in_str, *out_str;
558
559 #ifndef FAST_ZPP
560 if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &in_str) == FAILURE) {
561 return;
562 }
563 #else
564 ZEND_PARSE_PARAMETERS_START(1, 1)
565 Z_PARAM_STR(in_str)
566 ZEND_PARSE_PARAMETERS_END();
567 #endif
568
569 out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
570 ZSTR_LEN(out_str) = php_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
571
572 RETURN_NEW_STR(out_str);
573 }
574
575
576
577
578 PHPAPI size_t php_url_decode(char *str, size_t len)
579 {
580 char *dest = str;
581 char *data = str;
582
583 while (len--) {
584 if (*data == '+') {
585 *dest = ' ';
586 }
587 else if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
588 && isxdigit((int) *(data + 2))) {
589 #ifndef CHARSET_EBCDIC
590 *dest = (char) php_htoi(data + 1);
591 #else
592 *dest = os_toebcdic[(char) php_htoi(data + 1)];
593 #endif
594 data += 2;
595 len -= 2;
596 } else {
597 *dest = *data;
598 }
599 data++;
600 dest++;
601 }
602 *dest = '\0';
603 return dest - str;
604 }
605
606
607
608
609 PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len)
610 {
611 register size_t x, y;
612 zend_string *str;
613
614 str = zend_string_alloc(3 * len, 0);
615 for (x = 0, y = 0; len--; x++, y++) {
616 ZSTR_VAL(str)[y] = (unsigned char) s[x];
617 #ifndef CHARSET_EBCDIC
618 if ((ZSTR_VAL(str)[y] < '0' && ZSTR_VAL(str)[y] != '-' && ZSTR_VAL(str)[y] != '.') ||
619 (ZSTR_VAL(str)[y] < 'A' && ZSTR_VAL(str)[y] > '9') ||
620 (ZSTR_VAL(str)[y] > 'Z' && ZSTR_VAL(str)[y] < 'a' && ZSTR_VAL(str)[y] != '_') ||
621 (ZSTR_VAL(str)[y] > 'z' && ZSTR_VAL(str)[y] != '~')) {
622 ZSTR_VAL(str)[y++] = '%';
623 ZSTR_VAL(str)[y++] = hexchars[(unsigned char) s[x] >> 4];
624 ZSTR_VAL(str)[y] = hexchars[(unsigned char) s[x] & 15];
625 #else
626 if (!isalnum(ZSTR_VAL(str)[y]) && strchr("_-.~", ZSTR_VAL(str)[y]) != NULL) {
627 ZSTR_VAL(str)[y++] = '%';
628 ZSTR_VAL(str)[y++] = hexchars[os_toascii[(unsigned char) s[x]] >> 4];
629 ZSTR_VAL(str)[y] = hexchars[os_toascii[(unsigned char) s[x]] & 15];
630 #endif
631 }
632 }
633 ZSTR_VAL(str)[y] = '\0';
634 str = zend_string_truncate(str, y, 0);
635
636 return str;
637 }
638
639
640
641
642 PHP_FUNCTION(rawurlencode)
643 {
644 zend_string *in_str;
645
646 #ifndef FAST_ZPP
647 if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &in_str) == FAILURE) {
648 return;
649 }
650 #else
651 ZEND_PARSE_PARAMETERS_START(1, 1)
652 Z_PARAM_STR(in_str)
653 ZEND_PARSE_PARAMETERS_END();
654 #endif
655
656 RETURN_STR(php_raw_url_encode(ZSTR_VAL(in_str), ZSTR_LEN(in_str)));
657 }
658
659
660
661
662 PHP_FUNCTION(rawurldecode)
663 {
664 zend_string *in_str, *out_str;
665
666 #ifndef FAST_ZPP
667 if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &in_str) == FAILURE) {
668 return;
669 }
670 #else
671 ZEND_PARSE_PARAMETERS_START(1, 1)
672 Z_PARAM_STR(in_str)
673 ZEND_PARSE_PARAMETERS_END();
674 #endif
675
676 out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
677 ZSTR_LEN(out_str) = php_raw_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
678
679 RETURN_NEW_STR(out_str);
680 }
681
682
683
684
685 PHPAPI size_t php_raw_url_decode(char *str, size_t len)
686 {
687 char *dest = str;
688 char *data = str;
689
690 while (len--) {
691 if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
692 && isxdigit((int) *(data + 2))) {
693 #ifndef CHARSET_EBCDIC
694 *dest = (char) php_htoi(data + 1);
695 #else
696 *dest = os_toebcdic[(char) php_htoi(data + 1)];
697 #endif
698 data += 2;
699 len -= 2;
700 } else {
701 *dest = *data;
702 }
703 data++;
704 dest++;
705 }
706 *dest = '\0';
707 return dest - str;
708 }
709
710
711
712
713 PHP_FUNCTION(get_headers)
714 {
715 char *url;
716 size_t url_len;
717 php_stream_context *context;
718 php_stream *stream;
719 zval *prev_val, *hdr = NULL, *h;
720 HashTable *hashT;
721 zend_long format = 0;
722
723 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &url, &url_len, &format) == FAILURE) {
724 return;
725 }
726 context = FG(default_context) ? FG(default_context) : (FG(default_context) = php_stream_context_alloc());
727
728 if (!(stream = php_stream_open_wrapper_ex(url, "r", REPORT_ERRORS | STREAM_USE_URL | STREAM_ONLY_GET_HEADERS, NULL, context))) {
729 RETURN_FALSE;
730 }
731
732 if (Z_TYPE(stream->wrapperdata) != IS_ARRAY) {
733 php_stream_close(stream);
734 RETURN_FALSE;
735 }
736
737 array_init(return_value);
738
739
740 if ((h = zend_hash_str_find(HASH_OF(&stream->wrapperdata), "headers", sizeof("headers")-1)) != NULL && Z_TYPE_P(h) == IS_ARRAY) {
741
742 if (!Z_ARRVAL_P(h)->nNumOfElements) {
743 php_stream_getc(stream);
744 }
745 h = zend_hash_str_find(HASH_OF(&stream->wrapperdata), "headers", sizeof("headers")-1);
746 hashT = Z_ARRVAL_P(h);
747 } else {
748 hashT = HASH_OF(&stream->wrapperdata);
749 }
750
751 ZEND_HASH_FOREACH_VAL(hashT, hdr) {
752 if (Z_TYPE_P(hdr) != IS_STRING) {
753 continue;
754 }
755 if (!format) {
756 no_name_header:
757 add_next_index_str(return_value, zend_string_copy(Z_STR_P(hdr)));
758 } else {
759 char c;
760 char *s, *p;
761
762 if ((p = strchr(Z_STRVAL_P(hdr), ':'))) {
763 c = *p;
764 *p = '\0';
765 s = p + 1;
766 while (isspace((int)*(unsigned char *)s)) {
767 s++;
768 }
769
770 if ((prev_val = zend_hash_str_find(Z_ARRVAL_P(return_value), Z_STRVAL_P(hdr), (p - Z_STRVAL_P(hdr)))) == NULL) {
771 add_assoc_stringl_ex(return_value, Z_STRVAL_P(hdr), (p - Z_STRVAL_P(hdr)), s, (Z_STRLEN_P(hdr) - (s - Z_STRVAL_P(hdr))));
772 } else {
773 convert_to_array(prev_val);
774 add_next_index_stringl(prev_val, s, (Z_STRLEN_P(hdr) - (s - Z_STRVAL_P(hdr))));
775 }
776
777 *p = c;
778 } else {
779 goto no_name_header;
780 }
781 }
782 } ZEND_HASH_FOREACH_END();
783
784 php_stream_close(stream);
785 }
786
787
788
789
790
791
792
793
794
795