1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 7                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2016 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Sascha Schumann <sascha@schumann.cx>                         |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php.h"
22
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33
34#include "php_ini.h"
35#include "php_globals.h"
36#define STATE_TAG SOME_OTHER_STATE_TAG
37#include "basic_functions.h"
38#include "url.h"
39#undef STATE_TAG
40
41#define url_scanner url_scanner_ex
42
43#include "zend_smart_str.h"
44
45static void tag_dtor(zval *zv)
46{
47	free(Z_PTR_P(zv));
48}
49
50static PHP_INI_MH(OnUpdateTags)
51{
52	url_adapt_state_ex_t *ctx;
53	char *key;
54	char *tmp;
55	char *lasts = NULL;
56
57	ctx = &BG(url_adapt_state_ex);
58
59	tmp = estrndup(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
60
61	if (ctx->tags)
62		zend_hash_destroy(ctx->tags);
63	else {
64		ctx->tags = malloc(sizeof(HashTable));
65		if (!ctx->tags) {
66			return FAILURE;
67		}
68	}
69
70	zend_hash_init(ctx->tags, 0, NULL, tag_dtor, 1);
71
72	for (key = php_strtok_r(tmp, ",", &lasts);
73			key;
74			key = php_strtok_r(NULL, ",", &lasts)) {
75		char *val;
76
77		val = strchr(key, '=');
78		if (val) {
79			char *q;
80			size_t keylen;
81
82			*val++ = '\0';
83			for (q = key; *q; q++)
84				*q = tolower(*q);
85			keylen = q - key;
86			/* key is stored withOUT NUL
87			   val is stored WITH    NUL */
88			zend_hash_str_add_mem(ctx->tags, key, keylen, val, strlen(val)+1);
89		}
90	}
91
92	efree(tmp);
93
94	return SUCCESS;
95}
96
/*
 * INI settings of the URL rewriter.  "url_rewriter.tags" is registered with
 * the default list below, is changeable at PHP_INI_ALL level, and every
 * change is routed through OnUpdateTags.
 */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
PHP_INI_END()
100
/*
 * Named re2c character classes used by the scanner rules in this file.
 */
/*!re2c
any = [\000-\377];
N = (any\[<]);
alpha = [a-zA-Z];
alphanamespace = [a-zA-Z:];
alphadash = ([a-zA-Z] | "-");
*/

/*
 * re2c interface for the scanner inside append_modified_url(): the cursor is
 * its local `p`, the input limit is its local `q`, and running out of input
 * jumps to its `done` label.
 */
#define YYFILL(n) goto done
#define YYCTYPE unsigned char
#define YYCURSOR p
#define YYLIMIT q
#define YYMARKER r
114
/*
 * Appends `url` to `dest`, inserting the extra query data `url_app`.
 *
 * url       - the URL to rewrite
 * dest      - buffer receiving the (possibly modified) URL
 * url_app   - "name=value" data to add
 * separator - string placed before url_app when the URL already contains '?'
 *
 * The URL is copied unchanged when the scan meets a ':' or when the URL
 * starts with '#'.  Otherwise url_app is inserted, preceded by "?" or by
 * `separator`, in front of the '#' part if there is one and at the end of
 * the URL if not.
 */
static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
{
	register const char *p, *q;
	/* position of the first '#' found by the scan, NULL if none */
	const char *bash = NULL;
	/* switched to `separator` once a '?' has been seen */
	const char *sep = "?";

	/* p is the scan cursor, q the end of the URL */
	q = (p = ZSTR_VAL(url->s)) + ZSTR_LEN(url->s);

scan:
/*!re2c
  ":"		{ smart_str_append_smart_str(dest, url); return; }
  "?"		{ sep = separator; goto scan; }
  "#"		{ bash = p - 1; goto done; }
  (any\[:?#])+		{ goto scan; }
*/
done:

	/* Don't modify URLs of the format "#mark" */
	if (bash && bash - ZSTR_VAL(url->s) == 0) {
		smart_str_append_smart_str(dest, url);
		return;
	}

	/* everything in front of the '#' (or the whole URL when there is none) */
	if (bash)
		smart_str_appendl(dest, ZSTR_VAL(url->s), bash - ZSTR_VAL(url->s));
	else
		smart_str_append_smart_str(dest, url);

	smart_str_appends(dest, sep);
	smart_str_append_smart_str(dest, url_app);

	/* re-attach the part starting at '#' after the added data */
	if (bash)
		smart_str_appendl(dest, bash, q - bash);
}
149
150
/* Drop the re2c interface macros of the scanner above; they are redefined
   further down for the main loop. */
#undef YYFILL
#undef YYCTYPE
#undef YYCURSOR
#undef YYLIMIT
#undef YYMARKER
156
157static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type)
158{
159	char f = 0;
160
161	if (strncasecmp(ZSTR_VAL(ctx->arg.s), ctx->lookup_data, ZSTR_LEN(ctx->arg.s)) == 0)
162		f = 1;
163
164	if (quotes)
165		smart_str_appendc(&ctx->result, type);
166	if (f) {
167		append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
168	} else {
169		smart_str_append_smart_str(&ctx->result, &ctx->val);
170	}
171	if (quotes)
172		smart_str_appendc(&ctx->result, type);
173}
174
/* States of the scanner in xx_mainloop(); the current one is kept in ctx->state. */
enum {
	STATE_PLAIN      = 0,
	STATE_TAG        = 1,
	STATE_NEXT_ARG   = 2,
	STATE_ARG        = 3,
	STATE_BEFORE_VAL = 4,
	STATE_VAL        = 5
};
183
/*
 * re2c interface for xx_mainloop(): the cursor is `xp`, the input limit is
 * `end`, the backtracking marker is `q`, and running out of input jumps to
 * the `stop` label.  STATE is shorthand for the state stored in the context.
 */
#define YYFILL(n) goto stop
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYLIMIT end
#define YYMARKER q
#define STATE ctx->state

/* Common parameter list of the handler helpers and the matching argument
   list used when calling them from the scanner rules. */
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR
#define STD_ARGS ctx, start, xp

/* scdebug((fmt, ...)) prints through printf when SCANNER_DEBUG is non-zero
   and expands to nothing otherwise. */
#if SCANNER_DEBUG
#define scdebug(x) printf x
#else
#define scdebug(x)
#endif
199
200static inline void passthru(STD_PARA)
201{
202	scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
203	smart_str_appendl(&ctx->result, start, YYCURSOR - start);
204}
205
206/*
207 * This function appends a hidden input field after a <form> or
208 * <fieldset>.  The latter is important for XHTML.
209 */
210
211static void handle_form(STD_PARA)
212{
213	int doit = 0;
214
215	if (ZSTR_LEN(ctx->form_app.s) > 0) {
216		switch (ZSTR_LEN(ctx->tag.s)) {
217			case sizeof("form") - 1:
218				if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "form", sizeof("form") - 1)) {
219					doit = 1;
220				}
221				if (doit && ctx->val.s && ctx->lookup_data && *ctx->lookup_data) {
222					char *e, *p = (char *)zend_memnstr(ZSTR_VAL(ctx->val.s), "://", sizeof("://") - 1, ZSTR_VAL(ctx->val.s) + ZSTR_LEN(ctx->val.s));
223					if (p) {
224						e = memchr(p, '/', (ZSTR_VAL(ctx->val.s) + ZSTR_LEN(ctx->val.s)) - p);
225						if (!e) {
226							e = ZSTR_VAL(ctx->val.s) + ZSTR_LEN(ctx->val.s);
227						}
228						if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
229							doit = 0;
230						}
231					}
232				}
233				break;
234
235			case sizeof("fieldset") - 1:
236				if (!strncasecmp(ZSTR_VAL(ctx->tag.s), "fieldset", sizeof("fieldset") - 1)) {
237					doit = 1;
238				}
239				break;
240		}
241
242		if (doit)
243			smart_str_append_smart_str(&ctx->result, &ctx->form_app);
244	}
245}
246
247/*
248 *  HANDLE_TAG copies the HTML Tag and checks whether we
249 *  have that tag in our table. If we might modify it,
250 *  we continue to scan the tag, otherwise we simply copy the complete
251 *  HTML stuff to the result buffer.
252 */
253
254static inline void handle_tag(STD_PARA)
255{
256	int ok = 0;
257	unsigned int i;
258
259	if (ctx->tag.s) {
260		ZSTR_LEN(ctx->tag.s) = 0;
261	}
262	smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
263	for (i = 0; i < ZSTR_LEN(ctx->tag.s); i++)
264		ZSTR_VAL(ctx->tag.s)[i] = tolower((int)(unsigned char)ZSTR_VAL(ctx->tag.s)[i]);
265    /* intentionally using str_find here, in case the hash value is set, but the string val is changed later */
266	if ((ctx->lookup_data = zend_hash_str_find_ptr(ctx->tags, ZSTR_VAL(ctx->tag.s), ZSTR_LEN(ctx->tag.s))) != NULL)
267		ok = 1;
268	STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
269}
270
271static inline void handle_arg(STD_PARA)
272{
273	if (ctx->arg.s) {
274		ZSTR_LEN(ctx->arg.s) = 0;
275	}
276	smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
277}
278
279static inline void handle_val(STD_PARA, char quotes, char type)
280{
281	smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
282	tag_arg(ctx, quotes, type);
283}
284
/*
 * Main scanner loop.
 *
 * Appends `newdata` (`newlen` bytes) to the pending input in ctx->buf and
 * scans the whole buffer, resuming in the state saved in ctx->state.  The
 * rule actions write their output to ctx->result through the handler
 * helpers.  When the input is exhausted, the bytes from the start of the
 * unfinished token onwards are moved to the front of ctx->buf so the next
 * call can continue with them.
 */
static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen)
{
	char *end, *q;
	char *xp;
	char *start;
	size_t rest;

	smart_str_appendl(&ctx->buf, newdata, newlen);

	YYCURSOR = ZSTR_VAL(ctx->buf.s);
	YYLIMIT = ZSTR_VAL(ctx->buf.s) + ZSTR_LEN(ctx->buf.s);

	/* resume where the previous call stopped */
	switch (STATE) {
		case STATE_PLAIN: goto state_plain;
		case STATE_TAG: goto state_tag;
		case STATE_NEXT_ARG: goto state_next_arg;
		case STATE_ARG: goto state_arg;
		case STATE_BEFORE_VAL: goto state_before_val;
		case STATE_VAL: goto state_val;
	}


state_plain_begin:
	STATE = STATE_PLAIN;

/* outside of any tag: copy input through until a '<' shows up */
state_plain:
	start = YYCURSOR;
/*!re2c
  "<"				{ passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
  N+ 				{ passthru(STD_ARGS); goto state_plain; }
*/

/* directly after '<': read the tag name; handle_tag() picks the next state */
state_tag:
	start = YYCURSOR;
/*!re2c
  alphanamespace+	{ handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
  any		{ passthru(STD_ARGS); goto state_plain_begin; }
*/

state_next_arg_begin:
	STATE = STATE_NEXT_ARG;

/* inside a tag of interest: end of tag, whitespace, or start of an attribute */
state_next_arg:
	start = YYCURSOR;
/*!re2c
  [/]? [>]		{ passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
  [ \v\r\t\n]+	{ passthru(STD_ARGS); goto state_next_arg; }
  alpha		{ --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
  any		{ passthru(STD_ARGS); goto state_plain_begin; }
*/

/* attribute name */
state_arg:
	start = YYCURSOR;
/*!re2c
  alpha alphadash*	{ passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
  any		{ passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
*/

/* '=' between attribute name and value, if there is one */
state_before_val:
	start = YYCURSOR;
/*!re2c
  [ ]* "=" [ ]*		{ passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
  any				{ --YYCURSOR; goto state_next_arg_begin; }
*/


/* attribute value: double quoted, single quoted or unquoted */
state_val:
	start = YYCURSOR;
/*!re2c
  ["] (any\[">])* ["]	{ handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
  ['] (any\['>])* [']	{ handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
  (any\[ \r\t\n>'"])+	{ handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
  any					{ passthru(STD_ARGS); goto state_next_arg_begin; }
*/

/* reached through YYFILL when the input runs out */
stop:
	if (YYLIMIT < start) {
		/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
		rest = 0;
	} else {
		rest = YYLIMIT - start;
		/* NOTE(review): this debug-only line reads ctx->buf.c while the rest
		   of the file accesses the buffer as ZSTR_VAL(ctx->buf.s) - confirm
		   it still builds with SCANNER_DEBUG enabled. */
		scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
	}

	/* keep the unfinished token as the start of the pending input */
	if (rest) memmove(ZSTR_VAL(ctx->buf.s), start, rest);
	ZSTR_LEN(ctx->buf.s) = rest;
}
372
373
374PHPAPI char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen, int urlencode)
375{
376	char *result;
377	smart_str surl = {0};
378	smart_str buf = {0};
379	smart_str url_app = {0};
380	zend_string *encoded;
381
382	smart_str_appendl(&surl, url, urllen);
383
384	if (urlencode) {
385		encoded = php_raw_url_encode(name, strlen(name));
386		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
387		zend_string_free(encoded);
388	} else {
389		smart_str_appends(&url_app, name);
390	}
391	smart_str_appendc(&url_app, '=');
392	if (urlencode) {
393		encoded = php_raw_url_encode(value, strlen(value));
394		smart_str_appendl(&url_app, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
395		zend_string_free(encoded);
396	} else {
397		smart_str_appends(&url_app, value);
398	}
399
400	append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
401
402	smart_str_0(&buf);
403	if (newlen) *newlen = ZSTR_LEN(buf.s);
404	result = estrndup(ZSTR_VAL(buf.s), ZSTR_LEN(buf.s));
405
406	smart_str_free(&url_app);
407	smart_str_free(&buf);
408
409	return result;
410}
411
412
413static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush)
414{
415	url_adapt_state_ex_t *ctx;
416	char *retval;
417
418	ctx = &BG(url_adapt_state_ex);
419
420	xx_mainloop(ctx, src, srclen);
421
422	if (!ctx->result.s) {
423		smart_str_appendl(&ctx->result, "", 0);
424		*newlen = 0;
425	} else {
426		*newlen = ZSTR_LEN(ctx->result.s);
427	}
428	smart_str_0(&ctx->result);
429	if (do_flush) {
430		smart_str_append(&ctx->result, ctx->buf.s);
431		*newlen += ZSTR_LEN(ctx->buf.s);
432		smart_str_free(&ctx->buf);
433		smart_str_free(&ctx->val);
434	}
435	retval = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
436	smart_str_free(&ctx->result);
437	return retval;
438}
439
440static int php_url_scanner_ex_activate(void)
441{
442	url_adapt_state_ex_t *ctx;
443
444	ctx = &BG(url_adapt_state_ex);
445
446	memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
447
448	return SUCCESS;
449}
450
451static int php_url_scanner_ex_deactivate(void)
452{
453	url_adapt_state_ex_t *ctx;
454
455	ctx = &BG(url_adapt_state_ex);
456
457	smart_str_free(&ctx->result);
458	smart_str_free(&ctx->buf);
459	smart_str_free(&ctx->tag);
460	smart_str_free(&ctx->arg);
461
462	return SUCCESS;
463}
464
465static void php_url_scanner_output_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode)
466{
467	size_t len;
468
469	if (ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) != 0) {
470		*handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0));
471		if (sizeof(uint) < sizeof(size_t)) {
472			if (len > UINT_MAX)
473				len = UINT_MAX;
474		}
475		*handled_output_len = len;
476	} else if (ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) == 0) {
477		url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
478		if (ctx->buf.s && ZSTR_LEN(ctx->buf.s)) {
479			smart_str_append(&ctx->result, ctx->buf.s);
480			smart_str_appendl(&ctx->result, output, output_len);
481
482			*handled_output = estrndup(ZSTR_VAL(ctx->result.s), ZSTR_LEN(ctx->result.s));
483			*handled_output_len = ZSTR_LEN(ctx->buf.s) + output_len;
484
485			smart_str_free(&ctx->buf);
486			smart_str_free(&ctx->result);
487		} else {
488			*handled_output = estrndup(output, *handled_output_len = output_len);
489		}
490	} else {
491		*handled_output = NULL;
492	}
493}
494
495PHPAPI int php_url_scanner_add_var(char *name, size_t name_len, char *value, size_t value_len, int urlencode)
496{
497	smart_str sname = {0};
498	smart_str svalue = {0};
499	zend_string *encoded;
500
501	if (!BG(url_adapt_state_ex).active) {
502		php_url_scanner_ex_activate();
503		php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
504		BG(url_adapt_state_ex).active = 1;
505	}
506
507	if (BG(url_adapt_state_ex).url_app.s && ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) != 0) {
508		smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
509	}
510
511	if (urlencode) {
512		encoded = php_raw_url_encode(name, name_len);
513		smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
514		zend_string_free(encoded);
515		encoded = php_raw_url_encode(value, value_len);
516		smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
517		zend_string_free(encoded);
518	} else {
519		smart_str_appendl(&sname, name, name_len);
520		smart_str_appendl(&svalue, value, value_len);
521	}
522
523	smart_str_append_smart_str(&BG(url_adapt_state_ex).url_app, &sname);
524	smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
525	smart_str_append_smart_str(&BG(url_adapt_state_ex).url_app, &svalue);
526
527	smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\"");
528	smart_str_append_smart_str(&BG(url_adapt_state_ex).form_app, &sname);
529	smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
530	smart_str_append_smart_str(&BG(url_adapt_state_ex).form_app, &svalue);
531	smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
532
533	smart_str_free(&sname);
534	smart_str_free(&svalue);
535
536	return SUCCESS;
537}
538
539PHPAPI int php_url_scanner_reset_vars(void)
540{
541	if (BG(url_adapt_state_ex).form_app.s) {
542		ZSTR_LEN(BG(url_adapt_state_ex).form_app.s) = 0;
543	}
544	if (BG(url_adapt_state_ex).url_app.s) {
545		ZSTR_LEN(BG(url_adapt_state_ex).url_app.s) = 0;
546	}
547
548	return SUCCESS;
549}
550
551PHP_MINIT_FUNCTION(url_scanner)
552{
553	BG(url_adapt_state_ex).tags = NULL;
554
555	BG(url_adapt_state_ex).form_app.s = BG(url_adapt_state_ex).url_app.s = NULL;
556
557	REGISTER_INI_ENTRIES();
558	return SUCCESS;
559}
560
/* Module shutdown: unregisters the INI entries.  Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(url_scanner)
{
	UNREGISTER_INI_ENTRIES();

	return SUCCESS;
}
567
/* Request startup: marks the rewriter as inactive; it is switched on by the
   first php_url_scanner_add_var() call.  Always returns SUCCESS. */
PHP_RINIT_FUNCTION(url_scanner)
{
	BG(url_adapt_state_ex).active = 0;

	return SUCCESS;
}
574
575PHP_RSHUTDOWN_FUNCTION(url_scanner)
576{
577	if (BG(url_adapt_state_ex).active) {
578		php_url_scanner_ex_deactivate();
579		BG(url_adapt_state_ex).active = 0;
580	}
581
582	smart_str_free(&BG(url_adapt_state_ex).form_app);
583	smart_str_free(&BG(url_adapt_state_ex).url_app);
584
585	return SUCCESS;
586}
587