1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 7                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2014 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Sascha Schumann <sascha@schumann.cx>                         |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php.h"
22
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33
34#include "php_ini.h"
35#include "php_globals.h"
36#define STATE_TAG SOME_OTHER_STATE_TAG
37#include "basic_functions.h"
38#include "url.h"
39#undef STATE_TAG
40
41#define url_scanner url_scanner_ex
42
43#include "zend_smart_str.h"
44
45static void tag_dtor(zval *zv)
46{
47    free(Z_PTR_P(zv));
48}
49
50static PHP_INI_MH(OnUpdateTags)
51{
52    url_adapt_state_ex_t *ctx;
53    char *key;
54    char *lasts;
55    char *tmp;
56
57    ctx = &BG(url_adapt_state_ex);
58
59    tmp = estrndup(new_value->val, new_value->len);
60
61    if (ctx->tags)
62        zend_hash_destroy(ctx->tags);
63    else {
64        ctx->tags = malloc(sizeof(HashTable));
65        if (!ctx->tags) {
66            return FAILURE;
67        }
68    }
69
70    zend_hash_init(ctx->tags, 0, NULL, tag_dtor, 1);
71
72    for (key = php_strtok_r(tmp, ",", &lasts);
73            key;
74            key = php_strtok_r(NULL, ",", &lasts)) {
75        char *val;
76
77        val = strchr(key, '=');
78        if (val) {
79            char *q;
80            int keylen;
81
82            *val++ = '\0';
83            for (q = key; *q; q++)
84                *q = tolower(*q);
85            keylen = q - key;
86            /* key is stored withOUT NUL
87               val is stored WITH    NUL */
88            zend_hash_str_add_mem(ctx->tags, key, keylen, val, strlen(val)+1);
89        }
90    }
91
92    efree(tmp);
93
94    return SUCCESS;
95}
96
/* INI table for this file: the single setting "url_rewriter.tags", with
   OnUpdateTags as its update handler. The default value is a comma-separated
   list of tag=attribute pairs. */
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
PHP_INI_END()
100
/* Named character classes used by the re2c scanner blocks in this file. */
/*!re2c
any = [\000-\377];
N = (any\[<]);
alpha = [a-zA-Z];
alphanamespace = [a-zA-Z:];
alphadash = ([a-zA-Z] | "-");
*/

/* re2c interface macros for the scanner embedded in append_modified_url():
   p is the cursor and q the end of input; when the scanner needs more input
   than is available it jumps to the function's `done` label. */
#define YYFILL(n) goto done
#define YYCTYPE unsigned char
#define YYCURSOR p
#define YYLIMIT q
#define YYMARKER r
114
115static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
116{
117    register const char *p, *q;
118    const char *bash = NULL;
119    const char *sep = "?";
120
121    q = (p = url->s->val) + url->s->len;
122
123scan:
124/*!re2c
125  ":"       { smart_str_append_smart_str(dest, url); return; }
126  "?"       { sep = separator; goto scan; }
127  "#"       { bash = p - 1; goto done; }
128  (any\[:?#])+      { goto scan; }
129*/
130done:
131
132    /* Don't modify URLs of the format "#mark" */
133    if (bash && bash - url->s->val == 0) {
134        smart_str_append_smart_str(dest, url);
135        return;
136    }
137
138    if (bash)
139        smart_str_appendl(dest, url->s->val, bash - url->s->val);
140    else
141        smart_str_append_smart_str(dest, url);
142
143    smart_str_appends(dest, sep);
144    smart_str_append_smart_str(dest, url_app);
145
146    if (bash)
147        smart_str_appendl(dest, bash, q - bash);
148}
149
150
/* Drop the re2c interface macros of the scanner above; they are defined
   again with different bindings further down in this file. */
#undef YYFILL
#undef YYCTYPE
#undef YYCURSOR
#undef YYLIMIT
#undef YYMARKER
156
157static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC)
158{
159    char f = 0;
160
161    if (strncasecmp(ctx->arg.s->val, ctx->lookup_data, ctx->arg.s->len) == 0)
162        f = 1;
163
164    if (quotes)
165        smart_str_appendc(&ctx->result, type);
166    if (f) {
167        append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
168    } else {
169        smart_str_append_smart_str(&ctx->result, &ctx->val);
170    }
171    if (quotes)
172        smart_str_appendc(&ctx->result, type);
173}
174
/* States of the HTML scanner in xx_mainloop(); the current one is kept in
   ctx->state so scanning can resume where the previous chunk stopped. */
enum {
    STATE_PLAIN = 0,    /* outside of any tag */
    STATE_TAG,          /* right after '<', expecting a tag name */
    STATE_NEXT_ARG,     /* inside a tag, between attributes */
    STATE_ARG,          /* at an attribute name */
    STATE_BEFORE_VAL,   /* after an attribute name, expecting '=' */
    STATE_VAL           /* at an attribute value */
};
183
/* re2c interface macros for the scanner in xx_mainloop(): xp is the cursor,
   end the input limit and q the marker; when the scanner needs more input
   than is available it jumps to the `stop` label. */
#define YYFILL(n) goto stop
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYLIMIT end
#define YYMARKER q
#define STATE ctx->state

/* Parameter list and matching argument list shared by the scanner helpers. */
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC
#define STD_ARGS ctx, start, xp TSRMLS_CC

/* scdebug((fmt, ...)) prints through printf when SCANNER_DEBUG is set and
   expands to nothing otherwise. */
#if SCANNER_DEBUG
#define scdebug(x) printf x
#else
#define scdebug(x)
#endif
199
200static inline void passthru(STD_PARA)
201{
202    scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
203    smart_str_appendl(&ctx->result, start, YYCURSOR - start);
204}
205
206/*
207 * This function appends a hidden input field after a <form> or
208 * <fieldset>.  The latter is important for XHTML.
209 */
210
211static void handle_form(STD_PARA)
212{
213    int doit = 0;
214
215    if (ctx->form_app.s->len > 0) {
216        switch (ctx->tag.s->len) {
217            case sizeof("form") - 1:
218                if (!strncasecmp(ctx->tag.s->val, "form", sizeof("form") - 1)) {
219                    doit = 1;
220                }
221                if (doit && ctx->val.s && ctx->lookup_data && *ctx->lookup_data) {
222                    char *e, *p = (char *)zend_memnstr(ctx->val.s->val, "://", sizeof("://") - 1, ctx->val.s->val + ctx->val.s->len);
223                    if (p) {
224                        e = memchr(p, '/', (ctx->val.s->val + ctx->val.s->len) - p);
225                        if (!e) {
226                            e = ctx->val.s->val + ctx->val.s->len;
227                        }
228                        if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
229                            doit = 0;
230                        }
231                    }
232                }
233                break;
234
235            case sizeof("fieldset") - 1:
236                if (!strncasecmp(ctx->tag.s->val, "fieldset", sizeof("fieldset") - 1)) {
237                    doit = 1;
238                }
239                break;
240        }
241
242        if (doit)
243            smart_str_append_smart_str(&ctx->result, &ctx->form_app);
244    }
245}
246
247/*
248 *  HANDLE_TAG copies the HTML Tag and checks whether we
249 *  have that tag in our table. If we might modify it,
250 *  we continue to scan the tag, otherwise we simply copy the complete
251 *  HTML stuff to the result buffer.
252 */
253
254static inline void handle_tag(STD_PARA)
255{
256    int ok = 0;
257    unsigned int i;
258
259    if (ctx->tag.s) {
260        ctx->tag.s->len = 0;
261    }
262    smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
263    for (i = 0; i < ctx->tag.s->len; i++)
264        ctx->tag.s->val[i] = tolower((int)(unsigned char)ctx->tag.s->val[i]);
265    /* intentionally using str_find here, in case the hash value is set, but the string val is changed later */
266    if ((ctx->lookup_data = zend_hash_str_find_ptr(ctx->tags, ctx->tag.s->val, ctx->tag.s->len)) != NULL)
267        ok = 1;
268    STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
269}
270
271static inline void handle_arg(STD_PARA)
272{
273    if (ctx->arg.s) {
274        ctx->arg.s->len = 0;
275    }
276    smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
277}
278
279static inline void handle_val(STD_PARA, char quotes, char type)
280{
281    smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
282    tag_arg(ctx, quotes, type TSRMLS_CC);
283}
284
285static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC)
286{
287    char *end, *q;
288    char *xp;
289    char *start;
290    int rest;
291
292    smart_str_appendl(&ctx->buf, newdata, newlen);
293
294    YYCURSOR = ctx->buf.s->val;
295    YYLIMIT = ctx->buf.s->val + ctx->buf.s->len;
296
297    switch (STATE) {
298        case STATE_PLAIN: goto state_plain;
299        case STATE_TAG: goto state_tag;
300        case STATE_NEXT_ARG: goto state_next_arg;
301        case STATE_ARG: goto state_arg;
302        case STATE_BEFORE_VAL: goto state_before_val;
303        case STATE_VAL: goto state_val;
304    }
305
306
307state_plain_begin:
308    STATE = STATE_PLAIN;
309
310state_plain:
311    start = YYCURSOR;
312/*!re2c
313  "<"               { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
314  N+                { passthru(STD_ARGS); goto state_plain; }
315*/
316
317state_tag:
318    start = YYCURSOR;
319/*!re2c
320  alphanamespace+   { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
321  any       { passthru(STD_ARGS); goto state_plain_begin; }
322*/
323
324state_next_arg_begin:
325    STATE = STATE_NEXT_ARG;
326
327state_next_arg:
328    start = YYCURSOR;
329/*!re2c
330  [/]? [>]      { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
331  [ \v\r\t\n]+  { passthru(STD_ARGS); goto state_next_arg; }
332  alpha     { --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
333  any       { passthru(STD_ARGS); goto state_plain_begin; }
334*/
335
336state_arg:
337    start = YYCURSOR;
338/*!re2c
339  alpha alphadash*  { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
340  any       { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
341*/
342
343state_before_val:
344    start = YYCURSOR;
345/*!re2c
346  [ ]* "=" [ ]*     { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
347  any               { --YYCURSOR; goto state_next_arg_begin; }
348*/
349
350
351state_val:
352    start = YYCURSOR;
353/*!re2c
354  ["] (any\[">])* ["]   { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
355  ['] (any\['>])* [']   { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
356  (any\[ \r\t\n>'"])+   { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
357  any                   { passthru(STD_ARGS); goto state_next_arg_begin; }
358*/
359
360stop:
361    rest = YYLIMIT - start;
362    scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
363    /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
364    if (rest < 0) rest = 0;
365
366    if (rest) memmove(ctx->buf.s->val, start, rest);
367    ctx->buf.s->len = rest;
368}
369
370char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC)
371{
372    char *result;
373    smart_str surl = {0};
374    smart_str buf = {0};
375    smart_str url_app = {0};
376
377    smart_str_setl(&surl, url, urllen);
378
379    smart_str_appends(&url_app, name);
380    smart_str_appendc(&url_app, '=');
381    smart_str_appends(&url_app, value);
382
383    append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
384
385    smart_str_0(&buf);
386    if (newlen) *newlen = buf.s->len;
387    result = estrndup(buf.s->val, buf.s->len);
388
389    smart_str_free(&url_app);
390    smart_str_free(&buf);
391
392    return result;
393}
394
395
396static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC)
397{
398    url_adapt_state_ex_t *ctx;
399    char *retval;
400
401    ctx = &BG(url_adapt_state_ex);
402
403    xx_mainloop(ctx, src, srclen TSRMLS_CC);
404
405    if (!ctx->result.s) {
406        smart_str_appendl(&ctx->result, "", 0);
407        *newlen = 0;
408    } else {
409        *newlen = ctx->result.s->len;
410    }
411    smart_str_0(&ctx->result);
412    if (do_flush) {
413        smart_str_append(&ctx->result, ctx->buf.s);
414        *newlen += ctx->buf.s->len;
415        smart_str_free(&ctx->buf);
416        smart_str_free(&ctx->val);
417    }
418    retval = estrndup(ctx->result.s->val, ctx->result.s->len);
419    smart_str_free(&ctx->result);
420    return retval;
421}
422
423static int php_url_scanner_ex_activate(TSRMLS_D)
424{
425    url_adapt_state_ex_t *ctx;
426
427    ctx = &BG(url_adapt_state_ex);
428
429    memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
430
431    return SUCCESS;
432}
433
434static int php_url_scanner_ex_deactivate(TSRMLS_D)
435{
436    url_adapt_state_ex_t *ctx;
437
438    ctx = &BG(url_adapt_state_ex);
439
440    smart_str_free(&ctx->result);
441    smart_str_free(&ctx->buf);
442    smart_str_free(&ctx->tag);
443    smart_str_free(&ctx->arg);
444
445    return SUCCESS;
446}
447
448static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC)
449{
450    size_t len;
451
452    if (BG(url_adapt_state_ex).url_app.s->len != 0) {
453        *handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC);
454        if (sizeof(uint) < sizeof(size_t)) {
455            if (len > UINT_MAX)
456                len = UINT_MAX;
457        }
458        *handled_output_len = len;
459    } else if (BG(url_adapt_state_ex).url_app.s->len == 0) {
460        url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
461        if (ctx->buf.s && ctx->buf.s->len) {
462            smart_str_append(&ctx->result, ctx->buf.s);
463            smart_str_appendl(&ctx->result, output, output_len);
464
465            *handled_output = estrndup(ctx->result.s->val, ctx->result.s->len);
466            *handled_output_len = ctx->buf.s->len + output_len;
467
468            smart_str_free(&ctx->buf);
469            smart_str_free(&ctx->result);
470        } else {
471            *handled_output = estrndup(output, *handled_output_len = output_len);
472        }
473    } else {
474        *handled_output = NULL;
475    }
476}
477
478PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC)
479{
480    smart_str val = {0};
481    zend_string *encoded;
482
483    if (!BG(url_adapt_state_ex).active) {
484        php_url_scanner_ex_activate(TSRMLS_C);
485        php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC);
486        BG(url_adapt_state_ex).active = 1;
487    }
488
489
490    if (BG(url_adapt_state_ex).url_app.s && BG(url_adapt_state_ex).url_app.s->len != 0) {
491        smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
492    }
493
494    if (urlencode) {
495        encoded = php_url_encode(value, value_len);
496        smart_str_setl(&val, encoded->val, encoded->len);
497    } else {
498        smart_str_setl(&val, value, value_len);
499    }
500
501    smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len);
502    smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
503    smart_str_append_smart_str(&BG(url_adapt_state_ex).url_app, &val);
504
505    smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\"");
506    smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len);
507    smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
508    smart_str_append_smart_str(&BG(url_adapt_state_ex).form_app, &val);
509    smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
510
511    if (urlencode) {
512        zend_string_free(encoded);
513    }
514    smart_str_free(&val);
515
516    return SUCCESS;
517}
518
519PHPAPI int php_url_scanner_reset_vars(TSRMLS_D)
520{
521    if (BG(url_adapt_state_ex).form_app.s) {
522        BG(url_adapt_state_ex).form_app.s->len = 0;
523    }
524    if (BG(url_adapt_state_ex).url_app.s) {
525        BG(url_adapt_state_ex).url_app.s->len = 0;
526    }
527
528    return SUCCESS;
529}
530
531PHP_MINIT_FUNCTION(url_scanner)
532{
533    BG(url_adapt_state_ex).tags = NULL;
534
535    BG(url_adapt_state_ex).form_app.s = BG(url_adapt_state_ex).url_app.s = NULL;
536
537    REGISTER_INI_ENTRIES();
538    return SUCCESS;
539}
540
541PHP_MSHUTDOWN_FUNCTION(url_scanner)
542{
543    UNREGISTER_INI_ENTRIES();
544
545    return SUCCESS;
546}
547
548PHP_RINIT_FUNCTION(url_scanner)
549{
550    BG(url_adapt_state_ex).active = 0;
551
552    return SUCCESS;
553}
554
555PHP_RSHUTDOWN_FUNCTION(url_scanner)
556{
557    if (BG(url_adapt_state_ex).active) {
558        php_url_scanner_ex_deactivate(TSRMLS_C);
559        BG(url_adapt_state_ex).active = 0;
560    }
561
562    smart_str_free(&BG(url_adapt_state_ex).form_app);
563    smart_str_free(&BG(url_adapt_state_ex).url_app);
564
565    return SUCCESS;
566}
567