1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 7                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2014 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Sascha Schumann <sascha@schumann.cx>                         |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php.h"
22
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33
34#include "php_ini.h"
35#include "php_globals.h"
36#define STATE_TAG SOME_OTHER_STATE_TAG
37#include "basic_functions.h"
38#include "url.h"
39#undef STATE_TAG
40
41#define url_scanner url_scanner_ex
42
43#include "zend_smart_str.h"
44
45static void tag_dtor(zval *zv)
46{
47    free(Z_PTR_P(zv));
48}
49
50static PHP_INI_MH(OnUpdateTags)
51{
52    url_adapt_state_ex_t *ctx;
53    char *key;
54    char *lasts;
55    char *tmp;
56
57    ctx = &BG(url_adapt_state_ex);
58
59    tmp = estrndup(new_value->val, new_value->len);
60
61    if (ctx->tags)
62        zend_hash_destroy(ctx->tags);
63    else {
64        ctx->tags = malloc(sizeof(HashTable));
65        if (!ctx->tags) {
66            return FAILURE;
67        }
68    }
69
70    zend_hash_init(ctx->tags, 0, NULL, tag_dtor, 1);
71
72    for (key = php_strtok_r(tmp, ",", &lasts);
73            key;
74            key = php_strtok_r(NULL, ",", &lasts)) {
75        char *val;
76
77        val = strchr(key, '=');
78        if (val) {
79            char *q;
80            size_t keylen;
81
82            *val++ = '\0';
83            for (q = key; *q; q++)
84                *q = tolower(*q);
85            keylen = q - key;
86            /* key is stored withOUT NUL
87               val is stored WITH    NUL */
88            zend_hash_str_add_mem(ctx->tags, key, keylen, val, strlen(val)+1);
89        }
90    }
91
92    efree(tmp);
93
94    return SUCCESS;
95}
96
/*
 * INI settings of the URL rewriter.
 *
 * "url_rewriter.tags" is a comma separated list of tag=attribute pairs; it is
 * registered with PHP_INI_ALL and every change is routed through
 * OnUpdateTags.
 */
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
PHP_INI_END()
100
/*
 * Named re2c definitions shared by the scanners in this file:
 *   any            - any byte value (\000 through \377)
 *   N              - any byte except '<'
 *   alpha          - an ASCII letter
 *   alphanamespace - an ASCII letter or ':'
 *   alphadash      - an ASCII letter or '-'
 */
/*!re2c
any = [\000-\377];
N = (any\[<]);
alpha = [a-zA-Z];
alphanamespace = [a-zA-Z:];
alphadash = ([a-zA-Z] | "-");
*/
108
/*
 * re2c interface for append_modified_url(): the cursor is `p`, the input
 * limit is `q`, and running out of input jumps to the `done` label.
 */
#define YYFILL(n) goto done
#define YYCTYPE unsigned char
#define YYCURSOR p
#define YYLIMIT q
#define YYMARKER r

/*
 * Appends `url` to `dest`, adding `url_app` as a query-string component.
 *
 * url       - the URL to process
 * dest      - output buffer that receives the (possibly modified) URL
 * url_app   - text to insert, e.g. "name=value"
 * separator - separator used in front of url_app when the URL already
 *             contains a '?'; otherwise "?" is used
 *
 * The URL is copied to dest unchanged when the scanner meets a ':' before
 * reaching a '#' or the end of the URL, or when the URL starts with '#'.
 * In all other cases url_app is inserted in front of the '#...' part if
 * there is one, or appended at the end of the URL if there is none.
 */
static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
{
    register const char *p, *q;
    /* position of the first '#' in the URL, or NULL if none was seen */
    const char *bash = NULL;
    /* separator to emit in front of url_app; switched once a '?' is seen */
    const char *sep = "?";

    q = (p = url->s->val) + url->s->len;

scan:
/*!re2c
  ":"       { smart_str_append_smart_str(dest, url); return; }
  "?"       { sep = separator; goto scan; }
  "#"       { bash = p - 1; goto done; }
  (any\[:?#])+      { goto scan; }
*/
done:

    /* Don't modify URLs of the format "#mark" */
    if (bash && bash - url->s->val == 0) {
        smart_str_append_smart_str(dest, url);
        return;
    }

    /* copy everything in front of the '#' (or the whole URL if there is none) */
    if (bash)
        smart_str_appendl(dest, url->s->val, bash - url->s->val);
    else
        smart_str_append_smart_str(dest, url);

    smart_str_appends(dest, sep);
    smart_str_append_smart_str(dest, url_app);

    /* re-attach the '#...' part after the inserted text */
    if (bash)
        smart_str_appendl(dest, bash, q - bash);
}


#undef YYFILL
#undef YYCTYPE
#undef YYCURSOR
#undef YYLIMIT
#undef YYMARKER
156
157static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC)
158{
159    char f = 0;
160
161    if (strncasecmp(ctx->arg.s->val, ctx->lookup_data, ctx->arg.s->len) == 0)
162        f = 1;
163
164    if (quotes)
165        smart_str_appendc(&ctx->result, type);
166    if (f) {
167        append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
168    } else {
169        smart_str_append_smart_str(&ctx->result, &ctx->val);
170    }
171    if (quotes)
172        smart_str_appendc(&ctx->result, type);
173}
174
/* Scanner states of xx_mainloop(); the current one is kept in ctx->state. */
enum {
    STATE_PLAIN      = 0,   /* outside of any tag */
    STATE_TAG        = 1,   /* directly after '<' */
    STATE_NEXT_ARG   = 2,   /* inside a tag, looking for the next attribute */
    STATE_ARG        = 3,   /* at an attribute name */
    STATE_BEFORE_VAL = 4,   /* after an attribute name, looking for '=' */
    STATE_VAL        = 5    /* at an attribute value */
};
183
/*
 * re2c interface for xx_mainloop(): the cursor is `xp`, the input limit is
 * `end`, the backtracking marker is `q`, and running out of input jumps to
 * the `stop` label.
 */
#define YYFILL(n) goto stop
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYLIMIT end
#define YYMARKER q
/* shorthand for the scanner state stored in the context */
#define STATE ctx->state

/* common parameter list / argument list of the handle_* helpers and passthru() */
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC
#define STD_ARGS ctx, start, xp TSRMLS_CC

/* scdebug((fmt, ...)) prints via printf only when SCANNER_DEBUG is non-zero */
#if SCANNER_DEBUG
#define scdebug(x) printf x
#else
#define scdebug(x)
#endif
199
200static inline void passthru(STD_PARA)
201{
202    scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
203    smart_str_appendl(&ctx->result, start, YYCURSOR - start);
204}
205
206/*
207 * This function appends a hidden input field after a <form> or
208 * <fieldset>.  The latter is important for XHTML.
209 */
210
211static void handle_form(STD_PARA)
212{
213    int doit = 0;
214
215    if (ctx->form_app.s->len > 0) {
216        switch (ctx->tag.s->len) {
217            case sizeof("form") - 1:
218                if (!strncasecmp(ctx->tag.s->val, "form", sizeof("form") - 1)) {
219                    doit = 1;
220                }
221                if (doit && ctx->val.s && ctx->lookup_data && *ctx->lookup_data) {
222                    char *e, *p = (char *)zend_memnstr(ctx->val.s->val, "://", sizeof("://") - 1, ctx->val.s->val + ctx->val.s->len);
223                    if (p) {
224                        e = memchr(p, '/', (ctx->val.s->val + ctx->val.s->len) - p);
225                        if (!e) {
226                            e = ctx->val.s->val + ctx->val.s->len;
227                        }
228                        if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
229                            doit = 0;
230                        }
231                    }
232                }
233                break;
234
235            case sizeof("fieldset") - 1:
236                if (!strncasecmp(ctx->tag.s->val, "fieldset", sizeof("fieldset") - 1)) {
237                    doit = 1;
238                }
239                break;
240        }
241
242        if (doit)
243            smart_str_append_smart_str(&ctx->result, &ctx->form_app);
244    }
245}
246
247/*
248 *  HANDLE_TAG copies the HTML Tag and checks whether we
249 *  have that tag in our table. If we might modify it,
250 *  we continue to scan the tag, otherwise we simply copy the complete
251 *  HTML stuff to the result buffer.
252 */
253
254static inline void handle_tag(STD_PARA)
255{
256    int ok = 0;
257    unsigned int i;
258
259    if (ctx->tag.s) {
260        ctx->tag.s->len = 0;
261    }
262    smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
263    for (i = 0; i < ctx->tag.s->len; i++)
264        ctx->tag.s->val[i] = tolower((int)(unsigned char)ctx->tag.s->val[i]);
265    /* intentionally using str_find here, in case the hash value is set, but the string val is changed later */
266    if ((ctx->lookup_data = zend_hash_str_find_ptr(ctx->tags, ctx->tag.s->val, ctx->tag.s->len)) != NULL)
267        ok = 1;
268    STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
269}
270
271static inline void handle_arg(STD_PARA)
272{
273    if (ctx->arg.s) {
274        ctx->arg.s->len = 0;
275    }
276    smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
277}
278
279static inline void handle_val(STD_PARA, char quotes, char type)
280{
281    smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
282    tag_arg(ctx, quotes, type TSRMLS_CC);
283}
284
/*
 * Core of the URL rewriter: feeds `newdata` through the tag scanner.
 *
 * ctx     - scanner state; output is appended to ctx->result
 * newdata - next chunk of output to scan
 * newlen  - length of newdata in bytes
 *
 * The chunk is appended to ctx->buf, so input left unconsumed by a previous
 * call is scanned again together with the new data. Scanning resumes in the
 * state saved in ctx->state. When the scanner runs out of input (YYFILL
 * jumps to `stop`), everything from the start of the current token onwards
 * is kept in ctx->buf for the next call.
 */
static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC)
{
    char *end, *q;
    char *xp;
    char *start;
    size_t rest;

    smart_str_appendl(&ctx->buf, newdata, newlen);

    YYCURSOR = ctx->buf.s->val;
    YYLIMIT = ctx->buf.s->val + ctx->buf.s->len;

    /* resume where the previous call stopped */
    switch (STATE) {
        case STATE_PLAIN: goto state_plain;
        case STATE_TAG: goto state_tag;
        case STATE_NEXT_ARG: goto state_next_arg;
        case STATE_ARG: goto state_arg;
        case STATE_BEFORE_VAL: goto state_before_val;
        case STATE_VAL: goto state_val;
    }


state_plain_begin:
    STATE = STATE_PLAIN;

/* outside of tags: copy everything through until a '<' is found */
state_plain:
    start = YYCURSOR;
/*!re2c
  "<"               { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
  N+                { passthru(STD_ARGS); goto state_plain; }
*/

/* directly after '<': read the tag name; handle_tag() picks the next state */
state_tag:
    start = YYCURSOR;
/*!re2c
  alphanamespace+   { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
  any       { passthru(STD_ARGS); goto state_plain_begin; }
*/

state_next_arg_begin:
    STATE = STATE_NEXT_ARG;

/*
 * inside a tag of interest: skip whitespace, detect the end of the tag
 * (handle_form() runs after the closing '>' has been copied) or the start
 * of an attribute name
 */
state_next_arg:
    start = YYCURSOR;
/*!re2c
  [/]? [>]      { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
  [ \v\r\t\n]+  { passthru(STD_ARGS); goto state_next_arg; }
  alpha     { --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
  any       { passthru(STD_ARGS); goto state_plain_begin; }
*/

/* attribute name: copied through and remembered via handle_arg() */
state_arg:
    start = YYCURSOR;
/*!re2c
  alpha alphadash*  { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
  any       { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
*/

/* after an attribute name: either '=' follows or the attribute has no value */
state_before_val:
    start = YYCURSOR;
/*!re2c
  [ ]* "=" [ ]*     { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
  any               { --YYCURSOR; goto state_next_arg_begin; }
*/


/*
 * attribute value: double-quoted, single-quoted or unquoted; these are not
 * passed through directly but emitted by handle_val()
 */
state_val:
    start = YYCURSOR;
/*!re2c
  ["] (any\[">])* ["]   { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
  ['] (any\['>])* [']   { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
  (any\[ \r\t\n>'"])+   { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
  any                   { passthru(STD_ARGS); goto state_next_arg_begin; }
*/

/* out of input: keep the unconsumed tail [start, YYLIMIT) for the next call */
stop:
    if (YYLIMIT < start) {
        /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
        rest = 0;
    } else {
        rest = YYLIMIT - start;
        scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
    }

    /* memmove because source and destination are in the same buffer */
    if (rest) memmove(ctx->buf.s->val, start, rest);
    ctx->buf.s->len = rest;
}
372
373char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC)
374{
375    char *result;
376    smart_str surl = {0};
377    smart_str buf = {0};
378    smart_str url_app = {0};
379
380    smart_str_setl(&surl, url, urllen);
381
382    smart_str_appends(&url_app, name);
383    smart_str_appendc(&url_app, '=');
384    smart_str_appends(&url_app, value);
385
386    append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
387
388    smart_str_0(&buf);
389    if (newlen) *newlen = buf.s->len;
390    result = estrndup(buf.s->val, buf.s->len);
391
392    smart_str_free(&url_app);
393    smart_str_free(&buf);
394
395    return result;
396}
397
398
399static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC)
400{
401    url_adapt_state_ex_t *ctx;
402    char *retval;
403
404    ctx = &BG(url_adapt_state_ex);
405
406    xx_mainloop(ctx, src, srclen TSRMLS_CC);
407
408    if (!ctx->result.s) {
409        smart_str_appendl(&ctx->result, "", 0);
410        *newlen = 0;
411    } else {
412        *newlen = ctx->result.s->len;
413    }
414    smart_str_0(&ctx->result);
415    if (do_flush) {
416        smart_str_append(&ctx->result, ctx->buf.s);
417        *newlen += ctx->buf.s->len;
418        smart_str_free(&ctx->buf);
419        smart_str_free(&ctx->val);
420    }
421    retval = estrndup(ctx->result.s->val, ctx->result.s->len);
422    smart_str_free(&ctx->result);
423    return retval;
424}
425
426static int php_url_scanner_ex_activate(TSRMLS_D)
427{
428    url_adapt_state_ex_t *ctx;
429
430    ctx = &BG(url_adapt_state_ex);
431
432    memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
433
434    return SUCCESS;
435}
436
437static int php_url_scanner_ex_deactivate(TSRMLS_D)
438{
439    url_adapt_state_ex_t *ctx;
440
441    ctx = &BG(url_adapt_state_ex);
442
443    smart_str_free(&ctx->result);
444    smart_str_free(&ctx->buf);
445    smart_str_free(&ctx->tag);
446    smart_str_free(&ctx->arg);
447
448    return SUCCESS;
449}
450
451static void php_url_scanner_output_handler(char *output, size_t output_len, char **handled_output, size_t *handled_output_len, int mode TSRMLS_DC)
452{
453    size_t len;
454
455    if (BG(url_adapt_state_ex).url_app.s->len != 0) {
456        *handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC);
457        if (sizeof(uint) < sizeof(size_t)) {
458            if (len > UINT_MAX)
459                len = UINT_MAX;
460        }
461        *handled_output_len = len;
462    } else if (BG(url_adapt_state_ex).url_app.s->len == 0) {
463        url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
464        if (ctx->buf.s && ctx->buf.s->len) {
465            smart_str_append(&ctx->result, ctx->buf.s);
466            smart_str_appendl(&ctx->result, output, output_len);
467
468            *handled_output = estrndup(ctx->result.s->val, ctx->result.s->len);
469            *handled_output_len = ctx->buf.s->len + output_len;
470
471            smart_str_free(&ctx->buf);
472            smart_str_free(&ctx->result);
473        } else {
474            *handled_output = estrndup(output, *handled_output_len = output_len);
475        }
476    } else {
477        *handled_output = NULL;
478    }
479}
480
481PHPAPI int php_url_scanner_add_var(char *name, size_t name_len, char *value, size_t value_len, int urlencode TSRMLS_DC)
482{
483    smart_str val = {0};
484    zend_string *encoded;
485
486    if (!BG(url_adapt_state_ex).active) {
487        php_url_scanner_ex_activate(TSRMLS_C);
488        php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC);
489        BG(url_adapt_state_ex).active = 1;
490    }
491
492
493    if (BG(url_adapt_state_ex).url_app.s && BG(url_adapt_state_ex).url_app.s->len != 0) {
494        smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
495    }
496
497    if (urlencode) {
498        encoded = php_url_encode(value, value_len);
499        smart_str_setl(&val, encoded->val, encoded->len);
500    } else {
501        smart_str_setl(&val, value, value_len);
502    }
503
504    smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len);
505    smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
506    smart_str_append_smart_str(&BG(url_adapt_state_ex).url_app, &val);
507
508    smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\"");
509    smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len);
510    smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
511    smart_str_append_smart_str(&BG(url_adapt_state_ex).form_app, &val);
512    smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
513
514    if (urlencode) {
515        zend_string_free(encoded);
516    }
517    smart_str_free(&val);
518
519    return SUCCESS;
520}
521
522PHPAPI int php_url_scanner_reset_vars(TSRMLS_D)
523{
524    if (BG(url_adapt_state_ex).form_app.s) {
525        BG(url_adapt_state_ex).form_app.s->len = 0;
526    }
527    if (BG(url_adapt_state_ex).url_app.s) {
528        BG(url_adapt_state_ex).url_app.s->len = 0;
529    }
530
531    return SUCCESS;
532}
533
534PHP_MINIT_FUNCTION(url_scanner)
535{
536    BG(url_adapt_state_ex).tags = NULL;
537
538    BG(url_adapt_state_ex).form_app.s = BG(url_adapt_state_ex).url_app.s = NULL;
539
540    REGISTER_INI_ENTRIES();
541    return SUCCESS;
542}
543
/* Module shutdown: unregisters the INI entries. Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(url_scanner)
{
    UNREGISTER_INI_ENTRIES();

    return SUCCESS;
}
550
551PHP_RINIT_FUNCTION(url_scanner)
552{
553    BG(url_adapt_state_ex).active = 0;
554
555    return SUCCESS;
556}
557
558PHP_RSHUTDOWN_FUNCTION(url_scanner)
559{
560    if (BG(url_adapt_state_ex).active) {
561        php_url_scanner_ex_deactivate(TSRMLS_C);
562        BG(url_adapt_state_ex).active = 0;
563    }
564
565    smart_str_free(&BG(url_adapt_state_ex).form_app);
566    smart_str_free(&BG(url_adapt_state_ex).url_app);
567
568    return SUCCESS;
569}
570