1/*
2  +----------------------------------------------------------------------+
3  | PHP Version 5                                                        |
4  +----------------------------------------------------------------------+
5  | Copyright (c) 1997-2014 The PHP Group                                |
6  +----------------------------------------------------------------------+
7  | This source file is subject to version 3.01 of the PHP license,      |
8  | that is bundled with this package in the file LICENSE, and is        |
9  | available through the world-wide-web at the following url:           |
10  | http://www.php.net/license/3_01.txt                                  |
11  | If you did not receive a copy of the PHP license and are unable to   |
12  | obtain it through the world-wide-web, please send a note to          |
13  | license@php.net so we can mail you a copy immediately.               |
14  +----------------------------------------------------------------------+
15  | Author: Sascha Schumann <sascha@schumann.cx>                         |
16  +----------------------------------------------------------------------+
17*/
18
19/* $Id$ */
20
21#include "php.h"
22
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#ifdef HAVE_LIMITS_H
27#include <limits.h>
28#endif
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33
34#include "php_ini.h"
35#include "php_globals.h"
36#define STATE_TAG SOME_OTHER_STATE_TAG
37#include "basic_functions.h"
38#include "url.h"
39#undef STATE_TAG
40
41#define url_scanner url_scanner_ex
42
43#include "php_smart_str.h"
44
45static PHP_INI_MH(OnUpdateTags)
46{
47    url_adapt_state_ex_t *ctx;
48    char *key;
49    char *lasts;
50    char *tmp;
51
52    ctx = &BG(url_adapt_state_ex);
53
54    tmp = estrndup(new_value, new_value_length);
55
56    if (ctx->tags)
57        zend_hash_destroy(ctx->tags);
58    else {
59        ctx->tags = malloc(sizeof(HashTable));
60        if (!ctx->tags) {
61            return FAILURE;
62        }
63    }
64
65    zend_hash_init(ctx->tags, 0, NULL, NULL, 1);
66
67    for (key = php_strtok_r(tmp, ",", &lasts);
68            key;
69            key = php_strtok_r(NULL, ",", &lasts)) {
70        char *val;
71
72        val = strchr(key, '=');
73        if (val) {
74            char *q;
75            int keylen;
76
77            *val++ = '\0';
78            for (q = key; *q; q++)
79                *q = tolower(*q);
80            keylen = q - key;
81            /* key is stored withOUT NUL
82               val is stored WITH    NUL */
83            zend_hash_add(ctx->tags, key, keylen, val, strlen(val)+1, NULL);
84        }
85    }
86
87    efree(tmp);
88
89    return SUCCESS;
90}
91
/*
 * INI entries of the URL rewriter.  "url_rewriter.tags" may be changed at
 * any level (PHP_INI_ALL); every change is routed through OnUpdateTags.
 * The default value lists tag=attribute pairs; "form=" and "fieldset="
 * carry an empty attribute name.
 */
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=,fieldset=", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
PHP_INI_END()
95
/*
 * Named character classes shared by the re2c rule blocks in this file:
 *   any            - every byte value (octal 000 through 377)
 *   N              - any byte except '<'
 *   alpha          - ASCII letters
 *   alphanamespace - ASCII letters and ':'
 *   alphadash      - ASCII letters and '-'
 */
/*!re2c
any = [\000-\377];
N = (any\[<]);
alpha = [a-zA-Z];
alphanamespace = [a-zA-Z:];
alphadash = ([a-zA-Z] | "-");
*/
103
/*
 * Scanner bindings for the rule block in append_modified_url():
 * the cursor is `p`, the input limit is `q`, and running out of input
 * jumps to that function's `done` label.
 * NOTE(review): YYMARKER names `r`, which append_modified_url() does not
 * declare; presumably the generated code never needs a marker for those
 * rules - confirm against the re2c output.
 */
#define YYFILL(n) goto done
#define YYCTYPE unsigned char
#define YYCURSOR p
#define YYLIMIT q
#define YYMARKER r
109
110static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *url_app, const char *separator)
111{
112    register const char *p, *q;
113    const char *bash = NULL;
114    const char *sep = "?";
115
116    q = (p = url->c) + url->len;
117
118scan:
119/*!re2c
120  ":"       { smart_str_append(dest, url); return; }
121  "?"       { sep = separator; goto scan; }
122  "#"       { bash = p - 1; goto done; }
123  (any\[:?#])+      { goto scan; }
124*/
125done:
126
127    /* Don't modify URLs of the format "#mark" */
128    if (bash && bash - url->c == 0) {
129        smart_str_append(dest, url);
130        return;
131    }
132
133    if (bash)
134        smart_str_appendl(dest, url->c, bash - url->c);
135    else
136        smart_str_append(dest, url);
137
138    smart_str_appends(dest, sep);
139    smart_str_append(dest, url_app);
140
141    if (bash)
142        smart_str_appendl(dest, bash, q - bash);
143}
144
145
/* Drop the scanner bindings defined above; they are defined again further down with different targets. */
#undef YYFILL
#undef YYCTYPE
#undef YYCURSOR
#undef YYLIMIT
#undef YYMARKER
151
152static inline void tag_arg(url_adapt_state_ex_t *ctx, char quotes, char type TSRMLS_DC)
153{
154    char f = 0;
155
156    if (strncasecmp(ctx->arg.c, ctx->lookup_data, ctx->arg.len) == 0)
157        f = 1;
158
159    if (quotes)
160        smart_str_appendc(&ctx->result, type);
161    if (f) {
162        append_modified_url(&ctx->val, &ctx->result, &ctx->url_app, PG(arg_separator).output);
163    } else {
164        smart_str_append(&ctx->result, &ctx->val);
165    }
166    if (quotes)
167        smart_str_appendc(&ctx->result, type);
168}
169
/* Scanner states stored in ctx->state so that xx_mainloop() can resume between calls. */
enum {
    STATE_PLAIN      = 0,   /* ordinary text, not inside a tag */
    STATE_TAG        = 1,   /* directly after '<', expecting a tag name */
    STATE_NEXT_ARG   = 2,   /* inside a tag, expecting an attribute or the end of the tag */
    STATE_ARG        = 3,   /* at an attribute name */
    STATE_BEFORE_VAL = 4,   /* after an attribute name, expecting '=' */
    STATE_VAL        = 5    /* after '=', expecting the attribute value */
};
178
/*
 * Scanner bindings for xx_mainloop(): the cursor is `xp`, the input limit is
 * `end`, the backtracking marker is `q`, and running out of input jumps to
 * the `stop` label.  STATE is shorthand for the state saved in the context.
 */
#define YYFILL(n) goto stop
#define YYCTYPE unsigned char
#define YYCURSOR xp
#define YYLIMIT end
#define YYMARKER q
#define STATE ctx->state

/* Common parameter list / argument list of the scanner helper functions. */
#define STD_PARA url_adapt_state_ex_t *ctx, char *start, char *YYCURSOR TSRMLS_DC
#define STD_ARGS ctx, start, xp TSRMLS_CC

/* scdebug((fmt, ...)) prints via printf when SCANNER_DEBUG is set and expands to nothing otherwise. */
#if SCANNER_DEBUG
#define scdebug(x) printf x
#else
#define scdebug(x)
#endif
194
195static inline void passthru(STD_PARA)
196{
197    scdebug(("appending %d chars, starting with %c\n", YYCURSOR-start, *start));
198    smart_str_appendl(&ctx->result, start, YYCURSOR - start);
199}
200
201/*
202 * This function appends a hidden input field after a <form> or
203 * <fieldset>.  The latter is important for XHTML.
204 */
205
206static void handle_form(STD_PARA)
207{
208    int doit = 0;
209
210    if (ctx->form_app.len > 0) {
211        switch (ctx->tag.len) {
212            case sizeof("form") - 1:
213                if (!strncasecmp(ctx->tag.c, "form", sizeof("form") - 1)) {
214                    doit = 1;
215                }
216                if (doit && ctx->val.c && ctx->lookup_data && *ctx->lookup_data) {
217                    char *e, *p = zend_memnstr(ctx->val.c, "://", sizeof("://") - 1, ctx->val.c + ctx->val.len);
218                    if (p) {
219                        e = memchr(p, '/', (ctx->val.c + ctx->val.len) - p);
220                        if (!e) {
221                            e = ctx->val.c + ctx->val.len;
222                        }
223                        if ((e - p) && strncasecmp(p, ctx->lookup_data, (e - p))) {
224                            doit = 0;
225                        }
226                    }
227                }
228                break;
229
230            case sizeof("fieldset") - 1:
231                if (!strncasecmp(ctx->tag.c, "fieldset", sizeof("fieldset") - 1)) {
232                    doit = 1;
233                }
234                break;
235        }
236
237        if (doit)
238            smart_str_append(&ctx->result, &ctx->form_app);
239    }
240}
241
242/*
243 *  HANDLE_TAG copies the HTML Tag and checks whether we
244 *  have that tag in our table. If we might modify it,
245 *  we continue to scan the tag, otherwise we simply copy the complete
246 *  HTML stuff to the result buffer.
247 */
248
249static inline void handle_tag(STD_PARA)
250{
251    int ok = 0;
252    unsigned int i;
253
254    ctx->tag.len = 0;
255    smart_str_appendl(&ctx->tag, start, YYCURSOR - start);
256    for (i = 0; i < ctx->tag.len; i++)
257        ctx->tag.c[i] = tolower((int)(unsigned char)ctx->tag.c[i]);
258    if (zend_hash_find(ctx->tags, ctx->tag.c, ctx->tag.len, (void **) &ctx->lookup_data) == SUCCESS)
259        ok = 1;
260    STATE = ok ? STATE_NEXT_ARG : STATE_PLAIN;
261}
262
263static inline void handle_arg(STD_PARA)
264{
265    ctx->arg.len = 0;
266    smart_str_appendl(&ctx->arg, start, YYCURSOR - start);
267}
268
269static inline void handle_val(STD_PARA, char quotes, char type)
270{
271    smart_str_setl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2);
272    tag_arg(ctx, quotes, type TSRMLS_CC);
273}
274
/*
 * Feed `newlen` bytes at `newdata` to the HTML scanner.
 *
 * The data is appended to ctx->buf and the whole buffer is scanned as a
 * state machine that resumes in the state saved in ctx->state.  Scanned
 * text is written to ctx->result, either unchanged (passthru) or through
 * handle_val() for attribute values.  When the scanner runs out of input
 * (YYFILL jumps to `stop`), everything from the start of the token being
 * scanned is kept in ctx->buf for the next call.
 */
static inline void xx_mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen TSRMLS_DC)
{
    char *end, *q;
    char *xp;
    char *start;
    int rest;

    smart_str_appendl(&ctx->buf, newdata, newlen);

    YYCURSOR = ctx->buf.c;
    YYLIMIT = ctx->buf.c + ctx->buf.len;

    /* Resume where the previous call stopped; any other value falls through to state_plain_begin. */
    switch (STATE) {
        case STATE_PLAIN: goto state_plain;
        case STATE_TAG: goto state_tag;
        case STATE_NEXT_ARG: goto state_next_arg;
        case STATE_ARG: goto state_arg;
        case STATE_BEFORE_VAL: goto state_before_val;
        case STATE_VAL: goto state_val;
    }


state_plain_begin:
    STATE = STATE_PLAIN;

/* Plain text: copy everything through; a '<' switches to tag scanning. */
state_plain:
    start = YYCURSOR;
/*!re2c
  "<"               { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
  N+                { passthru(STD_ARGS); goto state_plain; }
*/

/* Directly after '<': a tag name is looked up by handle_tag(), anything else returns to plain text. */
state_tag:
    start = YYCURSOR;
/*!re2c
  alphanamespace+   { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
  any       { passthru(STD_ARGS); goto state_plain_begin; }
*/

state_next_arg_begin:
    STATE = STATE_NEXT_ARG;

/* Inside a known tag: end of tag, whitespace, or the first letter of an attribute name (which is pushed back). */
state_next_arg:
    start = YYCURSOR;
/*!re2c
  [/]? [>]      { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
  [ \v\r\t\n]+  { passthru(STD_ARGS); goto state_next_arg; }
  alpha     { --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
  any       { passthru(STD_ARGS); goto state_plain_begin; }
*/

/* Attribute name: copied through and recorded in ctx->arg. */
state_arg:
    start = YYCURSOR;
/*!re2c
  alpha alphadash*  { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
  any       { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
*/

/* After an attribute name: an optional-space '=' leads to the value; any other character is pushed back. */
state_before_val:
    start = YYCURSOR;
/*!re2c
  [ ]* "=" [ ]*     { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
  any               { --YYCURSOR; goto state_next_arg_begin; }
*/


/* Attribute value: double-quoted, single-quoted or unquoted; handle_val() writes it to the result. */
state_val:
    start = YYCURSOR;
/*!re2c
  ["] (any\[">])* ["]   { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
  ['] (any\['>])* [']   { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
  (any\[ \r\t\n>'"])+   { handle_val(STD_ARGS, 0, ' '); goto state_next_arg_begin; }
  any                   { passthru(STD_ARGS); goto state_next_arg_begin; }
*/

/* Out of input: keep the unfinished token (from `start` to the limit) at the front of ctx->buf. */
stop:
    rest = YYLIMIT - start;
    scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
    /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
    if (rest < 0) rest = 0;

    /* memmove, not memcpy: source and destination lie in the same buffer. */
    if (rest) memmove(ctx->buf.c, start, rest);
    ctx->buf.len = rest;
}
359
360char *php_url_scanner_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen TSRMLS_DC)
361{
362    smart_str surl = {0};
363    smart_str buf = {0};
364    smart_str url_app = {0};
365
366    smart_str_setl(&surl, url, urllen);
367
368    smart_str_appends(&url_app, name);
369    smart_str_appendc(&url_app, '=');
370    smart_str_appends(&url_app, value);
371
372    append_modified_url(&surl, &buf, &url_app, PG(arg_separator).output);
373
374    smart_str_0(&buf);
375    if (newlen) *newlen = buf.len;
376
377    smart_str_free(&url_app);
378
379    return buf.c;
380}
381
382
383static char *url_adapt_ext(const char *src, size_t srclen, size_t *newlen, zend_bool do_flush TSRMLS_DC)
384{
385    url_adapt_state_ex_t *ctx;
386    char *retval;
387
388    ctx = &BG(url_adapt_state_ex);
389
390    xx_mainloop(ctx, src, srclen TSRMLS_CC);
391
392    *newlen = ctx->result.len;
393    if (!ctx->result.c) {
394        smart_str_appendl(&ctx->result, "", 0);
395    }
396    smart_str_0(&ctx->result);
397    if (do_flush) {
398        smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
399        *newlen += ctx->buf.len;
400        smart_str_free(&ctx->buf);
401    }
402    retval = ctx->result.c;
403    ctx->result.c = NULL;
404    ctx->result.len = 0;
405    return retval;
406}
407
408static int php_url_scanner_ex_activate(TSRMLS_D)
409{
410    url_adapt_state_ex_t *ctx;
411
412    ctx = &BG(url_adapt_state_ex);
413
414    memset(ctx, 0, ((size_t) &((url_adapt_state_ex_t *)0)->tags));
415
416    return SUCCESS;
417}
418
419static int php_url_scanner_ex_deactivate(TSRMLS_D)
420{
421    url_adapt_state_ex_t *ctx;
422
423    ctx = &BG(url_adapt_state_ex);
424
425    smart_str_free(&ctx->result);
426    smart_str_free(&ctx->buf);
427    smart_str_free(&ctx->tag);
428    smart_str_free(&ctx->arg);
429
430    return SUCCESS;
431}
432
433static void php_url_scanner_output_handler(char *output, uint output_len, char **handled_output, uint *handled_output_len, int mode TSRMLS_DC)
434{
435    size_t len;
436
437    if (BG(url_adapt_state_ex).url_app.len != 0) {
438        *handled_output = url_adapt_ext(output, output_len, &len, (zend_bool) (mode & (PHP_OUTPUT_HANDLER_END | PHP_OUTPUT_HANDLER_CONT | PHP_OUTPUT_HANDLER_FLUSH | PHP_OUTPUT_HANDLER_FINAL) ? 1 : 0) TSRMLS_CC);
439        if (sizeof(uint) < sizeof(size_t)) {
440            if (len > UINT_MAX)
441                len = UINT_MAX;
442        }
443        *handled_output_len = len;
444    } else if (BG(url_adapt_state_ex).url_app.len == 0) {
445        url_adapt_state_ex_t *ctx = &BG(url_adapt_state_ex);
446        if (ctx->buf.len) {
447            smart_str_appendl(&ctx->result, ctx->buf.c, ctx->buf.len);
448            smart_str_appendl(&ctx->result, output, output_len);
449
450            *handled_output = ctx->result.c;
451            *handled_output_len = ctx->buf.len + output_len;
452
453            ctx->result.c = NULL;
454            ctx->result.len = 0;
455            smart_str_free(&ctx->buf);
456        } else {
457            *handled_output = estrndup(output, *handled_output_len = output_len);
458        }
459    } else {
460        *handled_output = NULL;
461    }
462}
463
464PHPAPI int php_url_scanner_add_var(char *name, int name_len, char *value, int value_len, int urlencode TSRMLS_DC)
465{
466    char *encoded = NULL;
467    int encoded_len;
468    smart_str val;
469
470    if (! BG(url_adapt_state_ex).active) {
471        php_url_scanner_ex_activate(TSRMLS_C);
472        php_output_start_internal(ZEND_STRL("URL-Rewriter"), php_url_scanner_output_handler, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC);
473        BG(url_adapt_state_ex).active = 1;
474    }
475
476
477    if (BG(url_adapt_state_ex).url_app.len != 0) {
478        smart_str_appends(&BG(url_adapt_state_ex).url_app, PG(arg_separator).output);
479    }
480
481    if (urlencode) {
482        encoded = php_url_encode(value, value_len, &encoded_len);
483        smart_str_setl(&val, encoded, encoded_len);
484    } else {
485        smart_str_setl(&val, value, value_len);
486    }
487
488    smart_str_appendl(&BG(url_adapt_state_ex).url_app, name, name_len);
489    smart_str_appendc(&BG(url_adapt_state_ex).url_app, '=');
490    smart_str_append(&BG(url_adapt_state_ex).url_app, &val);
491
492    smart_str_appends(&BG(url_adapt_state_ex).form_app, "<input type=\"hidden\" name=\"");
493    smart_str_appendl(&BG(url_adapt_state_ex).form_app, name, name_len);
494    smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" value=\"");
495    smart_str_append(&BG(url_adapt_state_ex).form_app, &val);
496    smart_str_appends(&BG(url_adapt_state_ex).form_app, "\" />");
497
498    if (urlencode)
499        efree(encoded);
500
501    return SUCCESS;
502}
503
504PHPAPI int php_url_scanner_reset_vars(TSRMLS_D)
505{
506    BG(url_adapt_state_ex).form_app.len = 0;
507    BG(url_adapt_state_ex).url_app.len = 0;
508
509    return SUCCESS;
510}
511
512PHP_MINIT_FUNCTION(url_scanner)
513{
514    BG(url_adapt_state_ex).tags = NULL;
515
516    BG(url_adapt_state_ex).form_app.c = BG(url_adapt_state_ex).url_app.c = 0;
517    BG(url_adapt_state_ex).form_app.len = BG(url_adapt_state_ex).url_app.len = 0;
518
519    REGISTER_INI_ENTRIES();
520    return SUCCESS;
521}
522
/* Module shutdown: unregister the INI entries.  Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(url_scanner)
{
    UNREGISTER_INI_ENTRIES();

    return SUCCESS;
}
529
530PHP_RINIT_FUNCTION(url_scanner)
531{
532    BG(url_adapt_state_ex).active = 0;
533
534    return SUCCESS;
535}
536
537PHP_RSHUTDOWN_FUNCTION(url_scanner)
538{
539    if (BG(url_adapt_state_ex).active) {
540        php_url_scanner_ex_deactivate(TSRMLS_C);
541        BG(url_adapt_state_ex).active = 0;
542    }
543
544    smart_str_free(&BG(url_adapt_state_ex).form_app);
545    smart_str_free(&BG(url_adapt_state_ex).url_app);
546
547    return SUCCESS;
548}
549