1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2014 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16   |         Rui Hirokawa <hirokawa@php.net>                              |
17   +----------------------------------------------------------------------+
18 */
19
20/* $Id$ */
21
22/*
23 * PHP 4 Multibyte String module "mbstring"
24 *
25 * History:
26 *   2000.5.19  Release php-4.0RC2_jstring-1.0
27 *   2001.4.1   Release php4_jstring-1.0.91
28 *   2001.4.30  Release php4_jstring-1.1 (contribute to The PHP Group)
29 *   2001.5.1   Renamed from jstring to mbstring (hirokawa@php.net)
30 */
31
32/*
33 * PHP3 Internationalization support program.
34 *
35 * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36 * All rights reserved.
37 *
38 * See README_PHP3-i18n-ja for more detail.
39 *
40 * Authors:
41 *    Hironori Sato <satoh@jpnnet.com>
42 *    Shigeru Kanemoto <sgk@happysize.co.jp>
43 *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44 *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45 */
46
47/* {{{ includes */
48#ifdef HAVE_CONFIG_H
49#include "config.h"
50#endif
51
52#include "php.h"
53#include "php_ini.h"
54#include "php_variables.h"
55#include "mbstring.h"
56#include "ext/standard/php_string.h"
57#include "ext/standard/php_mail.h"
58#include "ext/standard/exec.h"
59#include "ext/standard/url.h"
60#include "main/php_output.h"
61#include "ext/standard/info.h"
62
63#include "libmbfl/mbfl/mbfl_allocators.h"
64#include "libmbfl/mbfl/mbfilter_pass.h"
65
66#include "php_variables.h"
67#include "php_globals.h"
68#include "rfc1867.h"
69#include "php_content_types.h"
70#include "SAPI.h"
71#include "php_unicode.h"
72#include "TSRM.h"
73
74#include "mb_gpc.h"
75
76#if HAVE_MBREGEX
77#include "php_mbregex.h"
78#endif
79
80#include "zend_multibyte.h"
81
82#if HAVE_ONIG
83#include "php_onig_compat.h"
84#include <oniguruma.h>
85#undef UChar
86#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
87#include "ext/pcre/php_pcre.h"
88#endif
89/* }}} */
90
91#if HAVE_MBSTRING
92
93/* {{{ prototypes */
94ZEND_DECLARE_MODULE_GLOBALS(mbstring)
95
96static PHP_GINIT_FUNCTION(mbstring);
97static PHP_GSHUTDOWN_FUNCTION(mbstring);
98
99static void php_mb_populate_current_detect_order_list(TSRMLS_D);
100
101static int php_mb_encoding_translation(TSRMLS_D);
102
103static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
104
105static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);
106
107/* }}} */
108
109/* {{{ php_mb_default_identify_list */
110typedef struct _php_mb_nls_ident_list {
111    enum mbfl_no_language lang;
112    const enum mbfl_no_encoding *list;
113    size_t list_size;
114} php_mb_nls_ident_list;
115
116static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
117    mbfl_no_encoding_ascii,
118    mbfl_no_encoding_jis,
119    mbfl_no_encoding_utf8,
120    mbfl_no_encoding_euc_jp,
121    mbfl_no_encoding_sjis
122};
123
124static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
125    mbfl_no_encoding_ascii,
126    mbfl_no_encoding_utf8,
127    mbfl_no_encoding_euc_cn,
128    mbfl_no_encoding_cp936
129};
130
131static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
132    mbfl_no_encoding_ascii,
133    mbfl_no_encoding_utf8,
134    mbfl_no_encoding_euc_tw,
135    mbfl_no_encoding_big5
136};
137
138static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
139    mbfl_no_encoding_ascii,
140    mbfl_no_encoding_utf8,
141    mbfl_no_encoding_euc_kr,
142    mbfl_no_encoding_uhc
143};
144
145static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
146    mbfl_no_encoding_ascii,
147    mbfl_no_encoding_utf8,
148    mbfl_no_encoding_koi8r,
149    mbfl_no_encoding_cp1251,
150    mbfl_no_encoding_cp866
151};
152
153static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
154    mbfl_no_encoding_ascii,
155    mbfl_no_encoding_utf8,
156    mbfl_no_encoding_armscii8
157};
158
159static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
160    mbfl_no_encoding_ascii,
161    mbfl_no_encoding_utf8,
162    mbfl_no_encoding_cp1254,
163    mbfl_no_encoding_8859_9
164};
165
166static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
167    mbfl_no_encoding_ascii,
168    mbfl_no_encoding_utf8,
169    mbfl_no_encoding_koi8u
170};
171
172static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
173    mbfl_no_encoding_ascii,
174    mbfl_no_encoding_utf8
175};
176
177
178static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
179    { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
180    { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
181    { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
182    { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
183    { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
184    { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
185    { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
186    { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
187    { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
188};
189
190/* }}} */
191
192/* {{{ mb_overload_def mb_ovld[] */
193static const struct mb_overload_def mb_ovld[] = {
194    {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
195    {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
196    {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
197    {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
198    {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
199    {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
200    {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
201    {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
202    {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
203    {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
204    {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
205    {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
206    {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
207#if HAVE_MBREGEX
208    {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
209    {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
210    {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
211    {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
212    {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
213#endif
214    {0, NULL, NULL, NULL}
215};
216/* }}} */
217
218/* {{{ arginfo */
219ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
220    ZEND_ARG_INFO(0, language)
221ZEND_END_ARG_INFO()
222
223ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
224    ZEND_ARG_INFO(0, encoding)
225ZEND_END_ARG_INFO()
226
227ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
228    ZEND_ARG_INFO(0, type)
229ZEND_END_ARG_INFO()
230
231ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
232    ZEND_ARG_INFO(0, encoding)
233ZEND_END_ARG_INFO()
234
235ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
236    ZEND_ARG_INFO(0, encoding)
237ZEND_END_ARG_INFO()
238
239ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
240    ZEND_ARG_INFO(0, substchar)
241ZEND_END_ARG_INFO()
242
243ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
244    ZEND_ARG_INFO(0, encoding)
245ZEND_END_ARG_INFO()
246
247ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
248    ZEND_ARG_INFO(0, encoded_string)
249    ZEND_ARG_INFO(1, result)
250ZEND_END_ARG_INFO()
251
252ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
253    ZEND_ARG_INFO(0, contents)
254    ZEND_ARG_INFO(0, status)
255ZEND_END_ARG_INFO()
256
257ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
258    ZEND_ARG_INFO(0, str)
259    ZEND_ARG_INFO(0, encoding)
260ZEND_END_ARG_INFO()
261
262ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
263    ZEND_ARG_INFO(0, haystack)
264    ZEND_ARG_INFO(0, needle)
265    ZEND_ARG_INFO(0, offset)
266    ZEND_ARG_INFO(0, encoding)
267ZEND_END_ARG_INFO()
268
269ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
270    ZEND_ARG_INFO(0, haystack)
271    ZEND_ARG_INFO(0, needle)
272    ZEND_ARG_INFO(0, offset)
273    ZEND_ARG_INFO(0, encoding)
274ZEND_END_ARG_INFO()
275
276ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
277    ZEND_ARG_INFO(0, haystack)
278    ZEND_ARG_INFO(0, needle)
279    ZEND_ARG_INFO(0, offset)
280    ZEND_ARG_INFO(0, encoding)
281ZEND_END_ARG_INFO()
282
283ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
284    ZEND_ARG_INFO(0, haystack)
285    ZEND_ARG_INFO(0, needle)
286    ZEND_ARG_INFO(0, offset)
287    ZEND_ARG_INFO(0, encoding)
288ZEND_END_ARG_INFO()
289
290ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
291    ZEND_ARG_INFO(0, haystack)
292    ZEND_ARG_INFO(0, needle)
293    ZEND_ARG_INFO(0, part)
294    ZEND_ARG_INFO(0, encoding)
295ZEND_END_ARG_INFO()
296
297ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
298    ZEND_ARG_INFO(0, haystack)
299    ZEND_ARG_INFO(0, needle)
300    ZEND_ARG_INFO(0, part)
301    ZEND_ARG_INFO(0, encoding)
302ZEND_END_ARG_INFO()
303
304ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
305    ZEND_ARG_INFO(0, haystack)
306    ZEND_ARG_INFO(0, needle)
307    ZEND_ARG_INFO(0, part)
308    ZEND_ARG_INFO(0, encoding)
309ZEND_END_ARG_INFO()
310
311ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
312    ZEND_ARG_INFO(0, haystack)
313    ZEND_ARG_INFO(0, needle)
314    ZEND_ARG_INFO(0, part)
315    ZEND_ARG_INFO(0, encoding)
316ZEND_END_ARG_INFO()
317
318ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
319    ZEND_ARG_INFO(0, haystack)
320    ZEND_ARG_INFO(0, needle)
321    ZEND_ARG_INFO(0, encoding)
322ZEND_END_ARG_INFO()
323
324ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
325    ZEND_ARG_INFO(0, str)
326    ZEND_ARG_INFO(0, start)
327    ZEND_ARG_INFO(0, length)
328    ZEND_ARG_INFO(0, encoding)
329ZEND_END_ARG_INFO()
330
331ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
332    ZEND_ARG_INFO(0, str)
333    ZEND_ARG_INFO(0, start)
334    ZEND_ARG_INFO(0, length)
335    ZEND_ARG_INFO(0, encoding)
336ZEND_END_ARG_INFO()
337
338ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
339    ZEND_ARG_INFO(0, str)
340    ZEND_ARG_INFO(0, encoding)
341ZEND_END_ARG_INFO()
342
343ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
344    ZEND_ARG_INFO(0, str)
345    ZEND_ARG_INFO(0, start)
346    ZEND_ARG_INFO(0, width)
347    ZEND_ARG_INFO(0, trimmarker)
348    ZEND_ARG_INFO(0, encoding)
349ZEND_END_ARG_INFO()
350
351ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
352    ZEND_ARG_INFO(0, str)
353    ZEND_ARG_INFO(0, to)
354    ZEND_ARG_INFO(0, from)
355ZEND_END_ARG_INFO()
356
357ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
358    ZEND_ARG_INFO(0, sourcestring)
359    ZEND_ARG_INFO(0, mode)
360    ZEND_ARG_INFO(0, encoding)
361ZEND_END_ARG_INFO()
362
363ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
364    ZEND_ARG_INFO(0, sourcestring)
365    ZEND_ARG_INFO(0, encoding)
366ZEND_END_ARG_INFO()
367
368ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
369    ZEND_ARG_INFO(0, sourcestring)
370    ZEND_ARG_INFO(0, encoding)
371ZEND_END_ARG_INFO()
372
373ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
374    ZEND_ARG_INFO(0, str)
375    ZEND_ARG_INFO(0, encoding_list)
376    ZEND_ARG_INFO(0, strict)
377ZEND_END_ARG_INFO()
378
379ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
380ZEND_END_ARG_INFO()
381
382ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
383    ZEND_ARG_INFO(0, encoding)
384ZEND_END_ARG_INFO()
385
386ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
387    ZEND_ARG_INFO(0, str)
388    ZEND_ARG_INFO(0, charset)
389    ZEND_ARG_INFO(0, transfer)
390    ZEND_ARG_INFO(0, linefeed)
391    ZEND_ARG_INFO(0, indent)
392ZEND_END_ARG_INFO()
393
394ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
395    ZEND_ARG_INFO(0, string)
396ZEND_END_ARG_INFO()
397
398ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
399    ZEND_ARG_INFO(0, str)
400    ZEND_ARG_INFO(0, option)
401    ZEND_ARG_INFO(0, encoding)
402ZEND_END_ARG_INFO()
403
404ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
405    ZEND_ARG_INFO(0, to)
406    ZEND_ARG_INFO(0, from)
407    ZEND_ARG_VARIADIC_INFO(1, vars)
408ZEND_END_ARG_INFO()
409
410ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
411    ZEND_ARG_INFO(0, string)
412    ZEND_ARG_INFO(0, convmap)
413    ZEND_ARG_INFO(0, encoding)
414    ZEND_ARG_INFO(0, is_hex)
415ZEND_END_ARG_INFO()
416
417ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
418    ZEND_ARG_INFO(0, string)
419    ZEND_ARG_INFO(0, convmap)
420    ZEND_ARG_INFO(0, encoding)
421ZEND_END_ARG_INFO()
422
423ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
424    ZEND_ARG_INFO(0, to)
425    ZEND_ARG_INFO(0, subject)
426    ZEND_ARG_INFO(0, message)
427    ZEND_ARG_INFO(0, additional_headers)
428    ZEND_ARG_INFO(0, additional_parameters)
429ZEND_END_ARG_INFO()
430
431ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
432    ZEND_ARG_INFO(0, type)
433ZEND_END_ARG_INFO()
434
435ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
436    ZEND_ARG_INFO(0, var)
437    ZEND_ARG_INFO(0, encoding)
438ZEND_END_ARG_INFO()
439
440ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
441    ZEND_ARG_INFO(0, encoding)
442ZEND_END_ARG_INFO()
443
444ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
445    ZEND_ARG_INFO(0, pattern)
446    ZEND_ARG_INFO(0, string)
447    ZEND_ARG_INFO(1, registers)
448ZEND_END_ARG_INFO()
449
450ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
451    ZEND_ARG_INFO(0, pattern)
452    ZEND_ARG_INFO(0, string)
453    ZEND_ARG_INFO(1, registers)
454ZEND_END_ARG_INFO()
455
456ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
457    ZEND_ARG_INFO(0, pattern)
458    ZEND_ARG_INFO(0, replacement)
459    ZEND_ARG_INFO(0, string)
460    ZEND_ARG_INFO(0, option)
461ZEND_END_ARG_INFO()
462
463ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
464    ZEND_ARG_INFO(0, pattern)
465    ZEND_ARG_INFO(0, replacement)
466    ZEND_ARG_INFO(0, string)
467ZEND_END_ARG_INFO()
468
469ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
470    ZEND_ARG_INFO(0, pattern)
471    ZEND_ARG_INFO(0, callback)
472    ZEND_ARG_INFO(0, string)
473    ZEND_ARG_INFO(0, option)
474ZEND_END_ARG_INFO()
475
476ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
477    ZEND_ARG_INFO(0, pattern)
478    ZEND_ARG_INFO(0, string)
479    ZEND_ARG_INFO(0, limit)
480ZEND_END_ARG_INFO()
481
482ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
483    ZEND_ARG_INFO(0, pattern)
484    ZEND_ARG_INFO(0, string)
485    ZEND_ARG_INFO(0, option)
486ZEND_END_ARG_INFO()
487
488ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
489    ZEND_ARG_INFO(0, pattern)
490    ZEND_ARG_INFO(0, option)
491ZEND_END_ARG_INFO()
492
493ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
494    ZEND_ARG_INFO(0, pattern)
495    ZEND_ARG_INFO(0, option)
496ZEND_END_ARG_INFO()
497
498ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
499    ZEND_ARG_INFO(0, pattern)
500    ZEND_ARG_INFO(0, option)
501ZEND_END_ARG_INFO()
502
503ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
504    ZEND_ARG_INFO(0, string)
505    ZEND_ARG_INFO(0, pattern)
506    ZEND_ARG_INFO(0, option)
507ZEND_END_ARG_INFO()
508
509ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
510ZEND_END_ARG_INFO()
511
512ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
513ZEND_END_ARG_INFO()
514
515ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
516    ZEND_ARG_INFO(0, position)
517ZEND_END_ARG_INFO()
518
519ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
520    ZEND_ARG_INFO(0, options)
521ZEND_END_ARG_INFO()
522/* }}} */
523
524/* {{{ zend_function_entry mbstring_functions[] */
525const zend_function_entry mbstring_functions[] = {
526    PHP_FE(mb_convert_case,         arginfo_mb_convert_case)
527    PHP_FE(mb_strtoupper,           arginfo_mb_strtoupper)
528    PHP_FE(mb_strtolower,           arginfo_mb_strtolower)
529    PHP_FE(mb_language,             arginfo_mb_language)
530    PHP_FE(mb_internal_encoding,    arginfo_mb_internal_encoding)
531    PHP_FE(mb_http_input,           arginfo_mb_http_input)
532    PHP_FE(mb_http_output,          arginfo_mb_http_output)
533    PHP_FE(mb_detect_order,         arginfo_mb_detect_order)
534    PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
535    PHP_FE(mb_parse_str,            arginfo_mb_parse_str)
536    PHP_FE(mb_output_handler,       arginfo_mb_output_handler)
537    PHP_FE(mb_preferred_mime_name,  arginfo_mb_preferred_mime_name)
538    PHP_FE(mb_strlen,               arginfo_mb_strlen)
539    PHP_FE(mb_strpos,               arginfo_mb_strpos)
540    PHP_FE(mb_strrpos,              arginfo_mb_strrpos)
541    PHP_FE(mb_stripos,              arginfo_mb_stripos)
542    PHP_FE(mb_strripos,             arginfo_mb_strripos)
543    PHP_FE(mb_strstr,               arginfo_mb_strstr)
544    PHP_FE(mb_strrchr,              arginfo_mb_strrchr)
545    PHP_FE(mb_stristr,              arginfo_mb_stristr)
546    PHP_FE(mb_strrichr,             arginfo_mb_strrichr)
547    PHP_FE(mb_substr_count,         arginfo_mb_substr_count)
548    PHP_FE(mb_substr,               arginfo_mb_substr)
549    PHP_FE(mb_strcut,               arginfo_mb_strcut)
550    PHP_FE(mb_strwidth,             arginfo_mb_strwidth)
551    PHP_FE(mb_strimwidth,           arginfo_mb_strimwidth)
552    PHP_FE(mb_convert_encoding,     arginfo_mb_convert_encoding)
553    PHP_FE(mb_detect_encoding,      arginfo_mb_detect_encoding)
554    PHP_FE(mb_list_encodings,       arginfo_mb_list_encodings)
555    PHP_FE(mb_encoding_aliases,     arginfo_mb_encoding_aliases)
556    PHP_FE(mb_convert_kana,         arginfo_mb_convert_kana)
557    PHP_FE(mb_encode_mimeheader,    arginfo_mb_encode_mimeheader)
558    PHP_FE(mb_decode_mimeheader,    arginfo_mb_decode_mimeheader)
559    PHP_FE(mb_convert_variables,    arginfo_mb_convert_variables)
560    PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
561    PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
562    PHP_FE(mb_send_mail,            arginfo_mb_send_mail)
563    PHP_FE(mb_get_info,             arginfo_mb_get_info)
564    PHP_FE(mb_check_encoding,       arginfo_mb_check_encoding)
565#if HAVE_MBREGEX
566    PHP_MBREGEX_FUNCTION_ENTRIES
567#endif
568    PHP_FE_END
569};
570/* }}} */
571
572/* {{{ zend_module_entry mbstring_module_entry */
573zend_module_entry mbstring_module_entry = {
574    STANDARD_MODULE_HEADER,
575    "mbstring",
576    mbstring_functions,
577    PHP_MINIT(mbstring),
578    PHP_MSHUTDOWN(mbstring),
579    PHP_RINIT(mbstring),
580    PHP_RSHUTDOWN(mbstring),
581    PHP_MINFO(mbstring),
582    NO_VERSION_YET,
583    PHP_MODULE_GLOBALS(mbstring),
584    PHP_GINIT(mbstring),
585    PHP_GSHUTDOWN(mbstring),
586    NULL,
587    STANDARD_MODULE_PROPERTIES_EX
588};
589/* }}} */
590
591/* {{{ static sapi_post_entry php_post_entries[] */
592static sapi_post_entry php_post_entries[] = {
593    { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
594    { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
595    { NULL, 0, NULL, NULL }
596};
597/* }}} */
598
/* Emit the module lookup entry point only when COMPILE_DL_MBSTRING is defined. */
#if defined(COMPILE_DL_MBSTRING)
ZEND_GET_MODULE(mbstring)
#endif
602
603static char *get_internal_encoding(TSRMLS_D) {
604    if (PG(internal_encoding) && PG(internal_encoding)[0]) {
605        return PG(internal_encoding);
606    } else if (SG(default_charset)) {
607        return SG(default_charset);
608    }
609    return "";
610}
611
612static char *get_input_encoding(TSRMLS_D) {
613    if (PG(input_encoding) && PG(input_encoding)[0]) {
614        return PG(input_encoding);
615    } else if (SG(default_charset)) {
616        return SG(default_charset);
617    }
618    return "";
619}
620
621static char *get_output_encoding(TSRMLS_D) {
622    if (PG(output_encoding) && PG(output_encoding)[0]) {
623        return PG(output_encoding);
624    } else if (SG(default_charset)) {
625        return SG(default_charset);
626    }
627    return "";
628}
629
630
631/* {{{ allocators */
632static void *_php_mb_allocators_malloc(unsigned int sz)
633{
634    return emalloc(sz);
635}
636
637static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
638{
639    return erealloc(ptr, sz);
640}
641
642static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
643{
644    return ecalloc(nelems, szelem);
645}
646
647static void _php_mb_allocators_free(void *ptr)
648{
649    efree(ptr);
650}
651
652static void *_php_mb_allocators_pmalloc(unsigned int sz)
653{
654    return pemalloc(sz, 1);
655}
656
657static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
658{
659    return perealloc(ptr, sz, 1);
660}
661
662static void _php_mb_allocators_pfree(void *ptr)
663{
664    pefree(ptr, 1);
665}
666
667static mbfl_allocators _php_mb_allocators = {
668    _php_mb_allocators_malloc,
669    _php_mb_allocators_realloc,
670    _php_mb_allocators_calloc,
671    _php_mb_allocators_free,
672    _php_mb_allocators_pmalloc,
673    _php_mb_allocators_prealloc,
674    _php_mb_allocators_pfree
675};
676/* }}} */
677
678/* {{{ static sapi_post_entry mbstr_post_entries[] */
679static sapi_post_entry mbstr_post_entries[] = {
680    { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
681    { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
682    { NULL, 0, NULL, NULL }
683};
684/* }}} */
685
686/* {{{ static int php_mb_parse_encoding_list()
687 *  Return 0 if input contains any illegal encoding, otherwise 1.
688 *  Even if any illegal encoding is detected the result may contain a list
689 *  of parsed encodings.
690 */
691static int
692php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
693{
694    int size, bauto, ret = SUCCESS;
695    size_t n;
696    char *p, *p1, *p2, *endp, *tmpstr;
697    const mbfl_encoding **entry, **list;
698
699    list = NULL;
700    if (value == NULL || value_length <= 0) {
701        if (return_list) {
702            *return_list = NULL;
703        }
704        if (return_size) {
705            *return_size = 0;
706        }
707        return FAILURE;
708    } else {
709        /* copy the value string for work */
710        if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
711            tmpstr = (char *)estrndup(value+1, value_length-2);
712            value_length -= 2;
713        }
714        else
715            tmpstr = (char *)estrndup(value, value_length);
716        if (tmpstr == NULL) {
717            return FAILURE;
718        }
719        /* count the number of listed encoding names */
720        endp = tmpstr + value_length;
721        n = 1;
722        p1 = tmpstr;
723        while ((p2 = (char*)php_memnstr(p1, ",", 1, endp)) != NULL) {
724            p1 = p2 + 1;
725            n++;
726        }
727        size = n + MBSTRG(default_detect_order_list_size);
728        /* make list */
729        list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
730        if (list != NULL) {
731            entry = list;
732            n = 0;
733            bauto = 0;
734            p1 = tmpstr;
735            do {
736                p2 = p = (char*)php_memnstr(p1, ",", 1, endp);
737                if (p == NULL) {
738                    p = endp;
739                }
740                *p = '\0';
741                /* trim spaces */
742                while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
743                    p1++;
744                }
745                p--;
746                while (p > p1 && (*p == ' ' || *p == '\t')) {
747                    *p = '\0';
748                    p--;
749                }
750                /* convert to the encoding number and check encoding */
751                if (strcasecmp(p1, "auto") == 0) {
752                    if (!bauto) {
753                        const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
754                        const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
755                        size_t i;
756                        bauto = 1;
757                        for (i = 0; i < identify_list_size; i++) {
758                            *entry++ = mbfl_no2encoding(*src++);
759                            n++;
760                        }
761                    }
762                } else {
763                    const mbfl_encoding *encoding = mbfl_name2encoding(p1);
764                    if (encoding) {
765                        *entry++ = encoding;
766                        n++;
767                    } else {
768                        ret = 0;
769                    }
770                }
771                p1 = p2 + 1;
772            } while (n < size && p2 != NULL);
773            if (n > 0) {
774                if (return_list) {
775                    *return_list = list;
776                } else {
777                    pefree(list, persistent);
778                }
779            } else {
780                pefree(list, persistent);
781                if (return_list) {
782                    *return_list = NULL;
783                }
784                ret = 0;
785            }
786            if (return_size) {
787                *return_size = n;
788            }
789        } else {
790            if (return_list) {
791                *return_list = NULL;
792            }
793            if (return_size) {
794                *return_size = 0;
795            }
796            ret = 0;
797        }
798        efree(tmpstr);
799    }
800
801    return ret;
802}
803/* }}} */
804
805/* {{{ static int php_mb_parse_encoding_array()
806 *  Return 0 if input contains any illegal encoding, otherwise 1.
807 *  Even if any illegal encoding is detected the result may contain a list
808 *  of parsed encodings.
809 */
810static int
811php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
812{
813    zval *hash_entry;
814    HashTable *target_hash;
815    int i, n, size, bauto, ret = SUCCESS;
816    const mbfl_encoding **list, **entry;
817
818    list = NULL;
819    if (Z_TYPE_P(array) == IS_ARRAY) {
820        target_hash = Z_ARRVAL_P(array);
821        i = zend_hash_num_elements(target_hash);
822        size = i + MBSTRG(default_detect_order_list_size);
823        list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
824        if (list != NULL) {
825            entry = list;
826            bauto = 0;
827            n = 0;
828            ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
829                convert_to_string_ex(hash_entry);
830                if (strcasecmp(Z_STRVAL_P(hash_entry), "auto") == 0) {
831                    if (!bauto) {
832                        const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
833                        const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
834                        size_t j;
835
836                        bauto = 1;
837                        for (j = 0; j < identify_list_size; j++) {
838                            *entry++ = mbfl_no2encoding(*src++);
839                            n++;
840                        }
841                    }
842                } else {
843                    const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_P(hash_entry));
844                    if (encoding) {
845                        *entry++ = encoding;
846                        n++;
847                    } else {
848                        ret = FAILURE;
849                    }
850                }
851                i--;
852            } ZEND_HASH_FOREACH_END();
853            if (n > 0) {
854                if (return_list) {
855                    *return_list = list;
856                } else {
857                    pefree(list, persistent);
858                }
859            } else {
860                pefree(list, persistent);
861                if (return_list) {
862                    *return_list = NULL;
863                }
864                ret = FAILURE;
865            }
866            if (return_size) {
867                *return_size = n;
868            }
869        } else {
870            if (return_list) {
871                *return_list = NULL;
872            }
873            if (return_size) {
874                *return_size = 0;
875            }
876            ret = FAILURE;
877        }
878    }
879
880    return ret;
881}
882/* }}} */
883
884/* {{{ zend_multibyte interface */
885static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
886{
887    return (const zend_encoding*)mbfl_name2encoding(encoding_name);
888}
889
890static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
891{
892    return ((const mbfl_encoding *)encoding)->name;
893}
894
895static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
896{
897    const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
898    if (encoding->flag & MBFL_ENCTYPE_SBCS) {
899        return 1;
900    }
901    if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
902        return 1;
903    }
904    return 0;
905}
906
907static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
908{
909    mbfl_string string;
910
911    if (!list) {
912        list = (const zend_encoding **)MBSTRG(current_detect_order_list);
913        list_size = MBSTRG(current_detect_order_list_size);
914    }
915
916    mbfl_string_init(&string);
917    string.no_language = MBSTRG(language);
918    string.val = (unsigned char *)arg_string;
919    string.len = arg_length;
920    return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
921}
922
923static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
924{
925    mbfl_string string, result;
926    mbfl_buffer_converter *convd;
927    int status, loc;
928
929    /* new encoding */
930    /* initialize string */
931    mbfl_string_init(&string);
932    mbfl_string_init(&result);
933    string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
934    string.no_language = MBSTRG(language);
935    string.val = (unsigned char*)from;
936    string.len = from_length;
937
938    /* initialize converter */
939    convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
940    if (convd == NULL) {
941        return -1;
942    }
943    mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
944    mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
945
946    /* do it */
947    status = mbfl_buffer_converter_feed2(convd, &string, &loc);
948    if (status) {
949        mbfl_buffer_converter_delete(convd);
950        return (size_t)-1;
951    }
952
953    mbfl_buffer_converter_flush(convd);
954    if (!mbfl_buffer_converter_result(convd, &result)) {
955        mbfl_buffer_converter_delete(convd);
956        return (size_t)-1;
957    }
958
959    *to = result.val;
960    *to_length = result.len;
961
962    mbfl_buffer_converter_delete(convd);
963
964    return loc;
965}
966
967static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
968{
969    return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC);
970}
971
972static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
973{
974    return (const zend_encoding *)MBSTRG(internal_encoding);
975}
976
977static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
978{
979    MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
980    return SUCCESS;
981}
982
/* Callback table through which the Zend engine reaches mbstring's encoding
 * services. It is handed to zend_multibyte_set_functions() in MINIT. The
 * first member is the provider name; the rest are the adapter functions
 * defined above, in the order the structure declares them. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
    "mbstring",
    php_mb_zend_encoding_fetcher,
    php_mb_zend_encoding_name_getter,
    php_mb_zend_encoding_lexer_compatibility_checker,
    php_mb_zend_encoding_detector,
    php_mb_zend_encoding_converter,
    php_mb_zend_encoding_list_parser,
    php_mb_zend_internal_encoding_getter,
    php_mb_zend_internal_encoding_setter
};
994/* }}} */
995
/* Prototypes for the three regex helpers (compile / match / free). Their
 * definitions follow and are selected at compile time: an Oniguruma version
 * under HAVE_ONIG, otherwise a PCRE version under HAVE_PCRE or
 * HAVE_BUNDLED_PCRE. The compiled regex is passed around as an opaque void*. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
999
1000#if HAVE_ONIG
1001/* {{{ _php_mb_compile_regex */
1002static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
1003{
1004    php_mb_regex_t *retval;
1005    OnigErrorInfo err_info;
1006    int err_code;
1007
1008    if ((err_code = onig_new(&retval,
1009            (const OnigUChar *)pattern,
1010            (const OnigUChar *)pattern + strlen(pattern),
1011            ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1012            ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1013        OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1014        onig_error_code_to_str(err_str, err_code, err_info);
1015        php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
1016        retval = NULL;
1017    }
1018    return retval;
1019}
1020/* }}} */
1021
1022/* {{{ _php_mb_match_regex */
1023static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1024{
1025    return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1026            (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1027            (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1028}
1029/* }}} */
1030
1031/* {{{ _php_mb_free_regex */
1032static void _php_mb_free_regex(void *opaque)
1033{
1034    onig_free((php_mb_regex_t *)opaque);
1035}
1036/* }}} */
1037#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1038/* {{{ _php_mb_compile_regex */
1039static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
1040{
1041    pcre *retval;
1042    const char *err_str;
1043    int err_offset;
1044
1045    if (!(retval = pcre_compile(pattern,
1046            PCRE_CASELESS, &err_str, &err_offset, NULL))) {
1047        php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
1048    }
1049    return retval;
1050}
1051/* }}} */
1052
1053/* {{{ _php_mb_match_regex */
1054static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1055{
1056    return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1057            0, NULL, 0) >= 0;
1058}
1059/* }}} */
1060
1061/* {{{ _php_mb_free_regex */
/* Releases a PCRE pattern previously returned by _php_mb_compile_regex(). */
static void _php_mb_free_regex(void *opaque)
{
    void *compiled = opaque;

    pcre_free(compiled);
}
1066/* }}} */
1067#endif
1068
1069/* {{{ php_mb_nls_get_default_detect_order_list */
1070static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1071{
1072    size_t i;
1073
1074    *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1075    *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1076
1077    for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1078        if (php_mb_default_identify_list[i].lang == lang) {
1079            *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1080            *plist_size = php_mb_default_identify_list[i].list_size;
1081            return 1;
1082        }
1083    }
1084    return 0;
1085}
1086/* }}} */
1087
1088static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
1089{
1090    char *result = emalloc(len + 2);
1091    char *resp = result;
1092    int i;
1093
1094    for (i = 0; i < len && start[i] != quote; ++i) {
1095        if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1096            *resp++ = start[++i];
1097        } else {
1098            size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1099
1100            while (j-- > 0 && i < len) {
1101                *resp++ = start[i++];
1102            }
1103            --i;
1104        }
1105    }
1106
1107    *resp = '\0';
1108    return result;
1109}
1110
1111static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */
1112{
1113    char *pos = *line, quote;
1114    char *res;
1115
1116    while (*pos && *pos != stop) {
1117        if ((quote = *pos) == '"' || quote == '\'') {
1118            ++pos;
1119            while (*pos && *pos != quote) {
1120                if (*pos == '\\' && pos[1] && pos[1] == quote) {
1121                    pos += 2;
1122                } else {
1123                    ++pos;
1124                }
1125            }
1126            if (*pos) {
1127                ++pos;
1128            }
1129        } else {
1130            pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1131
1132        }
1133    }
1134    if (*pos == '\0') {
1135        res = estrdup(*line);
1136        *line += strlen(*line);
1137        return res;
1138    }
1139
1140    res = estrndup(*line, pos - *line);
1141
1142    while (*pos == stop) {
1143        pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1144    }
1145
1146    *line = pos;
1147    return res;
1148}
1149/* }}} */
1150
1151static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */
1152{
1153    while (*str && isspace(*(unsigned char *)str)) {
1154        ++str;
1155    }
1156
1157    if (!*str) {
1158        return estrdup("");
1159    }
1160
1161    if (*str == '"' || *str == '\'') {
1162        char quote = *str;
1163
1164        str++;
1165        return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote TSRMLS_CC);
1166    } else {
1167        char *strend = str;
1168
1169        while (*strend && !isspace(*(unsigned char *)strend)) {
1170            ++strend;
1171        }
1172        return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0 TSRMLS_CC);
1173    }
1174}
1175/* }}} */
1176
1177static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */
1178{
1179    char *s, *s2;
1180    const size_t filename_len = strlen(filename);
1181
1182    /* The \ check should technically be needed for win32 systems only where
1183     * it is a valid path separator. However, IE in all it's wisdom always sends
1184     * the full path of the file on the user's filesystem, which means that unless
1185     * the user does basename() they get a bogus file name. Until IE's user base drops
1186     * to nill or problem is fixed this code must remain enabled for all systems. */
1187    s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1188    s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1189
1190    if (s && s2) {
1191        if (s > s2) {
1192            return ++s;
1193        } else {
1194            return ++s2;
1195        }
1196    } else if (s) {
1197        return ++s;
1198    } else if (s2) {
1199        return ++s2;
1200    } else {
1201        return filename;
1202    }
1203}
1204/* }}} */
1205
1206/* {{{ php.ini directive handler */
1207/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
1208static PHP_INI_MH(OnUpdate_mbstring_language)
1209{
1210    enum mbfl_no_language no_language;
1211
1212    no_language = mbfl_name2no_language(new_value);
1213    if (no_language == mbfl_no_language_invalid) {
1214        MBSTRG(language) = mbfl_no_language_neutral;
1215        return FAILURE;
1216    }
1217    MBSTRG(language) = no_language;
1218    php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1219    return SUCCESS;
1220}
1221/* }}} */
1222
1223/* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
1224static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1225{
1226    const mbfl_encoding **list;
1227    size_t size;
1228
1229    if (!new_value) {
1230        if (MBSTRG(detect_order_list)) {
1231            pefree(MBSTRG(detect_order_list), 1);
1232        }
1233        MBSTRG(detect_order_list) = NULL;
1234        MBSTRG(detect_order_list_size) = 0;
1235        return SUCCESS;
1236    }
1237
1238    if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1239        return FAILURE;
1240    }
1241
1242    if (MBSTRG(detect_order_list)) {
1243        pefree(MBSTRG(detect_order_list), 1);
1244    }
1245    MBSTRG(detect_order_list) = list;
1246    MBSTRG(detect_order_list_size) = size;
1247    return SUCCESS;
1248}
1249/* }}} */
1250
1251/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
1252static PHP_INI_MH(OnUpdate_mbstring_http_input)
1253{
1254    const mbfl_encoding **list;
1255    size_t size;
1256
1257    if (!new_value) {
1258        if (MBSTRG(http_input_list)) {
1259            pefree(MBSTRG(http_input_list), 1);
1260        }
1261        if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(TSRMLS_C), strlen(get_input_encoding(TSRMLS_C))+1, &list, &size, 1 TSRMLS_CC)) {
1262            MBSTRG(http_input_list) = list;
1263            MBSTRG(http_input_list_size) = size;
1264            return SUCCESS;
1265        }
1266        MBSTRG(http_input_list) = NULL;
1267        MBSTRG(http_input_list_size) = 0;
1268        return SUCCESS;
1269    }
1270
1271    if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1272        return FAILURE;
1273    }
1274
1275    if (MBSTRG(http_input_list)) {
1276        pefree(MBSTRG(http_input_list), 1);
1277    }
1278    MBSTRG(http_input_list) = list;
1279    MBSTRG(http_input_list_size) = size;
1280
1281    if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1282        php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1283    }
1284
1285    return SUCCESS;
1286}
1287/* }}} */
1288
1289/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
1290static PHP_INI_MH(OnUpdate_mbstring_http_output)
1291{
1292    const mbfl_encoding *encoding;
1293
1294    if (new_value == NULL || new_value_length == 0) {
1295        encoding = mbfl_name2encoding(get_output_encoding(TSRMLS_C));
1296        if (!encoding) {
1297            MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1298            MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1299            return SUCCESS;
1300        }
1301    } else {
1302        encoding = mbfl_name2encoding(new_value);
1303        if (!encoding) {
1304            MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1305            MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1306            return FAILURE;
1307        }
1308    }
1309    MBSTRG(http_output_encoding) = encoding;
1310    MBSTRG(current_http_output_encoding) = encoding;
1311
1312    if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1313        php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1314    }
1315
1316    return SUCCESS;
1317}
1318/* }}} */
1319
1320/* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
1321int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
1322{
1323    const mbfl_encoding *encoding;
1324
1325    if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) {
1326        /* falls back to UTF-8 if an unknown encoding name is given */
1327        encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1328    }
1329    MBSTRG(internal_encoding) = encoding;
1330    MBSTRG(current_internal_encoding) = encoding;
1331#if HAVE_MBREGEX
1332    {
1333        const char *enc_name = new_value;
1334        if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
1335            /* falls back to UTF-8 if an unknown encoding name is given */
1336            enc_name = "UTF-8";
1337            php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
1338        }
1339        php_mb_regex_set_mbctype(new_value TSRMLS_CC);
1340    }
1341#endif
1342    return SUCCESS;
1343}
1344/* }}} */
1345
1346/* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
1347static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1348{
1349    if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1350        php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1351    }
1352
1353    if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) {
1354        return FAILURE;
1355    }
1356
1357    if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1358        if (new_value_length) {
1359            return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1360        } else {
1361            return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(TSRMLS_C), strlen(get_internal_encoding(TSRMLS_C))+1 TSRMLS_CC);
1362        }
1363    } else {
1364        /* the corresponding mbstring globals needs to be set according to the
1365         * ini value in the later stage because it never falls back to the
1366         * default value if 1. no value for mbstring.internal_encoding is given,
1367         * 2. mbstring.language directive is processed in per-dir or runtime
1368         * context and 3. call to the handler for mbstring.language is done
1369         * after mbstring.internal_encoding is handled. */
1370        return SUCCESS;
1371    }
1372}
1373/* }}} */
1374
1375/* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
1376static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1377{
1378    int c;
1379    char *endptr = NULL;
1380
1381    if (new_value != NULL) {
1382        if (strcasecmp("none", new_value) == 0) {
1383            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1384            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1385        } else if (strcasecmp("long", new_value) == 0) {
1386            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1387            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1388        } else if (strcasecmp("entity", new_value) == 0) {
1389            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1390            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1391        } else {
1392            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1393            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1394            if (new_value_length >0) {
1395                c = strtol(new_value, &endptr, 0);
1396                if (*endptr == '\0') {
1397                    MBSTRG(filter_illegal_substchar) = c;
1398                    MBSTRG(current_filter_illegal_substchar) = c;
1399                }
1400            }
1401        }
1402    } else {
1403        MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1404        MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1405        MBSTRG(filter_illegal_substchar) = 0x3f;    /* '?' */
1406        MBSTRG(current_filter_illegal_substchar) = 0x3f;    /* '?' */
1407    }
1408
1409    return SUCCESS;
1410}
1411/* }}} */
1412
1413/* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
1414static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1415{
1416    if (new_value == NULL) {
1417        return FAILURE;
1418    }
1419
1420    OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1421
1422    if (MBSTRG(encoding_translation)) {
1423        sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1424        sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1425    } else {
1426        sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1427        sapi_register_post_entries(php_post_entries TSRMLS_CC);
1428    }
1429
1430    return SUCCESS;
1431}
1432/* }}} */
1433
1434/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
1435static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1436{
1437    zval tmp;
1438    void *re = NULL;
1439
1440    if (!new_value) {
1441        new_value = entry->orig_value;
1442        new_value_length = entry->orig_value_length;
1443    }
1444    php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1445
1446    if (Z_STRLEN(tmp) > 0) {
1447        if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1448            zval_dtor(&tmp);
1449            return FAILURE;
1450        }
1451    }
1452
1453    if (MBSTRG(http_output_conv_mimetypes)) {
1454        _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1455    }
1456
1457    MBSTRG(http_output_conv_mimetypes) = re;
1458
1459    zval_dtor(&tmp);
1460    return SUCCESS;
1461}
1462/* }}} */
1463/* }}} */
1464
1465/* {{{ php.ini directive registration */
/* ini directives owned by mbstring. Each entry gives the directive name, its
 * default value, the contexts in which it may be changed and its update
 * handler; STD_* entries additionally name the zend_mbstring_globals field
 * they are bound to. */
PHP_INI_BEGIN()
    /* directives with dedicated handlers; a NULL default means "no value" */
    PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
    PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
    PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
    PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
    STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
    PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
    /* function overloading can only be configured at system level */
    STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
    PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

    /* changeable at system and per-directory level only */
    STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
        PHP_INI_SYSTEM | PHP_INI_PERDIR,
        OnUpdate_mbstring_encoding_translation,
        encoding_translation, zend_mbstring_globals, mbstring_globals)
    /* regex matched against MIME types; default covers text/ and XHTML */
    PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
        "^(text/|application/xhtml\\+xml)",
        PHP_INI_ALL,
        OnUpdate_mbstring_http_output_conv_mimetypes)

    /* NOTE(review): declared as a boolean entry but updated with
     * OnUpdateLong - presumably strict_detection is a long field; confirm
     * against the globals declaration. */
    STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
        PHP_INI_ALL,
        OnUpdateLong,
        strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
1490/* }}} */
1491
1492/* {{{ module global initialize handler */
1493static PHP_GINIT_FUNCTION(mbstring)
1494{
1495    mbstring_globals->language = mbfl_no_language_uni;
1496    mbstring_globals->internal_encoding = NULL;
1497    mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1498    mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1499    mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1500    mbstring_globals->http_input_identify = NULL;
1501    mbstring_globals->http_input_identify_get = NULL;
1502    mbstring_globals->http_input_identify_post = NULL;
1503    mbstring_globals->http_input_identify_cookie = NULL;
1504    mbstring_globals->http_input_identify_string = NULL;
1505    mbstring_globals->http_input_list = NULL;
1506    mbstring_globals->http_input_list_size = 0;
1507    mbstring_globals->detect_order_list = NULL;
1508    mbstring_globals->detect_order_list_size = 0;
1509    mbstring_globals->current_detect_order_list = NULL;
1510    mbstring_globals->current_detect_order_list_size = 0;
1511    mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1512    mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1513    mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1514    mbstring_globals->filter_illegal_substchar = 0x3f;  /* '?' */
1515    mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1516    mbstring_globals->current_filter_illegal_substchar = 0x3f;  /* '?' */
1517    mbstring_globals->illegalchars = 0;
1518    mbstring_globals->func_overload = 0;
1519    mbstring_globals->encoding_translation = 0;
1520    mbstring_globals->strict_detection = 0;
1521    mbstring_globals->outconv = NULL;
1522    mbstring_globals->http_output_conv_mimetypes = NULL;
1523#if HAVE_MBREGEX
1524    mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1525#endif
1526}
1527/* }}} */
1528
1529/* {{{ PHP_GSHUTDOWN_FUNCTION */
1530static PHP_GSHUTDOWN_FUNCTION(mbstring)
1531{
1532    if (mbstring_globals->http_input_list) {
1533        free(mbstring_globals->http_input_list);
1534    }
1535    if (mbstring_globals->detect_order_list) {
1536        free(mbstring_globals->detect_order_list);
1537    }
1538    if (mbstring_globals->http_output_conv_mimetypes) {
1539        _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1540    }
1541#if HAVE_MBREGEX
1542    php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1543#endif
1544}
1545/* }}} */
1546
1547/* {{{ PHP_MINIT_FUNCTION(mbstring) */
/* Module start-up: installs the libmbfl allocators, registers ini entries,
 * SAPI hooks and constants, starts the mbregex sub-module when compiled in,
 * and hooks mbstring into the Zend multibyte layer and the RFC 1867 upload
 * parser. Returns FAILURE only if the Zend multibyte functions cannot be
 * installed. */
PHP_MINIT_FUNCTION(mbstring)
{
    /* route libmbfl's memory management through mbstring's allocator table */
    __mbfl_allocators = &_php_mb_allocators;

    REGISTER_INI_ENTRIES();

    /* This is a global handler. Should not be set in a per-request handler. */
    sapi_register_treat_data(mbstr_treat_data TSRMLS_CC);

    /* Post handlers are stored in the thread-local context. */
    if (MBSTRG(encoding_translation)) {
        sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
    }

    /* MB_OVERLOAD_* constants */
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);

    /* MB_CASE_* constants, mapped onto the PHP_UNICODE_CASE_* values */
    REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);

#if HAVE_MBREGEX
    PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

    /* expose mbstring's encoding callbacks to the Zend engine */
    if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
        return FAILURE;
    }

    /* let the multipart/form-data parser use multibyte-aware helpers */
    php_rfc1867_set_multibyte_callbacks(
        php_mb_encoding_translation,
        php_mb_gpc_get_detect_order,
        php_mb_gpc_set_input_encoding,
        php_mb_rfc1867_getword,
        php_mb_rfc1867_getword_conf,
        php_mb_rfc1867_basename);

    return SUCCESS;
}
1588/* }}} */
1589
1590/* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
/* Module shutdown: unregisters the ini entries and, when mbregex is compiled
 * in, runs the mbregex sub-module's shutdown. Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
    UNREGISTER_INI_ENTRIES();

#if HAVE_MBREGEX
    PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

    return SUCCESS;
}
1601/* }}} */
1602
1603/* {{{ PHP_RINIT_FUNCTION(mbstring) */
1604PHP_RINIT_FUNCTION(mbstring)
1605{
1606    zend_function *func, *orig;
1607    const struct mb_overload_def *p;
1608
1609    MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1610    MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1611    MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1612    MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1613
1614    MBSTRG(illegalchars) = 0;
1615
1616    php_mb_populate_current_detect_order_list(TSRMLS_C);
1617
1618    /* override original function. */
1619    if (MBSTRG(func_overload)){
1620        p = &(mb_ovld[0]);
1621
1622        CG(compiler_options) |= ZEND_COMPILE_NO_BUILTIN_STRLEN;
1623        while (p->type > 0) {
1624            if ((MBSTRG(func_overload) & p->type) == p->type &&
1625                !zend_hash_str_exists(EG(function_table), p->save_func, strlen(p->save_func))
1626            ) {
1627                func = zend_hash_str_find_ptr(EG(function_table), p->ovld_func, strlen(p->ovld_func));
1628
1629                if ((orig = zend_hash_str_find_ptr(EG(function_table), p->orig_func, strlen(p->orig_func))) == NULL) {
1630                    php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1631                    return FAILURE;
1632                } else {
1633                    ZEND_ASSERT(orig->type == ZEND_INTERNAL_FUNCTION);
1634                    zend_hash_str_add_mem(EG(function_table), p->save_func, strlen(p->save_func), orig, sizeof(zend_internal_function));
1635                    function_add_ref(orig);
1636
1637                    if (zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), func, sizeof(zend_internal_function)) == NULL) {
1638                        php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1639                        return FAILURE;
1640                    }
1641
1642                    function_add_ref(func);
1643                }
1644            }
1645            p++;
1646        }
1647    }
1648#if HAVE_MBREGEX
1649    PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1650#endif
1651    zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
1652
1653    return SUCCESS;
1654}
1655/* }}} */
1656
1657/* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
1658PHP_RSHUTDOWN_FUNCTION(mbstring)
1659{
1660    const struct mb_overload_def *p;
1661    zend_function *orig;
1662
1663    if (MBSTRG(current_detect_order_list) != NULL) {
1664        efree(MBSTRG(current_detect_order_list));
1665        MBSTRG(current_detect_order_list) = NULL;
1666        MBSTRG(current_detect_order_list_size) = 0;
1667    }
1668    if (MBSTRG(outconv) != NULL) {
1669        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1670        mbfl_buffer_converter_delete(MBSTRG(outconv));
1671        MBSTRG(outconv) = NULL;
1672    }
1673
1674    /* clear http input identification. */
1675    MBSTRG(http_input_identify) = NULL;
1676    MBSTRG(http_input_identify_post) = NULL;
1677    MBSTRG(http_input_identify_get) = NULL;
1678    MBSTRG(http_input_identify_cookie) = NULL;
1679    MBSTRG(http_input_identify_string) = NULL;
1680
1681    /*  clear overloaded function. */
1682    if (MBSTRG(func_overload)){
1683        p = &(mb_ovld[0]);
1684        while (p->type > 0) {
1685            if ((MBSTRG(func_overload) & p->type) == p->type &&
1686                (orig = zend_hash_str_find_ptr(EG(function_table), p->save_func, strlen(p->save_func)))) {
1687
1688                zend_hash_str_update_mem(EG(function_table), p->orig_func, strlen(p->orig_func), orig, sizeof(zend_internal_function));
1689                function_add_ref(orig);
1690                zend_hash_str_del(EG(function_table), p->save_func, strlen(p->save_func));
1691            }
1692            p++;
1693        }
1694        CG(compiler_options) &= ~ZEND_COMPILE_NO_BUILTIN_STRLEN;
1695    }
1696
1697#if HAVE_MBREGEX
1698    PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1699#endif
1700
1701    return SUCCESS;
1702}
1703/* }}} */
1704
1705/* {{{ PHP_MINFO_FUNCTION(mbstring) */
1706PHP_MINFO_FUNCTION(mbstring)
1707{
1708    php_info_print_table_start();
1709    php_info_print_table_row(2, "Multibyte Support", "enabled");
1710    php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1711    php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1712    {
1713        char tmp[256];
1714        snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1715        php_info_print_table_row(2, "libmbfl version", tmp);
1716    }
1717    php_info_print_table_end();
1718
1719    php_info_print_table_start();
1720    php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1721    php_info_print_table_end();
1722
1723#if HAVE_MBREGEX
1724    PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1725#endif
1726
1727    DISPLAY_INI_ENTRIES();
1728}
1729/* }}} */
1730
1731/* {{{ proto string mb_language([string language])
1732   Sets the current language or Returns the current language as a string */
1733PHP_FUNCTION(mb_language)
1734{
1735    char *name = NULL;
1736    size_t name_len = 0;
1737
1738    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1739        return;
1740    }
1741    if (name == NULL) {
1742        RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1743    } else {
1744        zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1745        if (FAILURE == zend_alter_ini_entry(ini_name, name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1746            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1747            RETVAL_FALSE;
1748        } else {
1749            RETVAL_TRUE;
1750        }
1751        zend_string_release(ini_name);
1752    }
1753}
1754/* }}} */
1755
1756/* {{{ proto string mb_internal_encoding([string encoding])
1757   Sets the current internal encoding or Returns the current internal encoding as a string */
1758PHP_FUNCTION(mb_internal_encoding)
1759{
1760    const char *name = NULL;
1761    size_t name_len;
1762    const mbfl_encoding *encoding;
1763
1764    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1765        return;
1766    }
1767    if (name == NULL) {
1768        name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1769        if (name != NULL) {
1770            RETURN_STRING(name);
1771        } else {
1772            RETURN_FALSE;
1773        }
1774    } else {
1775        encoding = mbfl_name2encoding(name);
1776        if (!encoding) {
1777            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1778            RETURN_FALSE;
1779        } else {
1780            MBSTRG(current_internal_encoding) = encoding;
1781            RETURN_TRUE;
1782        }
1783    }
1784}
1785/* }}} */
1786
1787/* {{{ proto mixed mb_http_input([string type])
1788   Returns the input encoding */
1789PHP_FUNCTION(mb_http_input)
1790{
1791    char *typ = NULL;
1792    size_t typ_len;
1793    int retname;
1794    char *list, *temp;
1795    const mbfl_encoding *result = NULL;
1796
1797    retname = 1;
1798    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1799        return;
1800    }
1801    if (typ == NULL) {
1802        result = MBSTRG(http_input_identify);
1803    } else {
1804        switch (*typ) {
1805        case 'G':
1806        case 'g':
1807            result = MBSTRG(http_input_identify_get);
1808            break;
1809        case 'P':
1810        case 'p':
1811            result = MBSTRG(http_input_identify_post);
1812            break;
1813        case 'C':
1814        case 'c':
1815            result = MBSTRG(http_input_identify_cookie);
1816            break;
1817        case 'S':
1818        case 's':
1819            result = MBSTRG(http_input_identify_string);
1820            break;
1821        case 'I':
1822        case 'i':
1823            {
1824                const mbfl_encoding **entry = MBSTRG(http_input_list);
1825                const size_t n = MBSTRG(http_input_list_size);
1826                size_t i;
1827                array_init(return_value);
1828                for (i = 0; i < n; i++) {
1829                    add_next_index_string(return_value, (*entry)->name);
1830                    entry++;
1831                }
1832                retname = 0;
1833            }
1834            break;
1835        case 'L':
1836        case 'l':
1837            {
1838                const mbfl_encoding **entry = MBSTRG(http_input_list);
1839                const size_t n = MBSTRG(http_input_list_size);
1840                size_t i;
1841                list = NULL;
1842                for (i = 0; i < n; i++) {
1843                    if (list) {
1844                        temp = list;
1845                        spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1846                        efree(temp);
1847                        if (!list) {
1848                            break;
1849                        }
1850                    } else {
1851                        list = estrdup((*entry)->name);
1852                    }
1853                    entry++;
1854                }
1855            }
1856            if (!list) {
1857                RETURN_FALSE;
1858            }
1859            RETVAL_STRING(list);
1860            efree(list);
1861            retname = 0;
1862            break;
1863        default:
1864            result = MBSTRG(http_input_identify);
1865            break;
1866        }
1867    }
1868
1869    if (retname) {
1870        if (result) {
1871            RETVAL_STRING(result->name);
1872        } else {
1873            RETVAL_FALSE;
1874        }
1875    }
1876}
1877/* }}} */
1878
1879/* {{{ proto string mb_http_output([string encoding])
1880   Sets the current output_encoding or returns the current output_encoding as a string */
1881PHP_FUNCTION(mb_http_output)
1882{
1883    const char *name = NULL;
1884    size_t name_len;
1885    const mbfl_encoding *encoding;
1886
1887    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1888        return;
1889    }
1890
1891    if (name == NULL) {
1892        name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1893        if (name != NULL) {
1894            RETURN_STRING(name);
1895        } else {
1896            RETURN_FALSE;
1897        }
1898    } else {
1899        encoding = mbfl_name2encoding(name);
1900        if (!encoding) {
1901            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1902            RETURN_FALSE;
1903        } else {
1904            MBSTRG(current_http_output_encoding) = encoding;
1905            RETURN_TRUE;
1906        }
1907    }
1908}
1909/* }}} */
1910
1911/* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1912   Sets the current detect_order or Return the current detect_order as a array */
1913PHP_FUNCTION(mb_detect_order)
1914{
1915    zval *arg1 = NULL;
1916
1917    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|z", &arg1) == FAILURE) {
1918        return;
1919    }
1920
1921    if (!arg1) {
1922        size_t i;
1923        size_t n = MBSTRG(current_detect_order_list_size);
1924        const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1925        array_init(return_value);
1926        for (i = 0; i < n; i++) {
1927            add_next_index_string(return_value, (*entry)->name);
1928            entry++;
1929        }
1930    } else {
1931        const mbfl_encoding **list = NULL;
1932        size_t size = 0;
1933        switch (Z_TYPE_P(arg1)) {
1934            case IS_ARRAY:
1935                if (FAILURE == php_mb_parse_encoding_array(arg1, &list, &size, 0 TSRMLS_CC)) {
1936                    if (list) {
1937                        efree(list);
1938                    }
1939                    RETURN_FALSE;
1940                }
1941                break;
1942            default:
1943                convert_to_string_ex(arg1);
1944                if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), &list, &size, 0 TSRMLS_CC)) {
1945                    if (list) {
1946                        efree(list);
1947                    }
1948                    RETURN_FALSE;
1949                }
1950                break;
1951        }
1952
1953        if (list == NULL) {
1954            RETURN_FALSE;
1955        }
1956
1957        if (MBSTRG(current_detect_order_list)) {
1958            efree(MBSTRG(current_detect_order_list));
1959        }
1960        MBSTRG(current_detect_order_list) = list;
1961        MBSTRG(current_detect_order_list_size) = size;
1962        RETURN_TRUE;
1963    }
1964}
1965/* }}} */
1966
1967/* {{{ proto mixed mb_substitute_character([mixed substchar])
1968   Sets the current substitute_character or returns the current substitute_character */
1969PHP_FUNCTION(mb_substitute_character)
1970{
1971    zval *arg1 = NULL;
1972
1973    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|z", &arg1) == FAILURE) {
1974        return;
1975    }
1976
1977    if (!arg1) {
1978        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1979            RETURN_STRING("none");
1980        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1981            RETURN_STRING("long");
1982        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1983            RETURN_STRING("entity");
1984        } else {
1985            RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1986        }
1987    } else {
1988        RETVAL_TRUE;
1989
1990        switch (Z_TYPE_P(arg1)) {
1991            case IS_STRING:
1992                if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
1993                    MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1994                } else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
1995                    MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1996                } else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
1997                    MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1998                } else {
1999                    convert_to_long_ex(arg1);
2000
2001                    if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2002                        MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2003                        MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2004                    } else {
2005                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
2006                        RETURN_FALSE;
2007                    }
2008                }
2009                break;
2010            default:
2011                convert_to_long_ex(arg1);
2012                if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2013                    MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2014                    MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
2015                } else {
2016                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
2017                    RETURN_FALSE;
2018                }
2019                break;
2020        }
2021    }
2022}
2023/* }}} */
2024
2025/* {{{ proto string mb_preferred_mime_name(string encoding)
2026   Return the preferred MIME name (charset) as a string */
2027PHP_FUNCTION(mb_preferred_mime_name)
2028{
2029    enum mbfl_no_encoding no_encoding;
2030    char *name = NULL;
2031    size_t name_len;
2032
2033    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
2034        return;
2035    } else {
2036        no_encoding = mbfl_name2no_encoding(name);
2037        if (no_encoding == mbfl_no_encoding_invalid) {
2038            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
2039            RETVAL_FALSE;
2040        } else {
2041            const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2042            if (preferred_name == NULL || *preferred_name == '\0') {
2043                php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2044                RETVAL_FALSE;
2045            } else {
2046                RETVAL_STRING((char *)preferred_name);
2047            }
2048        }
2049    }
2050}
2051/* }}} */
2052
/* Byte-range predicates; each evaluates to 1 when (c) falls in one of the two
 * listed ranges and to 0 otherwise. Presumably these classify the first and
 * second byte of a Shift_JIS double-byte character - confirm against callers.
 * The argument is evaluated more than once, so it must be free of side effects. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2055
2056/* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2057   Parses GET/POST/COOKIE data and sets global variables */
2058PHP_FUNCTION(mb_parse_str)
2059{
2060    zval *track_vars_array = NULL;
2061    char *encstr = NULL;
2062    size_t encstr_len;
2063    php_mb_encoding_handler_info_t info;
2064    const mbfl_encoding *detected;
2065
2066    track_vars_array = NULL;
2067    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z/", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2068        return;
2069    }
2070
2071    if (track_vars_array != NULL) {
2072        /* Clear out the array */
2073        zval_dtor(track_vars_array);
2074        array_init(track_vars_array);
2075    }
2076
2077    encstr = estrndup(encstr, encstr_len);
2078
2079    info.data_type              = PARSE_STRING;
2080    info.separator              = PG(arg_separator).input;
2081    info.report_errors          = 1;
2082    info.to_encoding            = MBSTRG(current_internal_encoding);
2083    info.to_language            = MBSTRG(language);
2084    info.from_encodings         = MBSTRG(http_input_list);
2085    info.num_from_encodings     = MBSTRG(http_input_list_size);
2086    info.from_language          = MBSTRG(language);
2087
2088    if (track_vars_array != NULL) {
2089        detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
2090    } else {
2091        zval tmp;
2092        zend_array *symbol_table = zend_rebuild_symbol_table(TSRMLS_C);
2093
2094        ZVAL_ARR(&tmp, symbol_table);
2095        detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC);
2096    }
2097
2098    MBSTRG(http_input_identify) = detected;
2099
2100    RETVAL_BOOL(detected);
2101
2102    if (encstr != NULL) efree(encstr);
2103}
2104/* }}} */
2105
2106/* {{{ proto string mb_output_handler(string contents, int status)
2107   Returns string in output buffer converted to the http_output encoding */
2108PHP_FUNCTION(mb_output_handler)
2109{
2110    char *arg_string;
2111    size_t arg_string_len;
2112    zend_long arg_status;
2113    mbfl_string string, result;
2114    const char *charset;
2115    char *p;
2116    const mbfl_encoding *encoding;
2117    int last_feed, len;
2118    unsigned char send_text_mimetype = 0;
2119    char *s, *mimetype = NULL;
2120
2121    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2122        return;
2123    }
2124
2125    encoding = MBSTRG(current_http_output_encoding);
2126
2127    /* start phase only */
2128    if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2129        /* delete the converter just in case. */
2130        if (MBSTRG(outconv)) {
2131            MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2132            mbfl_buffer_converter_delete(MBSTRG(outconv));
2133            MBSTRG(outconv) = NULL;
2134        }
2135        if (encoding == &mbfl_encoding_pass) {
2136            RETURN_STRINGL(arg_string, arg_string_len);
2137        }
2138
2139        /* analyze mime type */
2140        if (SG(sapi_headers).mimetype &&
2141            _php_mb_match_regex(
2142                MBSTRG(http_output_conv_mimetypes),
2143                SG(sapi_headers).mimetype,
2144                strlen(SG(sapi_headers).mimetype))) {
2145            if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2146                mimetype = estrdup(SG(sapi_headers).mimetype);
2147            } else {
2148                mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2149            }
2150            send_text_mimetype = 1;
2151        } else if (SG(sapi_headers).send_default_content_type) {
2152            mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2153        }
2154
2155        /* if content-type is not yet set, set it and activate the converter */
2156        if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2157            charset = encoding->mime_name;
2158            if (charset) {
2159                len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
2160                if (sapi_add_header(p, len, 0) != FAILURE) {
2161                    SG(sapi_headers).send_default_content_type = 0;
2162                }
2163            }
2164            /* activate the converter */
2165            MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2166            if (send_text_mimetype){
2167                efree(mimetype);
2168            }
2169        }
2170    }
2171
2172    /* just return if the converter is not activated. */
2173    if (MBSTRG(outconv) == NULL) {
2174        RETURN_STRINGL(arg_string, arg_string_len);
2175    }
2176
2177    /* flag */
2178    last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2179    /* mode */
2180    mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2181    mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2182
2183    /* feed the string */
2184    mbfl_string_init(&string);
2185    /* these are not needed. convd has encoding info.
2186    string.no_language = MBSTRG(language);
2187    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2188    */
2189    string.val = (unsigned char *)arg_string;
2190    string.len = arg_string_len;
2191    mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2192    if (last_feed) {
2193        mbfl_buffer_converter_flush(MBSTRG(outconv));
2194    }
2195    /* get the converter output, and return it */
2196    mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2197    // TODO: avoid reallocation ???
2198    RETVAL_STRINGL((char *)result.val, result.len);     /* the string is already strdup()'ed */
2199    efree(result.val);
2200
2201    /* delete the converter if it is the last feed. */
2202    if (last_feed) {
2203        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2204        mbfl_buffer_converter_delete(MBSTRG(outconv));
2205        MBSTRG(outconv) = NULL;
2206    }
2207}
2208/* }}} */
2209
2210/* {{{ proto int mb_strlen(string str [, string encoding])
2211   Get character numbers of a string */
2212PHP_FUNCTION(mb_strlen)
2213{
2214    int n;
2215    mbfl_string string;
2216    char *enc_name = NULL;
2217    size_t enc_name_len;
2218
2219    mbfl_string_init(&string);
2220
2221    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2222        return;
2223    }
2224
2225    string.no_language = MBSTRG(language);
2226    if (enc_name == NULL) {
2227        string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2228    } else {
2229        string.no_encoding = mbfl_name2no_encoding(enc_name);
2230        if (string.no_encoding == mbfl_no_encoding_invalid) {
2231            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2232            RETURN_FALSE;
2233        }
2234    }
2235
2236    n = mbfl_strlen(&string);
2237    if (n >= 0) {
2238        RETVAL_LONG(n);
2239    } else {
2240        RETVAL_FALSE;
2241    }
2242}
2243/* }}} */
2244
2245/* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2246   Find position of first occurrence of a string within another */
2247PHP_FUNCTION(mb_strpos)
2248{
2249    int n, reverse = 0;
2250    zend_long offset;
2251    mbfl_string haystack, needle;
2252    char *enc_name = NULL;
2253    size_t enc_name_len;
2254
2255    mbfl_string_init(&haystack);
2256    mbfl_string_init(&needle);
2257    haystack.no_language = MBSTRG(language);
2258    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2259    needle.no_language = MBSTRG(language);
2260    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2261    offset = 0;
2262
2263    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2264        return;
2265    }
2266
2267    if (enc_name != NULL) {
2268        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2269        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2270            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2271            RETURN_FALSE;
2272        }
2273    }
2274
2275    if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2276        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2277        RETURN_FALSE;
2278    }
2279    if (needle.len == 0) {
2280        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2281        RETURN_FALSE;
2282    }
2283
2284    n = mbfl_strpos(&haystack, &needle, offset, reverse);
2285    if (n >= 0) {
2286        RETVAL_LONG(n);
2287    } else {
2288        switch (-n) {
2289        case 1:
2290            break;
2291        case 2:
2292            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2293            break;
2294        case 4:
2295            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2296            break;
2297        case 8:
2298            php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2299            break;
2300        default:
2301            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2302            break;
2303        }
2304        RETVAL_FALSE;
2305    }
2306}
2307/* }}} */
2308
2309/* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2310   Find position of last occurrence of a string within another */
2311PHP_FUNCTION(mb_strrpos)
2312{
2313    int n;
2314    mbfl_string haystack, needle;
2315    char *enc_name = NULL;
2316    size_t enc_name_len;
2317    zval *zoffset = NULL;
2318    long offset = 0, str_flg;
2319    char *enc_name2 = NULL;
2320    int enc_name_len2;
2321
2322    mbfl_string_init(&haystack);
2323    mbfl_string_init(&needle);
2324    haystack.no_language = MBSTRG(language);
2325    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2326    needle.no_language = MBSTRG(language);
2327    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2328
2329    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2330        return;
2331    }
2332
2333    if (zoffset) {
2334        if (Z_TYPE_P(zoffset) == IS_STRING) {
2335            enc_name2     = Z_STRVAL_P(zoffset);
2336            enc_name_len2 = Z_STRLEN_P(zoffset);
2337            str_flg       = 1;
2338
2339            if (enc_name2 != NULL) {
2340                switch (*enc_name2) {
2341                    case '0':
2342                    case '1':
2343                    case '2':
2344                    case '3':
2345                    case '4':
2346                    case '5':
2347                    case '6':
2348                    case '7':
2349                    case '8':
2350                    case '9':
2351                    case ' ':
2352                    case '-':
2353                    case '.':
2354                        break;
2355                    default :
2356                        str_flg = 0;
2357                        break;
2358                }
2359            }
2360
2361            if (str_flg) {
2362                convert_to_long_ex(zoffset);
2363                offset   = Z_LVAL_P(zoffset);
2364            } else {
2365                enc_name     = enc_name2;
2366                enc_name_len = enc_name_len2;
2367            }
2368        } else {
2369            convert_to_long_ex(zoffset);
2370            offset = Z_LVAL_P(zoffset);
2371        }
2372    }
2373
2374    if (enc_name != NULL) {
2375        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2376        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2377            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2378            RETURN_FALSE;
2379        }
2380    }
2381
2382    if (haystack.len <= 0) {
2383        RETURN_FALSE;
2384    }
2385    if (needle.len <= 0) {
2386        RETURN_FALSE;
2387    }
2388
2389    {
2390        int haystack_char_len = mbfl_strlen(&haystack);
2391        if ((offset > 0 && offset > haystack_char_len) ||
2392            (offset < 0 && -offset > haystack_char_len)) {
2393            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2394            RETURN_FALSE;
2395        }
2396    }
2397
2398    n = mbfl_strpos(&haystack, &needle, offset, 1);
2399    if (n >= 0) {
2400        RETVAL_LONG(n);
2401    } else {
2402        RETVAL_FALSE;
2403    }
2404}
2405/* }}} */
2406
2407/* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2408   Finds position of first occurrence of a string within another, case insensitive */
2409PHP_FUNCTION(mb_stripos)
2410{
2411    int n;
2412    zend_long offset;
2413    mbfl_string haystack, needle;
2414    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2415    size_t from_encoding_len;
2416    n = -1;
2417    offset = 0;
2418
2419    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2420        return;
2421    }
2422    if (needle.len == 0) {
2423        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2424        RETURN_FALSE;
2425    }
2426    n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2427
2428    if (n >= 0) {
2429        RETVAL_LONG(n);
2430    } else {
2431        RETVAL_FALSE;
2432    }
2433}
2434/* }}} */
2435
2436/* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2437   Finds position of last occurrence of a string within another, case insensitive */
2438PHP_FUNCTION(mb_strripos)
2439{
2440    int n;
2441    zend_long offset;
2442    mbfl_string haystack, needle;
2443    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2444    size_t from_encoding_len;
2445    n = -1;
2446    offset = 0;
2447
2448    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2449        return;
2450    }
2451
2452    n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2453
2454    if (n >= 0) {
2455        RETVAL_LONG(n);
2456    } else {
2457        RETVAL_FALSE;
2458    }
2459}
2460/* }}} */
2461
2462/* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2463   Finds first occurrence of a string within another */
2464PHP_FUNCTION(mb_strstr)
2465{
2466    int n, len, mblen;
2467    mbfl_string haystack, needle, result, *ret = NULL;
2468    char *enc_name = NULL;
2469    size_t enc_name_len;
2470    zend_bool part = 0;
2471
2472    mbfl_string_init(&haystack);
2473    mbfl_string_init(&needle);
2474    haystack.no_language = MBSTRG(language);
2475    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2476    needle.no_language = MBSTRG(language);
2477    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2478
2479    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2480        return;
2481    }
2482
2483    if (enc_name != NULL) {
2484        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2485        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2486            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2487            RETURN_FALSE;
2488        }
2489    }
2490
2491    if (needle.len <= 0) {
2492        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2493        RETURN_FALSE;
2494    }
2495    n = mbfl_strpos(&haystack, &needle, 0, 0);
2496    if (n >= 0) {
2497        mblen = mbfl_strlen(&haystack);
2498        if (part) {
2499            ret = mbfl_substr(&haystack, &result, 0, n);
2500            if (ret != NULL) {
2501                // TODO: avoid reallocation ???
2502                RETVAL_STRINGL((char *)ret->val, ret->len);
2503                efree(ret->val);
2504            } else {
2505                RETVAL_FALSE;
2506            }
2507        } else {
2508            len = (mblen - n);
2509            ret = mbfl_substr(&haystack, &result, n, len);
2510            if (ret != NULL) {
2511                // TODO: avoid reallocation ???
2512                RETVAL_STRINGL((char *)ret->val, ret->len);
2513                efree(ret->val);
2514            } else {
2515                RETVAL_FALSE;
2516            }
2517        }
2518    } else {
2519        RETVAL_FALSE;
2520    }
2521}
2522/* }}} */
2523
2524/* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2525   Finds the last occurrence of a character in a string within another */
2526PHP_FUNCTION(mb_strrchr)
2527{
2528    int n, len, mblen;
2529    mbfl_string haystack, needle, result, *ret = NULL;
2530    char *enc_name = NULL;
2531    size_t enc_name_len;
2532    zend_bool part = 0;
2533
2534    mbfl_string_init(&haystack);
2535    mbfl_string_init(&needle);
2536    haystack.no_language = MBSTRG(language);
2537    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2538    needle.no_language = MBSTRG(language);
2539    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2540
2541    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2542        return;
2543    }
2544
2545    if (enc_name != NULL) {
2546        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2547        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2548            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2549            RETURN_FALSE;
2550        }
2551    }
2552
2553    if (haystack.len <= 0) {
2554        RETURN_FALSE;
2555    }
2556    if (needle.len <= 0) {
2557        RETURN_FALSE;
2558    }
2559    n = mbfl_strpos(&haystack, &needle, 0, 1);
2560    if (n >= 0) {
2561        mblen = mbfl_strlen(&haystack);
2562        if (part) {
2563            ret = mbfl_substr(&haystack, &result, 0, n);
2564            if (ret != NULL) {
2565                // TODO: avoid reallocation ???
2566                RETVAL_STRINGL((char *)ret->val, ret->len);
2567                efree(ret->val);
2568            } else {
2569                RETVAL_FALSE;
2570            }
2571        } else {
2572            len = (mblen - n);
2573            ret = mbfl_substr(&haystack, &result, n, len);
2574            if (ret != NULL) {
2575                // TODO: avoid reallocation ???
2576                RETVAL_STRINGL((char *)ret->val, ret->len);
2577                efree(ret->val);
2578            } else {
2579                RETVAL_FALSE;
2580            }
2581        }
2582    } else {
2583        RETVAL_FALSE;
2584    }
2585}
2586/* }}} */
2587
2588/* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2589   Finds first occurrence of a string within another, case insensitive */
2590PHP_FUNCTION(mb_stristr)
2591{
2592    zend_bool part = 0;
2593    size_t from_encoding_len, len, mblen;
2594    int n;
2595    mbfl_string haystack, needle, result, *ret = NULL;
2596    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2597    mbfl_string_init(&haystack);
2598    mbfl_string_init(&needle);
2599    haystack.no_language = MBSTRG(language);
2600    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2601    needle.no_language = MBSTRG(language);
2602    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2603
2604
2605    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2606        return;
2607    }
2608
2609    if (!needle.len) {
2610        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2611        RETURN_FALSE;
2612    }
2613
2614    haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2615    if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2616        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2617        RETURN_FALSE;
2618    }
2619
2620    n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2621
2622    if (n <0) {
2623        RETURN_FALSE;
2624    }
2625
2626    mblen = mbfl_strlen(&haystack);
2627
2628    if (part) {
2629        ret = mbfl_substr(&haystack, &result, 0, n);
2630        if (ret != NULL) {
2631            // TODO: avoid reallocation ???
2632            RETVAL_STRINGL((char *)ret->val, ret->len);
2633            efree(ret->val);
2634        } else {
2635            RETVAL_FALSE;
2636        }
2637    } else {
2638        len = (mblen - n);
2639        ret = mbfl_substr(&haystack, &result, n, len);
2640        if (ret != NULL) {
2641            // TODO: avoid reallocaton ???
2642            RETVAL_STRINGL((char *)ret->val, ret->len);
2643            efree(ret->val);
2644        } else {
2645            RETVAL_FALSE;
2646        }
2647    }
2648}
2649/* }}} */
2650
2651/* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2652   Finds the last occurrence of a character in a string within another, case insensitive */
2653PHP_FUNCTION(mb_strrichr)
2654{
2655    zend_bool part = 0;
2656    int n, len, mblen;
2657    size_t from_encoding_len;
2658    mbfl_string haystack, needle, result, *ret = NULL;
2659    const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2660    mbfl_string_init(&haystack);
2661    mbfl_string_init(&needle);
2662    haystack.no_language = MBSTRG(language);
2663    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2664    needle.no_language = MBSTRG(language);
2665    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2666
2667
2668    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2669        return;
2670    }
2671
2672    haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2673    if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2674        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2675        RETURN_FALSE;
2676    }
2677
2678    n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2679
2680    if (n <0) {
2681        RETURN_FALSE;
2682    }
2683
2684    mblen = mbfl_strlen(&haystack);
2685
2686    if (part) {
2687        ret = mbfl_substr(&haystack, &result, 0, n);
2688        if (ret != NULL) {
2689            // TODO: avoid reallocation ???
2690            RETVAL_STRINGL((char *)ret->val, ret->len);
2691            efree(ret->val);
2692        } else {
2693            RETVAL_FALSE;
2694        }
2695    } else {
2696        len = (mblen - n);
2697        ret = mbfl_substr(&haystack, &result, n, len);
2698        if (ret != NULL) {
2699            // TODO: avoid reallocation ???
2700            RETVAL_STRINGL((char *)ret->val, ret->len);
2701            efree(ret->val);
2702        } else {
2703            RETVAL_FALSE;
2704        }
2705    }
2706}
2707/* }}} */
2708
2709/* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2710   Count the number of substring occurrences */
2711PHP_FUNCTION(mb_substr_count)
2712{
2713    int n;
2714    mbfl_string haystack, needle;
2715    char *enc_name = NULL;
2716    size_t enc_name_len;
2717
2718    mbfl_string_init(&haystack);
2719    mbfl_string_init(&needle);
2720    haystack.no_language = MBSTRG(language);
2721    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2722    needle.no_language = MBSTRG(language);
2723    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2724
2725    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2726        return;
2727    }
2728
2729    if (enc_name != NULL) {
2730        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2731        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2732            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2733            RETURN_FALSE;
2734        }
2735    }
2736
2737    if (needle.len <= 0) {
2738        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2739        RETURN_FALSE;
2740    }
2741
2742    n = mbfl_substr_count(&haystack, &needle);
2743    if (n >= 0) {
2744        RETVAL_LONG(n);
2745    } else {
2746        RETVAL_FALSE;
2747    }
2748}
2749/* }}} */
2750
2751/* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2752   Returns part of a string */
2753PHP_FUNCTION(mb_substr)
2754{
2755    size_t argc = ZEND_NUM_ARGS();
2756    char *str, *encoding;
2757    zend_long from, len;
2758    int mblen;
2759    size_t str_len, encoding_len;
2760    zval *z_len = NULL;
2761    mbfl_string string, result, *ret;
2762
2763    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2764        return;
2765    }
2766
2767    mbfl_string_init(&string);
2768    string.no_language = MBSTRG(language);
2769    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2770
2771    if (argc == 4) {
2772        string.no_encoding = mbfl_name2no_encoding(encoding);
2773        if (string.no_encoding == mbfl_no_encoding_invalid) {
2774            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2775            RETURN_FALSE;
2776        }
2777    }
2778
2779    string.val = (unsigned char *)str;
2780    string.len = str_len;
2781
2782    if (argc < 3 || Z_TYPE_P(z_len) == IS_NULL) {
2783        len = str_len;
2784    } else {
2785        convert_to_long_ex(z_len);
2786        len = Z_LVAL_P(z_len);
2787    }
2788
2789    /* measures length */
2790    mblen = 0;
2791    if (from < 0 || len < 0) {
2792        mblen = mbfl_strlen(&string);
2793    }
2794
2795    /* if "from" position is negative, count start position from the end
2796     * of the string
2797     */
2798    if (from < 0) {
2799        from = mblen + from;
2800        if (from < 0) {
2801            from = 0;
2802        }
2803    }
2804
2805    /* if "length" position is negative, set it to the length
2806     * needed to stop that many chars from the end of the string
2807     */
2808    if (len < 0) {
2809        len = (mblen - from) + len;
2810        if (len < 0) {
2811            len = 0;
2812        }
2813    }
2814
2815    if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2816        && (from >= mbfl_strlen(&string))) {
2817        RETURN_FALSE;
2818    }
2819
2820    ret = mbfl_substr(&string, &result, from, len);
2821    if (NULL == ret) {
2822        RETURN_FALSE;
2823    }
2824
2825    // TODO: avoid reallocation ???
2826    RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2827    efree(ret->val);
2828}
2829/* }}} */
2830
2831/* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2832   Returns part of a string */
2833PHP_FUNCTION(mb_strcut)
2834{
2835    size_t argc = ZEND_NUM_ARGS();
2836    char *encoding;
2837    zend_long from, len;
2838    size_t encoding_len;
2839    zval *z_len = NULL;
2840    mbfl_string string, result, *ret;
2841
2842    mbfl_string_init(&string);
2843    string.no_language = MBSTRG(language);
2844    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2845
2846    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2847        return;
2848    }
2849
2850    if (argc == 4) {
2851        string.no_encoding = mbfl_name2no_encoding(encoding);
2852        if (string.no_encoding == mbfl_no_encoding_invalid) {
2853            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2854            RETURN_FALSE;
2855        }
2856    }
2857
2858    if (argc < 3 || Z_TYPE_P(z_len) == IS_NULL) {
2859        len = string.len;
2860    } else {
2861        convert_to_long_ex(z_len);
2862        len = Z_LVAL_P(z_len);
2863    }
2864
2865    /* if "from" position is negative, count start position from the end
2866     * of the string
2867     */
2868    if (from < 0) {
2869        from = string.len + from;
2870        if (from < 0) {
2871            from = 0;
2872        }
2873    }
2874
2875    /* if "length" position is negative, set it to the length
2876     * needed to stop that many chars from the end of the string
2877     */
2878    if (len < 0) {
2879        len = (string.len - from) + len;
2880        if (len < 0) {
2881            len = 0;
2882        }
2883    }
2884
2885    if ((unsigned int)from > string.len) {
2886        RETURN_FALSE;
2887    }
2888
2889    ret = mbfl_strcut(&string, &result, from, len);
2890    if (ret == NULL) {
2891        RETURN_FALSE;
2892    }
2893
2894    // TODO: avoid reallocation ???
2895    RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2896    efree(ret->val);
2897}
2898/* }}} */
2899
2900/* {{{ proto int mb_strwidth(string str [, string encoding])
2901   Gets terminal width of a string */
2902PHP_FUNCTION(mb_strwidth)
2903{
2904    int n;
2905    mbfl_string string;
2906    char *enc_name = NULL;
2907    size_t enc_name_len;
2908
2909    mbfl_string_init(&string);
2910
2911    string.no_language = MBSTRG(language);
2912    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2913
2914    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2915        return;
2916    }
2917
2918    if (enc_name != NULL) {
2919        string.no_encoding = mbfl_name2no_encoding(enc_name);
2920        if (string.no_encoding == mbfl_no_encoding_invalid) {
2921            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2922            RETURN_FALSE;
2923        }
2924    }
2925
2926    n = mbfl_strwidth(&string);
2927    if (n >= 0) {
2928        RETVAL_LONG(n);
2929    } else {
2930        RETVAL_FALSE;
2931    }
2932}
2933/* }}} */
2934
2935/* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2936   Trim the string in terminal width */
2937PHP_FUNCTION(mb_strimwidth)
2938{
2939    char *str, *trimmarker, *encoding;
2940    zend_long from, width;
2941    size_t str_len, trimmarker_len, encoding_len;
2942    mbfl_string string, result, marker, *ret;
2943
2944    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2945        return;
2946    }
2947
2948    mbfl_string_init(&string);
2949    mbfl_string_init(&marker);
2950    string.no_language = MBSTRG(language);
2951    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2952    marker.no_language = MBSTRG(language);
2953    marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2954    marker.val = NULL;
2955    marker.len = 0;
2956
2957    if (ZEND_NUM_ARGS() == 5) {
2958        string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2959        if (string.no_encoding == mbfl_no_encoding_invalid) {
2960            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2961            RETURN_FALSE;
2962        }
2963    }
2964
2965    string.val = (unsigned char *)str;
2966    string.len = str_len;
2967
2968    if (from < 0 || from > str_len) {
2969        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2970        RETURN_FALSE;
2971    }
2972
2973    if (width < 0) {
2974        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2975        RETURN_FALSE;
2976    }
2977
2978    if (ZEND_NUM_ARGS() >= 4) {
2979        marker.val = (unsigned char *)trimmarker;
2980        marker.len = trimmarker_len;
2981    }
2982
2983    ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2984
2985    if (ret == NULL) {
2986        RETURN_FALSE;
2987    }
2988    // TODO: avoid reallocation ???
2989    RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2990    efree(ret->val);
2991}
2992/* }}} */
2993
2994/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
2995MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2996{
2997    mbfl_string string, result, *ret;
2998    const mbfl_encoding *from_encoding, *to_encoding;
2999    mbfl_buffer_converter *convd;
3000    size_t size;
3001    const mbfl_encoding **list;
3002    char *output=NULL;
3003
3004    if (output_len) {
3005        *output_len = 0;
3006    }
3007    if (!input) {
3008        return NULL;
3009    }
3010    /* new encoding */
3011    if (_to_encoding && strlen(_to_encoding)) {
3012        to_encoding = mbfl_name2encoding(_to_encoding);
3013        if (!to_encoding) {
3014            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
3015            return NULL;
3016        }
3017    } else {
3018        to_encoding = MBSTRG(current_internal_encoding);
3019    }
3020
3021    /* initialize string */
3022    mbfl_string_init(&string);
3023    mbfl_string_init(&result);
3024    from_encoding = MBSTRG(current_internal_encoding);
3025    string.no_encoding = from_encoding->no_encoding;
3026    string.no_language = MBSTRG(language);
3027    string.val = (unsigned char *)input;
3028    string.len = length;
3029
3030    /* pre-conversion encoding */
3031    if (_from_encodings) {
3032        list = NULL;
3033        size = 0;
3034        php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
3035        if (size == 1) {
3036            from_encoding = *list;
3037            string.no_encoding = from_encoding->no_encoding;
3038        } else if (size > 1) {
3039            /* auto detect */
3040            from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
3041            if (from_encoding) {
3042                string.no_encoding = from_encoding->no_encoding;
3043            } else {
3044                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
3045                from_encoding = &mbfl_encoding_pass;
3046                to_encoding = from_encoding;
3047                string.no_encoding = from_encoding->no_encoding;
3048            }
3049        } else {
3050            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
3051        }
3052        if (list != NULL) {
3053            efree((void *)list);
3054        }
3055    }
3056
3057    /* initialize converter */
3058    convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3059    if (convd == NULL) {
3060        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
3061        return NULL;
3062    }
3063    mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3064    mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3065
3066    /* do it */
3067    ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3068    if (ret) {
3069        if (output_len) {
3070            *output_len = ret->len;
3071        }
3072        output = (char *)ret->val;
3073    }
3074
3075    MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3076    mbfl_buffer_converter_delete(convd);
3077    return output;
3078}
3079/* }}} */
3080
3081/* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3082   Returns converted string in desired encoding */
3083PHP_FUNCTION(mb_convert_encoding)
3084{
3085    char *arg_str, *arg_new;
3086    size_t str_len, new_len;
3087    zval *arg_old;
3088    size_t size, l, n;
3089    char *_from_encodings = NULL, *ret, *s_free = NULL;
3090
3091    zval *hash_entry;
3092    HashTable *target_hash;
3093
3094    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3095        return;
3096    }
3097
3098    if (ZEND_NUM_ARGS() == 3) {
3099        switch (Z_TYPE_P(arg_old)) {
3100            case IS_ARRAY:
3101                target_hash = Z_ARRVAL_P(arg_old);
3102                _from_encodings = NULL;
3103
3104                ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3105
3106                    convert_to_string_ex(hash_entry);
3107
3108                    if ( _from_encodings) {
3109                        l = strlen(_from_encodings);
3110                        n = strlen(Z_STRVAL_P(hash_entry));
3111                        _from_encodings = erealloc(_from_encodings, l+n+2);
3112                        memcpy(_from_encodings + l, ",", 1);
3113                        memcpy(_from_encodings + l + 1, Z_STRVAL_P(hash_entry), Z_STRLEN_P(hash_entry) + 1);
3114                    } else {
3115                        _from_encodings = estrdup(Z_STRVAL_P(hash_entry));
3116                    }
3117                } ZEND_HASH_FOREACH_END();
3118
3119                if (_from_encodings != NULL && !strlen(_from_encodings)) {
3120                    efree(_from_encodings);
3121                    _from_encodings = NULL;
3122                }
3123                s_free = _from_encodings;
3124                break;
3125            default:
3126                convert_to_string(arg_old);
3127                _from_encodings = Z_STRVAL_P(arg_old);
3128                break;
3129            }
3130    }
3131
3132    /* new encoding */
3133    ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
3134    if (ret != NULL) {
3135        // TODO: avoid reallocation ???
3136        RETVAL_STRINGL(ret, size);      /* the string is already strdup()'ed */
3137        efree(ret);
3138    } else {
3139        RETVAL_FALSE;
3140    }
3141
3142    if ( s_free) {
3143        efree(s_free);
3144    }
3145}
3146/* }}} */
3147
3148/* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3149   Returns a case-folded version of sourcestring */
3150PHP_FUNCTION(mb_convert_case)
3151{
3152    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3153    char *str;
3154    size_t str_len, from_encoding_len;
3155    zend_long case_mode = 0;
3156    char *newstr;
3157    size_t ret_len;
3158
3159    RETVAL_FALSE;
3160    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
3161                &case_mode, &from_encoding, &from_encoding_len) == FAILURE) {
3162        return;
3163    }
3164
3165    newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3166
3167    if (newstr) {
3168        // TODO: avoid reallocation ???
3169        RETVAL_STRINGL(newstr, ret_len);
3170        efree(newstr);
3171    }
3172}
3173/* }}} */
3174
3175/* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3176 *  Returns a uppercased version of sourcestring
3177 */
3178PHP_FUNCTION(mb_strtoupper)
3179{
3180    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3181    char *str;
3182    size_t str_len, from_encoding_len;
3183    char *newstr;
3184    size_t ret_len;
3185
3186    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3187                &from_encoding, &from_encoding_len) == FAILURE) {
3188        return;
3189    }
3190    newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3191
3192    if (newstr) {
3193        // TODO: avoid reallocation ???
3194        RETVAL_STRINGL(newstr, ret_len);
3195        efree(newstr);
3196        return;
3197    }
3198    RETURN_FALSE;
3199}
3200/* }}} */
3201
3202/* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3203 *  Returns a lowercased version of sourcestring
3204 */
3205PHP_FUNCTION(mb_strtolower)
3206{
3207    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3208    char *str;
3209    size_t str_len, from_encoding_len;
3210    char *newstr;
3211    size_t ret_len;
3212
3213    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3214                &from_encoding, &from_encoding_len) == FAILURE) {
3215        return;
3216    }
3217    newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3218
3219    if (newstr) {
3220        // TODO: avoid reallocation ???
3221        RETVAL_STRINGL(newstr, ret_len);
3222        efree(newstr);
3223        return;
3224    }
3225    RETURN_FALSE;
3226}
3227/* }}} */
3228
3229/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3230   Encodings of the given string is returned (as a string) */
3231PHP_FUNCTION(mb_detect_encoding)
3232{
3233    char *str;
3234    size_t str_len;
3235    zend_bool strict=0;
3236    zval *encoding_list;
3237
3238    mbfl_string string;
3239    const mbfl_encoding *ret;
3240    const mbfl_encoding **elist, **list;
3241    size_t size;
3242
3243    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3244        return;
3245    }
3246
3247    /* make encoding list */
3248    list = NULL;
3249    size = 0;
3250    if (ZEND_NUM_ARGS() >= 2 && !Z_ISNULL_P(encoding_list)) {
3251        switch (Z_TYPE_P(encoding_list)) {
3252        case IS_ARRAY:
3253            if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3254                if (list) {
3255                    efree(list);
3256                    list = NULL;
3257                    size = 0;
3258                }
3259            }
3260            break;
3261        default:
3262            convert_to_string(encoding_list);
3263            if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3264                if (list) {
3265                    efree(list);
3266                    list = NULL;
3267                    size = 0;
3268                }
3269            }
3270            break;
3271        }
3272        if (size <= 0) {
3273            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3274        }
3275    }
3276
3277    if (ZEND_NUM_ARGS() < 3) {
3278        strict = (zend_bool)MBSTRG(strict_detection);
3279    }
3280
3281    if (size > 0 && list != NULL) {
3282        elist = list;
3283    } else {
3284        elist = MBSTRG(current_detect_order_list);
3285        size = MBSTRG(current_detect_order_list_size);
3286    }
3287
3288    mbfl_string_init(&string);
3289    string.no_language = MBSTRG(language);
3290    string.val = (unsigned char *)str;
3291    string.len = str_len;
3292    ret = mbfl_identify_encoding2(&string, elist, size, strict);
3293
3294    if (list != NULL) {
3295        efree((void *)list);
3296    }
3297
3298    if (ret == NULL) {
3299        RETURN_FALSE;
3300    }
3301
3302    RETVAL_STRING((char *)ret->name);
3303}
3304/* }}} */
3305
3306/* {{{ proto mixed mb_list_encodings()
3307   Returns an array of all supported entity encodings */
3308PHP_FUNCTION(mb_list_encodings)
3309{
3310    const mbfl_encoding **encodings;
3311    const mbfl_encoding *encoding;
3312    int i;
3313
3314    array_init(return_value);
3315    i = 0;
3316    encodings = mbfl_get_supported_encodings();
3317    while ((encoding = encodings[i++]) != NULL) {
3318        add_next_index_string(return_value, (char *) encoding->name);
3319    }
3320}
3321/* }}} */
3322
3323/* {{{ proto array mb_encoding_aliases(string encoding)
3324   Returns an array of the aliases of a given encoding name */
3325PHP_FUNCTION(mb_encoding_aliases)
3326{
3327    const mbfl_encoding *encoding;
3328    char *name = NULL;
3329    size_t name_len;
3330
3331    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3332        return;
3333    }
3334
3335    encoding = mbfl_name2encoding(name);
3336    if (!encoding) {
3337        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3338        RETURN_FALSE;
3339    }
3340
3341    array_init(return_value);
3342    if (encoding->aliases != NULL) {
3343        const char **alias;
3344        for (alias = *encoding->aliases; *alias; ++alias) {
3345            add_next_index_string(return_value, (char *)*alias);
3346        }
3347    }
3348}
3349/* }}} */
3350
3351/* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3352   Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
3353PHP_FUNCTION(mb_encode_mimeheader)
3354{
3355    enum mbfl_no_encoding charset, transenc;
3356    mbfl_string  string, result, *ret;
3357    char *charset_name = NULL;
3358    size_t charset_name_len;
3359    char *trans_enc_name = NULL;
3360    size_t trans_enc_name_len;
3361    char *linefeed = "\r\n";
3362    size_t linefeed_len;
3363    zend_long indent = 0;
3364
3365    mbfl_string_init(&string);
3366    string.no_language = MBSTRG(language);
3367    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3368
3369    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3370        return;
3371    }
3372
3373    charset = mbfl_no_encoding_pass;
3374    transenc = mbfl_no_encoding_base64;
3375
3376    if (charset_name != NULL) {
3377        charset = mbfl_name2no_encoding(charset_name);
3378        if (charset == mbfl_no_encoding_invalid) {
3379            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3380            RETURN_FALSE;
3381        }
3382    } else {
3383        const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3384        if (lang != NULL) {
3385            charset = lang->mail_charset;
3386            transenc = lang->mail_header_encoding;
3387        }
3388    }
3389
3390    if (trans_enc_name != NULL) {
3391        if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3392            transenc = mbfl_no_encoding_base64;
3393        } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3394            transenc = mbfl_no_encoding_qprint;
3395        }
3396    }
3397
3398    mbfl_string_init(&result);
3399    ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3400    if (ret != NULL) {
3401        // TODO: avoid reallocation ???
3402        RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3403        efree(ret->val);
3404    } else {
3405        RETVAL_FALSE;
3406    }
3407}
3408/* }}} */
3409
3410/* {{{ proto string mb_decode_mimeheader(string string)
3411   Decodes the MIME "encoded-word" in the string */
3412PHP_FUNCTION(mb_decode_mimeheader)
3413{
3414    mbfl_string string, result, *ret;
3415
3416    mbfl_string_init(&string);
3417    string.no_language = MBSTRG(language);
3418    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3419
3420    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3421        return;
3422    }
3423
3424    mbfl_string_init(&result);
3425    ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3426    if (ret != NULL) {
3427        // TODO: avoid reallocation ???
3428        RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
3429        efree(ret->val);
3430    } else {
3431        RETVAL_FALSE;
3432    }
3433}
3434/* }}} */
3435
3436/* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3437   Conversion between full-width character and half-width character (Japanese) */
3438PHP_FUNCTION(mb_convert_kana)
3439{
3440    int opt, i;
3441    mbfl_string string, result, *ret;
3442    char *optstr = NULL;
3443    size_t optstr_len;
3444    char *encname = NULL;
3445    size_t encname_len;
3446
3447    mbfl_string_init(&string);
3448    string.no_language = MBSTRG(language);
3449    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3450
3451    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3452        return;
3453    }
3454
3455    /* option */
3456    if (optstr != NULL) {
3457        char *p = optstr;
3458        int n = optstr_len;
3459        i = 0;
3460        opt = 0;
3461        while (i < n) {
3462            i++;
3463            switch (*p++) {
3464            case 'A':
3465                opt |= 0x1;
3466                break;
3467            case 'a':
3468                opt |= 0x10;
3469                break;
3470            case 'R':
3471                opt |= 0x2;
3472                break;
3473            case 'r':
3474                opt |= 0x20;
3475                break;
3476            case 'N':
3477                opt |= 0x4;
3478                break;
3479            case 'n':
3480                opt |= 0x40;
3481                break;
3482            case 'S':
3483                opt |= 0x8;
3484                break;
3485            case 's':
3486                opt |= 0x80;
3487                break;
3488            case 'K':
3489                opt |= 0x100;
3490                break;
3491            case 'k':
3492                opt |= 0x1000;
3493                break;
3494            case 'H':
3495                opt |= 0x200;
3496                break;
3497            case 'h':
3498                opt |= 0x2000;
3499                break;
3500            case 'V':
3501                opt |= 0x800;
3502                break;
3503            case 'C':
3504                opt |= 0x10000;
3505                break;
3506            case 'c':
3507                opt |= 0x20000;
3508                break;
3509            case 'M':
3510                opt |= 0x100000;
3511                break;
3512            case 'm':
3513                opt |= 0x200000;
3514                break;
3515            }
3516        }
3517    } else {
3518        opt = 0x900;
3519    }
3520
3521    /* encoding */
3522    if (encname != NULL) {
3523        string.no_encoding = mbfl_name2no_encoding(encname);
3524        if (string.no_encoding == mbfl_no_encoding_invalid) {
3525            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3526            RETURN_FALSE;
3527        }
3528    }
3529
3530    ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3531    if (ret != NULL) {
3532        // TODO: avoid reallocation ???
3533        RETVAL_STRINGL((char *)ret->val, ret->len);     /* the string is already strdup()'ed */
3534        efree(ret->val);
3535    } else {
3536        RETVAL_FALSE;
3537    }
3538}
3539/* }}} */
3540
/* Number of zval slots the traversal stack in mb_convert_variables() starts
 * with, and the amount it grows by each time it fills up. */
#define PHP_MBSTR_STACK_BLOCK_SIZE 32

/* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
   Converts every string found in the given variables (including strings
   nested inside arrays and objects) to the desired encoding, replacing the
   string values in place. Sets the return value to the name of the source
   encoding that was used, or to false when the target encoding is unknown
   or no converter could be created. */
PHP_FUNCTION(mb_convert_variables)
{
    zval *args, *stack, *var, *hash_entry, *hash_entry_ptr, *zfrom_enc;
    HashTable *target_hash;
    mbfl_string string, result, *ret;
    const mbfl_encoding *from_encoding, *to_encoding;
    mbfl_encoding_detector *identd;
    mbfl_buffer_converter *convd;
    int n, argc, stack_level, stack_max;
    size_t to_enc_len;
    size_t elistsz;
    const mbfl_encoding **elist;
    char *to_enc;
    void *ptmp;

    /* "sz+": target encoding name, source encoding spec, then one or more variables */
    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sz+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
        return;
    }

    /* new encoding */
    to_encoding = mbfl_name2encoding(to_enc);
    if (!to_encoding) {
        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
        RETURN_FALSE;
    }

    /* initialize string */
    mbfl_string_init(&string);
    mbfl_string_init(&result);
    from_encoding = MBSTRG(current_internal_encoding);
    string.no_encoding = from_encoding->no_encoding;
    string.no_language = MBSTRG(language);

    /* pre-conversion encoding: build the candidate list either from an array
     * argument or from the string form of any other argument */
    elist = NULL;
    elistsz = 0;
    switch (Z_TYPE_P(zfrom_enc)) {
        case IS_ARRAY:
            php_mb_parse_encoding_array(zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
            break;
        default:
            convert_to_string_ex(zfrom_enc);
            php_mb_parse_encoding_list(Z_STRVAL_P(zfrom_enc), Z_STRLEN_P(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
            break;
    }

    if (elistsz <= 0) {
        /* no candidates: "pass" means no converter is created below */
        from_encoding = &mbfl_encoding_pass;
    } else if (elistsz == 1) {
        /* exactly one candidate: use it without detection */
        from_encoding = *elist;
    } else {
        /* auto detect: first pass over all variables, feeding every string
         * to the detector until it reports that it has seen enough */
        from_encoding = NULL;
        stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
        /* explicit stack of containers whose traversal was interrupted to
         * descend into a nested array/object */
        stack = (zval *)safe_emalloc(stack_max, sizeof(zval), 0);
        stack_level = 0;
        identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
        if (identd != NULL) {
            n = 0;
            while (n < argc || stack_level > 0) {
                if (stack_level <= 0) {
                    /* nothing pending: take the next top-level argument */
                    var = &args[n++];
                    ZVAL_DEREF(var);
                    SEPARATE_ZVAL_NOREF(var);
                    if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
                        target_hash = HASH_OF(var);
                        if (target_hash != NULL) {
                            zend_hash_internal_pointer_reset(target_hash);
                        }
                    }
                } else {
                    /* resume the most recently interrupted container; its
                     * internal pointer still marks where iteration stopped */
                    stack_level--;
                    var = &stack[stack_level];
                }
                if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
                    target_hash = HASH_OF(var);
                    if (target_hash != NULL) {
                        while ((hash_entry = zend_hash_get_current_data(target_hash)) != NULL) {
                            /* advance first, so the position is already saved if we descend */
                            zend_hash_move_forward(target_hash);
                            if (Z_TYPE_P(hash_entry) == IS_INDIRECT) {
                                hash_entry = Z_INDIRECT_P(hash_entry);
                            }
                            ZVAL_DEREF(hash_entry);
                            if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
                                /* grow the stack by one block when it is full */
                                if (stack_level >= stack_max) {
                                    stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
                                    ptmp = erealloc(stack, sizeof(zval) * stack_max);
                                    stack = (zval *)ptmp;
                                }
                                /* remember the parent, then walk the child */
                                ZVAL_COPY_VALUE(&stack[stack_level], var);
                                stack_level++;
                                var = hash_entry;
                                target_hash = HASH_OF(var);
                                if (target_hash != NULL) {
                                    zend_hash_internal_pointer_reset(target_hash);
                                    /* continues the inner while loop, now over the child table */
                                    continue;
                                }
                            } else if (Z_TYPE_P(hash_entry) == IS_STRING) {
                                string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
                                string.len = Z_STRLEN_P(hash_entry);
                                if (mbfl_encoding_detector_feed(identd, &string)) {
                                    goto detect_end;        /* complete detecting */
                                }
                            }
                        }
                    }
                } else if (Z_TYPE_P(var) == IS_STRING) {
                    string.val = (unsigned char *)Z_STRVAL_P(var);
                    string.len = Z_STRLEN_P(var);
                    if (mbfl_encoding_detector_feed(identd, &string)) {
                        goto detect_end;        /* complete detecting */
                    }
                }
            }
detect_end:
            from_encoding = mbfl_encoding_detector_judge2(identd);
            mbfl_encoding_detector_delete(identd);
        }
        efree(stack);

        if (!from_encoding) {
            /* detection failed: warn and fall back to "pass" (no conversion) */
            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
            from_encoding = &mbfl_encoding_pass;
        }
    }
    /* the candidate list is no longer needed once from_encoding is chosen */
    if (elist != NULL) {
        efree((void *)elist);
    }
    /* create converter */
    convd = NULL;
    if (from_encoding != &mbfl_encoding_pass) {
        convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
        if (convd == NULL) {
            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
            RETURN_FALSE;
        }
        mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
        mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
    }

    /* convert: second pass with the same stack-based traversal as the
     * detection pass, this time replacing each string with its converted form */
    if (convd != NULL) {
        stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
        stack = (zval*)safe_emalloc(stack_max, sizeof(zval), 0);
        stack_level = 0;
        n = 0;
        while (n < argc || stack_level > 0) {
            if (stack_level <= 0) {
                /* nothing pending: take the next top-level argument */
                var = &args[n++];
                ZVAL_DEREF(var);
                SEPARATE_ZVAL_NOREF(var);
                if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
                    target_hash = HASH_OF(var);
                    if (target_hash != NULL) {
                        zend_hash_internal_pointer_reset(target_hash);
                    }
                }
            } else {
                /* resume the most recently interrupted container */
                stack_level--;
                var = &stack[stack_level];
            }
            if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) {
                target_hash = HASH_OF(var);
                if (target_hash != NULL) {
                    while ((hash_entry_ptr = zend_hash_get_current_data(target_hash)) != NULL) {
                        zend_hash_move_forward(target_hash);
                        if (Z_TYPE_P(hash_entry_ptr) == IS_INDIRECT) {
                            hash_entry_ptr = Z_INDIRECT_P(hash_entry_ptr);
                        }
                        /* hash_entry_ptr keeps the slot itself; hash_entry is
                         * the value after following a reference, if any */
                        hash_entry = hash_entry_ptr;
                        ZVAL_DEREF(hash_entry);
                        if (Z_TYPE_P(hash_entry) == IS_ARRAY || Z_TYPE_P(hash_entry) == IS_OBJECT) {
                            /* grow the stack by one block when it is full */
                            if (stack_level >= stack_max) {
                                stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
                                ptmp = erealloc(stack, sizeof(zval) * stack_max);
                                stack = (zval *)ptmp;
                            }
                            /* remember the parent, then walk the child */
                            ZVAL_COPY_VALUE(&stack[stack_level], var);
                            stack_level++;
                            var = hash_entry;
                            SEPARATE_ZVAL(hash_entry);
                            target_hash = HASH_OF(var);
                            if (target_hash != NULL) {
                                zend_hash_internal_pointer_reset(target_hash);
                                /* continues the inner while loop, now over the child table */
                                continue;
                            }
                        } else if (Z_TYPE_P(hash_entry) == IS_STRING) {
                            string.val = (unsigned char *)Z_STRVAL_P(hash_entry);
                            string.len = Z_STRLEN_P(hash_entry);
                            ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
                            if (ret != NULL) {
                                /* drop the old value and store a copy of the converted bytes */
                                zval_ptr_dtor(hash_entry_ptr);
                                // TODO: avoid reallocation ???
                                ZVAL_STRINGL(hash_entry_ptr, (char *)ret->val, ret->len);
                                efree(ret->val);
                            }
                        }
                    }
                }
            } else if (Z_TYPE_P(var) == IS_STRING) {
                string.val = (unsigned char *)Z_STRVAL_P(var);
                string.len = Z_STRLEN_P(var);
                ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
                if (ret != NULL) {
                    /* drop the old value and store a copy of the converted bytes */
                    zval_ptr_dtor(var);
                    // TODO: avoid reallocation ???
                    ZVAL_STRINGL(var, (char *)ret->val, ret->len);
                    efree(ret->val);
                }
            }
        }
        efree(stack);

        /* accumulate the number of illegal characters seen by this converter */
        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
        mbfl_buffer_converter_delete(convd);
    }

    /* report which source encoding was used (may be the "pass" encoding) */
    if (from_encoding) {
        RETURN_STRING(from_encoding->name);
    } else {
        RETURN_FALSE;
    }
}
3768/* }}} */
3769
3770/* {{{ HTML numeric entity */
3771/* {{{ static void php_mb_numericentity_exec() */
3772static void
3773php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3774{
3775    char *str, *encoding;
3776    size_t str_len, encoding_len;
3777    zval *zconvmap, *hash_entry;
3778    HashTable *target_hash;
3779    int argc = ZEND_NUM_ARGS();
3780    int i, *convmap, *mapelm, mapsize=0;
3781    zend_bool is_hex = 0;
3782    mbfl_string string, result, *ret;
3783    enum mbfl_no_encoding no_encoding;
3784
3785    if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3786        return;
3787    }
3788
3789    mbfl_string_init(&string);
3790    string.no_language = MBSTRG(language);
3791    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3792    string.val = (unsigned char *)str;
3793    string.len = str_len;
3794
3795    /* encoding */
3796    if ((argc == 3 || argc == 4) && encoding_len > 0) {
3797        no_encoding = mbfl_name2no_encoding(encoding);
3798        if (no_encoding == mbfl_no_encoding_invalid) {
3799            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3800            RETURN_FALSE;
3801        } else {
3802            string.no_encoding = no_encoding;
3803        }
3804    }
3805
3806    if (argc == 4) {
3807        if (type == 0 && is_hex) {
3808            type = 2; /* output in hex format */
3809        }
3810    }
3811
3812    /* conversion map */
3813    convmap = NULL;
3814    if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3815        target_hash = Z_ARRVAL_P(zconvmap);
3816        i = zend_hash_num_elements(target_hash);
3817        if (i > 0) {
3818            convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3819            mapelm = convmap;
3820            mapsize = 0;
3821            ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3822                convert_to_long_ex(hash_entry);
3823                *mapelm++ = Z_LVAL_P(hash_entry);
3824                mapsize++;
3825            } ZEND_HASH_FOREACH_END();
3826        }
3827    }
3828    if (convmap == NULL) {
3829        RETURN_FALSE;
3830    }
3831    mapsize /= 4;
3832
3833    ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3834    if (ret != NULL) {
3835        // TODO: avoid reallocation ???
3836        RETVAL_STRINGL((char *)ret->val, ret->len);
3837        efree(ret->val);
3838    } else {
3839        RETVAL_FALSE;
3840    }
3841    efree((void *)convmap);
3842}
3843/* }}} */
3844
/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
   Converts specified characters to HTML numeric entities.
   Thin wrapper around php_mb_numericentity_exec(). */
PHP_FUNCTION(mb_encode_numericentity)
{
    /* type 0 = encode; the helper switches to type 2 (hex output) when a
     * fourth argument is given and is_hex is true */
    php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
3851/* }}} */
3852
/* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
   Converts HTML numeric entities to character code.
   Thin wrapper around php_mb_numericentity_exec(). */
PHP_FUNCTION(mb_decode_numericentity)
{
    /* type 1 = decode */
    php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
3859/* }}} */
3860/* }}} */
3861
3862/* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3863 *  Sends an email message with MIME scheme
3864 */
3865
/*
 * Helper for loops that scrub control characters out of a header value.
 *
 * If str[pos] starts a folded-line separator (CR LF followed by a space or
 * a tab), pos is advanced to the last character of that whitespace run and
 * the enclosing loop is continued, so the separator is left untouched.
 * Otherwise nothing happens and execution falls through.
 *
 * Usage notes:
 *  - must be used as a statement directly inside a loop body, because it
 *    executes `continue`; for the same reason it cannot be wrapped in
 *    do { } while (0), so never follow it with `else`;
 *  - str must be NUL-terminated (the look-ahead relies on short-circuit
 *    evaluation stopping at the terminator);
 *  - both arguments are evaluated several times and pos must be a
 *    modifiable lvalue.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)                                     \
    if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) {    \
        (pos) += 2;                                         \
        while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') {                   \
            (pos)++;                                        \
        }                                                   \
        continue;                                           \
    }
3874
/*
 * Replaces every NUL byte within the first len bytes of str with a space,
 * modifying the buffer in place.
 *
 * Uses two scratch variables that the calling scope must declare:
 *     char *pp, *ee;
 * Wrapped in do { } while (0) so that it behaves as a single statement
 * (safe under an unbraced if/else); invoke it with a trailing semicolon.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)            \
    do {                                                \
        pp = (str);                                     \
        ee = pp + (len);                                \
        while ((pp = memchr(pp, '\0', (ee - pp)))) {    \
            *pp = ' ';                                  \
        }                                               \
    } while (0)
3881
3882static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3883{
3884    const char *ps;
3885    size_t icnt;
3886    int state = 0;
3887    int crlf_state = -1;
3888    char *token;
3889    size_t token_pos;
3890    zend_string *fld_name, *fld_val;
3891
3892    ps = str;
3893    icnt = str_len;
3894    fld_name = fld_val = NULL;
3895
3896    /*
3897     *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3898     *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3899     *      state  0            1           2          3
3900     *
3901     *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3902     *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3903     * crlf_state -1                       0                     1 -1
3904     *
3905     */
3906
3907    while (icnt > 0) {
3908        switch (*ps) {
3909            case ':':
3910                if (crlf_state == 1) {
3911                    token_pos++;
3912                }
3913
3914                if (state == 0 || state == 1) {
3915                    fld_name = zend_string_init(token, token_pos, 0);
3916
3917                    state = 2;
3918                } else {
3919                    token_pos++;
3920                }
3921
3922                crlf_state = 0;
3923                break;
3924
3925            case '\n':
3926                if (crlf_state == -1) {
3927                    goto out;
3928                }
3929                crlf_state = -1;
3930                break;
3931
3932            case '\r':
3933                if (crlf_state == 1) {
3934                    token_pos++;
3935                } else {
3936                    crlf_state = 1;
3937                }
3938                break;
3939
3940            case ' ': case '\t':
3941                if (crlf_state == -1) {
3942                    if (state == 3) {
3943                        /* continuing from the previous line */
3944                        state = 4;
3945                    } else {
3946                        /* simply skipping this new line */
3947                        state = 5;
3948                    }
3949                } else {
3950                    if (crlf_state == 1) {
3951                        token_pos++;
3952                    }
3953                    if (state == 1 || state == 3) {
3954                        token_pos++;
3955                    }
3956                }
3957                crlf_state = 0;
3958                break;
3959
3960            default:
3961                switch (state) {
3962                    case 0:
3963                        token = (char*)ps;
3964                        token_pos = 0;
3965                        state = 1;
3966                        break;
3967
3968                    case 2:
3969                        if (crlf_state != -1) {
3970                            token = (char*)ps;
3971                            token_pos = 0;
3972
3973                            state = 3;
3974                            break;
3975                        }
3976                        /* break is missing intentionally */
3977
3978                    case 3:
3979                        if (crlf_state == -1) {
3980                            fld_val = zend_string_init(token, token_pos, 0);
3981
3982                            if (fld_name != NULL && fld_val != NULL) {
3983                                zval val;
3984                                /* FIXME: some locale free implementation is
3985                                 * really required here,,, */
3986                                php_strtoupper(fld_name->val, fld_name->len);
3987                                ZVAL_STR(&val, fld_val);
3988
3989                                zend_hash_update(ht, fld_name, &val);
3990
3991                                zend_string_release(fld_name);
3992                            }
3993
3994                            fld_name = fld_val = NULL;
3995                            token = (char*)ps;
3996                            token_pos = 0;
3997
3998                            state = 1;
3999                        }
4000                        break;
4001
4002                    case 4:
4003                        token_pos++;
4004                        state = 3;
4005                        break;
4006                }
4007
4008                if (crlf_state == 1) {
4009                    token_pos++;
4010                }
4011
4012                token_pos++;
4013
4014                crlf_state = 0;
4015                break;
4016        }
4017        ps++, icnt--;
4018    }
4019out:
4020    if (state == 2) {
4021        token = "";
4022        token_pos = 0;
4023
4024        state = 3;
4025    }
4026    if (state == 3) {
4027        fld_val = zend_string_init(token, 0, 0);
4028
4029        if (fld_name != NULL && fld_val != NULL) {
4030            zval val;
4031            /* FIXME: some locale free implementation is
4032             * really required here,,, */
4033            php_strtoupper(fld_name->val, fld_name->len);
4034            ZVAL_STR(&val, fld_val);
4035
4036            zend_hash_update(ht, fld_name, &val);
4037
4038            zend_string_release(fld_name);
4039        }
4040    }
4041    return state;
4042}
4043
4044PHP_FUNCTION(mb_send_mail)
4045{
4046    int n;
4047    char *to = NULL;
4048    size_t to_len;
4049    char *message = NULL;
4050    size_t message_len;
4051    char *headers = NULL;
4052    size_t headers_len;
4053    char *subject = NULL;
4054    zend_string *extra_cmd = NULL;
4055    size_t subject_len;
4056    int i;
4057    char *to_r = NULL;
4058    char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4059    struct {
4060        int cnt_type:1;
4061        int cnt_trans_enc:1;
4062    } suppressed_hdrs = { 0, 0 };
4063
4064    char *message_buf = NULL, *subject_buf = NULL, *p;
4065    mbfl_string orig_str, conv_str;
4066    mbfl_string *pstr;  /* pointer to mbfl string for return value */
4067    enum mbfl_no_encoding
4068        tran_cs,    /* transfar text charset */
4069        head_enc,   /* header transfar encoding */
4070        body_enc;   /* body transfar encoding */
4071    mbfl_memory_device device;  /* automatic allocateable buffer for additional header */
4072    const mbfl_language *lang;
4073    int err = 0;
4074    HashTable ht_headers;
4075    zval *s;
4076    extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4077    char *pp, *ee;
4078
4079    /* initialize */
4080    mbfl_memory_device_init(&device, 0, 0);
4081    mbfl_string_init(&orig_str);
4082    mbfl_string_init(&conv_str);
4083
4084    /* character-set, transfer-encoding */
4085    tran_cs = mbfl_no_encoding_utf8;
4086    head_enc = mbfl_no_encoding_base64;
4087    body_enc = mbfl_no_encoding_base64;
4088    lang = mbfl_no2language(MBSTRG(language));
4089    if (lang != NULL) {
4090        tran_cs = lang->mail_charset;
4091        head_enc = lang->mail_header_encoding;
4092        body_enc = lang->mail_body_encoding;
4093    }
4094
4095    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|sS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd) == FAILURE) {
4096        return;
4097    }
4098
4099    /* ASCIIZ check */
4100    MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4101    MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4102    MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4103    if (headers) {
4104        MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
4105    }
4106    if (extra_cmd) {
4107        MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd->val, extra_cmd->len);
4108    }
4109
4110    zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
4111
4112    if (headers != NULL) {
4113        _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
4114    }
4115
4116    if ((s = zend_hash_str_find_ptr(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
4117        char *tmp;
4118        char *param_name;
4119        char *charset = NULL;
4120
4121        p = strchr(Z_STRVAL_P(s), ';');
4122
4123        if (p != NULL) {
4124            /* skipping the padded spaces */
4125            do {
4126                ++p;
4127            } while (*p == ' ' || *p == '\t');
4128
4129            if (*p != '\0') {
4130                if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4131                    if (strcasecmp(param_name, "charset") == 0) {
4132                        enum mbfl_no_encoding _tran_cs = tran_cs;
4133
4134                        charset = php_strtok_r(NULL, "= \"", &tmp);
4135                        if (charset != NULL) {
4136                            _tran_cs = mbfl_name2no_encoding(charset);
4137                        }
4138
4139                        if (_tran_cs == mbfl_no_encoding_invalid) {
4140                            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4141                            _tran_cs = mbfl_no_encoding_ascii;
4142                        }
4143                        tran_cs = _tran_cs;
4144                    }
4145                }
4146            }
4147        }
4148        suppressed_hdrs.cnt_type = 1;
4149    }
4150
4151    if ((s = zend_hash_str_find_ptr(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
4152        enum mbfl_no_encoding _body_enc;
4153
4154        _body_enc = mbfl_name2no_encoding(Z_STRVAL_P(s));
4155        switch (_body_enc) {
4156            case mbfl_no_encoding_base64:
4157            case mbfl_no_encoding_7bit:
4158            case mbfl_no_encoding_8bit:
4159                body_enc = _body_enc;
4160                break;
4161
4162            default:
4163                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
4164                body_enc =  mbfl_no_encoding_8bit;
4165                break;
4166        }
4167        suppressed_hdrs.cnt_trans_enc = 1;
4168    }
4169
4170    /* To: */
4171    if (to != NULL) {
4172        if (to_len > 0) {
4173            to_r = estrndup(to, to_len);
4174            for (; to_len; to_len--) {
4175                if (!isspace((unsigned char) to_r[to_len - 1])) {
4176                    break;
4177                }
4178                to_r[to_len - 1] = '\0';
4179            }
4180            for (i = 0; to_r[i]; i++) {
4181            if (iscntrl((unsigned char) to_r[i])) {
4182                /* According to RFC 822, section 3.1.1 long headers may be separated into
4183                 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4184                 * To prevent these separators from being replaced with a space, we use the
4185                 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4186                 */
4187                SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4188                to_r[i] = ' ';
4189            }
4190            }
4191        } else {
4192            to_r = to;
4193        }
4194    } else {
4195        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
4196        err = 1;
4197    }
4198
4199    /* Subject: */
4200    if (subject != NULL && subject_len >= 0) {
4201        orig_str.no_language = MBSTRG(language);
4202        orig_str.val = (unsigned char *)subject;
4203        orig_str.len = subject_len;
4204        orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4205        if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4206            const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4207            orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4208        }
4209        pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4210        if (pstr != NULL) {
4211            subject_buf = subject = (char *)pstr->val;
4212        }
4213    } else {
4214        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4215        err = 1;
4216    }
4217
4218    /* message body */
4219    if (message != NULL) {
4220        orig_str.no_language = MBSTRG(language);
4221        orig_str.val = (unsigned char *)message;
4222        orig_str.len = (unsigned int)message_len;
4223        orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4224
4225        if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4226            const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4227            orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4228        }
4229
4230        pstr = NULL;
4231        {
4232            mbfl_string tmpstr;
4233
4234            if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4235                tmpstr.no_encoding=mbfl_no_encoding_8bit;
4236                pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4237                efree(tmpstr.val);
4238            }
4239        }
4240        if (pstr != NULL) {
4241            message_buf = message = (char *)pstr->val;
4242        }
4243    } else {
4244        /* this is not really an error, so it is allowed. */
4245        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4246        message = NULL;
4247    }
4248
4249    /* other headers */
4250#define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4251#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4252#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4253#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4254    if (headers != NULL) {
4255        p = headers;
4256        n = headers_len;
4257        mbfl_memory_device_strncat(&device, p, n);
4258        if (n > 0 && p[n - 1] != '\n') {
4259            mbfl_memory_device_strncat(&device, "\n", 1);
4260        }
4261    }
4262
4263    if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4264        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4265        mbfl_memory_device_strncat(&device, "\n", 1);
4266    }
4267
4268    if (!suppressed_hdrs.cnt_type) {
4269        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4270
4271        p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4272        if (p != NULL) {
4273            mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4274            mbfl_memory_device_strcat(&device, p);
4275        }
4276        mbfl_memory_device_strncat(&device, "\n", 1);
4277    }
4278    if (!suppressed_hdrs.cnt_trans_enc) {
4279        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4280        p = (char *)mbfl_no2preferred_mime_name(body_enc);
4281        if (p == NULL) {
4282            p = "7bit";
4283        }
4284        mbfl_memory_device_strcat(&device, p);
4285        mbfl_memory_device_strncat(&device, "\n", 1);
4286    }
4287
4288    mbfl_memory_device_unput(&device);
4289    mbfl_memory_device_output('\0', &device);
4290    headers = (char *)device.buffer;
4291
4292    if (force_extra_parameters) {
4293        extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4294    } else if (extra_cmd) {
4295        extra_cmd = php_escape_shell_cmd(extra_cmd->val);
4296    }
4297
4298    if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? extra_cmd->val : NULL TSRMLS_CC)) {
4299        RETVAL_TRUE;
4300    } else {
4301        RETVAL_FALSE;
4302    }
4303
4304    if (extra_cmd) {
4305        zend_string_release(extra_cmd);
4306    }
4307
4308    if (to_r != to) {
4309        efree(to_r);
4310    }
4311    if (subject_buf) {
4312        efree((void *)subject_buf);
4313    }
4314    if (message_buf) {
4315        efree((void *)message_buf);
4316    }
4317    mbfl_memory_device_clear(&device);
4318    zend_hash_destroy(&ht_headers);
4319}
4320
4321#undef SKIP_LONG_HEADER_SEP_MBSTRING
4322#undef MAIL_ASCIIZ_CHECK_MBSTRING
4323#undef PHP_MBSTR_MAIL_MIME_HEADER1
4324#undef PHP_MBSTR_MAIL_MIME_HEADER2
4325#undef PHP_MBSTR_MAIL_MIME_HEADER3
4326#undef PHP_MBSTR_MAIL_MIME_HEADER4
4327/* }}} */
4328
4329/* {{{ proto mixed mb_get_info([string type])
4330   Returns the current settings of mbstring */
4331PHP_FUNCTION(mb_get_info)
4332{
4333    char *typ = NULL;
4334    size_t typ_len;
4335    size_t n;
4336    char *name;
4337    const struct mb_overload_def *over_func;
4338    zval row1, row2;
4339    const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4340    const mbfl_encoding **entry;
4341
4342    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4343        return;
4344    }
4345
4346    if (!typ || !strcasecmp("all", typ)) {
4347        array_init(return_value);
4348        if (MBSTRG(current_internal_encoding)) {
4349            add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
4350        }
4351        if (MBSTRG(http_input_identify)) {
4352            add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
4353        }
4354        if (MBSTRG(current_http_output_encoding)) {
4355            add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
4356        }
4357        if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4358            add_assoc_string(return_value, "http_output_conv_mimetypes", name);
4359        }
4360        add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4361        if (MBSTRG(func_overload)){
4362            over_func = &(mb_ovld[0]);
4363            array_init(&row1);
4364            while (over_func->type > 0) {
4365                if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4366                    add_assoc_string(&row1, over_func->orig_func, over_func->ovld_func);
4367                }
4368                over_func++;
4369            }
4370            add_assoc_zval(return_value, "func_overload_list", &row1);
4371        } else {
4372            add_assoc_string(return_value, "func_overload_list", "no overload");
4373        }
4374        if (lang != NULL) {
4375            if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4376                add_assoc_string(return_value, "mail_charset", name);
4377            }
4378            if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4379                add_assoc_string(return_value, "mail_header_encoding", name);
4380            }
4381            if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4382                add_assoc_string(return_value, "mail_body_encoding", name);
4383            }
4384        }
4385        add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4386        if (MBSTRG(encoding_translation)) {
4387            add_assoc_string(return_value, "encoding_translation", "On");
4388        } else {
4389            add_assoc_string(return_value, "encoding_translation", "Off");
4390        }
4391        if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4392            add_assoc_string(return_value, "language", name);
4393        }
4394        n = MBSTRG(current_detect_order_list_size);
4395        entry = MBSTRG(current_detect_order_list);
4396        if (n > 0) {
4397            size_t i;
4398            array_init(&row2);
4399            for (i = 0; i < n; i++) {
4400                add_next_index_string(&row2, (*entry)->name);
4401                entry++;
4402            }
4403            add_assoc_zval(return_value, "detect_order", &row2);
4404        }
4405        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4406            add_assoc_string(return_value, "substitute_character", "none");
4407        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4408            add_assoc_string(return_value, "substitute_character", "long");
4409        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4410            add_assoc_string(return_value, "substitute_character", "entity");
4411        } else {
4412            add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4413        }
4414        if (MBSTRG(strict_detection)) {
4415            add_assoc_string(return_value, "strict_detection", "On");
4416        } else {
4417            add_assoc_string(return_value, "strict_detection", "Off");
4418        }
4419    } else if (!strcasecmp("internal_encoding", typ)) {
4420        if (MBSTRG(current_internal_encoding)) {
4421            RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
4422        }
4423    } else if (!strcasecmp("http_input", typ)) {
4424        if (MBSTRG(http_input_identify)) {
4425            RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
4426        }
4427    } else if (!strcasecmp("http_output", typ)) {
4428        if (MBSTRG(current_http_output_encoding)) {
4429            RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
4430        }
4431    } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4432        if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
4433            RETVAL_STRING(name);
4434        }
4435    } else if (!strcasecmp("func_overload", typ)) {
4436        RETVAL_LONG(MBSTRG(func_overload));
4437    } else if (!strcasecmp("func_overload_list", typ)) {
4438        if (MBSTRG(func_overload)){
4439                over_func = &(mb_ovld[0]);
4440                array_init(return_value);
4441                while (over_func->type > 0) {
4442                    if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4443                        add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func);
4444                    }
4445                    over_func++;
4446                }
4447        } else {
4448            RETVAL_STRING("no overload");
4449        }
4450    } else if (!strcasecmp("mail_charset", typ)) {
4451        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4452            RETVAL_STRING(name);
4453        }
4454    } else if (!strcasecmp("mail_header_encoding", typ)) {
4455        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4456            RETVAL_STRING(name);
4457        }
4458    } else if (!strcasecmp("mail_body_encoding", typ)) {
4459        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4460            RETVAL_STRING(name);
4461        }
4462    } else if (!strcasecmp("illegal_chars", typ)) {
4463        RETVAL_LONG(MBSTRG(illegalchars));
4464    } else if (!strcasecmp("encoding_translation", typ)) {
4465        if (MBSTRG(encoding_translation)) {
4466            RETVAL_STRING("On");
4467        } else {
4468            RETVAL_STRING("Off");
4469        }
4470    } else if (!strcasecmp("language", typ)) {
4471        if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4472            RETVAL_STRING(name);
4473        }
4474    } else if (!strcasecmp("detect_order", typ)) {
4475        n = MBSTRG(current_detect_order_list_size);
4476        entry = MBSTRG(current_detect_order_list);
4477        if (n > 0) {
4478            size_t i;
4479            array_init(return_value);
4480            for (i = 0; i < n; i++) {
4481                add_next_index_string(return_value, (*entry)->name);
4482                entry++;
4483            }
4484        }
4485    } else if (!strcasecmp("substitute_character", typ)) {
4486        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4487            RETVAL_STRING("none");
4488        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4489            RETVAL_STRING("long");
4490        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4491            RETVAL_STRING("entity");
4492        } else {
4493            RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4494        }
4495    } else if (!strcasecmp("strict_detection", typ)) {
4496        if (MBSTRG(strict_detection)) {
4497            RETVAL_STRING("On");
4498        } else {
4499            RETVAL_STRING("Off");
4500        }
4501    } else {
4502        RETURN_FALSE;
4503    }
4504}
4505/* }}} */
4506
4507/* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4508   Check if the string is valid for the specified encoding */
4509PHP_FUNCTION(mb_check_encoding)
4510{
4511    char *var = NULL;
4512    size_t var_len;
4513    char *enc = NULL;
4514    size_t enc_len;
4515    mbfl_buffer_converter *convd;
4516    const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4517    mbfl_string string, result, *ret = NULL;
4518    long illegalchars = 0;
4519
4520    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4521        return;
4522    }
4523
4524    if (var == NULL) {
4525        RETURN_BOOL(MBSTRG(illegalchars) == 0);
4526    }
4527
4528    if (enc != NULL) {
4529        encoding = mbfl_name2encoding(enc);
4530        if (!encoding || encoding == &mbfl_encoding_pass) {
4531            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4532            RETURN_FALSE;
4533        }
4534    }
4535
4536    convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4537    if (convd == NULL) {
4538        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4539        RETURN_FALSE;
4540    }
4541    mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4542    mbfl_buffer_converter_illegal_substchar(convd, 0);
4543
4544    /* initialize string */
4545    mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4546    mbfl_string_init(&result);
4547
4548    string.val = (unsigned char *)var;
4549    string.len = var_len;
4550    ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4551    illegalchars = mbfl_buffer_illegalchars(convd);
4552    mbfl_buffer_converter_delete(convd);
4553
4554    RETVAL_FALSE;
4555    if (ret != NULL) {
4556        if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4557            RETVAL_TRUE;
4558        }
4559        mbfl_string_clear(&result);
4560    }
4561}
4562/* }}} */
4563
4564/* {{{ php_mb_populate_current_detect_order_list */
4565static void php_mb_populate_current_detect_order_list(TSRMLS_D)
4566{
4567    const mbfl_encoding **entry = 0;
4568    size_t nentries;
4569
4570    if (MBSTRG(current_detect_order_list)) {
4571        return;
4572    }
4573
4574    if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4575        nentries = MBSTRG(detect_order_list_size);
4576        entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4577        memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4578    } else {
4579        const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4580        size_t i;
4581        nentries = MBSTRG(default_detect_order_list_size);
4582        entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4583        for (i = 0; i < nentries; i++) {
4584            entry[i] = mbfl_no2encoding(src[i]);
4585        }
4586    }
4587    MBSTRG(current_detect_order_list) = entry;
4588    MBSTRG(current_detect_order_list_size) = nentries;
4589}
4590/* }}} */
4591
4592/* {{{ static int php_mb_encoding_translation() */
4593static int php_mb_encoding_translation(TSRMLS_D)
4594{
4595    return MBSTRG(encoding_translation);
4596}
4597/* }}} */
4598
4599/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
4600MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4601{
4602    if (enc != NULL) {
4603        if (enc->flag & MBFL_ENCTYPE_MBCS) {
4604            if (enc->mblen_table != NULL) {
4605                if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4606            }
4607        } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4608            return 2;
4609        } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4610            return 4;
4611        }
4612    }
4613    return 1;
4614}
4615/* }}} */
4616
4617/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
4618MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4619{
4620    return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4621}
4622/* }}} */
4623
4624/* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
4625MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4626{
4627    register const char *p = s;
4628    char *last=NULL;
4629
4630    if (nbytes == (size_t)-1) {
4631        size_t nb = 0;
4632
4633        while (*p != '\0') {
4634            if (nb == 0) {
4635                if ((unsigned char)*p == (unsigned char)c) {
4636                    last = (char *)p;
4637                }
4638                nb = php_mb_mbchar_bytes_ex(p, enc);
4639                if (nb == 0) {
4640                    return NULL; /* something is going wrong! */
4641                }
4642            }
4643            --nb;
4644            ++p;
4645        }
4646    } else {
4647        register size_t bcnt = nbytes;
4648        register size_t nbytes_char;
4649        while (bcnt > 0) {
4650            if ((unsigned char)*p == (unsigned char)c) {
4651                last = (char *)p;
4652            }
4653            nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4654            if (bcnt < nbytes_char) {
4655                return NULL;
4656            }
4657            p += nbytes_char;
4658            bcnt -= nbytes_char;
4659        }
4660    }
4661    return last;
4662}
4663/* }}} */
4664
4665/* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
4666MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4667{
4668    return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4669}
4670/* }}} */
4671
4672/* {{{ MBSTRING_API int php_mb_stripos()
4673 */
4674MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4675{
4676    int n;
4677    mbfl_string haystack, needle;
4678    n = -1;
4679
4680    mbfl_string_init(&haystack);
4681    mbfl_string_init(&needle);
4682    haystack.no_language = MBSTRG(language);
4683    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4684    needle.no_language = MBSTRG(language);
4685    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4686
4687    do {
4688        size_t len = 0;
4689        haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4690        haystack.len = len;
4691
4692        if (!haystack.val) {
4693            break;
4694        }
4695
4696        if (haystack.len <= 0) {
4697            break;
4698        }
4699
4700        needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4701        needle.len = len;
4702
4703        if (!needle.val) {
4704            break;
4705        }
4706
4707        if (needle.len <= 0) {
4708            break;
4709        }
4710
4711        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4712        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4713            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4714            break;
4715        }
4716
4717        {
4718            int haystack_char_len = mbfl_strlen(&haystack);
4719
4720            if (mode) {
4721                if ((offset > 0 && offset > haystack_char_len) ||
4722                    (offset < 0 && -offset > haystack_char_len)) {
4723                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4724                    break;
4725                }
4726            } else {
4727                if (offset < 0 || offset > haystack_char_len) {
4728                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4729                    break;
4730                }
4731            }
4732        }
4733
4734        n = mbfl_strpos(&haystack, &needle, offset, mode);
4735    } while(0);
4736
4737    if (haystack.val) {
4738        efree(haystack.val);
4739    }
4740
4741    if (needle.val) {
4742        efree(needle.val);
4743    }
4744
4745    return n;
4746}
4747/* }}} */
4748
4749static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */
4750{
4751    *list = (const zend_encoding **)MBSTRG(http_input_list);
4752    *list_size = MBSTRG(http_input_list_size);
4753}
4754/* }}} */
4755
4756static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
4757{
4758    MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
4759}
4760/* }}} */
4761
4762#endif  /* HAVE_MBSTRING */
4763
4764/*
4765 * Local variables:
4766 * tab-width: 4
4767 * c-basic-offset: 4
4768 * End:
4769 * vim600: fdm=marker
4770 * vim: noet sw=4 ts=4
4771 */
4772