1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2014 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16   |         Rui Hirokawa <hirokawa@php.net>                              |
17   +----------------------------------------------------------------------+
18 */
19
20/* $Id$ */
21
22/*
23 * PHP 4 Multibyte String module "mbstring"
24 *
25 * History:
26 *   2000.5.19  Release php-4.0RC2_jstring-1.0
27 *   2001.4.1   Release php4_jstring-1.0.91
28 *   2001.4.30  Release php4_jstring-1.1 (contribute to The PHP Group)
29 *   2001.5.1   Renamed from jstring to mbstring (hirokawa@php.net)
30 */
31
32/*
33 * PHP3 Internationalization support program.
34 *
35 * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36 * All rights reserved.
37 *
38 * See README_PHP3-i18n-ja for more detail.
39 *
40 * Authors:
41 *    Hironori Sato <satoh@jpnnet.com>
42 *    Shigeru Kanemoto <sgk@happysize.co.jp>
43 *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44 *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45 */
46
47/* {{{ includes */
48#ifdef HAVE_CONFIG_H
49#include "config.h"
50#endif
51
52#include "php.h"
53#include "php_ini.h"
54#include "php_variables.h"
55#include "mbstring.h"
56#include "ext/standard/php_string.h"
57#include "ext/standard/php_mail.h"
58#include "ext/standard/exec.h"
59#include "ext/standard/php_smart_str.h"
60#include "ext/standard/url.h"
61#include "main/php_output.h"
62#include "ext/standard/info.h"
63
64#include "libmbfl/mbfl/mbfl_allocators.h"
65#include "libmbfl/mbfl/mbfilter_pass.h"
66
67#include "php_variables.h"
68#include "php_globals.h"
69#include "rfc1867.h"
70#include "php_content_types.h"
71#include "SAPI.h"
72#include "php_unicode.h"
73#include "TSRM.h"
74
75#include "mb_gpc.h"
76
77#if HAVE_MBREGEX
78#include "php_mbregex.h"
79#endif
80
81#include "zend_multibyte.h"
82
83#if HAVE_ONIG
84#include "php_onig_compat.h"
85#include <oniguruma.h>
86#undef UChar
87#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
88#include "ext/pcre/php_pcre.h"
89#endif
90/* }}} */
91
92#if HAVE_MBSTRING
93
94/* {{{ prototypes */
95ZEND_DECLARE_MODULE_GLOBALS(mbstring)
96
97static PHP_GINIT_FUNCTION(mbstring);
98static PHP_GSHUTDOWN_FUNCTION(mbstring);
99
100static void php_mb_populate_current_detect_order_list(TSRMLS_D);
101
102static int php_mb_encoding_translation(TSRMLS_D);
103
104static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
105
106static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);
107
108/* }}} */
109
110/* {{{ php_mb_default_identify_list */
111typedef struct _php_mb_nls_ident_list {
112    enum mbfl_no_language lang;
113    const enum mbfl_no_encoding *list;
114    size_t list_size;
115} php_mb_nls_ident_list;
116
117static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
118    mbfl_no_encoding_ascii,
119    mbfl_no_encoding_jis,
120    mbfl_no_encoding_utf8,
121    mbfl_no_encoding_euc_jp,
122    mbfl_no_encoding_sjis
123};
124
125static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
126    mbfl_no_encoding_ascii,
127    mbfl_no_encoding_utf8,
128    mbfl_no_encoding_euc_cn,
129    mbfl_no_encoding_cp936
130};
131
132static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
133    mbfl_no_encoding_ascii,
134    mbfl_no_encoding_utf8,
135    mbfl_no_encoding_euc_tw,
136    mbfl_no_encoding_big5
137};
138
139static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
140    mbfl_no_encoding_ascii,
141    mbfl_no_encoding_utf8,
142    mbfl_no_encoding_euc_kr,
143    mbfl_no_encoding_uhc
144};
145
146static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
147    mbfl_no_encoding_ascii,
148    mbfl_no_encoding_utf8,
149    mbfl_no_encoding_koi8r,
150    mbfl_no_encoding_cp1251,
151    mbfl_no_encoding_cp866
152};
153
154static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
155    mbfl_no_encoding_ascii,
156    mbfl_no_encoding_utf8,
157    mbfl_no_encoding_armscii8
158};
159
160static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
161    mbfl_no_encoding_ascii,
162    mbfl_no_encoding_utf8,
163    mbfl_no_encoding_cp1254,
164    mbfl_no_encoding_8859_9
165};
166
167static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
168    mbfl_no_encoding_ascii,
169    mbfl_no_encoding_utf8,
170    mbfl_no_encoding_koi8u
171};
172
173static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
174    mbfl_no_encoding_ascii,
175    mbfl_no_encoding_utf8
176};
177
178
179static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
180    { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
181    { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
182    { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
183    { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
184    { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
185    { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
186    { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
187    { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
188    { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
189};
190
191/* }}} */
192
193/* {{{ mb_overload_def mb_ovld[] */
194static const struct mb_overload_def mb_ovld[] = {
195    {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
196    {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
197    {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
198    {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
199    {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
200    {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
201    {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
202    {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
203    {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
204    {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
205    {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
206    {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
207    {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
208#if HAVE_MBREGEX
209    {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
210    {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
211    {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
212    {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
213    {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
214#endif
215    {0, NULL, NULL, NULL}
216};
217/* }}} */
218
219/* {{{ arginfo */
220ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
221    ZEND_ARG_INFO(0, language)
222ZEND_END_ARG_INFO()
223
224ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
225    ZEND_ARG_INFO(0, encoding)
226ZEND_END_ARG_INFO()
227
228ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
229    ZEND_ARG_INFO(0, type)
230ZEND_END_ARG_INFO()
231
232ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
233    ZEND_ARG_INFO(0, encoding)
234ZEND_END_ARG_INFO()
235
236ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
237    ZEND_ARG_INFO(0, encoding)
238ZEND_END_ARG_INFO()
239
240ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
241    ZEND_ARG_INFO(0, substchar)
242ZEND_END_ARG_INFO()
243
244ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
245    ZEND_ARG_INFO(0, encoding)
246ZEND_END_ARG_INFO()
247
248ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
249    ZEND_ARG_INFO(0, encoded_string)
250    ZEND_ARG_INFO(1, result)
251ZEND_END_ARG_INFO()
252
253ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
254    ZEND_ARG_INFO(0, contents)
255    ZEND_ARG_INFO(0, status)
256ZEND_END_ARG_INFO()
257
258ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
259    ZEND_ARG_INFO(0, str)
260    ZEND_ARG_INFO(0, encoding)
261ZEND_END_ARG_INFO()
262
263ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
264    ZEND_ARG_INFO(0, haystack)
265    ZEND_ARG_INFO(0, needle)
266    ZEND_ARG_INFO(0, offset)
267    ZEND_ARG_INFO(0, encoding)
268ZEND_END_ARG_INFO()
269
270ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
271    ZEND_ARG_INFO(0, haystack)
272    ZEND_ARG_INFO(0, needle)
273    ZEND_ARG_INFO(0, offset)
274    ZEND_ARG_INFO(0, encoding)
275ZEND_END_ARG_INFO()
276
277ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
278    ZEND_ARG_INFO(0, haystack)
279    ZEND_ARG_INFO(0, needle)
280    ZEND_ARG_INFO(0, offset)
281    ZEND_ARG_INFO(0, encoding)
282ZEND_END_ARG_INFO()
283
284ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
285    ZEND_ARG_INFO(0, haystack)
286    ZEND_ARG_INFO(0, needle)
287    ZEND_ARG_INFO(0, offset)
288    ZEND_ARG_INFO(0, encoding)
289ZEND_END_ARG_INFO()
290
291ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
292    ZEND_ARG_INFO(0, haystack)
293    ZEND_ARG_INFO(0, needle)
294    ZEND_ARG_INFO(0, part)
295    ZEND_ARG_INFO(0, encoding)
296ZEND_END_ARG_INFO()
297
298ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
299    ZEND_ARG_INFO(0, haystack)
300    ZEND_ARG_INFO(0, needle)
301    ZEND_ARG_INFO(0, part)
302    ZEND_ARG_INFO(0, encoding)
303ZEND_END_ARG_INFO()
304
305ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
306    ZEND_ARG_INFO(0, haystack)
307    ZEND_ARG_INFO(0, needle)
308    ZEND_ARG_INFO(0, part)
309    ZEND_ARG_INFO(0, encoding)
310ZEND_END_ARG_INFO()
311
312ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
313    ZEND_ARG_INFO(0, haystack)
314    ZEND_ARG_INFO(0, needle)
315    ZEND_ARG_INFO(0, part)
316    ZEND_ARG_INFO(0, encoding)
317ZEND_END_ARG_INFO()
318
319ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
320    ZEND_ARG_INFO(0, haystack)
321    ZEND_ARG_INFO(0, needle)
322    ZEND_ARG_INFO(0, encoding)
323ZEND_END_ARG_INFO()
324
325ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
326    ZEND_ARG_INFO(0, str)
327    ZEND_ARG_INFO(0, start)
328    ZEND_ARG_INFO(0, length)
329    ZEND_ARG_INFO(0, encoding)
330ZEND_END_ARG_INFO()
331
332ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
333    ZEND_ARG_INFO(0, str)
334    ZEND_ARG_INFO(0, start)
335    ZEND_ARG_INFO(0, length)
336    ZEND_ARG_INFO(0, encoding)
337ZEND_END_ARG_INFO()
338
339ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
340    ZEND_ARG_INFO(0, str)
341    ZEND_ARG_INFO(0, encoding)
342ZEND_END_ARG_INFO()
343
344ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
345    ZEND_ARG_INFO(0, str)
346    ZEND_ARG_INFO(0, start)
347    ZEND_ARG_INFO(0, width)
348    ZEND_ARG_INFO(0, trimmarker)
349    ZEND_ARG_INFO(0, encoding)
350ZEND_END_ARG_INFO()
351
352ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
353    ZEND_ARG_INFO(0, str)
354    ZEND_ARG_INFO(0, to)
355    ZEND_ARG_INFO(0, from)
356ZEND_END_ARG_INFO()
357
358ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
359    ZEND_ARG_INFO(0, sourcestring)
360    ZEND_ARG_INFO(0, mode)
361    ZEND_ARG_INFO(0, encoding)
362ZEND_END_ARG_INFO()
363
364ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
365    ZEND_ARG_INFO(0, sourcestring)
366    ZEND_ARG_INFO(0, encoding)
367ZEND_END_ARG_INFO()
368
369ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
370    ZEND_ARG_INFO(0, sourcestring)
371    ZEND_ARG_INFO(0, encoding)
372ZEND_END_ARG_INFO()
373
374ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
375    ZEND_ARG_INFO(0, str)
376    ZEND_ARG_INFO(0, encoding_list)
377    ZEND_ARG_INFO(0, strict)
378ZEND_END_ARG_INFO()
379
380ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
381ZEND_END_ARG_INFO()
382
383ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
384    ZEND_ARG_INFO(0, encoding)
385ZEND_END_ARG_INFO()
386
387ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
388    ZEND_ARG_INFO(0, str)
389    ZEND_ARG_INFO(0, charset)
390    ZEND_ARG_INFO(0, transfer)
391    ZEND_ARG_INFO(0, linefeed)
392    ZEND_ARG_INFO(0, indent)
393ZEND_END_ARG_INFO()
394
395ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
396    ZEND_ARG_INFO(0, string)
397ZEND_END_ARG_INFO()
398
399ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
400    ZEND_ARG_INFO(0, str)
401    ZEND_ARG_INFO(0, option)
402    ZEND_ARG_INFO(0, encoding)
403ZEND_END_ARG_INFO()
404
405ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
406    ZEND_ARG_INFO(0, to)
407    ZEND_ARG_INFO(0, from)
408    ZEND_ARG_VARIADIC_INFO(1, vars)
409ZEND_END_ARG_INFO()
410
411ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
412    ZEND_ARG_INFO(0, string)
413    ZEND_ARG_INFO(0, convmap)
414    ZEND_ARG_INFO(0, encoding)
415    ZEND_ARG_INFO(0, is_hex)
416ZEND_END_ARG_INFO()
417
418ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
419    ZEND_ARG_INFO(0, string)
420    ZEND_ARG_INFO(0, convmap)
421    ZEND_ARG_INFO(0, encoding)
422ZEND_END_ARG_INFO()
423
424ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
425    ZEND_ARG_INFO(0, to)
426    ZEND_ARG_INFO(0, subject)
427    ZEND_ARG_INFO(0, message)
428    ZEND_ARG_INFO(0, additional_headers)
429    ZEND_ARG_INFO(0, additional_parameters)
430ZEND_END_ARG_INFO()
431
432ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
433    ZEND_ARG_INFO(0, type)
434ZEND_END_ARG_INFO()
435
436ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
437    ZEND_ARG_INFO(0, var)
438    ZEND_ARG_INFO(0, encoding)
439ZEND_END_ARG_INFO()
440
441ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
442    ZEND_ARG_INFO(0, encoding)
443ZEND_END_ARG_INFO()
444
445ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
446    ZEND_ARG_INFO(0, pattern)
447    ZEND_ARG_INFO(0, string)
448    ZEND_ARG_INFO(1, registers)
449ZEND_END_ARG_INFO()
450
451ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
452    ZEND_ARG_INFO(0, pattern)
453    ZEND_ARG_INFO(0, string)
454    ZEND_ARG_INFO(1, registers)
455ZEND_END_ARG_INFO()
456
457ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
458    ZEND_ARG_INFO(0, pattern)
459    ZEND_ARG_INFO(0, replacement)
460    ZEND_ARG_INFO(0, string)
461    ZEND_ARG_INFO(0, option)
462ZEND_END_ARG_INFO()
463
464ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
465    ZEND_ARG_INFO(0, pattern)
466    ZEND_ARG_INFO(0, replacement)
467    ZEND_ARG_INFO(0, string)
468ZEND_END_ARG_INFO()
469
470ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
471    ZEND_ARG_INFO(0, pattern)
472    ZEND_ARG_INFO(0, callback)
473    ZEND_ARG_INFO(0, string)
474    ZEND_ARG_INFO(0, option)
475ZEND_END_ARG_INFO()
476
477ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
478    ZEND_ARG_INFO(0, pattern)
479    ZEND_ARG_INFO(0, string)
480    ZEND_ARG_INFO(0, limit)
481ZEND_END_ARG_INFO()
482
483ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
484    ZEND_ARG_INFO(0, pattern)
485    ZEND_ARG_INFO(0, string)
486    ZEND_ARG_INFO(0, option)
487ZEND_END_ARG_INFO()
488
489ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
490    ZEND_ARG_INFO(0, pattern)
491    ZEND_ARG_INFO(0, option)
492ZEND_END_ARG_INFO()
493
494ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
495    ZEND_ARG_INFO(0, pattern)
496    ZEND_ARG_INFO(0, option)
497ZEND_END_ARG_INFO()
498
499ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
500    ZEND_ARG_INFO(0, pattern)
501    ZEND_ARG_INFO(0, option)
502ZEND_END_ARG_INFO()
503
504ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
505    ZEND_ARG_INFO(0, string)
506    ZEND_ARG_INFO(0, pattern)
507    ZEND_ARG_INFO(0, option)
508ZEND_END_ARG_INFO()
509
510ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
511ZEND_END_ARG_INFO()
512
513ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
514ZEND_END_ARG_INFO()
515
516ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
517    ZEND_ARG_INFO(0, position)
518ZEND_END_ARG_INFO()
519
520ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
521    ZEND_ARG_INFO(0, options)
522ZEND_END_ARG_INFO()
523/* }}} */
524
525/* {{{ zend_function_entry mbstring_functions[] */
526const zend_function_entry mbstring_functions[] = {
527    PHP_FE(mb_convert_case,         arginfo_mb_convert_case)
528    PHP_FE(mb_strtoupper,           arginfo_mb_strtoupper)
529    PHP_FE(mb_strtolower,           arginfo_mb_strtolower)
530    PHP_FE(mb_language,             arginfo_mb_language)
531    PHP_FE(mb_internal_encoding,    arginfo_mb_internal_encoding)
532    PHP_FE(mb_http_input,           arginfo_mb_http_input)
533    PHP_FE(mb_http_output,          arginfo_mb_http_output)
534    PHP_FE(mb_detect_order,         arginfo_mb_detect_order)
535    PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
536    PHP_FE(mb_parse_str,            arginfo_mb_parse_str)
537    PHP_FE(mb_output_handler,       arginfo_mb_output_handler)
538    PHP_FE(mb_preferred_mime_name,  arginfo_mb_preferred_mime_name)
539    PHP_FE(mb_strlen,               arginfo_mb_strlen)
540    PHP_FE(mb_strpos,               arginfo_mb_strpos)
541    PHP_FE(mb_strrpos,              arginfo_mb_strrpos)
542    PHP_FE(mb_stripos,              arginfo_mb_stripos)
543    PHP_FE(mb_strripos,             arginfo_mb_strripos)
544    PHP_FE(mb_strstr,               arginfo_mb_strstr)
545    PHP_FE(mb_strrchr,              arginfo_mb_strrchr)
546    PHP_FE(mb_stristr,              arginfo_mb_stristr)
547    PHP_FE(mb_strrichr,             arginfo_mb_strrichr)
548    PHP_FE(mb_substr_count,         arginfo_mb_substr_count)
549    PHP_FE(mb_substr,               arginfo_mb_substr)
550    PHP_FE(mb_strcut,               arginfo_mb_strcut)
551    PHP_FE(mb_strwidth,             arginfo_mb_strwidth)
552    PHP_FE(mb_strimwidth,           arginfo_mb_strimwidth)
553    PHP_FE(mb_convert_encoding,     arginfo_mb_convert_encoding)
554    PHP_FE(mb_detect_encoding,      arginfo_mb_detect_encoding)
555    PHP_FE(mb_list_encodings,       arginfo_mb_list_encodings)
556    PHP_FE(mb_encoding_aliases,     arginfo_mb_encoding_aliases)
557    PHP_FE(mb_convert_kana,         arginfo_mb_convert_kana)
558    PHP_FE(mb_encode_mimeheader,    arginfo_mb_encode_mimeheader)
559    PHP_FE(mb_decode_mimeheader,    arginfo_mb_decode_mimeheader)
560    PHP_FE(mb_convert_variables,    arginfo_mb_convert_variables)
561    PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
562    PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
563    PHP_FE(mb_send_mail,            arginfo_mb_send_mail)
564    PHP_FE(mb_get_info,             arginfo_mb_get_info)
565    PHP_FE(mb_check_encoding,       arginfo_mb_check_encoding)
566#if HAVE_MBREGEX
567    PHP_MBREGEX_FUNCTION_ENTRIES
568#endif
569    PHP_FE_END
570};
571/* }}} */
572
573/* {{{ zend_module_entry mbstring_module_entry */
574zend_module_entry mbstring_module_entry = {
575    STANDARD_MODULE_HEADER,
576    "mbstring",
577    mbstring_functions,
578    PHP_MINIT(mbstring),
579    PHP_MSHUTDOWN(mbstring),
580    PHP_RINIT(mbstring),
581    PHP_RSHUTDOWN(mbstring),
582    PHP_MINFO(mbstring),
583    NO_VERSION_YET,
584    PHP_MODULE_GLOBALS(mbstring),
585    PHP_GINIT(mbstring),
586    PHP_GSHUTDOWN(mbstring),
587    NULL,
588    STANDARD_MODULE_PROPERTIES_EX
589};
590/* }}} */
591
592/* {{{ static sapi_post_entry php_post_entries[] */
593static sapi_post_entry php_post_entries[] = {
594    { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
595    { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
596    { NULL, 0, NULL, NULL }
597};
598/* }}} */
599
/* Emit the get_module() entry point only when COMPILE_DL_MBSTRING is defined. */
#if defined(COMPILE_DL_MBSTRING)
ZEND_GET_MODULE(mbstring)
#endif
603
604static char *get_internal_encoding(TSRMLS_D) {
605    if (PG(internal_encoding) && PG(internal_encoding)[0]) {
606        return PG(internal_encoding);
607    } else if (SG(default_charset)) {
608        return SG(default_charset);
609    }
610    return "";
611}
612
613static char *get_input_encoding(TSRMLS_D) {
614    if (PG(input_encoding) && PG(input_encoding)[0]) {
615        return PG(input_encoding);
616    } else if (SG(default_charset)) {
617        return SG(default_charset);
618    }
619    return "";
620}
621
622static char *get_output_encoding(TSRMLS_D) {
623    if (PG(output_encoding) && PG(output_encoding)[0]) {
624        return PG(output_encoding);
625    } else if (SG(default_charset)) {
626        return SG(default_charset);
627    }
628    return "";
629}
630
631
632/* {{{ allocators */
633static void *_php_mb_allocators_malloc(unsigned int sz)
634{
635    return emalloc(sz);
636}
637
638static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
639{
640    return erealloc(ptr, sz);
641}
642
643static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
644{
645    return ecalloc(nelems, szelem);
646}
647
648static void _php_mb_allocators_free(void *ptr)
649{
650    efree(ptr);
651}
652
653static void *_php_mb_allocators_pmalloc(unsigned int sz)
654{
655    return pemalloc(sz, 1);
656}
657
658static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
659{
660    return perealloc(ptr, sz, 1);
661}
662
663static void _php_mb_allocators_pfree(void *ptr)
664{
665    pefree(ptr, 1);
666}
667
668static mbfl_allocators _php_mb_allocators = {
669    _php_mb_allocators_malloc,
670    _php_mb_allocators_realloc,
671    _php_mb_allocators_calloc,
672    _php_mb_allocators_free,
673    _php_mb_allocators_pmalloc,
674    _php_mb_allocators_prealloc,
675    _php_mb_allocators_pfree
676};
677/* }}} */
678
679/* {{{ static sapi_post_entry mbstr_post_entries[] */
680static sapi_post_entry mbstr_post_entries[] = {
681    { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
682    { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
683    { NULL, 0, NULL, NULL }
684};
685/* }}} */
686
687/* {{{ static int php_mb_parse_encoding_list()
688 *  Return 0 if input contains any illegal encoding, otherwise 1.
689 *  Even if any illegal encoding is detected the result may contain a list
690 *  of parsed encodings.
691 */
692static int
693php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
694{
695    int size, bauto, ret = SUCCESS;
696    size_t n;
697    char *p, *p1, *p2, *endp, *tmpstr;
698    const mbfl_encoding **entry, **list;
699
700    list = NULL;
701    if (value == NULL || value_length <= 0) {
702        if (return_list) {
703            *return_list = NULL;
704        }
705        if (return_size) {
706            *return_size = 0;
707        }
708        return FAILURE;
709    } else {
710        /* copy the value string for work */
711        if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
712            tmpstr = (char *)estrndup(value+1, value_length-2);
713            value_length -= 2;
714        }
715        else
716            tmpstr = (char *)estrndup(value, value_length);
717        if (tmpstr == NULL) {
718            return FAILURE;
719        }
720        /* count the number of listed encoding names */
721        endp = tmpstr + value_length;
722        n = 1;
723        p1 = tmpstr;
724        while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
725            p1 = p2 + 1;
726            n++;
727        }
728        size = n + MBSTRG(default_detect_order_list_size);
729        /* make list */
730        list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
731        if (list != NULL) {
732            entry = list;
733            n = 0;
734            bauto = 0;
735            p1 = tmpstr;
736            do {
737                p2 = p = php_memnstr(p1, ",", 1, endp);
738                if (p == NULL) {
739                    p = endp;
740                }
741                *p = '\0';
742                /* trim spaces */
743                while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
744                    p1++;
745                }
746                p--;
747                while (p > p1 && (*p == ' ' || *p == '\t')) {
748                    *p = '\0';
749                    p--;
750                }
751                /* convert to the encoding number and check encoding */
752                if (strcasecmp(p1, "auto") == 0) {
753                    if (!bauto) {
754                        const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
755                        const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
756                        size_t i;
757                        bauto = 1;
758                        for (i = 0; i < identify_list_size; i++) {
759                            *entry++ = mbfl_no2encoding(*src++);
760                            n++;
761                        }
762                    }
763                } else {
764                    const mbfl_encoding *encoding = mbfl_name2encoding(p1);
765                    if (encoding) {
766                        *entry++ = encoding;
767                        n++;
768                    } else {
769                        ret = 0;
770                    }
771                }
772                p1 = p2 + 1;
773            } while (n < size && p2 != NULL);
774            if (n > 0) {
775                if (return_list) {
776                    *return_list = list;
777                } else {
778                    pefree(list, persistent);
779                }
780            } else {
781                pefree(list, persistent);
782                if (return_list) {
783                    *return_list = NULL;
784                }
785                ret = 0;
786            }
787            if (return_size) {
788                *return_size = n;
789            }
790        } else {
791            if (return_list) {
792                *return_list = NULL;
793            }
794            if (return_size) {
795                *return_size = 0;
796            }
797            ret = 0;
798        }
799        efree(tmpstr);
800    }
801
802    return ret;
803}
804/* }}} */
805
806/* {{{ static int php_mb_parse_encoding_array()
807 *  Return 0 if input contains any illegal encoding, otherwise 1.
808 *  Even if any illegal encoding is detected the result may contain a list
809 *  of parsed encodings.
810 */
811static int
812php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
813{
814    zval **hash_entry;
815    HashTable *target_hash;
816    int i, n, size, bauto, ret = SUCCESS;
817    const mbfl_encoding **list, **entry;
818
819    list = NULL;
820    if (Z_TYPE_P(array) == IS_ARRAY) {
821        target_hash = Z_ARRVAL_P(array);
822        zend_hash_internal_pointer_reset(target_hash);
823        i = zend_hash_num_elements(target_hash);
824        size = i + MBSTRG(default_detect_order_list_size);
825        list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
826        if (list != NULL) {
827            entry = list;
828            bauto = 0;
829            n = 0;
830            while (i > 0) {
831                if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
832                    break;
833                }
834                convert_to_string_ex(hash_entry);
835                if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
836                    if (!bauto) {
837                        const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
838                        const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
839                        size_t j;
840
841                        bauto = 1;
842                        for (j = 0; j < identify_list_size; j++) {
843                            *entry++ = mbfl_no2encoding(*src++);
844                            n++;
845                        }
846                    }
847                } else {
848                    const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry));
849                    if (encoding) {
850                        *entry++ = encoding;
851                        n++;
852                    } else {
853                        ret = FAILURE;
854                    }
855                }
856                zend_hash_move_forward(target_hash);
857                i--;
858            }
859            if (n > 0) {
860                if (return_list) {
861                    *return_list = list;
862                } else {
863                    pefree(list, persistent);
864                }
865            } else {
866                pefree(list, persistent);
867                if (return_list) {
868                    *return_list = NULL;
869                }
870                ret = FAILURE;
871            }
872            if (return_size) {
873                *return_size = n;
874            }
875        } else {
876            if (return_list) {
877                *return_list = NULL;
878            }
879            if (return_size) {
880                *return_size = 0;
881            }
882            ret = FAILURE;
883        }
884    }
885
886    return ret;
887}
888/* }}} */
889
890/* {{{ zend_multibyte interface */
891static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
892{
893    return (const zend_encoding*)mbfl_name2encoding(encoding_name);
894}
895
896static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
897{
898    return ((const mbfl_encoding *)encoding)->name;
899}
900
901static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
902{
903    const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
904    if (encoding->flag & MBFL_ENCTYPE_SBCS) {
905        return 1;
906    }
907    if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
908        return 1;
909    }
910    return 0;
911}
912
913static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
914{
915    mbfl_string string;
916
917    if (!list) {
918        list = (const zend_encoding **)MBSTRG(current_detect_order_list);
919        list_size = MBSTRG(current_detect_order_list_size);
920    }
921
922    mbfl_string_init(&string);
923    string.no_language = MBSTRG(language);
924    string.val = (unsigned char *)arg_string;
925    string.len = arg_length;
926    return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
927}
928
929static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
930{
931    mbfl_string string, result;
932    mbfl_buffer_converter *convd;
933    int status, loc;
934
935    /* new encoding */
936    /* initialize string */
937    mbfl_string_init(&string);
938    mbfl_string_init(&result);
939    string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
940    string.no_language = MBSTRG(language);
941    string.val = (unsigned char*)from;
942    string.len = from_length;
943
944    /* initialize converter */
945    convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
946    if (convd == NULL) {
947        return -1;
948    }
949    mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
950    mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
951
952    /* do it */
953    status = mbfl_buffer_converter_feed2(convd, &string, &loc);
954    if (status) {
955        mbfl_buffer_converter_delete(convd);
956        return (size_t)-1;
957    }
958
959    mbfl_buffer_converter_flush(convd);
960    if (!mbfl_buffer_converter_result(convd, &result)) {
961        mbfl_buffer_converter_delete(convd);
962        return (size_t)-1;
963    }
964
965    *to = result.val;
966    *to_length = result.len;
967
968    mbfl_buffer_converter_delete(convd);
969
970    return loc;
971}
972
973static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
974{
975    return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC);
976}
977
978static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
979{
980    return (const zend_encoding *)MBSTRG(internal_encoding);
981}
982
983static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
984{
985    MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
986    return SUCCESS;
987}
988
/* Callback table passed to zend_multibyte_set_functions() during MINIT.
 * Entries are positional; the per-entry notes below only restate what each
 * callback in this file does. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
    "mbstring",                                       /* first member: literal string (presumably the provider name) */
    php_mb_zend_encoding_fetcher,                     /* name -> encoding handle */
    php_mb_zend_encoding_name_getter,                 /* encoding handle -> name */
    php_mb_zend_encoding_lexer_compatibility_checker, /* SBCS / GL-safe MBCS test */
    php_mb_zend_encoding_detector,                    /* identify encoding of a buffer */
    php_mb_zend_encoding_converter,                   /* convert a buffer between encodings */
    php_mb_zend_encoding_list_parser,                 /* parse an encoding list string */
    php_mb_zend_internal_encoding_getter,             /* read MBSTRG(internal_encoding) */
    php_mb_zend_internal_encoding_setter              /* write MBSTRG(internal_encoding) */
};
1000/* }}} */
1001
/* Forward declarations of the small regex abstraction used for
 * mbstring.http_output_conv_mimetypes. The definitions that follow are
 * selected at compile time: Oniguruma when HAVE_ONIG is set, otherwise PCRE
 * when HAVE_PCRE or HAVE_BUNDLED_PCRE is set. */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
1005
1006#if HAVE_ONIG
1007/* {{{ _php_mb_compile_regex */
1008static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
1009{
1010    php_mb_regex_t *retval;
1011    OnigErrorInfo err_info;
1012    int err_code;
1013
1014    if ((err_code = onig_new(&retval,
1015            (const OnigUChar *)pattern,
1016            (const OnigUChar *)pattern + strlen(pattern),
1017            ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1018            ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1019        OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1020        onig_error_code_to_str(err_str, err_code, err_info);
1021        php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
1022        retval = NULL;
1023    }
1024    return retval;
1025}
1026/* }}} */
1027
1028/* {{{ _php_mb_match_regex */
1029static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1030{
1031    return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1032            (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1033            (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1034}
1035/* }}} */
1036
1037/* {{{ _php_mb_free_regex */
1038static void _php_mb_free_regex(void *opaque)
1039{
1040    onig_free((php_mb_regex_t *)opaque);
1041}
1042/* }}} */
1043#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1044/* {{{ _php_mb_compile_regex */
1045static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
1046{
1047    pcre *retval;
1048    const char *err_str;
1049    int err_offset;
1050
1051    if (!(retval = pcre_compile(pattern,
1052            PCRE_CASELESS, &err_str, &err_offset, NULL))) {
1053        php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
1054    }
1055    return retval;
1056}
1057/* }}} */
1058
1059/* {{{ _php_mb_match_regex */
1060static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1061{
1062    return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1063            0, NULL, 0) >= 0;
1064}
1065/* }}} */
1066
1067/* {{{ _php_mb_free_regex */
1068static void _php_mb_free_regex(void *opaque)
1069{
1070    pcre_free(opaque);
1071}
1072/* }}} */
1073#endif
1074
1075/* {{{ php_mb_nls_get_default_detect_order_list */
1076static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1077{
1078    size_t i;
1079
1080    *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1081    *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1082
1083    for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1084        if (php_mb_default_identify_list[i].lang == lang) {
1085            *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1086            *plist_size = php_mb_default_identify_list[i].list_size;
1087            return 1;
1088        }
1089    }
1090    return 0;
1091}
1092/* }}} */
1093
1094static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
1095{
1096    char *result = emalloc(len + 2);
1097    char *resp = result;
1098    int i;
1099
1100    for (i = 0; i < len && start[i] != quote; ++i) {
1101        if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1102            *resp++ = start[++i];
1103        } else {
1104            size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1105
1106            while (j-- > 0 && i < len) {
1107                *resp++ = start[i++];
1108            }
1109            --i;
1110        }
1111    }
1112
1113    *resp = '\0';
1114    return result;
1115}
1116
1117static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */
1118{
1119    char *pos = *line, quote;
1120    char *res;
1121
1122    while (*pos && *pos != stop) {
1123        if ((quote = *pos) == '"' || quote == '\'') {
1124            ++pos;
1125            while (*pos && *pos != quote) {
1126                if (*pos == '\\' && pos[1] && pos[1] == quote) {
1127                    pos += 2;
1128                } else {
1129                    ++pos;
1130                }
1131            }
1132            if (*pos) {
1133                ++pos;
1134            }
1135        } else {
1136            pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1137
1138        }
1139    }
1140    if (*pos == '\0') {
1141        res = estrdup(*line);
1142        *line += strlen(*line);
1143        return res;
1144    }
1145
1146    res = estrndup(*line, pos - *line);
1147
1148    while (*pos == stop) {
1149        pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1150    }
1151
1152    *line = pos;
1153    return res;
1154}
1155/* }}} */
1156
1157static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */
1158{
1159    while (*str && isspace(*(unsigned char *)str)) {
1160        ++str;
1161    }
1162
1163    if (!*str) {
1164        return estrdup("");
1165    }
1166
1167    if (*str == '"' || *str == '\'') {
1168        char quote = *str;
1169
1170        str++;
1171        return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote TSRMLS_CC);
1172    } else {
1173        char *strend = str;
1174
1175        while (*strend && !isspace(*(unsigned char *)strend)) {
1176            ++strend;
1177        }
1178        return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0 TSRMLS_CC);
1179    }
1180}
1181/* }}} */
1182
1183static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */
1184{
1185    char *s, *s2;
1186    const size_t filename_len = strlen(filename);
1187
1188    /* The \ check should technically be needed for win32 systems only where
1189     * it is a valid path separator. However, IE in all it's wisdom always sends
1190     * the full path of the file on the user's filesystem, which means that unless
1191     * the user does basename() they get a bogus file name. Until IE's user base drops
1192     * to nill or problem is fixed this code must remain enabled for all systems. */
1193    s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1194    s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1195
1196    if (s && s2) {
1197        if (s > s2) {
1198            return ++s;
1199        } else {
1200            return ++s2;
1201        }
1202    } else if (s) {
1203        return ++s;
1204    } else if (s2) {
1205        return ++s2;
1206    } else {
1207        return filename;
1208    }
1209}
1210/* }}} */
1211
1212/* {{{ php.ini directive handler */
1213/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
1214static PHP_INI_MH(OnUpdate_mbstring_language)
1215{
1216    enum mbfl_no_language no_language;
1217
1218    no_language = mbfl_name2no_language(new_value);
1219    if (no_language == mbfl_no_language_invalid) {
1220        MBSTRG(language) = mbfl_no_language_neutral;
1221        return FAILURE;
1222    }
1223    MBSTRG(language) = no_language;
1224    php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1225    return SUCCESS;
1226}
1227/* }}} */
1228
1229/* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
1230static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1231{
1232    const mbfl_encoding **list;
1233    size_t size;
1234
1235    if (!new_value) {
1236        if (MBSTRG(detect_order_list)) {
1237            pefree(MBSTRG(detect_order_list), 1);
1238        }
1239        MBSTRG(detect_order_list) = NULL;
1240        MBSTRG(detect_order_list_size) = 0;
1241        return SUCCESS;
1242    }
1243
1244    if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1245        return FAILURE;
1246    }
1247
1248    if (MBSTRG(detect_order_list)) {
1249        pefree(MBSTRG(detect_order_list), 1);
1250    }
1251    MBSTRG(detect_order_list) = list;
1252    MBSTRG(detect_order_list_size) = size;
1253    return SUCCESS;
1254}
1255/* }}} */
1256
1257/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
1258static PHP_INI_MH(OnUpdate_mbstring_http_input)
1259{
1260    const mbfl_encoding **list;
1261    size_t size;
1262
1263    if (!new_value) {
1264        if (MBSTRG(http_input_list)) {
1265            pefree(MBSTRG(http_input_list), 1);
1266        }
1267        if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(TSRMLS_C), strlen(get_input_encoding(TSRMLS_C))+1, &list, &size, 1 TSRMLS_CC)) {
1268            MBSTRG(http_input_list) = list;
1269            MBSTRG(http_input_list_size) = size;
1270            return SUCCESS;
1271        }
1272        MBSTRG(http_input_list) = NULL;
1273        MBSTRG(http_input_list_size) = 0;
1274        return SUCCESS;
1275    }
1276
1277    if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1278        return FAILURE;
1279    }
1280
1281    if (MBSTRG(http_input_list)) {
1282        pefree(MBSTRG(http_input_list), 1);
1283    }
1284    MBSTRG(http_input_list) = list;
1285    MBSTRG(http_input_list_size) = size;
1286
1287    if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1288        php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1289    }
1290
1291    return SUCCESS;
1292}
1293/* }}} */
1294
1295/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
1296static PHP_INI_MH(OnUpdate_mbstring_http_output)
1297{
1298    const mbfl_encoding *encoding;
1299
1300    if (new_value == NULL || new_value_length == 0) {
1301        encoding = mbfl_name2encoding(get_output_encoding(TSRMLS_C));
1302        if (!encoding) {
1303            MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1304            MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1305            return SUCCESS;
1306        }
1307    } else {
1308        encoding = mbfl_name2encoding(new_value);
1309        if (!encoding) {
1310            MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1311            MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1312            return FAILURE;
1313        }
1314    }
1315    MBSTRG(http_output_encoding) = encoding;
1316    MBSTRG(current_http_output_encoding) = encoding;
1317
1318    if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1319        php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1320    }
1321
1322    return SUCCESS;
1323}
1324/* }}} */
1325
/* {{{ _php_mb_ini_mbstring_internal_encoding_set */
1327int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
1328{
1329    const mbfl_encoding *encoding;
1330
1331    if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) {
1332        /* falls back to UTF-8 if an unknown encoding name is given */
1333        encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1334    }
1335    MBSTRG(internal_encoding) = encoding;
1336    MBSTRG(current_internal_encoding) = encoding;
1337#if HAVE_MBREGEX
1338    {
1339        const char *enc_name = new_value;
1340        if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
1341            /* falls back to UTF-8 if an unknown encoding name is given */
1342            enc_name = "UTF-8";
1343            php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
1344        }
1345        php_mb_regex_set_mbctype(new_value TSRMLS_CC);
1346    }
1347#endif
1348    return SUCCESS;
1349}
1350/* }}} */
1351
1352/* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
1353static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1354{
1355    if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1356        php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1357    }
1358
1359    if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) {
1360        return FAILURE;
1361    }
1362
1363    if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1364        if (new_value_length) {
1365            return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1366        } else {
1367            return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(TSRMLS_C), strlen(get_internal_encoding(TSRMLS_C))+1 TSRMLS_CC);
1368        }
1369    } else {
1370        /* the corresponding mbstring globals needs to be set according to the
1371         * ini value in the later stage because it never falls back to the
1372         * default value if 1. no value for mbstring.internal_encoding is given,
1373         * 2. mbstring.language directive is processed in per-dir or runtime
1374         * context and 3. call to the handler for mbstring.language is done
1375         * after mbstring.internal_encoding is handled. */
1376        return SUCCESS;
1377    }
1378}
1379/* }}} */
1380
1381/* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
1382static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1383{
1384    int c;
1385    char *endptr = NULL;
1386
1387    if (new_value != NULL) {
1388        if (strcasecmp("none", new_value) == 0) {
1389            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1390            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1391        } else if (strcasecmp("long", new_value) == 0) {
1392            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1393            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1394        } else if (strcasecmp("entity", new_value) == 0) {
1395            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1396            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1397        } else {
1398            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1399            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1400            if (new_value_length >0) {
1401                c = strtol(new_value, &endptr, 0);
1402                if (*endptr == '\0') {
1403                    MBSTRG(filter_illegal_substchar) = c;
1404                    MBSTRG(current_filter_illegal_substchar) = c;
1405                }
1406            }
1407        }
1408    } else {
1409        MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1410        MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1411        MBSTRG(filter_illegal_substchar) = 0x3f;    /* '?' */
1412        MBSTRG(current_filter_illegal_substchar) = 0x3f;    /* '?' */
1413    }
1414
1415    return SUCCESS;
1416}
1417/* }}} */
1418
1419/* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
1420static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1421{
1422    if (new_value == NULL) {
1423        return FAILURE;
1424    }
1425
1426    OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1427
1428    if (MBSTRG(encoding_translation)) {
1429        sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1430        sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1431    } else {
1432        sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1433        sapi_register_post_entries(php_post_entries TSRMLS_CC);
1434    }
1435
1436    return SUCCESS;
1437}
1438/* }}} */
1439
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
1441static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1442{
1443    zval tmp;
1444    void *re = NULL;
1445
1446    if (!new_value) {
1447        new_value = entry->orig_value;
1448        new_value_length = entry->orig_value_length;
1449    }
1450    php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1451
1452    if (Z_STRLEN(tmp) > 0) {
1453        if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1454            zval_dtor(&tmp);
1455            return FAILURE;
1456        }
1457    }
1458
1459    if (MBSTRG(http_output_conv_mimetypes)) {
1460        _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1461    }
1462
1463    MBSTRG(http_output_conv_mimetypes) = re;
1464
1465    zval_dtor(&tmp);
1466    return SUCCESS;
1467}
1468/* }}} */
1469/* }}} */
1470
1471/* {{{ php.ini directive registration */
/* Table of mbstring.* INI directives: name, default value, the scopes in
 * which the directive may be changed, and the update handler. STD_* entries
 * additionally name the zend_mbstring_globals field they are bound to. */
PHP_INI_BEGIN()
    PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
    PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
    PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
    PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
    STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
    PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
    /* func_overload is PHP_INI_SYSTEM only */
    STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
    PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

    STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
        PHP_INI_SYSTEM | PHP_INI_PERDIR,
        OnUpdate_mbstring_encoding_translation,
        encoding_translation, zend_mbstring_globals, mbstring_globals)
    /* default pattern: MIME types starting with text/ or application/xhtml+xml */
    PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
        "^(text/|application/xhtml\\+xml)",
        PHP_INI_ALL,
        OnUpdate_mbstring_http_output_conv_mimetypes)

    /* NOTE(review): declared as a boolean entry but updated through
     * OnUpdateLong; presumably the strict_detection field is a long - confirm
     * against the globals definition. */
    STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
        PHP_INI_ALL,
        OnUpdateLong,
        strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
1496/* }}} */
1497
1498/* {{{ module global initialize handler */
1499static PHP_GINIT_FUNCTION(mbstring)
1500{
1501    mbstring_globals->language = mbfl_no_language_uni;
1502    mbstring_globals->internal_encoding = NULL;
1503    mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1504    mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1505    mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1506    mbstring_globals->http_input_identify = NULL;
1507    mbstring_globals->http_input_identify_get = NULL;
1508    mbstring_globals->http_input_identify_post = NULL;
1509    mbstring_globals->http_input_identify_cookie = NULL;
1510    mbstring_globals->http_input_identify_string = NULL;
1511    mbstring_globals->http_input_list = NULL;
1512    mbstring_globals->http_input_list_size = 0;
1513    mbstring_globals->detect_order_list = NULL;
1514    mbstring_globals->detect_order_list_size = 0;
1515    mbstring_globals->current_detect_order_list = NULL;
1516    mbstring_globals->current_detect_order_list_size = 0;
1517    mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1518    mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1519    mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1520    mbstring_globals->filter_illegal_substchar = 0x3f;  /* '?' */
1521    mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1522    mbstring_globals->current_filter_illegal_substchar = 0x3f;  /* '?' */
1523    mbstring_globals->illegalchars = 0;
1524    mbstring_globals->func_overload = 0;
1525    mbstring_globals->encoding_translation = 0;
1526    mbstring_globals->strict_detection = 0;
1527    mbstring_globals->outconv = NULL;
1528    mbstring_globals->http_output_conv_mimetypes = NULL;
1529#if HAVE_MBREGEX
1530    mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1531#endif
1532}
1533/* }}} */
1534
1535/* {{{ PHP_GSHUTDOWN_FUNCTION */
1536static PHP_GSHUTDOWN_FUNCTION(mbstring)
1537{
1538    if (mbstring_globals->http_input_list) {
1539        free(mbstring_globals->http_input_list);
1540    }
1541    if (mbstring_globals->detect_order_list) {
1542        free(mbstring_globals->detect_order_list);
1543    }
1544    if (mbstring_globals->http_output_conv_mimetypes) {
1545        _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1546    }
1547#if HAVE_MBREGEX
1548    php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1549#endif
1550}
1551/* }}} */
1552
1553/* {{{ PHP_MINIT_FUNCTION(mbstring) */
/* Module startup: points libmbfl at PHP's allocators, registers the INI
 * entries, the treat-data and (optionally) post handlers, the MB_* constants,
 * the engine multibyte callbacks and the RFC 1867 upload callbacks.
 * Returns FAILURE only if zend_multibyte_set_functions() fails. */
PHP_MINIT_FUNCTION(mbstring)
{
    /* set before anything else in this function runs */
    __mbfl_allocators = &_php_mb_allocators;

    REGISTER_INI_ENTRIES();

    /* This is a global handler. Should not be set in a per-request handler. */
    sapi_register_treat_data(mbstr_treat_data TSRMLS_CC);

    /* Post handlers are stored in the thread-local context. */
    if (MBSTRG(encoding_translation)) {
        sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
    }

    /* flags for mbstring.func_overload */
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);

    /* case-conversion modes */
    REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);

#if HAVE_MBREGEX
    PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

    /* hand the engine the callback table defined earlier in this file */
    if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
        return FAILURE;
    }

    /* multibyte-aware callbacks for RFC 1867 (multipart/form-data) parsing */
    php_rfc1867_set_multibyte_callbacks(
        php_mb_encoding_translation,
        php_mb_gpc_get_detect_order,
        php_mb_gpc_set_input_encoding,
        php_mb_rfc1867_getword,
        php_mb_rfc1867_getword_conf,
        php_mb_rfc1867_basename);

    return SUCCESS;
}
1594/* }}} */
1595
1596/* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
/* Module shutdown: unregisters the INI entries and, with HAVE_MBREGEX, runs
 * the mb_regex sub-module's shutdown. Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
    UNREGISTER_INI_ENTRIES();

#if HAVE_MBREGEX
    PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

    return SUCCESS;
}
1607/* }}} */
1608
1609/* {{{ PHP_RINIT_FUNCTION(mbstring) */
1610PHP_RINIT_FUNCTION(mbstring)
1611{
1612    zend_function *func, *orig;
1613    const struct mb_overload_def *p;
1614
1615    MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1616    MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1617    MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1618    MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1619
1620    MBSTRG(illegalchars) = 0;
1621
1622    php_mb_populate_current_detect_order_list(TSRMLS_C);
1623
1624    /* override original function. */
1625    if (MBSTRG(func_overload)){
1626        p = &(mb_ovld[0]);
1627
1628        while (p->type > 0) {
1629            if ((MBSTRG(func_overload) & p->type) == p->type &&
1630                zend_hash_find(EG(function_table), p->save_func,
1631                    strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1632
1633                zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1634
1635                if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1636                    php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1637                    return FAILURE;
1638                } else {
1639                    zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1640
1641                    if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1642                        NULL) == FAILURE) {
1643                        php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1644                        return FAILURE;
1645                    }
1646                }
1647            }
1648            p++;
1649        }
1650    }
1651#if HAVE_MBREGEX
1652    PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1653#endif
1654    zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
1655
1656    return SUCCESS;
1657}
1658/* }}} */
1659
1660/* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
1661PHP_RSHUTDOWN_FUNCTION(mbstring)
1662{
1663    const struct mb_overload_def *p;
1664    zend_function *orig;
1665
1666    if (MBSTRG(current_detect_order_list) != NULL) {
1667        efree(MBSTRG(current_detect_order_list));
1668        MBSTRG(current_detect_order_list) = NULL;
1669        MBSTRG(current_detect_order_list_size) = 0;
1670    }
1671    if (MBSTRG(outconv) != NULL) {
1672        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1673        mbfl_buffer_converter_delete(MBSTRG(outconv));
1674        MBSTRG(outconv) = NULL;
1675    }
1676
1677    /* clear http input identification. */
1678    MBSTRG(http_input_identify) = NULL;
1679    MBSTRG(http_input_identify_post) = NULL;
1680    MBSTRG(http_input_identify_get) = NULL;
1681    MBSTRG(http_input_identify_cookie) = NULL;
1682    MBSTRG(http_input_identify_string) = NULL;
1683
1684    /*  clear overloaded function. */
1685    if (MBSTRG(func_overload)){
1686        p = &(mb_ovld[0]);
1687        while (p->type > 0) {
1688            if ((MBSTRG(func_overload) & p->type) == p->type &&
1689                zend_hash_find(EG(function_table), p->save_func,
1690                               strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1691
1692                zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1693                zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1694            }
1695            p++;
1696        }
1697    }
1698
1699#if HAVE_MBREGEX
1700    PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1701#endif
1702
1703    return SUCCESS;
1704}
1705/* }}} */
1706
1707/* {{{ PHP_MINFO_FUNCTION(mbstring) */
1708PHP_MINFO_FUNCTION(mbstring)
1709{
1710    php_info_print_table_start();
1711    php_info_print_table_row(2, "Multibyte Support", "enabled");
1712    php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1713    php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1714    {
1715        char tmp[256];
1716        snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1717        php_info_print_table_row(2, "libmbfl version", tmp);
1718    }
1719    php_info_print_table_end();
1720
1721    php_info_print_table_start();
1722    php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1723    php_info_print_table_end();
1724
1725#if HAVE_MBREGEX
1726    PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1727#endif
1728
1729    DISPLAY_INI_ENTRIES();
1730}
1731/* }}} */
1732
1733/* {{{ proto string mb_language([string language])
1734   Sets the current language or Returns the current language as a string */
1735PHP_FUNCTION(mb_language)
1736{
1737    char *name = NULL;
1738    int name_len = 0;
1739
1740    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1741        return;
1742    }
1743    if (name == NULL) {
1744        RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1745    } else {
1746        if (FAILURE == zend_alter_ini_entry(
1747                "mbstring.language", sizeof("mbstring.language"),
1748                name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1749            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1750            RETVAL_FALSE;
1751        } else {
1752            RETVAL_TRUE;
1753        }
1754    }
1755}
1756/* }}} */
1757
1758/* {{{ proto string mb_internal_encoding([string encoding])
1759   Sets the current internal encoding or Returns the current internal encoding as a string */
1760PHP_FUNCTION(mb_internal_encoding)
1761{
1762    const char *name = NULL;
1763    int name_len;
1764    const mbfl_encoding *encoding;
1765
1766    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1767        RETURN_FALSE;
1768    }
1769    if (name == NULL) {
1770        name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1771        if (name != NULL) {
1772            RETURN_STRING(name, 1);
1773        } else {
1774            RETURN_FALSE;
1775        }
1776    } else {
1777        encoding = mbfl_name2encoding(name);
1778        if (!encoding) {
1779            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1780            RETURN_FALSE;
1781        } else {
1782            MBSTRG(current_internal_encoding) = encoding;
1783            RETURN_TRUE;
1784        }
1785    }
1786}
1787/* }}} */
1788
1789/* {{{ proto mixed mb_http_input([string type])
1790   Returns the input encoding */
1791PHP_FUNCTION(mb_http_input)
1792{
1793    char *typ = NULL;
1794    int typ_len;
1795    int retname;
1796    char *list, *temp;
1797    const mbfl_encoding *result = NULL;
1798
1799    retname = 1;
1800    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1801        RETURN_FALSE;
1802    }
1803    if (typ == NULL) {
1804        result = MBSTRG(http_input_identify);
1805    } else {
1806        switch (*typ) {
1807        case 'G':
1808        case 'g':
1809            result = MBSTRG(http_input_identify_get);
1810            break;
1811        case 'P':
1812        case 'p':
1813            result = MBSTRG(http_input_identify_post);
1814            break;
1815        case 'C':
1816        case 'c':
1817            result = MBSTRG(http_input_identify_cookie);
1818            break;
1819        case 'S':
1820        case 's':
1821            result = MBSTRG(http_input_identify_string);
1822            break;
1823        case 'I':
1824        case 'i':
1825            {
1826                const mbfl_encoding **entry = MBSTRG(http_input_list);
1827                const size_t n = MBSTRG(http_input_list_size);
1828                size_t i;
1829                array_init(return_value);
1830                for (i = 0; i < n; i++) {
1831                    add_next_index_string(return_value, (*entry)->name, 1);
1832                    entry++;
1833                }
1834                retname = 0;
1835            }
1836            break;
1837        case 'L':
1838        case 'l':
1839            {
1840                const mbfl_encoding **entry = MBSTRG(http_input_list);
1841                const size_t n = MBSTRG(http_input_list_size);
1842                size_t i;
1843                list = NULL;
1844                for (i = 0; i < n; i++) {
1845                    if (list) {
1846                        temp = list;
1847                        spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1848                        efree(temp);
1849                        if (!list) {
1850                            break;
1851                        }
1852                    } else {
1853                        list = estrdup((*entry)->name);
1854                    }
1855                    entry++;
1856                }
1857            }
1858            if (!list) {
1859                RETURN_FALSE;
1860            }
1861            RETVAL_STRING(list, 0);
1862            retname = 0;
1863            break;
1864        default:
1865            result = MBSTRG(http_input_identify);
1866            break;
1867        }
1868    }
1869
1870    if (retname) {
1871        if (result) {
1872            RETVAL_STRING(result->name, 1);
1873        } else {
1874            RETVAL_FALSE;
1875        }
1876    }
1877}
1878/* }}} */
1879
1880/* {{{ proto string mb_http_output([string encoding])
1881   Sets the current output_encoding or returns the current output_encoding as a string */
1882PHP_FUNCTION(mb_http_output)
1883{
1884    const char *name = NULL;
1885    int name_len;
1886    const mbfl_encoding *encoding;
1887
1888    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1889        RETURN_FALSE;
1890    }
1891
1892    if (name == NULL) {
1893        name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1894        if (name != NULL) {
1895            RETURN_STRING(name, 1);
1896        } else {
1897            RETURN_FALSE;
1898        }
1899    } else {
1900        encoding = mbfl_name2encoding(name);
1901        if (!encoding) {
1902            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1903            RETURN_FALSE;
1904        } else {
1905            MBSTRG(current_http_output_encoding) = encoding;
1906            RETURN_TRUE;
1907        }
1908    }
1909}
1910/* }}} */
1911
1912/* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1913   Sets the current detect_order or Return the current detect_order as a array */
1914PHP_FUNCTION(mb_detect_order)
1915{
1916    zval **arg1 = NULL;
1917
1918    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1919        return;
1920    }
1921
1922    if (!arg1) {
1923        size_t i;
1924        size_t n = MBSTRG(current_detect_order_list_size);
1925        const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1926        array_init(return_value);
1927        for (i = 0; i < n; i++) {
1928            add_next_index_string(return_value, (*entry)->name, 1);
1929            entry++;
1930        }
1931    } else {
1932        const mbfl_encoding **list = NULL;
1933        size_t size = 0;
1934        switch (Z_TYPE_PP(arg1)) {
1935        case IS_ARRAY:
1936            if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1937                if (list) {
1938                    efree(list);
1939                }
1940                RETURN_FALSE;
1941            }
1942            break;
1943        default:
1944            convert_to_string_ex(arg1);
1945            if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1946                if (list) {
1947                    efree(list);
1948                }
1949                RETURN_FALSE;
1950            }
1951            break;
1952        }
1953
1954        if (list == NULL) {
1955            RETURN_FALSE;
1956        }
1957
1958        if (MBSTRG(current_detect_order_list)) {
1959            efree(MBSTRG(current_detect_order_list));
1960        }
1961        MBSTRG(current_detect_order_list) = list;
1962        MBSTRG(current_detect_order_list_size) = size;
1963        RETURN_TRUE;
1964    }
1965}
1966/* }}} */
1967
1968/* {{{ proto mixed mb_substitute_character([mixed substchar])
1969   Sets the current substitute_character or returns the current substitute_character */
1970PHP_FUNCTION(mb_substitute_character)
1971{
1972    zval **arg1 = NULL;
1973
1974    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1975        return;
1976    }
1977
1978    if (!arg1) {
1979        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1980            RETURN_STRING("none", 1);
1981        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1982            RETURN_STRING("long", 1);
1983        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1984            RETURN_STRING("entity", 1);
1985        } else {
1986            RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1987        }
1988    } else {
1989        RETVAL_TRUE;
1990
1991        switch (Z_TYPE_PP(arg1)) {
1992        case IS_STRING:
1993            if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1994                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1995            } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1996                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1997            } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1998                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1999            } else {
2000                convert_to_long_ex(arg1);
2001
2002                if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
2003                    MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2004                    MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
2005                } else {
2006                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
2007                    RETURN_FALSE;
2008                }
2009            }
2010            break;
2011        default:
2012            convert_to_long_ex(arg1);
2013            if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
2014                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2015                MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
2016            } else {
2017                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
2018                RETURN_FALSE;
2019            }
2020            break;
2021        }
2022    }
2023}
2024/* }}} */
2025
2026/* {{{ proto string mb_preferred_mime_name(string encoding)
2027   Return the preferred MIME name (charset) as a string */
2028PHP_FUNCTION(mb_preferred_mime_name)
2029{
2030    enum mbfl_no_encoding no_encoding;
2031    char *name = NULL;
2032    int name_len;
2033
2034    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
2035        return;
2036    } else {
2037        no_encoding = mbfl_name2no_encoding(name);
2038        if (no_encoding == mbfl_no_encoding_invalid) {
2039            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
2040            RETVAL_FALSE;
2041        } else {
2042            const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2043            if (preferred_name == NULL || *preferred_name == '\0') {
2044                php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2045                RETVAL_FALSE;
2046            } else {
2047                RETVAL_STRING((char *)preferred_name, 1);
2048            }
2049        }
2050    }
2051}
2052/* }}} */
2053
/* Byte-range predicates; each evaluates to 1 or 0 and evaluates its
 * argument more than once. Presumably the first and second byte of a
 * Shift_JIS double-byte sequence (judging by the names - confirm at the
 * point of use). */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2056
2057/* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2058   Parses GET/POST/COOKIE data and sets global variables */
2059PHP_FUNCTION(mb_parse_str)
2060{
2061    zval *track_vars_array = NULL;
2062    char *encstr = NULL;
2063    int encstr_len;
2064    php_mb_encoding_handler_info_t info;
2065    const mbfl_encoding *detected;
2066
2067    track_vars_array = NULL;
2068    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2069        return;
2070    }
2071
2072    if (track_vars_array != NULL) {
2073        /* Clear out the array */
2074        zval_dtor(track_vars_array);
2075        array_init(track_vars_array);
2076    }
2077
2078    encstr = estrndup(encstr, encstr_len);
2079
2080    info.data_type              = PARSE_STRING;
2081    info.separator              = PG(arg_separator).input;
2082    info.report_errors          = 1;
2083    info.to_encoding            = MBSTRG(current_internal_encoding);
2084    info.to_language            = MBSTRG(language);
2085    info.from_encodings         = MBSTRG(http_input_list);
2086    info.num_from_encodings     = MBSTRG(http_input_list_size);
2087    info.from_language          = MBSTRG(language);
2088
2089    if (track_vars_array != NULL) {
2090        detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
2091    } else {
2092        zval tmp;
2093        if (!EG(active_symbol_table)) {
2094            zend_rebuild_symbol_table(TSRMLS_C);
2095        }
2096        Z_ARRVAL(tmp) = EG(active_symbol_table);
2097        detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC);
2098    }
2099
2100    MBSTRG(http_input_identify) = detected;
2101
2102    RETVAL_BOOL(detected);
2103
2104    if (encstr != NULL) efree(encstr);
2105}
2106/* }}} */
2107
2108/* {{{ proto string mb_output_handler(string contents, int status)
2109   Returns string in output buffer converted to the http_output encoding */
2110PHP_FUNCTION(mb_output_handler)
2111{
2112    char *arg_string;
2113    int arg_string_len;
2114    long arg_status;
2115    mbfl_string string, result;
2116    const char *charset;
2117    char *p;
2118    const mbfl_encoding *encoding;
2119    int last_feed, len;
2120    unsigned char send_text_mimetype = 0;
2121    char *s, *mimetype = NULL;
2122
2123    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2124        return;
2125    }
2126
2127    encoding = MBSTRG(current_http_output_encoding);
2128
2129    /* start phase only */
2130    if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2131        /* delete the converter just in case. */
2132        if (MBSTRG(outconv)) {
2133            MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2134            mbfl_buffer_converter_delete(MBSTRG(outconv));
2135            MBSTRG(outconv) = NULL;
2136        }
2137        if (encoding == &mbfl_encoding_pass) {
2138            RETURN_STRINGL(arg_string, arg_string_len, 1);
2139        }
2140
2141        /* analyze mime type */
2142        if (SG(sapi_headers).mimetype &&
2143            _php_mb_match_regex(
2144                MBSTRG(http_output_conv_mimetypes),
2145                SG(sapi_headers).mimetype,
2146                strlen(SG(sapi_headers).mimetype))) {
2147            if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2148                mimetype = estrdup(SG(sapi_headers).mimetype);
2149            } else {
2150                mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2151            }
2152            send_text_mimetype = 1;
2153        } else if (SG(sapi_headers).send_default_content_type) {
2154            mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2155        }
2156
2157        /* if content-type is not yet set, set it and activate the converter */
2158        if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2159            charset = encoding->mime_name;
2160            if (charset) {
2161                len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
2162                if (sapi_add_header(p, len, 0) != FAILURE) {
2163                    SG(sapi_headers).send_default_content_type = 0;
2164                }
2165            }
2166            /* activate the converter */
2167            MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2168            if (send_text_mimetype){
2169                efree(mimetype);
2170            }
2171        }
2172    }
2173
2174    /* just return if the converter is not activated. */
2175    if (MBSTRG(outconv) == NULL) {
2176        RETURN_STRINGL(arg_string, arg_string_len, 1);
2177    }
2178
2179    /* flag */
2180    last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2181    /* mode */
2182    mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2183    mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2184
2185    /* feed the string */
2186    mbfl_string_init(&string);
2187    /* these are not needed. convd has encoding info.
2188    string.no_language = MBSTRG(language);
2189    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2190    */
2191    string.val = (unsigned char *)arg_string;
2192    string.len = arg_string_len;
2193    mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2194    if (last_feed) {
2195        mbfl_buffer_converter_flush(MBSTRG(outconv));
2196    }
2197    /* get the converter output, and return it */
2198    mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2199    RETVAL_STRINGL((char *)result.val, result.len, 0);      /* the string is already strdup()'ed */
2200
2201    /* delete the converter if it is the last feed. */
2202    if (last_feed) {
2203        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2204        mbfl_buffer_converter_delete(MBSTRG(outconv));
2205        MBSTRG(outconv) = NULL;
2206    }
2207}
2208/* }}} */
2209
2210/* {{{ proto int mb_strlen(string str [, string encoding])
2211   Get character numbers of a string */
2212PHP_FUNCTION(mb_strlen)
2213{
2214    int n;
2215    mbfl_string string;
2216    char *enc_name = NULL;
2217    int enc_name_len;
2218
2219    mbfl_string_init(&string);
2220
2221    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2222        RETURN_FALSE;
2223    }
2224
2225    string.no_language = MBSTRG(language);
2226    if (enc_name == NULL) {
2227        string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2228    } else {
2229        string.no_encoding = mbfl_name2no_encoding(enc_name);
2230        if (string.no_encoding == mbfl_no_encoding_invalid) {
2231            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2232            RETURN_FALSE;
2233        }
2234    }
2235
2236    n = mbfl_strlen(&string);
2237    if (n >= 0) {
2238        RETVAL_LONG(n);
2239    } else {
2240        RETVAL_FALSE;
2241    }
2242}
2243/* }}} */
2244
2245/* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2246   Find position of first occurrence of a string within another */
2247PHP_FUNCTION(mb_strpos)
2248{
2249    int n, reverse = 0;
2250    long offset;
2251    mbfl_string haystack, needle;
2252    char *enc_name = NULL;
2253    int enc_name_len;
2254
2255    mbfl_string_init(&haystack);
2256    mbfl_string_init(&needle);
2257    haystack.no_language = MBSTRG(language);
2258    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2259    needle.no_language = MBSTRG(language);
2260    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2261    offset = 0;
2262
2263    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2264        RETURN_FALSE;
2265    }
2266
2267    if (enc_name != NULL) {
2268        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2269        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2270            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2271            RETURN_FALSE;
2272        }
2273    }
2274
2275    if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2276        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2277        RETURN_FALSE;
2278    }
2279    if (needle.len == 0) {
2280        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2281        RETURN_FALSE;
2282    }
2283
2284    n = mbfl_strpos(&haystack, &needle, offset, reverse);
2285    if (n >= 0) {
2286        RETVAL_LONG(n);
2287    } else {
2288        switch (-n) {
2289        case 1:
2290            break;
2291        case 2:
2292            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2293            break;
2294        case 4:
2295            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2296            break;
2297        case 8:
2298            php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2299            break;
2300        default:
2301            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2302            break;
2303        }
2304        RETVAL_FALSE;
2305    }
2306}
2307/* }}} */
2308
2309/* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2310   Find position of last occurrence of a string within another */
2311PHP_FUNCTION(mb_strrpos)
2312{
2313    int n;
2314    mbfl_string haystack, needle;
2315    char *enc_name = NULL;
2316    int enc_name_len;
2317    zval **zoffset = NULL;
2318    long offset = 0, str_flg;
2319    char *enc_name2 = NULL;
2320    int enc_name_len2;
2321
2322    mbfl_string_init(&haystack);
2323    mbfl_string_init(&needle);
2324    haystack.no_language = MBSTRG(language);
2325    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2326    needle.no_language = MBSTRG(language);
2327    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2328
2329    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2330        RETURN_FALSE;
2331    }
2332
2333    if (zoffset) {
2334        if (Z_TYPE_PP(zoffset) == IS_STRING) {
2335            enc_name2     = Z_STRVAL_PP(zoffset);
2336            enc_name_len2 = Z_STRLEN_PP(zoffset);
2337            str_flg       = 1;
2338
2339            if (enc_name2 != NULL) {
2340                switch (*enc_name2) {
2341                case '0':
2342                case '1':
2343                case '2':
2344                case '3':
2345                case '4':
2346                case '5':
2347                case '6':
2348                case '7':
2349                case '8':
2350                case '9':
2351                case ' ':
2352                case '-':
2353                case '.':
2354                    break;
2355                default :
2356                    str_flg = 0;
2357                    break;
2358                }
2359            }
2360
2361            if (str_flg) {
2362                convert_to_long_ex(zoffset);
2363                offset   = Z_LVAL_PP(zoffset);
2364            } else {
2365                enc_name     = enc_name2;
2366                enc_name_len = enc_name_len2;
2367            }
2368        } else {
2369            convert_to_long_ex(zoffset);
2370            offset = Z_LVAL_PP(zoffset);
2371        }
2372    }
2373
2374    if (enc_name != NULL) {
2375        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2376        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2377            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2378            RETURN_FALSE;
2379        }
2380    }
2381
2382    if (haystack.len <= 0) {
2383        RETURN_FALSE;
2384    }
2385    if (needle.len <= 0) {
2386        RETURN_FALSE;
2387    }
2388
2389    {
2390        int haystack_char_len = mbfl_strlen(&haystack);
2391        if ((offset > 0 && offset > haystack_char_len) ||
2392            (offset < 0 && -offset > haystack_char_len)) {
2393            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2394            RETURN_FALSE;
2395        }
2396    }
2397
2398    n = mbfl_strpos(&haystack, &needle, offset, 1);
2399    if (n >= 0) {
2400        RETVAL_LONG(n);
2401    } else {
2402        RETVAL_FALSE;
2403    }
2404}
2405/* }}} */
2406
2407/* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2408   Finds position of first occurrence of a string within another, case insensitive */
2409PHP_FUNCTION(mb_stripos)
2410{
2411    int n;
2412    long offset;
2413    mbfl_string haystack, needle;
2414    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2415    int from_encoding_len;
2416    n = -1;
2417    offset = 0;
2418
2419    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2420        RETURN_FALSE;
2421    }
2422    if (needle.len == 0) {
2423        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2424        RETURN_FALSE;
2425    }
2426    n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2427
2428    if (n >= 0) {
2429        RETVAL_LONG(n);
2430    } else {
2431        RETVAL_FALSE;
2432    }
2433}
2434/* }}} */
2435
2436/* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2437   Finds position of last occurrence of a string within another, case insensitive */
2438PHP_FUNCTION(mb_strripos)
2439{
2440    int n;
2441    long offset;
2442    mbfl_string haystack, needle;
2443    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2444    int from_encoding_len;
2445    n = -1;
2446    offset = 0;
2447
2448    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2449        RETURN_FALSE;
2450    }
2451
2452    n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2453
2454    if (n >= 0) {
2455        RETVAL_LONG(n);
2456    } else {
2457        RETVAL_FALSE;
2458    }
2459}
2460/* }}} */
2461
2462/* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2463   Finds first occurrence of a string within another */
2464PHP_FUNCTION(mb_strstr)
2465{
2466    int n, len, mblen;
2467    mbfl_string haystack, needle, result, *ret = NULL;
2468    char *enc_name = NULL;
2469    int enc_name_len;
2470    zend_bool part = 0;
2471
2472    mbfl_string_init(&haystack);
2473    mbfl_string_init(&needle);
2474    haystack.no_language = MBSTRG(language);
2475    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2476    needle.no_language = MBSTRG(language);
2477    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2478
2479    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2480        RETURN_FALSE;
2481    }
2482
2483    if (enc_name != NULL) {
2484        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2485        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2486            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2487            RETURN_FALSE;
2488        }
2489    }
2490
2491    if (needle.len <= 0) {
2492        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2493        RETURN_FALSE;
2494    }
2495    n = mbfl_strpos(&haystack, &needle, 0, 0);
2496    if (n >= 0) {
2497        mblen = mbfl_strlen(&haystack);
2498        if (part) {
2499            ret = mbfl_substr(&haystack, &result, 0, n);
2500            if (ret != NULL) {
2501                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2502            } else {
2503                RETVAL_FALSE;
2504            }
2505        } else {
2506            len = (mblen - n);
2507            ret = mbfl_substr(&haystack, &result, n, len);
2508            if (ret != NULL) {
2509                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2510            } else {
2511                RETVAL_FALSE;
2512            }
2513        }
2514    } else {
2515        RETVAL_FALSE;
2516    }
2517}
2518/* }}} */
2519
2520/* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2521   Finds the last occurrence of a character in a string within another */
2522PHP_FUNCTION(mb_strrchr)
2523{
2524    int n, len, mblen;
2525    mbfl_string haystack, needle, result, *ret = NULL;
2526    char *enc_name = NULL;
2527    int enc_name_len;
2528    zend_bool part = 0;
2529
2530    mbfl_string_init(&haystack);
2531    mbfl_string_init(&needle);
2532    haystack.no_language = MBSTRG(language);
2533    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2534    needle.no_language = MBSTRG(language);
2535    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2536
2537    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2538        RETURN_FALSE;
2539    }
2540
2541    if (enc_name != NULL) {
2542        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2543        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2544            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2545            RETURN_FALSE;
2546        }
2547    }
2548
2549    if (haystack.len <= 0) {
2550        RETURN_FALSE;
2551    }
2552    if (needle.len <= 0) {
2553        RETURN_FALSE;
2554    }
2555    n = mbfl_strpos(&haystack, &needle, 0, 1);
2556    if (n >= 0) {
2557        mblen = mbfl_strlen(&haystack);
2558        if (part) {
2559            ret = mbfl_substr(&haystack, &result, 0, n);
2560            if (ret != NULL) {
2561                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2562            } else {
2563                RETVAL_FALSE;
2564            }
2565        } else {
2566            len = (mblen - n);
2567            ret = mbfl_substr(&haystack, &result, n, len);
2568            if (ret != NULL) {
2569                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2570            } else {
2571                RETVAL_FALSE;
2572            }
2573        }
2574    } else {
2575        RETVAL_FALSE;
2576    }
2577}
2578/* }}} */
2579
2580/* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2581   Finds first occurrence of a string within another, case insensitive */
2582PHP_FUNCTION(mb_stristr)
2583{
2584    zend_bool part = 0;
2585    unsigned int from_encoding_len, len, mblen;
2586    int n;
2587    mbfl_string haystack, needle, result, *ret = NULL;
2588    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2589    mbfl_string_init(&haystack);
2590    mbfl_string_init(&needle);
2591    haystack.no_language = MBSTRG(language);
2592    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2593    needle.no_language = MBSTRG(language);
2594    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2595
2596
2597    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2598        RETURN_FALSE;
2599    }
2600
2601    if (!needle.len) {
2602        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2603        RETURN_FALSE;
2604    }
2605
2606    haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2607    if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2608        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2609        RETURN_FALSE;
2610    }
2611
2612    n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2613
2614    if (n <0) {
2615        RETURN_FALSE;
2616    }
2617
2618    mblen = mbfl_strlen(&haystack);
2619
2620    if (part) {
2621        ret = mbfl_substr(&haystack, &result, 0, n);
2622        if (ret != NULL) {
2623            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2624        } else {
2625            RETVAL_FALSE;
2626        }
2627    } else {
2628        len = (mblen - n);
2629        ret = mbfl_substr(&haystack, &result, n, len);
2630        if (ret != NULL) {
2631            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2632        } else {
2633            RETVAL_FALSE;
2634        }
2635    }
2636}
2637/* }}} */
2638
2639/* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2640   Finds the last occurrence of a character in a string within another, case insensitive */
2641PHP_FUNCTION(mb_strrichr)
2642{
2643    zend_bool part = 0;
2644    int n, from_encoding_len, len, mblen;
2645    mbfl_string haystack, needle, result, *ret = NULL;
2646    const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2647    mbfl_string_init(&haystack);
2648    mbfl_string_init(&needle);
2649    haystack.no_language = MBSTRG(language);
2650    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2651    needle.no_language = MBSTRG(language);
2652    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2653
2654
2655    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2656        RETURN_FALSE;
2657    }
2658
2659    haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2660    if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2661        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2662        RETURN_FALSE;
2663    }
2664
2665    n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2666
2667    if (n <0) {
2668        RETURN_FALSE;
2669    }
2670
2671    mblen = mbfl_strlen(&haystack);
2672
2673    if (part) {
2674        ret = mbfl_substr(&haystack, &result, 0, n);
2675        if (ret != NULL) {
2676            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2677        } else {
2678            RETVAL_FALSE;
2679        }
2680    } else {
2681        len = (mblen - n);
2682        ret = mbfl_substr(&haystack, &result, n, len);
2683        if (ret != NULL) {
2684            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2685        } else {
2686            RETVAL_FALSE;
2687        }
2688    }
2689}
2690/* }}} */
2691
2692/* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2693   Count the number of substring occurrences */
2694PHP_FUNCTION(mb_substr_count)
2695{
2696    int n;
2697    mbfl_string haystack, needle;
2698    char *enc_name = NULL;
2699    int enc_name_len;
2700
2701    mbfl_string_init(&haystack);
2702    mbfl_string_init(&needle);
2703    haystack.no_language = MBSTRG(language);
2704    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2705    needle.no_language = MBSTRG(language);
2706    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2707
2708    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2709        return;
2710    }
2711
2712    if (enc_name != NULL) {
2713        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2714        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2715            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2716            RETURN_FALSE;
2717        }
2718    }
2719
2720    if (needle.len <= 0) {
2721        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2722        RETURN_FALSE;
2723    }
2724
2725    n = mbfl_substr_count(&haystack, &needle);
2726    if (n >= 0) {
2727        RETVAL_LONG(n);
2728    } else {
2729        RETVAL_FALSE;
2730    }
2731}
2732/* }}} */
2733
2734/* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2735   Returns part of a string */
2736PHP_FUNCTION(mb_substr)
2737{
2738    size_t argc = ZEND_NUM_ARGS();
2739    char *str, *encoding;
2740    long from, len;
2741    int mblen, str_len, encoding_len;
2742    zval **z_len = NULL;
2743    mbfl_string string, result, *ret;
2744
2745    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2746        return;
2747    }
2748
2749    mbfl_string_init(&string);
2750    string.no_language = MBSTRG(language);
2751    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2752
2753    if (argc == 4) {
2754        string.no_encoding = mbfl_name2no_encoding(encoding);
2755        if (string.no_encoding == mbfl_no_encoding_invalid) {
2756            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2757            RETURN_FALSE;
2758        }
2759    }
2760
2761    string.val = (unsigned char *)str;
2762    string.len = str_len;
2763
2764    if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2765        len = str_len;
2766    } else {
2767        convert_to_long_ex(z_len);
2768        len = Z_LVAL_PP(z_len);
2769    }
2770
2771    /* measures length */
2772    mblen = 0;
2773    if (from < 0 || len < 0) {
2774        mblen = mbfl_strlen(&string);
2775    }
2776
2777    /* if "from" position is negative, count start position from the end
2778     * of the string
2779     */
2780    if (from < 0) {
2781        from = mblen + from;
2782        if (from < 0) {
2783            from = 0;
2784        }
2785    }
2786
2787    /* if "length" position is negative, set it to the length
2788     * needed to stop that many chars from the end of the string
2789     */
2790    if (len < 0) {
2791        len = (mblen - from) + len;
2792        if (len < 0) {
2793            len = 0;
2794        }
2795    }
2796
2797    if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2798        && (from >= mbfl_strlen(&string))) {
2799        RETURN_FALSE;
2800    }
2801
2802    ret = mbfl_substr(&string, &result, from, len);
2803    if (NULL == ret) {
2804        RETURN_FALSE;
2805    }
2806
2807    RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2808}
2809/* }}} */
2810
2811/* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2812   Returns part of a string */
2813PHP_FUNCTION(mb_strcut)
2814{
2815    size_t argc = ZEND_NUM_ARGS();
2816    char *encoding;
2817    long from, len;
2818    int encoding_len;
2819    zval **z_len = NULL;
2820    mbfl_string string, result, *ret;
2821
2822    mbfl_string_init(&string);
2823    string.no_language = MBSTRG(language);
2824    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2825
2826    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2827        return;
2828    }
2829
2830    if (argc == 4) {
2831        string.no_encoding = mbfl_name2no_encoding(encoding);
2832        if (string.no_encoding == mbfl_no_encoding_invalid) {
2833            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2834            RETURN_FALSE;
2835        }
2836    }
2837
2838    if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2839        len = string.len;
2840    } else {
2841        convert_to_long_ex(z_len);
2842        len = Z_LVAL_PP(z_len);
2843    }
2844
2845    /* if "from" position is negative, count start position from the end
2846     * of the string
2847     */
2848    if (from < 0) {
2849        from = string.len + from;
2850        if (from < 0) {
2851            from = 0;
2852        }
2853    }
2854
2855    /* if "length" position is negative, set it to the length
2856     * needed to stop that many chars from the end of the string
2857     */
2858    if (len < 0) {
2859        len = (string.len - from) + len;
2860        if (len < 0) {
2861            len = 0;
2862        }
2863    }
2864
2865    if ((unsigned int)from > string.len) {
2866        RETURN_FALSE;
2867    }
2868
2869    ret = mbfl_strcut(&string, &result, from, len);
2870    if (ret == NULL) {
2871        RETURN_FALSE;
2872    }
2873
2874    RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2875}
2876/* }}} */
2877
2878/* {{{ proto int mb_strwidth(string str [, string encoding])
2879   Gets terminal width of a string */
2880PHP_FUNCTION(mb_strwidth)
2881{
2882    int n;
2883    mbfl_string string;
2884    char *enc_name = NULL;
2885    int enc_name_len;
2886
2887    mbfl_string_init(&string);
2888
2889    string.no_language = MBSTRG(language);
2890    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2891
2892    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2893        return;
2894    }
2895
2896    if (enc_name != NULL) {
2897        string.no_encoding = mbfl_name2no_encoding(enc_name);
2898        if (string.no_encoding == mbfl_no_encoding_invalid) {
2899            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2900            RETURN_FALSE;
2901        }
2902    }
2903
2904    n = mbfl_strwidth(&string);
2905    if (n >= 0) {
2906        RETVAL_LONG(n);
2907    } else {
2908        RETVAL_FALSE;
2909    }
2910}
2911/* }}} */
2912
2913/* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2914   Trim the string in terminal width */
2915PHP_FUNCTION(mb_strimwidth)
2916{
2917    char *str, *trimmarker, *encoding;
2918    long from, width;
2919    int str_len, trimmarker_len, encoding_len;
2920    mbfl_string string, result, marker, *ret;
2921
2922    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2923        return;
2924    }
2925
2926    mbfl_string_init(&string);
2927    mbfl_string_init(&marker);
2928    string.no_language = MBSTRG(language);
2929    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2930    marker.no_language = MBSTRG(language);
2931    marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2932    marker.val = NULL;
2933    marker.len = 0;
2934
2935    if (ZEND_NUM_ARGS() == 5) {
2936        string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2937        if (string.no_encoding == mbfl_no_encoding_invalid) {
2938            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2939            RETURN_FALSE;
2940        }
2941    }
2942
2943    string.val = (unsigned char *)str;
2944    string.len = str_len;
2945
2946    if (from < 0 || from > str_len) {
2947        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2948        RETURN_FALSE;
2949    }
2950
2951    if (width < 0) {
2952        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2953        RETURN_FALSE;
2954    }
2955
2956    if (ZEND_NUM_ARGS() >= 4) {
2957        marker.val = (unsigned char *)trimmarker;
2958        marker.len = trimmarker_len;
2959    }
2960
2961    ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2962
2963    if (ret == NULL) {
2964        RETURN_FALSE;
2965    }
2966
2967    RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2968}
2969/* }}} */
2970
2971/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
2972MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2973{
2974    mbfl_string string, result, *ret;
2975    const mbfl_encoding *from_encoding, *to_encoding;
2976    mbfl_buffer_converter *convd;
2977    size_t size;
2978    const mbfl_encoding **list;
2979    char *output=NULL;
2980
2981    if (output_len) {
2982        *output_len = 0;
2983    }
2984    if (!input) {
2985        return NULL;
2986    }
2987    /* new encoding */
2988    if (_to_encoding && strlen(_to_encoding)) {
2989        to_encoding = mbfl_name2encoding(_to_encoding);
2990        if (!to_encoding) {
2991            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2992            return NULL;
2993        }
2994    } else {
2995        to_encoding = MBSTRG(current_internal_encoding);
2996    }
2997
2998    /* initialize string */
2999    mbfl_string_init(&string);
3000    mbfl_string_init(&result);
3001    from_encoding = MBSTRG(current_internal_encoding);
3002    string.no_encoding = from_encoding->no_encoding;
3003    string.no_language = MBSTRG(language);
3004    string.val = (unsigned char *)input;
3005    string.len = length;
3006
3007    /* pre-conversion encoding */
3008    if (_from_encodings) {
3009        list = NULL;
3010        size = 0;
3011        php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
3012        if (size == 1) {
3013            from_encoding = *list;
3014            string.no_encoding = from_encoding->no_encoding;
3015        } else if (size > 1) {
3016            /* auto detect */
3017            from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
3018            if (from_encoding) {
3019                string.no_encoding = from_encoding->no_encoding;
3020            } else {
3021                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
3022                from_encoding = &mbfl_encoding_pass;
3023                to_encoding = from_encoding;
3024                string.no_encoding = from_encoding->no_encoding;
3025            }
3026        } else {
3027            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
3028        }
3029        if (list != NULL) {
3030            efree((void *)list);
3031        }
3032    }
3033
3034    /* initialize converter */
3035    convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3036    if (convd == NULL) {
3037        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
3038        return NULL;
3039    }
3040    mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3041    mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3042
3043    /* do it */
3044    ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3045    if (ret) {
3046        if (output_len) {
3047            *output_len = ret->len;
3048        }
3049        output = (char *)ret->val;
3050    }
3051
3052    MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3053    mbfl_buffer_converter_delete(convd);
3054    return output;
3055}
3056/* }}} */
3057
3058/* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3059   Returns converted string in desired encoding */
3060PHP_FUNCTION(mb_convert_encoding)
3061{
3062    char *arg_str, *arg_new;
3063    int str_len, new_len;
3064    zval *arg_old;
3065    int i;
3066    size_t size, l, n;
3067    char *_from_encodings = NULL, *ret, *s_free = NULL;
3068
3069    zval **hash_entry;
3070    HashTable *target_hash;
3071
3072    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3073        return;
3074    }
3075
3076    if (ZEND_NUM_ARGS() == 3) {
3077        switch (Z_TYPE_P(arg_old)) {
3078        case IS_ARRAY:
3079            target_hash = Z_ARRVAL_P(arg_old);
3080            zend_hash_internal_pointer_reset(target_hash);
3081            i = zend_hash_num_elements(target_hash);
3082            _from_encodings = NULL;
3083
3084            while (i > 0) {
3085                if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3086                    break;
3087                }
3088
3089                convert_to_string_ex(hash_entry);
3090
3091                if ( _from_encodings) {
3092                    l = strlen(_from_encodings);
3093                    n = strlen(Z_STRVAL_PP(hash_entry));
3094                    _from_encodings = erealloc(_from_encodings, l+n+2);
3095                    strcpy(_from_encodings+l, ",");
3096                    strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
3097                } else {
3098                    _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
3099                }
3100
3101                zend_hash_move_forward(target_hash);
3102                i--;
3103            }
3104
3105            if (_from_encodings != NULL && !strlen(_from_encodings)) {
3106                efree(_from_encodings);
3107                _from_encodings = NULL;
3108            }
3109            s_free = _from_encodings;
3110            break;
3111        default:
3112            convert_to_string(arg_old);
3113            _from_encodings = Z_STRVAL_P(arg_old);
3114            break;
3115        }
3116    }
3117
3118    /* new encoding */
3119    ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
3120    if (ret != NULL) {
3121        RETVAL_STRINGL(ret, size, 0);       /* the string is already strdup()'ed */
3122    } else {
3123        RETVAL_FALSE;
3124    }
3125
3126    if ( s_free) {
3127        efree(s_free);
3128    }
3129}
3130/* }}} */
3131
3132/* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3133   Returns a case-folded version of sourcestring */
3134PHP_FUNCTION(mb_convert_case)
3135{
3136    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3137    char *str;
3138    int str_len, from_encoding_len;
3139    long case_mode = 0;
3140    char *newstr;
3141    size_t ret_len;
3142
3143    RETVAL_FALSE;
3144    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
3145                &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
3146        RETURN_FALSE;
3147
3148    newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3149
3150    if (newstr) {
3151        RETVAL_STRINGL(newstr, ret_len, 0);
3152    }
3153}
3154/* }}} */
3155
3156/* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3157 *  Returns a uppercased version of sourcestring
3158 */
3159PHP_FUNCTION(mb_strtoupper)
3160{
3161    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3162    char *str;
3163    int str_len, from_encoding_len;
3164    char *newstr;
3165    size_t ret_len;
3166
3167    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3168                &from_encoding, &from_encoding_len) == FAILURE) {
3169        return;
3170    }
3171    newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3172
3173    if (newstr) {
3174        RETURN_STRINGL(newstr, ret_len, 0);
3175    }
3176    RETURN_FALSE;
3177}
3178/* }}} */
3179
3180/* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3181 *  Returns a lowercased version of sourcestring
3182 */
3183PHP_FUNCTION(mb_strtolower)
3184{
3185    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3186    char *str;
3187    int str_len, from_encoding_len;
3188    char *newstr;
3189    size_t ret_len;
3190
3191    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3192                &from_encoding, &from_encoding_len) == FAILURE) {
3193        return;
3194    }
3195    newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3196
3197    if (newstr) {
3198        RETURN_STRINGL(newstr, ret_len, 0);
3199    }
3200    RETURN_FALSE;
3201}
3202/* }}} */
3203
3204/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3205   Encodings of the given string is returned (as a string) */
3206PHP_FUNCTION(mb_detect_encoding)
3207{
3208    char *str;
3209    int str_len;
3210    zend_bool strict=0;
3211    zval *encoding_list;
3212
3213    mbfl_string string;
3214    const mbfl_encoding *ret;
3215    const mbfl_encoding **elist, **list;
3216    size_t size;
3217
3218    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3219        return;
3220    }
3221
3222    /* make encoding list */
3223    list = NULL;
3224    size = 0;
3225    if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3226        switch (Z_TYPE_P(encoding_list)) {
3227        case IS_ARRAY:
3228            if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3229                if (list) {
3230                    efree(list);
3231                    list = NULL;
3232                    size = 0;
3233                }
3234            }
3235            break;
3236        default:
3237            convert_to_string(encoding_list);
3238            if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3239                if (list) {
3240                    efree(list);
3241                    list = NULL;
3242                    size = 0;
3243                }
3244            }
3245            break;
3246        }
3247        if (size <= 0) {
3248            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3249        }
3250    }
3251
3252    if (ZEND_NUM_ARGS() < 3) {
3253        strict = (zend_bool)MBSTRG(strict_detection);
3254    }
3255
3256    if (size > 0 && list != NULL) {
3257        elist = list;
3258    } else {
3259        elist = MBSTRG(current_detect_order_list);
3260        size = MBSTRG(current_detect_order_list_size);
3261    }
3262
3263    mbfl_string_init(&string);
3264    string.no_language = MBSTRG(language);
3265    string.val = (unsigned char *)str;
3266    string.len = str_len;
3267    ret = mbfl_identify_encoding2(&string, elist, size, strict);
3268
3269    if (list != NULL) {
3270        efree((void *)list);
3271    }
3272
3273    if (ret == NULL) {
3274        RETURN_FALSE;
3275    }
3276
3277    RETVAL_STRING((char *)ret->name, 1);
3278}
3279/* }}} */
3280
3281/* {{{ proto mixed mb_list_encodings()
3282   Returns an array of all supported entity encodings */
3283PHP_FUNCTION(mb_list_encodings)
3284{
3285    const mbfl_encoding **encodings;
3286    const mbfl_encoding *encoding;
3287    int i;
3288
3289    array_init(return_value);
3290    i = 0;
3291    encodings = mbfl_get_supported_encodings();
3292    while ((encoding = encodings[i++]) != NULL) {
3293        add_next_index_string(return_value, (char *) encoding->name, 1);
3294    }
3295}
3296/* }}} */
3297
3298/* {{{ proto array mb_encoding_aliases(string encoding)
3299   Returns an array of the aliases of a given encoding name */
3300PHP_FUNCTION(mb_encoding_aliases)
3301{
3302    const mbfl_encoding *encoding;
3303    char *name = NULL;
3304    int name_len;
3305
3306    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3307        RETURN_FALSE;
3308    }
3309
3310    encoding = mbfl_name2encoding(name);
3311    if (!encoding) {
3312        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3313        RETURN_FALSE;
3314    }
3315
3316    array_init(return_value);
3317    if (encoding->aliases != NULL) {
3318        const char **alias;
3319        for (alias = *encoding->aliases; *alias; ++alias) {
3320            add_next_index_string(return_value, (char *)*alias, 1);
3321        }
3322    }
3323}
3324/* }}} */
3325
3326/* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3327   Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
3328PHP_FUNCTION(mb_encode_mimeheader)
3329{
3330    enum mbfl_no_encoding charset, transenc;
3331    mbfl_string  string, result, *ret;
3332    char *charset_name = NULL;
3333    int charset_name_len;
3334    char *trans_enc_name = NULL;
3335    int trans_enc_name_len;
3336    char *linefeed = "\r\n";
3337    int linefeed_len;
3338    long indent = 0;
3339
3340    mbfl_string_init(&string);
3341    string.no_language = MBSTRG(language);
3342    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3343
3344    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3345        return;
3346    }
3347
3348    charset = mbfl_no_encoding_pass;
3349    transenc = mbfl_no_encoding_base64;
3350
3351    if (charset_name != NULL) {
3352        charset = mbfl_name2no_encoding(charset_name);
3353        if (charset == mbfl_no_encoding_invalid) {
3354            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3355            RETURN_FALSE;
3356        }
3357    } else {
3358        const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3359        if (lang != NULL) {
3360            charset = lang->mail_charset;
3361            transenc = lang->mail_header_encoding;
3362        }
3363    }
3364
3365    if (trans_enc_name != NULL) {
3366        if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3367            transenc = mbfl_no_encoding_base64;
3368        } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3369            transenc = mbfl_no_encoding_qprint;
3370        }
3371    }
3372
3373    mbfl_string_init(&result);
3374    ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3375    if (ret != NULL) {
3376        RETVAL_STRINGL((char *)ret->val, ret->len, 0);  /* the string is already strdup()'ed */
3377    } else {
3378        RETVAL_FALSE;
3379    }
3380}
3381/* }}} */
3382
3383/* {{{ proto string mb_decode_mimeheader(string string)
3384   Decodes the MIME "encoded-word" in the string */
3385PHP_FUNCTION(mb_decode_mimeheader)
3386{
3387    mbfl_string string, result, *ret;
3388
3389    mbfl_string_init(&string);
3390    string.no_language = MBSTRG(language);
3391    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3392
3393    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3394        return;
3395    }
3396
3397    mbfl_string_init(&result);
3398    ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3399    if (ret != NULL) {
3400        RETVAL_STRINGL((char *)ret->val, ret->len, 0);  /* the string is already strdup()'ed */
3401    } else {
3402        RETVAL_FALSE;
3403    }
3404}
3405/* }}} */
3406
3407/* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3408   Conversion between full-width character and half-width character (Japanese) */
3409PHP_FUNCTION(mb_convert_kana)
3410{
3411    int opt, i;
3412    mbfl_string string, result, *ret;
3413    char *optstr = NULL;
3414    int optstr_len;
3415    char *encname = NULL;
3416    int encname_len;
3417
3418    mbfl_string_init(&string);
3419    string.no_language = MBSTRG(language);
3420    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3421
3422    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3423        return;
3424    }
3425
3426    /* option */
3427    if (optstr != NULL) {
3428        char *p = optstr;
3429        int n = optstr_len;
3430        i = 0;
3431        opt = 0;
3432        while (i < n) {
3433            i++;
3434            switch (*p++) {
3435            case 'A':
3436                opt |= 0x1;
3437                break;
3438            case 'a':
3439                opt |= 0x10;
3440                break;
3441            case 'R':
3442                opt |= 0x2;
3443                break;
3444            case 'r':
3445                opt |= 0x20;
3446                break;
3447            case 'N':
3448                opt |= 0x4;
3449                break;
3450            case 'n':
3451                opt |= 0x40;
3452                break;
3453            case 'S':
3454                opt |= 0x8;
3455                break;
3456            case 's':
3457                opt |= 0x80;
3458                break;
3459            case 'K':
3460                opt |= 0x100;
3461                break;
3462            case 'k':
3463                opt |= 0x1000;
3464                break;
3465            case 'H':
3466                opt |= 0x200;
3467                break;
3468            case 'h':
3469                opt |= 0x2000;
3470                break;
3471            case 'V':
3472                opt |= 0x800;
3473                break;
3474            case 'C':
3475                opt |= 0x10000;
3476                break;
3477            case 'c':
3478                opt |= 0x20000;
3479                break;
3480            case 'M':
3481                opt |= 0x100000;
3482                break;
3483            case 'm':
3484                opt |= 0x200000;
3485                break;
3486            }
3487        }
3488    } else {
3489        opt = 0x900;
3490    }
3491
3492    /* encoding */
3493    if (encname != NULL) {
3494        string.no_encoding = mbfl_name2no_encoding(encname);
3495        if (string.no_encoding == mbfl_no_encoding_invalid) {
3496            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3497            RETURN_FALSE;
3498        }
3499    }
3500
3501    ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3502    if (ret != NULL) {
3503        RETVAL_STRINGL((char *)ret->val, ret->len, 0);      /* the string is already strdup()'ed */
3504    } else {
3505        RETVAL_FALSE;
3506    }
3507}
3508/* }}} */
3509
/* Initial number of zval** slots allocated for the work stack that
 * mb_convert_variables() sets up when it has to detect the source encoding. */
#define PHP_MBSTR_STACK_BLOCK_SIZE 32
3511
3512/* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3513   Converts the string resource in variables to desired encoding */
3514PHP_FUNCTION(mb_convert_variables)
3515{
3516    zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3517    HashTable *target_hash;
3518    mbfl_string string, result, *ret;
3519    const mbfl_encoding *from_encoding, *to_encoding;
3520    mbfl_encoding_detector *identd;
3521    mbfl_buffer_converter *convd;
3522    int n, to_enc_len, argc, stack_level, stack_max;
3523    size_t elistsz;
3524    const mbfl_encoding **elist;
3525    char *to_enc;
3526    void *ptmp;
3527
3528    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3529        return;
3530    }
3531
3532    /* new encoding */
3533    to_encoding = mbfl_name2encoding(to_enc);
3534    if (!to_encoding) {
3535        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3536        efree(args);
3537        RETURN_FALSE;
3538    }
3539
3540    /* initialize string */
3541    mbfl_string_init(&string);
3542    mbfl_string_init(&result);
3543    from_encoding = MBSTRG(current_internal_encoding);
3544    string.no_encoding = from_encoding->no_encoding;
3545    string.no_language = MBSTRG(language);
3546
3547    /* pre-conversion encoding */
3548    elist = NULL;
3549    elistsz = 0;
3550    switch (Z_TYPE_PP(zfrom_enc)) {
3551    case IS_ARRAY:
3552        php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3553        break;
3554    default:
3555        convert_to_string_ex(zfrom_enc);
3556        php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3557        break;
3558    }
3559    if (elistsz <= 0) {
3560        from_encoding = &mbfl_encoding_pass;
3561    } else if (elistsz == 1) {
3562        from_encoding = *elist;
3563    } else {
3564        /* auto detect */
3565        from_encoding = NULL;
3566        stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3567        stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3568        stack_level = 0;
3569        identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3570        if (identd != NULL) {
3571            n = 0;
3572            while (n < argc || stack_level > 0) {
3573                if (stack_level <= 0) {
3574                    var = args[n++];
3575                    if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3576                        target_hash = HASH_OF(*var);
3577                        if (target_hash != NULL) {
3578                            zend_hash_internal_pointer_reset(target_hash);
3579                        }
3580                    }
3581                } else {
3582                    stack_level--;
3583                    var = stack[stack_level];
3584                }
3585                if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3586                    target_hash = HASH_OF(*var);
3587                    if (target_hash != NULL) {
3588                        while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3589                            zend_hash_move_forward(target_hash);
3590                            if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3591                                if (stack_level >= stack_max) {
3592                                    stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3593                                    ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3594                                    stack = (zval ***)ptmp;
3595                                }
3596                                stack[stack_level] = var;
3597                                stack_level++;
3598                                var = hash_entry;
3599                                target_hash = HASH_OF(*var);
3600                                if (target_hash != NULL) {
3601                                    zend_hash_internal_pointer_reset(target_hash);
3602                                    continue;
3603                                }
3604                            } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3605                                string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3606                                string.len = Z_STRLEN_PP(hash_entry);
3607                                if (mbfl_encoding_detector_feed(identd, &string)) {
3608                                    goto detect_end;        /* complete detecting */
3609                                }
3610                            }
3611                        }
3612                    }
3613                } else if (Z_TYPE_PP(var) == IS_STRING) {
3614                    string.val = (unsigned char *)Z_STRVAL_PP(var);
3615                    string.len = Z_STRLEN_PP(var);
3616                    if (mbfl_encoding_detector_feed(identd, &string)) {
3617                        goto detect_end;        /* complete detecting */
3618                    }
3619                }
3620            }
3621detect_end:
3622            from_encoding = mbfl_encoding_detector_judge2(identd);
3623            mbfl_encoding_detector_delete(identd);
3624        }
3625        efree(stack);
3626
3627        if (!from_encoding) {
3628            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3629            from_encoding = &mbfl_encoding_pass;
3630        }
3631    }
3632    if (elist != NULL) {
3633        efree((void *)elist);
3634    }
3635    /* create converter */
3636    convd = NULL;
3637    if (from_encoding != &mbfl_encoding_pass) {
3638        convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3639        if (convd == NULL) {
3640            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3641            RETURN_FALSE;
3642        }
3643        mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3644        mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3645    }
3646
3647    /* convert */
3648    if (convd != NULL) {
3649        stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3650        stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3651        stack_level = 0;
3652        n = 0;
3653        while (n < argc || stack_level > 0) {
3654            if (stack_level <= 0) {
3655                var = args[n++];
3656                if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3657                    target_hash = HASH_OF(*var);
3658                    if (target_hash != NULL) {
3659                        zend_hash_internal_pointer_reset(target_hash);
3660                    }
3661                }
3662            } else {
3663                stack_level--;
3664                var = stack[stack_level];
3665            }
3666            if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3667                target_hash = HASH_OF(*var);
3668                if (target_hash != NULL) {
3669                    while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3670                        zend_hash_move_forward(target_hash);
3671                        if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3672                            if (stack_level >= stack_max) {
3673                                stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3674                                ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3675                                stack = (zval ***)ptmp;
3676                            }
3677                            stack[stack_level] = var;
3678                            stack_level++;
3679                            var = hash_entry;
3680                            SEPARATE_ZVAL(hash_entry);
3681                            target_hash = HASH_OF(*var);
3682                            if (target_hash != NULL) {
3683                                zend_hash_internal_pointer_reset(target_hash);
3684                                continue;
3685                            }
3686                        } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3687                            string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3688                            string.len = Z_STRLEN_PP(hash_entry);
3689                            ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3690                            if (ret != NULL) {
3691                                if (Z_REFCOUNT_PP(hash_entry) > 1) {
3692                                    Z_DELREF_PP(hash_entry);
3693                                    MAKE_STD_ZVAL(*hash_entry);
3694                                } else {
3695                                    zval_dtor(*hash_entry);
3696                                }
3697                            ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3698                        }
3699                    }
3700                }
3701            }
3702        } else if (Z_TYPE_PP(var) == IS_STRING) {
3703            string.val = (unsigned char *)Z_STRVAL_PP(var);
3704            string.len = Z_STRLEN_PP(var);
3705            ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3706            if (ret != NULL) {
3707                zval_dtor(*var);
3708                ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3709                }
3710            }
3711        }
3712        efree(stack);
3713
3714        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3715        mbfl_buffer_converter_delete(convd);
3716    }
3717
3718    efree(args);
3719
3720    if (from_encoding) {
3721        RETURN_STRING(from_encoding->name, 1);
3722    } else {
3723        RETURN_FALSE;
3724    }
3725}
3726/* }}} */
3727
3728/* {{{ HTML numeric entity */
3729/* {{{ static void php_mb_numericentity_exec() */
3730static void
3731php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3732{
3733    char *str, *encoding;
3734    int str_len, encoding_len;
3735    zval *zconvmap, **hash_entry;
3736    HashTable *target_hash;
3737    size_t argc = ZEND_NUM_ARGS();
3738    int i, *convmap, *mapelm, mapsize=0;
3739    zend_bool is_hex = 0;
3740    mbfl_string string, result, *ret;
3741    enum mbfl_no_encoding no_encoding;
3742
3743    if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3744        return;
3745    }
3746
3747    mbfl_string_init(&string);
3748    string.no_language = MBSTRG(language);
3749    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3750    string.val = (unsigned char *)str;
3751    string.len = str_len;
3752
3753    /* encoding */
3754    if ((argc == 3 || argc == 4) && encoding_len > 0) {
3755        no_encoding = mbfl_name2no_encoding(encoding);
3756        if (no_encoding == mbfl_no_encoding_invalid) {
3757            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3758            RETURN_FALSE;
3759        } else {
3760            string.no_encoding = no_encoding;
3761        }
3762    }
3763
3764    if (argc == 4) {
3765        if (type == 0 && is_hex) {
3766            type = 2; /* output in hex format */
3767        }
3768    }
3769
3770    /* conversion map */
3771    convmap = NULL;
3772    if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3773        target_hash = Z_ARRVAL_P(zconvmap);
3774        zend_hash_internal_pointer_reset(target_hash);
3775        i = zend_hash_num_elements(target_hash);
3776        if (i > 0) {
3777            convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3778            mapelm = convmap;
3779            mapsize = 0;
3780            while (i > 0) {
3781                if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3782                    break;
3783                }
3784                convert_to_long_ex(hash_entry);
3785                *mapelm++ = Z_LVAL_PP(hash_entry);
3786                mapsize++;
3787                i--;
3788                zend_hash_move_forward(target_hash);
3789            }
3790        }
3791    }
3792    if (convmap == NULL) {
3793        RETURN_FALSE;
3794    }
3795    mapsize /= 4;
3796
3797    ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3798    if (ret != NULL) {
3799        RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3800    } else {
3801        RETVAL_FALSE;
3802    }
3803    efree((void *)convmap);
3804}
3805/* }}} */
3806
3807/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
3808   Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)
{
    /* Delegate to the shared helper with type 0; the helper itself promotes
     * type 0 to 2 (hex output) when the is_hex argument is passed and true. */
    php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
3813/* }}} */
3814
3815/* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
3816   Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)
{
    /* Delegate to the shared helper with type 1 (the decode direction, going
     * by this function's prototype comment). */
    php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
3821/* }}} */
3822/* }}} */
3823
3824/* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3825 *  Sends an email message with MIME scheme
3826 */
3827
/* If str[pos] starts a folded-header separator (CR LF followed by a space or
 * tab), advance pos to the last whitespace character of that separator and
 * `continue` the caller's loop, so the separator is left untouched.
 * Deliberately NOT wrapped in do { } while (0): the `continue` must act on the
 * loop that surrounds the macro invocation. */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)                                     \
    if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) {    \
        pos += 2;                                           \
        while (str[pos + 1] == ' ' || str[pos + 1] == '\t') {                           \
            pos++;                                          \
        }                                               \
        continue;                                           \
    }
3836
/* Replaces every NUL byte within the first `len` bytes of `str` by a space,
 * modifying the buffer in place.  The invoking scope must declare
 * `char *pp, *ee;`, which the macro uses as scratch cursors.
 * Wrapped in do { } while (0), like the other statement macros here, so that
 * it expands to exactly one statement and is safe as an unbraced if/else
 * body; the arguments are parenthesized against precedence surprises. */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) do { \
    pp = (str); \
    ee = pp + (len); \
    while ((pp = memchr(pp, '\0', (ee - pp)))) { \
        *pp = ' '; \
    } \
} while (0)
3843
/* Extends the local smart_str `token` by one character.  When the token owns
 * a buffer (a > 0) the character is really appended; otherwise the token is
 * only a window onto existing text, so just its length grows and `ch` itself
 * is not stored. */
#define APPEND_ONE_CHAR(ch) do { \
    if (token.a > 0) { \
        smart_str_appendc(&token, ch); \
    } else {\
        token.len++; \
    } \
} while (0)
3851
/* Turns a non-owning smart_str (a == 0) into an owning one: picks the
 * smallest power of two that is >= len as the capacity, allocates capacity + 1
 * bytes with emalloc() and copies the current len bytes into it.  A smart_str
 * that already owns a buffer is left alone.  The copy is not NUL-terminated
 * here. */
#define SEPARATE_SMART_STR(str) do {\
    if ((str)->a == 0) { \
        char *tmp_ptr; \
        (str)->a = 1; \
        while ((str)->a < (str)->len) { \
            (str)->a <<= 1; \
        } \
        tmp_ptr = emalloc((str)->a + 1); \
        memcpy(tmp_ptr, (str)->c, (str)->len); \
        (str)->c = tmp_ptr; \
    } \
} while (0)
3864
3865static void my_smart_str_dtor(smart_str *s)
3866{
3867    if (s->a > 0) {
3868        smart_str_free(s);
3869    }
3870}
3871
/*
 * Parses the mail header text in `str` (`str_len` bytes) and records every
 * "Name: value" field in `ht`.
 *
 * - The hash key is the field name after php_strtoupper(), with key length
 *   equal to the name length (no terminating NUL counted).
 * - The stored data is a copy of the smart_str struct describing the value.
 *   A value with a == 0 does not own memory: its `c` points into `str` (or at
 *   a string literal), so `str` has to stay alive as long as `ht` is used.
 *   A value from a folded (continued) header owns its buffer (a > 0).
 * - Scanning stops at the end of the input or at a '\n' found at the start of
 *   a line.
 *
 * Returns the value of the internal `state` variable when scanning stopped.
 */
static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
{
    const char *ps;
    size_t icnt;
    int state = 0;
    int crlf_state = -1;

    /* token: the piece of text currently being collected (name or value) */
    smart_str token = { 0, 0, 0 };
    smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };

    ps = str;
    icnt = str_len;

    /*
     *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
     *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
     *      state  0            1           2          3
     *
     *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
     *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
     * crlf_state -1                       0                     1 -1
     *
     * In addition, state 4 means "a continuation line of the current value
     * has just begun" and state 5 means "whitespace-led line that does not
     * continue a value".
     */

    while (icnt > 0) {
        switch (*ps) {
            case ':':
                /* a '\r' that was not followed by '\n' is ordinary data */
                if (crlf_state == 1) {
                    APPEND_ONE_CHAR('\r');
                }

                if (state == 0 || state == 1) {
                    /* first colon on the line ends the field name */
                    fld_name = token;

                    state = 2;
                } else {
                    /* any later colon belongs to the text being collected */
                    APPEND_ONE_CHAR(*ps);
                }

                crlf_state = 0;
                break;

            case '\n':
                /* a newline at the very start of a line ends the headers */
                if (crlf_state == -1) {
                    goto out;
                }
                crlf_state = -1;
                break;

            case '\r':
                if (crlf_state == 1) {
                    /* second '\r' in a row: the earlier one was data */
                    APPEND_ONE_CHAR('\r');
                } else {
                    crlf_state = 1;
                }
                break;

            case ' ': case '\t':
                if (crlf_state == -1) {
                    if (state == 3) {
                        /* continuing from the previous line */
                        /* give the token its own buffer: from now on characters
                         * are appended for real rather than counted */
                        SEPARATE_SMART_STR(&token);
                        state = 4;
                    } else {
                        /* simply skipping this new line */
                        state = 5;
                    }
                } else {
                    if (crlf_state == 1) {
                        APPEND_ONE_CHAR('\r');
                    }
                    /* whitespace is kept only inside a name or a value */
                    if (state == 1 || state == 3) {
                        APPEND_ONE_CHAR(*ps);
                    }
                }
                crlf_state = 0;
                break;

            default:
                switch (state) {
                    case 0:
                        /* first character of a field name */
                        token.c = (char *)ps;
                        token.len = 0;
                        token.a = 0;
                        state = 1;
                        break;

                    case 2:
                        if (crlf_state != -1) {
                            /* first character of the field value */
                            token.c = (char *)ps;
                            token.len = 0;
                            token.a = 0;

                            state = 3;
                            break;
                        }
                        /* break is missing intentionally */
                        /* NOTE(review): on this fall-through `token` has not
                         * been reset since it was assigned to fld_name at the
                         * ':' -- so a header with an empty value appears to be
                         * stored with its own name as the value; confirm
                         * whether that is intended. */

                    case 3:
                        if (crlf_state == -1) {
                            /* a new line starts with a non-blank character:
                             * the previous field is complete, store it */
                            fld_val = token;

                            if (fld_name.c != NULL && fld_val.c != NULL) {
                                char *dummy;

                                /* FIXME: some locale free implementation is
                                 * really required here,,, */
                                /* upper-casing writes to the name, so work on
                                 * a private copy rather than on the input */
                                SEPARATE_SMART_STR(&fld_name);
                                php_strtoupper(fld_name.c, fld_name.len);

                                zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

                                my_smart_str_dtor(&fld_name);
                            }

                            memset(&fld_name, 0, sizeof(smart_str));
                            memset(&fld_val, 0, sizeof(smart_str));

                            /* the current character begins the next field name */
                            token.c = (char *)ps;
                            token.len = 0;
                            token.a = 0;

                            state = 1;
                        }
                        break;

                    case 4:
                        /* the leading whitespace of a continuation line is
                         * represented by one single space */
                        APPEND_ONE_CHAR(' ');
                        state = 3;
                        break;
                }

                if (crlf_state == 1) {
                    APPEND_ONE_CHAR('\r');
                }

                APPEND_ONE_CHAR(*ps);

                crlf_state = 0;
                break;
        }
        ps++, icnt--;
    }
out:
    /* input ended right after "Name:" -- treat the value as empty */
    if (state == 2) {
        token.c = "";
        token.len = 0;
        token.a = 0;

        state = 3;
    }
    /* store the field that was still being collected when scanning stopped */
    if (state == 3) {
        fld_val = token;

        if (fld_name.c != NULL && fld_val.c != NULL) {
            void *dummy;

            /* FIXME: some locale free implementation is
             * really required here,,, */
            SEPARATE_SMART_STR(&fld_name);
            php_strtoupper(fld_name.c, fld_name.len);

            zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

            my_smart_str_dtor(&fld_name);
        }
    }
    return state;
}
4041
/*
 * mb_send_mail(to, subject, message [, additional_headers [, additional_parameters]])
 *
 * Outline:
 *   1. NUL bytes in all string arguments are replaced by spaces (in place).
 *   2. The additional headers are parsed; a user supplied Content-Type charset
 *      or Content-Transfer-Encoding overrides the defaults taken from the
 *      current mbstring language, and suppresses the matching generated header.
 *   3. The subject is MIME header encoded and the body is converted to the
 *      transfer charset and then to the body transfer encoding.
 *   4. MIME-Version / Content-Type / Content-Transfer-Encoding headers are
 *      appended unless already present, and the mail is passed to php_mail().
 *
 * The return value is TRUE when php_mail() succeeds and no error was flagged,
 * FALSE otherwise.
 */
PHP_FUNCTION(mb_send_mail)
{
    int n;
    char *to = NULL;
    int to_len;
    char *message = NULL;
    int message_len;
    char *headers = NULL;
    int headers_len;
    char *subject = NULL;
    int subject_len;
    char *extra_cmd = NULL;
    int extra_cmd_len;
    int i;
    char *to_r = NULL;
    char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
    /* set when the caller's headers already carry the respective field, so
     * that no default one is generated below */
    struct {
        int cnt_type:1;
        int cnt_trans_enc:1;
    } suppressed_hdrs = { 0, 0 };

    char *message_buf = NULL, *subject_buf = NULL, *p;
    mbfl_string orig_str, conv_str;
    mbfl_string *pstr;  /* pointer to mbfl string for return value */
    enum mbfl_no_encoding
        tran_cs,    /* transfer text charset */
        head_enc,   /* header transfer encoding */
        body_enc;   /* body transfer encoding */
    mbfl_memory_device device;  /* automatically allocated buffer for the additional headers */
    const mbfl_language *lang;
    int err = 0;
    HashTable ht_headers;
    smart_str *s;
    extern void mbfl_memory_device_unput(mbfl_memory_device *device);
    char *pp, *ee;  /* scratch cursors required by MAIL_ASCIIZ_CHECK_MBSTRING */

    /* initialize */
    mbfl_memory_device_init(&device, 0, 0);
    mbfl_string_init(&orig_str);
    mbfl_string_init(&conv_str);

    /* character-set, transfer-encoding */
    /* fallbacks, replaced by the current language's settings when available */
    tran_cs = mbfl_no_encoding_utf8;
    head_enc = mbfl_no_encoding_base64;
    body_enc = mbfl_no_encoding_base64;
    lang = mbfl_no2language(MBSTRG(language));
    if (lang != NULL) {
        tran_cs = lang->mail_charset;
        head_enc = lang->mail_header_encoding;
        body_enc = lang->mail_body_encoding;
    }

    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
        return;
    }

    /* ASCIIZ check */
    /* embedded NUL bytes are overwritten with spaces in the argument buffers */
    MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
    MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
    MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
    if (headers) {
        MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
    }
    if (extra_cmd) {
        MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
    }

    /* header name (upper case) => smart_str value; values that own a buffer
     * are released by my_smart_str_dtor when the table is destroyed */
    zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);

    if (headers != NULL) {
        _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
    }

    /* a user supplied Content-Type may select the transfer charset */
    if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
        char *tmp;
        char *param_name;
        char *charset = NULL;

        /* work on a private, NUL-terminated copy: it is tokenized in place */
        SEPARATE_SMART_STR(s);
        smart_str_0(s);

        p = strchr(s->c, ';');

        if (p != NULL) {
            /* skipping the padded spaces */
            do {
                ++p;
            } while (*p == ' ' || *p == '\t');

            if (*p != '\0') {
                /* only the first parameter after ';' is examined */
                if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
                    if (strcasecmp(param_name, "charset") == 0) {
                        enum mbfl_no_encoding _tran_cs = tran_cs;

                        charset = php_strtok_r(NULL, "= \"", &tmp);
                        if (charset != NULL) {
                            _tran_cs = mbfl_name2no_encoding(charset);
                        }

                        if (_tran_cs == mbfl_no_encoding_invalid) {
                            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
                            _tran_cs = mbfl_no_encoding_ascii;
                        }
                        tran_cs = _tran_cs;
                    }
                }
            }
        }
        suppressed_hdrs.cnt_type = 1;
    }

    /* a user supplied Content-Transfer-Encoding selects the body encoding;
     * only base64, 7bit and 8bit are accepted */
    if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
        enum mbfl_no_encoding _body_enc;
        SEPARATE_SMART_STR(s);
        smart_str_0(s);

        _body_enc = mbfl_name2no_encoding(s->c);
        switch (_body_enc) {
            case mbfl_no_encoding_base64:
            case mbfl_no_encoding_7bit:
            case mbfl_no_encoding_8bit:
                body_enc = _body_enc;
                break;

            default:
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
                body_enc =  mbfl_no_encoding_8bit;
                break;
        }
        suppressed_hdrs.cnt_trans_enc = 1;
    }

    /* To: */
    if (to != NULL) {
        if (to_len > 0) {
            /* sanitize a private copy: strip trailing whitespace, then turn
             * control characters into spaces */
            to_r = estrndup(to, to_len);
            for (; to_len; to_len--) {
                if (!isspace((unsigned char) to_r[to_len - 1])) {
                    break;
                }
                to_r[to_len - 1] = '\0';
            }
            for (i = 0; to_r[i]; i++) {
            if (iscntrl((unsigned char) to_r[i])) {
                /* According to RFC 822, section 3.1.1 long headers may be separated into
                 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
                 * To prevent these separators from being replaced with a space, we use the
                 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
                 */
                SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
                to_r[i] = ' ';
            }
            }
        } else {
            /* empty recipient: use the argument as is (not freed below) */
            to_r = to;
        }
    } else {
        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
        err = 1;
    }

    /* Subject: */
    if (subject != NULL && subject_len >= 0) {
        orig_str.no_language = MBSTRG(language);
        orig_str.val = (unsigned char *)subject;
        orig_str.len = subject_len;
        orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
        /* no usable internal encoding: guess one from the detect order list */
        if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
            const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
            orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
        }
        pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
        if (pstr != NULL) {
            /* subject_buf remembers the encoded buffer so it can be freed */
            subject_buf = subject = (char *)pstr->val;
        }
    } else {
        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
        err = 1;
    }

    /* message body */
    if (message != NULL) {
        orig_str.no_language = MBSTRG(language);
        orig_str.val = (unsigned char *)message;
        orig_str.len = (unsigned int)message_len;
        orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;

        if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
            const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
            orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
        }

        pstr = NULL;
        {
            mbfl_string tmpstr;

            /* two steps: first to the transfer charset, then -- treating
             * that result as raw 8bit data -- to the body transfer encoding */
            if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
                tmpstr.no_encoding=mbfl_no_encoding_8bit;
                pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
                efree(tmpstr.val);
            }
        }
        if (pstr != NULL) {
            /* message_buf remembers the converted buffer so it can be freed */
            message_buf = message = (char *)pstr->val;
        }
    } else {
        /* this is not really an error, so it is allowed. */
        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
        message = NULL;
    }

    /* other headers */
#define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
    /* start from the caller's headers, making sure they end with a newline */
    if (headers != NULL) {
        p = headers;
        n = headers_len;
        mbfl_memory_device_strncat(&device, p, n);
        if (n > 0 && p[n - 1] != '\n') {
            mbfl_memory_device_strncat(&device, "\n", 1);
        }
    }

    if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
        mbfl_memory_device_strncat(&device, "\n", 1);
    }

    if (!suppressed_hdrs.cnt_type) {
        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);

        /* the charset parameter is added only when a MIME name is known */
        p = (char *)mbfl_no2preferred_mime_name(tran_cs);
        if (p != NULL) {
            mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
            mbfl_memory_device_strcat(&device, p);
        }
        mbfl_memory_device_strncat(&device, "\n", 1);
    }
    if (!suppressed_hdrs.cnt_trans_enc) {
        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
        p = (char *)mbfl_no2preferred_mime_name(body_enc);
        if (p == NULL) {
            p = "7bit";
        }
        mbfl_memory_device_strcat(&device, p);
        mbfl_memory_device_strncat(&device, "\n", 1);
    }

    /* presumably drops the last byte written (the trailing newline) before
     * the buffer is NUL-terminated -- confirm against mbfl_memory_device_unput */
    mbfl_memory_device_unput(&device);
    mbfl_memory_device_output('\0', &device);
    headers = (char *)device.buffer;

    /* the ini setting, when present, takes precedence over the argument;
     * either way the value is shell-escaped into a buffer freed below */
    if (force_extra_parameters) {
        extra_cmd = php_escape_shell_cmd(force_extra_parameters);
    } else if (extra_cmd) {
        extra_cmd = php_escape_shell_cmd(extra_cmd);
    }

    if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
        RETVAL_TRUE;
    } else {
        RETVAL_FALSE;
    }

    /* release everything allocated above */
    if (extra_cmd) {
        efree(extra_cmd);
    }
    if (to_r != to) {
        efree(to_r);
    }
    if (subject_buf) {
        efree((void *)subject_buf);
    }
    if (message_buf) {
        efree((void *)message_buf);
    }
    mbfl_memory_device_clear(&device);
    zend_hash_destroy(&ht_headers);
}
4323
/* The helper macros above exist only for mb_send_mail() and its header
 * parser; undefine them so they do not leak into the rest of the file. */
#undef SKIP_LONG_HEADER_SEP_MBSTRING
#undef MAIL_ASCIIZ_CHECK_MBSTRING
#undef APPEND_ONE_CHAR
#undef SEPARATE_SMART_STR
#undef PHP_MBSTR_MAIL_MIME_HEADER1
#undef PHP_MBSTR_MAIL_MIME_HEADER2
#undef PHP_MBSTR_MAIL_MIME_HEADER3
#undef PHP_MBSTR_MAIL_MIME_HEADER4
4332/* }}} */
4333
4334/* {{{ proto mixed mb_get_info([string type])
4335   Returns the current settings of mbstring */
4336PHP_FUNCTION(mb_get_info)
4337{
4338    char *typ = NULL;
4339    int typ_len;
4340    size_t n;
4341    char *name;
4342    const struct mb_overload_def *over_func;
4343    zval *row1, *row2;
4344    const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4345    const mbfl_encoding **entry;
4346
4347    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4348        RETURN_FALSE;
4349    }
4350
4351    if (!typ || !strcasecmp("all", typ)) {
4352        array_init(return_value);
4353        if (MBSTRG(current_internal_encoding)) {
4354            add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1);
4355        }
4356        if (MBSTRG(http_input_identify)) {
4357            add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1);
4358        }
4359        if (MBSTRG(current_http_output_encoding)) {
4360            add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1);
4361        }
4362        if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4363            add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4364        }
4365        add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4366        if (MBSTRG(func_overload)){
4367            over_func = &(mb_ovld[0]);
4368            MAKE_STD_ZVAL(row1);
4369            array_init(row1);
4370            while (over_func->type > 0) {
4371                if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4372                    add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4373                }
4374                over_func++;
4375            }
4376            add_assoc_zval(return_value, "func_overload_list", row1);
4377        } else {
4378            add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4379        }
4380        if (lang != NULL) {
4381            if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4382                add_assoc_string(return_value, "mail_charset", name, 1);
4383            }
4384            if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4385                add_assoc_string(return_value, "mail_header_encoding", name, 1);
4386            }
4387            if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4388                add_assoc_string(return_value, "mail_body_encoding", name, 1);
4389            }
4390        }
4391        add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4392        if (MBSTRG(encoding_translation)) {
4393            add_assoc_string(return_value, "encoding_translation", "On", 1);
4394        } else {
4395            add_assoc_string(return_value, "encoding_translation", "Off", 1);
4396        }
4397        if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4398            add_assoc_string(return_value, "language", name, 1);
4399        }
4400        n = MBSTRG(current_detect_order_list_size);
4401        entry = MBSTRG(current_detect_order_list);
4402        if (n > 0) {
4403            size_t i;
4404            MAKE_STD_ZVAL(row2);
4405            array_init(row2);
4406            for (i = 0; i < n; i++) {
4407                add_next_index_string(row2, (*entry)->name, 1);
4408                entry++;
4409            }
4410            add_assoc_zval(return_value, "detect_order", row2);
4411        }
4412        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4413            add_assoc_string(return_value, "substitute_character", "none", 1);
4414        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4415            add_assoc_string(return_value, "substitute_character", "long", 1);
4416        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4417            add_assoc_string(return_value, "substitute_character", "entity", 1);
4418        } else {
4419            add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4420        }
4421        if (MBSTRG(strict_detection)) {
4422            add_assoc_string(return_value, "strict_detection", "On", 1);
4423        } else {
4424            add_assoc_string(return_value, "strict_detection", "Off", 1);
4425        }
4426    } else if (!strcasecmp("internal_encoding", typ)) {
4427        if (MBSTRG(current_internal_encoding)) {
4428            RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1);
4429        }
4430    } else if (!strcasecmp("http_input", typ)) {
4431        if (MBSTRG(http_input_identify)) {
4432            RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1);
4433        }
4434    } else if (!strcasecmp("http_output", typ)) {
4435        if (MBSTRG(current_http_output_encoding)) {
4436            RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1);
4437        }
4438    } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4439        if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4440            RETVAL_STRING(name, 1);
4441        }
4442    } else if (!strcasecmp("func_overload", typ)) {
4443        RETVAL_LONG(MBSTRG(func_overload));
4444    } else if (!strcasecmp("func_overload_list", typ)) {
4445        if (MBSTRG(func_overload)){
4446                over_func = &(mb_ovld[0]);
4447                array_init(return_value);
4448                while (over_func->type > 0) {
4449                    if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4450                        add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4451                    }
4452                    over_func++;
4453                }
4454        } else {
4455            RETVAL_STRING("no overload", 1);
4456        }
4457    } else if (!strcasecmp("mail_charset", typ)) {
4458        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4459            RETVAL_STRING(name, 1);
4460        }
4461    } else if (!strcasecmp("mail_header_encoding", typ)) {
4462        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4463            RETVAL_STRING(name, 1);
4464        }
4465    } else if (!strcasecmp("mail_body_encoding", typ)) {
4466        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4467            RETVAL_STRING(name, 1);
4468        }
4469    } else if (!strcasecmp("illegal_chars", typ)) {
4470        RETVAL_LONG(MBSTRG(illegalchars));
4471    } else if (!strcasecmp("encoding_translation", typ)) {
4472        if (MBSTRG(encoding_translation)) {
4473            RETVAL_STRING("On", 1);
4474        } else {
4475            RETVAL_STRING("Off", 1);
4476        }
4477    } else if (!strcasecmp("language", typ)) {
4478        if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4479            RETVAL_STRING(name, 1);
4480        }
4481    } else if (!strcasecmp("detect_order", typ)) {
4482        n = MBSTRG(current_detect_order_list_size);
4483        entry = MBSTRG(current_detect_order_list);
4484        if (n > 0) {
4485            size_t i;
4486            array_init(return_value);
4487            for (i = 0; i < n; i++) {
4488                add_next_index_string(return_value, (*entry)->name, 1);
4489                entry++;
4490            }
4491        }
4492    } else if (!strcasecmp("substitute_character", typ)) {
4493        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4494            RETVAL_STRING("none", 1);
4495        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4496            RETVAL_STRING("long", 1);
4497        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4498            RETVAL_STRING("entity", 1);
4499        } else {
4500            RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4501        }
4502    } else if (!strcasecmp("strict_detection", typ)) {
4503        if (MBSTRG(strict_detection)) {
4504            RETVAL_STRING("On", 1);
4505        } else {
4506            RETVAL_STRING("Off", 1);
4507        }
4508    } else {
4509        RETURN_FALSE;
4510    }
4511}
4512/* }}} */
4513
4514/* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4515   Check if the string is valid for the specified encoding */
4516PHP_FUNCTION(mb_check_encoding)
4517{
4518    char *var = NULL;
4519    int var_len;
4520    char *enc = NULL;
4521    int enc_len;
4522    mbfl_buffer_converter *convd;
4523    const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4524    mbfl_string string, result, *ret = NULL;
4525    long illegalchars = 0;
4526
4527    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4528        RETURN_FALSE;
4529    }
4530
4531    if (var == NULL) {
4532        RETURN_BOOL(MBSTRG(illegalchars) == 0);
4533    }
4534
4535    if (enc != NULL) {
4536        encoding = mbfl_name2encoding(enc);
4537        if (!encoding || encoding == &mbfl_encoding_pass) {
4538            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4539            RETURN_FALSE;
4540        }
4541    }
4542
4543    convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4544    if (convd == NULL) {
4545        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4546        RETURN_FALSE;
4547    }
4548    mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4549    mbfl_buffer_converter_illegal_substchar(convd, 0);
4550
4551    /* initialize string */
4552    mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4553    mbfl_string_init(&result);
4554
4555    string.val = (unsigned char *)var;
4556    string.len = var_len;
4557    ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4558    illegalchars = mbfl_buffer_illegalchars(convd);
4559    mbfl_buffer_converter_delete(convd);
4560
4561    RETVAL_FALSE;
4562    if (ret != NULL) {
4563        if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4564            RETVAL_TRUE;
4565        }
4566        mbfl_string_clear(&result);
4567    }
4568}
4569/* }}} */
4570
4571
4572/* {{{ php_mb_populate_current_detect_order_list */
4573static void php_mb_populate_current_detect_order_list(TSRMLS_D)
4574{
4575    const mbfl_encoding **entry = 0;
4576    size_t nentries;
4577
4578    if (MBSTRG(current_detect_order_list)) {
4579        return;
4580    }
4581
4582    if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4583        nentries = MBSTRG(detect_order_list_size);
4584        entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4585        memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4586    } else {
4587        const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4588        size_t i;
4589        nentries = MBSTRG(default_detect_order_list_size);
4590        entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4591        for (i = 0; i < nentries; i++) {
4592            entry[i] = mbfl_no2encoding(src[i]);
4593        }
4594    }
4595    MBSTRG(current_detect_order_list) = entry;
4596    MBSTRG(current_detect_order_list_size) = nentries;
4597}
4598
4599/* {{{ static int php_mb_encoding_translation() */
4600static int php_mb_encoding_translation(TSRMLS_D)
4601{
4602    return MBSTRG(encoding_translation);
4603}
4604/* }}} */
4605
4606/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
4607MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4608{
4609    if (enc != NULL) {
4610        if (enc->flag & MBFL_ENCTYPE_MBCS) {
4611            if (enc->mblen_table != NULL) {
4612                if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4613            }
4614        } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4615            return 2;
4616        } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4617            return 4;
4618        }
4619    }
4620    return 1;
4621}
4622/* }}} */
4623
4624/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
4625MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4626{
4627    return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4628}
4629/* }}} */
4630
4631/* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
4632MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4633{
4634    register const char *p = s;
4635    char *last=NULL;
4636
4637    if (nbytes == (size_t)-1) {
4638        size_t nb = 0;
4639
4640        while (*p != '\0') {
4641            if (nb == 0) {
4642                if ((unsigned char)*p == (unsigned char)c) {
4643                    last = (char *)p;
4644                }
4645                nb = php_mb_mbchar_bytes_ex(p, enc);
4646                if (nb == 0) {
4647                    return NULL; /* something is going wrong! */
4648                }
4649            }
4650            --nb;
4651            ++p;
4652        }
4653    } else {
4654        register size_t bcnt = nbytes;
4655        register size_t nbytes_char;
4656        while (bcnt > 0) {
4657            if ((unsigned char)*p == (unsigned char)c) {
4658                last = (char *)p;
4659            }
4660            nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4661            if (bcnt < nbytes_char) {
4662                return NULL;
4663            }
4664            p += nbytes_char;
4665            bcnt -= nbytes_char;
4666        }
4667    }
4668    return last;
4669}
4670/* }}} */
4671
4672/* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
4673MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4674{
4675    return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4676}
4677/* }}} */
4678
4679/* {{{ MBSTRING_API int php_mb_stripos()
4680 */
4681MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4682{
4683    int n;
4684    mbfl_string haystack, needle;
4685    n = -1;
4686
4687    mbfl_string_init(&haystack);
4688    mbfl_string_init(&needle);
4689    haystack.no_language = MBSTRG(language);
4690    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4691    needle.no_language = MBSTRG(language);
4692    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4693
4694    do {
4695        size_t len = 0;
4696        haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4697        haystack.len = len;
4698
4699        if (!haystack.val) {
4700            break;
4701        }
4702
4703        if (haystack.len <= 0) {
4704            break;
4705        }
4706
4707        needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4708        needle.len = len;
4709
4710        if (!needle.val) {
4711            break;
4712        }
4713
4714        if (needle.len <= 0) {
4715            break;
4716        }
4717
4718        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4719        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4720            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4721            break;
4722        }
4723
4724        {
4725            int haystack_char_len = mbfl_strlen(&haystack);
4726
4727            if (mode) {
4728                if ((offset > 0 && offset > haystack_char_len) ||
4729                    (offset < 0 && -offset > haystack_char_len)) {
4730                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4731                    break;
4732                }
4733            } else {
4734                if (offset < 0 || offset > haystack_char_len) {
4735                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4736                    break;
4737                }
4738            }
4739        }
4740
4741        n = mbfl_strpos(&haystack, &needle, offset, mode);
4742    } while(0);
4743
4744    if (haystack.val) {
4745        efree(haystack.val);
4746    }
4747
4748    if (needle.val) {
4749        efree(needle.val);
4750    }
4751
4752    return n;
4753}
4754/* }}} */
4755
4756static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */
4757{
4758    *list = (const zend_encoding **)MBSTRG(http_input_list);
4759    *list_size = MBSTRG(http_input_list_size);
4760}
4761/* }}} */
4762
4763static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
4764{
4765    MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
4766}
4767/* }}} */
4768
4769#endif  /* HAVE_MBSTRING */
4770
4771/*
4772 * Local variables:
4773 * tab-width: 4
4774 * c-basic-offset: 4
4775 * End:
4776 * vim600: fdm=marker
4777 * vim: noet sw=4 ts=4
4778 */
4779